omnidotdev
diff --git a/content/docs/grid/synapse/api.mdx b/content/docs/grid/synapse/api.mdx
Lines changed: 6 additions & 2 deletions
diff --git a/content/docs/grid/synapse/authentication.mdx b/content/docs/grid/synapse/authentication.mdx
Lines changed: 37 additions & 47 deletions
diff --git a/content/docs/grid/synapse/configuration.mdx b/content/docs/grid/synapse/configuration.mdx
Lines changed: 91 additions & 31 deletions
@@ -328,13 +328,17 @@ Response:
 
 ## GET /health
 
-Health check endpoint. No authentication required.
+Health check endpoint. No authentication required. The gateway binary listens on port 3000 by default; Docker Compose maps host port 6000 to container port 3000.
 
 ```bash
+# Docker Compose (host port)
 curl http://localhost:6000/health
+
+# Binary (default port)
+curl http://localhost:3000/health
 ```
 
-Response:
+Response (plain text):
 
 ```
 ok
 
@@ -7,7 +7,7 @@ Synapse supports multiple authentication methods to secure access to its API end
 
 ## API Key Authentication
 
-The default authentication method. Clients pass an API key in the request header, and Synapse validates it against the configured key store.
+The default authentication method. Clients pass an API key in the request header, and Synapse validates it by calling the synapse-api service's `/internal/resolve-key` endpoint.
 
 ### Header Format
 
@@ -25,53 +25,46 @@ curl -H "x-api-key: your-api-key" \
 
 ```toml
 [auth]
-method = "api_key"
-
-# Static key list
-[[auth.api_keys]]
-key = "{{ env.SYNAPSE_API_KEY }}"
-name = "production"
+enabled = true
+api_url = "http://synapse-api:4000"
+gateway_secret = "{{ env.GATEWAY_SECRET }}"
+cache_ttl_seconds = 30
+cache_capacity = 10000
+public_paths = ["/health"]
 ```
 
+| Field | Description | Default |
+|-------|-------------|---------|
+| `enabled` | Enable API key auth | false |
+| `api_url` | URL of the synapse-api service | -- (required) |
+| `gateway_secret` | Shared secret for gateway-to-API authentication | -- (required) |
+| `cache_ttl_seconds` | How long to cache resolved API keys | 30 |
+| `cache_capacity` | Maximum number of cached key resolutions | 10000 |
+| `public_paths` | Paths that skip authentication | `["/health"]` |
+| `tls_skip_verify` | Skip TLS verification for API calls (dev only) | false |
+
 <Callout type="warning">
-  Always use environment variable interpolation for API keys. Never hardcode secrets in config files.
+  Always use environment variable interpolation for the gateway secret. Never hardcode secrets in config files.
 </Callout>
 
-### Key Validation
+### How It Works
 
-When Synapse runs alongside the Omni API (synapse-api), keys are validated against the API's key store. This enables per-key rate limits, usage tracking, and key rotation without restarting Synapse.
+When a request arrives with an API key, Synapse calls synapse-api's `/internal/resolve-key` endpoint (authenticated with `gateway_secret`) to validate the key and resolve the associated client identity. Resolved keys are cached locally for `cache_ttl_seconds` to avoid per-request API calls.
 
-## OAuth2 / JWT
+When keys are rotated or revoked, synapse-api can push cache invalidations to the gateway's `/internal/invalidate-key` endpoint, so the change does not have to wait for `cache_ttl_seconds` to elapse.
 
-For applications that use OAuth2 or JWT-based authentication, Synapse can validate tokens against a JWKS endpoint.
+### BYOK Vault Integration
 
-### Configuration
+When `[auth.vault]` is configured, Synapse resolves BYOK (Bring Your Own Key) provider keys from a Gatekeeper vault instead of the synapse-api database:
 
 ```toml
-[auth]
-method = "jwt"
-
-[auth.jwt]
-jwks_url = "https://auth.omni.dev/.well-known/jwks.json"
-issuer = "https://auth.omni.dev"
-audience = "synapse"
+[auth.vault]
+url = "https://gatekeeper.omni.dev"
+service_key = "{{ env.GATEKEEPER_SERVICE_KEY }}"
+cache_ttl_seconds = 300
+cache_capacity = 10000
 ```
 
-| Field | Description | Required |
-|-------|-------------|----------|
-| `jwks_url` | URL to the JWKS endpoint for public key discovery | Yes |
-| `issuer` | Expected `iss` claim in the JWT | Yes |
-| `audience` | Expected `aud` claim in the JWT | No |
-
-### Token Usage
-
-```bash
-curl -H "Authorization: Bearer eyJhbGciOiJSUzI1NiIs..." \
-  http://localhost:6000/v1/chat/completions ...
-```
-
-Synapse caches the JWKS response and refreshes it periodically. Tokens are validated for signature, expiry, issuer, and audience.
-
 ## CSRF Protection
 
 Synapse includes CSRF protection for browser-based clients.
@@ -81,10 +74,10 @@ Synapse includes CSRF protection for browser-based clients.
 ```toml
 [server.csrf]
 enabled = true
-token_header = "X-CSRF-Token"
+header_name = "X-Synapse-CSRF-Protection"
 ```
 
-When enabled, non-GET requests from browser clients must include a valid CSRF token in the configured header. Tokens are issued via a dedicated endpoint or embedded in the initial page load.
+When enabled, non-GET requests from browser clients must include a valid CSRF token in the configured header.
 
 <Callout type="info">
   CSRF protection is primarily relevant when Synapse is accessed directly from a browser. API clients using `Authorization` headers are not affected.
@@ -98,11 +91,13 @@ Synapse tracks which client is making each request for usage analytics and rate
 2. **JWT claims** -- `sub` or `client_id` claim
 3. **IP address** -- fallback when no other identifier is available
 
-### Custom Client Header
+### Custom Client Identification
+
+Client identification is configured under `[server.client_identification]`, separately from `[auth]`:
 
 ```toml
-[auth]
-client_id_header = "X-Client-Id"
+[server.client_identification]
+header_name = "X-Client-Id"
 ```
 
 When set, Synapse uses the value of this header as the client identifier, regardless of the authentication method.
@@ -119,14 +114,9 @@ When set, Synapse uses the value of this header as the client identifier, regard
 
 ## Unauthenticated Endpoints
 
-The following endpoints do not require authentication:
-
-- `GET /health` -- health check
-- `GET /v1/models` -- model list (configurable, can require auth)
-
-To require authentication on all endpoints:
+Paths listed in `public_paths` skip authentication. By default, only `/health` is public:
 
 ```toml
 [auth]
-require_auth_for_models = true
+public_paths = ["/health"]
 ```
@@ -9,9 +9,11 @@ Synapse is configured via a TOML file passed with `--config`. This page covers e
 
 ```toml
 [server]
-listen_address = "0.0.0.0:6000"
+listen_address = "0.0.0.0:3000"
 ```
 
+The binary listens on port 3000 by default. In Docker Compose, port 6000 on the host is mapped to 3000 inside the container.
+
 ### TLS
 
 ```toml
@@ -22,7 +24,13 @@ key_path = "/etc/synapse/key.pem"
 
 ### Health Endpoint
 
-The health endpoint is always available at `GET /health` and returns `ok` (plain text) when the server is ready.
+The health endpoint is enabled by default at `GET /health` and returns `ok` (plain text) when the server is ready. The path and enabled state are configurable:
+
+```toml
+[server.health]
+enabled = true
+path = "/health"
+```
 
 ### CORS
 
@@ -39,7 +47,7 @@ max_age = 3600
 ```toml
 [server.csrf]
 enabled = true
-token_header = "X-CSRF-Token"
+header_name = "X-Synapse-CSRF-Protection"
 ```
 
 ## Environment Variable Interpolation
@@ -69,28 +77,68 @@ See [synapse.omni.dev/pricing](https://synapse.omni.dev/pricing) for current pla
 
 ## Rate Limiting
 
-### In-memory
+Rate limiting lives under `[server.rate_limit]`. It supports global and per-IP request limits with either in-memory or cache-backed (distributed) storage.
+
+### In-memory (default)
 
 ```toml
-[rate_limit]
-backend = "memory"
-requests_per_minute = 60
-burst = 10
+[server.rate_limit]
+
+[server.rate_limit.storage]
+type = "memory"
+
+[server.rate_limit.global]
+requests = 60
+window = "1m"
 ```
 
-### Redis
+### Cache-backed (distributed)
 
 ```toml
-[rate_limit]
-backend = "redis"
-redis_url = "{{ env.REDIS_URL }}"
-requests_per_minute = 120
-burst = 20
-key_prefix = "synapse:rl:"
+[server.rate_limit]
+
+[server.rate_limit.storage]
+type = "cache"
+url = "{{ env.VALKEY_URL }}"
+pool_size = 10
+connect_timeout = 5
+
+[server.rate_limit.global]
+requests = 120
+window = "1m"
+
+[server.rate_limit.per_ip]
+requests = 30
+window = "1m"
 ```
 
+### Token-based rate limits
+
+For LLM token budgets, configure `[server.rate_limit.tokens]`:
+
+```toml
+[server.rate_limit.tokens.default]
+tokens = 100000
+window = "1h"
+
+[server.rate_limit.tokens.groups.premium]
+tokens = 1000000
+window = "1h"
+```
+
+| Field | Description | Default |
+|-------|-------------|---------|
+| `storage.type` | `memory` or `cache` | `memory` |
+| `storage.url` | Cache connection URL (required for `cache` type) | -- |
+| `storage.pool_size` | Connection pool size | 10 |
+| `storage.connect_timeout` | Connection timeout in seconds | 5 |
+| `global.requests` | Max requests per window (all clients) | -- |
+| `global.window` | Window duration (e.g. `"1m"`, `"1h"`) | -- |
+| `per_ip.requests` | Max requests per IP per window | -- |
+| `per_ip.window` | Window duration for per-IP limit | -- |
+
 <Callout type="info">
-  Redis-backed rate limiting is recommended for multi-instance deployments where limits should be shared across replicas.
+  Cache-backed rate limiting is recommended for multi-instance deployments where limits should be shared across replicas.
 </Callout>
 
 ## Failover
@@ -124,21 +172,21 @@ recovery_seconds = 30
 
 ## Logging
 
-```toml
-[logging]
-level = "info"
-format = "json"
+Log level is controlled by the `RUST_LOG` environment variable, which takes comma-separated `target=level` directives:
+
+```bash
+RUST_LOG=synapse=info,tower_http=info
 ```
 
-Supported levels: `trace`, `debug`, `info`, `warn`, `error`. The `json` format is recommended for production; use `pretty` for local development.
+Supported levels: `trace`, `debug`, `info`, `warn`, `error`.
 
 ## Production Example
 
 A full production config combining multiple sections:
 
 ```toml
 [server]
-listen_address = "0.0.0.0:6000"
+listen_address = "0.0.0.0:3000"
 
 [server.tls]
 cert_path = "/etc/synapse/cert.pem"
@@ -150,11 +198,22 @@ allowed_origins = ["https://app.omni.dev"]
 [server.csrf]
 enabled = true
 
-[rate_limit]
-backend = "redis"
-redis_url = "{{ env.REDIS_URL }}"
-requests_per_minute = 120
-burst = 20
+[server.rate_limit]
+[server.rate_limit.storage]
+type = "cache"
+url = "{{ env.VALKEY_URL }}"
+
+[server.rate_limit.global]
+requests = 120
+window = "1m"
+
+[server.rate_limit.per_ip]
+requests = 30
+window = "1m"
+
+[llm.failover]
+enabled = true
+max_attempts = 2
 
 [llm.failover.circuit_breaker]
 error_threshold = 5
@@ -165,10 +224,6 @@ recovery_seconds = 30
 name = "frontier"
 models = ["openai/gpt-4o", "anthropic/claude-sonnet-4-20250514"]
 
-[logging]
-level = "info"
-format = "json"
-
 [llm.providers.anthropic]
 type = "anthropic"
 api_key = "{{ env.ANTHROPIC_API_KEY }}"
@@ -177,6 +232,11 @@ api_key = "{{ env.ANTHROPIC_API_KEY }}"
 type = "openai"
 api_key = "{{ env.OPENAI_API_KEY }}"
 
+[auth]
+enabled = true
+api_url = "http://synapse-api:4000"
+gateway_secret = "{{ env.GATEWAY_SECRET }}"
+
 [stt.providers.whisper]
 type = "whisper"
 api_key = "{{ env.OPENAI_API_KEY }}"