From fe3b147476dbb17b158ac1b2b6ddd8364013b0ec Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 28 May 2026 10:49:38 -0500 Subject: [PATCH 1/4] feat(providers): add Google Vertex AI provider Adds Vertex AI provider profiles, routing, credential refresh plumbing, CLI support, docs, and regression coverage. Keeps the related NETLINK_ROUTE seccomp allowance needed by Vertex client tooling that calls getifaddrs. --- architecture/gateway.md | 83 +- crates/openshell-cli/src/main.rs | 66 +- crates/openshell-cli/src/run.rs | 588 +++++-- .../tests/provider_commands_integration.rs | 760 ++++++++- crates/openshell-core/src/inference.rs | 135 +- crates/openshell-providers/src/lib.rs | 17 +- crates/openshell-providers/src/profiles.rs | 115 ++ crates/openshell-router/src/backend.rs | 825 +++++++++- crates/openshell-router/src/config.rs | 39 +- crates/openshell-router/src/mock.rs | 2 + .../tests/backend_integration.rs | 230 +++ crates/openshell-sandbox/src/lib.rs | 228 ++- crates/openshell-sandbox/src/proxy.rs | 40 +- .../src/sandbox/linux/seccomp.rs | 161 +- .../tests/system_inference.rs | 6 + crates/openshell-server/src/grpc/provider.rs | 434 ++++- crates/openshell-server/src/inference.rs | 1452 ++++++++++++++++- docs/index.yml | 2 + docs/providers/google-vertex-ai.mdx | 147 ++ docs/sandboxes/inference-routing.mdx | 21 +- docs/sandboxes/manage-providers.mdx | 5 +- docs/sandboxes/providers-v2.mdx | 3 +- docs/security/best-practices.mdx | 2 +- proto/inference.proto | 5 + providers/google-vertex-ai.yaml | 82 + 25 files changed, 5051 insertions(+), 397 deletions(-) create mode 100644 docs/providers/google-vertex-ai.mdx create mode 100644 providers/google-vertex-ai.yaml diff --git a/architecture/gateway.md b/architecture/gateway.md index 01f377a2d..552b4c345 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -37,12 +37,6 @@ health, metrics, or tunnel routes. 
The plaintext service router also rejects browser requests whose Fetch Metadata, Origin, or Referer headers indicate a cross-origin or sibling-subdomain request. -Dedicated health listeners expose `/healthz` (process liveness only) and -`/readyz` (dependency-aware readiness). Readiness reflects the latest result -of an in-process background task that pings the persistence layer on a -fixed cadence; the handler reads a cached state, so responses are -sub-millisecond and never race the kubelet probe timeout. - Supported auth modes: | Mode | Use | @@ -151,8 +145,12 @@ not readable by other local users on shared hosts. The same restriction is reapplied to the `-wal` and `-shm` sidecars (created by SQLite's default WAL journal mode), which mirror the same sensitive contents. -Persisted state includes sandboxes, providers, SSH sessions, policy revisions, -settings, inference configuration, and deployment records. +Persisted state includes sandboxes, providers, provider credential refresh +state, SSH sessions, policy revisions, settings, inference configuration, and +deployment records. Provider refresh material is stored as a separate object +scoped to the provider instance through `objects.scope`; the provider record +keeps only the current injectable credential values and optional per-credential +expiry timestamps. ### Optimistic Concurrency (CAS) @@ -245,6 +243,75 @@ config path. A gateway-global policy can override sandbox-scoped policy. The sandbox supervisor polls for config revisions and hot-reloads dynamic policy when the policy engine accepts the update. +Provider credential expiry is enforced during gateway-to-sandbox credential +resolution and again by the sandbox placeholder resolver. This keeps expired +credentials from resolving even when a running sandbox still has retained +placeholder generations from an earlier provider credential snapshot. 
+ +## Inference Resolution + +Cluster inference routes store only `provider_name`, `model_id`, and optional +timeout. The gateway resolves endpoint URLs, protocols, credentials, auth +style, and route-shaping metadata from the provider record when supervisors call +`GetInferenceBundle`. Supported provider types for cluster inference are +`openai`, `anthropic`, `nvidia`, and `google-vertex-ai`. + +The bundle carries enough information for sandbox-local routers to construct +upstream URLs without re-deriving provider-specific routing logic. Each resolved +route may include: + +| Field | Meaning | +|---|---| +| `model_in_path` | When true, the model identifier is part of the upstream URL path, not only the request body. | +| `request_path_override` | Path override or suffix. With `model_in_path=false`, replaces the protocol-derived path; with `model_in_path=true`, appended after the model ID. | + +For standard providers these fields stay unset and the sandbox router uses default +protocol paths. Vertex AI is model-aware: the gateway constructs the base URL +from provider config (`VERTEX_AI_PROJECT_ID`, `VERTEX_AI_REGION`, optional +`VERTEX_AI_PUBLISHER`) and emits route-shaping metadata so the sandbox router +stays provider-agnostic. + +Host selection follows the configured region: + +| Region value | Vertex host | +|---|---| +| `global` | `aiplatform.googleapis.com` | +| `us` or `eu` | `aiplatform.{region}.rep.googleapis.com` | +| Any other (e.g. `us-central1`) | `{region}-aiplatform.googleapis.com` | + +Route shaping by publisher: + +- **Anthropic (Claude)** — `model_in_path=true`, base path under + `publishers/anthropic/models`, protocol `anthropic_messages` only. The gateway + resolves `request_path_override=:rawPredict`; the sandbox router keeps + `:rawPredict` for buffered requests and upgrades to `:streamRawPredict` only + for streaming proxy calls. 
+- **All other models** (Gemini, third-party, unknown) — OpenAI-compatible + `.../endpoints/openapi` base with `request_path_override=/chat/completions`; + protocol `openai_chat_completions`. + +Callers may supply `GOOGLE_VERTEX_AI_BASE_URL` or `VERTEX_AI_BASE_URL` only for +non-Anthropic routes. Anthropic base URL overrides are rejected because they +cannot safely preserve model-path shaping and `anthropic_version` body +adaptation. Overrides still pin `request_path_override=/chat/completions` and +must use `https` with an official Vertex AI hostname (`aiplatform.googleapis.com`, +`aiplatform.{us,eu}.rep.googleapis.com`, or `{region}-aiplatform.googleapis.com`). + +Header passthrough is protocol-dependent. Vertex Claude rawPredict routes strip +client `anthropic-beta` headers; `anthropic-version` is not forwarded because +the sandbox router injects `anthropic_version` into the request body for Vertex +rawPredict. Non-Anthropic Vertex routes do not inherit Anthropic passthrough +headers. + +For `google-vertex-ai` providers created with CLI `--from-gcloud-adc`, the CLI +calls gateway `ConfigureProviderRefresh` with OAuth2 refresh material from gcloud +ADC, then `RotateProviderCredential` to mint the first access token before +reporting success. ADC-backed providers mint into `GOOGLE_VERTEX_AI_TOKEN`. A +successful create therefore yields an immediately usable provider; failures roll +back the provider record. Service-account JSON and private keys are gateway-side +refresh bootstrap material only; sandbox runtime inference receives minted +access tokens, not raw service-account material. + ## Supervisor Relay Sandbox workloads maintain an outbound supervisor session to the gateway. 
This diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 917c8faa1..042202ef0 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -184,7 +184,7 @@ fn resolve_sandbox_name(name: Option, gateway: &str) -> Result { let last = load_last_sandbox(gateway).ok_or_else(|| { miette::miette!( "No sandbox name provided and no last-used sandbox.\n\ - Specify a sandbox name or connect to one first: openshell sandbox connect " + Specify a sandbox name or connect to one first: nav sandbox connect " ) })?; eprintln!("{} Using sandbox '{}' (last used)", "→".bold(), last.bold()); @@ -716,7 +716,7 @@ impl From for openshell_cli::ssh::Editor { #[derive(Subcommand, Debug)] enum ProviderCommands { /// Create a provider config. - #[command(group = clap::ArgGroup::new("cred_source").required(true).args(["from_existing", "credentials"]), help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] + #[command(group = clap::ArgGroup::new("cred_source").required(true).args(["from_existing", "credentials", "from_gcloud_adc"]), help_template = LEAF_HELP_TEMPLATE, next_help_heading = "FLAGS")] Create { /// Provider name. #[arg(long)] @@ -727,17 +727,23 @@ enum ProviderCommands { provider_type: String, /// Load provider credentials/config from existing local state. - #[arg(long, conflicts_with = "credentials")] + #[arg(long, conflicts_with_all = ["credentials", "from_gcloud_adc"])] from_existing: bool, /// Provider credential pair (`KEY=VALUE`) or env lookup key (`KEY`). #[arg( long = "credential", value_name = "KEY[=VALUE]", - conflicts_with = "from_existing" + conflicts_with_all = ["from_existing", "from_gcloud_adc"] )] credentials: Vec, + /// Configure credentials from gcloud Application Default Credentials + /// (`~/.config/gcloud/application_default_credentials.json`). + /// Only valid for google-vertex-ai providers. 
+ #[arg(long, group = "cred_source", conflicts_with_all = ["from_existing", "credentials"])] + from_gcloud_adc: bool, + /// Provider config key/value pair. #[arg(long = "config", value_name = "KEY=VALUE")] config: Vec, @@ -2609,7 +2615,7 @@ async fn main() -> Result<()> { apply_auth(&mut tls, &ctx.name); let sandbox_dest = dest.as_deref(); let local = std::path::Path::new(&local_path); - if !run::local_upload_path_exists(local) { + if !local.exists() { return Err(miette::miette!( "local path does not exist: {}", local.display() @@ -2617,10 +2623,7 @@ async fn main() -> Result<()> { } let dest_display = sandbox_dest.unwrap_or("~"); eprintln!("Uploading {} -> sandbox:{}", local.display(), dest_display); - if !no_git_ignore - && !run::local_upload_path_is_symlink(local) - && let Ok((base_dir, files)) = run::git_sync_files(local) - { + if !no_git_ignore && let Ok((base_dir, files)) = run::git_sync_files(local) { run::sandbox_sync_up_files( &ctx.endpoint, &name, @@ -2770,6 +2773,7 @@ async fn main() -> Result<()> { provider_type, from_existing, credentials, + from_gcloud_adc, config, } => { run::provider_create( @@ -2778,6 +2782,7 @@ async fn main() -> Result<()> { provider_type.as_str(), from_existing, &credentials, + from_gcloud_adc, &config, &tls, ) @@ -3485,7 +3490,7 @@ mod tests { let err = resolve_sandbox_name(None, "unknown-gateway").unwrap_err(); let msg = err.to_string(); assert!( - msg.contains("openshell sandbox connect"), + msg.contains("nav sandbox connect"), "expected helpful hint in error, got: {msg}" ); }); @@ -3810,6 +3815,47 @@ mod tests { } } + #[test] + fn provider_create_rejects_from_gcloud_adc_with_from_existing() { + let err = Cli::try_parse_from([ + "openshell", + "provider", + "create", + "--name", + "vertex-local", + "--type", + "google-vertex-ai", + "--from-existing", + "--from-gcloud-adc", + ]) + .expect_err("clap should reject conflicting credential sources"); + + let msg = err.to_string(); + assert!(msg.contains("--from-existing")); + 
assert!(msg.contains("--from-gcloud-adc")); + } + + #[test] + fn provider_create_rejects_from_gcloud_adc_with_credential() { + let err = Cli::try_parse_from([ + "openshell", + "provider", + "create", + "--name", + "vertex-local", + "--type", + "google-vertex-ai", + "--from-gcloud-adc", + "--credential", + "GOOGLE_VERTEX_AI_TOKEN=token", + ]) + .expect_err("clap should reject conflicting credential sources"); + + let msg = err.to_string(); + assert!(msg.contains("--credential")); + assert!(msg.contains("--from-gcloud-adc")); + } + #[test] fn provider_refresh_commands_parse() { let status = Cli::try_parse_from([ diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index b92be199e..9280b48dc 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -219,8 +219,6 @@ struct ProvisioningDisplay { completed_steps: Vec, /// Progress bars for completed steps (so they can be cleared). completed_bars: Vec, - /// The currently active provisioning step. - active_step: Option, /// The currently active step label (shown on the spinner). active_label: String, /// Detail text shown next to the active step (e.g. image name). @@ -255,7 +253,6 @@ impl ProvisioningDisplay { spacer, completed_steps: Vec::new(), completed_bars: Vec::new(), - active_step: None, active_label: ProvisioningStep::RequestingSandbox .active_label() .to_string(), @@ -293,15 +290,11 @@ impl ProvisioningDisplay { self.step_start = Instant::now(); self.spinner.reset_elapsed(); self.active_detail.clear(); - if self.active_step == Some(step) { - self.active_step = None; - } } /// Set the active (in-progress) step shown on the spinner. - fn set_active(&mut self, step: ProvisioningStep) { - self.active_step = Some(step); - self.active_label = step.active_label().to_string(); + fn set_active(&mut self, label: &str) { + self.active_label = label.to_string(); self.active_detail.clear(); // Reset the spinner's elapsed time for the new step. 
self.spinner.reset_elapsed(); @@ -311,17 +304,11 @@ impl ProvisioningDisplay { /// Set the active step from a known provisioning step enum. fn set_active_step(&mut self, step: ProvisioningStep) { - if self.active_step == Some(step) { - return; - } - self.set_active(step); + self.set_active(step.active_label()); } /// Set detail text shown alongside the active step (e.g. image name). fn set_active_detail(&mut self, detail: &str) { - if self.active_detail == detail { - return; - } self.active_detail = detail.to_string(); self.update_spinner(); } @@ -2020,17 +2007,7 @@ pub async fn sandbox_create( "\u{2022}".dimmed(), ); let local = Path::new(local_path); - if !local_upload_path_exists(local) { - return Err(miette::miette!( - "local path does not exist: {}", - local.display() - )); - } - - if *git_ignore - && !local_upload_path_is_symlink(local) - && let Ok((base_dir, files)) = git_sync_files(local) - { + if *git_ignore && let Ok((base_dir, files)) = git_sync_files(local) { sandbox_sync_up_files( &effective_server, &sandbox_name, @@ -2041,7 +2018,7 @@ pub async fn sandbox_create( &effective_tls, ) .await?; - } else { + } else if local.exists() { sandbox_sync_up( &effective_server, &sandbox_name, @@ -2386,7 +2363,7 @@ pub async fn sandbox_sync_command( match (up, down) { (Some(local_path), None) => { let local = Path::new(local_path); - if !local_upload_path_exists(local) { + if !local.exists() { return Err(miette::miette!( "local path does not exist: {}", local.display() @@ -4118,6 +4095,131 @@ fn service_display_name(service: &str) -> &str { if service.is_empty() { "-" } else { service } } +/// Read gcloud Application Default Credentials from disk. +/// +/// Returns `(client_id, client_secret, refresh_token)`. +/// +/// Checks `GOOGLE_APPLICATION_CREDENTIALS` first; falls back to +/// `$CLOUDSDK_CONFIG/application_default_credentials.json` when set, then to +/// `~/.config/gcloud/application_default_credentials.json`. 
+fn read_gcloud_adc() -> Result<(String, String, String)> { + let path = if let Some(env_path) = std::env::var("GOOGLE_APPLICATION_CREDENTIALS") + .ok() + .filter(|v| !v.is_empty()) + { + PathBuf::from(env_path) + } else if let Some(config_dir) = std::env::var("CLOUDSDK_CONFIG") + .ok() + .filter(|v| !v.is_empty()) + { + PathBuf::from(config_dir).join("application_default_credentials.json") + } else { + let home = std::env::var("HOME") + .map_err(|_| miette::miette!("HOME is not set; cannot locate gcloud ADC file"))?; + PathBuf::from(home) + .join(".config") + .join("gcloud") + .join("application_default_credentials.json") + }; + + let content = std::fs::read_to_string(&path).map_err(|err| { + miette::miette!( + "failed to read gcloud ADC file at {}: {}. \ + Run: gcloud auth application-default login", + path.display(), + err + ) + })?; + + let json: serde_json::Value = serde_json::from_str(&content) + .map_err(|err| miette::miette!("failed to parse gcloud ADC file: {err}"))?; + + let cred_type = json.get("type").and_then(|v| v.as_str()); + match cred_type { + Some("service_account") => { + return Err(miette::miette!( + "Application Default Credentials are a service account key, not user credentials. \ + To use a service account, create the provider with the service account JSON key \ + and configure gateway-managed refresh for 'GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN'. \ + See: openshell provider create --help" + )); + } + Some("authorized_user") => {} + Some(other) => { + return Err(miette::miette!( + "Application Default Credentials have unsupported type '{other}' \ + (expected 'authorized_user'). \ + Run: gcloud auth application-default login" + )); + } + None => { + return Err(miette::miette!( + "gcloud ADC file is missing the 'type' field. \ + The file may be malformed. 
\ + Run: gcloud auth application-default login" + )); + } + } + + let client_id = json + .get("client_id") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .ok_or_else(|| miette::miette!("gcloud ADC file is missing 'client_id'"))? + .to_string(); + + let client_secret = json + .get("client_secret") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .ok_or_else(|| miette::miette!("gcloud ADC file is missing 'client_secret'"))? + .to_string(); + + let refresh_token = json + .get("refresh_token") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .ok_or_else(|| miette::miette!("gcloud ADC file is missing 'refresh_token'"))? + .to_string(); + + Ok((client_id, client_secret, refresh_token)) +} + +async fn rollback_provider_create_after_vertex_adc_failure( + client: &mut crate::tls::GrpcClient, + provider_name: &str, + stage: &str, + source: &Status, +) -> Result<()> { + match client + .delete_provider(DeleteProviderRequest { + name: provider_name.to_string(), + }) + .await + { + Ok(_) => Err(miette!( + "failed to {stage} Vertex AI credentials from gcloud ADC for provider '{provider_name}': {source}. \ + The provider was rolled back successfully." + )), + Err(cleanup_err) => { + eprintln!( + "{} Failed to clean up provider '{}' after {} failed: {}. \ + Run 'openshell provider delete {}' to remove it manually.", + "⚠".yellow(), + provider_name, + stage, + cleanup_err, + provider_name + ); + Err(miette!( + "failed to {stage} Vertex AI credentials from gcloud ADC for provider '{provider_name}': {source}. \ + Cleanup also failed, so the provider may still exist. \ + Run 'openshell provider delete {provider_name}' to remove it manually." + )) + } + } +} + fn service_url_for_gateway(service_url: &str, gateway_endpoint: &str) -> String { let (Ok(mut service_url), Ok(gateway_endpoint)) = ( url::Url::parse(service_url), @@ -4187,9 +4289,27 @@ async fn discover_existing_provider_data( if gateway_providers_v2_enabled(client).await? 
{ let profile = fetch_provider_profile(client, provider_type).await?; let profile = ProviderTypeProfile::from_proto(&profile); - discover_from_profile(&profile, &RealDiscoveryContext).map_err(|err| { - miette::miette!("failed to discover existing provider data from profile: {err}") - }) + let mut discovered = + discover_from_profile(&profile, &RealDiscoveryContext).map_err(|err| { + miette::miette!("failed to discover existing provider data from profile: {err}") + })?; + + // Vertex AI config keys (project ID, region, base URL, publisher) are not + // declared in the profile's discovery.credentials list, so discover_from_profile + // does not scan them. Scan them directly here so --from-existing captures them. + if provider_type == VERTEX_AI_PROVIDER_TYPE { + let discovered = discovered.get_or_insert_with(Default::default); + for key in openshell_core::inference::VERTEX_AI_CONFIG_KEY_NAMES { + if let Ok(val) = std::env::var(key) { + let val = val.trim().to_string(); + if !val.is_empty() { + discovered.config.entry(key.to_string()).or_insert(val); + } + } + } + } + + Ok(discovered) } else { let registry = ProviderRegistry::new(); registry @@ -4198,15 +4318,41 @@ async fn discover_existing_provider_data( } } +/// Canonical provider type string for Google Vertex AI. +const VERTEX_AI_PROVIDER_TYPE: &str = "google-vertex-ai"; + +fn missing_credentials_error(provider_type: &str) -> miette::Report { + if provider_type == VERTEX_AI_PROVIDER_TYPE { + return miette::miette!( + "no credentials resolved for provider type '{provider_type}'. \ + Set GOOGLE_VERTEX_AI_TOKEN, VERTEX_AI_TOKEN, \ + GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN, or VERTEX_AI_SERVICE_ACCOUNT_TOKEN; \ + or use --from-gcloud-adc / --from-existing with those env vars set." + ); + } + + miette::miette!( + "no credentials resolved for provider type '{provider_type}'. \ + Use --credential KEY[=VALUE] or --from-existing with the appropriate env vars set." 
+ ) +} + +#[allow(clippy::too_many_arguments)] pub async fn provider_create( server: &str, name: &str, provider_type: &str, from_existing: bool, credentials: &[String], + from_gcloud_adc: bool, config: &[String], tls: &TlsOptions, ) -> Result<()> { + if from_gcloud_adc && (from_existing || !credentials.is_empty()) { + return Err(miette::miette!( + "--from-gcloud-adc cannot be combined with --from-existing or --credential" + )); + } if from_existing && !credentials.is_empty() { return Err(miette::miette!( "--from-existing cannot be combined with --credential" @@ -4243,6 +4389,12 @@ pub async fn provider_create( } }; + if from_gcloud_adc && provider_type != VERTEX_AI_PROVIDER_TYPE { + return Err(miette::miette!( + "--from-gcloud-adc is only valid for google-vertex-ai providers" + )); + } + let mut credential_map = parse_credential_pairs(credentials)?; let mut config_map = parse_key_value_pairs(config, "--config")?; @@ -4263,18 +4415,27 @@ pub async fn provider_create( } if credential_map.is_empty() { + if from_existing { + return Err(missing_credentials_error(&provider_type)); + } let allows_refresh_bootstrap = fetch_provider_profile(&mut client, &provider_type) .await .ok() .is_some_and(|profile| provider_profile_allows_refresh_bootstrap(&profile)); if !allows_refresh_bootstrap { - return Err(miette::miette!( - "no credentials resolved for provider type '{provider_type}'. \ - Use --credential KEY[=VALUE] or --from-existing with the appropriate env vars set." - )); + return Err(missing_credentials_error(&provider_type)); } } + // Validate and read the ADC file BEFORE creating the provider so that + // a bad/missing ADC does not leave an orphan provider behind. 
+ let gcloud_adc_material = if from_gcloud_adc { + let (client_id, client_secret, refresh_token) = read_gcloud_adc()?; + Some((client_id, client_secret, refresh_token)) + } else { + None + }; + let response = client .create_provider(CreateProviderRequest { provider: Some(Provider { @@ -4298,37 +4459,66 @@ pub async fn provider_create( .into_inner() .provider .ok_or_else(|| miette::miette!("provider missing from response"))?; + let provider_name = provider.object_name().to_string(); + + if let Some((client_id, client_secret, refresh_token)) = gcloud_adc_material { + let mut material = HashMap::new(); + material.insert("client_id".to_string(), client_id); + material.insert("client_secret".to_string(), client_secret); + material.insert("refresh_token".to_string(), refresh_token); + + if let Err(configure_err) = client + .configure_provider_refresh(ConfigureProviderRefreshRequest { + provider: provider_name.clone(), + credential_key: openshell_core::inference::VERTEX_AI_ADC_TOKEN_KEY.to_string(), + strategy: ProviderCredentialRefreshStrategy::Oauth2RefreshToken as i32, + material, + secret_material_keys: vec![ + "client_secret".to_string(), + "refresh_token".to_string(), + ], + expires_at_ms: None, + }) + .await + { + return rollback_provider_create_after_vertex_adc_failure( + &mut client, + &provider_name, + "configure", + &configure_err, + ) + .await; + } - println!( - "{} Created provider {}", - "✓".green().bold(), - provider.object_name() - ); + if let Err(rotate_err) = client + .rotate_provider_credential(RotateProviderCredentialRequest { + provider: provider_name.clone(), + credential_key: openshell_core::inference::VERTEX_AI_ADC_TOKEN_KEY.to_string(), + }) + .await + { + return rollback_provider_create_after_vertex_adc_failure( + &mut client, + &provider_name, + "mint the initial access token for", + &rotate_err, + ) + .await; + } + + println!("{} Created provider {}", "✓".green().bold(), provider_name); + println!( + "Configured Vertex AI credentials from 
gcloud ADC and minted the initial access token" + ); + return Ok(()); + } + + println!("{} Created provider {}", "✓".green().bold(), provider_name); Ok(()) } fn provider_profile_allows_refresh_bootstrap(profile: &ProviderProfile) -> bool { - let required_credentials = profile - .credentials - .iter() - .filter(|credential| credential.required) - .collect::>(); - !required_credentials.is_empty() - && required_credentials.iter().all(|credential| { - credential - .refresh - .as_ref() - .is_some_and(|refresh| is_gateway_mintable_refresh_strategy(refresh.strategy)) - }) -} - -fn is_gateway_mintable_refresh_strategy(strategy: i32) -> bool { - matches!( - ProviderCredentialRefreshStrategy::try_from(strategy), - Ok(ProviderCredentialRefreshStrategy::Oauth2RefreshToken - | ProviderCredentialRefreshStrategy::Oauth2ClientCredentials - | ProviderCredentialRefreshStrategy::GoogleServiceAccountJwt) - ) + ProviderTypeProfile::from_proto(profile).allows_gateway_refresh_bootstrap() } pub async fn provider_get(server: &str, name: &str, tls: &TlsOptions) -> Result<()> { @@ -5312,7 +5502,9 @@ pub fn git_repo_root(local_path: &Path) -> Result { .parent() .ok_or_else(|| miette::miette!("path has no parent: {}", local_path.display()))? 
}; - let output = Command::new("git") + let mut command = Command::new("git"); + scrub_git_env(&mut command); + let output = command .args(["rev-parse", "--show-toplevel"]) .current_dir(git_dir) .output() @@ -5377,7 +5569,9 @@ pub fn git_sync_files(local_path: &Path) -> Result<(PathBuf, Vec)> { Some(relative_path.to_string_lossy().into_owned()) }; - let output = Command::new("git") + let mut command = Command::new("git"); + scrub_git_env(&mut command); + let output = command .args(["ls-files", "-co", "--exclude-standard", "-z"]) .args(pathspec.as_deref()) .current_dir(&repo_root) @@ -5422,12 +5616,19 @@ pub fn git_sync_files(local_path: &Path) -> Result<(PathBuf, Vec)> { Ok((base_dir, files)) } -pub fn local_upload_path_exists(path: &Path) -> bool { - std::fs::symlink_metadata(path).is_ok() -} - -pub fn local_upload_path_is_symlink(path: &Path) -> bool { - std::fs::symlink_metadata(path).is_ok_and(|metadata| metadata.file_type().is_symlink()) +fn scrub_git_env(command: &mut Command) -> &mut Command { + for key in [ + "GIT_DIR", + "GIT_WORK_TREE", + "GIT_INDEX_FILE", + "GIT_PREFIX", + "GIT_COMMON_DIR", + "GIT_OBJECT_DIRECTORY", + "GIT_ALTERNATE_OBJECT_DIRECTORIES", + ] { + command.env_remove(key); + } + command } // --------------------------------------------------------------------------- @@ -6980,18 +7181,17 @@ fn format_timestamp_ms(ms: i64) -> String { #[cfg(test)] mod tests { use super::{ - ProvisioningDisplay, ProvisioningStep, TlsOptions, build_sandbox_resource_limits, + ProvisioningStep, TlsOptions, build_sandbox_resource_limits, dockerfile_sources_supported_for_gateway, format_endpoint, format_gateway_select_header, format_gateway_select_items, format_provider_attachment_table, gateway_add, gateway_auth_label, gateway_env_override_warning, gateway_select_with, gateway_type_label, git_sync_files, http_health_check, image_requests_gpu, import_local_package_mtls_bundle, - inferred_provider_type, local_upload_path_exists, local_upload_path_is_symlink, - 
package_managed_tls_dirs, parse_cli_setting_value, parse_credential_expiry_cli_value, - parse_credential_expiry_pairs, parse_credential_pairs, plaintext_gateway_is_remote, - progress_step_from_metadata, provider_profile_allows_refresh_bootstrap, - provisioning_timeout_message, ready_false_condition_message, refresh_status_header, - refresh_status_row, resolve_from, sandbox_should_persist, service_expose_status_error, - service_url_for_gateway, + inferred_provider_type, package_managed_tls_dirs, parse_cli_setting_value, + parse_credential_expiry_cli_value, parse_credential_expiry_pairs, parse_credential_pairs, + plaintext_gateway_is_remote, progress_step_from_metadata, + provider_profile_allows_refresh_bootstrap, provisioning_timeout_message, + ready_false_condition_message, refresh_status_header, refresh_status_row, resolve_from, + sandbox_should_persist, service_expose_status_error, service_url_for_gateway, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -7002,7 +7202,6 @@ mod tests { use std::path::{Path, PathBuf}; use std::process::Command; use std::thread; - use std::time::{Duration, Instant}; use tonic::Status; use openshell_bootstrap::GatewayMetadata; @@ -7211,48 +7410,6 @@ mod tests { assert_eq!(progress_step_from_metadata("driver-private-step"), None); } - #[test] - fn provisioning_display_ignores_repeated_active_step_updates() { - let mut display = ProvisioningDisplay::new(); - display.set_active_step(ProvisioningStep::PullingSandboxImage); - display.set_active_detail("Downloading layer-1 (1 MB/2 MB)"); - - let original_step_start = Instant::now() - .checked_sub(Duration::from_secs(5)) - .expect("test duration should be representable"); - display.step_start = original_step_start; - - display.set_active_step(ProvisioningStep::PullingSandboxImage); - display.set_active_detail("Downloading layer-1 (1 MB/2 MB)"); - - assert_eq!( - display.active_step, - Some(ProvisioningStep::PullingSandboxImage) - ); - assert_eq!(display.active_detail, "Downloading 
layer-1 (1 MB/2 MB)"); - assert_eq!(display.step_start, original_step_start); - display.clear(); - } - - #[test] - fn provisioning_display_resets_detail_on_active_step_transition() { - let mut display = ProvisioningDisplay::new(); - display.set_active_step(ProvisioningStep::PullingSandboxImage); - display.set_active_detail("Downloading layer-1 (1 MB/2 MB)"); - - let original_step_start = Instant::now() - .checked_sub(Duration::from_secs(5)) - .expect("test duration should be representable"); - display.step_start = original_step_start; - - display.set_active_step(ProvisioningStep::StartingSandbox); - - assert_eq!(display.active_step, Some(ProvisioningStep::StartingSandbox)); - assert!(display.active_detail.is_empty()); - assert!(display.step_start > original_step_start); - display.clear(); - } - #[test] fn refresh_status_table_includes_operational_fields() { let header = refresh_status_header(); @@ -7335,7 +7492,7 @@ mod tests { }], ..Default::default() }; - assert!(!provider_profile_allows_refresh_bootstrap( + assert!(provider_profile_allows_refresh_bootstrap( &optional_refresh_profile )); } @@ -7728,7 +7885,9 @@ mod tests { } fn init_git_repo(path: &Path) { - let status = Command::new("git") + let mut command = Command::new("git"); + super::scrub_git_env(&mut command); + let status = command .args(["init"]) .current_dir(path) .status() @@ -7777,29 +7936,30 @@ mod tests { assert_eq!(files, vec!["file.txt", "inner/child.txt"]); } - #[cfg(unix)] #[test] - fn local_upload_path_helpers_accept_symlinks() { + fn git_sync_files_ignores_inherited_git_env() { + let _lock = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); let tmpdir = tempfile::tempdir().expect("create tmpdir"); - let target = tmpdir.path().join("target.txt"); - let link = tmpdir.path().join("link.txt"); - fs::write(&target, "target").expect("write target"); - std::os::unix::fs::symlink("target.txt", &link).expect("create symlink"); + let repo = tmpdir.path().join("repo"); + 
fs::create_dir_all(repo.join("nested")).expect("create repo"); + init_git_repo(&repo); - assert!(local_upload_path_exists(&link)); - assert!(local_upload_path_is_symlink(&link)); - } + fs::write(repo.join("nested/file.txt"), "file").expect("write file.txt"); + fs::write(repo.join("top.txt"), "top").expect("write top.txt"); - #[cfg(unix)] - #[test] - fn local_upload_path_helpers_accept_dangling_symlinks() { - let tmpdir = tempfile::tempdir().expect("create tmpdir"); - let link = tmpdir.path().join("dangling-link.txt"); - std::os::unix::fs::symlink("missing.txt", &link).expect("create symlink"); + let _git_dir = EnvVarGuard::set("GIT_DIR", "/tmp/not-the-test-repo/.git"); + let _git_work_tree = EnvVarGuard::set("GIT_WORK_TREE", "/tmp/not-the-test-repo"); + + let result = git_sync_files(&repo.join("nested")); + let (base_dir, files) = result.expect("git_sync_files should succeed"); - assert!(local_upload_path_exists(&link)); - assert!(local_upload_path_is_symlink(&link)); - assert!(!link.exists(), "std::path::Path::exists follows symlinks"); + assert_eq!( + base_dir, + fs::canonicalize(repo.join("nested")).expect("canonicalize nested path") + ); + assert_eq!(files, vec!["file.txt"]); } #[test] @@ -8162,4 +8322,150 @@ mod tests { "host.example.test:443 [L7 rest, allow PUT /v1/example/resource, deny DELETE /v1/example/resource]" ); } + + #[test] + fn read_gcloud_adc_missing_file_errors() { + let _lock = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _guard = EnvVarGuard::set( + "GOOGLE_APPLICATION_CREDENTIALS", + "/nonexistent/path/to/adc.json", + ); + let err = super::read_gcloud_adc().expect_err("missing file should error"); + assert!( + err.to_string().contains("failed to read gcloud ADC file"), + "unexpected error: {err}" + ); + } + + #[test] + fn read_gcloud_adc_wrong_type_errors() { + let _lock = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let tmp = 
tempfile::NamedTempFile::new().expect("tempfile"); + let json = serde_json::json!({ + "type": "service_account", + "project_id": "my-project", + "private_key_id": "key123" + }); + Write::write_all(&mut tmp.as_file(), json.to_string().as_bytes()).expect("write tempfile"); + let _guard = EnvVarGuard::set( + "GOOGLE_APPLICATION_CREDENTIALS", + tmp.path().to_str().expect("tempfile path"), + ); + let err = super::read_gcloud_adc().expect_err("wrong type should error"); + // The service_account type gets a targeted message directing the user + // to the real Vertex service-account credential flow instead of the + // generic authorized_user hint. + assert!( + err.to_string() + .contains("GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN"), + "error should mention the service-account token key, got: {err}" + ); + } + + #[test] + fn read_gcloud_adc_parses_user_creds() { + let _lock = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let tmp = tempfile::NamedTempFile::new().expect("tempfile"); + let json = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "client_secret": "test-client-secret", + "refresh_token": "test-refresh-token" + }); + Write::write_all(&mut tmp.as_file(), json.to_string().as_bytes()).expect("write tempfile"); + let _guard = EnvVarGuard::set( + "GOOGLE_APPLICATION_CREDENTIALS", + tmp.path().to_str().expect("tempfile path"), + ); + let (client_id, client_secret, refresh_token) = + super::read_gcloud_adc().expect("valid ADC should parse"); + assert_eq!(client_id, "test-client-id.apps.googleusercontent.com"); + assert_eq!(client_secret, "test-client-secret"); + assert_eq!(refresh_token, "test-refresh-token"); + } + + #[test] + fn read_gcloud_adc_uses_cloudsdk_config_fallback() { + let _lock = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let dir = tempfile::tempdir().expect("tempdir"); + let adc_path = 
dir.path().join("application_default_credentials.json"); + let json = serde_json::json!({ + "type": "authorized_user", + "client_id": "cloudsdk-client-id.apps.googleusercontent.com", + "client_secret": "cloudsdk-client-secret", + "refresh_token": "cloudsdk-refresh-token" + }); + fs::write(&adc_path, json.to_string()).expect("write adc file"); + let _adc_guard = EnvVarGuard::unset("GOOGLE_APPLICATION_CREDENTIALS"); + let _cloudsdk_guard = + EnvVarGuard::set("CLOUDSDK_CONFIG", dir.path().to_str().expect("config path")); + + let (client_id, client_secret, refresh_token) = + super::read_gcloud_adc().expect("valid CLOUDSDK_CONFIG ADC should parse"); + assert_eq!(client_id, "cloudsdk-client-id.apps.googleusercontent.com"); + assert_eq!(client_secret, "cloudsdk-client-secret"); + assert_eq!(refresh_token, "cloudsdk-refresh-token"); + } + + #[test] + fn read_gcloud_adc_malformed_json_errors() { + let _lock = TEST_ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let tmp = tempfile::NamedTempFile::new().expect("tempfile"); + Write::write_all(&mut tmp.as_file(), b"not valid json at all {{{{") + .expect("write tempfile"); + let _guard = EnvVarGuard::set( + "GOOGLE_APPLICATION_CREDENTIALS", + tmp.path().to_str().expect("tempfile path"), + ); + let result = super::read_gcloud_adc(); + assert!( + result.is_err(), + "malformed JSON should produce an error, got: {result:?}" + ); + let err = result.unwrap_err(); + let msg = format!("{err}"); + assert!( + msg.contains("parse") + || msg.contains("JSON") + || msg.contains("json") + || msg.contains("invalid") + || msg.contains("failed"), + "error message should mention parse/JSON failure, got: {msg}" + ); + } + + #[test] + fn refresh_bootstrap_allows_oauth2_refresh_token() { + use openshell_core::proto::{ + ProviderCredentialRefresh, ProviderCredentialRefreshStrategy, ProviderProfile, + ProviderProfileCredential, + }; + + let strategy = ProviderCredentialRefreshStrategy::Oauth2RefreshToken as i32; + let 
profile = ProviderProfile { + credentials: vec![ProviderProfileCredential { + required: true, + refresh: Some(ProviderCredentialRefresh { + strategy, + ..Default::default() + }), + ..Default::default() + }], + ..Default::default() + }; + assert!( + provider_profile_allows_refresh_bootstrap(&profile), + "Oauth2RefreshToken should be allowed for refresh bootstrap" + ); + } } diff --git a/crates/openshell-cli/tests/provider_commands_integration.rs b/crates/openshell-cli/tests/provider_commands_integration.rs index 090097a20..759bc99bb 100644 --- a/crates/openshell-cli/tests/provider_commands_integration.rs +++ b/crates/openshell-cli/tests/provider_commands_integration.rs @@ -44,6 +44,10 @@ struct ProviderState { profiles: Arc>>, refresh_statuses: Arc>>, refresh_requests: Arc>>, + delete_provider_requests: Arc>>, + fail_configure_refresh_message: Arc>>, + fail_rotate_refresh_message: Arc>>, + fail_delete_provider_message: Arc>>, sandbox_providers: Arc>>>, sandbox_provider_requests: Arc>>, global_settings: Arc>>, @@ -338,6 +342,28 @@ impl OpenShell for TestOpenShell { .into_inner() .provider .ok_or_else(|| Status::invalid_argument("provider is required"))?; + if provider.credentials.is_empty() { + let bootstrap_allowed = + if let Some(profile) = openshell_providers::get_default_profile(&provider.r#type) { + profile.allows_gateway_refresh_bootstrap() + } else { + self.state + .profiles + .lock() + .await + .get(&provider.r#type) + .cloned() + .is_some_and(|profile| { + openshell_providers::ProviderTypeProfile::from_proto(&profile) + .allows_gateway_refresh_bootstrap() + }) + }; + if !bootstrap_allowed { + return Err(Status::invalid_argument( + "provider.credentials must not be empty", + )); + } + } let mut providers = self.state.providers.lock().await; let provider_name = provider.object_name().to_string(); if providers.contains_key(&provider_name) { @@ -569,6 +595,15 @@ impl OpenShell for TestOpenShell { credential_key: request.credential_key.clone(), expires_at_ms: 
request.expires_at_ms, }); + let configure_failure = self + .state + .fail_configure_refresh_message + .lock() + .await + .take(); + if let Some(message) = configure_failure { + return Err(Status::internal(message)); + } let providers = self.state.providers.lock().await; let provider = providers .get(&request.provider) @@ -602,21 +637,42 @@ impl OpenShell for TestOpenShell { request: tonic::Request, ) -> Result, Status> { let request = request.into_inner(); + let provider_name = request.provider.clone(); + let credential_key = request.credential_key.clone(); self.state .refresh_requests .lock() .await .push(ProviderRefreshRequestLog::Rotate { - provider_name: request.provider.clone(), - credential_key: request.credential_key.clone(), + provider_name: provider_name.clone(), + credential_key: credential_key.clone(), }); + let rotate_failure = self.state.fail_rotate_refresh_message.lock().await.take(); + if let Some(message) = rotate_failure { + return Err(Status::internal(message)); + } let mut refresh_statuses = self.state.refresh_statuses.lock().await; let status = refresh_statuses - .get_mut(&(request.provider, request.credential_key)) + .get_mut(&(provider_name.clone(), credential_key.clone())) .ok_or_else(|| Status::not_found("provider refresh state not found"))?; - status.status = "rotation_requested".to_string(); + status.status = "refreshed".to_string(); + status.last_refresh_at_ms = 1; + status.next_refresh_at_ms = 3_600_000; + status.expires_at_ms = 3_600_000; + let status = status.clone(); + drop(refresh_statuses); + let mut providers = self.state.providers.lock().await; + let provider = providers + .get_mut(&provider_name) + .ok_or_else(|| Status::not_found("provider not found"))?; + provider + .credentials + .insert(credential_key.clone(), format!("minted-{credential_key}")); + provider + .credential_expires_at_ms + .insert(credential_key, 3_600_000); Ok(Response::new(RotateProviderCredentialResponse { - status: Some(status.clone()), + status: 
Some(status), })) } @@ -648,6 +704,15 @@ impl OpenShell for TestOpenShell { request: tonic::Request, ) -> Result, Status> { let name = request.into_inner().name; + self.state + .delete_provider_requests + .lock() + .await + .push(name.clone()); + let delete_failure = self.state.fail_delete_provider_message.lock().await.take(); + if let Some(message) = delete_failure { + return Err(Status::internal(message)); + } let deleted = self.state.providers.lock().await.remove(&name).is_some(); Ok(Response::new(DeleteProviderResponse { deleted })) } @@ -924,6 +989,7 @@ async fn provider_cli_run_functions_support_full_crud_flow() { "claude", false, &["API_KEY=abc".to_string()], + false, &["profile=dev".to_string()], &ts.tls, ) @@ -973,6 +1039,7 @@ async fn provider_refresh_cli_run_functions_wire_requests() { "outlook", false, &["MS_GRAPH_ACCESS_TOKEN=token".to_string()], + false, &[], &ts.tls, ) @@ -1060,6 +1127,7 @@ async fn provider_create_allows_empty_credentials_for_gateway_refresh_profiles() "custom-refresh", false, &[], + false, &[], &ts.tls, ) @@ -1082,6 +1150,7 @@ async fn sandbox_provider_cli_run_functions_wire_requests_and_idempotent_results "github", false, &["GITHUB_TOKEN=ghp-test".to_string()], + false, &[], &ts.tls, ) @@ -1200,6 +1269,7 @@ binaries: [/usr/bin/custom] "custom-api", false, &["CUSTOM_API_KEY=abc".to_string()], + false, &[], &ts.tls, ) @@ -1253,6 +1323,7 @@ async fn provider_create_from_existing_uses_profile_discovery_when_v2_enabled() "custom-discovery", true, &[], + false, &[], &ts.tls, ) @@ -1285,6 +1356,7 @@ async fn provider_create_from_existing_uses_registry_discovery_when_v2_disabled( "openai", true, &[], + false, &[], &ts.tls, ) @@ -1307,21 +1379,94 @@ async fn provider_create_from_existing_uses_registry_discovery_when_v2_disabled( } #[tokio::test] -async fn provider_create_from_existing_requires_profile_when_v2_enabled() { +async fn provider_create_from_existing_vertex_discovers_credentials_and_config_when_v2_enabled() { let ts = 
run_server().await; enable_providers_v2(&ts).await; - let _env = EnvVarGuard::set(&[("OPENAI_API_KEY", "legacy-openai-secret")]); + let _env = EnvVarGuard::set(&[ + ("VERTEX_AI_TOKEN", "ya29.vertex-v2-fallback"), + ("VERTEX_AI_PROJECT_ID", "vertex-v2-project"), + ("VERTEX_AI_REGION", "europe-west4"), + ( + "GOOGLE_VERTEX_AI_BASE_URL", + "https://aiplatform.googleapis.com/v1beta1/projects/vertex-v2-project/locations/global/endpoints/openapi", + ), + ("VERTEX_AI_PUBLISHER", "anthropic"), + ]); + + run::provider_create( + &ts.endpoint, + "vertex-v2-discovered", + "google-vertex-ai", + true, + &[], + false, + &[], + &ts.tls, + ) + .await + .expect("vertex provider create --from-existing with v2 enabled"); - let err = run::provider_create(&ts.endpoint, "v2-openai", "openai", true, &[], &[], &ts.tls) + let provider = ts + .state + .providers + .lock() .await - .expect_err("v2 discovery without a profile should fail"); + .get("vertex-v2-discovered") + .cloned() + .expect("vertex provider should be stored"); + assert_eq!(provider.r#type, "google-vertex-ai"); + assert_eq!( + provider.credentials.get("VERTEX_AI_TOKEN"), + Some(&"ya29.vertex-v2-fallback".to_string()) + ); + assert_eq!( + provider.config.get("VERTEX_AI_PROJECT_ID"), + Some(&"vertex-v2-project".to_string()) + ); + assert_eq!( + provider.config.get("VERTEX_AI_REGION"), + Some(&"europe-west4".to_string()) + ); + assert_eq!( + provider.config.get("GOOGLE_VERTEX_AI_BASE_URL"), + Some( + &"https://aiplatform.googleapis.com/v1beta1/projects/vertex-v2-project/locations/global/endpoints/openapi" + .to_string() + ) + ); + assert_eq!( + provider.config.get("VERTEX_AI_PUBLISHER"), + Some(&"anthropic".to_string()) + ); +} + +#[tokio::test] +async fn provider_create_from_existing_requires_profile_when_v2_enabled() { + let ts = run_server().await; + enable_providers_v2(&ts).await; + // Use "generic" which is a normalised type but has no built-in provider + // profile, so v2 profile-based discovery fails with the expected 
message. + let _env = EnvVarGuard::set(&[("GENERIC_API_KEY", "some-secret")]); + + let err = run::provider_create( + &ts.endpoint, + "v2-generic", + "generic", + true, + &[], + false, + &[], + &ts.tls, + ) + .await + .expect_err("v2 discovery without a profile should fail"); assert!( err.to_string() .contains("providers v2 discovery requires a provider profile"), "unexpected error: {err}" ); - assert!(!ts.state.providers.lock().await.contains_key("v2-openai")); + assert!(!ts.state.providers.lock().await.contains_key("v2-generic")); } #[tokio::test] @@ -1352,6 +1497,7 @@ async fn provider_create_from_existing_fails_when_profile_discovery_finds_nothin "empty-discovery", true, &[], + false, &[], &ts.tls, ) @@ -1605,6 +1751,7 @@ async fn provider_create_rejects_key_only_credentials_without_local_env_value() "claude", false, &["INVALID_PAIR".to_string()], + false, &[], &ts.tls, ) @@ -1629,6 +1776,7 @@ async fn provider_create_supports_generic_type_and_env_lookup_credentials() { "generic", false, &["NAV_GENERIC_TEST_KEY".to_string()], + false, &[], &ts.tls, ) @@ -1663,6 +1811,7 @@ async fn provider_create_rejects_combined_from_existing_and_credentials() { "claude", true, &["API_KEY=abc".to_string()], + false, &[], &ts.tls, ) @@ -1676,6 +1825,56 @@ async fn provider_create_rejects_combined_from_existing_and_credentials() { ); } +#[tokio::test] +async fn provider_create_rejects_combined_from_gcloud_adc_and_from_existing() { + let ts = run_server().await; + + let err = run::provider_create( + &ts.endpoint, + "bad-vertex-provider", + "google-vertex-ai", + true, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("from-gcloud-adc and from-existing should be mutually exclusive"); + + assert!( + err.to_string() + .contains("--from-gcloud-adc cannot be combined with --from-existing or --credential"), + "unexpected error: {err}" + ); + assert!(ts.state.providers.lock().await.is_empty()); +} + +#[tokio::test] +async fn 
provider_create_rejects_combined_from_gcloud_adc_and_credentials() { + let ts = run_server().await; + + let err = run::provider_create( + &ts.endpoint, + "bad-vertex-provider", + "google-vertex-ai", + false, + &["GOOGLE_VERTEX_AI_TOKEN=token".to_string()], + true, + &[], + &ts.tls, + ) + .await + .expect_err("from-gcloud-adc and credentials should be mutually exclusive"); + + assert!( + err.to_string() + .contains("--from-gcloud-adc cannot be combined with --from-existing or --credential"), + "unexpected error: {err}" + ); + assert!(ts.state.providers.lock().await.is_empty()); +} + #[tokio::test] async fn provider_create_rejects_empty_env_var_for_key_only_credential() { let ts = run_server().await; @@ -1687,6 +1886,7 @@ async fn provider_create_rejects_empty_env_var_for_key_only_credential() { "generic", false, &["NAV_EMPTY_ENV_KEY".to_string()], + false, &[], &ts.tls, ) @@ -1711,6 +1911,7 @@ async fn provider_create_supports_nvidia_type_with_nvidia_api_key() { "nvidia", false, &["NVIDIA_API_KEY".to_string()], + false, &[], &ts.tls, ) @@ -1734,3 +1935,542 @@ async fn provider_create_supports_nvidia_type_with_nvidia_api_key() { Some(&"nvapi-live-test".to_string()) ); } + +// ── --from-gcloud-adc tests ─────────────────────────────────────────────────── + +#[tokio::test] +async fn provider_create_from_gcloud_adc_happy_path() { + let ts = run_server().await; + + // Write a temp ADC file simulating a valid authorized_user credential. + let adc_content = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "client_secret": "test-client-secret", + "refresh_token": "1//test-refresh-token" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, &adc_content).unwrap(); + + // Point GOOGLE_APPLICATION_CREDENTIALS at the temp file so read_gcloud_adc + // picks it up without touching the real ~/.config/gcloud/ path. 
+ let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + run::provider_create( + &ts.endpoint, + "my-vertex", + "google-vertex-ai", + false, + &[], // no explicit credentials; refresh bootstrap covers it + true, // from_gcloud_adc + &[], + &ts.tls, + ) + .await + .expect("provider_create with --from-gcloud-adc should succeed"); + + // Provider must exist in the server state. + let providers = ts.state.providers.lock().await; + let provider = providers + .get("my-vertex") + .expect("provider should be stored after create"); + assert_eq!(provider.r#type, "google-vertex-ai"); + assert_eq!( + provider + .credentials + .get("GOOGLE_VERTEX_AI_TOKEN") + .map(String::as_str), + Some("minted-GOOGLE_VERTEX_AI_TOKEN"), + "initial rotate should materialize a usable access token" + ); + drop(providers); + + // ADC bootstrap must configure refresh and immediately mint the first token. + let requests = ts.state.refresh_requests.lock().await.clone(); + assert_eq!( + requests.len(), + 2, + "expected configure + rotate refresh requests" + ); + assert_eq!( + requests[0], + ProviderRefreshRequestLog::Configure { + provider_name: "my-vertex".to_string(), + credential_key: "GOOGLE_VERTEX_AI_TOKEN".to_string(), + expires_at_ms: None, + } + ); + assert_eq!( + requests[1], + ProviderRefreshRequestLog::Rotate { + provider_name: "my-vertex".to_string(), + credential_key: "GOOGLE_VERTEX_AI_TOKEN".to_string(), + } + ); + + // The refresh status must record the ADC material keys. 
+ let refresh_statuses = ts.state.refresh_statuses.lock().await; + let status = refresh_statuses + .get(&( + "my-vertex".to_string(), + "GOOGLE_VERTEX_AI_TOKEN".to_string(), + )) + .expect("refresh status should be stored"); + assert_eq!( + status.strategy, + ProviderCredentialRefreshStrategy::Oauth2RefreshToken as i32 + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_rejects_service_account() { + let ts = run_server().await; + + // Write a temp ADC file with type=service_account. + let adc_content = serde_json::json!({ + "type": "service_account", + "project_id": "my-project", + "private_key_id": "key-id", + "private_key": "-----BEGIN RSA PRIVATE KEY-----\n...", + "client_email": "sa@my-project.iam.gserviceaccount.com" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, &adc_content).unwrap(); + + let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + let err = run::provider_create( + &ts.endpoint, + "my-vertex-sa", + "google-vertex-ai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("service_account ADC should be rejected"); + + assert!( + err.to_string() + .contains("GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN"), + "error should mention the service-account token key, got: {err}" + ); + + // create_provider must NOT have been called — no provider stored. + let providers = ts.state.providers.lock().await; + assert!( + providers.is_empty(), + "no provider should have been created on pre-flight failure" + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_missing_file() { + let ts = run_server().await; + + // Point to a path that does not exist. 
+ let _guard = EnvVarGuard::set(&[( + "GOOGLE_APPLICATION_CREDENTIALS", + "/tmp/nonexistent-adc-file-openshell-test.json", + )]); + + let err = run::provider_create( + &ts.endpoint, + "my-vertex-missing", + "google-vertex-ai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("missing ADC file should produce an error"); + + // Error must mention the file path or the read failure. + let msg = err.to_string(); + assert!( + msg.contains("nonexistent-adc-file-openshell-test.json") + || msg.contains("failed to read gcloud ADC file"), + "error should reference the missing file, got: {msg}" + ); + + // create_provider must NOT have been called — no provider stored. + let providers = ts.state.providers.lock().await; + assert!( + providers.is_empty(), + "no provider should have been created on pre-flight failure" + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_rejects_wrong_provider_type_before_credential_check() { + let ts = run_server().await; + + let err = run::provider_create( + &ts.endpoint, + "my-openai-adc", + "openai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("wrong provider type should fail before generic credential validation"); + + assert!( + err.to_string() + .contains("--from-gcloud-adc is only valid for google-vertex-ai providers"), + "unexpected error: {err}" + ); + assert!(ts.state.providers.lock().await.is_empty()); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_rolls_back_provider_when_refresh_configure_fails() { + let ts = run_server().await; + *ts.state.fail_configure_refresh_message.lock().await = + Some("simulated configure failure".to_string()); + + let adc_content = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "client_secret": "test-client-secret", + "refresh_token": "1//test-refresh-token" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, 
&adc_content).unwrap(); + let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + let err = run::provider_create( + &ts.endpoint, + "vertex-rollback", + "google-vertex-ai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("configure_provider_refresh failure should bubble up"); + + assert!( + err.to_string().contains("simulated configure failure"), + "unexpected error: {err}" + ); + assert!( + !ts.state + .providers + .lock() + .await + .contains_key("vertex-rollback"), + "provider should be deleted on rollback" + ); + assert_eq!( + ts.state.delete_provider_requests.lock().await.clone(), + vec!["vertex-rollback".to_string()] + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_warn_path_keeps_provider_when_rollback_delete_fails() { + let ts = run_server().await; + *ts.state.fail_configure_refresh_message.lock().await = + Some("simulated configure failure".to_string()); + *ts.state.fail_delete_provider_message.lock().await = + Some("simulated delete failure".to_string()); + + let adc_content = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "client_secret": "test-client-secret", + "refresh_token": "1//test-refresh-token" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, &adc_content).unwrap(); + let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + let err = run::provider_create( + &ts.endpoint, + "vertex-cleanup-warning", + "google-vertex-ai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("cleanup failure path should still return configure error"); + + assert!( + err.to_string().contains("simulated configure failure"), + "unexpected error: {err}" + ); + assert!( + ts.state + .providers + .lock() + .await + 
.contains_key("vertex-cleanup-warning"), + "provider should remain when rollback deletion fails" + ); + assert_eq!( + ts.state.delete_provider_requests.lock().await.clone(), + vec!["vertex-cleanup-warning".to_string()] + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_rolls_back_provider_when_initial_rotate_fails() { + let ts = run_server().await; + *ts.state.fail_rotate_refresh_message.lock().await = + Some("simulated rotate failure".to_string()); + + let adc_content = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "client_secret": "test-client-secret", + "refresh_token": "1//test-refresh-token" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, &adc_content).unwrap(); + let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + let err = run::provider_create( + &ts.endpoint, + "vertex-rotate-rollback", + "google-vertex-ai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("initial rotate failure should roll back the provider"); + + assert!( + err.to_string().contains("simulated rotate failure"), + "unexpected error: {err}" + ); + assert!( + !ts.state + .providers + .lock() + .await + .contains_key("vertex-rotate-rollback"), + "provider should be deleted on initial-rotate rollback" + ); + assert_eq!( + ts.state.delete_provider_requests.lock().await.clone(), + vec!["vertex-rotate-rollback".to_string()] + ); +} + +#[tokio::test] +async fn provider_create_from_existing_vertex_config_only_reports_missing_vertex_credentials() { + let ts = run_server().await; + enable_providers_v2(&ts).await; + let _env = EnvVarGuard::set(&[ + ("VERTEX_AI_PROJECT_ID", "vertex-config-only-project"), + ("VERTEX_AI_REGION", "us-central1"), + ]); + + let err = run::provider_create( + &ts.endpoint, + "vertex-config-only", + "google-vertex-ai", + true, + 
&[], + false, + &[], + &ts.tls, + ) + .await + .expect_err("config-only discovery should surface missing credential guidance"); + + let msg = err.to_string(); + assert!( + msg.contains("GOOGLE_VERTEX_AI_TOKEN") && msg.contains("VERTEX_AI_SERVICE_ACCOUNT_TOKEN"), + "unexpected error: {msg}" + ); + assert!( + !ts.state + .providers + .lock() + .await + .contains_key("vertex-config-only") + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_with_config_keys() { + let ts = run_server().await; + + // Write a valid authorized_user ADC file. + let adc_content = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "client_secret": "test-client-secret", + "refresh_token": "1//test-refresh-token" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, &adc_content).unwrap(); + let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + run::provider_create( + &ts.endpoint, + "vertex-with-config", + "google-vertex-ai", + false, + &[], // no explicit credentials; ADC flow + true, // from_gcloud_adc + &[ + "VERTEX_AI_PROJECT_ID=my-gcp-project".to_string(), + "VERTEX_AI_REGION=us-east1".to_string(), + ], + &ts.tls, + ) + .await + .expect("provider_create with --from-gcloud-adc and --config keys should succeed"); + + // Verify provider was created with the config keys. 
+ let providers = ts.state.providers.lock().await; + let provider = providers + .get("vertex-with-config") + .expect("provider should be stored after create"); + assert_eq!(provider.r#type, "google-vertex-ai"); + assert_eq!( + provider + .config + .get("VERTEX_AI_PROJECT_ID") + .map(String::as_str), + Some("my-gcp-project"), + "VERTEX_AI_PROJECT_ID must be stored in provider config" + ); + assert_eq!( + provider.config.get("VERTEX_AI_REGION").map(String::as_str), + Some("us-east1"), + "VERTEX_AI_REGION must be stored in provider config" + ); + drop(providers); + + // ADC flow should configure refresh and eagerly mint the initial token. + let refresh_requests = ts.state.refresh_requests.lock().await.clone(); + assert_eq!( + refresh_requests.len(), + 2, + "exactly one configure call and one rotate call expected" + ); + assert_eq!( + refresh_requests[0], + ProviderRefreshRequestLog::Configure { + provider_name: "vertex-with-config".to_string(), + credential_key: "GOOGLE_VERTEX_AI_TOKEN".to_string(), + expires_at_ms: None, + } + ); + assert_eq!( + refresh_requests[1], + ProviderRefreshRequestLog::Rotate { + provider_name: "vertex-with-config".to_string(), + credential_key: "GOOGLE_VERTEX_AI_TOKEN".to_string(), + } + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_missing_refresh_token() { + let ts = run_server().await; + + // ADC file is valid authorized_user type but missing refresh_token. 
+ let adc_content = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "client_secret": "test-client-secret" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, &adc_content).unwrap(); + let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + let err = run::provider_create( + &ts.endpoint, + "vertex-missing-refresh", + "google-vertex-ai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("missing refresh_token should produce an error"); + + let err_msg = err.to_string(); + assert!( + err_msg.contains("refresh_token"), + "error must mention 'refresh_token', got: {err_msg}" + ); + + // No provider should have been created. + let providers = ts.state.providers.lock().await; + assert!( + providers.is_empty(), + "no provider must be created when ADC validation fails" + ); +} + +#[tokio::test] +async fn provider_create_from_gcloud_adc_missing_client_secret() { + let ts = run_server().await; + + // ADC file is valid authorized_user type but missing client_secret. 
+ let adc_content = serde_json::json!({ + "type": "authorized_user", + "client_id": "test-client-id.apps.googleusercontent.com", + "refresh_token": "1//test-refresh-token" + }); + let adc_file = tempfile::NamedTempFile::new().unwrap(); + serde_json::to_writer(&adc_file, &adc_content).unwrap(); + let adc_path = adc_file.path().to_str().unwrap().to_string(); + let _guard = EnvVarGuard::set(&[("GOOGLE_APPLICATION_CREDENTIALS", &adc_path)]); + + let err = run::provider_create( + &ts.endpoint, + "vertex-missing-secret", + "google-vertex-ai", + false, + &[], + true, + &[], + &ts.tls, + ) + .await + .expect_err("missing client_secret should produce an error"); + + let err_msg = err.to_string(); + assert!( + err_msg.contains("client_secret"), + "error must mention 'client_secret', got: {err_msg}" + ); + + // No provider should have been created. + let providers = ts.state.providers.lock().await; + assert!( + providers.is_empty(), + "no provider must be created when ADC validation fails" + ); +} diff --git a/crates/openshell-core/src/inference.rs b/crates/openshell-core/src/inference.rs index 0360cae5c..c04feb6b4 100644 --- a/crates/openshell-core/src/inference.rs +++ b/crates/openshell-core/src/inference.rs @@ -61,6 +61,13 @@ const OPENAI_PROTOCOLS: &[&str] = &[ const ANTHROPIC_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; +/// Default protocol set for the Vertex AI profile. These are overridden at route +/// resolution time in `resolve_vertex_ai_route`: Anthropic models use +/// `anthropic_messages`, while Gemini and other models use the OpenAI-compatible +/// endpoint with `openai_chat_completions`. This default applies only to the +/// base-URL-override escape hatch path. 
+const VERTEX_AI_PROTOCOLS: &[&str] = &["anthropic_messages", "model_discovery"]; + static OPENAI_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "openai", default_base_url: "https://api.openai.com/v1", @@ -83,6 +90,59 @@ static ANTHROPIC_PROFILE: InferenceProviderProfile = InferenceProviderProfile { passthrough_headers: &["anthropic-version", "anthropic-beta"], }; +/// Credential environment variable names for the Vertex AI provider, in priority order. +/// +/// These are referenced by both the provider discovery logic in `openshell-providers` +/// and the inference profile here so both crates agree on which env vars hold credentials. +pub const VERTEX_AI_CREDENTIAL_KEY_NAMES: &[&str] = &[ + "GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN", + "VERTEX_AI_SERVICE_ACCOUNT_TOKEN", + "GOOGLE_VERTEX_AI_TOKEN", + "VERTEX_AI_TOKEN", +]; + +/// The credential key used for tokens minted from gcloud Application Default Credentials. +/// +/// This is the key written by the gateway's `OAuth2` refresh worker when using the +/// `--from-gcloud-adc` CLI flow. It must match `VERTEX_AI_CREDENTIAL_KEY_NAMES[2]`. +pub const VERTEX_AI_ADC_TOKEN_KEY: &str = "GOOGLE_VERTEX_AI_TOKEN"; + +/// GCP project ID config key for Vertex AI providers. +pub const VERTEX_AI_PROJECT_ID_KEY: &str = "VERTEX_AI_PROJECT_ID"; + +/// GCP region/location config key for Vertex AI providers. +pub const VERTEX_AI_REGION_KEY: &str = "VERTEX_AI_REGION"; + +/// Publisher override config key for Vertex AI providers. +/// +/// Set to `"anthropic"` to force Anthropic Messages API routing regardless of model name, +/// or any other value to force OpenAI-compatible routing. +pub const VERTEX_AI_PUBLISHER_KEY: &str = "VERTEX_AI_PUBLISHER"; + +/// Config key names scanned during provider discovery, in addition to credential keys. 
+/// +/// These are referenced by the provider discovery plugin in `openshell-providers` to +/// collect Vertex AI config from the environment during `--from-existing` flows. +pub const VERTEX_AI_CONFIG_KEY_NAMES: &[&str] = &[ + VERTEX_AI_PROJECT_ID_KEY, + VERTEX_AI_REGION_KEY, + "GOOGLE_VERTEX_AI_BASE_URL", + "VERTEX_AI_BASE_URL", + VERTEX_AI_PUBLISHER_KEY, +]; + +static VERTEX_AI_PROFILE: InferenceProviderProfile = InferenceProviderProfile { + provider_type: "google-vertex-ai", + // Base URL is project/region specific and built at route resolution time. + default_base_url: "", + protocols: VERTEX_AI_PROTOCOLS, + credential_key_names: VERTEX_AI_CREDENTIAL_KEY_NAMES, + base_url_config_keys: &["GOOGLE_VERTEX_AI_BASE_URL", "VERTEX_AI_BASE_URL"], + auth: AuthHeader::Bearer, + default_headers: &[], + passthrough_headers: &[], +}; + static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { provider_type: "nvidia", default_base_url: "https://integrate.api.nvidia.com/v1", @@ -94,15 +154,35 @@ static NVIDIA_PROFILE: InferenceProviderProfile = InferenceProviderProfile { passthrough_headers: &["x-model-id"], }; +/// Canonicalize an inference provider type string to a well-known identifier. +/// +/// Returns `Some(canonical_name)` for recognized inference providers, +/// `None` for unrecognized inputs. This is the single source of truth for +/// Vertex AI (and other inference provider) alias resolution so that both +/// [`profile_for`] and `openshell-providers` normalization agree. +#[must_use] +pub fn normalize_inference_provider_type(input: &str) -> Option<&'static str> { + match input.trim().to_ascii_lowercase().as_str() { + "openai" => Some("openai"), + "anthropic" => Some("anthropic"), + "nvidia" => Some("nvidia"), + "google-vertex-ai" | "vertex" | "vertex-ai" | "google-vertex" | "gcp-vertex" => { + Some("google-vertex-ai") + } + _ => None, + } +} + /// Look up the inference provider profile for a given provider type. 
/// /// Returns `None` for provider types that don't support inference routing /// (e.g. `github`, `gitlab`, `outlook`). pub fn profile_for(provider_type: &str) -> Option<&'static InferenceProviderProfile> { - match provider_type.trim().to_ascii_lowercase().as_str() { + match normalize_inference_provider_type(provider_type)? { "openai" => Some(&OPENAI_PROFILE), "anthropic" => Some(&ANTHROPIC_PROFILE), "nvidia" => Some(&NVIDIA_PROFILE), + "google-vertex-ai" => Some(&VERTEX_AI_PROFILE), _ => None, } } @@ -142,6 +222,28 @@ pub fn route_headers_for_provider_type( ) } +/// Derive routing header policy for a specific resolved route. +/// +/// Most providers only need their provider type. Vertex AI is special because +/// Claude routes should forward `anthropic-beta`, while Gemini/OpenAI-compatible +/// routes should not inherit Anthropic passthrough headers. +pub fn route_headers_for_route( + provider_type: &str, + protocols: &[String], +) -> (AuthHeader, Vec<(String, String)>, Vec) { + let (auth, headers, mut passthrough_headers) = route_headers_for_provider_type(provider_type); + if profile_for(provider_type).is_some_and(|profile| profile.provider_type == "google-vertex-ai") + { + let is_vertex_anthropic = protocols + .iter() + .any(|protocol| protocol == "anthropic_messages"); + if is_vertex_anthropic && !passthrough_headers.iter().any(|h| h == "anthropic-beta") { + passthrough_headers.push("anthropic-beta".to_string()); + } + } + (auth, headers, passthrough_headers) +} + // --------------------------------------------------------------------------- // Protocol normalization // --------------------------------------------------------------------------- @@ -250,6 +352,37 @@ mod tests { assert!(headers.is_empty()); } + #[test] + fn profile_for_vertex_types() { + for key in &["google-vertex-ai", "vertex", "vertex-ai"] { + let profile = profile_for(key).expect("vertex profile should be Some"); + assert_eq!(profile.provider_type, "google-vertex-ai"); + } + } + + #[test] + 
fn auth_for_vertex_uses_bearer() { + let (auth, headers) = auth_for_provider_type("google-vertex-ai"); + assert_eq!(auth, AuthHeader::Bearer); + assert!(headers.is_empty()); + } + + #[test] + fn route_headers_for_vertex_anthropic_route_forward_beta_only() { + let (_, headers, passthrough_headers) = + route_headers_for_route("google-vertex-ai", &["anthropic_messages".to_string()]); + assert!(headers.is_empty()); + assert_eq!(passthrough_headers, vec!["anthropic-beta".to_string()]); + } + + #[test] + fn route_headers_for_vertex_openai_route_do_not_forward_anthropic_headers() { + let (_, headers, passthrough_headers) = + route_headers_for_route("google-vertex-ai", &["openai_chat_completions".to_string()]); + assert!(headers.is_empty()); + assert!(passthrough_headers.is_empty()); + } + #[test] fn auth_for_unknown_defaults_to_bearer() { let (auth, headers) = auth_for_provider_type("unknown"); diff --git a/crates/openshell-providers/src/lib.rs b/crates/openshell-providers/src/lib.rs index 21a1750ab..1d0d5a192 100644 --- a/crates/openshell-providers/src/lib.rs +++ b/crates/openshell-providers/src/lib.rs @@ -168,6 +168,11 @@ impl ProviderRegistry { #[must_use] pub fn normalize_provider_type(input: &str) -> Option<&'static str> { + // Inference provider aliases are canonicalized in openshell-core so that + // openshell-server and openshell-providers agree on the same mapping. 
+ if let Some(canonical) = openshell_core::inference::normalize_inference_provider_type(input) { + return Some(canonical); + } let normalized = input.trim().to_ascii_lowercase(); match normalized.as_str() { "claude" | "claude-code" | "claude_code" => Some("claude-code"), @@ -175,9 +180,6 @@ pub fn normalize_provider_type(input: &str) -> Option<&'static str> { "copilot" => Some("copilot"), "opencode" => Some("opencode"), "generic" => Some("generic"), - "openai" => Some("openai"), - "anthropic" => Some("anthropic"), - "nvidia" => Some("nvidia"), "gitlab" | "glab" => Some("gitlab"), "github" | "gh" => Some("github"), "outlook" => Some("outlook"), @@ -211,6 +213,15 @@ mod tests { assert_eq!(normalize_provider_type("anthropic"), Some("anthropic")); assert_eq!(normalize_provider_type("nvidia"), Some("nvidia")); assert_eq!(normalize_provider_type("copilot"), Some("copilot")); + assert_eq!( + normalize_provider_type("google-vertex-ai"), + Some("google-vertex-ai") + ); + assert_eq!(normalize_provider_type("vertex"), Some("google-vertex-ai")); + assert_eq!( + normalize_provider_type("vertex-ai"), + Some("google-vertex-ai") + ); assert_eq!(normalize_provider_type("unknown"), None); } diff --git a/crates/openshell-providers/src/profiles.rs b/crates/openshell-providers/src/profiles.rs index 25c750e63..d94778ce3 100644 --- a/crates/openshell-providers/src/profiles.rs +++ b/crates/openshell-providers/src/profiles.rs @@ -19,6 +19,7 @@ use std::sync::OnceLock; const BUILT_IN_PROFILE_YAMLS: &[&str] = &[ include_str!("../../../providers/claude-code.yaml"), include_str!("../../../providers/github.yaml"), + include_str!("../../../providers/google-vertex-ai.yaml"), include_str!("../../../providers/nvidia.yaml"), ]; @@ -306,6 +307,25 @@ impl ProviderTypeProfile { vars } + /// Whether this profile can be created without an initial access token because + /// the gateway can mint at least one credential immediately from refresh + /// material, and no required credential falls outside that 
gateway-mintable set. + #[must_use] + pub fn allows_gateway_refresh_bootstrap(&self) -> bool { + let mut has_gateway_mintable_credential = false; + for credential in &self.credentials { + let is_gateway_mintable = credential + .refresh + .as_ref() + .is_some_and(CredentialRefreshProfile::is_gateway_mintable); + if credential.required && !is_gateway_mintable { + return false; + } + has_gateway_mintable_credential |= is_gateway_mintable; + } + has_gateway_mintable_credential + } + #[must_use] pub fn to_proto(&self) -> ProviderProfile { ProviderProfile { @@ -345,6 +365,18 @@ impl ProviderTypeProfile { } } +impl CredentialRefreshProfile { + #[must_use] + pub fn is_gateway_mintable(&self) -> bool { + matches!( + self.strategy, + ProviderCredentialRefreshStrategy::Oauth2RefreshToken + | ProviderCredentialRefreshStrategy::Oauth2ClientCredentials + | ProviderCredentialRefreshStrategy::GoogleServiceAccountJwt + ) + } +} + fn discovery_is_empty(discovery: &DiscoveryProfile) -> bool { discovery.credentials.is_empty() } @@ -1148,6 +1180,89 @@ mod tests { ); } + #[test] + fn vertex_profile_declares_discovery_and_fallback_token_env_vars() { + let profile = get_default_profile("google-vertex-ai").expect("vertex profile"); + let service_account_token = profile + .credentials + .iter() + .find(|credential| credential.name == "service_account_token") + .expect("vertex service-account token credential"); + let adc_credential = profile + .credentials + .iter() + .find(|credential| credential.name == "gcloud_adc_token") + .expect("vertex ADC credential"); + + assert_eq!( + service_account_token.env_vars, + vec![ + "GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN".to_string(), + "VERTEX_AI_SERVICE_ACCOUNT_TOKEN".to_string() + ] + ); + assert_eq!( + adc_credential.env_vars, + vec![ + "GOOGLE_VERTEX_AI_TOKEN".to_string(), + "VERTEX_AI_TOKEN".to_string() + ] + ); + assert_eq!( + profile.discovery.credentials, + vec!["service_account_token", "gcloud_adc_token"] + ); + assert!( + 
profile.allows_gateway_refresh_bootstrap(), + "Vertex profile should allow empty-create bootstrap via gateway-mintable credentials" + ); + } + + #[test] + fn refresh_bootstrap_requires_a_gateway_mintable_path_and_no_required_static_credentials() { + let optional_refresh_profile = parse_profile_yaml( + r" +id: optional-refresh +display_name: Optional Refresh +credentials: + - name: access_token + required: false + refresh: + strategy: oauth2_refresh_token +", + ) + .expect("profile"); + assert!(optional_refresh_profile.allows_gateway_refresh_bootstrap()); + + let mixed_required_profile = parse_profile_yaml( + r" +id: mixed-required +display_name: Mixed Required +credentials: + - name: access_token + required: true + refresh: + strategy: oauth2_client_credentials + - name: static_key + required: true +", + ) + .expect("profile"); + assert!(!mixed_required_profile.allows_gateway_refresh_bootstrap()); + + let static_only_profile = parse_profile_yaml( + r" +id: static-only +display_name: Static Only +credentials: + - name: api_key + required: false +", + ) + .expect("profile"); + assert!(!static_only_profile.allows_gateway_refresh_bootstrap()); + } + #[test] fn parse_profile_yaml_reads_single_provider_document() { let profile = parse_profile_yaml( diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index 88a6e213a..ee084040c 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -63,6 +63,13 @@ enum StreamingBody { Buffered(Option), } +/// The `anthropic_version` value required by Vertex AI's rawPredict endpoint for +/// Anthropic Claude models. Google publishes this version string; update here if +/// the Vertex AI Anthropic API version changes. 
+/// +/// See: +const VERTEX_ANTHROPIC_VERSION: &str = "vertex-2023-10-16"; + const COMMON_INFERENCE_REQUEST_HEADERS: [&str; 4] = ["content-type", "accept", "accept-encoding", "user-agent"]; @@ -110,6 +117,13 @@ fn sanitize_request_headers( .map(|(name, _)| name.to_ascii_lowercase()), ); + // Vertex AI Anthropic rawPredict endpoints do not accept the + // `anthropic-beta` header. Beta feature enablement for Vertex AI is + // controlled through Google Cloud, not HTTP headers. Strip it here so + // clients (e.g. Claude Code) that always send beta flags don't cause + // HTTP 400 errors from the Vertex AI backend. + let strip_anthropic_beta = is_vertex_anthropic_rawpredict_route(route); + headers .iter() .filter_map(|(name, value)| { @@ -117,6 +131,9 @@ fn sanitize_request_headers( if should_strip_request_header(&name_lc) || !allowed.contains(&name_lc) { return None; } + if strip_anthropic_beta && name_lc == "anthropic-beta" { + return None; + } Some((name.clone(), value.clone())) }) .collect() @@ -156,8 +173,9 @@ fn prepare_backend_request( path: &str, headers: &[(String, String)], body: bytes::Bytes, + stream_response: bool, ) -> Result<(reqwest::RequestBuilder, String), RouterError> { - let url = build_backend_url(&route.endpoint, path); + let url = build_provider_url(route, &route.model, path, stream_response); let headers = sanitize_request_headers(route, headers); let reqwest_method: reqwest::Method = method @@ -188,20 +206,45 @@ fn prepare_backend_request( } } - // Set the "model" field in the JSON body to the route's configured model so the - // backend receives the correct model ID regardless of what the client sent. - let body = match serde_json::from_slice::(&body) { - Ok(mut json) => { - if let Some(obj) = json.as_object_mut() { + // Rewrite the JSON body for backend compatibility: + // - Standard routes: set "model" to the route's configured model so the + // backend receives the correct model ID regardless of what the client sent. 
+ // - Vertex AI rawPredict routes: remove "model" (it is encoded in the URL + // path) and inject "anthropic_version" (required in the body, not a header). + // Non-JSON bodies pass through unchanged; model rewrite and version injection + // are silently skipped. Such bodies would be rejected by the upstream anyway. + let body = serde_json::from_slice::(&body).map_or(body, |mut json| { + if let Some(obj) = json.as_object_mut() { + // Vertex AI Anthropic endpoints require anthropic_version in the body. + // Standard Anthropic SDK sends it as a header; Vertex AI needs it as a body field. + // We inject it only for the Vertex rawPredict-style route contract used for + // Anthropic publisher endpoints, not for arbitrary model-in-path routes. + let needs_vertex_anthropic_version = is_vertex_anthropic_rawpredict_route(route); + if needs_vertex_anthropic_version { + // Vertex AI rawPredict encodes the model in the URL path, not + // the request body. Clients using the standard Anthropic API + // (e.g. Claude Code via inference.local) always send "model" + // in the body; strip it so Vertex AI does not reject the + // request with "Extra inputs are not permitted". 
+ obj.remove("model"); + } else { obj.insert( "model".to_string(), serde_json::Value::String(route.model.clone()), ); } - bytes::Bytes::from(serde_json::to_vec(&json).unwrap_or_else(|_| body.to_vec())) + if needs_vertex_anthropic_version && !obj.contains_key("anthropic_version") { + obj.insert( + "anthropic_version".to_string(), + serde_json::Value::String(VERTEX_ANTHROPIC_VERSION.to_string()), + ); + } } - Err(_) => body, - }; + bytes::Bytes::from( + serde_json::to_vec(&json) + .expect("re-serializing a valid serde_json::Value cannot fail"), + ) + }); builder = builder.body(body); Ok((builder, url)) @@ -230,7 +273,8 @@ async fn send_backend_request( headers: Vec<(String, String)>, body: bytes::Bytes, ) -> Result { - let (builder, url) = prepare_backend_request(client, route, method, path, &headers, body)?; + let (builder, url) = + prepare_backend_request(client, route, method, path, &headers, body, false)?; builder .timeout(route.timeout) .send() @@ -251,7 +295,8 @@ async fn send_backend_request_streaming( headers: Vec<(String, String)>, body: bytes::Bytes, ) -> Result { - let (builder, url) = prepare_backend_request(client, route, method, path, &headers, body)?; + let (builder, url) = + prepare_backend_request(client, route, method, path, &headers, body, true)?; builder.send().await.map_err(|e| map_send_error(e, &url)) } @@ -334,7 +379,7 @@ pub async fn verify_backend_endpoint( if mock::is_mock_route(route) { return Ok(ValidatedEndpoint { - url: build_backend_url(&route.endpoint, probe.path), + url: build_provider_url(route, &route.model, probe.path, false), protocol: probe.protocol.to_string(), }); } @@ -399,7 +444,7 @@ async fn try_validation_request( details, }, })?; - let url = build_backend_url(&route.endpoint, path); + let url = build_provider_url(route, &route.model, path, false); if response.status().is_success() { return Ok(ValidatedEndpoint { @@ -512,6 +557,64 @@ pub async fn proxy_to_backend_streaming( }) } +/// Build the upstream URL for a provider 
route. +/// +/// Behavior matrix (`request_path_override`, `model_in_path`): +/// - `(Some(suffix), true)`: `{endpoint}/{model_id}{suffix}` +/// Used by Vertex AI Anthropic: buffered requests keep `:rawPredict`, while +/// streaming requests upgrade the rawPredict suffix to `:streamRawPredict`. +/// - `(Some(override_path), false)`: `{endpoint}{override_path}` +/// Used when a fixed path replaces the protocol-derived path. +/// - `(None, true)`: `{endpoint}/{model_id}/{protocol_path}` +/// Model embedded before protocol path. +/// - `(None, false)`: delegates to `build_backend_url` (default, with /v1 dedup). +fn build_provider_url( + route: &ResolvedRoute, + model_id: &str, + protocol_path: &str, + stream_response: bool, +) -> String { + let base = route.endpoint.trim_end_matches('/'); + match (&route.request_path_override, route.model_in_path) { + // Vertex AI publisher endpoint: model in URL path with suffix + // e.g. .../publishers/anthropic/models/claude-3-5-sonnet@20241022:rawPredict + (Some(suffix), true) => { + // suffix is appended directly after model_id (e.g. ":rawPredict"). + // It must not start with '/' — use the (Some, false) arm for path overrides. + debug_assert!( + !suffix.starts_with('/'), + "suffix in model_in_path branch must not start with '/'; got: {suffix:?}" + ); + let suffix = if stream_response + && suffix == ":rawPredict" + && is_vertex_anthropic_rawpredict_route(route) + { + ":streamRawPredict" + } else { + suffix.as_str() + }; + format!("{base}/{model_id}{suffix}") + } + // Explicit path override, model NOT in URL. + // Normalize: ensure override_path begins with '/' so the concatenation + // never produces a broken URL like `https://host.compath`. 
+ (Some(override_path), false) => { + if override_path.starts_with('/') || override_path.is_empty() { + format!("{base}{override_path}") + } else { + format!("{base}/{override_path}") + } + } + // Model in path, no override — append model then protocol-derived path + (None, true) => { + let path = protocol_path.trim_start_matches('/'); + format!("{base}/{model_id}/{path}") + } + // Default: existing behavior (includes /v1 deduplication) + (None, false) => build_backend_url(&route.endpoint, protocol_path), + } +} + fn build_backend_url(endpoint: &str, path: &str) -> String { let base = endpoint.trim_end_matches('/'); if base.ends_with("/v1") && (path == "/v1" || path.starts_with("/v1/")) { @@ -521,10 +624,25 @@ fn build_backend_url(endpoint: &str, path: &str) -> String { format!("{base}{path}") } +/// Check whether a route targets a Vertex AI Anthropic rawPredict endpoint. +/// +/// The router persists the neutral `:rawPredict` suffix on resolved routes and +/// upgrades it to `:streamRawPredict` only for streaming proxy calls. 
+fn is_vertex_anthropic_rawpredict_route(route: &ResolvedRoute) -> bool { + route.model_in_path + && route.protocols.iter().any(|p| p == "anthropic_messages") + && route + .request_path_override + .as_deref() + .is_some_and(|suffix| suffix == ":rawPredict") +} + #[cfg(test)] mod tests { - use super::{ValidationFailureKind, build_backend_url, verify_backend_endpoint}; - use crate::config::ResolvedRoute; + use super::{ + ValidationFailureKind, build_backend_url, build_provider_url, verify_backend_endpoint, + }; + use crate::config::{DEFAULT_ROUTE_TIMEOUT, ResolvedRoute}; use openshell_core::inference::AuthHeader; use wiremock::matchers::{body_partial_json, header, method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; @@ -566,7 +684,9 @@ mod tests { "anthropic-version".to_string(), "anthropic-beta".to_string(), ], - timeout: crate::config::DEFAULT_ROUTE_TIMEOUT, + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, } } @@ -581,7 +701,9 @@ mod tests { auth: AuthHeader::Bearer, default_headers: Vec::new(), passthrough_headers: vec!["openai-organization".to_string()], - timeout: crate::config::DEFAULT_ROUTE_TIMEOUT, + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }; let kept = super::sanitize_request_headers( @@ -651,6 +773,76 @@ mod tests { ); } + #[test] + fn vertex_anthropic_rawpredict_strips_anthropic_beta() { + // Vertex AI rawPredict endpoints reject the anthropic-beta header. + // The router must strip it before forwarding to avoid HTTP 400 errors + // from the Vertex AI backend when clients (e.g. Claude Code) always + // send beta feature flags. 
+ let route = ResolvedRoute { + name: "inference.local".to_string(), + endpoint: "https://us-central1-aiplatform.googleapis.com/v1/projects/proj/locations/us-central1/publishers/anthropic/models".to_string(), + model: "claude-sonnet-4-20250514".to_string(), + api_key: "ya29.token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: vec![], + passthrough_headers: vec!["anthropic-beta".to_string()], + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }; + + let headers = vec![ + ("content-type".to_string(), "application/json".to_string()), + ( + "anthropic-beta".to_string(), + "prompt-caching-scope-2026-01-05,redact-thinking-2026-02-12".to_string(), + ), + ]; + + let kept = super::sanitize_request_headers(&route, &headers); + + assert!( + kept.iter() + .any(|(name, _)| name.eq_ignore_ascii_case("content-type")), + "content-type should be preserved" + ); + assert!( + kept.iter() + .all(|(name, _)| !name.eq_ignore_ascii_case("anthropic-beta")), + "anthropic-beta must be stripped for Vertex AI rawPredict routes" + ); + } + + #[test] + fn direct_anthropic_preserves_anthropic_beta() { + // The anthropic-beta header must still pass through for direct + // Anthropic API routes -- only Vertex AI rawPredict strips it. 
+ let route = test_route( + "https://api.anthropic.com/v1", + &["anthropic_messages"], + AuthHeader::Custom("x-api-key"), + ); + + let headers = vec![ + ("content-type".to_string(), "application/json".to_string()), + ( + "anthropic-beta".to_string(), + "prompt-caching-2024-07-31".to_string(), + ), + ]; + + let kept = super::sanitize_request_headers(&route, &headers); + + assert!( + kept.iter() + .any(|(name, value)| name.eq_ignore_ascii_case("anthropic-beta") + && value == "prompt-caching-2024-07-31"), + "anthropic-beta must be preserved for direct Anthropic API routes" + ); + } + #[tokio::test] async fn verify_backend_endpoint_uses_route_auth_and_shape() { let mock_server = MockServer::start().await; @@ -794,4 +986,603 @@ mod tests { ValidationFailureKind::RequestShape ); } + + #[tokio::test] + async fn verify_vertex_anthropic_route_uses_buffered_rawpredict_probe() { + let mock_server = MockServer::start().await; + let route = ResolvedRoute { + name: "vertex-anthropic".to_string(), + endpoint: format!( + "{}/v1/projects/my-project/locations/us-east5/publishers/anthropic/models", + mock_server.uri() + ), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "ya29.token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }; + + Mock::given(method("POST")) + .and(path( + "/v1/projects/my-project/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet@20241022:rawPredict", + )) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "id": "msg_vertex_verify" + }))) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + let validated = verify_backend_endpoint(&client, &route).await.unwrap(); + assert!( + validated.url.ends_with(":rawPredict"), + "buffered 
verification should probe the unary Vertex endpoint, got: {}", + validated.url + ); + } + + /// Vertex AI pattern: `model_in_path=true`, `request_path_override=Some(":rawPredict")` + /// means buffered requests POST to `base_url/model_id:rawPredict`. + #[test] + fn build_provider_url_model_in_path_with_suffix() { + let route = ResolvedRoute { + name: "inference.local".to_string(), + endpoint: + "https://us-east5-aiplatform.googleapis.com/v1/projects/my-project/locations/us-east5/publishers/anthropic/models" + .to_string(), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }; + + let url = build_provider_url(&route, "claude-3-5-sonnet@20241022", "/v1/messages", false); + assert!( + url.ends_with("/claude-3-5-sonnet@20241022:rawPredict"), + "expected URL to end with model id and suffix, got: {url}" + ); + assert!( + !url.contains("/v1/messages"), + "expected no protocol path appended, got: {url}" + ); + } + + #[test] + fn build_provider_url_vertex_anthropic_streaming_upgrades_to_stream_rawpredict() { + let route = ResolvedRoute { + name: "inference.local".to_string(), + endpoint: + "https://us-east5-aiplatform.googleapis.com/v1/projects/my-project/locations/us-east5/publishers/anthropic/models" + .to_string(), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }; + + let url = build_provider_url(&route, "claude-3-5-sonnet@20241022", "/v1/messages", true); + assert!( + 
url.ends_with("/claude-3-5-sonnet@20241022:streamRawPredict"), + "expected streaming URL to upgrade the suffix, got: {url}" + ); + } + + /// Vertex AI pattern: `model_in_path=true`, `request_path_override=Some("")` (empty suffix) + /// means POST directly to `base_url/model_id` with no additional path segment. + #[test] + fn build_provider_url_model_in_path_empty_suffix() { + let route = ResolvedRoute { + name: "inference.local".to_string(), + endpoint: "https://example.com/models".to_string(), + model: "my-model".to_string(), + api_key: "token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(String::new()), + }; + + let url = build_provider_url(&route, "my-model", "/v1/messages", false); + assert_eq!(url, "https://example.com/models/my-model"); + } + + /// Explicit path override: `request_path_override=Some("/v1/chat/completions")` + /// appends the override path to `base_url`, ignoring `model_in_path`. 
+ #[test] + fn build_provider_url_with_path_override() { + let route = ResolvedRoute { + name: "inference.local".to_string(), + endpoint: "https://api.example.com".to_string(), + model: "some-model".to_string(), + api_key: "key".to_string(), + protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: Some("/v1/chat/completions".to_string()), + }; + + let url = build_provider_url(&route, "some-model", "/v1/chat/completions", false); + assert!( + url.ends_with("/v1/chat/completions"), + "expected URL to end with path override, got: {url}" + ); + } + + /// Default behavior: `model_in_path=false`, `request_path_override=None` uses + /// the existing `build_backend_url` logic (protocol-derived path only). + #[test] + fn build_provider_url_default_behavior() { + let route = ResolvedRoute { + name: "inference.local".to_string(), + endpoint: "https://api.openai.com/v1".to_string(), + model: "gpt-4o".to_string(), + api_key: "key".to_string(), + protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, + }; + + let url = build_provider_url(&route, "gpt-4o", "/v1/chat/completions", false); + assert_eq!( + url, "https://api.openai.com/v1/chat/completions", + "default behavior should dedupe v1 prefix and use protocol path" + ); + } + + #[test] + fn build_provider_url_override_path_normalizes_missing_leading_slash() { + // An override_path without a leading '/' must not produce a broken URL. 
+ let route = ResolvedRoute { + name: "test".to_string(), + endpoint: "https://example.com/v1/projects/proj/locations/us/endpoints/openapi" + .to_string(), + model: "gemini-pro".to_string(), + api_key: "key".to_string(), + protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: Some("chat/completions".to_string()), // no leading slash + }; + let url = build_provider_url(&route, &route.model, "/v1/chat/completions", false); + // Must not produce https://...openaichat/completions + assert!( + url.contains("/chat/completions"), + "URL must contain /chat/completions, got: {url}" + ); + assert!( + !url.contains("openaichat"), + "URL must not smash endpoint and path, got: {url}" + ); + assert_eq!( + url, + "https://example.com/v1/projects/proj/locations/us/endpoints/openapi/chat/completions" + ); + } + + /// Vertex AI Anthropic routes require `anthropic_version` in the request body. + /// Verify it is injected on the buffered `:rawPredict` path when the client + /// did not already include it. 
+ #[tokio::test] + async fn vertex_ai_body_injects_anthropic_version() { + let mock_server = MockServer::start().await; + + // Build a Vertex-AI-style route: model in path, suffix :rawPredict + let base_path = "/v1/projects/my-project/locations/us-east5/publishers/anthropic/models"; + let route = ResolvedRoute { + name: "vertex-anthropic".to_string(), + endpoint: format!("{}{base_path}", mock_server.uri()), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "ya29.token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }; + + Mock::given(method("POST")) + .and(path(format!( + "{base_path}/claude-3-5-sonnet@20241022:rawPredict" + ))) + .and(body_partial_json(serde_json::json!({ + "anthropic_version": "vertex-2023-10-16", + }))) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({"id": "msg_vertex_1"})), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + let body = bytes::Bytes::from( + serde_json::to_vec(&serde_json::json!({ + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + })) + .unwrap(), + ); + let headers = vec![("content-type".to_string(), "application/json".to_string())]; + + let (builder, _url) = super::prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &headers, + body, + false, + ) + .unwrap(); + + let response = builder.send().await.unwrap(); + assert_eq!( + response.status().as_u16(), + 200, + "mock should match body with anthropic_version injected" + ); + let received = mock_server.received_requests().await.unwrap(); + assert_eq!(received.len(), 1); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert!( + 
!received_body.as_object().unwrap().contains_key("model"), + "Vertex Anthropic route must not inject model into the body, got: {received_body}" + ); + } + + /// Claude Code and other Anthropic SDK clients always send "model" in the + /// request body. For Vertex AI rawPredict routes the model is in the URL + /// path; the body field must be stripped to avoid HTTP 400 + /// "Extra inputs are not permitted" from the Vertex AI backend. + #[tokio::test] + async fn vertex_ai_body_strips_client_model_field() { + let mock_server = MockServer::start().await; + + let base_path = "/v1/projects/my-project/locations/us-east5/publishers/anthropic/models"; + let route = ResolvedRoute { + name: "vertex-anthropic".to_string(), + endpoint: format!("{}{base_path}", mock_server.uri()), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "ya29.token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }; + + Mock::given(method("POST")) + .and(path(format!( + "{base_path}/claude-3-5-sonnet@20241022:rawPredict" + ))) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({"id": "msg_1"})), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + // Simulate a client (e.g. Claude Code) that always sends "model" in the body. 
+ let body = bytes::Bytes::from( + serde_json::to_vec(&serde_json::json!({ + "model": "claude-3-5-sonnet-20241022", + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + })) + .unwrap(), + ); + let headers = vec![("content-type".to_string(), "application/json".to_string())]; + + let (builder, _url) = super::prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &headers, + body, + false, + ) + .unwrap(); + + let response = builder.send().await.unwrap(); + assert_eq!(response.status().as_u16(), 200); + let received = mock_server.received_requests().await.unwrap(); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert!( + !received_body.as_object().unwrap().contains_key("model"), + "model field must be stripped from Vertex AI rawPredict body, got: {received_body}" + ); + } + + #[tokio::test] + async fn vertex_ai_body_preserves_client_anthropic_version() { + // When the client already sends anthropic_version, the router must NOT overwrite it. 
+ let mock_server = MockServer::start().await; + + // Expect the body to contain the client's version, NOT "vertex-2023-10-16" + Mock::given(method("POST")) + .and(body_partial_json(serde_json::json!({ + "anthropic_version": "custom-client-version", + }))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "id": "msg_1", + "type": "message", + "role": "assistant", + "model": "claude-3-5-sonnet@20241022", + "content": [{"type": "text", "text": "ok"}] + }))) + .mount(&mock_server) + .await; + + let router = crate::Router::new().unwrap(); + let candidates = vec![ResolvedRoute { + name: "vertex-test".to_string(), + endpoint: format!( + "{}/v1/projects/proj/locations/us-east5/publishers/anthropic/models", + mock_server.uri() + ), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "ya29.test".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }]; + + let body = serde_json::to_vec(&serde_json::json!({ + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + "anthropic_version": "custom-client-version", + })) + .unwrap(); + + let response = router + .proxy_with_candidates( + "anthropic_messages", + "POST", + "/v1/messages", + vec![("content-type".to_string(), "application/json".to_string())], + bytes::Bytes::from(body), + &candidates, + ) + .await + .unwrap(); + + assert_eq!( + response.status, 200, + "proxy should succeed when client sends anthropic_version" + ); + } + + /// Standard Anthropic route (`model_in_path=false`) must NOT inject `anthropic_version`. + /// Vertex body injection must not affect non-Vertex Anthropic providers. 
+ #[tokio::test] + async fn standard_anthropic_body_does_not_inject_vertex_anthropic_version() { + let mock_server = MockServer::start().await; + + let route = ResolvedRoute { + name: "anthropic-direct".to_string(), + endpoint: mock_server.uri(), + model: "claude-3-5-sonnet-20241022".to_string(), + api_key: "sk-ant-test".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Custom("x-api-key"), + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, + }; + + Mock::given(method("POST")) + .and(path("/v1/messages")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({"id": "msg_1"})), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + let body = bytes::Bytes::from( + serde_json::to_vec(&serde_json::json!({ + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + })) + .unwrap(), + ); + let headers = vec![("content-type".to_string(), "application/json".to_string())]; + + let (builder, _url) = super::prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &headers, + body, + false, + ) + .unwrap(); + + builder.send().await.unwrap(); + + let received = mock_server.received_requests().await.unwrap(); + assert_eq!(received.len(), 1); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert!( + !received_body + .as_object() + .unwrap() + .contains_key("anthropic_version"), + "standard Anthropic route must not inject anthropic_version, got: {received_body}" + ); + } + + /// Model-in-path alone is not enough; only Vertex rawPredict-style routes should inject. 
+ #[tokio::test] + async fn anthropic_model_in_path_without_rawpredict_suffix_does_not_inject_version() { + let mock_server = MockServer::start().await; + + let route = ResolvedRoute { + name: "non-vertex-model-path".to_string(), + endpoint: format!("{}/publisher/models", mock_server.uri()), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(String::new()), + }; + + Mock::given(method("POST")) + .respond_with( + ResponseTemplate::new(200) + .set_body_json(serde_json::json!({"id": "msg_model_path"})), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + let body = bytes::Bytes::from( + serde_json::to_vec(&serde_json::json!({ + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + })) + .unwrap(), + ); + let headers = vec![("content-type".to_string(), "application/json".to_string())]; + + let (builder, _url) = super::prepare_backend_request( + &client, + &route, + "POST", + "/v1/messages", + &headers, + body, + false, + ) + .unwrap(); + + builder.send().await.unwrap(); + + let received = mock_server.received_requests().await.unwrap(); + assert_eq!(received.len(), 1); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert!( + !received_body + .as_object() + .unwrap() + .contains_key("anthropic_version"), + "non-rawPredict model-in-path routes must not inject anthropic_version, got: {received_body}" + ); + } + + /// Vertex AI Gemini route (`model_in_path=false`, `openai_chat_completions`) must NOT inject. 
+ #[tokio::test] + async fn vertex_gemini_body_does_not_inject_vertex_anthropic_version() { + let mock_server = MockServer::start().await; + + let route = ResolvedRoute { + name: "vertex-gemini".to_string(), + endpoint: format!( + "{}/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi", + mock_server.uri() + ), + model: "gemini-pro".to_string(), + api_key: "ya29.token".to_string(), + protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, + }; + + Mock::given(method("POST")) + .respond_with( + ResponseTemplate::new(200).set_body_json(serde_json::json!({"id": "msg_gemini"})), + ) + .mount(&mock_server) + .await; + + let client = reqwest::Client::builder().build().unwrap(); + let body = bytes::Bytes::from( + serde_json::to_vec(&serde_json::json!({ + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + })) + .unwrap(), + ); + let headers = vec![("content-type".to_string(), "application/json".to_string())]; + + let (builder, _url) = super::prepare_backend_request( + &client, + &route, + "POST", + "/v1/chat/completions", + &headers, + body, + false, + ) + .unwrap(); + + builder.send().await.unwrap(); + + let received = mock_server.received_requests().await.unwrap(); + assert_eq!(received.len(), 1); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert!( + !received_body + .as_object() + .unwrap() + .contains_key("anthropic_version"), + "Vertex Gemini route must not inject anthropic_version, got: {received_body}" + ); + assert_eq!( + received_body + .as_object() + .unwrap() + .get("model") + .and_then(serde_json::Value::as_str), + Some("gemini-pro"), + "Vertex Gemini route must still rewrite the model field, got: {received_body}" + ); + } } diff --git a/crates/openshell-router/src/config.rs 
b/crates/openshell-router/src/config.rs index ef5d90946..81fac6048 100644 --- a/crates/openshell-router/src/config.rs +++ b/crates/openshell-router/src/config.rs @@ -52,6 +52,11 @@ pub struct ResolvedRoute { pub passthrough_headers: Vec<String>, /// Per-request timeout for proxied inference calls. pub timeout: Duration, + /// When true, the model identifier is embedded in the URL path (e.g. Vertex AI). + pub model_in_path: bool, + /// Optional override for the request path. When set, replaces the protocol-derived path. + /// An empty string means POST directly to `base_url/model_id` with no additional path. + pub request_path_override: Option<String>, } impl std::fmt::Debug for ResolvedRoute { @@ -66,6 +71,8 @@ impl std::fmt::Debug for ResolvedRoute { .field("default_headers", &self.default_headers) .field("passthrough_headers", &self.passthrough_headers) .field("timeout", &self.timeout) + .field("model_in_path", &self.model_in_path) + .field("request_path_override", &self.request_path_override) .finish() } } @@ -129,7 +136,7 @@ impl RouteConfig { } let (auth, default_headers, passthrough_headers) = - route_headers_from_provider_type(self.provider_type.as_deref()); + route_headers_from_provider_type(self.provider_type.as_deref(), &protocols); Ok(ResolvedRoute { name: self.name.clone(), @@ -141,6 +148,8 @@ impl RouteConfig { default_headers, passthrough_headers, timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }) } } @@ -152,8 +161,9 @@ impl RouteConfig { /// which uses the centralized `InferenceProviderProfile` registry. 
fn route_headers_from_provider_type( provider_type: Option<&str>, + protocols: &[String], ) -> (AuthHeader, Vec<(String, String)>, Vec<String>) { - openshell_core::inference::route_headers_for_provider_type(provider_type.unwrap_or("")) + openshell_core::inference::route_headers_for_route(provider_type.unwrap_or(""), protocols) } #[cfg(test)] @@ -274,6 +284,8 @@ routes: default_headers: Vec::new(), passthrough_headers: Vec::new(), timeout: DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }; let debug_output = format!("{route:?}"); assert!( @@ -288,8 +300,10 @@ routes: #[test] fn auth_from_anthropic_provider_uses_custom_header() { - let (auth, headers, passthrough_headers) = - route_headers_from_provider_type(Some("anthropic")); + let (auth, headers, passthrough_headers) = route_headers_from_provider_type( + Some("anthropic"), + &["anthropic_messages".to_string()], + ); assert_eq!(auth, AuthHeader::Custom("x-api-key")); assert!(headers.iter().any(|(k, _)| k == "anthropic-version")); assert!( @@ -301,7 +315,10 @@ routes: #[test] fn auth_from_openai_provider_uses_bearer() { - let (auth, headers, passthrough_headers) = route_headers_from_provider_type(Some("openai")); + let (auth, headers, passthrough_headers) = route_headers_from_provider_type( + Some("openai"), + &["openai_chat_completions".to_string()], + ); assert_eq!(auth, AuthHeader::Bearer); assert!(headers.is_empty()); assert!( @@ -313,9 +330,19 @@ routes: #[test] fn auth_from_none_defaults_to_bearer() { - let (auth, headers, passthrough_headers) = route_headers_from_provider_type(None); + let (auth, headers, passthrough_headers) = route_headers_from_provider_type(None, &[]); assert_eq!(auth, AuthHeader::Bearer); assert!(headers.is_empty()); assert!(passthrough_headers.is_empty()); } + + #[test] + fn vertex_anthropic_route_forwards_beta_only() { + let (_, headers, passthrough_headers) = route_headers_from_provider_type( + Some("google-vertex-ai"), + &["anthropic_messages".to_string()], + ); 
+ assert!(headers.is_empty()); + assert_eq!(passthrough_headers, vec!["anthropic-beta".to_string()]); + } } diff --git a/crates/openshell-router/src/mock.rs b/crates/openshell-router/src/mock.rs index 66fc80414..92f3671ba 100644 --- a/crates/openshell-router/src/mock.rs +++ b/crates/openshell-router/src/mock.rs @@ -133,6 +133,8 @@ mod tests { default_headers: Vec::new(), passthrough_headers: Vec::new(), timeout: crate::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, } } diff --git a/crates/openshell-router/tests/backend_integration.rs b/crates/openshell-router/tests/backend_integration.rs index 6b21de94d..02f94b868 100644 --- a/crates/openshell-router/tests/backend_integration.rs +++ b/crates/openshell-router/tests/backend_integration.rs @@ -17,6 +17,8 @@ fn mock_candidates(base_url: &str) -> Vec { default_headers: Vec::new(), passthrough_headers: vec!["openai-organization".to_string(), "x-model-id".to_string()], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }] } @@ -121,6 +123,8 @@ async fn proxy_no_compatible_route_returns_error() { default_headers: Vec::new(), passthrough_headers: Vec::new(), timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }]; let err = router @@ -217,6 +221,8 @@ async fn proxy_mock_route_returns_canned_response() { default_headers: Vec::new(), passthrough_headers: Vec::new(), timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }]; let body = serde_json::to_vec(&serde_json::json!({ @@ -356,6 +362,8 @@ async fn proxy_uses_x_api_key_for_anthropic_route() { "anthropic-beta".to_string(), ], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }]; let body = serde_json::to_vec(&serde_json::json!({ @@ -419,6 +427,8 @@ async fn 
proxy_anthropic_does_not_send_bearer_auth() { "anthropic-beta".to_string(), ], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }]; let response = router @@ -468,6 +478,8 @@ async fn proxy_forwards_client_anthropic_version_header() { "anthropic-beta".to_string(), ], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }]; let body = serde_json::to_vec(&serde_json::json!({ @@ -500,6 +512,220 @@ async fn proxy_forwards_client_anthropic_version_header() { ); } +#[tokio::test] +async fn proxy_vertex_gemini_route_uses_chat_completions_override() { + let mock_server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path( + "/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi/chat/completions", + )) + .and(bearer_token("ya29.test-token")) + .and(body_partial_json(serde_json::json!({ + "model": "gemini-2.0-flash-001", + }))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "id": "chatcmpl-vertex", + "object": "chat.completion", + "created": 1_700_000_000_i64, + "model": "gemini-2.0-flash-001", + "choices": [{ + "index": 0, + "message": { "role": "assistant", "content": "pong" }, + "finish_reason": "stop" + }] + }))) + .mount(&mock_server) + .await; + + let router = Router::new().unwrap(); + let candidates = vec![ResolvedRoute { + name: "inference.local".to_string(), + endpoint: format!( + "{}/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi", + mock_server.uri() + ), + model: "gemini-2.0-flash-001".to_string(), + api_key: "ya29.test-token".to_string(), + protocols: vec!["openai_chat_completions".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: Vec::new(), + timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: Some("/chat/completions".to_string()), + }]; + + let body = 
serde_json::to_vec(&serde_json::json!({ + "model": "client-model", + "messages": [{"role": "user", "content": "ping"}] + })) + .unwrap(); + + let response = router + .proxy_with_candidates( + "openai_chat_completions", + "POST", + "/v1/chat/completions", + vec![("content-type".to_string(), "application/json".to_string())], + bytes::Bytes::from(body), + &candidates, + ) + .await + .unwrap(); + + assert_eq!(response.status, 200); +} + +#[tokio::test] +async fn proxy_vertex_anthropic_route_uses_model_path_suffix() { + let mock_server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path( + "/v1/projects/my-project/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet@20241022:rawPredict", + )) + .and(bearer_token("ya29.vertex-token")) + .and(body_partial_json(serde_json::json!({ + "anthropic_version": "vertex-2023-10-16", + }))) + .respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({ + "id": "msg_vertex_1", + "type": "message", + "role": "assistant", + "model": "claude-3-5-sonnet@20241022", + "content": [{"type": "text", "text": "pong"}] + }))) + .mount(&mock_server) + .await; + + let router = Router::new().unwrap(); + let candidates = vec![ResolvedRoute { + name: "inference.local".to_string(), + endpoint: format!( + "{}/v1/projects/my-project/locations/us-east5/publishers/anthropic/models", + mock_server.uri() + ), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "ya29.vertex-token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: vec!["anthropic-beta".to_string()], + timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }]; + + // Include "model" in the body, as Claude Code and other Anthropic SDK + // clients always do. The router must strip it for Vertex AI rawPredict. 
+ let body = serde_json::to_vec(&serde_json::json!({ + "model": "claude-3-5-sonnet-20241022", + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32 + })) + .unwrap(); + + let response = router + .proxy_with_candidates( + "anthropic_messages", + "POST", + "/v1/messages", + vec![ + ("content-type".to_string(), "application/json".to_string()), + ("anthropic-beta".to_string(), "tools-2024-05-16".to_string()), + ("anthropic-version".to_string(), "2023-06-01".to_string()), + ], + bytes::Bytes::from(body), + &candidates, + ) + .await + .unwrap(); + + assert_eq!(response.status, 200); + let received = mock_server.received_requests().await.unwrap(); + assert_eq!(received.len(), 1); + let received_body: serde_json::Value = serde_json::from_slice(&received[0].body).unwrap(); + assert_eq!( + received_body["anthropic_version"], + serde_json::json!("vertex-2023-10-16") + ); + // "model" must be stripped: Vertex AI encodes the model in the URL path + // and rejects "model" in the body with "Extra inputs are not permitted". + assert!( + received_body.get("model").is_none(), + "Vertex Anthropic requests must not have model in the body, got: {received_body}" + ); + // anthropic-beta must be stripped: Vertex AI rejects unknown beta values + // with HTTP 400 (e.g. prompt-caching-scope-2026-01-05). 
+ assert!( + !received[0].headers.contains_key("anthropic-beta"), + "anthropic-beta must not reach the Vertex AI backend" + ); + assert!( + !received[0].headers.contains_key("anthropic-version"), + "anthropic-version must be converted to body anthropic_version, not forwarded as a header" + ); +} + +#[tokio::test] +async fn proxy_vertex_anthropic_streaming_route_uses_stream_rawpredict() { + let mock_server = MockServer::start().await; + + Mock::given(method("POST")) + .and(path( + "/v1/projects/my-project/locations/us-east5/publishers/anthropic/models/claude-3-5-sonnet@20241022:streamRawPredict", + )) + .and(bearer_token("ya29.vertex-token")) + .respond_with(ResponseTemplate::new(200).set_body_string("{\"id\":\"msg_vertex_stream\"}")) + .mount(&mock_server) + .await; + + let router = Router::new().unwrap(); + let candidates = vec![ResolvedRoute { + name: "inference.local".to_string(), + endpoint: format!( + "{}/v1/projects/my-project/locations/us-east5/publishers/anthropic/models", + mock_server.uri() + ), + model: "claude-3-5-sonnet@20241022".to_string(), + api_key: "ya29.vertex-token".to_string(), + protocols: vec!["anthropic_messages".to_string()], + auth: AuthHeader::Bearer, + default_headers: Vec::new(), + passthrough_headers: vec!["anthropic-beta".to_string()], + timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }]; + + let body = serde_json::to_vec(&serde_json::json!({ + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 32, + "stream": true + })) + .unwrap(); + + let mut response = router + .proxy_with_candidates_streaming( + "anthropic_messages", + "POST", + "/v1/messages", + vec![("content-type".to_string(), "application/json".to_string())], + bytes::Bytes::from(body), + &candidates, + ) + .await + .unwrap(); + + assert_eq!(response.status, 200); + let first_chunk = response.next_chunk().await.unwrap(); + assert!( + first_chunk.is_some(), + "streaming 
response should yield a body chunk" + ); +} + #[test] fn config_resolves_routes_with_protocol() { let config = RouterConfig { @@ -561,6 +787,8 @@ async fn streaming_proxy_completes_despite_exceeding_route_timeout() { // Route timeout shorter than the backend delay — streaming must // NOT be constrained by this. timeout: Duration::from_secs(1), + model_in_path: false, + request_path_override: None, }]; let body = serde_json::to_vec(&serde_json::json!({ @@ -623,6 +851,8 @@ async fn buffered_proxy_enforces_route_timeout() { default_headers: Vec::new(), passthrough_headers: Vec::new(), timeout: Duration::from_secs(1), + model_in_path: false, + request_path_override: None, }]; let body = serde_json::to_vec(&serde_json::json!({ diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 4a0e61e57..54a26108c 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -1271,7 +1271,7 @@ pub(crate) fn bundle_to_resolved_routes( .iter() .map(|r| { let (auth, default_headers, passthrough_headers) = - openshell_core::inference::route_headers_for_provider_type(&r.provider_type); + openshell_core::inference::route_headers_for_route(&r.provider_type, &r.protocols); let timeout = if r.timeout_secs == 0 { openshell_router::config::DEFAULT_ROUTE_TIMEOUT } else { @@ -1287,6 +1287,8 @@ pub(crate) fn bundle_to_resolved_routes( default_headers, passthrough_headers, timeout, + model_in_path: r.model_in_path, + request_path_override: r.request_path_override.clone(), } }) .collect() @@ -1461,39 +1463,22 @@ fn enumerate_gpu_device_nodes() -> Vec { paths } -fn push_unique(paths: &mut Vec, path: String) { - if !paths.iter().any(|p| p == &path) { - paths.push(path); - } -} - -fn collect_baseline_enrichment_paths( - include_proxy: bool, - include_gpu: bool, - gpu_device_nodes: Vec, -) -> (Vec, Vec) { - let mut ro = Vec::new(); - let mut rw = Vec::new(); - - if include_proxy { - for &path in PROXY_BASELINE_READ_ONLY { - 
push_unique(&mut ro, path.to_string()); - } - for &path in PROXY_BASELINE_READ_WRITE { - push_unique(&mut rw, path.to_string()); - } - } +/// Collect all baseline paths for enrichment: proxy defaults + GPU (if present). +/// Returns `(read_only, read_write)` as owned `String` vecs. +fn baseline_enrichment_paths() -> (Vec<String>, Vec<String>) { + let mut ro: Vec<String> = PROXY_BASELINE_READ_ONLY + .iter() + .map(|&s| s.to_string()) + .collect(); + let mut rw: Vec<String> = PROXY_BASELINE_READ_WRITE + .iter() + .map(|&s| s.to_string()) + .collect(); - if include_gpu { - for &path in GPU_BASELINE_READ_ONLY { - push_unique(&mut ro, path.to_string()); - } - for &path in GPU_BASELINE_READ_WRITE { - push_unique(&mut rw, path.to_string()); - } - for path in gpu_device_nodes { - push_unique(&mut rw, path); - } + if has_gpu_devices() { + ro.extend(GPU_BASELINE_READ_ONLY.iter().map(|&s| s.to_string())); + rw.extend(GPU_BASELINE_READ_WRITE.iter().map(|&s| s.to_string())); + rw.extend(enumerate_gpu_device_nodes()); } // A path promoted to read_write (e.g. /proc for GPU) should not also @@ -1504,33 +1489,14 @@ fn collect_baseline_enrichment_paths( (ro, rw) } -fn active_baseline_enrichment_paths(include_proxy: bool) -> (Vec<String>, Vec<String>) { - let include_gpu = has_gpu_devices(); - let gpu_device_nodes = if include_gpu { - enumerate_gpu_device_nodes() - } else { - Vec::new() - }; - collect_baseline_enrichment_paths(include_proxy, include_gpu, gpu_device_nodes) -} - -/// Collect all active baseline paths for tests and diagnostics. -/// Returns `(read_only, read_write)` as owned `String` vecs. 
-#[cfg(test)] -fn baseline_enrichment_paths() -> (Vec<String>, Vec<String>) { - active_baseline_enrichment_paths(true) -} - -fn enrich_proto_baseline_paths_with<F>( - proto: &mut openshell_core::proto::SandboxPolicy, - ro: &[String], - rw: &[String], - path_exists: F, -) -> bool -where - F: Fn(&str) -> bool, -{ - if ro.is_empty() && rw.is_empty() { +/// Ensure a proto `SandboxPolicy` includes the baseline filesystem paths +/// required for proxy-mode sandboxes. Paths are only added if missing; +/// user-specified paths are never removed. +/// +/// Returns `true` if the policy was modified (caller may want to sync back). +fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) -> bool { + // Only enrich if network_policies are present (proxy mode indicator). + if proto.network_policies.is_empty() { return false; } @@ -1541,10 +1507,17 @@ where ..Default::default() }); + let (ro, rw) = baseline_enrichment_paths(); + + // Baseline paths are system-injected, not user-specified. Skip paths + // that do not exist in this container image to avoid noisy warnings from + // Landlock and, more critically, to prevent a single missing baseline + // path from abandoning the entire Landlock ruleset under best-effort + // mode (see issue #664). 
let mut modified = false; - for path in ro { + for path in &ro { if !fs.read_only.iter().any(|p| p == path) && !fs.read_write.iter().any(|p| p == path) { - if !path_exists(path) { + if !std::path::Path::new(path).exists() { debug!( path, "Baseline read-only path does not exist, skipping enrichment" @@ -1555,11 +1528,11 @@ where modified = true; } } - for path in rw { + for path in &rw { if fs.read_only.iter().any(|p| p == path) || fs.read_write.iter().any(|p| p == path) { continue; } - if !path_exists(path) { + if !std::path::Path::new(path).exists() { debug!( path, "Baseline read-write path does not exist, skipping enrichment" @@ -1570,26 +1543,6 @@ where modified = true; } - modified -} - -/// Ensure a proto `SandboxPolicy` includes the baseline filesystem paths -/// required by proxy-mode sandboxes and GPU runtimes. Paths are only added if -/// missing; user-specified paths are never removed. -/// -/// Returns `true` if the policy was modified (caller may want to sync back). -fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) -> bool { - let (ro, rw) = active_baseline_enrichment_paths(!proto.network_policies.is_empty()); - - // Baseline paths are system-injected, not user-specified. Skip paths - // that do not exist in this container image to avoid noisy warnings from - // Landlock and, more critically, to prevent a single missing baseline - // path from abandoning the entire Landlock ruleset under best-effort - // mode (see issue #664). - let modified = enrich_proto_baseline_paths_with(proto, &ro, &rw, |path| { - std::path::Path::new(path).exists() - }); - if modified { ocsf_emit!( ConfigStateChangeBuilder::new(ocsf_ctx()) @@ -1605,15 +1558,15 @@ fn enrich_proto_baseline_paths(proto: &mut openshell_core::proto::SandboxPolicy) } /// Ensure a `SandboxPolicy` (Rust type) includes the baseline filesystem -/// paths required by proxy-mode sandboxes and GPU runtimes. Used for the -/// local-file code path where no proto is available. 
+/// paths required for proxy-mode sandboxes. Used for the local-file code +/// path where no proto is available. fn enrich_sandbox_baseline_paths(policy: &mut SandboxPolicy) { - let (ro, rw) = - active_baseline_enrichment_paths(matches!(policy.network.mode, NetworkMode::Proxy)); - if ro.is_empty() && rw.is_empty() { + if !matches!(policy.network.mode, NetworkMode::Proxy) { return; } + let (ro, rw) = baseline_enrichment_paths(); + let mut modified = false; for path in &ro { let p = std::path::PathBuf::from(path); @@ -1768,31 +1721,6 @@ mod baseline_tests { ); } - #[test] - fn proto_gpu_enrichment_adds_devices_without_network_policy() { - let mut policy = openshell_policy::restrictive_default_policy(); - assert!( - policy.network_policies.is_empty(), - "regression setup must exercise the no-network default path" - ); - let (ro, rw) = - collect_baseline_enrichment_paths(false, true, vec!["/dev/nvidia0".to_string()]); - - let enriched = enrich_proto_baseline_paths_with(&mut policy, &ro, &rw, |path| { - matches!(path, "/proc" | "/dev/nvidia0") - }); - - let filesystem = policy.filesystem.expect("filesystem policy"); - assert!( - enriched, - "GPU enrichment should not require network policies" - ); - assert!( - filesystem.read_write.contains(&"/dev/nvidia0".to_string()), - "GPU enrichment should add enumerated device nodes without network policies" - ); - } - #[test] fn gpu_baseline_read_write_contains_dxg() { // /dev/dxg must be present so WSL2 sandboxes get the Landlock @@ -2736,15 +2664,30 @@ mod tests { ], provider_type: "openai".to_string(), timeout_secs: 0, + model_in_path: false, + request_path_override: None, }, openshell_core::proto::ResolvedRoute { - name: "local".to_string(), - base_url: "http://vllm:8000/v1".to_string(), - api_key: "local-key".to_string(), - model_id: "llama-3".to_string(), - protocols: vec!["openai_chat_completions".to_string()], - provider_type: String::new(), + name: "vertex".to_string(), + base_url: 
"https://us-east5-aiplatform.googleapis.com/v1/projects/my-project/locations/us-east5/publishers/anthropic/models".to_string(), + api_key: "ya29.vertex".to_string(), + model_id: "claude-3-5-sonnet@20241022".to_string(), + protocols: vec!["anthropic_messages".to_string()], + provider_type: "google-vertex-ai".to_string(), timeout_secs: 120, + model_in_path: true, + request_path_override: Some(":rawPredict".to_string()), + }, + openshell_core::proto::ResolvedRoute { + name: "vertex-gemini".to_string(), + base_url: "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi".to_string(), + api_key: "ya29.gemini".to_string(), + model_id: "gemini-2.0-flash-001".to_string(), + protocols: vec!["openai_chat_completions".to_string()], + provider_type: "google-vertex-ai".to_string(), + timeout_secs: 0, + model_in_path: false, + request_path_override: Some("/chat/completions".to_string()), }, ], revision: "abc123".to_string(), @@ -2753,7 +2696,7 @@ mod tests { let routes = bundle_to_resolved_routes(&bundle); - assert_eq!(routes.len(), 2); + assert_eq!(routes.len(), 3); assert_eq!(routes[0].endpoint, "https://api.example.com/v1"); assert_eq!(routes[0].model, "gpt-4"); assert_eq!(routes[0].api_key, "sk-test-key"); @@ -2770,16 +2713,49 @@ mod tests { openshell_router::config::DEFAULT_ROUTE_TIMEOUT, "timeout_secs=0 should map to default" ); - assert_eq!(routes[1].endpoint, "http://vllm:8000/v1"); + assert_eq!( + routes[0].passthrough_headers, + vec!["openai-organization".to_string(), "x-model-id".to_string()] + ); + assert_eq!( + routes[1].endpoint, + "https://us-east5-aiplatform.googleapis.com/v1/projects/my-project/locations/us-east5/publishers/anthropic/models" + ); assert_eq!( routes[1].auth, openshell_core::inference::AuthHeader::Bearer ); + assert_eq!(routes[1].model, "claude-3-5-sonnet@20241022"); + assert_eq!(routes[1].protocols, vec!["anthropic_messages"]); + assert!(routes[1].model_in_path); + assert_eq!( + 
routes[1].passthrough_headers, + vec!["anthropic-beta".to_string()] + ); + assert_eq!( + routes[1].request_path_override, + Some(":rawPredict".to_string()) + ); assert_eq!( routes[1].timeout, Duration::from_secs(120), "timeout_secs=120 should map to 120s" ); + assert_eq!( + routes[2].endpoint, + "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi" + ); + assert_eq!(routes[2].model, "gemini-2.0-flash-001"); + assert_eq!(routes[2].protocols, vec!["openai_chat_completions"]); + assert!(!routes[2].model_in_path); + assert_eq!( + routes[2].request_path_override, + Some("/chat/completions".to_string()) + ); + assert!( + routes[2].passthrough_headers.is_empty(), + "Vertex Gemini routes must not inherit Anthropic passthrough headers" + ); } #[test] @@ -2805,6 +2781,8 @@ mod tests { protocols: vec!["openai_chat_completions".to_string()], provider_type: "openai".to_string(), timeout_secs: 0, + model_in_path: false, + request_path_override: None, }], revision: "rev".to_string(), generated_at_ms: 0, @@ -2827,6 +2805,8 @@ mod tests { default_headers: vec![], passthrough_headers: vec![], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }, openshell_router::config::ResolvedRoute { name: "sandbox-system".to_string(), @@ -2838,6 +2818,8 @@ mod tests { default_headers: vec![], passthrough_headers: vec![], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }, ]; @@ -3128,6 +3110,8 @@ filesystem_policy: default_headers: vec![], passthrough_headers: vec![], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }]; let cache = Arc::new(RwLock::new(routes)); diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index 88deb1596..1e95c035b 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ 
b/crates/openshell-sandbox/src/proxy.rs @@ -4966,6 +4966,8 @@ network_policies: "x-model-id".to_string(), ], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }], vec![], ); @@ -5022,6 +5024,8 @@ network_policies: default_headers: vec![], passthrough_headers: vec![], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, } } @@ -6649,40 +6653,4 @@ network_policies: } } } - - #[test] - fn test_emit_denial_enqueues_denial_event() { - let (tx, mut rx) = mpsc::unbounded_channel::(); - let decision = ConnectDecision { - action: NetworkAction::Deny { - reason: "no matching policy".into(), - }, - generation: 0, - binary: Some(PathBuf::from("/usr/bin/curl")), - binary_pid: Some(1234), - ancestors: vec![], - cmdline_paths: vec![], - }; - - emit_denial( - &Some(tx), - "blocked.invalid", - 443, - "/usr/bin/curl", - &decision, - "no matching policy", - "connect", - ); - - let event = rx - .try_recv() - .expect("DenialEvent should be enqueued after L4 deny"); - assert_eq!(event.host, "blocked.invalid"); - assert_eq!(event.port, 443); - assert_eq!(event.binary, "/usr/bin/curl"); - assert_eq!(event.denial_stage, "connect"); - assert_eq!(event.deny_reason, "no matching policy"); - assert!(event.l7_method.is_none()); - assert!(event.l7_path.is_none()); - } } diff --git a/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs b/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs index 1044623f5..675b60b24 100644 --- a/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs +++ b/crates/openshell-sandbox/src/sandbox/linux/seccomp.rs @@ -11,6 +11,27 @@ //! 3. **Conditional syscall blocks** -- block dangerous flag combinations on otherwise //! needed syscalls (`execveat+AT_EMPTY_PATH`, `unshare+CLONE_NEWUSER`, //! `seccomp+SET_MODE_FILTER`) +//! +//! ## `AF_NETLINK` policy +//! +//! `AF_NETLINK` sockets are allowed **only** for the `NETLINK_ROUTE` protocol +//! 
(protocol value 0). All other netlink protocols are blocked with `EPERM`. +//! +//! `NETLINK_ROUTE` is required by `getifaddrs(3)` on Linux (used by Node.js, +//! Python, Go, and many HTTP/gRPC client libraries during startup). Without it +//! those runtimes fail to enumerate network interfaces even when they have no +//! intent to modify them. +//! +//! The risk is contained by existing sandbox layers: +//! - **Privilege drop**: `CAP_NET_ADMIN` is not granted, so all write operations +//! (add/delete routes, addresses, interfaces) fail with `EPERM` regardless. +//! - **Network namespace**: the sandboxed process sees only `lo` and one veth; +//! no host interfaces are visible. +//! - **nftables bypass rules**: all non-proxy traffic is rejected at the +//! netfilter level regardless of what the sandbox learns about its interfaces. +//! +//! Every other netlink protocol (`NETLINK_SOCK_DIAG`, `NETLINK_NETFILTER`, +//! `NETLINK_AUDIT`, `NETLINK_XFRM`, `NETLINK_GENERIC`, etc.) remains blocked. use crate::policy::{NetworkMode, SandboxPolicy}; use miette::{IntoDiagnostic, Result}; @@ -168,7 +189,8 @@ fn build_filter_rules(allow_inet: bool) -> Result libc::AF_PACKET, libc::AF_BLUETOOTH, libc::AF_VSOCK, - libc::AF_NETLINK, + // AF_NETLINK is handled separately below: NETLINK_ROUTE (protocol 0) + // is allowed for getifaddrs(3); all other netlink protocols are blocked. ]; if !allow_inet { blocked_domains.push(libc::AF_INET); @@ -180,6 +202,18 @@ fn build_filter_rules(allow_inet: bool) -> Result add_socket_domain_rule(&mut rules, domain)?; } + // Allow AF_NETLINK only for NETLINK_ROUTE (protocol 0). + // + // NETLINK_ROUTE is needed by getifaddrs(3) which is called by Node.js, + // Python, Go, and many HTTP/gRPC client libraries during startup to + // enumerate local network interfaces. Blocking it causes runtime errors + // such as "getifaddrs returned an error" in tools like Claude Code. + // + // The rule blocks socket(AF_NETLINK, *, protocol) for any protocol != 0. 
+ // Write operations via NETLINK_ROUTE still require CAP_NET_ADMIN, which + // the sandbox does not grant, so interface/route modification is not possible. + add_netlink_non_route_rule(&mut rules)?; + // --- Unconditional syscall blocks --- // These syscalls are blocked entirely (empty rule vec = unconditional EPERM). @@ -272,6 +306,39 @@ fn add_socket_domain_rule(rules: &mut BTreeMap>, domain: i Ok(()) } +/// Block `socket(AF_NETLINK, *, protocol)` for every protocol except +/// `NETLINK_ROUTE` (protocol 0). +/// +/// Two AND'd conditions are required: +/// - arg0 == `AF_NETLINK` (domain) +/// - arg2 != 0 (protocol is not `NETLINK_ROUTE`) +/// +/// A seccomp rule fires (and returns EPERM) only when **all** conditions +/// match, so this rule is triggered for any `socket(AF_NETLINK, *, non-zero)` +/// call while leaving `socket(AF_NETLINK, *, 0)` (`NETLINK_ROUTE`) through. +#[allow(clippy::cast_sign_loss)] +fn add_netlink_non_route_rule(rules: &mut BTreeMap>) -> Result<()> { + let domain_condition = SeccompCondition::new( + 0, // domain argument + SeccompCmpArgLen::Dword, + SeccompCmpOp::Eq, + libc::AF_NETLINK as u64, + ) + .into_diagnostic()?; + + let protocol_condition = SeccompCondition::new( + 2, // protocol argument + SeccompCmpArgLen::Dword, + SeccompCmpOp::Ne, + 0, // NETLINK_ROUTE = 0 + ) + .into_diagnostic()?; + + let rule = SeccompRule::new(vec![domain_condition, protocol_condition]).into_diagnostic()?; + rules.entry(libc::SYS_socket).or_default().push(rule); + Ok(()) +} + /// Block a syscall when a specific bit pattern is set in an argument. /// /// Uses `MaskedEq` to check `(arg & flag_bit) == flag_bit`, which triggers @@ -412,6 +479,27 @@ mod tests { } } + #[test] + fn netlink_socket_rules_are_conditional_not_unconditional() { + // SYS_socket must appear in the rules map (for domain blocks and the + // AF_NETLINK+non-ROUTE filter), but it must NOT be an unconditional block + // (empty Vec). 
An empty Vec would block ALL socket() calls, including + // socket(AF_NETLINK, *, NETLINK_ROUTE=0) which getifaddrs(3) needs. + let filter_rules = build_filter_rules(true).unwrap(); + + assert!( + filter_rules.contains_key(&libc::SYS_socket), + "SYS_socket should be in the rules map (domain blocks present)" + ); + + // The Vec for SYS_socket must be non-empty (rules are + // conditional), which is the opposite of an unconditional block. + assert!( + !filter_rules[&libc::SYS_socket].is_empty(), + "SYS_socket should have conditional rules, not an unconditional block" + ); + } + #[test] fn supervisor_prelude_blocks_expected_syscalls() { let filter_rules = build_supervisor_prelude_rules(); @@ -679,4 +767,75 @@ mod tests { "additional seccomp filter installation should be blocked after startup" ); } + + #[test] + fn behavioral_netlink_route_allowed() { + // socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE=0) must succeed (not blocked). + // This is the call getifaddrs(3) makes on Linux to enumerate interfaces. 
+ let filter = build_filter(true).unwrap(); + let pid = unsafe { libc::fork() }; + assert!(pid >= 0, "fork failed"); + if pid == 0 { + unsafe { + libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + apply_filter(&filter).expect("apply_filter"); + // NETLINK_ROUTE = 0 + let fd = libc::socket(libc::AF_NETLINK, libc::SOCK_RAW, 0); + if fd >= 0 { + libc::close(fd); + libc::_exit(0); + } else { + let errno = *libc::__errno_location(); + let msg = format!( + "socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE): expected success, got errno={errno}\n" + ); + libc::write(2, msg.as_ptr().cast(), msg.len()); + libc::_exit(1); + } + } + } + let mut status: libc::c_int = 0; + unsafe { libc::waitpid(pid, &mut status, 0) }; + assert!( + unsafe { libc::WIFEXITED(status) && libc::WEXITSTATUS(status) == 0 }, + "socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) should be allowed for getifaddrs(3)" + ); + } + + #[test] + fn behavioral_netlink_non_route_blocked() { + // socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG=4) must be blocked. + // NETLINK_SOCK_DIAG is representative of non-ROUTE netlink protocols + // that have no legitimate use inside the sandbox. 
+ let filter = build_filter(true).unwrap(); + let pid = unsafe { libc::fork() }; + assert!(pid >= 0, "fork failed"); + if pid == 0 { + unsafe { + libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + apply_filter(&filter).expect("apply_filter"); + // NETLINK_SOCK_DIAG = 4 + let fd = libc::socket(libc::AF_NETLINK, libc::SOCK_RAW, 4); + let errno = *libc::__errno_location(); + if fd == -1 && errno == libc::EPERM { + libc::_exit(0); + } else { + if fd >= 0 { + libc::close(fd); + } + let msg = format!( + "socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG): expected EPERM, got fd={fd} errno={errno}\n" + ); + libc::write(2, msg.as_ptr().cast(), msg.len()); + libc::_exit(1); + } + } + } + let mut status: libc::c_int = 0; + unsafe { libc::waitpid(pid, &mut status, 0) }; + assert!( + unsafe { libc::WIFEXITED(status) && libc::WEXITSTATUS(status) == 0 }, + "socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG) should be blocked with EPERM" + ); + } } diff --git a/crates/openshell-sandbox/tests/system_inference.rs b/crates/openshell-sandbox/tests/system_inference.rs index 20c39f3b6..ee090a255 100644 --- a/crates/openshell-sandbox/tests/system_inference.rs +++ b/crates/openshell-sandbox/tests/system_inference.rs @@ -22,6 +22,8 @@ fn make_system_route() -> ResolvedRoute { default_headers: Vec::new(), passthrough_headers: Vec::new(), timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, } } @@ -36,6 +38,8 @@ fn make_user_route() -> ResolvedRoute { default_headers: Vec::new(), passthrough_headers: Vec::new(), timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, } } @@ -133,6 +137,8 @@ async fn system_inference_with_anthropic_protocol() { "anthropic-beta".to_string(), ], timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path: false, + request_path_override: None, }; let ctx = InferenceContext::new(patterns, router, vec![], vec![system_route]); diff --git 
a/crates/openshell-server/src/grpc/provider.rs b/crates/openshell-server/src/grpc/provider.rs index 3ddaae037..2d660a834 100644 --- a/crates/openshell-server/src/grpc/provider.rs +++ b/crates/openshell-server/src/grpc/provider.rs @@ -440,6 +440,14 @@ pub(super) async fn resolve_provider_environment( .ok_or_else(|| Status::failed_precondition(format!("provider '{name}' not found")))?; for (key, value) in &provider.credentials { + if is_non_injectable_provider_credential(&provider, key) { + warn!( + provider_name = %name, + key = %key, + "skipping non-injectable provider credential" + ); + continue; + } if is_valid_env_key(key) { let expires_at_ms = provider .credential_expires_at_ms @@ -467,6 +475,55 @@ pub(super) async fn resolve_provider_environment( ); } } + + // For Vertex AI providers, inject agent-specific config env vars so that + // Claude Code, Goose, and OpenCode inside the sandbox can reach Vertex AI + // without additional configuration. Credentials from the loop above take + // precedence via entry().or_insert(), and sandbox --env overrides are + // applied at the process level after this environment is installed, so + // they naturally shadow these values. + if openshell_core::inference::normalize_inference_provider_type(&provider.r#type) + == Some("google-vertex-ai") + { + let project_id = provider + .config + .get(openshell_core::inference::VERTEX_AI_PROJECT_ID_KEY) + .map(String::as_str) + .unwrap_or_default() + .trim(); + let region = provider + .config + .get(openshell_core::inference::VERTEX_AI_REGION_KEY) + .map(String::as_str) + .unwrap_or_default() + .trim(); + + // Static flags -- always present for Vertex AI providers. + env.entry("CLAUDE_CODE_USE_VERTEX".to_string()) + .or_insert_with(|| "1".to_string()); + env.entry("GOOSE_PROVIDER".to_string()) + .or_insert_with(|| "gcp_vertex_ai".to_string()); + + // Project ID derived vars. 
+ if !project_id.is_empty() { + env.entry("ANTHROPIC_VERTEX_PROJECT_ID".to_string()) + .or_insert_with(|| project_id.to_string()); + env.entry("GCP_PROJECT_ID".to_string()) + .or_insert_with(|| project_id.to_string()); + env.entry("GOOGLE_CLOUD_PROJECT".to_string()) + .or_insert_with(|| project_id.to_string()); + } + + // Region derived vars. + if !region.is_empty() { + env.entry("CLOUD_ML_REGION".to_string()) + .or_insert_with(|| region.to_string()); + env.entry("GCP_LOCATION".to_string()) + .or_insert_with(|| region.to_string()); + env.entry("VERTEX_LOCATION".to_string()) + .or_insert_with(|| region.to_string()); + } + } } Ok(ProviderEnvironment { @@ -587,6 +644,7 @@ fn active_provider_credential_keys(provider: &Provider, now_ms: i64) -> Vec Vec bool { + openshell_core::inference::normalize_inference_provider_type(&provider.r#type) + == Some("google-vertex-ai") + && key == "GOOGLE_SERVICE_ACCOUNT_KEY" +} + pub(super) fn is_valid_env_key(key: &str) -> bool { let mut bytes = key.bytes(); let Some(first) = bytes.next() else { @@ -893,17 +957,7 @@ async fn provider_type_allows_empty_credentials_for_refresh( let Some(profile) = get_provider_type_profile(store, provider_type).await? 
else { return Ok(false); }; - let required_credentials = profile - .credentials - .iter() - .filter(|credential| credential.required) - .collect::>(); - Ok(!required_credentials.is_empty() - && required_credentials.iter().all(|credential| { - credential.refresh.as_ref().is_some_and(|refresh| { - crate::provider_refresh::is_gateway_mintable_strategy(refresh.strategy) - }) - })) + Ok(profile.allows_gateway_refresh_bootstrap()) } async fn merged_provider_profiles(store: &Store) -> Result, Status> { @@ -1617,7 +1671,10 @@ mod tests { .iter() .map(|profile| profile.id.as_str()) .collect::>(); - assert_eq!(ids, vec!["claude-code", "github", "nvidia"]); + assert_eq!( + ids, + vec!["claude-code", "github", "google-vertex-ai", "nvidia",] + ); let github = response .profiles @@ -1738,13 +1795,15 @@ mod tests { #[tokio::test] async fn import_provider_profile_allows_legacy_provider_type_ids_without_built_in_profiles() { + // Use an ID that is not a built-in profile to test legacy import. + // "custom-llm" is not registered as a built-in and never will be. 
let state = test_server_state().await; let response = handle_import_provider_profiles( &state, Request::new(ImportProviderProfilesRequest { profiles: vec![ProviderProfileImportItem { - profile: Some(custom_profile("codex")), - source: "codex.yaml".to_string(), + profile: Some(custom_profile("custom-llm")), + source: "custom-llm.yaml".to_string(), }], }), ) @@ -1758,15 +1817,15 @@ mod tests { let imported = handle_get_provider_profile( &state, Request::new(GetProviderProfileRequest { - id: "codex".to_string(), + id: "custom-llm".to_string(), }), ) .await .unwrap() .into_inner() .profile - .expect("codex profile should be returned"); - assert_eq!(imported.id, "codex"); + .expect("custom-llm profile should be returned"); + assert_eq!(imported.id, "custom-llm"); } #[tokio::test] @@ -2184,6 +2243,68 @@ mod tests { ); } + #[tokio::test] + async fn configure_provider_refresh_accepts_vertex_service_account_token_key() { + let state = test_server_state().await; + create_provider_record( + state.store.as_ref(), + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "vertex-sa".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "google-vertex-ai".to_string(), + credentials: std::iter::once(( + "GOOGLE_SERVICE_ACCOUNT_KEY".to_string(), + "{\"type\":\"service_account\"}".to_string(), + )) + .collect(), + config: HashMap::new(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap(); + + let response = handle_configure_provider_refresh( + &state, + Request::new(ConfigureProviderRefreshRequest { + provider: "vertex-sa".to_string(), + credential_key: "GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN".to_string(), + strategy: ProviderCredentialRefreshStrategy::GoogleServiceAccountJwt as i32, + material: HashMap::from([ + ( + "client_email".to_string(), + "sa@test-project.iam.gserviceaccount.com".to_string(), + ), + ( + "private_key".to_string(), + "-----BEGIN PRIVATE 
KEY-----\nkey\n-----END PRIVATE KEY-----".to_string(), + ), + ]), + secret_material_keys: vec!["private_key".to_string()], + expires_at_ms: None, + }), + ) + .await + .unwrap() + .into_inner() + .status + .expect("status"); + + assert_eq!( + response.credential_key, + "GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN" + ); + assert_eq!( + response.strategy, + ProviderCredentialRefreshStrategy::GoogleServiceAccountJwt as i32 + ); + } + #[tokio::test] async fn delete_provider_refresh_preserves_manually_updated_expiry() { let state = test_server_state().await; @@ -3053,6 +3174,26 @@ mod tests { .unwrap(); assert!(optional_static_empty.credentials.is_empty()); + let vertex_empty = create_provider_record( + store, + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "vertex-no-token-yet".to_string(), + created_at_ms: 1_000_000, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "google-vertex-ai".to_string(), + credentials: HashMap::new(), + config: HashMap::new(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap(); + assert!(vertex_empty.credentials.is_empty()); + let get_err = get_provider_record(store, "").await.unwrap_err(); assert_eq!(get_err.code(), Code::InvalidArgument); @@ -3503,6 +3644,265 @@ mod tests { assert!(err.message().contains("provider-b")); } + #[tokio::test] + async fn resolve_provider_env_injects_vertex_agent_config() { + let store = test_store().await; + create_provider_record( + &store, + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "vertex-local".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "google-vertex-ai".to_string(), + credentials: std::iter::once(( + "GOOGLE_VERTEX_AI_TOKEN".to_string(), + "ya29.token".to_string(), + )) + .collect(), + config: [ + ( + "VERTEX_AI_PROJECT_ID".to_string(), + "my-gcp-project".to_string(), + ), + 
("VERTEX_AI_REGION".to_string(), "us-central1".to_string()), + ] + .into_iter() + .collect(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap(); + + let result = resolve_provider_environment(&store, &["vertex-local".to_string()]) + .await + .unwrap(); + + // Credential still injected. + assert_eq!( + result.get("GOOGLE_VERTEX_AI_TOKEN"), + Some(&"ya29.token".to_string()) + ); + // Static flags. + assert_eq!(result.get("CLAUDE_CODE_USE_VERTEX"), Some(&"1".to_string())); + assert_eq!( + result.get("GOOSE_PROVIDER"), + Some(&"gcp_vertex_ai".to_string()) + ); + // Project ID derived vars. + assert_eq!( + result.get("ANTHROPIC_VERTEX_PROJECT_ID"), + Some(&"my-gcp-project".to_string()) + ); + assert_eq!( + result.get("GCP_PROJECT_ID"), + Some(&"my-gcp-project".to_string()) + ); + assert_eq!( + result.get("GOOGLE_CLOUD_PROJECT"), + Some(&"my-gcp-project".to_string()) + ); + // Region derived vars. + assert_eq!( + result.get("CLOUD_ML_REGION"), + Some(&"us-central1".to_string()) + ); + assert_eq!(result.get("GCP_LOCATION"), Some(&"us-central1".to_string())); + assert_eq!( + result.get("VERTEX_LOCATION"), + Some(&"us-central1".to_string()) + ); + } + + #[tokio::test] + async fn resolve_provider_env_vertex_never_injects_service_account_key() { + let store = test_store().await; + create_provider_record( + &store, + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "vertex-bootstrap".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "google-vertex-ai".to_string(), + credentials: [ + ( + "GOOGLE_SERVICE_ACCOUNT_KEY".to_string(), + r#"{"type":"service_account","private_key":"secret"}"#.to_string(), + ), + ( + "GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN".to_string(), + "ya29.short-lived".to_string(), + ), + ] + .into_iter() + .collect(), + config: HashMap::new(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap(); + + let result = 
resolve_provider_environment(&store, &["vertex-bootstrap".to_string()]) + .await + .unwrap(); + + assert!(!result.contains_key("GOOGLE_SERVICE_ACCOUNT_KEY")); + assert_eq!( + result.get("GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN"), + Some(&"ya29.short-lived".to_string()) + ); + } + + #[tokio::test] + async fn resolve_provider_env_vertex_omits_agent_config_when_project_and_region_absent() { + let store = test_store().await; + create_provider_record( + &store, + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "vertex-no-config".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "google-vertex-ai".to_string(), + credentials: std::iter::once(( + "GOOGLE_VERTEX_AI_TOKEN".to_string(), + "ya29.token".to_string(), + )) + .collect(), + config: HashMap::new(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap(); + + let result = resolve_provider_environment(&store, &["vertex-no-config".to_string()]) + .await + .unwrap(); + + // Static flags still present. + assert_eq!(result.get("CLAUDE_CODE_USE_VERTEX"), Some(&"1".to_string())); + assert_eq!( + result.get("GOOSE_PROVIDER"), + Some(&"gcp_vertex_ai".to_string()) + ); + // Project ID and region derived vars are absent. + assert!(!result.contains_key("ANTHROPIC_VERTEX_PROJECT_ID")); + assert!(!result.contains_key("GCP_PROJECT_ID")); + assert!(!result.contains_key("GOOGLE_CLOUD_PROJECT")); + assert!(!result.contains_key("CLOUD_ML_REGION")); + assert!(!result.contains_key("GCP_LOCATION")); + assert!(!result.contains_key("VERTEX_LOCATION")); + } + + #[tokio::test] + async fn resolve_provider_env_vertex_credential_wins_over_agent_config_key() { + // If a credential happens to share a name with one of the injected agent + // config keys, the credential value takes precedence because the credential + // loop runs first and entry().or_insert() does not overwrite. 
+ let store = test_store().await; + create_provider_record( + &store, + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "vertex-collision".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "google-vertex-ai".to_string(), + credentials: [ + ( + "GOOGLE_VERTEX_AI_TOKEN".to_string(), + "ya29.token".to_string(), + ), + // Same key as the injected static flag. + ( + "CLAUDE_CODE_USE_VERTEX".to_string(), + "custom-value".to_string(), + ), + ] + .into_iter() + .collect(), + config: [ + ("VERTEX_AI_PROJECT_ID".to_string(), "my-project".to_string()), + ("VERTEX_AI_REGION".to_string(), "us-east1".to_string()), + ] + .into_iter() + .collect(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap(); + + let result = resolve_provider_environment(&store, &["vertex-collision".to_string()]) + .await + .unwrap(); + + // Credential value wins over the injected "1". + assert_eq!( + result.get("CLAUDE_CODE_USE_VERTEX"), + Some(&"custom-value".to_string()) + ); + // Other injected vars are still present. 
+ assert_eq!( + result.get("GOOSE_PROVIDER"), + Some(&"gcp_vertex_ai".to_string()) + ); + } + + #[tokio::test] + async fn resolve_provider_env_non_vertex_provider_does_not_inject_agent_config() { + let store = test_store().await; + create_provider_record( + &store, + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: String::new(), + name: "openai-local".to_string(), + created_at_ms: 0, + labels: HashMap::new(), + resource_version: 0, + }), + r#type: "openai".to_string(), + credentials: std::iter::once(("OPENAI_API_KEY".to_string(), "sk-test".to_string())) + .collect(), + config: HashMap::new(), + credential_expires_at_ms: HashMap::new(), + }, + ) + .await + .unwrap(); + + let result = resolve_provider_environment(&store, &["openai-local".to_string()]) + .await + .unwrap(); + + assert_eq!(result.get("OPENAI_API_KEY"), Some(&"sk-test".to_string())); + assert!(!result.contains_key("CLAUDE_CODE_USE_VERTEX")); + assert!(!result.contains_key("GOOSE_PROVIDER")); + assert!(!result.contains_key("ANTHROPIC_VERTEX_PROJECT_ID")); + assert!(!result.contains_key("GCP_PROJECT_ID")); + assert!(!result.contains_key("GOOGLE_CLOUD_PROJECT")); + assert!(!result.contains_key("CLOUD_ML_REGION")); + assert!(!result.contains_key("GCP_LOCATION")); + assert!(!result.contains_key("VERTEX_LOCATION")); + } + #[tokio::test] async fn update_provider_rejects_credential_key_collision_for_attached_sandbox() { let store = test_store().await; diff --git a/crates/openshell-server/src/inference.rs b/crates/openshell-server/src/inference.rs index 183a80e74..9867b2506 100644 --- a/crates/openshell-server/src/inference.rs +++ b/crates/openshell-server/src/inference.rs @@ -4,12 +4,16 @@ #![allow(clippy::result_large_err)] // gRPC handlers return Result, Status> use openshell_core::ObjectId; +use openshell_core::inference::{ + VERTEX_AI_PROJECT_ID_KEY, VERTEX_AI_PUBLISHER_KEY, VERTEX_AI_REGION_KEY, +}; use openshell_core::proto::{ ClusterInferenceConfig, 
GetClusterInferenceRequest, GetClusterInferenceResponse, GetInferenceBundleRequest, GetInferenceBundleResponse, InferenceRoute, Provider, ResolvedRoute, SetClusterInferenceRequest, SetClusterInferenceResponse, ValidatedEndpoint, inference_server::Inference, }; +use openshell_providers::normalize_provider_type; use openshell_router::config::ResolvedRoute as RouterResolvedRoute; use openshell_router::{ValidationFailureKind, verify_backend_endpoint}; use prost::Message as _; @@ -167,7 +171,7 @@ async fn upsert_cluster_inference_route( Status::failed_precondition(format!("provider '{provider_name}' not found")) })?; - let resolved = resolve_provider_route(&provider)?; + let resolved = resolve_provider_route(&provider, model_id)?; let validation = if verify { vec![verify_provider_endpoint(provider.object_name(), model_id, &resolved).await?] } else { @@ -243,6 +247,7 @@ fn build_cluster_inference_config( } } +#[derive(Debug)] struct ResolvedProviderRoute { provider_type: String, route: RouterResolvedRoute, @@ -254,24 +259,397 @@ struct UpsertedInferenceRoute { validation: Vec, } -fn resolve_provider_route(provider: &Provider) -> Result { - let provider_type = provider.r#type.trim().to_ascii_lowercase(); +/// Infer the Vertex AI publisher segment from a model identifier. +/// +/// Currently only the `"anthropic"` result is consumed by +/// `resolve_vertex_ai_route` to select between the native Anthropic +/// Messages API (`rawPredict`) and the OpenAI-compatible endpoint. +/// Non-Anthropic publisher mappings (`meta`, `mistralai`, `ai21`, +/// `deepseek`, `google`) are maintained for forward compatibility +/// and documentation value — all non-Anthropic models route to the +/// same OpenAI-compatible endpoint regardless of publisher. +/// +/// Returns `None` for unrecognized models, which causes resolution to +/// fall back to the OpenAI-compatible endpoint +/// (`v1beta1/.../endpoints/openapi`). 
+fn infer_vertex_publisher(model_id: &str) -> Option<&'static str> { + if model_id.starts_with("claude-") { + Some("anthropic") + } else if model_id.starts_with("gemini-") + || model_id.starts_with("text-bison-") + || model_id.starts_with("chat-bison-") + { + Some("google") + } else if model_id.starts_with("llama-") { + Some("meta") + } else if model_id.starts_with("mistral-") || model_id.starts_with("codestral-") { + Some("mistralai") + } else if model_id.starts_with("jamba-") { + Some("ai21") + } else if model_id.starts_with("deepseek-") { + Some("deepseek") + } else { + None + } +} + +/// Return a required Vertex AI config value, or a `FailedPrecondition` status. +fn required_vertex_config<'a>( + config: &'a std::collections::HashMap, + key: &str, +) -> Result<&'a str, Status> { + config + .get(key) + .map(String::as_str) + .filter(|v| !v.trim().is_empty()) + .ok_or_else(|| { + Status::failed_precondition(format!("Vertex AI provider requires {key} config")) + }) +} + +/// Validate a GCP project ID against the documented format. +/// +/// GCP project IDs must be 6–30 characters, start with a lowercase letter, +/// contain only lowercase letters, digits, and hyphens, and not end with a hyphen. +fn validate_gcp_project_id(value: &str) -> Result<(), Status> { + let valid = value.len() >= 6 + && value.len() <= 30 + && value.starts_with(|c: char| c.is_ascii_lowercase()) + && !value.ends_with('-') + && value + .chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-'); + if valid { + Ok(()) + } else { + Err(Status::invalid_argument(format!( + "VERTEX_AI_PROJECT_ID has invalid format: {value:?}. \ + GCP project IDs must be 6-30 characters, start with a lowercase letter, \ + contain only lowercase letters, digits, and hyphens, and not end with a hyphen." + ))) + } +} + +/// Validate a GCP region/location value. 
+/// +/// Accepts the special keywords `global`, `us`, and `eu`, plus standard +/// regional patterns like `us-central1`, `europe-west4`, `us-east4-a`. +fn validate_gcp_region(value: &str) -> Result<(), Status> { + let lower = value.trim().to_ascii_lowercase(); + let valid = matches!(lower.as_str(), "global" | "us" | "eu") + || (lower + .chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-') + && lower.contains('-') + && !lower.starts_with('-') + && !lower.ends_with('-')); + if valid { + Ok(()) + } else { + Err(Status::invalid_argument(format!( + "VERTEX_AI_REGION has invalid format: {value:?}. \ + Expected a GCP region (e.g. us-central1, europe-west4) \ + or one of: global, us, eu." + ))) + } +} + +/// Resolve the Vertex AI API host and normalized location from a configured region. +fn vertex_location_and_host(region: &str) -> (String, String) { + let location = region.trim().to_ascii_lowercase(); + let host = match location.as_str() { + "global" => "aiplatform.googleapis.com".to_string(), + "us" | "eu" => format!("aiplatform.{location}.rep.googleapis.com"), + _ => format!("{location}-aiplatform.googleapis.com"), + }; + (location, host) +} + +fn validate_vertex_model_id(value: &str) -> Result<(), Status> { + let trimmed = value.trim(); + if trimmed.is_empty() { + return Err(Status::invalid_argument("model_id is required")); + } + if value != trimmed { + return Err(Status::invalid_argument(format!( + "Vertex AI model_id must not include leading or trailing whitespace: {value:?}" + ))); + } + if value.contains('/') || value.contains('\\') { + return Err(Status::invalid_argument(format!( + "Vertex AI model_id must not contain path separators: {value:?}" + ))); + } + if value.chars().any(|c| matches!(c, '?' 
| '#' | '%')) { + return Err(Status::invalid_argument(format!( + "Vertex AI model_id must not contain URL delimiters or percent escapes: {value:?}" + ))); + } + if value.contains("..") { + return Err(Status::invalid_argument(format!( + "Vertex AI model_id must not contain traversal segments: {value:?}" + ))); + } + if value.chars().any(|c| c.is_control() || c.is_whitespace()) { + return Err(Status::invalid_argument(format!( + "Vertex AI model_id must not contain whitespace or control characters: {value:?}" + ))); + } + Ok(()) +} + +fn is_allowed_vertex_override_host(host: &str) -> bool { + matches!( + host, + "aiplatform.googleapis.com" + | "aiplatform.us.rep.googleapis.com" + | "aiplatform.eu.rep.googleapis.com" + ) || host.ends_with("-aiplatform.googleapis.com") +} + +fn validate_vertex_base_url(value: &str) -> Result { + let trimmed = value.trim(); + let url = url::Url::parse(trimmed).map_err(|err| { + Status::invalid_argument(format!("Vertex AI base URL override is invalid: {err}")) + })?; + + if url.scheme() != "https" { + return Err(Status::invalid_argument( + "Vertex AI base URL override must use https".to_string(), + )); + } + if !url.username().is_empty() || url.password().is_some() { + return Err(Status::invalid_argument( + "Vertex AI base URL override must not include userinfo".to_string(), + )); + } + if url.query().is_some() || url.fragment().is_some() { + return Err(Status::invalid_argument( + "Vertex AI base URL override must not include query or fragment components".to_string(), + )); + } + if let Some(port) = url.port() + && port != 443 + { + return Err(Status::invalid_argument(format!( + "Vertex AI base URL override must use port 443 when an explicit port is set, got {port}" + ))); + } + + match url.host() { + Some(url::Host::Domain(host)) if is_allowed_vertex_override_host(host) => {} + Some(url::Host::Domain(host)) => { + return Err(Status::invalid_argument(format!( + "Vertex AI base URL override must target an official Vertex AI hostname, got 
{host:?}" + ))); + } + Some(url::Host::Ipv4(_) | url::Host::Ipv6(_)) => { + return Err(Status::invalid_argument(format!( + "Vertex AI base URL override must not use IP literal hosts: {}", + url.host_str().unwrap_or("") + ))); + } + None => { + return Err(Status::invalid_argument( + "Vertex AI base URL override must include a host".to_string(), + )); + } + } + + Ok(trimmed.to_string()) +} + +/// Build a [`RouterResolvedRoute`] for Vertex AI without duplicating the 15-field struct. +#[allow(clippy::too_many_arguments)] +fn build_vertex_route( + route_name: &str, + endpoint: String, + model_id: &str, + api_key: &str, + protocols: Vec, + profile: &openshell_core::inference::InferenceProviderProfile, + model_in_path: bool, + request_path_override: Option, +) -> RouterResolvedRoute { + RouterResolvedRoute { + name: route_name.to_string(), + endpoint, + model: model_id.to_string(), + api_key: api_key.to_string(), + protocols, + auth: profile.auth.clone(), + default_headers: profile + .default_headers + .iter() + .map(|(k, v)| ((*k).to_string(), (*v).to_string())) + .collect(), + passthrough_headers: profile + .passthrough_headers + .iter() + .map(|p| (*p).to_string()) + .collect(), + timeout: openshell_router::config::DEFAULT_ROUTE_TIMEOUT, + model_in_path, + request_path_override, + } +} + +/// Resolve a Vertex AI route given provider config, model, and bearer token. +fn resolve_vertex_ai_route( + config: &std::collections::HashMap, + model_id: &str, + route_name: &str, + api_key: &str, + profile: &openshell_core::inference::InferenceProviderProfile, +) -> Result { + // Validate model_id early — it appears in URL paths for Anthropic routes + // and in JSON request bodies for all routes. Rejecting path separators, + // traversal segments, and control characters up front is defense-in-depth. + validate_vertex_model_id(model_id)?; + + // Determine if this is an Anthropic model. + // Explicit VERTEX_AI_PUBLISHER=anthropic overrides inference. 
+ // All non-Anthropic models route to the OpenAI-compatible endpoint. + let explicit_publisher = config + .get(VERTEX_AI_PUBLISHER_KEY) + .map(String::as_str) + .filter(|v| !v.trim().is_empty()); + + let is_anthropic = explicit_publisher.map_or_else( + || infer_vertex_publisher(model_id) == Some("anthropic"), + |p| p.eq_ignore_ascii_case("anthropic"), + ); + + // Escape hatch: caller-supplied full base URL still uses the model-derived + // protocol and path contract, but only for the OpenAI-compatible Vertex surface. + // Anthropic-on-Vertex needs model-path shaping and body adaptation that a fully + // caller-controlled URL cannot safely preserve. + if let Some(base_url) = config + .get(profile.base_url_config_keys[0]) + .or_else(|| config.get(profile.base_url_config_keys[1])) + .map(String::as_str) + .filter(|v| !v.trim().is_empty()) + { + if is_anthropic { + return Err(Status::invalid_argument( + "Vertex AI base URL overrides are not supported for Anthropic models. \ + Remove GOOGLE_VERTEX_AI_BASE_URL / VERTEX_AI_BASE_URL and configure \ + VERTEX_AI_PROJECT_ID + VERTEX_AI_REGION instead." + .to_string(), + )); + } + let base_url = validate_vertex_base_url(base_url)?; + + return Ok(build_vertex_route( + route_name, + base_url, + model_id, + api_key, + vec!["openai_chat_completions".to_string()], + profile, + false, + Some("/chat/completions".to_string()), + )); + } + + let project = required_vertex_config(config, VERTEX_AI_PROJECT_ID_KEY)?; + validate_gcp_project_id(project)?; + let region = config + .get(VERTEX_AI_REGION_KEY) + .map(String::as_str) + .filter(|v| !v.trim().is_empty()) + .unwrap_or("us-central1"); + validate_gcp_region(region)?; + let (location, host) = vertex_location_and_host(region); + + if is_anthropic { + // Native Anthropic Messages API via rawPredict. + // model_id is NOT embedded in the endpoint — it is carried in route.model + // and appended with the suffix by build_provider_url(). 
The router upgrades + // `:rawPredict` to `:streamRawPredict` only for streaming proxy calls. + let endpoint = format!( + "https://{host}/v1/projects/{project}/locations/{location}/publishers/anthropic/models" + ); + let protocols = vec!["anthropic_messages".to_string()]; + Ok(build_vertex_route( + route_name, + endpoint, + model_id, + api_key, + protocols, + profile, + true, + Some(":rawPredict".to_string()), + )) + } else { + // OpenAI-compatible endpoint for all non-Anthropic models + // (Gemini, Llama, Mistral, unknown, etc.). Vertex's OpenAI-compatible + // surface uses `/chat/completions` under the `.../endpoints/openapi` + // base, so we pin the route to that path instead of appending the + // router's default `/v1/...` protocol path. + let endpoint = format!( + "https://{host}/v1beta1/projects/{project}/locations/{location}/endpoints/openapi" + ); + let protocols = vec!["openai_chat_completions".to_string()]; + Ok(build_vertex_route( + route_name, + endpoint, + model_id, + api_key, + protocols, + profile, + false, + Some("/chat/completions".to_string()), + )) + } +} + +fn resolve_provider_route( + provider: &Provider, + model_id: &str, +) -> Result { + let raw_provider_type = provider.r#type.trim(); + let provider_type = normalize_provider_type(raw_provider_type) + .map_or_else(|| raw_provider_type.to_ascii_lowercase(), str::to_string); let profile = openshell_core::inference::profile_for(&provider_type).ok_or_else(|| { Status::invalid_argument(format!( - "provider '{name}' has unsupported type '{provider_type}' for cluster inference \ - (supported: openai, anthropic, nvidia)", + "provider '{name}' has unsupported type '{raw_provider_type}' for cluster inference \ + (supported: openai, anthropic, nvidia, google-vertex-ai)", name = provider.object_name() )) })?; - let api_key = - find_provider_api_key(provider, profile.credential_key_names).ok_or_else(|| { - Status::invalid_argument(format!( - "provider '{name}' has no usable API key credential", - name = 
provider.object_name() - )) - })?; + let api_key = find_provider_api_key( + provider, + profile.credential_key_names, + if provider_type == "google-vertex-ai" { + CredentialLookup::PreferredOnly + } else { + CredentialLookup::PreferredThenAny + }, + ) + .ok_or_else(|| { + Status::invalid_argument(format!( + "provider '{name}' has no usable API key credential", + name = provider.object_name() + )) + })?; + + // Vertex AI requires a model-aware URL; delegate to specialised resolver. + if provider_type == "google-vertex-ai" { + let route = resolve_vertex_ai_route( + &provider.config, + model_id, + provider.object_name(), + &api_key, + profile, + )?; + return Ok(ResolvedProviderRoute { + provider_type, + route, + }); + } let base_url = find_provider_config_value(provider, profile.base_url_config_keys) .unwrap_or_else(|| profile.default_base_url.to_string()) @@ -290,7 +668,7 @@ fn resolve_provider_route(provider: &Provider) -> Result Result Option { +/// Controls whether `find_provider_api_key` is allowed to fall back to any +/// non-empty credential when the preferred key names produce no match. +/// +/// `PreferredOnly` is used for providers like Vertex AI where the fallback +/// would pick up JSON bootstrap material (e.g. service account keys) that +/// are not valid bearer tokens. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum CredentialLookup { + /// Only search `preferred_key_names`. Return `None` if none match. + PreferredOnly, + /// Search `preferred_key_names` first, then fall back to any non-empty credential. 
+ PreferredThenAny, +} + +fn find_provider_api_key( + provider: &Provider, + preferred_key_names: &[&str], + lookup: CredentialLookup, +) -> Option { for key in preferred_key_names { if let Some(value) = provider.credentials.get(*key) && !value.trim().is_empty() @@ -382,6 +777,10 @@ fn find_provider_api_key(provider: &Provider, preferred_key_names: &[&str]) -> O } } + if lookup == CredentialLookup::PreferredOnly { + return None; + } + let mut keys = provider.credentials.keys().collect::>(); keys.sort(); for key in keys { @@ -450,6 +849,8 @@ async fn resolve_inference_bundle(store: &Store) -> Result 0); } + #[tokio::test] + async fn bundle_vertex_ai_anthropic_route_preserves_model_path_and_rawpredict() { + let store = test_store().await; + let config = [ + ( + "VERTEX_AI_PROJECT_ID".to_string(), + "my-gcp-project".to_string(), + ), + ("VERTEX_AI_REGION".to_string(), "us-central1".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-dev", config); + store + .put_message(&provider) + .await + .expect("persist provider"); + let route = make_route( + CLUSTER_INFERENCE_ROUTE_NAME, + "vertex-dev", + "claude-3-5-sonnet@20241022", + ); + store.put_message(&route).await.expect("persist route"); + + let resp = resolve_inference_bundle(&store) + .await + .expect("bundle should resolve"); + + assert_eq!(resp.routes.len(), 1); + let route = &resp.routes[0]; + assert_eq!(route.provider_type, "google-vertex-ai"); + assert_eq!(route.api_key, "ya29.test-token"); + assert_eq!(route.protocols, vec!["anthropic_messages"]); + assert!(route.model_in_path); + assert_eq!(route.request_path_override, Some(":rawPredict".to_string())); + assert_eq!(route.model_id, "claude-3-5-sonnet@20241022"); + assert_eq!( + route.base_url, + "https://us-central1-aiplatform.googleapis.com/v1/projects/my-gcp-project/locations/us-central1/publishers/anthropic/models" + ); + } + + #[tokio::test] + async fn 
bundle_vertex_ai_gemini_route_preserves_chat_completions_override() { + let store = test_store().await; + let config = [ + ( + "VERTEX_AI_PROJECT_ID".to_string(), + "my-gcp-project".to_string(), + ), + ("VERTEX_AI_REGION".to_string(), "us-central1".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-dev", config); + store + .put_message(&provider) + .await + .expect("persist provider"); + let route = make_route( + CLUSTER_INFERENCE_ROUTE_NAME, + "vertex-dev", + "gemini-2.0-flash-001", + ); + store.put_message(&route).await.expect("persist route"); + + let resp = resolve_inference_bundle(&store) + .await + .expect("bundle should resolve"); + + assert_eq!(resp.routes.len(), 1); + let route = &resp.routes[0]; + assert_eq!(route.provider_type, "google-vertex-ai"); + assert_eq!(route.api_key, "ya29.test-token"); + assert_eq!(route.protocols, vec!["openai_chat_completions"]); + assert!(!route.model_in_path); + assert_eq!( + route.request_path_override, + Some("/chat/completions".to_string()) + ); + assert_eq!(route.model_id, "gemini-2.0-flash-001"); + assert_eq!( + route.base_url, + "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-gcp-project/locations/us-central1/endpoints/openapi" + ); + } + #[tokio::test] async fn bundle_without_cluster_route_returns_empty_routes() { let store = test_store().await; @@ -862,6 +1352,86 @@ mod tests { assert_eq!(config.model_id, "claude-sonnet-4-20250514"); } + #[tokio::test] + async fn upsert_cluster_inference_route_vertex_ai_anthropic_sets_model_in_path() { + let store = test_store().await; + + // Build a Vertex AI provider with the required config and a minted access token. 
+ let provider = Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: "provider-vertex-test".to_string(), + name: "vertex-test".to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 0, + }), + r#type: "google-vertex-ai".to_string(), + credentials: std::iter::once(( + "GOOGLE_VERTEX_AI_TOKEN".to_string(), + "ya29.test-access-token".to_string(), + )) + .collect(), + config: [ + ( + "VERTEX_AI_PROJECT_ID".to_string(), + "my-gcp-project".to_string(), + ), + ("VERTEX_AI_REGION".to_string(), "us-central1".to_string()), + ] + .into_iter() + .collect(), + credential_expires_at_ms: std::collections::HashMap::new(), + }; + store + .put_message(&provider) + .await + .expect("persist provider"); + + let result = upsert_cluster_inference_route( + &store, + CLUSTER_INFERENCE_ROUTE_NAME, + "vertex-test", + "claude-3-5-sonnet@20241022", + 0, + false, // skip verification — no live endpoint + ) + .await + .expect("upsert should succeed for Vertex AI Anthropic model"); + + // Confirm the route was persisted with correct metadata + assert_eq!(result.route.object_name(), CLUSTER_INFERENCE_ROUTE_NAME); + let config = result.route.config.as_ref().expect("config"); + assert_eq!(config.provider_name, "vertex-test"); + assert_eq!(config.model_id, "claude-3-5-sonnet@20241022"); + + // Resolve the persisted route and assert Vertex AI Anthropic path contract + let resolved = resolve_route_by_name(&store, CLUSTER_INFERENCE_ROUTE_NAME) + .await + .expect("resolve should not fail") + .expect("route should exist after upsert"); + + assert!( + resolved.model_in_path, + "Anthropic-on-Vertex routes must set model_in_path=true" + ); + assert_eq!( + resolved.request_path_override, + Some(":rawPredict".to_string()), + "Anthropic-on-Vertex routes must persist the rawPredict suffix" + ); + assert_eq!(resolved.provider_type, "google-vertex-ai"); + assert!( + resolved.base_url.contains("publishers/anthropic/models"), + 
"endpoint must end with /publishers/anthropic/models, got: {}", + resolved.base_url + ); + assert!( + !resolved.base_url.contains("claude-3-5-sonnet"), + "model_id must not be embedded in the endpoint, got: {}", + resolved.base_url + ); + } + #[tokio::test] async fn bundle_includes_both_user_and_system_routes() { let store = test_store().await; @@ -1080,6 +1650,848 @@ mod tests { assert!(route.validation.is_empty()); } + // ------------------------------------------------------------------------- + // infer_vertex_publisher tests + // ------------------------------------------------------------------------- + + #[test] + fn infer_vertex_publisher_anthropic() { + assert_eq!( + infer_vertex_publisher("claude-3-5-sonnet@20241022"), + Some("anthropic") + ); + assert_eq!(infer_vertex_publisher("claude-opus-4"), Some("anthropic")); + } + + #[test] + fn infer_vertex_publisher_gemini() { + assert_eq!(infer_vertex_publisher("gemini-pro"), Some("google")); + assert_eq!(infer_vertex_publisher("gemini-1.5-flash"), Some("google")); + assert_eq!(infer_vertex_publisher("text-bison-001"), Some("google")); + assert_eq!(infer_vertex_publisher("chat-bison-001"), Some("google")); + } + + #[test] + fn infer_vertex_publisher_unknown() { + assert_eq!(infer_vertex_publisher("some-unknown-model"), None); + assert_eq!(infer_vertex_publisher("gpt-4o"), None); + } + + #[test] + fn infer_vertex_publisher_other_publishers() { + assert_eq!(infer_vertex_publisher("llama-3-70b"), Some("meta")); + assert_eq!(infer_vertex_publisher("mistral-large"), Some("mistralai")); + assert_eq!(infer_vertex_publisher("codestral-22b"), Some("mistralai")); + assert_eq!(infer_vertex_publisher("jamba-1.5-large"), Some("ai21")); + assert_eq!(infer_vertex_publisher("deepseek-r1"), Some("deepseek")); + } + + // ------------------------------------------------------------------------- + // resolve_vertex_ai_route tests + // ------------------------------------------------------------------------- + + fn 
make_vertex_provider_with_config( + name: &str, + config: std::collections::HashMap, + ) -> Provider { + Provider { + metadata: Some(openshell_core::proto::datamodel::v1::ObjectMeta { + id: format!("provider-{name}"), + name: name.to_string(), + created_at_ms: 1_000_000, + labels: std::collections::HashMap::new(), + resource_version: 1, + }), + r#type: "google-vertex-ai".to_string(), + credentials: std::iter::once(( + "GOOGLE_VERTEX_AI_TOKEN".to_string(), + "ya29.test-token".to_string(), + )) + .collect(), + config, + credential_expires_at_ms: std::collections::HashMap::new(), + } + } + + #[test] + fn resolve_vertex_ai_route_anthropic_model() { + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "my-project".to_string()), + ("VERTEX_AI_REGION".to_string(), "us-east1".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-dev", config); + + let resolved = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022") + .expect("should resolve"); + + assert_eq!(resolved.provider_type, "google-vertex-ai"); + assert!(resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some(":rawPredict".to_string()) + ); + // model_id must NOT be embedded in the endpoint — it travels via route.model + assert!( + !resolved.route.endpoint.contains("claude-3-5-sonnet"), + "model_id must not be in endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + resolved + .route + .endpoint + .ends_with("/publishers/anthropic/models"), + "endpoint should end with /publishers/anthropic/models, got: {}", + resolved.route.endpoint + ); + assert!( + resolved + .route + .endpoint + .starts_with("https://us-east1-aiplatform.googleapis.com/"), + "expected regional Vertex host, got: {}", + resolved.route.endpoint + ); + assert!( + resolved.route.endpoint.contains("my-project"), + "expected project in URL" + ); + assert!( + resolved + .route + .protocols + .contains(&"anthropic_messages".to_string()), + 
"expected anthropic_messages protocol" + ); + assert_eq!(resolved.route.model, "claude-3-5-sonnet@20241022"); + } + + #[test] + fn resolve_vertex_ai_route_base_url_override() { + let config = std::iter::once(( + "VERTEX_AI_BASE_URL".to_string(), + "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi".to_string(), + )) + .collect(); + let provider = make_vertex_provider_with_config("vertex-custom", config); + + let resolved = resolve_provider_route(&provider, "any-model").expect("should resolve"); + + assert_eq!( + resolved.route.endpoint, + "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi" + ); + assert!(!resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some("/chat/completions".to_string()) + ); + assert_eq!( + resolved.route.protocols, + vec!["openai_chat_completions".to_string()] + ); + assert_eq!(resolved.route.model, "any-model"); + } + + #[test] + fn resolve_vertex_ai_route_google_prefixed_base_url_override() { + // GOOGLE_VERTEX_AI_BASE_URL (the preferred key) must work on its own. 
+ let config = std::iter::once(( + "GOOGLE_VERTEX_AI_BASE_URL".to_string(), + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global/endpoints/openapi".to_string(), + )) + .collect(); + let provider = make_vertex_provider_with_config("vertex-custom-google", config); + + let resolved = resolve_provider_route(&provider, "any-model").expect("should resolve"); + + assert_eq!( + resolved.route.endpoint, + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global/endpoints/openapi" + ); + assert!(!resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some("/chat/completions".to_string()) + ); + } + + #[test] + fn resolve_vertex_ai_route_base_url_priority_google_wins() { + // When both override keys are set, GOOGLE_VERTEX_AI_BASE_URL takes priority. + let config = [ + ( + "GOOGLE_VERTEX_AI_BASE_URL".to_string(), + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global/endpoints/openapi".to_string(), + ), + ( + "VERTEX_AI_BASE_URL".to_string(), + "https://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi".to_string(), + ), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-priority", config); + + let resolved = resolve_provider_route(&provider, "any-model").expect("should resolve"); + + assert_eq!( + resolved.route.endpoint, + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global/endpoints/openapi", + "GOOGLE_VERTEX_AI_BASE_URL must win over VERTEX_AI_BASE_URL" + ); + } + + #[test] + fn resolve_vertex_ai_route_base_url_override_rejects_anthropic_models() { + let config = std::iter::once(( + "GOOGLE_VERTEX_AI_BASE_URL".to_string(), + "https://aiplatform.googleapis.com/v1beta1/projects/my-project/locations/global/endpoints/openapi".to_string(), + )) + .collect(); + let provider = make_vertex_provider_with_config("vertex-custom-anthropic", config); + + 
let err = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022") + .expect_err("anthropic overrides should fail closed"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message() + .contains("base URL overrides are not supported") + ); + } + + #[test] + fn resolve_vertex_ai_route_base_url_override_rejects_non_vertex_host() { + let config = std::iter::once(( + "VERTEX_AI_BASE_URL".to_string(), + "https://custom.example.com/v1".to_string(), + )) + .collect(); + let provider = make_vertex_provider_with_config("vertex-custom-invalid-host", config); + + let err = resolve_provider_route(&provider, "gemini-pro") + .expect_err("non-Vertex hosts must be rejected"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message() + .contains("must target an official Vertex AI hostname") + ); + } + + #[test] + fn resolve_vertex_ai_route_base_url_override_rejects_non_https() { + let config = std::iter::once(( + "VERTEX_AI_BASE_URL".to_string(), + "http://us-central1-aiplatform.googleapis.com/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi".to_string(), + )) + .collect(); + let provider = make_vertex_provider_with_config("vertex-custom-http", config); + + let err = resolve_provider_route(&provider, "gemini-pro") + .expect_err("non-https overrides must be rejected"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!(err.message().contains("must use https")); + } + + #[test] + fn resolve_vertex_ai_route_base_url_override_rejects_ip_literal() { + let config = std::iter::once(( + "VERTEX_AI_BASE_URL".to_string(), + "https://127.0.0.1/v1beta1/projects/my-project/locations/us-central1/endpoints/openapi" + .to_string(), + )) + .collect(); + let provider = make_vertex_provider_with_config("vertex-custom-ip", config); + + let err = resolve_provider_route(&provider, "gemini-pro") + .expect_err("IP literal overrides must be rejected"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument); + 
assert!(err.message().contains("must not use IP literal hosts")); + } + + #[test] + fn resolve_vertex_ai_route_gemini_model() { + let config = + std::iter::once(("VERTEX_AI_PROJECT_ID".to_string(), "proj-123".to_string())).collect(); + let provider = make_vertex_provider_with_config("vertex-gemini", config); + + let resolved = resolve_provider_route(&provider, "gemini-pro").expect("should resolve"); + + // Gemini routes to OpenAI-compatible endpoint, not publisher endpoint + assert!(!resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some("/chat/completions".to_string()) + ); + assert!( + resolved.route.endpoint.contains("v1beta1"), + "gemini should use v1beta1 endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + resolved.route.endpoint.contains("endpoints/openapi"), + "gemini should use openapi endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + !resolved.route.endpoint.contains("publishers/google"), + "gemini must not embed publisher in endpoint, got: {}", + resolved.route.endpoint + ); + // Default region + assert!(resolved.route.endpoint.contains("us-central1")); + assert!( + resolved + .route + .protocols + .contains(&"openai_chat_completions".to_string()), + "expected openai_chat_completions protocol" + ); + assert!( + !resolved + .route + .protocols + .contains(&"anthropic_messages".to_string()), + "must not have anthropic_messages protocol for gemini" + ); + } + + #[test] + fn resolve_vertex_ai_route_unknown_model_uses_openai_compat() { + let config = + std::iter::once(("VERTEX_AI_PROJECT_ID".to_string(), "proj-abc".to_string())).collect(); + let provider = make_vertex_provider_with_config("vertex-compat", config); + + let resolved = + resolve_provider_route(&provider, "some-unknown-model").expect("should resolve"); + + assert!(!resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some("/chat/completions".to_string()) + ); + assert!( + 
resolved.route.endpoint.contains("v1beta1"), + "unknown model should use v1beta1 endpoint" + ); + assert!( + resolved.route.endpoint.contains("endpoints/openapi"), + "unknown model should use openapi endpoint" + ); + assert!( + resolved + .route + .protocols + .contains(&"openai_chat_completions".to_string()), + "expected openai_chat_completions protocol for unknown model" + ); + assert!( + !resolved + .route + .protocols + .contains(&"anthropic_messages".to_string()), + "must not have anthropic_messages for unknown model" + ); + } + + #[test] + fn resolve_vertex_ai_route_global_region_uses_global_host() { + let config = [ + ( + "VERTEX_AI_PROJECT_ID".to_string(), + "proj-global".to_string(), + ), + ("VERTEX_AI_REGION".to_string(), "GLOBAL".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-global", config); + + let resolved = + resolve_provider_route(&provider, "claude-opus-4-7").expect("should resolve"); + + assert_eq!( + resolved.route.endpoint, + "https://aiplatform.googleapis.com/v1/projects/proj-global/locations/global/publishers/anthropic/models" + ); + assert!(resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some(":rawPredict".to_string()) + ); + } + + #[test] + fn resolve_vertex_ai_route_us_multiregion_uses_rep_host() { + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "proj-us".to_string()), + ("VERTEX_AI_REGION".to_string(), "us".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-us", config); + + let resolved = resolve_provider_route(&provider, "gemini-pro").expect("should resolve"); + + assert_eq!( + resolved.route.endpoint, + "https://aiplatform.us.rep.googleapis.com/v1beta1/projects/proj-us/locations/us/endpoints/openapi" + ); + assert_eq!( + resolved.route.request_path_override, + Some("/chat/completions".to_string()) + ); + } + + #[test] + fn resolve_vertex_ai_route_eu_multiregion_uses_rep_host() { 
+ let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "proj-eu".to_string()), + ("VERTEX_AI_REGION".to_string(), "eu".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-eu", config); + + let resolved = resolve_provider_route(&provider, "gemini-pro").expect("should resolve"); + + assert_eq!( + resolved.route.endpoint, + "https://aiplatform.eu.rep.googleapis.com/v1beta1/projects/proj-eu/locations/eu/endpoints/openapi" + ); + assert_eq!( + resolved.route.request_path_override, + Some("/chat/completions".to_string()) + ); + } + + #[test] + fn resolve_vertex_ai_route_explicit_publisher_anthropic_override() { + // Explicit VERTEX_AI_PUBLISHER=anthropic → Anthropic Messages API path + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "my-proj".to_string()), + ("VERTEX_AI_PUBLISHER".to_string(), "anthropic".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-pub-anthropic", config); + + let resolved = resolve_provider_route(&provider, "some-model").expect("should resolve"); + + assert!(resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some(":rawPredict".to_string()) + ); + assert!( + resolved + .route + .endpoint + .ends_with("/publishers/anthropic/models"), + "expected anthropic publisher endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + !resolved.route.endpoint.contains("some-model"), + "model must not be in endpoint" + ); + assert!( + resolved + .route + .protocols + .contains(&"anthropic_messages".to_string()), + "expected anthropic_messages protocol" + ); + } + + #[test] + fn resolve_vertex_ai_route_explicit_publisher_non_anthropic_uses_openai_compat() { + // Explicit VERTEX_AI_PUBLISHER=google (any non-anthropic) → OpenAI-compat endpoint + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "my-proj".to_string()), + ("VERTEX_AI_PUBLISHER".to_string(), "google".to_string()), + ] + .into_iter() + 
.collect(); + let provider = make_vertex_provider_with_config("vertex-pub-google", config); + + let resolved = resolve_provider_route(&provider, "some-model").expect("should resolve"); + + assert!(!resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some("/chat/completions".to_string()) + ); + assert!( + resolved.route.endpoint.contains("v1beta1"), + "non-anthropic publisher should use v1beta1 endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + resolved.route.endpoint.contains("endpoints/openapi"), + "non-anthropic publisher should use openapi endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + !resolved.route.endpoint.contains("publishers/google"), + "must not embed publisher in endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + resolved + .route + .protocols + .contains(&"openai_chat_completions".to_string()), + "expected openai_chat_completions for non-anthropic publisher" + ); + } + + #[test] + fn resolve_vertex_ai_route_missing_project_fails() { + let config = std::collections::HashMap::new(); + let provider = make_vertex_provider_with_config("vertex-no-proj", config); + + let err = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022") + .expect_err("should fail without project"); + assert_eq!(err.code(), tonic::Code::FailedPrecondition); + assert!(err.message().contains("VERTEX_AI_PROJECT_ID")); + } + + #[test] + fn resolve_vertex_ai_route_whitespace_only_project_fails() { + // required_vertex_config rejects whitespace-only values via .filter(|v| !v.trim().is_empty()) + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), " ".to_string()), + ("VERTEX_AI_REGION".to_string(), "us-central1".to_string()), + ] + .into_iter() + .collect(); + let result = resolve_vertex_ai_route( + &config, + "claude-3-5-sonnet@20241022", + "test-route", + "dummy-token", + openshell_core::inference::profile_for("google-vertex-ai").unwrap(), + ); + assert!( + result.is_err(), + "whitespace-only project 
should fail, got: {result:?}" + ); + let status = result.unwrap_err(); + assert_eq!(status.code(), tonic::Code::FailedPrecondition); + } + + #[test] + fn resolve_vertex_ai_route_requires_minted_access_token() { + let config = + std::iter::once(("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string())).collect(); + let provider = Provider { + credentials: std::iter::once(( + "GOOGLE_SERVICE_ACCOUNT_KEY".to_string(), + "{\"type\":\"service_account\"}".to_string(), + )) + .collect(), + config, + ..make_vertex_provider_with_config( + "vertex-bootstrap-only", + std::collections::HashMap::new(), + ) + }; + + let err = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022") + .expect_err("bootstrap JSON must not be treated as a bearer token"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!(err.message().contains("no usable API key credential")); + } + + #[test] + fn resolve_vertex_ai_route_alias_canonicalizes_provider_type() { + let config = + std::iter::once(("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string())).collect(); + let mut provider = make_vertex_provider_with_config("vertex-alias", config); + provider.r#type = "vertex-ai".to_string(); + + let resolved = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022") + .expect("alias should resolve through Vertex routing"); + + assert_eq!(resolved.provider_type, "google-vertex-ai"); + assert!(resolved.route.model_in_path); + assert_eq!( + resolved.route.request_path_override, + Some(":rawPredict".to_string()) + ); + } + + #[test] + fn resolve_vertex_ai_route_anthropic_protocols() { + let config = + std::iter::once(("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string())).collect(); + let provider = make_vertex_provider_with_config("v", config); + let resolved = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022").unwrap(); + assert!( + resolved + .route + .protocols + .contains(&"anthropic_messages".to_string()) + ); + assert!( + !resolved + .route + .protocols + 
.contains(&"openai_chat_completions".to_string()) + ); + assert_eq!( + resolved.route.protocols, + vec!["anthropic_messages".to_string()] + ); + } + + #[test] + fn resolve_vertex_ai_route_openai_compat_protocols() { + let config = + std::iter::once(("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string())).collect(); + let provider = make_vertex_provider_with_config("v", config); + let resolved = resolve_provider_route(&provider, "gemini-pro").unwrap(); + assert!( + resolved + .route + .protocols + .contains(&"openai_chat_completions".to_string()) + ); + assert!( + resolved + .route + .protocols + .iter() + .all(|protocol| protocol == "openai_chat_completions") + ); + } + + #[test] + fn resolve_vertex_ai_route_model_not_in_endpoint() { + // model_id must NOT appear in the endpoint URL — it travels via route.model + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string()), + ("VERTEX_AI_REGION".to_string(), "us-east1".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("v", config); + let resolved = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022").unwrap(); + assert!( + !resolved.route.endpoint.contains("claude-3-5-sonnet"), + "model_id must not be in endpoint, got: {}", + resolved.route.endpoint + ); + assert!( + resolved + .route + .endpoint + .ends_with("/publishers/anthropic/models") + ); + } + + #[test] + fn resolve_vertex_ai_route_rejects_model_ids_with_path_separators() { + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string()), + ("VERTEX_AI_PUBLISHER".to_string(), "anthropic".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-bad-model", config); + + let err = resolve_provider_route(&provider, "claude/3-sonnet") + .expect_err("path-like model IDs must be rejected"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!(err.message().contains("must not contain path separators")); + } + + 
#[test] + fn resolve_vertex_ai_route_rejects_model_ids_with_url_delimiters() { + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string()), + ("VERTEX_AI_PUBLISHER".to_string(), "anthropic".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-bad-model-url", config); + + for model_id in ["claude?alt=1", "claude#fragment", "claude%2Fbad"] { + let err = resolve_provider_route(&provider, model_id) + .expect_err("URL delimiter-bearing model IDs must be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message() + .contains("must not contain URL delimiters or percent escapes"), + "unexpected error for {model_id:?}: {}", + err.message() + ); + } + } + + #[test] + fn resolve_vertex_ai_route_accepts_versioned_claude_model_id() { + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string()), + ("VERTEX_AI_PUBLISHER".to_string(), "anthropic".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-good-model", config); + + let resolved = resolve_provider_route(&provider, "claude-3-5-sonnet@20241022") + .expect("versioned Claude model IDs must remain valid"); + + assert!(resolved.route.model_in_path); + assert_eq!(resolved.route.model, "claude-3-5-sonnet@20241022"); + } + + #[test] + fn resolve_vertex_ai_route_rejects_model_ids_with_whitespace() { + let config = [ + ("VERTEX_AI_PROJECT_ID".to_string(), "proj-id".to_string()), + ("VERTEX_AI_PUBLISHER".to_string(), "anthropic".to_string()), + ] + .into_iter() + .collect(); + let provider = make_vertex_provider_with_config("vertex-bad-model-whitespace", config); + + let err = resolve_provider_route(&provider, "some model") + .expect_err("whitespace in Anthropic Vertex model IDs must be rejected"); + + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message() + .contains("must not contain whitespace or control characters") + ); + } + + 
#[test] + fn validate_gcp_project_id_accepts_valid() { + assert!(validate_gcp_project_id("my-project").is_ok()); + assert!(validate_gcp_project_id("my-project-123").is_ok()); + assert!(validate_gcp_project_id("abcdef").is_ok()); // min length 6 + } + + #[test] + fn validate_gcp_project_id_rejects_invalid() { + assert!(validate_gcp_project_id("").is_err()); // empty + assert!(validate_gcp_project_id("ab").is_err()); // too short + assert!(validate_gcp_project_id("../admin").is_err()); // path traversal + assert!(validate_gcp_project_id("MY-PROJECT").is_err()); // uppercase + assert!(validate_gcp_project_id("my-project-").is_err()); // trailing hyphen + assert!(validate_gcp_project_id("1my-project").is_err()); // starts with digit + } + + #[test] + fn validate_gcp_region_accepts_valid() { + assert!(validate_gcp_region("us-central1").is_ok()); + assert!(validate_gcp_region("europe-west4").is_ok()); + assert!(validate_gcp_region("global").is_ok()); + assert!(validate_gcp_region("us").is_ok()); + assert!(validate_gcp_region("eu").is_ok()); + assert!(validate_gcp_region("us-east4-a").is_ok()); // zone-like + } + + #[test] + fn validate_gcp_region_rejects_invalid() { + assert!(validate_gcp_region("").is_err()); + assert!(validate_gcp_region("../../etc").is_err()); // path traversal + assert!(validate_gcp_region("us central1").is_err()); // space + assert!(validate_gcp_region("-us-central1").is_err()); // leading hyphen + assert!(validate_gcp_region("us-central1-").is_err()); // trailing hyphen + } + + // ------------------------------------------------------------------------- + // validate_vertex_base_url edge-case tests + // ------------------------------------------------------------------------- + + #[test] + fn validate_vertex_base_url_rejects_ipv6_literal() { + let err = validate_vertex_base_url( + "https://[::1]/v1beta1/projects/p/locations/l/endpoints/openapi", + ) + .expect_err("IPv6 literals must be rejected"); + assert_eq!(err.code(), 
tonic::Code::InvalidArgument); + assert!( + err.message().contains("IP literal"), + "expected IP literal error, got: {}", + err.message() + ); + } + + #[test] + fn validate_vertex_base_url_rejects_userinfo() { + let err = + validate_vertex_base_url("https://user:pass@us-central1-aiplatform.googleapis.com/v1") + .expect_err("userinfo must be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message().contains("userinfo"), + "expected userinfo error, got: {}", + err.message() + ); + } + + #[test] + fn validate_vertex_base_url_rejects_query_string() { + let err = + validate_vertex_base_url("https://us-central1-aiplatform.googleapis.com/v1?key=val") + .expect_err("query string must be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message().contains("query or fragment"), + "expected query/fragment error, got: {}", + err.message() + ); + } + + #[test] + fn validate_vertex_base_url_rejects_fragment() { + let err = + validate_vertex_base_url("https://us-central1-aiplatform.googleapis.com/v1#section") + .expect_err("fragment must be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message().contains("query or fragment"), + "expected query/fragment error, got: {}", + err.message() + ); + } + + #[test] + fn validate_vertex_base_url_rejects_non_443_port() { + let err = validate_vertex_base_url("https://us-central1-aiplatform.googleapis.com:8443/v1") + .expect_err("non-443 port must be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message().contains("443"), + "expected port 443 error, got: {}", + err.message() + ); + } + + #[test] + fn validate_vertex_model_id_rejects_double_dot_traversal() { + // ".." without a slash should still be rejected as a path traversal segment. 
+ let err = validate_vertex_model_id("model..v2") + .expect_err("double-dot traversal must be rejected"); + assert_eq!(err.code(), tonic::Code::InvalidArgument); + assert!( + err.message().contains("traversal"), + "expected path traversal error, got: {}", + err.message() + ); + } + #[test] fn effective_route_name_defaults_empty_to_inference_local() { assert_eq!( diff --git a/docs/index.yml b/docs/index.yml index 1e2005f45..b2443e4af 100644 --- a/docs/index.yml +++ b/docs/index.yml @@ -17,6 +17,8 @@ navigation: skip-slug: true - folder: sandboxes title: "Manage OpenShell" +- folder: providers + title: "Providers" - folder: observability title: "Observability" - folder: kubernetes diff --git a/docs/providers/google-vertex-ai.mdx b/docs/providers/google-vertex-ai.mdx new file mode 100644 index 000000000..913fb32dc --- /dev/null +++ b/docs/providers/google-vertex-ai.mdx @@ -0,0 +1,147 @@ +--- +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +title: "Google Vertex AI" +sidebar-title: "Google Vertex AI" +description: "Configure OpenShell to route inference traffic through Google Vertex AI, including Anthropic Claude and Gemini models." +keywords: "Generative AI, Cybersecurity, AI Agents, Sandboxing, Google Vertex AI, Anthropic Claude, Inference Routing" +--- + +Google Vertex AI is a managed machine learning platform that hosts Anthropic Claude, Gemini, and third-party models through Google Cloud. OpenShell can route `inference.local` traffic to Vertex AI using gateway-managed credential refresh, so sandbox agents do not handle GCP credentials directly. + +## Prerequisites + +Before creating a Vertex AI provider, ensure you have: + +- A GCP project with the [Vertex AI API](https://console.cloud.google.com/apis/library/aiplatform.googleapis.com) enabled. 
+- One of the following: + - A GCP service account with the **Vertex AI User** role and a downloaded JSON key file, for production use. + - The `gcloud` CLI with Application Default Credentials configured, for local development. + +## Authentication + +The `google-vertex-ai` provider supports two credential sources. + +### Service Account Key + +Supply the JSON key file content as the `GOOGLE_SERVICE_ACCOUNT_KEY` credential. OpenShell persists that value only as gateway-side refresh bootstrap material until you update or delete it. The raw service-account JSON and private key are not sandbox runtime credentials and are not exposed to sandboxes. Runtime inference requests use short-lived access tokens minted by the gateway and stored under a separate credential key. + +```shell +openshell provider create \ + --name vertex-prod \ + --type google-vertex-ai \ + --credential GOOGLE_SERVICE_ACCOUNT_KEY="$(cat /path/to/key.json)" \ + --config VERTEX_AI_PROJECT_ID=my-gcp-project \ + --config VERTEX_AI_REGION=us-central1 +``` + +Then configure gateway-managed refresh so the gateway uses the private key as refresh bootstrap material and rotates access tokens: + +```shell +openshell provider refresh configure vertex-prod \ + --credential-key GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN \ + --strategy google-service-account-jwt \ + --material client_email="sa@my-gcp-project.iam.gserviceaccount.com" \ + --material private_key="$(jq -r .private_key /path/to/key.json)" \ + --secret-material-key private_key +``` + +### gcloud Application Default Credentials + +For local development, configure ADC first, then pass `--from-gcloud-adc`: + +```shell +gcloud auth application-default login +``` + +```shell +openshell provider create \ + --name vertex-local \ + --type google-vertex-ai \ + --from-gcloud-adc \ + --config VERTEX_AI_PROJECT_ID=my-gcp-project \ + --config VERTEX_AI_REGION=us-central1 +``` + +`--from-gcloud-adc` reads `GOOGLE_APPLICATION_CREDENTIALS` first, then falls back to 
`$CLOUDSDK_CONFIG/application_default_credentials.json` when `CLOUDSDK_CONFIG` is set, then to `~/.config/gcloud/application_default_credentials.json`. It configures an OAuth2 refresh token flow on the gateway and immediately mints the first access token before the command returns. If the command succeeds, the provider is ready for inference right away. It only works with user credentials generated by `gcloud auth application-default login`. If your ADC file is a service account key, the CLI returns an error and directs you to use the service account key method above. + +ADC-backed providers mint and rotate access tokens into `GOOGLE_VERTEX_AI_TOKEN`. + + +`--from-gcloud-adc` is only valid for `google-vertex-ai` providers. + + +## Configuration Keys + +Pass these as `--config KEY=VALUE` when creating the provider, or set them as environment variables and use `--from-existing`. + +| Key | Required | Default | Description | +|---|---|---|---| +| `VERTEX_AI_PROJECT_ID` | Yes (unless `GOOGLE_VERTEX_AI_BASE_URL` or `VERTEX_AI_BASE_URL` is set) | — | GCP project ID. | +| `VERTEX_AI_REGION` | No | `us-central1` | Vertex location selector. Use a regional location such as `us-central1`, or `global`, `us`, or `eu` for the supported global and multi-region endpoints. | +| `GOOGLE_VERTEX_AI_BASE_URL` | No | — | Full base URL override for non-Anthropic routes. Must be an official Vertex AI HTTPS endpoint root. | +| `VERTEX_AI_BASE_URL` | No | — | Backward-compatible alias for `GOOGLE_VERTEX_AI_BASE_URL`. | +| `VERTEX_AI_PUBLISHER` | No | Inferred from model name | Set to `anthropic` to force Anthropic Messages API routing, or any other value for OpenAI-compatible routing. | + +When `VERTEX_AI_PROJECT_ID` is set and no base URL override is present, the gateway maps `VERTEX_AI_REGION` to the Vertex host automatically: + +- Regional locations such as `us-central1` use `https://<region>-aiplatform.googleapis.com`. +- `global` uses `https://aiplatform.googleapis.com`.
+- `us` and `eu` use `https://aiplatform.<region>.rep.googleapis.com`. + +For Anthropic models, OpenShell builds the publisher-model Vertex path automatically and injects `anthropic_version` into the request body. Vertex rawPredict does not receive `anthropic-version` as a header, and OpenShell strips `anthropic-beta` for Vertex Claude routes. For non-Anthropic models, OpenShell uses Vertex's OpenAI-compatible Chat Completions route under `.../endpoints/openapi/chat/completions`. + + +Use `GOOGLE_VERTEX_AI_BASE_URL` or `VERTEX_AI_BASE_URL` only for non-Anthropic Vertex routes. OpenShell rejects Anthropic models when a base URL override is set because Anthropic routes require model-path shaping and `anthropic_version` body injection. Overrides must use `https://` and an official Vertex AI hostname such as `aiplatform.googleapis.com`, `aiplatform.us.rep.googleapis.com`, `aiplatform.eu.rep.googleapis.com`, or `<region>-aiplatform.googleapis.com`. + + +## Supported Models + +Vertex AI hosts Anthropic Claude models (claude-3-5-sonnet, claude-3-opus, and others) through a native Messages API integration, and Gemini and other third-party models through Vertex's OpenAI-compatible Chat Completions endpoint. OpenShell infers the routing path from the model name. For the full list of available models and regions, refer to the [Google Cloud Model Garden documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/overview). + +Model names that match the `claude-*` pattern route through the Anthropic Messages API on Vertex. All other model names route through Vertex Chat Completions. Set `VERTEX_AI_PUBLISHER=anthropic` to force Anthropic routing when the model name does not follow the standard pattern. + +OpenShell exposes Anthropic Vertex routes for inference only. It does not advertise OpenAI-style model discovery for those routes, so use the Google Cloud docs or Model Garden to discover supported Anthropic model IDs.
+ +## Configure Inference Routing + +After creating the provider, point `inference.local` at it: + +```shell +openshell inference set \ + --provider vertex-prod \ + --model claude-3-5-sonnet@20241022 +``` + +Sandboxes on that gateway reach the model at `https://inference.local`. For full details on inference routing, refer to [Inference Routing](/sandboxes/inference-routing). + +## From Existing Environment + +If one of these token env vars is already set in your shell, create the provider with `--from-existing`: + +- `GOOGLE_VERTEX_AI_TOKEN` or `VERTEX_AI_TOKEN` +- `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` or `VERTEX_AI_SERVICE_ACCOUNT_TOKEN` + +OpenShell also reads these config env vars during `--from-existing`: + +- `VERTEX_AI_PROJECT_ID` +- `VERTEX_AI_REGION` +- `GOOGLE_VERTEX_AI_BASE_URL` or `VERTEX_AI_BASE_URL` +- `VERTEX_AI_PUBLISHER` + +Then create the provider: + +```shell +openshell provider create \ + --name vertex-env \ + --type google-vertex-ai \ + --from-existing +``` + +This reads the credential and config values from the token and config environment variables listed above. + +## Next Steps + +- To configure `inference.local` routing, refer to [Inference Routing](/sandboxes/inference-routing). +- To manage provider credentials and refresh, refer to [Providers](/sandboxes/manage-providers). +- To apply network policies to sandboxes using this provider, refer to [Policies](/sandboxes/policies). diff --git a/docs/sandboxes/inference-routing.mdx b/docs/sandboxes/inference-routing.mdx index a65eec437..58744883c 100644 --- a/docs/sandboxes/inference-routing.mdx +++ b/docs/sandboxes/inference-routing.mdx @@ -24,9 +24,9 @@ If code calls an external inference host directly, OpenShell evaluates that traf | Property | Detail | |---|---| | Credentials | No sandbox API keys needed. Credentials come from the configured provider record. The router strips caller-supplied `Authorization` before forwarding the request.
| -| Header forwarding | `inference.local` forwards only a per-provider header allowlist. OpenAI routes allow `openai-organization` and `x-model-id`. Anthropic routes allow `anthropic-version` and `anthropic-beta`. NVIDIA routes allow `x-model-id`. All other caller headers are stripped. | +| Header forwarding | `inference.local` forwards only a per-provider header allowlist. OpenAI routes allow `openai-organization` and `x-model-id`. Anthropic routes allow `anthropic-version` and `anthropic-beta`. Vertex Claude rawPredict routes strip `anthropic-beta` and do not forward `anthropic-version` as a header because the router injects `anthropic_version` into the Vertex request body. NVIDIA routes allow `x-model-id`. All other caller headers are stripped. | | Configuration | One provider and one model define sandbox inference for the active gateway. Every sandbox on that gateway sees the same `inference.local` backend. | -| Provider support | NVIDIA, any OpenAI-compatible provider, and Anthropic all work through the same endpoint. | +| Provider support | NVIDIA, Anthropic, Google Vertex AI, and any OpenAI-compatible provider all work through the same endpoint. Vertex routes Claude models through `/v1/messages` and non-Anthropic models through `/v1/chat/completions`. The gateway resolves the upstream Vertex host from the provider config, including regional, global, and supported multi-region endpoints. | | Streaming reliability | The router tolerates idle gaps of up to 120 seconds between streamed chunks so long reasoning responses are not cut off mid-stream. | | Hot refresh | OpenShell picks up provider credential changes and inference updates without recreating sandboxes. Changes propagate within about 5 seconds by default. | @@ -58,6 +58,8 @@ Supported request patterns depend on the provider configured for `inference.loca Requests to `inference.local` that do not match the configured provider's supported patterns are denied. 
+Google Vertex AI does not expose every OpenAI-compatible path through `inference.local`. Vertex routes for Gemini and other non-Anthropic models currently support Chat Completions. Vertex routes for Claude models use the Anthropic Messages pattern. Base URL overrides are only supported for non-Anthropic Vertex routes. + ## Configure Inference Routing The managed local inference endpoint uses three values: @@ -101,6 +103,21 @@ Replace the base URL and API key with the values from your provider. For support + + +```shell +openshell provider create \ + --name vertex-local \ + --type google-vertex-ai \ + --from-gcloud-adc \ + --config VERTEX_AI_PROJECT_ID=my-gcp-project \ + --config VERTEX_AI_REGION=us-central1 +``` + +Use [Google Vertex AI](/providers/google-vertex-ai) for the full auth flows, including the production service-account refresh path, ADC-backed providers that mint `GOOGLE_VERTEX_AI_TOKEN`, and `--from-existing` support. + + + ```shell diff --git a/docs/sandboxes/manage-providers.mdx b/docs/sandboxes/manage-providers.mdx index 7bc1f977f..55db6ec1c 100644 --- a/docs/sandboxes/manage-providers.mdx +++ b/docs/sandboxes/manage-providers.mdx @@ -27,6 +27,8 @@ openshell provider list-profiles Providers can be created from local environment variables or with explicit credential values. +For refresh-backed providers such as `google-vertex-ai --from-gcloud-adc`, `openshell provider create` now waits for the gateway to configure refresh metadata and mint the initial access token before it reports success. + ### From Local Credentials The fastest way to create a provider is to let the CLI discover credentials from @@ -269,6 +271,7 @@ The following providers have been tested with `inference.local`. 
Any provider th |---|---|---|---|---| | NVIDIA API Catalog | `nvidia-prod` | `nvidia` | `https://integrate.api.nvidia.com/v1` | `NVIDIA_API_KEY` | | Anthropic | `anthropic-prod` | `anthropic` | `https://api.anthropic.com` | `ANTHROPIC_API_KEY` | +| Google Vertex AI | `vertex-prod` | `google-vertex-ai` | Regional, global, or multi-region Vertex endpoint | `GOOGLE_VERTEX_AI_TOKEN` or `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN` | | Baseten | `baseten` | `openai` | `https://inference.baseten.co/v1` | `OPENAI_API_KEY` | | Bitdeer AI | `bitdeer` | `openai` | `https://api-inference.bitdeer.ai/v1` | `OPENAI_API_KEY` | | Deepinfra | `deepinfra` | `openai` | `https://api.deepinfra.com/v1/openai` | `OPENAI_API_KEY` | @@ -276,7 +279,7 @@ The following providers have been tested with `inference.local`. Any provider th | Ollama (local) | `ollama` | `openai` | `http://host.openshell.internal:11434/v1` | `OPENAI_API_KEY` | | LM Studio (local) | `lmstudio` | `openai` | `http://host.openshell.internal:1234/v1` | `OPENAI_API_KEY` | -Refer to your provider's documentation for the correct base URL, available models, and API key setup. To configure inference routing, refer to [Inference Routing](/sandboxes/inference-routing). +Refer to your provider's documentation for the correct base URL, available models, and API key setup. For the Vertex-specific auth flows and config keys, refer to [Google Vertex AI](/providers/google-vertex-ai). To configure inference routing, refer to [Inference Routing](/sandboxes/inference-routing). ## Next Steps diff --git a/docs/sandboxes/providers-v2.mdx b/docs/sandboxes/providers-v2.mdx index 9a4a9dc35..3ac248ff8 100644 --- a/docs/sandboxes/providers-v2.mdx +++ b/docs/sandboxes/providers-v2.mdx @@ -55,7 +55,7 @@ Providers v2 currently includes these user-facing features: - `openshell provider list-profiles` with table, YAML, and JSON output. - `openshell provider profile export`, `import`, `lint`, and `delete` for custom profiles. 
- Provider instances created from built-in or imported profile IDs with `openshell provider create --type `. -- Profile-backed credential discovery for explicit `openshell provider create --from-existing` and `openshell provider update --from-existing` flows. +- Profile-backed credential discovery for explicit `openshell provider create --from-existing` and `openshell provider update --from-existing` flows. The built-in `google-vertex-ai` profile also supplements discovery with Vertex config env vars such as `VERTEX_AI_PROJECT_ID` and `VERTEX_AI_REGION`. - Just-in-time effective policy composition from sandbox policy plus attached provider profiles. - Runtime sandbox provider lifecycle commands under `openshell sandbox provider list|attach|detach`. - Credential refresh configuration with `openshell provider refresh status|configure|rotate|delete`. @@ -94,6 +94,7 @@ Built-in Providers v2 profiles currently include: |---|---|---| | `claude-code` | `agent` | `ANTHROPIC_API_KEY`, `CLAUDE_API_KEY` | | `github` | `source_control` | `GITHUB_TOKEN`, `GH_TOKEN` | +| `google-vertex-ai` | `inference` | `GOOGLE_SERVICE_ACCOUNT_KEY`, `GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `VERTEX_AI_SERVICE_ACCOUNT_TOKEN`, `GOOGLE_VERTEX_AI_TOKEN`, `VERTEX_AI_TOKEN` | | `nvidia` | `inference` | `NVIDIA_API_KEY` | Export a built-in profile as YAML: diff --git a/docs/security/best-practices.mdx b/docs/security/best-practices.mdx index 6473d63d2..7512ff39f 100644 --- a/docs/security/best-practices.mdx +++ b/docs/security/best-practices.mdx @@ -213,7 +213,7 @@ OpenShell applies seccomp in two phases. A narrow supervisor-startup prelude run | Aspect | Detail | |---|---| | Startup prelude | After privileged bootstrap helpers complete, the supervisor sets `PR_SET_NO_NEW_PRIVS` and synchronizes a seccomp filter across all runtime threads that blocks `mount`, the new mount API syscalls, `pivot_root`, `umount2`, `bpf`, `perf_event_open`, `userfaultfd`, module-loading syscalls, and kexec. 
This closes the long-lived privileged remount and kernel-surface window while leaving required setup syscalls such as `setns` available. | -| Socket domains | The filter allows `AF_INET` and `AF_INET6` (for proxy communication) and blocks `AF_NETLINK`, `AF_PACKET`, `AF_BLUETOOTH`, and `AF_VSOCK` with `EPERM`. | +| Socket domains | The filter allows `AF_INET` and `AF_INET6` (for proxy communication) and blocks `AF_PACKET`, `AF_BLUETOOTH`, and `AF_VSOCK` with `EPERM`. `AF_NETLINK` is partially allowed: only `NETLINK_ROUTE` (protocol 0) is permitted so that `getifaddrs(3)` works; all other netlink protocols are blocked. Write operations via `NETLINK_ROUTE` still require `CAP_NET_ADMIN`, which the sandbox does not grant. | | Runtime unconditional syscall blocks | `memfd_create`, `ptrace`, `bpf`, `process_vm_readv`, `process_vm_writev`, `pidfd_open`, `pidfd_getfd`, `pidfd_send_signal`, `io_uring_setup`, `mount`, `fsopen`, `fsconfig`, `fsmount`, `fspick`, `move_mount`, `open_tree`, `setns`, `umount2`, `pivot_root`, `userfaultfd`, `perf_event_open`. | | Conditional syscall blocks | `execveat` with `AT_EMPTY_PATH`, `unshare` and `clone` with `CLONE_NEWUSER`, and `seccomp(SECCOMP_SET_MODE_FILTER)` are denied with `EPERM`. | | What you can change | This is not a user-facing knob. OpenShell enforces it automatically. | diff --git a/proto/inference.proto b/proto/inference.proto index 743f245f9..b0bc581e8 100644 --- a/proto/inference.proto +++ b/proto/inference.proto @@ -109,6 +109,11 @@ message ResolvedRoute { string provider_type = 6; // Per-route request timeout in seconds. 0 means use default (60s). uint64 timeout_secs = 7; + // When true, the model identifier is embedded in the URL path (e.g. Vertex AI). + bool model_in_path = 8; + // Optional override for the request path. When set, replaces the protocol-derived path. + // An empty string means POST directly to base_url/model_id with no additional path. 
+ optional string request_path_override = 9; } message GetInferenceBundleResponse { diff --git a/providers/google-vertex-ai.yaml b/providers/google-vertex-ai.yaml new file mode 100644 index 000000000..86be2e430 --- /dev/null +++ b/providers/google-vertex-ai.yaml @@ -0,0 +1,82 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: google-vertex-ai +display_name: Google Vertex AI +description: Google Vertex AI inference provider (Anthropic Claude, Gemini, and third-party models) +category: inference +inference_capable: true +credentials: + - name: service_account_key + description: Google service account JSON refresh bootstrap material; not injected into sandboxes + env_vars: [GOOGLE_SERVICE_ACCOUNT_KEY] + required: false + - name: service_account_token + description: Google Cloud access token refreshed from service account JWT material + env_vars: [GOOGLE_VERTEX_AI_SERVICE_ACCOUNT_TOKEN, VERTEX_AI_SERVICE_ACCOUNT_TOKEN] + required: false + auth_style: bearer + header_name: authorization + refresh: + strategy: google_service_account_jwt + token_url: https://oauth2.googleapis.com/token + scopes: [https://www.googleapis.com/auth/cloud-platform] + refresh_before_seconds: 300 + max_lifetime_seconds: 3600 + material: + - name: client_email + description: Google service account email + required: true + - name: private_key + description: Google service account private key + required: true + secret: true + - name: subject + description: Optional delegated user email for domain-wide delegation + - name: gcloud_adc_token + description: Google Cloud access token refreshed via gcloud Application Default Credentials + env_vars: [GOOGLE_VERTEX_AI_TOKEN, VERTEX_AI_TOKEN] + required: false + auth_style: bearer + header_name: authorization + refresh: + strategy: oauth2_refresh_token + token_url: https://oauth2.googleapis.com/token + scopes: 
[https://www.googleapis.com/auth/cloud-platform] + refresh_before_seconds: 300 + max_lifetime_seconds: 3600 + material: + - name: client_id + description: Google OAuth2 client ID from gcloud ADC + required: true + - name: client_secret + description: Google OAuth2 client secret from gcloud ADC + required: true + secret: true + - name: refresh_token + description: Google OAuth2 refresh token from gcloud ADC + required: true + secret: true +discovery: + credentials: [service_account_token, gcloud_adc_token] +endpoints: + - host: "*-aiplatform.googleapis.com" + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: "aiplatform.googleapis.com" + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: "aiplatform.us.rep.googleapis.com" + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: "aiplatform.eu.rep.googleapis.com" + port: 443 + protocol: rest + access: read-write + enforcement: enforce From 19e7bed23efd46ef6fe65d21da2144572dfb2732 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 28 May 2026 12:29:30 -0500 Subject: [PATCH 2/4] docs: add Vertex AI sandbox usage for Claude Code and OpenCode Cover the full end-to-end setup for running Claude Code and OpenCode inside an OpenShell sandbox via inference.local with a Vertex AI backend: - google-vertex-ai.mdx: add 'Use from a Sandbox' section with tabbed examples for Claude Code (--bare flag, no /v1 suffix) and OpenCode (/v1 suffix required). Add providers_v2_enabled prerequisite and --no-verify note for global region. Document policy proposals table covering metadata.google.internal (always blocked), downloads.claude.ai, and storage.googleapis.com. - inference-routing.mdx: expand 'Use the Local Endpoint' section with tabbed examples for Claude Code, OpenCode, Python OpenAI SDK, and Python Anthropic SDK. Add notes explaining the /v1 path suffix difference between clients. 
- supported-agents.mdx: update Claude Code and OpenCode rows to mention inference.local support and correct base URL requirements. --- docs/about/supported-agents.mdx | 4 +- docs/providers/google-vertex-ai.mdx | 81 +++++++++++++++++++++++++++- docs/sandboxes/inference-routing.mdx | 52 ++++++++++++++++-- 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/docs/about/supported-agents.mdx b/docs/about/supported-agents.mdx index 7eeb3db5c..6570204ee 100644 --- a/docs/about/supported-agents.mdx +++ b/docs/about/supported-agents.mdx @@ -10,8 +10,8 @@ The following table summarizes the agents that run in OpenShell sandboxes. Most | Agent | Source | Default Policy | Notes | |---|---|---|---| -| [Claude Code](https://docs.anthropic.com/en/docs/claude-code) | [`base`](https://github.com/NVIDIA/OpenShell-Community/tree/main/sandboxes/base) | Full coverage | Works out of the box. Requires `ANTHROPIC_API_KEY`. | -| [OpenCode](https://opencode.ai/) | [`base`](https://github.com/NVIDIA/OpenShell-Community/tree/main/sandboxes/base) | Partial coverage | Pre-installed. Add `opencode.ai` endpoint and OpenCode binary paths to the policy for full functionality. | +| [Claude Code](https://docs.anthropic.com/en/docs/claude-code) | [`base`](https://github.com/NVIDIA/OpenShell-Community/tree/main/sandboxes/base) | Full coverage | Works out of the box. Requires `ANTHROPIC_API_KEY` for direct Anthropic access, or use `inference.local` with a configured provider (e.g. Vertex AI). | +| [OpenCode](https://opencode.ai/) | [`base`](https://github.com/NVIDIA/OpenShell-Community/tree/main/sandboxes/base) | Partial coverage | Pre-installed. Use `ANTHROPIC_BASE_URL="https://inference.local/v1"` with a configured provider. Add `opencode.ai` endpoint and OpenCode binary paths to the policy for full functionality. | | [Codex](https://developers.openai.com/codex) | [`base`](https://github.com/NVIDIA/OpenShell-Community/tree/main/sandboxes/base) | No coverage | Pre-installed. 
Requires a custom policy with OpenAI endpoints and Codex binary paths. Requires `OPENAI_API_KEY`. | | [GitHub Copilot CLI](https://docs.github.com/en/copilot/github-copilot-in-the-cli) | [`base`](https://github.com/NVIDIA/OpenShell-Community/tree/main/sandboxes/base) | Full coverage | Pre-installed. Works out of the box. Requires `GITHUB_TOKEN` or `COPILOT_GITHUB_TOKEN`. | | [OpenClaw](https://openclaw.ai/) | [NemoClaw](https://github.com/NVIDIA/NemoClaw) | Blueprint-managed | Run OpenClaw more securely inside NVIDIA OpenShell with managed inference using NemoClaw. | diff --git a/docs/providers/google-vertex-ai.mdx b/docs/providers/google-vertex-ai.mdx index 913fb32dc..631d6c395 100644 --- a/docs/providers/google-vertex-ai.mdx +++ b/docs/providers/google-vertex-ai.mdx @@ -105,16 +105,93 @@ OpenShell exposes Anthropic Vertex routes for inference only. It does not advert ## Configure Inference Routing -After creating the provider, point `inference.local` at it: +Before configuring inference routing, enable provider endpoint injection so the Vertex AI network endpoints are automatically included in sandbox policies: + +```shell +openshell settings set --global --key providers_v2_enabled --value true --yes +``` + +Then point `inference.local` at the provider: ```shell openshell inference set \ --provider vertex-prod \ - --model claude-3-5-sonnet@20241022 + --model claude-sonnet-4-6 +``` + +Use `--no-verify` if the endpoint verification fails. This is common with the `global` region, where the validation probe may not match the actual rawPredict path: + +```shell +openshell inference set \ + --provider vertex-prod \ + --model claude-sonnet-4-6 \ + --no-verify ``` Sandboxes on that gateway reach the model at `https://inference.local`. For full details on inference routing, refer to [Inference Routing](/sandboxes/inference-routing). +## Use from a Sandbox + +Agents inside sandboxes should reach Vertex AI through `inference.local`, not by connecting to Vertex AI directly. 
The gateway manages GCP credential refresh and request translation; the agent only needs to point its SDK at the local endpoint. + +The complete setup from scratch: + +```shell +# 1. Enable provider endpoint injection +openshell settings set --global --key providers_v2_enabled --value true --yes + +# 2. Create the provider +openshell provider create \ + --name vertex-local \ + --type google-vertex-ai \ + --from-gcloud-adc \ + --config VERTEX_AI_PROJECT_ID=my-gcp-project \ + --config VERTEX_AI_REGION=us-central1 + +# 3. Configure inference routing +openshell inference set --provider vertex-local --model claude-sonnet-4-6 --no-verify + +# 4. Create a sandbox with the provider attached +openshell sandbox create --name my-sandbox --provider vertex-local +``` + +Then inside the sandbox, launch the agent as shown below. + + + + +```shell +ANTHROPIC_BASE_URL="https://inference.local" ANTHROPIC_API_KEY=unused claude --bare +``` + +`--bare` skips the OAuth login flow and uses `ANTHROPIC_API_KEY` directly for authentication. The key value does not reach Vertex AI — `inference.local` strips it and injects the real GCP access token before forwarding. + + +Do not set `CLAUDE_CODE_USE_VERTEX=1` inside the sandbox. That flag makes Claude Code connect directly to Vertex AI and attempt GCP credential discovery (ADC file, metadata service), which fails because the sandbox does not expose GCP credentials. Use `inference.local` instead. + + + + + +```shell +ANTHROPIC_BASE_URL="https://inference.local/v1" ANTHROPIC_API_KEY=unused opencode +``` + +OpenCode requires `/v1` in the base URL. Without it, OpenCode sends `POST /messages` instead of `POST /v1/messages`, which does not match the inference pattern and is denied. + + + + +### Policy Proposals + +After running an agent, the TUI (`openshell term`) may show policy proposals for denied endpoints. 
Common ones for Vertex AI sandboxes: + +| Endpoint | Action | Reason | +|---|---|---| +| `metadata.google.internal:80` | **Reject** | Resolves to `169.254.169.254` (GCE metadata service). Always blocked regardless of policy — the proxy blocks the resolved IP unconditionally to prevent credential exfiltration. | +| `downloads.claude.ai:443` | Approve if desired | Claude Code update checking and asset loading. Not required for inference. | +| `storage.googleapis.com:443` | Approve if desired | Google Cloud Storage. Used by some Claude Code features. Not required for inference. | + ## From Existing Environment If one of these token env vars is already set in your shell, create the provider with `--from-existing`: diff --git a/docs/sandboxes/inference-routing.mdx b/docs/sandboxes/inference-routing.mdx index 58744883c..3d8c48cd8 100644 --- a/docs/sandboxes/inference-routing.mdx +++ b/docs/sandboxes/inference-routing.mdx @@ -195,7 +195,34 @@ openshell inference update --timeout 120 ## Use the Local Endpoint from a Sandbox -After inference is configured, code inside any sandbox can call `https://inference.local` directly: +After inference is configured, code inside any sandbox can call `https://inference.local` directly. The client-supplied `model` and `api_key` values are not sent upstream — the privacy router injects the real credentials from the configured provider and rewrites the model before forwarding. Some SDKs require a non-empty API key even though `inference.local` does not use the sandbox-provided value; pass any placeholder such as `unused`. + + + + +```shell +ANTHROPIC_BASE_URL="https://inference.local" ANTHROPIC_API_KEY=unused claude --bare +``` + +`--bare` skips the OAuth login flow and uses `ANTHROPIC_API_KEY` directly. The key is stripped by the proxy and never reaches the upstream provider. + + +Claude Code appends `/v1/messages` to `ANTHROPIC_BASE_URL`, so omit the `/v1` suffix from the base URL. 
+ + + + + +```shell +ANTHROPIC_BASE_URL="https://inference.local/v1" ANTHROPIC_API_KEY=unused opencode +``` + + +OpenCode appends `/messages` directly to `ANTHROPIC_BASE_URL`. Include the `/v1` suffix so the full path becomes `/v1/messages`, which matches the inference pattern. + + + + ```python from openai import OpenAI @@ -208,9 +235,28 @@ response = client.chat.completions.create( ) ``` -The client-supplied `model` and `api_key` values are not sent upstream. The privacy router injects the real credentials from the configured provider and rewrites the model before forwarding. Some SDKs require a non-empty API key even though `inference.local` does not use the sandbox-provided value. In those cases, pass any placeholder such as `test` or `unused`. + + + +```python +import anthropic + +client = anthropic.Anthropic( + base_url="https://inference.local", + api_key="unused", +) + +message = client.messages.create( + model="anything", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}], +) +``` + + + -Use this endpoint when inference should stay local to the host for privacy and security reasons. External providers that should be reached directly belong in `network_policies` instead. +Use `inference.local` when inference should stay private and credentials should not be exposed inside the sandbox. External providers reached directly belong in `network_policies` instead. When the upstream runs on the same machine as the gateway, bind it to `0.0.0.0` and point the provider at `host.openshell.internal` or the host's LAN IP. `127.0.0.1` and `localhost` usually fail because the request originates from the gateway or sandbox runtime, not from your shell. 
From 09ddf589c3fd27f13cc9bcbdd3fee84c4b33af98 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 28 May 2026 13:17:25 -0500 Subject: [PATCH 3/4] fix: address vertex review findings --- crates/openshell-cli/src/main.rs | 4 +- crates/openshell-cli/src/run.rs | 124 ++++++++++++++++++++----- crates/openshell-router/src/backend.rs | 65 +++++++------ 3 files changed, 139 insertions(+), 54 deletions(-) diff --git a/crates/openshell-cli/src/main.rs b/crates/openshell-cli/src/main.rs index 042202ef0..162bfc1db 100644 --- a/crates/openshell-cli/src/main.rs +++ b/crates/openshell-cli/src/main.rs @@ -184,7 +184,7 @@ fn resolve_sandbox_name(name: Option, gateway: &str) -> Result { let last = load_last_sandbox(gateway).ok_or_else(|| { miette::miette!( "No sandbox name provided and no last-used sandbox.\n\ - Specify a sandbox name or connect to one first: nav sandbox connect " + Specify a sandbox name or connect to one first: openshell sandbox connect " ) })?; eprintln!("{} Using sandbox '{}' (last used)", "→".bold(), last.bold()); @@ -3490,7 +3490,7 @@ mod tests { let err = resolve_sandbox_name(None, "unknown-gateway").unwrap_err(); let msg = err.to_string(); assert!( - msg.contains("nav sandbox connect"), + msg.contains("openshell sandbox connect"), "expected helpful hint in error, got: {msg}" ); }); diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 9280b48dc..17895fe1b 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -60,7 +60,7 @@ use openshell_providers::{ }; use owo_colors::OwoColorize; use std::collections::{HashMap, HashSet}; -use std::io::{IsTerminal, Read, Write}; +use std::io::{ErrorKind, IsTerminal, Read, Write}; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::{Duration, Instant}; @@ -76,6 +76,15 @@ pub use openshell_core::forward::{ find_forward_by_port, list_forwards, stop_forward, stop_forwards_for_sandbox, }; +#[derive(Debug, PartialEq, Eq)] +enum SandboxUploadPlan 
{ + GitAware { + base_dir: PathBuf, + files: Vec, + }, + Regular, +} + /// Convert a sandbox phase integer to a human-readable string. fn phase_name(phase: i32) -> &'static str { match SandboxPhase::try_from(phase) { @@ -2007,26 +2016,29 @@ pub async fn sandbox_create( "\u{2022}".dimmed(), ); let local = Path::new(local_path); - if *git_ignore && let Ok((base_dir, files)) = git_sync_files(local) { - sandbox_sync_up_files( - &effective_server, - &sandbox_name, - &base_dir, - &files, - local, - dest, - &effective_tls, - ) - .await?; - } else if local.exists() { - sandbox_sync_up( - &effective_server, - &sandbox_name, - local, - dest, - &effective_tls, - ) - .await?; + match sandbox_upload_plan(local, *git_ignore)? { + SandboxUploadPlan::GitAware { base_dir, files } => { + sandbox_sync_up_files( + &effective_server, + &sandbox_name, + &base_dir, + &files, + local, + dest, + &effective_tls, + ) + .await?; + } + SandboxUploadPlan::Regular => { + sandbox_sync_up( + &effective_server, + &sandbox_name, + local, + dest, + &effective_tls, + ) + .await?; + } } eprintln!(" {} Files uploaded", "\u{2713}".green().bold()); } @@ -5616,6 +5628,28 @@ pub fn git_sync_files(local_path: &Path) -> Result<(PathBuf, Vec)> { Ok((base_dir, files)) } +fn sandbox_upload_plan(local_path: &Path, git_ignore: bool) -> Result { + let metadata = std::fs::symlink_metadata(local_path).map_err(|err| { + if err.kind() == ErrorKind::NotFound { + miette::miette!("local path does not exist: {}", local_path.display()) + } else { + miette::miette!( + "failed to inspect local upload path: {}", + local_path.display() + ) + } + })?; + + if git_ignore + && !metadata.file_type().is_symlink() + && let Ok((base_dir, files)) = git_sync_files(local_path) + { + return Ok(SandboxUploadPlan::GitAware { base_dir, files }); + } + + Ok(SandboxUploadPlan::Regular) +} + fn scrub_git_env(command: &mut Command) -> &mut Command { for key in [ "GIT_DIR", @@ -7191,7 +7225,8 @@ mod tests { plaintext_gateway_is_remote, 
progress_step_from_metadata, provider_profile_allows_refresh_bootstrap, provisioning_timeout_message, ready_false_condition_message, refresh_status_header, refresh_status_row, resolve_from, - sandbox_should_persist, service_expose_status_error, service_url_for_gateway, + sandbox_should_persist, sandbox_upload_plan, service_expose_status_error, + service_url_for_gateway, }; use crate::TEST_ENV_LOCK; use hyper::StatusCode; @@ -7936,6 +7971,51 @@ mod tests { assert_eq!(files, vec!["file.txt", "inner/child.txt"]); } + #[test] + fn sandbox_upload_plan_errors_for_missing_local_path() { + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let missing = tmpdir.path().join("missing"); + + let err = sandbox_upload_plan(&missing, false).expect_err("missing path should error"); + + assert!( + err.to_string().contains("local path does not exist"), + "expected missing-path error, got: {err}" + ); + } + + #[test] + fn sandbox_upload_plan_errors_for_missing_local_path_with_git_ignore() { + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let repo = tmpdir.path().join("repo"); + fs::create_dir_all(&repo).expect("create repo"); + init_git_repo(&repo); + let missing = repo.join("missing"); + + let err = sandbox_upload_plan(&missing, true).expect_err("missing path should error"); + + assert!( + err.to_string().contains("local path does not exist"), + "expected missing-path error, got: {err}" + ); + } + + #[cfg(unix)] + #[test] + fn sandbox_upload_plan_uses_regular_upload_for_symlinks() { + let tmpdir = tempfile::tempdir().expect("create tmpdir"); + let repo = tmpdir.path().join("repo"); + fs::create_dir_all(repo.join("real-dir")).expect("create repo"); + init_git_repo(&repo); + fs::write(repo.join("real-dir/file.txt"), "file").expect("write file.txt"); + std::os::unix::fs::symlink("real-dir", repo.join("link-dir")).expect("create symlink"); + + let plan = sandbox_upload_plan(&repo.join("link-dir"), true) + .expect("symlink upload should be planned"); + + 
assert_eq!(plan, super::SandboxUploadPlan::Regular); + } + #[test] fn git_sync_files_ignores_inherited_git_env() { let _lock = TEST_ENV_LOCK diff --git a/crates/openshell-router/src/backend.rs b/crates/openshell-router/src/backend.rs index ee084040c..f948ddc8b 100644 --- a/crates/openshell-router/src/backend.rs +++ b/crates/openshell-router/src/backend.rs @@ -213,38 +213,43 @@ fn prepare_backend_request( // path) and inject "anthropic_version" (required in the body, not a header). // Non-JSON bodies pass through unchanged; model rewrite and version injection // are silently skipped. Such bodies would be rejected by the upstream anyway. - let body = serde_json::from_slice::(&body).map_or(body, |mut json| { - if let Some(obj) = json.as_object_mut() { - // Vertex AI Anthropic endpoints require anthropic_version in the body. - // Standard Anthropic SDK sends it as a header; Vertex AI needs it as a body field. - // We inject it only for the Vertex rawPredict-style route contract used for - // Anthropic publisher endpoints, not for arbitrary model-in-path routes. - let needs_vertex_anthropic_version = is_vertex_anthropic_rawpredict_route(route); - if needs_vertex_anthropic_version { - // Vertex AI rawPredict encodes the model in the URL path, not - // the request body. Clients using the standard Anthropic API - // (e.g. Claude Code via inference.local) always send "model" - // in the body; strip it so Vertex AI does not reject the - // request with "Extra inputs are not permitted". 
- obj.remove("model"); - } else { - obj.insert( - "model".to_string(), - serde_json::Value::String(route.model.clone()), - ); - } - if needs_vertex_anthropic_version && !obj.contains_key("anthropic_version") { - obj.insert( - "anthropic_version".to_string(), - serde_json::Value::String(VERTEX_ANTHROPIC_VERSION.to_string()), - ); + let body = match serde_json::from_slice::(&body) { + Ok(mut json) => { + if let Some(obj) = json.as_object_mut() { + // Vertex AI Anthropic endpoints require anthropic_version in the body. + // Standard Anthropic SDK sends it as a header; Vertex AI needs it as a body field. + // We inject it only for the Vertex rawPredict-style route contract used for + // Anthropic publisher endpoints, not for arbitrary model-in-path routes. + let needs_vertex_anthropic_version = is_vertex_anthropic_rawpredict_route(route); + if needs_vertex_anthropic_version { + // Vertex AI rawPredict encodes the model in the URL path, not + // the request body. Clients using the standard Anthropic API + // (e.g. Claude Code via inference.local) always send "model" + // in the body; strip it so Vertex AI does not reject the + // request with "Extra inputs are not permitted". + obj.remove("model"); + } else { + obj.insert( + "model".to_string(), + serde_json::Value::String(route.model.clone()), + ); + } + if needs_vertex_anthropic_version && !obj.contains_key("anthropic_version") { + obj.insert( + "anthropic_version".to_string(), + serde_json::Value::String(VERTEX_ANTHROPIC_VERSION.to_string()), + ); + } } + + bytes::Bytes::from(serde_json::to_vec(&json).map_err(|err| { + RouterError::Internal(format!( + "failed to serialize rewritten inference request body: {err}" + )) + })?) 
} - bytes::Bytes::from( - serde_json::to_vec(&json) - .expect("re-serializing a valid serde_json::Value cannot fail"), - ) - }); + Err(_) => body, + }; builder = builder.body(body); Ok((builder, url)) From 1ed05e3a728bd46e5976f01a1a5d2045a22374c1 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Fri, 29 May 2026 08:41:25 -0500 Subject: [PATCH 4/4] test(sandbox): retry on spurious Ok in fork-exec ambiguity test On arm64 under heavy CI load, the /proc fd scan in find_socket_inode_owners can transiently miss the parent process's socket fd entry, returning only the child as an owner. This causes resolve_process_identity to return Ok (single owner, no ambiguity check fires) instead of the expected ambiguous-ownership Err. Extend the retry loop to also handle unexpected Ok results, mirroring the existing retry for transient Err results. 10 retries at 50ms gives a 500ms settling window, which is sufficient for procfs to stabilize on loaded arm64 runners. --- crates/openshell-sandbox/src/proxy.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index 1e95c035b..1ce926353 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -6616,12 +6616,21 @@ network_policies: let cache = BinaryIdentityCache::new(); let mut result = resolve_process_identity(entrypoint_pid, peer_port, &cache); - for _ in 0..5 { + for _ in 0..10 { match &result { Err(err) if err.reason.contains("No such file or directory") || err.reason.contains("os error 2") => { + // /proc/<pid>/fd scan transiently failed; give procfs time to settle. + std::thread::sleep(Duration::from_millis(50)); + result = resolve_process_identity(entrypoint_pid, peer_port, &cache); + } + Ok(_) => { + // On arm64 under heavy CI load the /proc fd scan can transiently + // miss the parent process's socket fd, making the scan return only + // the child as owner and yielding a spurious Ok.
Retry to give + // both owners time to appear consistently in /proc/<pid>/fd. std::thread::sleep(Duration::from_millis(50)); result = resolve_process_identity(entrypoint_pid, peer_port, &cache); }