diff --git a/crates/openshell-driver-vm/README.md b/crates/openshell-driver-vm/README.md index 8da0b96a4..0c66726e2 100644 --- a/crates/openshell-driver-vm/README.md +++ b/crates/openshell-driver-vm/README.md @@ -146,7 +146,7 @@ Select the VM driver with `--drivers vm`, `OPENSHELL_DRIVERS=vm`, or `compute_dr | `vcpus` | `2` | vCPUs per sandbox. | | `mem_mib` | `2048` | Memory per sandbox, in MiB. | | `overlay_disk_mib` | `4096` | Sparse writable overlay disk size per sandbox, in MiB. | -| `krun_log_level` | `1` | libkrun verbosity (0-5). | +| `hypervisor_log_level` | `1` | Hypervisor helper verbosity. `krun_log_level` remains accepted as a compatibility alias. | | `guest_tls_ca` | unset | CA cert for the guest's mTLS client bundle. Required when `grpc_endpoint` uses `https://`. | | `guest_tls_cert` | unset | Guest client certificate. | | `guest_tls_key` | unset | Guest client private key. | diff --git a/crates/openshell-server/src/cli.rs b/crates/openshell-server/src/cli.rs index b8d345f9e..0124edb73 100644 --- a/crates/openshell-server/src/cli.rs +++ b/crates/openshell-server/src/cli.rs @@ -1474,4 +1474,50 @@ default_image = "k8s-specific:1.0" .expect("deserializes"); assert_eq!(parsed.default_image, "k8s-specific:1.0"); } + + #[test] + fn vm_config_parses_driver_specific_gpu_fields() { + let file = config_file_from_toml( + r#" +[openshell.gateway] +default_image = "gateway-default:1.0" +guest_tls_ca = "/tls/ca.pem" +guest_tls_cert = "/tls/client.pem" +guest_tls_key = "/tls/client-key.pem" + +[openshell.drivers.vm] +grpc_endpoint = "http://127.0.0.1:17670" +gpu_enabled = true +gpu_mem_mib = 12288 +gpu_vcpus = 6 +"#, + ); + + let cfg = super::build_vm_config(Some(&file), None, true, 17670).unwrap(); + + assert_eq!(cfg.default_image, "gateway-default:1.0"); + assert_eq!(cfg.guest_tls_ca, Some("/tls/ca.pem".into())); + assert_eq!(cfg.guest_tls_cert, Some("/tls/client.pem".into())); + assert_eq!(cfg.guest_tls_key, Some("/tls/client-key.pem".into())); + assert!(cfg.gpu_enabled); + assert_eq!(cfg.gpu_mem_mib, 12288); + assert_eq!(cfg.gpu_vcpus, 6); + } + + #[test] + fn vm_config_rejects_unknown_driver_field() { + let file = config_file_from_toml( + r" +[openshell.drivers.vm] +unknown_vm_field = true +", + ); + + let err = super::build_vm_config(Some(&file), None, true, 17670).unwrap_err(); + + assert!( + err.to_string() + .contains("invalid [openshell.drivers.vm] table") + ); + } } diff --git a/crates/openshell-server/src/compute/vm.rs b/crates/openshell-server/src/compute/vm.rs index efdc9daab..a272a4032 100644 --- a/crates/openshell-server/src/compute/vm.rs +++ b/crates/openshell-server/src/compute/vm.rs @@ -25,7 +25,7 @@ //! generalized compute-driver interface, the CLI-arg plumbing below should //! be replaced with a driver-agnostic launcher that speaks gRPC to //! configure the driver — and this file should collapse to the types that -//! are genuinely VM-specific (libkrun log level, vCPU / memory shape) plus a +//! are genuinely VM-specific (hypervisor helper log level, vCPU / memory shape) plus a //! trait implementation registering the VM driver against the generic //! interface. @@ -39,6 +39,8 @@ use openshell_core::proto::compute::v1::{ }; use openshell_core::{Config, Error, Result}; #[cfg(unix)] +use std::ffi::OsString; +#[cfg(unix)] use std::os::unix::fs::{FileTypeExt, MetadataExt, PermissionsExt}; #[cfg(unix)] use std::path::Path; @@ -58,6 +60,24 @@ use tower::service_fn; const DRIVER_BIN_NAME: &str = "openshell-driver-vm"; const COMPUTE_DRIVER_SOCKET_RUN_DIR: &str = "run"; const COMPUTE_DRIVER_SOCKET_NAME: &str = "compute-driver.sock"; +#[cfg(unix)] +const VM_DRIVER_CONFIG_ENV_VARS: &[&str] = &[ + "OPENSHELL_GRPC_ENDPOINT", + "OPENSHELL_SANDBOX_IMAGE", + "OPENSHELL_VM_BOOTSTRAP_IMAGE", + "OPENSHELL_VM_DRIVER_STATE_DIR", + "OPENSHELL_VM_HYPERVISOR_LOG_LEVEL", + "OPENSHELL_VM_KRUN_LOG_LEVEL", + "OPENSHELL_VM_DRIVER_VCPUS", + "OPENSHELL_VM_DRIVER_MEM_MIB", + "OPENSHELL_VM_OVERLAY_DISK_MIB", + "OPENSHELL_VM_GPU", + "OPENSHELL_VM_GPU_MEM_MIB", + "OPENSHELL_VM_GPU_VCPUS", + "OPENSHELL_VM_TLS_CA", + "OPENSHELL_VM_TLS_CERT", + "OPENSHELL_VM_TLS_KEY", +]; /// Configuration for launching and talking to the VM compute driver. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] @@ -79,8 +99,9 @@ pub struct VmComputeConfig { /// Bootstrap image used to boot and prepare VM sandbox target images. pub bootstrap_image: String, - /// libkrun log level used by the VM driver helper. - pub krun_log_level: u32, + /// Hypervisor helper log level used by the VM driver. + #[serde(alias = "krun_log_level")] + pub hypervisor_log_level: u32, /// Default vCPU count for VM sandboxes. pub vcpus: u8, @@ -99,6 +120,15 @@ pub struct VmComputeConfig { /// Host-side private key for the guest's mTLS client bundle. pub guest_tls_key: Option, + + /// Enable host GPU passthrough support in the VM driver. + pub gpu_enabled: bool, + + /// Memory allocation for GPU VM sandboxes, in MiB. + pub gpu_mem_mib: u32, + + /// vCPU count for GPU VM sandboxes. + pub gpu_vcpus: u8, } impl VmComputeConfig { @@ -111,9 +141,9 @@ impl VmComputeConfig { ) } - /// Default libkrun log level. + /// Default hypervisor helper log level. #[must_use] - pub const fn default_krun_log_level() -> u32 { + pub const fn default_hypervisor_log_level() -> u32 { 1 } @@ -135,6 +165,18 @@ impl VmComputeConfig { 4096 } + /// Default memory allocation for GPU VM sandboxes, in MiB. + #[must_use] + pub const fn default_gpu_mem_mib() -> u32 { + 8192 + } + + /// Default vCPU count for GPU VM sandboxes. + #[must_use] + pub const fn default_gpu_vcpus() -> u8 { + 4 + } + #[must_use] fn default_driver_search_dirs(home: Option) -> Vec { let mut dirs = Vec::new(); @@ -156,13 +198,16 @@ impl Default for VmComputeConfig { default_image: openshell_core::image::default_sandbox_image(), grpc_endpoint: String::new(), bootstrap_image: String::new(), - krun_log_level: Self::default_krun_log_level(), + hypervisor_log_level: Self::default_hypervisor_log_level(), vcpus: Self::default_vcpus(), mem_mib: Self::default_mem_mib(), overlay_disk_mib: Self::default_overlay_disk_mib(), guest_tls_ca: None, guest_tls_cert: None, guest_tls_key: None, + gpu_enabled: false, + gpu_mem_mib: Self::default_gpu_mem_mib(), + gpu_vcpus: Self::default_gpu_vcpus(), } } } @@ -444,6 +489,161 @@ pub fn compute_driver_guest_tls_paths( Ok(Some(VmGuestTlsPaths { ca, cert, key })) } +#[cfg(unix)] +fn vm_config_with_env_overrides(vm_config: &VmComputeConfig) -> Result { + let mut cfg = vm_config.clone(); + + if let Some(endpoint) = env_string("OPENSHELL_GRPC_ENDPOINT")? { + cfg.grpc_endpoint = endpoint; + } + if let Some(image) = env_string("OPENSHELL_SANDBOX_IMAGE")? { + cfg.default_image = image; + } + if let Some(image) = env_string("OPENSHELL_VM_BOOTSTRAP_IMAGE")? { + cfg.bootstrap_image = image; + } + if let Some(path) = std::env::var_os("OPENSHELL_VM_DRIVER_STATE_DIR") { + cfg.state_dir = PathBuf::from(path); + } + if let Some(value) = env_parse("OPENSHELL_VM_HYPERVISOR_LOG_LEVEL")? { + cfg.hypervisor_log_level = value; + } else if let Some(value) = env_parse("OPENSHELL_VM_KRUN_LOG_LEVEL")? { + cfg.hypervisor_log_level = value; + } + if let Some(value) = env_parse("OPENSHELL_VM_DRIVER_VCPUS")? { + cfg.vcpus = value; + } + if let Some(value) = env_parse("OPENSHELL_VM_DRIVER_MEM_MIB")? { + cfg.mem_mib = value; + } + if let Some(value) = env_parse("OPENSHELL_VM_OVERLAY_DISK_MIB")? { + cfg.overlay_disk_mib = value; + } + if let Some(value) = env_bool("OPENSHELL_VM_GPU")? { + cfg.gpu_enabled = value; + } + if let Some(value) = env_parse("OPENSHELL_VM_GPU_MEM_MIB")? { + cfg.gpu_mem_mib = value; + } + if let Some(value) = env_parse("OPENSHELL_VM_GPU_VCPUS")? { + cfg.gpu_vcpus = value; + } + if let Some(path) = std::env::var_os("OPENSHELL_VM_TLS_CA") { + cfg.guest_tls_ca = Some(PathBuf::from(path)); + } + if let Some(path) = std::env::var_os("OPENSHELL_VM_TLS_CERT") { + cfg.guest_tls_cert = Some(PathBuf::from(path)); + } + if let Some(path) = std::env::var_os("OPENSHELL_VM_TLS_KEY") { + cfg.guest_tls_key = Some(PathBuf::from(path)); + } + + if cfg.state_dir.as_os_str().is_empty() { + cfg.state_dir = VmComputeConfig::default_state_dir(); + } + + Ok(cfg) +} + +#[cfg(unix)] +fn env_string(name: &'static str) -> Result> { + match std::env::var(name) { + Ok(value) => Ok(Some(value)), + Err(std::env::VarError::NotPresent) => Ok(None), + Err(std::env::VarError::NotUnicode(_)) => { + Err(Error::config(format!("{name} must be valid UTF-8"))) + } + } +} + +#[cfg(unix)] +fn env_parse(name: &'static str) -> Result> +where + T: std::str::FromStr, + T::Err: std::fmt::Display, +{ + let Some(value) = env_string(name)? else { + return Ok(None); + }; + value + .parse::() + .map(Some) + .map_err(|err| Error::config(format!("invalid {name} value '{value}': {err}"))) +} + +#[cfg(unix)] +fn env_bool(name: &'static str) -> Result> { + let Some(value) = env_string(name)? else { + return Ok(None); + }; + match value.to_ascii_lowercase().as_str() { + "1" | "true" | "yes" | "on" => Ok(Some(true)), + "0" | "false" | "no" | "off" => Ok(Some(false)), + _ => Err(Error::config(format!( + "invalid {name} value '{value}': expected true or false" + ))), + } +} + +#[cfg(unix)] +fn compute_driver_args( + config: &Config, + vm_config: &VmComputeConfig, + socket_path: &Path, + expected_peer_pid: u32, + guest_tls_paths: Option<&VmGuestTlsPaths>, +) -> Vec { + let mut args = vec![ + OsString::from("--bind-socket"), + socket_path.as_os_str().to_os_string(), + OsString::from("--expected-peer-pid"), + OsString::from(expected_peer_pid.to_string()), + OsString::from("--log-level"), + OsString::from(&config.log_level), + OsString::from("--openshell-endpoint"), + OsString::from(&vm_config.grpc_endpoint), + OsString::from("--state-dir"), + vm_config.state_dir.as_os_str().to_os_string(), + ]; + + if !vm_config.default_image.trim().is_empty() { + args.push(OsString::from("--default-image")); + args.push(OsString::from(&vm_config.default_image)); + } + if !vm_config.bootstrap_image.trim().is_empty() { + args.push(OsString::from("--bootstrap-image")); + args.push(OsString::from(&vm_config.bootstrap_image)); + } + + args.push(OsString::from("--krun-log-level")); + args.push(OsString::from(vm_config.hypervisor_log_level.to_string())); + args.push(OsString::from("--vcpus")); + args.push(OsString::from(vm_config.vcpus.to_string())); + args.push(OsString::from("--mem-mib")); + args.push(OsString::from(vm_config.mem_mib.to_string())); + args.push(OsString::from("--overlay-disk-mib")); + args.push(OsString::from(vm_config.overlay_disk_mib.to_string())); + + if vm_config.gpu_enabled { + args.push(OsString::from("--gpu")); + } + args.push(OsString::from("--gpu-mem-mib")); + args.push(OsString::from(vm_config.gpu_mem_mib.to_string())); + args.push(OsString::from("--gpu-vcpus")); + args.push(OsString::from(vm_config.gpu_vcpus.to_string())); + + if let Some(tls) = guest_tls_paths { + args.push(OsString::from("--guest-tls-ca")); + args.push(tls.ca.as_os_str().to_os_string()); + args.push(OsString::from("--guest-tls-cert")); + args.push(tls.cert.as_os_str().to_os_string()); + args.push(OsString::from("--guest-tls-key")); + args.push(tls.key.as_os_str().to_os_string()); + } + + args +} + /// Launch the VM compute-driver subprocess, wait for its UDS to come up, /// and return a gRPC `Channel` connected to it plus a process handle that /// kills the subprocess and removes the socket on drop. @@ -452,51 +652,34 @@ pub async fn spawn( config: &Config, vm_config: &VmComputeConfig, ) -> Result<(Channel, Arc)> { + let vm_config = vm_config_with_env_overrides(vm_config)?; if vm_config.grpc_endpoint.trim().is_empty() { return Err(Error::config( "grpc_endpoint is required when using the vm compute driver", )); } - let driver_bin = resolve_compute_driver_bin(vm_config)?; - let socket_path = compute_driver_socket_path(vm_config); - let guest_tls_paths = compute_driver_guest_tls_paths(vm_config)?; - prepare_compute_driver_socket_path(vm_config, &socket_path)?; + let driver_bin = resolve_compute_driver_bin(&vm_config)?; + let socket_path = compute_driver_socket_path(&vm_config); + let guest_tls_paths = compute_driver_guest_tls_paths(&vm_config)?; + prepare_compute_driver_socket_path(&vm_config, &socket_path)?; let mut command = Command::new(&driver_bin); command.kill_on_drop(true); command.stdin(Stdio::null()); command.stdout(Stdio::inherit()); command.stderr(Stdio::inherit()); - command.arg("--bind-socket").arg(&socket_path); - command - .arg("--expected-peer-pid") - .arg(std::process::id().to_string()); - command.arg("--log-level").arg(&config.log_level); - command - .arg("--openshell-endpoint") - .arg(&vm_config.grpc_endpoint); - command.arg("--state-dir").arg(&vm_config.state_dir); - if !vm_config.default_image.trim().is_empty() { - command.arg("--default-image").arg(&vm_config.default_image); + for key in VM_DRIVER_CONFIG_ENV_VARS { + command.env_remove(key); } - if !vm_config.bootstrap_image.trim().is_empty() { - command - .arg("--bootstrap-image") - .arg(&vm_config.bootstrap_image); - } - command - .arg("--krun-log-level") - .arg(vm_config.krun_log_level.to_string()); - command.arg("--vcpus").arg(vm_config.vcpus.to_string()); - command.arg("--mem-mib").arg(vm_config.mem_mib.to_string()); - command - .arg("--overlay-disk-mib") - .arg(vm_config.overlay_disk_mib.to_string()); - if let Some(tls) = guest_tls_paths { - command.arg("--guest-tls-ca").arg(tls.ca); - command.arg("--guest-tls-cert").arg(tls.cert); - command.arg("--guest-tls-key").arg(tls.key); + for arg in compute_driver_args( + config, + &vm_config, + &socket_path, + std::process::id(), + guest_tls_paths.as_ref(), + ) { + command.arg(arg); } let mut child = command.spawn().map_err(|e| { @@ -581,15 +764,73 @@ async fn connect_compute_driver(socket_path: &Path) -> Result { #[cfg(all(test, unix))] mod tests { use super::{ - VmComputeConfig, compute_driver_guest_tls_paths, compute_driver_socket_path, current_euid, + VM_DRIVER_CONFIG_ENV_VARS, VmComputeConfig, compute_driver_args, + compute_driver_guest_tls_paths, compute_driver_socket_path, current_euid, prepare_compute_driver_socket_path, prepare_vm_state_dir, resolve_compute_driver_bin, - resolve_driver_search_dirs, + resolve_driver_search_dirs, vm_config_with_env_overrides, }; + use crate::TEST_ENV_LOCK as ENV_LOCK; + use openshell_core::Config; + use std::ffi::OsString; use std::os::unix::fs::PermissionsExt; use std::os::unix::net::UnixListener as StdUnixListener; - use std::path::PathBuf; + use std::path::{Path, PathBuf}; use tempfile::tempdir; + fn args_to_strings(args: Vec) -> Vec { + args.into_iter() + .map(|arg| arg.to_string_lossy().into_owned()) + .collect() + } + + fn arg_value(args: &[String], flag: &str) -> Option { + args.windows(2) + .find(|window| window[0] == flag) + .map(|window| window[1].clone()) + } + + struct EnvVarGuard { + key: &'static str, + original: Option, + } + + impl EnvVarGuard { + #[allow(unsafe_code)] + fn set(key: &'static str, value: &str) -> Self { + let original = std::env::var_os(key); + // SAFETY: tests serialize environment mutation with ENV_LOCK. + unsafe { std::env::set_var(key, value) }; + Self { key, original } + } + + #[allow(unsafe_code)] + fn remove(key: &'static str) -> Self { + let original = std::env::var_os(key); + // SAFETY: tests serialize environment mutation with ENV_LOCK. + unsafe { std::env::remove_var(key) }; + Self { key, original } + } + } + + impl Drop for EnvVarGuard { + #[allow(unsafe_code)] + fn drop(&mut self) { + match self.original.as_ref() { + // SAFETY: tests serialize environment mutation with ENV_LOCK. + Some(value) => unsafe { std::env::set_var(self.key, value) }, + // SAFETY: tests serialize environment mutation with ENV_LOCK. + None => unsafe { std::env::remove_var(self.key) }, + } + } + } + + fn remove_vm_env_overrides() -> Vec { + VM_DRIVER_CONFIG_ENV_VARS + .iter() + .map(|key| EnvVarGuard::remove(key)) + .collect() + } + #[test] fn resolve_driver_bin_uses_driver_dir_when_binary_present() { let dir = tempdir().unwrap(); @@ -631,6 +872,191 @@ mod tests { assert!(dirs.contains(&PathBuf::from("/usr/local/libexec"))); } + #[test] + fn vm_compute_config_deserializes_gpu_fields() { + let cfg: VmComputeConfig = toml::from_str( + r" +gpu_enabled = true +gpu_mem_mib = 12288 +gpu_vcpus = 6 +", + ) + .unwrap(); + + assert!(cfg.gpu_enabled); + assert_eq!(cfg.gpu_mem_mib, 12288); + assert_eq!(cfg.gpu_vcpus, 6); + } + + #[test] + fn vm_compute_config_deserializes_hypervisor_log_level_aliases() { + let cfg: VmComputeConfig = toml::from_str("hypervisor_log_level = 4").unwrap(); + assert_eq!(cfg.hypervisor_log_level, 4); + + let legacy: VmComputeConfig = toml::from_str("krun_log_level = 2").unwrap(); + assert_eq!(legacy.hypervisor_log_level, 2); + } + + #[test] + fn vm_compute_config_rejects_unknown_fields() { + let err = toml::from_str::( + r" +unknown_vm_field = true +", + ) + .expect_err("unknown fields should be rejected"); + + assert!(err.to_string().contains("unknown field")); + } + + #[test] + fn vm_config_env_overrides_apply_to_effective_spawn_config() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _env = remove_vm_env_overrides(); + let _g1 = EnvVarGuard::set("OPENSHELL_GRPC_ENDPOINT", "http://env-gateway:17670"); + let _g2 = EnvVarGuard::set("OPENSHELL_SANDBOX_IMAGE", "env-sandbox:latest"); + let _g3 = EnvVarGuard::set("OPENSHELL_VM_BOOTSTRAP_IMAGE", "env-bootstrap:latest"); + let _g4 = EnvVarGuard::set("OPENSHELL_VM_DRIVER_STATE_DIR", "/tmp/env-vm-state"); + let _g5 = EnvVarGuard::set("OPENSHELL_VM_HYPERVISOR_LOG_LEVEL", "3"); + let _g6 = EnvVarGuard::set("OPENSHELL_VM_DRIVER_VCPUS", "5"); + let _g7 = EnvVarGuard::set("OPENSHELL_VM_DRIVER_MEM_MIB", "5120"); + let _g8 = EnvVarGuard::set("OPENSHELL_VM_OVERLAY_DISK_MIB", "8192"); + let _g9 = EnvVarGuard::set("OPENSHELL_VM_GPU", "true"); + let _g10 = EnvVarGuard::set("OPENSHELL_VM_GPU_MEM_MIB", "24576"); + let _g11 = EnvVarGuard::set("OPENSHELL_VM_GPU_VCPUS", "10"); + let base = VmComputeConfig { + grpc_endpoint: "http://file-gateway:17670".to_string(), + default_image: "file-sandbox:latest".to_string(), + bootstrap_image: "file-bootstrap:latest".to_string(), + state_dir: PathBuf::from("/tmp/file-vm-state"), + hypervisor_log_level: 1, + vcpus: 2, + mem_mib: 2048, + overlay_disk_mib: 4096, + gpu_enabled: false, + gpu_mem_mib: 8192, + gpu_vcpus: 4, + ..Default::default() + }; + + let cfg = vm_config_with_env_overrides(&base).unwrap(); + + assert_eq!(cfg.grpc_endpoint, "http://env-gateway:17670"); + assert_eq!(cfg.default_image, "env-sandbox:latest"); + assert_eq!(cfg.bootstrap_image, "env-bootstrap:latest"); + assert_eq!(cfg.state_dir, PathBuf::from("/tmp/env-vm-state")); + assert_eq!(cfg.hypervisor_log_level, 3); + assert_eq!(cfg.vcpus, 5); + assert_eq!(cfg.mem_mib, 5120); + assert_eq!(cfg.overlay_disk_mib, 8192); + assert!(cfg.gpu_enabled); + assert_eq!(cfg.gpu_mem_mib, 24576); + assert_eq!(cfg.gpu_vcpus, 10); + } + + #[test] + fn vm_config_env_overrides_accept_legacy_krun_log_level() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _env = remove_vm_env_overrides(); + let _legacy = EnvVarGuard::set("OPENSHELL_VM_KRUN_LOG_LEVEL", "4"); + let base = VmComputeConfig { + hypervisor_log_level: 1, + ..Default::default() + }; + + let cfg = vm_config_with_env_overrides(&base).unwrap(); + + assert_eq!(cfg.hypervisor_log_level, 4); + } + + #[test] + fn vm_config_env_prefers_hypervisor_log_level_over_legacy_krun() { + let _lock = ENV_LOCK + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + let _env = remove_vm_env_overrides(); + let _preferred = EnvVarGuard::set("OPENSHELL_VM_HYPERVISOR_LOG_LEVEL", "5"); + let _legacy = EnvVarGuard::set("OPENSHELL_VM_KRUN_LOG_LEVEL", "4"); + let base = VmComputeConfig { + hypervisor_log_level: 1, + ..Default::default() + }; + + let cfg = vm_config_with_env_overrides(&base).unwrap(); + + assert_eq!(cfg.hypervisor_log_level, 5); + } + + #[test] + fn vm_driver_args_omit_gpu_flag_when_disabled() { + let config = Config::new(None).with_log_level("debug"); + let vm_config = VmComputeConfig { + grpc_endpoint: "http://127.0.0.1:17670".to_string(), + state_dir: PathBuf::from("/tmp/openshell-vm"), + gpu_enabled: false, + gpu_mem_mib: 12288, + gpu_vcpus: 6, + ..Default::default() + }; + + let args = args_to_strings(compute_driver_args( + &config, + &vm_config, + Path::new("/tmp/openshell-vm.sock"), + 1234, + None, + )); + + assert!(!args.iter().any(|arg| arg == "--gpu")); + assert_eq!(arg_value(&args, "--gpu-mem-mib").as_deref(), Some("12288")); + assert_eq!(arg_value(&args, "--gpu-vcpus").as_deref(), Some("6")); + } + + #[test] + fn vm_driver_args_include_gpu_flag_when_enabled() { + let config = Config::new(None).with_log_level("debug"); + let vm_config = VmComputeConfig { + grpc_endpoint: "http://127.0.0.1:17670".to_string(), + state_dir: PathBuf::from("/tmp/openshell-vm"), + bootstrap_image: "ghcr.io/nvidia/openshell/bootstrap:test".to_string(), + gpu_enabled: true, + gpu_mem_mib: 16384, + gpu_vcpus: 8, + ..Default::default() + }; + + let args = args_to_strings(compute_driver_args( + &config, + &vm_config, + Path::new("/tmp/openshell-vm.sock"), + 1234, + None, + )); + + assert!(args.iter().any(|arg| arg == "--gpu")); + assert_eq!(arg_value(&args, "--log-level").as_deref(), Some("debug")); + assert_eq!( + arg_value(&args, "--openshell-endpoint").as_deref(), + Some("http://127.0.0.1:17670") + ); + assert_eq!( + arg_value(&args, "--bootstrap-image").as_deref(), + Some("ghcr.io/nvidia/openshell/bootstrap:test") + ); + assert_eq!(arg_value(&args, "--vcpus").as_deref(), Some("2")); + assert_eq!(arg_value(&args, "--mem-mib").as_deref(), Some("2048")); + assert_eq!( + arg_value(&args, "--overlay-disk-mib").as_deref(), + Some("4096") + ); + assert_eq!(arg_value(&args, "--gpu-mem-mib").as_deref(), Some("16384")); + assert_eq!(arg_value(&args, "--gpu-vcpus").as_deref(), Some("8")); + } + #[test] fn vm_compute_driver_tls_requires_explicit_guest_bundle() { let vm_config = VmComputeConfig { diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index d0f4ef32b..a5ed471d4 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -211,7 +211,7 @@ image_pull_policy = "missing" # Podman vocabulary: always | missing | never | ### MicroVM -Each sandbox runs inside its own libkrun microVM managed by the standalone `openshell-driver-vm` subprocess. Use this driver when you want stronger isolation than container namespaces alone. +Each sandbox runs inside its own libkrun microVM managed by one `openshell-driver-vm` subprocess spawned by the gateway. Configure VM driver settings in this gateway TOML file under `[openshell.drivers.vm]`. ```toml [openshell] @@ -224,6 +224,7 @@ log_level = "info" compute_drivers = ["vm"] default_image = "ghcr.io/nvidia/openshell/sandbox:latest" +# Guest mTLS material is gateway-level input inherited by local compute drivers. guest_tls_ca = "/var/lib/openshell/guest-tls/ca.pem" guest_tls_cert = "/var/lib/openshell/guest-tls/client.pem" guest_tls_key = "/var/lib/openshell/guest-tls/client-key.pem" @@ -233,7 +234,22 @@ grpc_endpoint = "https://host.containers.internal:17670" state_dir = "/var/lib/openshell/vm" # Where the gateway looks for the openshell-driver-vm subprocess binary. driver_dir = "/usr/local/libexec/openshell" -vcpus = 2 -mem_mib = 2048 -krun_log_level = 1 + +# Optional bootstrap image. When unset, the VM driver uses default_image, then +# the requested sandbox image. +# bootstrap_image = "ghcr.io/nvidia/openshell/sandbox-bootstrap:latest" + +# Default VM shape. +vcpus = 2 +mem_mib = 2048 +overlay_disk_mib = 4096 + +# Hypervisor helper verbosity. Distinct from [openshell.gateway].log_level, +# which controls gateway and driver process logs. +hypervisor_log_level = 1 + +# GPU passthrough support. Leave disabled unless the host is prepared for VFIO. +gpu_enabled = false +gpu_mem_mib = 8192 +gpu_vcpus = 4 ``` diff --git a/docs/reference/sandbox-compute-drivers.mdx b/docs/reference/sandbox-compute-drivers.mdx index 1ab5c9b2f..7db71071d 100644 --- a/docs/reference/sandbox-compute-drivers.mdx +++ b/docs/reference/sandbox-compute-drivers.mdx @@ -87,7 +87,7 @@ compute_drivers = ["vm"] For a launch-time override, set `OPENSHELL_DRIVERS=vm` in the gateway environment and restart the service. -Configure VM driver values such as `grpc_endpoint`, `driver_dir`, `state_dir`, `default_image`, `bootstrap_image`, `vcpus`, `mem_mib`, `overlay_disk_mib`, `krun_log_level`, and `guest_tls_*` in `[openshell.drivers.vm]`. The VM `state_dir` stores overlay disks, console logs, runtime state, image-rootfs cache, and the private `run/compute-driver.sock` socket. +Configure VM driver values such as `grpc_endpoint`, `driver_dir`, `state_dir`, `default_image`, `bootstrap_image`, `vcpus`, `mem_mib`, `overlay_disk_mib`, `hypervisor_log_level`, and `guest_tls_*` in `[openshell.drivers.vm]`. The VM `state_dir` stores overlay disks, console logs, runtime state, image-rootfs cache, and the private `run/compute-driver.sock` socket. The gateway starts `openshell-driver-vm` over a private Unix socket and passes its process ID so the driver can reject unexpected local clients. The driver's standalone TCP listener is disabled unless `--allow-unauthenticated-tcp` is set for local development.