Skip to content

Commit f257ed0

Browse files
authored
refactor(packaging): rely on gateway runtime defaults (#1415)
* fix(packaging): use gateway TOML config in packages
* refactor(packaging): rely on gateway runtime defaults
1 parent 7f16d60 commit f257ed0

35 files changed

Lines changed: 764 additions & 709 deletions

crates/openshell-cli/src/run.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -675,8 +675,8 @@ fn is_loopback_gateway_endpoint(endpoint: &str) -> bool {
675675
/// would serve this endpoint.
676676
///
677677
/// Loopback endpoints (`localhost`, `127.0.0.1`, `::1`) resolve to the
678-
/// `"openshell"` gateway name, matching the convention used by
679-
/// `init-pki.sh` and the TLS cert resolver in `tls.rs`.
678+
/// `"openshell"` gateway name, matching the convention used by local
679+
/// `openshell-gateway generate-certs` and the TLS cert resolver in `tls.rs`.
680680
fn mtls_certs_exist_for_endpoint(name: &str, endpoint: &str) -> bool {
681681
let cert_name = if is_loopback_gateway_endpoint(endpoint) {
682682
"openshell"
@@ -901,7 +901,7 @@ pub async fn gateway_add(
901901

902902
// Derive a gateway name from the hostname when none is provided.
903903
// Loopback endpoints use the canonical "openshell" name, matching the
904-
// convention in init-pki.sh and default_tls_dir.
904+
// convention in local cert generation and default_tls_dir.
905905
let derived_name;
906906
let name = if let Some(n) = name {
907907
n
@@ -7240,7 +7240,7 @@ mod tests {
72407240
});
72417241

72427242
// Loopback endpoints derive the canonical "openshell" gateway
7243-
// name, matching init-pki.sh and default_tls_dir conventions.
7243+
// name, matching local cert generation and default_tls_dir conventions.
72447244
let metadata = load_gateway_metadata("openshell").expect("load stored gateway");
72457245
assert_eq!(metadata.auth_mode.as_deref(), Some("plaintext"));
72467246
assert!(!metadata.is_remote);

crates/openshell-core/src/config.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::str::FromStr;
2222
pub const DEFAULT_SSH_PORT: u16 = 2222;
2323

2424
/// Default gateway server port.
25-
pub const DEFAULT_SERVER_PORT: u16 = 8080;
25+
pub const DEFAULT_SERVER_PORT: u16 = 17670;
2626

2727
/// Default container stop timeout in seconds (SIGTERM → SIGKILL).
2828
pub const DEFAULT_STOP_TIMEOUT_SECS: u32 = 10;
@@ -34,7 +34,7 @@ pub const DEFAULT_DOCKER_NETWORK_NAME: &str = "openshell-docker";
3434
pub const DEFAULT_SERVICE_ROUTING_DOMAIN: &str = "openshell.localhost";
3535

3636
/// Default OCI image for the openshell-sandbox supervisor binary.
37-
pub const DEFAULT_SUPERVISOR_IMAGE: &str = "openshell/supervisor:latest";
37+
pub const DEFAULT_SUPERVISOR_IMAGE: &str = "ghcr.io/nvidia/openshell/supervisor:latest";
3838

3939
/// CDI device identifier for requesting all NVIDIA GPUs.
4040
pub const CDI_GPU_DEVICE_ALL: &str = "nvidia.com/gpu=all";
@@ -451,7 +451,7 @@ impl Default for ServiceRoutingConfig {
451451
}
452452

453453
fn default_bind_address() -> SocketAddr {
454-
"127.0.0.1:8080".parse().expect("valid default address")
454+
"127.0.0.1:17670".parse().expect("valid default address")
455455
}
456456

457457
fn default_service_routing_domains() -> Vec<String> {
@@ -557,7 +557,7 @@ mod tests {
557557

558558
#[test]
559559
fn config_defaults_to_loopback_bind_address() {
560-
let expected: SocketAddr = "127.0.0.1:8080".parse().expect("valid address");
560+
let expected: SocketAddr = "127.0.0.1:17670".parse().expect("valid address");
561561
assert_eq!(Config::new(None).bind_address, expected);
562562
}
563563

crates/openshell-core/src/paths.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,24 @@ pub fn openshell_config_dir() -> Result<PathBuf> {
2929
Ok(xdg_config_dir()?.join("openshell"))
3030
}
3131

32+
/// Resolve the XDG state base directory.
33+
///
34+
/// Returns `$XDG_STATE_HOME` if set, otherwise `$HOME/.local/state`.
///
/// The `XDG_STATE_HOME` value is used verbatim; this function does not check
/// that it is non-empty or absolute.
///
/// # Errors
///
/// Returns an error with the context "HOME is not set" when `XDG_STATE_HOME`
/// cannot be read and reading `HOME` fails as well.
35+
pub fn xdg_state_dir() -> Result<PathBuf> {
// `std::env::var` yields `Err` both for an unset variable and for a value that
// is not valid Unicode, so either case falls through to the `HOME` branch.
36+
if let Ok(path) = std::env::var("XDG_STATE_HOME") {
37+
return Ok(PathBuf::from(path));
38+
}
// Fallback: build `$HOME/.local/state` component by component.
39+
let home = std::env::var("HOME")
40+
.into_diagnostic()
41+
.wrap_err("HOME is not set")?;
42+
Ok(PathBuf::from(home).join(".local").join("state"))
43+
}
44+
45+
/// The top-level `OpenShell` state directory: `$XDG_STATE_HOME/openshell/`.
///
/// Built by appending `openshell` to [`xdg_state_dir`], so when
/// `XDG_STATE_HOME` is not available it resolves to
/// `$HOME/.local/state/openshell`.
///
/// # Errors
///
/// Propagates any error returned by [`xdg_state_dir`].
46+
pub fn openshell_state_dir() -> Result<PathBuf> {
47+
Ok(xdg_state_dir()?.join("openshell"))
48+
}
49+
3250
/// Resolve the XDG data base directory.
3351
///
3452
/// Returns `$XDG_DATA_HOME` if set, otherwise `$HOME/.local/share`.
@@ -130,6 +148,15 @@ mod tests {
130148
);
131149
}
132150

151+
// Verifies that the state directory path ends in an `openshell` component.
#[test]
152+
fn openshell_state_dir_appends_openshell() {
// Reads the real process environment: `unwrap` panics if `openshell_state_dir`
// returns an error.
153+
let dir = openshell_state_dir().unwrap();
// `ends_with` on a path compares whole components, so only the final
// `openshell` segment is checked and the base directory is left unconstrained.
154+
assert!(
155+
dir.ends_with("openshell"),
156+
"expected path ending with 'openshell', got: {dir:?}"
157+
);
158+
}
159+
133160
#[cfg(unix)]
134161
#[test]
135162
fn create_dir_restricted_sets_0o700() {

crates/openshell-driver-docker/src/lib.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -252,12 +252,6 @@ impl DockerComputeDriver {
252252
docker_config: &DockerComputeConfig,
253253
supervisor_readiness: Arc<dyn SupervisorReadiness>,
254254
) -> CoreResult<Self> {
255-
if docker_config.grpc_endpoint.trim().is_empty() {
256-
return Err(Error::config(
257-
"grpc_endpoint is required when using the docker compute driver",
258-
));
259-
}
260-
261255
let docker = Docker::connect_with_local_defaults()
262256
.map_err(|err| Error::execution(format!("failed to create Docker client: {err}")))?;
263257
let version = docker.version().await.map_err(|err| {
@@ -281,14 +275,24 @@ impl DockerComputeDriver {
281275
let host_gateway_ip = parse_optional_host_gateway_ip(&docker_config.host_gateway_ip)?;
282276
let gateway_route =
283277
docker_gateway_route(&info, bridge_gateway_ip, gateway_port, host_gateway_ip);
278+
let mut docker_config = docker_config.clone();
279+
if docker_config.grpc_endpoint.trim().is_empty() {
280+
let scheme = if docker_guest_tls_configured(&docker_config) {
281+
"https"
282+
} else {
283+
"http"
284+
};
285+
docker_config.grpc_endpoint =
286+
format!("{scheme}://{HOST_OPENSHELL_INTERNAL}:{gateway_port}");
287+
}
284288
let grpc_endpoint = docker_container_openshell_endpoint(
285289
&docker_config.grpc_endpoint,
286290
HOST_OPENSHELL_INTERNAL,
287291
gateway_port,
288292
);
289293
let daemon_arch = normalize_docker_arch(version.arch.as_deref().unwrap_or_default());
290-
let supervisor_bin = resolve_supervisor_bin(&docker, docker_config, &daemon_arch).await?;
291-
let guest_tls = docker_guest_tls_paths(docker_config)?;
294+
let supervisor_bin = resolve_supervisor_bin(&docker, &docker_config, &daemon_arch).await?;
295+
let guest_tls = docker_guest_tls_paths(&docker_config)?;
292296

293297
let driver = Self {
294298
docker: Arc::new(docker),
@@ -2009,6 +2013,12 @@ pub(crate) fn validate_linux_elf_binary(path: &Path) -> CoreResult<()> {
20092013
Ok(())
20102014
}
20112015

2016+
/// Returns `true` only when the guest TLS CA, certificate, and key are all set
/// on `docker_config`.
///
/// Only presence (`is_some`) is checked; the values themselves are not
/// inspected here. The `DockerComputeDriver` setup code uses the result to
/// choose `https` over `http` when it derives a default `grpc_endpoint`.
fn docker_guest_tls_configured(docker_config: &DockerComputeConfig) -> bool {
2017+
docker_config.guest_tls_ca.is_some()
2018+
&& docker_config.guest_tls_cert.is_some()
2019+
&& docker_config.guest_tls_key.is_some()
2020+
}
2021+
20122022
pub(crate) fn docker_guest_tls_paths(
20132023
docker_config: &DockerComputeConfig,
20142024
) -> CoreResult<Option<DockerGuestTlsPaths>> {

crates/openshell-driver-docker/src/tests.rs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,23 +74,23 @@ fn container_visible_endpoint_rewrites_loopback_hosts() {
7474
HOST_OPENSHELL_INTERNAL,
7575
DEFAULT_SERVER_PORT,
7676
),
77-
"https://host.openshell.internal:8080/"
77+
"https://host.openshell.internal:17670/"
7878
);
7979
assert_eq!(
8080
docker_container_openshell_endpoint(
8181
"http://127.0.0.1:8080",
8282
HOST_OPENSHELL_INTERNAL,
8383
DEFAULT_SERVER_PORT,
8484
),
85-
"http://host.openshell.internal:8080/"
85+
"http://host.openshell.internal:17670/"
8686
);
8787
assert_eq!(
8888
docker_container_openshell_endpoint(
8989
"https://gateway.internal:8443",
9090
HOST_OPENSHELL_INTERNAL,
9191
DEFAULT_SERVER_PORT,
9292
),
93-
"https://host.openshell.internal:8080/"
93+
"https://host.openshell.internal:17670/"
9494
);
9595
}
9696

@@ -273,7 +273,7 @@ fn docker_gateway_route_uses_bridge_gateway_for_linux_docker() {
273273
assert_eq!(
274274
route,
275275
DockerGatewayRoute::Bridge {
276-
bind_address: "172.18.0.1:8080".parse().unwrap(),
276+
bind_address: "172.18.0.1:17670".parse().unwrap(),
277277
host_alias_ip: IpAddr::V4(Ipv4Addr::new(172, 18, 0, 1)),
278278
}
279279
);
@@ -303,7 +303,7 @@ fn docker_gateway_route_prefers_configured_host_gateway_ip() {
303303
assert_eq!(
304304
route,
305305
DockerGatewayRoute::Bridge {
306-
bind_address: "172.20.0.4:8080".parse().unwrap(),
306+
bind_address: "172.20.0.4:17670".parse().unwrap(),
307307
host_alias_ip: IpAddr::V4(Ipv4Addr::new(172, 20, 0, 4)),
308308
}
309309
);

crates/openshell-driver-podman/NETWORKING.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -357,9 +357,9 @@ Supervisor proxy in container netns
357357

358358
The Podman driver auto-detects the callback endpoint scheme based on whether
359359
TLS client certificates are configured. When the RPM's auto-generated PKI is in
360-
place, the endpoint is `https://host.containers.internal:8080` and the
360+
place, the endpoint is `https://host.containers.internal:17670` and the
361361
supervisor connects with mTLS. Without TLS configuration, it falls back to
362-
`http://host.containers.internal:8080`.
362+
`http://host.containers.internal:<gateway-port>`.
363363

364364
```text
365365
Supervisor in container netns
@@ -382,10 +382,9 @@ Gateway
382382
9. Same gRPC channel reused for RelayStream calls
383383
```
384384

385-
The gateway binds to `0.0.0.0` by default in the RPM packaging. mTLS prevents
386-
unauthenticated access even though the gateway is reachable from the network.
387-
Client certificates are auto-generated by `init-pki.sh` on first start and
388-
bind-mounted into sandbox containers by the Podman driver.
385+
The gateway binds to `127.0.0.1:17670` by default in the RPM packaging. Client
386+
certificates are auto-generated by `openshell-gateway generate-certs` on first
387+
start and bind-mounted into sandbox containers by the Podman driver.
389388

390389
## Differences from the Kubernetes Driver
391390

@@ -412,7 +411,7 @@ published ports, or the supervisor relay.
412411

413412
| Port | Component | Purpose |
414413
|---|---|---|
415-
| `8080` | Gateway | gRPC and HTTP multiplexed default server port. |
414+
| `17670` | Gateway | Default local gRPC and HTTP multiplexed server port. |
416415
| `2222` | Sandbox | Container port mapping default for the SSH compatibility port. |
417416
| `3128` | Sandbox proxy | HTTP CONNECT proxy inside the sandbox network model. |
418417
| `0` | Host | Ephemeral host port requested for the container SSH compatibility port. |

crates/openshell-driver-podman/README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,10 @@ connection back to the gateway. On SELinux systems, the bind mounts include
120120
Podman's shared relabel option so the container process can read the files.
121121

122122
The RPM packaging auto-generates a self-signed PKI on first start via
123-
`init-pki.sh`. Client certs are placed in the CLI auto-discovery directory
124-
(`~/.config/openshell/gateways/openshell/mtls/`) so the CLI connects with mTLS
125-
without manual configuration. See `deploy/rpm/CONFIGURATION.md` for the full
126-
RPM configuration reference.
123+
`openshell-gateway generate-certs`. Client certs are placed in the CLI
124+
auto-discovery directory (`~/.config/openshell/gateways/openshell/mtls/`) so
125+
the CLI connects with mTLS without manual configuration. See
126+
`deploy/rpm/CONFIGURATION.md` for the full RPM configuration reference.
127127

128128
## Network Model
129129

@@ -134,7 +134,7 @@ the supervisor for sandbox process isolation.
134134
```mermaid
135135
graph TB
136136
subgraph Host
137-
GW["Gateway Server<br/>127.0.0.1:8080"]
137+
GW["Gateway Server<br/>127.0.0.1:17670"]
138138
PS["Podman Socket"]
139139
end
140140
@@ -289,11 +289,11 @@ Podman resources after out-of-band container removal or label drift.
289289
| `OPENSHELL_SANDBOX_IMAGE` | `--sandbox-image` | From gateway config | Default OCI image for sandboxes. |
290290
| `OPENSHELL_SANDBOX_IMAGE_PULL_POLICY` | `--sandbox-image-pull-policy` | `missing` | Pull policy: `always`, `missing`, `never`, or `newer`. |
291291
| `OPENSHELL_GRPC_ENDPOINT` | `--grpc-endpoint` | Auto-detected via `host.containers.internal` | Gateway gRPC endpoint for sandbox callbacks. |
292-
| `OPENSHELL_GATEWAY_PORT` | `--gateway-port` | `8080` | Gateway port used for endpoint auto-detection by the standalone binary. |
292+
| `OPENSHELL_GATEWAY_PORT` | `--gateway-port` | `17670` | Gateway port used for endpoint auto-detection by the standalone binary. |
293293
| `OPENSHELL_NETWORK_NAME` | `--network-name` | `openshell` | Podman bridge network name. |
294294
| `OPENSHELL_SANDBOX_SSH_SOCKET_PATH` | `--sandbox-ssh-socket-path` | `/run/openshell/ssh.sock` | Supervisor Unix socket path in `PodmanComputeConfig`. |
295295
| `OPENSHELL_STOP_TIMEOUT` | `--stop-timeout` | `10` | Container stop timeout in seconds. |
296-
| `OPENSHELL_SUPERVISOR_IMAGE` | `--supervisor-image` | `openshell/supervisor:latest` through the gateway, required standalone | OCI image containing the supervisor binary. |
296+
| `OPENSHELL_SUPERVISOR_IMAGE` | `--supervisor-image` | `ghcr.io/nvidia/openshell/supervisor:latest` through the gateway, required standalone | OCI image containing the supervisor binary. |
297297
| `OPENSHELL_PODMAN_TLS_CA` | `--podman-tls-ca` | unset | Host path to the CA certificate mounted for sandbox mTLS. |
298298
| `OPENSHELL_PODMAN_TLS_CERT` | `--podman-tls-cert` | unset | Host path to the client certificate mounted for sandbox mTLS. |
299299
| `OPENSHELL_PODMAN_TLS_KEY` | `--podman-tls-key` | unset | Host path to the client private key mounted for sandbox mTLS. |

crates/openshell-driver-podman/src/container.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1036,7 +1036,7 @@ mod tests {
10361036
let vol = &image_volumes[0];
10371037
assert_eq!(
10381038
vol["source"].as_str(),
1039-
Some("openshell/supervisor:latest"),
1039+
Some("ghcr.io/nvidia/openshell/supervisor:latest"),
10401040
"image volume source should be the supervisor image"
10411041
);
10421042
assert_eq!(

crates/openshell-server/src/certgen.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
//! - **Kubernetes mode** (default): create two `kubernetes.io/tls` Secrets
99
//! in the supplied namespace. Used by the Helm pre-install hook. Requires
1010
//! `--namespace`, `--server-secret-name`, `--client-secret-name`.
11-
//! - **Local mode** (`--output-dir <DIR>`): write PEMs to a filesystem layout
12-
//! used by the RPM systemd unit's `ExecStartPre`. Also copies client
13-
//! materials to
11+
//! - **Local mode** (`--output-dir <DIR>`): write PEMs to the local package
12+
//! filesystem layout. Used by systemd units' `ExecStartPre`. Also copies
13+
//! client materials to
1414
//! `$XDG_CONFIG_HOME/openshell/gateways/openshell/mtls/` so the local CLI
1515
//! picks them up automatically.
1616
//!

0 commit comments

Comments
 (0)