Skip to content

Commit 65a3a7c

Browse files
authored
test(e2e): close Podman driver test coverage gaps (#1439)
* test(e2e): close Podman driver test coverage gaps Add podman_gateway_resume test following the VM pattern (no container-state assertions since Podman keeps containers running across gateway restarts). Widen websocket_conformance feature gate from e2e-docker to e2e-host-gateway so it runs on both Docker and Podman. Fix e2e-podman.sh to default to the e2e-podman feature so Podman-specific and host-gateway tests are actually included in Podman CI runs. * refactor(e2e): extract shared CLI helpers from gateway_resume tests Move run_cli, wait_for_healthy, sandbox_names, and wait_for_sandbox_exec_contains into a new harness::cli module. All three gateway_resume variants (Docker, Podman, VM) now import these shared helpers instead of defining identical copies. Docker-specific container introspection functions remain local to gateway_resume.rs.
1 parent 436c59a commit 65a3a7c

8 files changed

Lines changed: 212 additions & 214 deletions

File tree

e2e/rust/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,11 @@ name = "gateway_resume"
4646
path = "tests/gateway_resume.rs"
4747
required-features = ["e2e-docker"]
4848

49+
[[test]]
50+
name = "podman_gateway_resume"
51+
path = "tests/podman_gateway_resume.rs"
52+
required-features = ["e2e-podman"]
53+
4954
[[test]]
5055
name = "vm_gateway_resume"
5156
path = "tests/vm_gateway_resume.rs"
@@ -54,7 +59,7 @@ required-features = ["e2e-vm"]
5459
[[test]]
5560
name = "websocket_conformance"
5661
path = "tests/websocket_conformance.rs"
57-
required-features = ["e2e-docker"]
62+
required-features = ["e2e-host-gateway"]
5863

5964
[[test]]
6065
name = "user_namespaces"

e2e/rust/e2e-podman.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ set -euo pipefail
1010

1111
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
1212
E2E_TEST="${OPENSHELL_E2E_PODMAN_TEST:-}"
13-
E2E_FEATURES="${OPENSHELL_E2E_PODMAN_FEATURES:-e2e}"
13+
E2E_FEATURES="${OPENSHELL_E2E_PODMAN_FEATURES:-e2e-podman}"
1414

1515
cargo build -p openshell-cli --features openshell-core/dev-settings
1616

e2e/rust/src/harness/cli.rs

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! Shared CLI helpers for e2e tests that need to invoke `openshell` commands
5+
//! and poll for readiness.
6+
7+
use std::process::Stdio;
8+
use std::time::{Duration, Instant};
9+
10+
use tokio::time::sleep;
11+
12+
use super::binary::openshell_cmd;
13+
use super::output::strip_ansi;
14+
15+
pub async fn run_cli(args: &[&str]) -> (String, i32) {
16+
let mut cmd = openshell_cmd();
17+
cmd.args(args).stdout(Stdio::piped()).stderr(Stdio::piped());
18+
19+
let output = cmd.output().await.expect("spawn openshell");
20+
let stdout = String::from_utf8_lossy(&output.stdout);
21+
let stderr = String::from_utf8_lossy(&output.stderr);
22+
let combined = format!("{stdout}{stderr}");
23+
let code = output.status.code().unwrap_or(-1);
24+
(combined, code)
25+
}
26+
27+
pub async fn wait_for_healthy(timeout: Duration) -> Result<(), String> {
28+
let start = Instant::now();
29+
let mut last_output: String;
30+
31+
loop {
32+
let (output, code) = run_cli(&["status"]).await;
33+
let clean = strip_ansi(&output);
34+
let lower = clean.to_lowercase();
35+
if code == 0
36+
&& (lower.contains("healthy")
37+
|| lower.contains("running")
38+
|| lower.contains("connected"))
39+
{
40+
return Ok(());
41+
}
42+
last_output = clean;
43+
44+
if start.elapsed() > timeout {
45+
return Err(format!(
46+
"gateway did not become healthy within {}s. Last output:\n{last_output}",
47+
timeout.as_secs()
48+
));
49+
}
50+
sleep(Duration::from_secs(2)).await;
51+
}
52+
}
53+
54+
/// Lists sandbox names via `openshell sandbox list --names`.
///
/// The output is ANSI-stripped and split into lines; each line is trimmed
/// and blank lines are dropped.
///
/// # Errors
///
/// Returns the exit code and cleaned output when the command exits non-zero.
pub async fn sandbox_names() -> Result<Vec<String>, String> {
    let (raw, code) = run_cli(&["sandbox", "list", "--names"]).await;
    let clean = strip_ansi(&raw);

    if code != 0 {
        return Err(format!("sandbox list failed (exit {code}):\n{clean}"));
    }

    let mut names = Vec::new();
    for line in clean.lines() {
        let name = line.trim();
        if !name.is_empty() {
            names.push(name.to_owned());
        }
    }
    Ok(names)
}
68+
69+
pub async fn wait_for_sandbox_exec_contains(
70+
sandbox_name: &str,
71+
command: &[&str],
72+
expected: &str,
73+
timeout: Duration,
74+
) -> Result<(), String> {
75+
let start = Instant::now();
76+
let mut last_output: String;
77+
78+
loop {
79+
let mut cmd = openshell_cmd();
80+
cmd.args(["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--"])
81+
.args(command)
82+
.stdout(Stdio::piped())
83+
.stderr(Stdio::piped());
84+
85+
match cmd.output().await {
86+
Ok(output) => {
87+
let stdout = String::from_utf8_lossy(&output.stdout);
88+
let stderr = String::from_utf8_lossy(&output.stderr);
89+
last_output = strip_ansi(&format!("{stdout}{stderr}"));
90+
if output.status.success() && last_output.contains(expected) {
91+
return Ok(());
92+
}
93+
}
94+
Err(err) => {
95+
last_output = format!("failed to spawn openshell sandbox exec: {err}");
96+
}
97+
}
98+
99+
if start.elapsed() > timeout {
100+
return Err(format!(
101+
"sandbox '{sandbox_name}' exec did not produce '{expected}' within {}s. Last output:\n{last_output}",
102+
timeout.as_secs()
103+
));
104+
}
105+
sleep(Duration::from_secs(2)).await;
106+
}
107+
}

e2e/rust/src/harness/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
//! Shared test harness modules for CLI e2e tests.
55
66
pub mod binary;
7+
pub mod cli;
78
pub mod container;
89
pub mod gateway;
910
pub mod output;

e2e/rust/tests/gateway_resume.rs

Lines changed: 8 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@
1010
//! gateway process, so they skip this restart-only coverage.
1111
1212
use std::process::{Command, Stdio};
13-
use std::time::{Duration, Instant};
13+
use std::time::Duration;
1414

15-
use openshell_e2e::harness::binary::openshell_cmd;
15+
use openshell_e2e::harness::cli::{
16+
sandbox_names, wait_for_healthy, wait_for_sandbox_exec_contains,
17+
};
1618
use openshell_e2e::harness::gateway::ManagedGateway;
17-
use openshell_e2e::harness::output::strip_ansi;
1819
use openshell_e2e::harness::sandbox::SandboxGuard;
1920
use tokio::time::sleep;
2021

@@ -24,100 +25,6 @@ const RESUME_FILE: &str = "/sandbox/gateway-resume-state";
2425
const SANDBOX_NAMESPACE_LABEL: &str = "openshell.ai/sandbox-namespace";
2526
const SANDBOX_NAME_LABEL: &str = "openshell.ai/sandbox-name";
2627

27-
async fn run_cli(args: &[&str]) -> (String, i32) {
28-
let mut cmd = openshell_cmd();
29-
cmd.args(args).stdout(Stdio::piped()).stderr(Stdio::piped());
30-
31-
let output = cmd.output().await.expect("spawn openshell");
32-
let stdout = String::from_utf8_lossy(&output.stdout);
33-
let stderr = String::from_utf8_lossy(&output.stderr);
34-
let combined = format!("{stdout}{stderr}");
35-
let code = output.status.code().unwrap_or(-1);
36-
(combined, code)
37-
}
38-
39-
async fn wait_for_healthy(timeout: Duration) -> Result<(), String> {
40-
let start = Instant::now();
41-
let mut last_output: String;
42-
43-
loop {
44-
let (output, code) = run_cli(&["status"]).await;
45-
let clean = strip_ansi(&output);
46-
let lower = clean.to_lowercase();
47-
if code == 0
48-
&& (lower.contains("healthy")
49-
|| lower.contains("running")
50-
|| lower.contains("connected"))
51-
{
52-
return Ok(());
53-
}
54-
last_output = clean;
55-
56-
if start.elapsed() > timeout {
57-
return Err(format!(
58-
"gateway did not become healthy within {}s. Last output:\n{last_output}",
59-
timeout.as_secs()
60-
));
61-
}
62-
sleep(Duration::from_secs(2)).await;
63-
}
64-
}
65-
66-
async fn sandbox_names() -> Result<Vec<String>, String> {
67-
let (output, code) = run_cli(&["sandbox", "list", "--names"]).await;
68-
let clean = strip_ansi(&output);
69-
if code != 0 {
70-
return Err(format!("sandbox list failed (exit {code}):\n{clean}"));
71-
}
72-
73-
Ok(clean
74-
.lines()
75-
.map(str::trim)
76-
.filter(|line| !line.is_empty())
77-
.map(ToOwned::to_owned)
78-
.collect())
79-
}
80-
81-
async fn wait_for_sandbox_exec_contains(
82-
sandbox_name: &str,
83-
command: &[&str],
84-
expected: &str,
85-
timeout: Duration,
86-
) -> Result<(), String> {
87-
let start = Instant::now();
88-
let mut last_output: String;
89-
90-
loop {
91-
let mut cmd = openshell_cmd();
92-
cmd.args(["sandbox", "exec", "--name", sandbox_name, "--no-tty", "--"])
93-
.args(command)
94-
.stdout(Stdio::piped())
95-
.stderr(Stdio::piped());
96-
97-
match cmd.output().await {
98-
Ok(output) => {
99-
let stdout = String::from_utf8_lossy(&output.stdout);
100-
let stderr = String::from_utf8_lossy(&output.stderr);
101-
last_output = strip_ansi(&format!("{stdout}{stderr}"));
102-
if output.status.success() && last_output.contains(expected) {
103-
return Ok(());
104-
}
105-
}
106-
Err(err) => {
107-
last_output = format!("failed to spawn openshell sandbox exec: {err}");
108-
}
109-
}
110-
111-
if start.elapsed() > timeout {
112-
return Err(format!(
113-
"sandbox '{sandbox_name}' exec did not produce '{expected}' within {}s. Last output:\n{last_output}",
114-
timeout.as_secs()
115-
));
116-
}
117-
sleep(Duration::from_secs(2)).await;
118-
}
119-
}
120-
12128
fn sandbox_container_id(namespace: &str, sandbox_name: &str) -> Result<String, String> {
12229
let namespace_filter = format!("label={SANDBOX_NAMESPACE_LABEL}={namespace}");
12330
let sandbox_name_filter = format!("label={SANDBOX_NAME_LABEL}={sandbox_name}");
@@ -189,7 +96,7 @@ async fn wait_for_container_running(
18996
expected: bool,
19097
timeout: Duration,
19198
) -> Result<(), String> {
192-
let start = Instant::now();
99+
let start = std::time::Instant::now();
193100
let mut last_state: String;
194101

195102
loop {
@@ -231,12 +138,9 @@ async fn docker_gateway_restart_resumes_running_sandbox() {
231138
let script = format!(
232139
"echo before-restart > {RESUME_FILE}; echo {READY_MARKER}; while true; do sleep 1; done"
233140
);
234-
let mut sandbox = SandboxGuard::create_keep(
235-
&["sh", "-lc", &script],
236-
READY_MARKER,
237-
)
238-
.await
239-
.expect("create long-running sandbox");
141+
let mut sandbox = SandboxGuard::create_keep(&["sh", "-lc", &script], READY_MARKER)
142+
.await
143+
.expect("create long-running sandbox");
240144

241145
let before_restart = sandbox
242146
.exec(&["cat", RESUME_FILE])
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#![cfg(feature = "e2e-podman")]
5+
6+
//! Podman-specific E2E coverage for resuming sandboxes after a standalone
7+
//! gateway restart.
8+
//!
9+
//! Unlike the Docker driver, Podman does not stop sandbox containers when the
10+
//! gateway process exits — the containers keep running and the restarted
11+
//! gateway re-adopts them. This test follows the `vm_gateway_resume.rs`
12+
//! pattern: verify sandbox survival at the application level without asserting
13+
//! intermediate container-state transitions.
14+
15+
use std::time::Duration;
16+
17+
use openshell_e2e::harness::cli::{sandbox_names, wait_for_healthy, wait_for_sandbox_exec_contains};
18+
use openshell_e2e::harness::gateway::ManagedGateway;
19+
use openshell_e2e::harness::sandbox::SandboxGuard;
20+
21+
const READY_MARKER: &str = "podman-gateway-resume-ready";
22+
const RESUME_FILE: &str = "/sandbox/podman-gateway-resume-state";
23+
24+
#[tokio::test]
25+
async fn podman_gateway_restart_resumes_running_sandbox() {
26+
if std::env::var("OPENSHELL_E2E_DRIVER").as_deref() != Ok("podman") {
27+
eprintln!("Skipping Podman gateway resume test: e2e driver is not podman");
28+
return;
29+
}
30+
let Some(gateway) = ManagedGateway::from_env().expect("load managed e2e gateway metadata")
31+
else {
32+
eprintln!(
33+
"Skipping Podman gateway resume test: e2e gateway is not managed by this test run"
34+
);
35+
return;
36+
};
37+
38+
wait_for_healthy(Duration::from_secs(30))
39+
.await
40+
.expect("gateway should start healthy");
41+
42+
let script = format!(
43+
"echo before-restart > {RESUME_FILE}; echo {READY_MARKER}; while true; do sleep 1; done"
44+
);
45+
let mut sandbox = SandboxGuard::create_keep(&["sh", "-lc", &script], READY_MARKER)
46+
.await
47+
.expect("create long-running Podman sandbox");
48+
49+
let before_restart = sandbox
50+
.exec(&["cat", RESUME_FILE])
51+
.await
52+
.expect("read Podman sandbox state before restart");
53+
assert!(
54+
before_restart.contains("before-restart"),
55+
"sandbox state was not written before restart:\n{before_restart}"
56+
);
57+
58+
gateway.stop().expect("stop e2e gateway");
59+
gateway.start().expect("restart e2e gateway");
60+
wait_for_healthy(Duration::from_secs(120))
61+
.await
62+
.expect("gateway should become healthy after restart");
63+
64+
let names = sandbox_names().await.expect("list sandboxes after restart");
65+
assert!(
66+
names.contains(&sandbox.name),
67+
"sandbox '{}' should still be listed after gateway restart. Names: {names:?}",
68+
sandbox.name
69+
);
70+
71+
wait_for_sandbox_exec_contains(
72+
&sandbox.name,
73+
&["cat", RESUME_FILE],
74+
"before-restart",
75+
Duration::from_secs(240),
76+
)
77+
.await
78+
.expect("Podman sandbox should become ready again with its state preserved");
79+
80+
sandbox.cleanup().await;
81+
}

0 commit comments

Comments
 (0)