Skip to content

Commit 755bb07

Browse files
committed
test(e2e): add simple e2e test with kubernetes to test /readyz
Signed-off-by: Adrien Langou <alangou@nvidia.com>
1 parent 2e85d01 commit 755bb07

7 files changed

Lines changed: 427 additions & 1 deletion

File tree

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use bytes::Bytes;
5+
use http_body_util::{BodyExt, Empty};
6+
use hyper::{Request, StatusCode};
7+
use hyper_util::rt::TokioIo;
8+
use openshell_server::{Store, health_router};
9+
use serde_json::Value;
10+
use std::sync::Arc;
11+
use std::time::Duration;
12+
use tokio::net::TcpListener;
13+
14+
async fn start_health_server(
15+
store: Arc<Store>,
16+
) -> (std::net::SocketAddr, tokio::task::JoinHandle<()>) {
17+
let listener = TcpListener::bind("127.0.0.1:0")
18+
.await
19+
.expect("bind ephemeral health test listener");
20+
let addr = listener
21+
.local_addr()
22+
.expect("resolve local address for health test listener");
23+
24+
let router = health_router(store);
25+
let server = tokio::spawn(async move {
26+
let _ = axum::serve(listener, router.into_make_service()).await;
27+
});
28+
29+
(addr, server)
30+
}
31+
32+
async fn http_get_json(addr: std::net::SocketAddr, path: &str) -> (StatusCode, Value) {
33+
let stream = tokio::net::TcpStream::connect(addr)
34+
.await
35+
.expect("connect test HTTP client");
36+
let (mut sender, conn) = hyper::client::conn::http1::Builder::new()
37+
.handshake(TokioIo::new(stream))
38+
.await
39+
.expect("handshake HTTP/1 test client");
40+
tokio::spawn(async move {
41+
let _ = conn.await;
42+
});
43+
44+
let req = Request::builder()
45+
.method("GET")
46+
.uri(format!("http://{addr}{path}"))
47+
.body(Empty::<Bytes>::new())
48+
.expect("build HTTP request");
49+
let resp = sender.send_request(req).await.expect("send HTTP request");
50+
let status = resp.status();
51+
let bytes = resp
52+
.into_body()
53+
.collect()
54+
.await
55+
.expect("collect response body")
56+
.to_bytes();
57+
let body = if bytes.is_empty() {
58+
Value::Null
59+
} else {
60+
serde_json::from_slice(&bytes).expect("response body must be valid JSON")
61+
};
62+
(status, body)
63+
}
64+
65+
#[tokio::test]
66+
async fn readyz_reports_healthy_when_database_is_reachable() {
67+
let store = Arc::new(
68+
Store::connect("sqlite::memory:")
69+
.await
70+
.expect("connect in-memory sqlite store for health integration test"),
71+
);
72+
let (addr, server) = start_health_server(store.clone()).await;
73+
74+
// `health_router` does not block on the first poll, so /readyz starts in
75+
// `Initializing → 503` until the background monitor publishes the first
76+
// healthy state (sub-millisecond for in-memory SQLite, but still a race).
77+
let (status, body) = wait_for_status(addr, StatusCode::OK, Duration::from_secs(2))
78+
.await
79+
.expect("/readyz did not become healthy within 2s");
80+
assert_eq!(status, StatusCode::OK);
81+
assert_eq!(body["status"], "healthy");
82+
assert_eq!(body["checks"]["database"]["status"], "healthy");
83+
84+
server.abort();
85+
}
86+
87+
/// `/readyz` must flip from `200`/healthy to `503`/unhealthy after the
/// backing store has been closed.
///
/// Only compiled when the `test-support` feature is enabled.
#[cfg(feature = "test-support")]
#[tokio::test]
async fn readyz_reports_database_health_transition_after_close() {
    let store = Store::connect("sqlite::memory:")
        .await
        .map(Arc::new)
        .expect("connect in-memory sqlite store for health integration test");
    // Keep one handle locally so the store can be closed further down.
    let (addr, server) = start_health_server(Arc::clone(&store)).await;

    // Phase 1: wait for the endpoint to report healthy.
    let healthy_wait = Duration::from_secs(2);
    let (ready_status, ready_body) = wait_for_status(addr, StatusCode::OK, healthy_wait)
        .await
        .expect("/readyz did not become healthy within 2s");
    assert_eq!(ready_status, StatusCode::OK);
    assert_eq!(ready_body["status"], "healthy");
    assert_eq!(ready_body["checks"]["database"]["status"], "healthy");

    // Phase 2: take the database away.
    store.close().await;

    // /readyz serves the state cached by the background readiness monitor,
    // so the outage becomes visible only after the monitor's next tick
    // (about 5s with the default interval). The 10s deadline leaves ample
    // headroom so this check does not race the polling cycle.
    let outage_wait = Duration::from_secs(10);
    let (down_status, down_body) =
        wait_for_status(addr, StatusCode::SERVICE_UNAVAILABLE, outage_wait)
            .await
            .expect("/readyz did not transition to 503 after store.close() within 10s");
    assert_eq!(down_status, StatusCode::SERVICE_UNAVAILABLE);
    assert_eq!(down_body["status"], "unhealthy");
    let database_check = &down_body["checks"]["database"];
    assert_eq!(database_check["status"], "unhealthy");
    assert_eq!(database_check["error"], "database unavailable");

    server.abort();
}
125+
126+
/// Poll `/readyz` until it returns `expected`, or give up after `timeout`.
127+
///
128+
/// Used to bridge the gap between `health_router`'s non-blocking startup
129+
/// and the background monitor publishing its first probe outcome.
130+
async fn wait_for_status(
131+
addr: std::net::SocketAddr,
132+
expected: StatusCode,
133+
timeout: Duration,
134+
) -> Option<(StatusCode, Value)> {
135+
let deadline = tokio::time::Instant::now() + timeout;
136+
loop {
137+
let observation = http_get_json(addr, "/readyz").await;
138+
if observation.0 == expected {
139+
return Some(observation);
140+
}
141+
if tokio::time::Instant::now() >= deadline {
142+
return None;
143+
}
144+
tokio::time::sleep(Duration::from_millis(50)).await;
145+
}
146+
}

e2e/rust/Cargo.lock

Lines changed: 113 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

e2e/rust/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ name = "vm_gateway_resume"
5656
path = "tests/vm_gateway_resume.rs"
5757
required-features = ["e2e-vm"]
5858

59+
[[test]]
60+
name = "readyz_health"
61+
path = "tests/readyz_health.rs"
62+
required-features = ["e2e-kubernetes"]
63+
5964
[[test]]
6065
name = "websocket_conformance"
6166
path = "tests/websocket_conformance.rs"
@@ -88,6 +93,10 @@ required-features = ["e2e-gpu"]
8893

8994
[dependencies]
9095
base64 = "0.22"
96+
bytes = "1"
97+
http-body-util = "0.1"
98+
hyper = { version = "1", features = ["client", "http1"] }
99+
hyper-util = { version = "0.1", features = ["tokio"] }
91100
tokio = { version = "1.43", features = ["full"] }
92101
tempfile = "3"
93102
sha1 = "0.10"

e2e/rust/e2e-kubernetes.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ set -euo pipefail
1919

2020
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
2121

22-
E2E_FEATURES="${OPENSHELL_E2E_KUBERNETES_FEATURES:-e2e,e2e-host-gateway}"
22+
E2E_FEATURES="${OPENSHELL_E2E_KUBERNETES_FEATURES:-e2e,e2e-host-gateway,e2e-kubernetes}"
2323

2424
cargo build -p openshell-cli --features openshell-core/dev-settings
2525

0 commit comments

Comments
 (0)