Skip to content

Commit 528fb29

Browse files
mjamiv and johntmyers authored
fix(sandbox): allow first-label L7 host wildcards (#1304)
* fix(sandbox): allow first-label L7 host wildcards

* docs(sandbox): document L7 host wildcard contract + add OPA runtime tests

  - Add Host Wildcards section to architecture/security-policy.md describing accepted (first-label *, **, intra-label *-X) and rejected (bare, TLD, non-first-label, recursive-in-label) forms, and noting that wildcards never cross '.' boundaries.
  - Expand the policy-schema.mdx 'host' field description to reflect the same contract instead of only mentioning '*.example.com'.
  - Add OPA runtime tests asserting '*-aiplatform.googleapis.com' matches 'us-central1-aiplatform.googleapis.com' and does not match 'us-central1.aiplatform.googleapis.com' (cross-dot boundary). Locks validator/runtime alignment for intra-label wildcards.

* chore: update mise lockfile

* test(server): tolerate serialized inference upserts

---------

Co-authored-by: John Myers <9696606+johntmyers@users.noreply.github.com>
1 parent 2d9e532 commit 528fb29

6 files changed

Lines changed: 244 additions & 111 deletions

File tree

architecture/security-policy.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,27 @@ Ordinary network traffic follows this order:
3636
Explicit deny and hardening checks win over allow rules. If no rule matches, the
3737
request is denied.
3838

39+
## Host Wildcards
40+
41+
Network endpoint `host` patterns accept a `*` wildcard inside the first DNS
42+
label only. The OPA runtime matches with a `.` label boundary, so a single `*`
43+
never spans dots; only `**` as the entire first label matches across multiple labels. The validator enforces the same boundary so that policy load
44+
fails fast instead of silently mismatching at the proxy.
45+
46+
| Pattern | Accepted | Example match | Notes |
47+
|---|---|---|---|
48+
| `*.example.com` | Yes | `api.example.com` | Single first label of any value. |
49+
| `**.example.com` | Yes | `a.b.example.com` | Recursive wildcard as the entire first label. |
50+
| `*-aiplatform.googleapis.com` | Yes | `us-central1-aiplatform.googleapis.com` | Intra-label wildcard inside the first DNS label. |
51+
| `*` or `**` | No || Matches every host. |
52+
| `*.com`, `**.com` | No || TLD wildcards (the pattern has two or fewer DNS labels). |
53+
| `foo.*.example.com` | No || Wildcard outside the first DNS label. |
54+
| `foo**.example.com` | No || Recursive `**` mixed inside a label; allowed only as the entire first label. |
55+
56+
Validation rejects the disallowed patterns at policy load time with a message
57+
that names the offending host. Exact hosts and IP addresses do not use this
58+
path.
59+
3960
## TLS and L7 Inspection
4061

4162
For HTTP endpoints that need request-level controls, the proxy can terminate TLS

crates/openshell-sandbox/src/l7/mod.rs

Lines changed: 129 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,43 @@ fn check_glob_syntax(pattern: &str) -> Option<String> {
353353
None
354354
}
355355

356+
/// Validates a wildcard endpoint `host` pattern, pushing at most one message
/// onto `errors`.
///
/// Hosts without a `*` are accepted as-is. Otherwise the checks run in this
/// order and stop at the first failure:
///
/// 1. Bare `*` / `**` is rejected because it matches every host.
/// 2. A `*` in any label after the first is rejected.
/// 3. `**` is rejected unless it is the entire first label.
/// 4. Patterns with two or fewer non-empty DNS labels are rejected as TLD
///    (or single-label) wildcards.
///
/// `loc` is the location prefix placed at the start of each message.
fn validate_host_wildcard(errors: &mut Vec<String>, loc: &str, host: &str) {
    if !host.contains('*') {
        return;
    }

    if host == "*" || host == "**" {
        errors.push(format!(
            "{loc}: host wildcard '{host}' matches all hosts; use specific patterns like '*.example.com'"
        ));
        return;
    }

    // `split` always yields at least one piece, so the first item is the
    // (possibly empty) text before the first `.`; the iterator is then left
    // positioned on the remaining labels.
    let mut labels = host.split('.');
    let first_label = labels.next().unwrap_or_default();
    if labels.any(|label| label.contains('*')) {
        errors.push(format!(
            "{loc}: host wildcard may only appear in the first DNS label, got '{host}'"
        ));
        return;
    }
    if first_label.contains("**") && first_label != "**" {
        errors.push(format!(
            "{loc}: recursive host wildcard '**' is only allowed as the entire first DNS label, got '{host}'"
        ));
        return;
    }

    // Reject TLD or single-label wildcards. They are accepted by the policy
    // engine but silently fail at the proxy layer (see #787). Empty labels are
    // not counted, so a trailing-dot form such as `*.com.` cannot slip past
    // the limit by inflating the label count.
    let label_count = host.split('.').filter(|label| !label.is_empty()).count();
    if label_count <= 2 {
        errors.push(format!(
            "{loc}: TLD wildcard '{host}' is not allowed; \
             use subdomain wildcards like '*.example.com' instead"
        ));
    }
}
392+
356393
fn validate_graphql_operation_type(
357394
errors: &mut Vec<String>,
358395
loc: &str,
@@ -529,29 +566,7 @@ pub fn validate_l7_policies(data_json: &serde_json::Value) -> (Vec<String>, Vec<
529566
}
530567
}
531568

532-
// Validate host wildcard patterns.
533-
if host.contains('*') {
534-
if host == "*" || host == "**" {
535-
errors.push(format!(
536-
"{loc}: host wildcard '{host}' matches all hosts; use specific patterns like '*.example.com'"
537-
));
538-
} else if !host.starts_with("*.") && !host.starts_with("**.") {
539-
errors.push(format!(
540-
"{loc}: host wildcard must start with '*.' or '**.' (e.g., '*.example.com'), got '{host}'"
541-
));
542-
} else {
543-
// Reject TLD wildcards like *.com (2 labels) — they are
544-
// accepted by the policy engine but silently fail at the
545-
// proxy layer (see #787).
546-
let label_count = host.split('.').count();
547-
if label_count <= 2 {
548-
errors.push(format!(
549-
"{loc}: TLD wildcard '{host}' is not allowed; \
550-
use subdomain wildcards like '*.example.com' instead"
551-
));
552-
}
553-
}
554-
}
569+
validate_host_wildcard(&mut errors, &loc, host);
555570

556571
// port + ports mutual exclusion
557572
let has_scalar_port = ep
@@ -1793,7 +1808,27 @@ mod tests {
17931808
}
17941809

17951810
#[test]
1796-
fn validate_wildcard_host_no_star_dot_error() {
1811+
fn validate_wildcard_host_mid_label_error() {
1812+
let data = serde_json::json!({
1813+
"network_policies": {
1814+
"test": {
1815+
"endpoints": [{
1816+
"host": "foo.*.example.com",
1817+
"port": 443
1818+
}],
1819+
"binaries": []
1820+
}
1821+
}
1822+
});
1823+
let (errors, _warnings) = validate_l7_policies(&data);
1824+
assert!(
1825+
errors.iter().any(|e| e.contains("first DNS label")),
1826+
"Mid-label wildcard should be rejected, got errors: {errors:?}"
1827+
);
1828+
}
1829+
1830+
#[test]
1831+
fn validate_wildcard_host_single_label_error() {
17971832
let data = serde_json::json!({
17981833
"network_policies": {
17991834
"test": {
@@ -1807,8 +1842,28 @@ mod tests {
18071842
});
18081843
let (errors, _warnings) = validate_l7_policies(&data);
18091844
assert!(
1810-
errors.iter().any(|e| e.contains("must start with")),
1811-
"Malformed wildcard should be rejected, got errors: {errors:?}"
1845+
errors.iter().any(|e| e.contains("TLD wildcard")),
1846+
"Single-label wildcard should be rejected, got errors: {errors:?}"
1847+
);
1848+
}
1849+
1850+
#[test]
1851+
fn validate_wildcard_host_recursive_intra_label_error() {
1852+
let data = serde_json::json!({
1853+
"network_policies": {
1854+
"test": {
1855+
"endpoints": [{
1856+
"host": "foo**.example.com",
1857+
"port": 443
1858+
}],
1859+
"binaries": []
1860+
}
1861+
}
1862+
});
1863+
let (errors, _warnings) = validate_l7_policies(&data);
1864+
assert!(
1865+
errors.iter().any(|e| e.contains("recursive host wildcard")),
1866+
"Recursive intra-label wildcard should be rejected, got errors: {errors:?}"
18121867
);
18131868
}
18141869

@@ -1876,6 +1931,54 @@ mod tests {
18761931
);
18771932
}
18781933

1934+
#[test]
1935+
fn validate_wildcard_host_double_star_valid_no_error() {
1936+
let data = serde_json::json!({
1937+
"network_policies": {
1938+
"test": {
1939+
"endpoints": [{
1940+
"host": "**.example.com",
1941+
"port": 443
1942+
}],
1943+
"binaries": []
1944+
}
1945+
}
1946+
});
1947+
let (errors, warnings) = validate_l7_policies(&data);
1948+
assert!(
1949+
errors.is_empty(),
1950+
"**.example.com should be valid, got errors: {errors:?}"
1951+
);
1952+
assert!(
1953+
warnings.is_empty(),
1954+
"**.example.com should not warn, got warnings: {warnings:?}"
1955+
);
1956+
}
1957+
1958+
#[test]
1959+
fn validate_wildcard_host_intra_label_valid_no_error() {
1960+
let data = serde_json::json!({
1961+
"network_policies": {
1962+
"test": {
1963+
"endpoints": [{
1964+
"host": "*-aiplatform.googleapis.com",
1965+
"port": 443
1966+
}],
1967+
"binaries": []
1968+
}
1969+
}
1970+
});
1971+
let (errors, warnings) = validate_l7_policies(&data);
1972+
assert!(
1973+
errors.is_empty(),
1974+
"*-aiplatform.googleapis.com should be valid, got errors: {errors:?}"
1975+
);
1976+
assert!(
1977+
warnings.is_empty(),
1978+
"*-aiplatform.googleapis.com should not warn, got warnings: {warnings:?}"
1979+
);
1980+
}
1981+
18791982
#[test]
18801983
fn validate_port_and_ports_mutually_exclusive() {
18811984
let data = serde_json::json!({

crates/openshell-sandbox/src/opa.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3991,6 +3991,69 @@ network_policies:
39913991
assert!(!decision.allowed, "Wildcard host on wrong port should deny");
39923992
}
39933993

3994+
#[test]
3995+
fn wildcard_host_intra_label_matches() {
3996+
// First-label intra-label wildcard: `*` matches the variable prefix
3997+
// within a single DNS label. Locks validator/runtime alignment for
3998+
// the pattern accepted by `validate_host_wildcard`.
3999+
let data = r#"
4000+
network_policies:
4001+
intra_label:
4002+
name: intra_label
4003+
endpoints:
4004+
- { host: "*-aiplatform.googleapis.com", port: 443 }
4005+
binaries:
4006+
- { path: /usr/bin/curl }
4007+
"#;
4008+
// Build the engine from the shared `TEST_POLICY` plus the inline policy
// data above.
let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap();
4009+
// The requested host ends with the pattern's literal
// `-aiplatform.googleapis.com` text; only the prefix inside the first label
// varies. Port and binary path equal the values in the policy entry.
let input = NetworkInput {
4010+
host: "us-central1-aiplatform.googleapis.com".into(),
4011+
port: 443,
4012+
binary_path: PathBuf::from("/usr/bin/curl"),
4013+
binary_sha256: "unused".into(),
4014+
ancestors: vec![],
4015+
cmdline_paths: vec![],
4016+
};
4017+
let decision = engine.evaluate_network(&input).unwrap();
4018+
// `decision.reason` is appended to the failure message to aid debugging.
assert!(
4019+
decision.allowed,
4020+
"*-aiplatform.googleapis.com should match us-central1-aiplatform.googleapis.com: {}",
4021+
decision.reason
4022+
);
4023+
}
4024+
4025+
#[test]
4026+
fn wildcard_host_intra_label_does_not_cross_dot() {
4027+
// `glob.match(..., ["."])` treats `.` as a label boundary that `*`
4028+
// cannot cross. `*-aiplatform.googleapis.com` must not match a host
4029+
// whose first label is `us-central1` and where `aiplatform` is a
4030+
// separate label.
4031+
let data = r#"
4032+
network_policies:
4033+
intra_label:
4034+
name: intra_label
4035+
endpoints:
4036+
- { host: "*-aiplatform.googleapis.com", port: 443 }
4037+
binaries:
4038+
- { path: /usr/bin/curl }
4039+
"#;
4040+
// Build the engine from the shared `TEST_POLICY` plus the inline policy
// data above.
let engine = OpaEngine::from_strings(TEST_POLICY, data).unwrap();
4041+
// Port and binary path equal the values in the policy entry, so the host
// is the only field that differs from an allowed request: `us-central1`
// and `aiplatform` are joined by `.` instead of `-`.
let input = NetworkInput {
4042+
host: "us-central1.aiplatform.googleapis.com".into(),
4043+
port: 443,
4044+
binary_path: PathBuf::from("/usr/bin/curl"),
4045+
binary_sha256: "unused".into(),
4046+
ancestors: vec![],
4047+
cmdline_paths: vec![],
4048+
};
4049+
let decision = engine.evaluate_network(&input).unwrap();
4050+
// The request must be denied.
assert!(
4051+
!decision.allowed,
4052+
"*-aiplatform.googleapis.com must NOT match us-central1.aiplatform.googleapis.com \
4053+
(would cross a `.` boundary)"
4054+
);
4055+
}
4056+
39944057
#[test]
39954058
fn wildcard_host_multi_port() {
39964059
let data = r#"

crates/openshell-server/src/inference.rs

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1113,7 +1113,9 @@ mod tests {
11131113
let result1 = handle1.await.unwrap();
11141114
let result2 = handle2.await.unwrap();
11151115

1116-
// One should succeed with MustCreate, the other should fail
1116+
// If both tasks observe a missing route before either insert commits, MustCreate
1117+
// should let exactly one win. If the scheduler serializes them, the second call
1118+
// may legitimately observe the new route and take the update path.
11171119
let successes = [&result1, &result2].iter().filter(|r| r.is_ok()).count();
11181120
let failures = [&result1, &result2]
11191121
.iter()
@@ -1127,22 +1129,40 @@ mod tests {
11271129
})
11281130
.count();
11291131

1130-
assert_eq!(
1131-
successes, 1,
1132-
"exactly one create should succeed, got: {result1:?}, {result2:?}"
1133-
);
1134-
assert_eq!(
1135-
failures, 1,
1136-
"exactly one create should fail, got: {result1:?}, {result2:?}"
1132+
assert!(
1133+
successes == 1 || successes == 2,
1134+
"one racing create should succeed, or both serialized upserts should succeed, got: {result1:?}, {result2:?}"
11371135
);
1136+
if successes == 1 {
1137+
assert_eq!(
1138+
failures, 1,
1139+
"the losing racing create should fail, got: {result1:?}, {result2:?}"
1140+
);
1141+
} else {
1142+
assert_eq!(
1143+
failures, 0,
1144+
"serialized upserts should not fail, got: {result1:?}, {result2:?}"
1145+
);
1146+
let mut versions = [&result1, &result2]
1147+
.into_iter()
1148+
.map(|result| result.as_ref().expect("success").route.version)
1149+
.collect::<Vec<_>>();
1150+
versions.sort_unstable();
1151+
assert_eq!(
1152+
versions,
1153+
vec![1, 2],
1154+
"serialized create-then-update should return versions 1 and 2"
1155+
);
1156+
}
11381157

1139-
// Only one route should exist
1158+
// Only one route should exist.
11401159
let route = store
11411160
.get_message_by_name::<InferenceRoute>(CLUSTER_INFERENCE_ROUTE_NAME)
11421161
.await
11431162
.expect("fetch")
11441163
.expect("route should exist");
1145-
assert_eq!(route.version, 1);
1164+
let expected_version = if successes == 1 { 1 } else { 2 };
1165+
assert_eq!(route.version, expected_version);
11461166
}
11471167

11481168
#[tokio::test]

docs/reference/policy-schema.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ Each endpoint defines a reachable destination and optional inspection rules.
152152

153153
| Field | Type | Required | Description |
154154
|---|---|---|---|
155-
| `host` | string | Yes | Hostname or IP address. Supports wildcards: `*.example.com` matches any subdomain. |
155+
| `host` | string | Yes | Hostname or IP address. Supports a `*` wildcard inside the first DNS label only: `*.example.com`, `**.example.com`, and intra-label patterns like `*-aiplatform.googleapis.com` are accepted; bare `*`/`**`, TLD wildcards (`*.com`), and wildcards outside the first label are rejected at load time. |
156156
| `port` | integer | Yes | TCP port number. |
157157
| `path` | string | No | Optional HTTP path glob used to select between L7 endpoints that share the same host and port. Empty means all paths. Use this when REST and GraphQL live under the same host, such as `/repos/**` and `/graphql`. |
158158
| `protocol` | string | No | Set to `rest` for HTTP method/path inspection, `websocket` for RFC 6455 upgrade and client text-message inspection, or `graphql` for GraphQL-over-HTTP operation inspection. WebSocket endpoints can also use GraphQL operation rules for GraphQL-over-WebSocket traffic. Omit for TCP passthrough. |

0 commit comments

Comments
 (0)