From 92a2a1431561cb8d4c56dfb77ef6228908ceef91 Mon Sep 17 00:00:00 2001 From: Christian Berendt Date: Wed, 10 Jun 2026 09:26:26 +0200 Subject: [PATCH 1/2] Restrict SONiC SNMP and gNMI access to the OOB network The generated ConfigDB binds the SNMP agent to the OOB IP in the mgmt VRF but never restricts which source networks can reach it, and the gNMI/telemetry server from the static TELEMETRY|gnmi base entry listens on all interfaces with no restriction at all. Emit per-device control-plane ACLs handled by caclmgrd: ACL_TABLE entries of type CTRLPLANE bound to the SNMP and EXTERNAL_CLIENT (gNMI) services, each with an ACL_RULE accepting only the device's network-normalised OOB management subnet. Binding a service in a CTRLPLANE table makes caclmgrd install an implicit default-drop for it, so all other sources lose access. caclmgrd's EXTERNAL_CLIENT service carries no built-in destination port (verified against the ACL_SERVICES map in sonic-host-services 202211 through master): the rule must provide it via L4_DST_PORT, or caclmgrd skips the whole table with only a syslog warning. The GNMI_ONLY rule therefore sets L4_DST_PORT from TELEMETRY|gnmi|port, falling back to the telemetry container default 8080. ACL_TABLE and ACL_RULE become generator-owned on-demand tables: they are dropped from the base config up front and rebuilt only when the device has an IPv4 OOB IP, so stale entries cannot survive a regen and devices without an OOB IP get no ACL tables at all. The new tables are not yet covered by the generated YANG schemas, so the validator reports them as warnings (validation skipped), not errors. Closes #2330 AI-assisted: Claude Code Signed-off-by: Christian Berendt --- .../tasks/conductor/sonic/config_generator.py | 84 ++++++++++++- .../test_config_generator_ctrlplane_acls.py | 119 ++++++++++++++++++ 2 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 tests/unit/tasks/conductor/sonic/test_config_generator_ctrlplane_acls.py diff --git a/osism/tasks/conductor/sonic/config_generator.py b/osism/tasks/conductor/sonic/config_generator.py index cc7658a5..7b9746bc 100644 --- a/osism/tasks/conductor/sonic/config_generator.py +++ b/osism/tasks/conductor/sonic/config_generator.py @@ -56,6 +56,10 @@ # VXLAN VTEP name used for VXLAN tunnel configuration VXLAN_VTEP_NAME = "vtepServ" +# Default listen port of the SONiC telemetry/gNMI container, used for the +# GNMI_ONLY control-plane ACL rule when TELEMETRY|gnmi does not set a port. +DEFAULT_GNMI_PORT = "8080" + # Top-level scaffold keys that the orchestrator and downstream helpers index # into directly. Kept as a single source of truth so that the production code # and the test helpers cannot drift apart when keys are added or removed. @@ -105,11 +109,11 @@ # difference being only that the generator depends on this one as an input. # Distinct from inherited tables, which the generator also writes selected # fields into; the distinction is read-only vs. read-and-update, not the access -# syntax (inherited tables are read via config.get() too). Empty for now; the -# first consumer is the gNMI listen port, read from TELEMETRY. Must stay +# syntax (inherited tables are read via config.get() too). The first (and so +# far only) consumer is the gNMI listen port, read from TELEMETRY. Must stay # disjoint from OWNED_TABLE_KEYS -- an image-consumed table dropped up front # would always read back empty. -IMAGE_CONSUMED_TABLE_KEYS = () +IMAGE_CONSUMED_TABLE_KEYS = ("TELEMETRY",) # Owned tables that are also scaffolded: every scaffold key except the # inherited ones. The orchestrator setdefault-creates these up front, so @@ -126,6 +130,8 @@ # defaults (location "Data Center", contact "info@example.com") even when the # device has no SNMP data in NetBox. ON_DEMAND_OWNED_TABLE_KEYS = ( + "ACL_RULE", + "ACL_TABLE", "ROUTE_REDISTRIBUTE", "SNMP_SERVER", "SNMP_AGENT_ADDRESS_CONFIG", @@ -424,6 +430,8 @@ def generate_sonic_config(device, hwsku, device_as_mapping=None, config_version= config["MGMT_INTERFACE"][f"eth0|{oob_ip}/{prefix_len}"] = {} metalbox_ip = _get_metalbox_ip_for_device(device) config["STATIC_ROUTE"]["mgmt|0.0.0.0/0"] = {"nexthop": metalbox_ip} + # Restrict control-plane services (SNMP, gNMI) to the OOB network + _add_ctrlplane_acls(config, oob_ip, prefix_len) else: oob_ip = None @@ -2326,3 +2334,73 @@ def _add_snmp_configuration(config, device, oob_ip): counter += 1 logger.debug(f"Added snmp_server_target {host}") + + +def _get_gnmi_port(config): + """Return the gNMI server port for the GNMI_ONLY control-plane ACL rule. + + The telemetry/gNMI container reads its listen port from + TELEMETRY|gnmi|port and falls back to 8080 when unset, so the ACL rule + follows the same lookup against the (image-consumed) TELEMETRY table. + """ + gnmi_config = config.get("TELEMETRY", {}).get("gnmi", {}) + return str(gnmi_config.get("port", DEFAULT_GNMI_PORT)) + + +def _add_ctrlplane_acls(config, oob_ip, prefix_len): + """Add control-plane ACLs restricting SNMP and gNMI to the OOB network. + + Emits ACL_TABLE entries of type CTRLPLANE bound to the SNMP and + EXTERNAL_CLIENT (gNMI/telemetry) caclmgrd services, plus one ACL_RULE + per table accepting only the device's OOB management subnet (the + network-normalised oob_ip/prefix_len). caclmgrd installs an implicit + default-drop for every service bound in a CTRLPLANE table, so sources + outside the OOB subnet can no longer reach SNMP or gNMI. The SSH_ONLY + table (#2329) belongs here as well once implemented. + + caclmgrd's EXTERNAL_CLIENT service has no built-in destination port + (verified against sonic-host-services 202211 through master): the rule + must carry it in L4_DST_PORT, otherwise caclmgrd skips the whole table + and the gNMI restriction would silently not exist. + + ACL_TABLE and ACL_RULE are generator-owned (ON_DEMAND_OWNED_TABLE_KEYS), + so they are rebuilt from scratch on every regen and stay absent when the + device has no OOB IP. They are also multi-owner + (MULTI_OWNER_OWNED_TABLE_KEYS): the SSH_ONLY table (#2329) belongs here as + well once implemented, so this helper merges only its own SNMP_ONLY / + GNMI_ONLY keys per key rather than rebinding the table wholesale -- the + central owned-table drop in generate_sonic_config clears stale entries up + front, and per-key merge lets coexisting control-plane helpers compose. The + rules are IPv4 (SRC_IP); a non-IPv4 OOB IP logs a warning and emits nothing + rather than failing the whole config generation. + """ + network = ipaddress.ip_network(f"{oob_ip}/{prefix_len}", strict=False) + if network.version != 4: + logger.warning( + f"OOB IP {oob_ip}/{prefix_len} is not IPv4; skipping control-plane ACLs" + ) + return + + accept_from_oob = { + "PRIORITY": "9999", + "PACKET_ACTION": "ACCEPT", + "SRC_IP": str(network), + "IP_TYPE": "IP", + } + config.setdefault("ACL_TABLE", {}) + config["ACL_TABLE"]["SNMP_ONLY"] = { + "policy_desc": "SNMP_ONLY", + "type": "CTRLPLANE", + "services": ["SNMP"], + } + config["ACL_TABLE"]["GNMI_ONLY"] = { + "policy_desc": "GNMI_ONLY", + "type": "CTRLPLANE", + "services": ["EXTERNAL_CLIENT"], + } + config.setdefault("ACL_RULE", {}) + config["ACL_RULE"]["SNMP_ONLY|RULE_1"] = dict(accept_from_oob) + config["ACL_RULE"]["GNMI_ONLY|RULE_1"] = { + **accept_from_oob, + "L4_DST_PORT": _get_gnmi_port(config), + } diff --git a/tests/unit/tasks/conductor/sonic/test_config_generator_ctrlplane_acls.py b/tests/unit/tasks/conductor/sonic/test_config_generator_ctrlplane_acls.py new file mode 100644 index 00000000..3f8689de --- /dev/null +++ b/tests/unit/tasks/conductor/sonic/test_config_generator_ctrlplane_acls.py @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: Apache-2.0 + +"""Unit tests for ``_add_ctrlplane_acls`` in ``config_generator`` (#2330).""" + +from osism.tasks.conductor.sonic.config_generator import ( + ON_DEMAND_OWNED_TABLE_KEYS, + TOP_LEVEL_SCAFFOLD_KEYS, + _add_ctrlplane_acls, +) + + +def test_add_ctrlplane_acls_emits_snmp_and_gnmi_ctrlplane_tables(): + config = {} + + _add_ctrlplane_acls(config, "10.42.0.5", 24) + + assert config["ACL_TABLE"] == { + "SNMP_ONLY": { + "policy_desc": "SNMP_ONLY", + "type": "CTRLPLANE", + "services": ["SNMP"], + }, + "GNMI_ONLY": { + "policy_desc": "GNMI_ONLY", + "type": "CTRLPLANE", + "services": ["EXTERNAL_CLIENT"], + }, + } + + +def test_add_ctrlplane_acls_rules_accept_network_normalised_oob_subnet(): + """SRC_IP must carry the network address, not the device's host IP.""" + config = {} + + _add_ctrlplane_acls(config, "10.42.0.5", 24) + + assert config["ACL_RULE"]["SNMP_ONLY|RULE_1"] == { + "PRIORITY": "9999", + "PACKET_ACTION": "ACCEPT", + "SRC_IP": "10.42.0.0/24", + "IP_TYPE": "IP", + } + assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"] == { + "PRIORITY": "9999", + "PACKET_ACTION": "ACCEPT", + "SRC_IP": "10.42.0.0/24", + "IP_TYPE": "IP", + "L4_DST_PORT": "8080", + } + + +def test_add_ctrlplane_acls_gnmi_rule_requires_dst_port_snmp_does_not(): + """caclmgrd's EXTERNAL_CLIENT service has no built-in destination port; + without L4_DST_PORT in the rule it skips the whole table and the gNMI + restriction would silently not exist. SNMP has a fixed service port + (161) in caclmgrd's ACL_SERVICES, so its rule must not pin one.""" + config = {} + + _add_ctrlplane_acls(config, "10.42.0.5", 24) + + assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"]["L4_DST_PORT"] == "8080" + assert "L4_DST_PORT" not in config["ACL_RULE"]["SNMP_ONLY|RULE_1"] + + +def test_add_ctrlplane_acls_gnmi_port_follows_telemetry_table(): + """A TELEMETRY|gnmi|port from the base config wins over the 8080 default + (and non-string values are normalised to the string ConfigDB expects).""" + config = {"TELEMETRY": {"gnmi": {"port": 50051}}} + + _add_ctrlplane_acls(config, "10.42.0.5", 24) + + assert config["ACL_RULE"]["GNMI_ONLY|RULE_1"]["L4_DST_PORT"] == "50051" + + +def test_add_ctrlplane_acls_merges_into_co_owned_tables_per_key(): + """ACL_TABLE / ACL_RULE are multi-owner (SSH, SNMP, gNMI): the helper must + merge only its own keys, never rebind the table wholesale, so a sibling + control-plane helper's entries survive. Stale carry-over from a prior regen + is cleared by the central owned-table drop in generate_sonic_config, not + here (see the ownership model and MULTI_OWNER_OWNED_TABLE_KEYS).""" + config = { + "ACL_TABLE": {"SSH_ONLY": {"type": "CTRLPLANE", "services": ["SSH"]}}, + "ACL_RULE": {"SSH_ONLY|RULE_1": {"PRIORITY": "9999"}}, + } + + _add_ctrlplane_acls(config, "10.42.0.5", 24) + + # A sibling helper's entries are left untouched ... + assert config["ACL_TABLE"]["SSH_ONLY"] == {"type": "CTRLPLANE", "services": ["SSH"]} + assert config["ACL_RULE"]["SSH_ONLY|RULE_1"] == {"PRIORITY": "9999"} + # ... and this helper's own entries are added alongside them. + assert set(config["ACL_TABLE"]) == {"SSH_ONLY", "SNMP_ONLY", "GNMI_ONLY"} + assert set(config["ACL_RULE"]) == { + "SSH_ONLY|RULE_1", + "SNMP_ONLY|RULE_1", + "GNMI_ONLY|RULE_1", + } + + +def test_add_ctrlplane_acls_non_ipv4_oob_ip_emits_nothing(): + """The rules are IPv4-only (SRC_IP); an IPv6 OOB IP must not produce + half-correct tables — and must not break config generation either.""" + config = {} + + _add_ctrlplane_acls(config, "2001:db8::5", 64) + + assert "ACL_TABLE" not in config + assert "ACL_RULE" not in config + + +def test_ctrlplane_acl_tables_are_on_demand_owned(): + """#2330 requires ACL_TABLE/ACL_RULE to be rebuilt from scratch on every + regen and absent without an OOB IP. Both guarantees hang on the keys + being on-demand owned: owned tables are dropped from the base config up + front, and on-demand (unlike scaffolded) tables are not re-created as + empty dicts.""" + for key in ("ACL_TABLE", "ACL_RULE"): + assert key in ON_DEMAND_OWNED_TABLE_KEYS + assert key not in TOP_LEVEL_SCAFFOLD_KEYS From 5f30a862b7fc4c4526f12b2a1619cd4d60571a41 Mon Sep 17 00:00:00 2001 From: Christian Berendt Date: Wed, 10 Jun 2026 09:27:10 +0200 Subject: [PATCH 2/2] Add orchestrator wiring tests for control-plane ACLs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch _add_ctrlplane_acls in the orchestrator helper fixture like the other section helpers, so the orchestrator glue stays exercised in isolation, and pin the wiring for #2330: the helper runs exactly once with the OOB IP and prefix when one exists, and is not invoked when the device has no OOB IP — in which case the owned ACL_TABLE and ACL_RULE tables stay absent even if the base config carried stale entries. AI-assisted: Claude Code Signed-off-by: Christian Berendt --- .../test_config_generator_orchestrator.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py b/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py index be149e67..bb6a8264 100644 --- a/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py +++ b/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py @@ -95,6 +95,7 @@ def patch(name, **kw): _add_loopback_configuration=patch("_add_loopback_configuration"), _add_log_server_configuration=patch("_add_log_server_configuration"), _add_snmp_configuration=patch("_add_snmp_configuration"), + _add_ctrlplane_acls=patch("_add_ctrlplane_acls"), _add_vrf_configuration=patch("_add_vrf_configuration"), _add_portchannel_configuration=patch("_add_portchannel_configuration"), get_cached_device_interfaces=patch( @@ -395,6 +396,43 @@ def test_generate_sonic_config_no_oob_ip_leaves_mgmt_empty_and_passes_none( assert snmp_oob is None +def test_generate_sonic_config_oob_ip_wires_ctrlplane_acls( + mocker, patch_orchestrator_helpers, make_orchestrator_device +): + """With an OOB IP the orchestrator delegates the control-plane ACLs + (#2330) to ``_add_ctrlplane_acls`` with the raw OOB IP and prefix — + network normalisation is the helper's job.""" + patch_base_config(mocker) + patch_orchestrator_helpers.get_device_oob_ip.return_value = ("10.42.0.5", 24) + device = make_orchestrator_device() + + config = generate_sonic_config(device, "HWSKU") + + patch_orchestrator_helpers._add_ctrlplane_acls.assert_called_once_with( + config, "10.42.0.5", 24 + ) + + +def test_generate_sonic_config_no_oob_ip_skips_ctrlplane_acls( + mocker, patch_orchestrator_helpers, make_orchestrator_device +): + """Without an OOB IP no control-plane ACLs are wired and the owned + ACL_TABLE / ACL_RULE tables stay absent — stale base-config content is + removed by the up-front owned-table drop, not re-created.""" + base = make_base_config() + base["ACL_TABLE"] = {"SNMP_ONLY": {"type": "CTRLPLANE"}} + base["ACL_RULE"] = {"SNMP_ONLY|RULE_1": {"PRIORITY": "9999"}} + patch_base_config(mocker, base_config=base) + patch_orchestrator_helpers.get_device_oob_ip.return_value = None + device = make_orchestrator_device() + + config = generate_sonic_config(device, "HWSKU") + + patch_orchestrator_helpers._add_ctrlplane_acls.assert_not_called() + assert "ACL_TABLE" not in config + assert "ACL_RULE" not in config + + # --------------------------------------------------------------------------- # generate_sonic_config — breakout merge # ---------------------------------------------------------------------------