# SPDX-License-Identifier: Apache-2.0

import json
import subprocess

from cliff.command import Command
from loguru import logger
from redis.exceptions import RedisError

from osism import utils
from osism.utils.inventory import get_hosts_from_inventory, get_inventory_path


class Facts(Command):
    """Reset (clear) the cached Ansible facts.

    By default the whole fact cache is flushed. Use ``--limit`` to clear
    only the facts of selected hosts or groups. The command does not
    gather new facts; the cache is rebuilt on the next Ansible run that
    collects facts.
    """

    # Prefix shared by all fact-cache keys in Redis; the key of a single
    # host is this prefix directly followed by the host name.
    _KEY_PREFIX = "ansible_facts"

    def get_parser(self, prog_name):
        """Extend the base parser with the optional ``-l/--limit`` pattern."""
        parser = super().get_parser(prog_name)
        parser.add_argument(
            "-l",
            "--limit",
            type=str,
            help="Limit the reset to selected hosts or groups (Ansible host pattern)",
        )
        return parser

    def take_action(self, parsed_args):
        """Clear the fact cache, fully or restricted by ``--limit``.

        Returns:
            0 on success (including "nothing to delete"), 1 on failure.
        """
        if parsed_args.limit:
            return self._reset_limited(parsed_args.limit)
        return self._reset_all()

    def _reset_all(self):
        """Delete every Redis key that matches ``ansible_facts*``.

        Keys are discovered with repeated ``SCAN`` calls and deleted batch
        by batch. If Redis fails part-way through, batches deleted before
        the failure stay deleted.

        Returns:
            0 on success, 1 if a Redis error occurred.
        """
        removed = 0
        try:
            cursor = 0
            while True:
                cursor, batch = utils.redis.scan(
                    cursor, match=f"{self._KEY_PREFIX}*", count=100
                )
                if batch:
                    utils.redis.delete(*batch)
                    # Counts the keys handed to DELETE, not the number
                    # Redis reports as actually removed.
                    removed += len(batch)
                # SCAN hands back cursor 0 once the iteration is complete.
                if cursor == 0:
                    break
        except RedisError as exc:
            logger.error(f"Failed to reset Ansible fact cache: {exc}")
            return 1

        logger.info(f"Removed cached facts for {removed} host(s)")
        return 0

    def _load_inventory(self, limit):
        """Run ``ansible-inventory --list --limit <limit>`` and parse its JSON.

        Every failure is logged here so the caller only has to map ``None``
        to a non-zero exit code.

        Args:
            limit: Ansible host pattern passed through to ``--limit``.

        Returns:
            The parsed inventory, or ``None`` if the command could not be
            started, timed out, exited non-zero or printed invalid JSON.
        """
        command = [
            "ansible-inventory",
            "-i",
            get_inventory_path("/ansible/inventory/hosts.yml"),
            "--list",
            "--limit",
            limit,
        ]
        try:
            result = subprocess.run(
                command,
                capture_output=True,
                text=True,
                timeout=30,
            )
        except subprocess.TimeoutExpired:
            logger.error("Timeout loading inventory.")
            return None
        except OSError as exc:
            # Raised when the executable cannot be started at all, e.g.
            # ``ansible-inventory`` is missing or not executable.
            logger.error(f"Error loading inventory: {exc}")
            return None

        if result.returncode != 0:
            logger.error(
                f"Error loading inventory (rc={result.returncode}): "
                f"{result.stderr}"
            )
            return None

        try:
            return json.loads(result.stdout)
        except json.JSONDecodeError as exc:
            logger.error(f"Failed to parse inventory output: {exc}")
            return None

    def _reset_limited(self, limit):
        """Delete the cached facts of the hosts selected by ``limit``.

        Args:
            limit: Ansible host pattern (hosts or groups).

        Returns:
            0 on success or when no host matched, 1 if the inventory could
            not be loaded or Redis reported an error.
        """
        inventory = self._load_inventory(limit)
        # An empty inventory ({}) is a valid result, so test for None
        # explicitly instead of relying on truthiness.
        if inventory is None:
            return 1

        hosts = get_hosts_from_inventory(inventory)

        if not hosts:
            logger.warning("No hosts matched the given limit.")
            return 0

        keys = [f"{self._KEY_PREFIX}{host}" for host in hosts]
        try:
            # DELETE returns how many of the given keys existed, so hosts
            # without cached facts are not counted.
            deleted = utils.redis.delete(*keys)
        except RedisError as exc:
            logger.error(f"Failed to reset Ansible fact cache: {exc}")
            return 1

        logger.info(f"Removed cached facts for {deleted} host(s)")
        return 0
# SPDX-License-Identifier: Apache-2.0

"""Unit tests for the ``osism reset facts`` command.

Two paths are exercised. The flush-all path removes every
``ansible_facts*`` key (and is a no-op on an empty cache). The ``--limit``
path removes only the keys of the hosts the pattern resolves to. The
failure contracts for a broken inventory load and an unreachable Redis
are covered as well.
"""

import subprocess
from unittest.mock import MagicMock, patch

import pytest
from redis.exceptions import RedisError

from osism.commands import reset

# Value handed out by the patched ``get_inventory_path`` in every
# ``--limit`` test.
_INVENTORY_PATH = "/ansible/inventory/hosts.yml"


def _make():
    """Build a ``Facts`` command with mocked app and app-args objects."""
    command = reset.Facts(MagicMock(), MagicMock())
    return command


def _parse(*args):
    """Parse ``args`` with the command's own argument parser."""
    parser = _make().get_parser("test")
    return parser.parse_args(list(args))


def _messages(records, level):
    """Return the messages of all captured log records with ``level``."""
    return [record["message"] for record in records if record["level"] == level]


def _take_limited_action(hosts=None, **run_behaviour):
    """Run ``reset facts -l control`` with the inventory helpers patched.

    ``run_behaviour`` is forwarded to ``patch`` for ``subprocess.run``
    (``return_value=...`` or ``side_effect=...``). ``get_hosts_from_inventory``
    is only patched when ``hosts`` is given, because the failure tests must
    never reach it.
    """
    with patch(
        "osism.commands.reset.get_inventory_path",
        return_value=_INVENTORY_PATH,
    ), patch("osism.commands.reset.subprocess.run", **run_behaviour):
        if hosts is None:
            return _make().take_action(_parse("-l", "control"))
        with patch(
            "osism.commands.reset.get_hosts_from_inventory",
            return_value=hosts,
        ):
            return _make().take_action(_parse("-l", "control"))


@pytest.fixture
def mock_redis():
    """Yield a mock Redis client installed where the command looks it up.

    ``osism.utils.redis`` is a lazily-initialised module attribute that
    opens a real connection on first access, so both the attribute and its
    initialiser are patched to keep the tests offline.
    """
    client = MagicMock()
    init_patch = patch("osism.utils._init_redis", return_value=client)
    attr_patch = patch("osism.commands.reset.utils.redis", client, create=True)
    with init_patch, attr_patch:
        yield client


# --- flush-all path ---


def test_facts_flushes_all_keys_when_no_limit(mock_redis, loguru_logs):
    """Without ``--limit`` every scanned key is deleted and counted."""
    cached_keys = [b"ansible_factsnode1", b"ansible_factsnode2"]
    mock_redis.scan.return_value = (0, cached_keys)

    rc = _make().take_action(_parse())

    assert rc == 0
    mock_redis.scan.assert_called_once_with(0, match="ansible_facts*", count=100)
    mock_redis.delete.assert_called_once_with(*cached_keys)
    all_messages = [record["message"] for record in loguru_logs]
    assert any("2 host(s)" in message for message in all_messages)


def test_facts_succeeds_and_skips_delete_when_cache_empty(mock_redis, loguru_logs):
    """An empty cache is a successful no-op that reports zero hosts."""
    mock_redis.scan.return_value = (0, [])

    rc = _make().take_action(_parse())

    assert rc == 0
    mock_redis.delete.assert_not_called()
    assert any(
        "0 host(s)" in message for message in _messages(loguru_logs, "INFO")
    )


def test_facts_returns_nonzero_on_redis_error(mock_redis, loguru_logs):
    """A Redis failure during the scan is logged and returns 1."""
    mock_redis.scan.side_effect = RedisError("connection refused")

    rc = _make().take_action(_parse())

    assert rc == 1
    mock_redis.delete.assert_not_called()
    assert any(
        "Failed to reset Ansible fact cache" in message
        for message in _messages(loguru_logs, "ERROR")
    )


# --- limited path ---


def test_facts_limit_deletes_only_selected_hosts(mock_redis):
    """With ``--limit`` only the resolved hosts' keys are deleted."""
    mock_redis.delete.return_value = 1

    rc = _take_limited_action(
        hosts=["node1", "node2"],
        return_value=MagicMock(returncode=0, stdout="{}"),
    )

    assert rc == 0
    mock_redis.scan.assert_not_called()
    mock_redis.delete.assert_called_once_with(
        "ansible_factsnode1", "ansible_factsnode2"
    )


def test_facts_limit_returns_nonzero_when_inventory_fails(mock_redis, loguru_logs):
    """A non-zero ``ansible-inventory`` exit is logged with its stderr."""
    rc = _take_limited_action(
        return_value=MagicMock(returncode=1, stderr="boom"),
    )

    assert rc == 1
    mock_redis.delete.assert_not_called()
    error_messages = _messages(loguru_logs, "ERROR")
    assert any("Error loading inventory" in message for message in error_messages)
    assert any("boom" in message for message in error_messages)


def test_facts_limit_returns_nonzero_on_invalid_inventory_json(mock_redis, loguru_logs):
    """Unparseable inventory output is logged and returns 1."""
    rc = _take_limited_action(
        return_value=MagicMock(returncode=0, stdout="{not valid json"),
    )

    assert rc == 1
    mock_redis.delete.assert_not_called()
    assert any(
        "Failed to parse inventory output" in message
        for message in _messages(loguru_logs, "ERROR")
    )


def test_facts_limit_returns_nonzero_when_inventory_times_out(mock_redis, loguru_logs):
    """A timed-out ``ansible-inventory`` call is logged and returns 1."""
    rc = _take_limited_action(
        side_effect=subprocess.TimeoutExpired("ansible-inventory", 30),
    )

    assert rc == 1
    mock_redis.delete.assert_not_called()
    assert any(
        "Timeout loading inventory." in message
        for message in _messages(loguru_logs, "ERROR")
    )


def test_facts_limit_warns_and_succeeds_when_no_hosts_match(mock_redis, loguru_logs):
    """A limit that matches nothing warns, deletes nothing and returns 0."""
    rc = _take_limited_action(
        hosts=[],
        return_value=MagicMock(returncode=0, stdout="{}"),
    )

    assert rc == 0
    mock_redis.delete.assert_not_called()
    assert any(
        "No hosts matched the given limit." in message
        for message in _messages(loguru_logs, "WARNING")
    )


def test_facts_limit_returns_nonzero_on_redis_error(mock_redis, loguru_logs):
    """A Redis failure while deleting the selected keys returns 1."""
    mock_redis.delete.side_effect = RedisError("connection refused")

    rc = _take_limited_action(
        hosts=["node1", "node2"],
        return_value=MagicMock(returncode=0, stdout="{}"),
    )

    assert rc == 1
    assert any(
        "Failed to reset Ansible fact cache" in message
        for message in _messages(loguru_logs, "ERROR")
    )