diff --git a/haystack/__init__.py b/haystack/__init__.py index 2699f5bac7..e0f3285da0 100644 --- a/haystack/__init__.py +++ b/haystack/__init__.py @@ -18,8 +18,9 @@ from haystack.version import __version__ # noqa: F401 # Initialize the logging configuration -# This is a no-op unless `structlog` is installed -haystack.logging.configure_logging() +# This is a no-op unless `structlog` is installed, and `force=False` makes it skip configuration if `structlog` has +# already been configured by the host application (so importing Haystack does not overwrite their setup). +haystack.logging.configure_logging(force=False) # Same for tracing (no op if `opentelemetry` or `ddtrace` is not installed) haystack.tracing.auto_enable_tracing() diff --git a/haystack/logging.py b/haystack/logging.py index 76139aa5b6..36373c52aa 100644 --- a/haystack/logging.py +++ b/haystack/logging.py @@ -8,6 +8,7 @@ import os import sys import typing +from collections.abc import Sequence from typing import Any if typing.TYPE_CHECKING: @@ -16,6 +17,10 @@ HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON" HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG" +# Attribute set on a logger once we have patched its methods. `logging.getLogger` returns a shared singleton, so we +# use this marker to patch each logger only once and avoid wrapping the already-wrapped methods on repeated calls. +_PATCHED_MARKER = "_haystack_patched" + class PatchedLogger(typing.Protocol): """Class which enables using type checkers to find wrong logger usage.""" @@ -220,16 +225,13 @@ def _patch_structlog_call_information(logger: logging.Logger) -> None: if not isinstance(logger, _FixedFindCallerLogger): return - # completely copied from structlog. We only add `haystack.logging` to the list of ignored frames + # Copied from structlog's `_FixedFindCallerLogger.findCaller`, only adding `haystack.logging` to the list of + # ignored frames so our own logging wrappers don't show up as the caller. We deliberately do not forward + # `stacklevel` to `_find_first_app_frame_and_name`: that parameter only exists in structlog >= 25.5.0 and + # structlog is an optional dependency, so forwarding it would break logging on older versions. def findCaller(stack_info: bool = False, stacklevel: int = 1) -> tuple[str, int, str, str | None]: # noqa: ARG001 - try: - sinfo: str | None - # we need to exclude `haystack.logging` from the stack - f, name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) - sinfo = _format_stack(f) if stack_info else None - except Exception as error: - print(f"Error in findCaller: {error}") - + f, _name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) + sinfo = _format_stack(f) if stack_info else None return f.f_code.co_filename, f.f_lineno, f.f_code.co_name, sinfo logger.findCaller = findCaller # type: ignore @@ -247,6 +249,11 @@ def getLogger(name: str) -> PatchedLogger: - it makes structure logging effective, not just an available feature """ logger = logging.getLogger(name) + if getattr(logger, _PATCHED_MARKER, False): + # Already patched: `logging.getLogger` returned the same singleton, so re-patching would stack the wrappers + # and interpolate the message more than once. + return typing.cast(PatchedLogger, logger) + logger.debug = patch_log_method_to_kwargs_only(logger.debug) # type: ignore logger.info = patch_log_method_to_kwargs_only(logger.info) # type: ignore logger.warn = patch_log_method_to_kwargs_only(logger.warn) # type: ignore @@ -262,6 +269,8 @@ def getLogger(name: str) -> PatchedLogger: # We also patch the `makeRecord` method to use keyword string interpolation logger.makeRecord = patch_make_records_to_use_kwarg_string_interpolation(logger.makeRecord) # type: ignore + setattr(logger, _PATCHED_MARKER, True) + return typing.cast(PatchedLogger, logger) @@ -295,7 +304,12 @@ def correlate_logs_with_traces(_: "WrappedLogger", __: str, event_dict: "EventDi return event_dict -def configure_logging(use_json: bool | None = None) -> None: +def configure_logging( + use_json: bool | None = None, + logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"), + propagate: bool = True, + force: bool = True, +) -> None: """ Configure logging for Haystack. @@ -306,6 +320,25 @@ def configure_logging(use_json: bool | None = None) -> None: - If `structlog` is installed, you can JSON format all logs. Enable this by - setting the `use_json` parameter to `True` when calling this function - setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` + + :param use_json: Whether to format logs as JSON. If `None`, we try to guess based on the environment. + :param logger_name: + The name (or names) of the logger our formatting handler is attached to. Defaults to Haystack's own + namespaces (`"haystack"`, `"haystack_integrations"` and `"haystack_experimental"`), so that we only touch + Haystack's own loggers and leave the logging configuration of the host application and any other libraries + running in the same process untouched. Pass an empty string (`""`) to attach the handler to the root logger + instead - this restores the legacy behavior of formatting *every* log record in the process. + :param propagate: + Whether the configured loggers should propagate their records to ancestor loggers (ultimately the root + logger). The default (`True`) keeps records flowing to handlers configured by the host application and to + capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its + own logs - this avoids duplicate log lines when the host application also configures the root logger. It has + no effect when `logger_name=""` (the root logger has no ancestors). + :param force: + Whether to (re)configure logging even if `structlog` has already been configured by someone else. The default + (`True`) means an explicit call always takes over. Pass `False` to make this a no-op when `structlog` is + already configured - this is used by the import-time call in `haystack/__init__.py` so that merely importing + Haystack does not overwrite a `structlog` configuration set up by the host application. """ import haystack.utils.jupyter # to avoid circular imports @@ -322,6 +355,11 @@ def configure_logging(use_json: bool | None = None) -> None: # If the user wants to ignore structlog, we don't configure it and fall back to standard logging return + # When not forcing, skip configuration if structlog is already configured (e.g. by the host application) so we + # leave its configuration and handlers untouched. + if not force and structlog.is_configured(): + return + # We roughly follow the structlog documentation here: # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging # This means that we use structlog to format the log entries for entries emitted via `logging` and `structlog`. @@ -352,11 +390,16 @@ def configure_logging(use_json: bool | None = None) -> None: shared_processors.append(correlate_logs_with_traces) structlog.configure( - processors=shared_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter], + # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so changes + # to the log level made after `configure_logging` runs (e.g. by the host application) are respected. + processors=[ + structlog.stdlib.filter_by_level, + *shared_processors, + structlog.stdlib.ProcessorFormatter.wrap_for_formatter, + ], logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), cache_logger_on_first_use=True, - # This is a filter that will filter out log entries that are below the log level of the root logger. - wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()), + wrapper_class=structlog.stdlib.BoundLogger, ) renderers: list[Processor] @@ -392,12 +435,17 @@ def configure_logging(use_json: bool | None = None) -> None: # Use OUR `ProcessorFormatter` to format all `logging` entries. handler.setFormatter(formatter) - root_logger = logging.getLogger() - # avoid adding our handler twice - old_handlers = [ - h - for h in root_logger.handlers - if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") - ] - new_handlers = [handler, *old_handlers] - root_logger.handlers = new_handlers + # By default we attach to Haystack's own namespaces so that we only format Haystack's log records and leave the + # loggers of the host application and other libraries in the same process untouched. Pass `logger_name=""` to + # attach to the root logger instead (legacy behavior - formats every record in the process). + logger_names = [logger_name] if isinstance(logger_name, str) else list(logger_name) + for name in logger_names: + target_logger = logging.getLogger(name) + # avoid adding our handler twice + old_handlers = [ + h + for h in target_logger.handlers + if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") + ] + target_logger.handlers = [handler, *old_handlers] + target_logger.propagate = propagate diff --git a/haystack/utils/requests_utils.py b/haystack/utils/requests_utils.py index c5270cc055..642b0ad45e 100644 --- a/haystack/utils/requests_utils.py +++ b/haystack/utils/requests_utils.py @@ -8,7 +8,10 @@ import httpx from tenacity import after_log, before_log, retry, retry_if_exception_type, stop_after_attempt, wait_exponential -logger = logging.getLogger(__file__) +# NOTE: this uses the standard library logger (not `haystack.logging`) on purpose: tenacity's `before_log`/`after_log` +# call the logger with positional arguments, which Haystack's keyword-only patched logger would reject. We still name +# it with `__name__` so it lives under the `haystack` namespace and is picked up by `configure_logging`. +logger = logging.getLogger(__name__) def request_with_retry( diff --git a/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml new file mode 100644 index 0000000000..d49aaf8f3f --- /dev/null +++ b/releasenotes/notes/scope-logging-configuration-4a38bf0c8ea89fc9.yaml @@ -0,0 +1,34 @@ +--- +upgrade: + - | + ``haystack.logging.configure_logging`` now attaches its formatting handler to Haystack's own logger namespaces + (``haystack``, ``haystack_integrations`` and ``haystack_experimental``) instead of the root logger. This makes + Haystack a better-behaved library when it runs next to other services in the same process: the log records of the + host application and other libraries are no longer reformatted or duplicated by Haystack. To restore the previous + behavior of formatting every log record in the process, call ``configure_logging(logger_name="")``. +fixes: + - | + ``haystack.logging.configure_logging`` no longer freezes the log level at import time. Previously the level was + captured once when the function ran (at ``import haystack``), so a log level set later by the application was + ignored for ``structlog``-native loggers. The effective level is now read on every log call. + - | + Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of + ``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace. + - | + Importing Haystack no longer overwrites a ``structlog`` configuration that the host application already set up. + The import-time call to ``configure_logging`` now skips configuration when ``structlog`` is already configured. + - | + ``haystack.logging.getLogger`` is now idempotent. Previously, calling it more than once for the same logger name + wrapped the already-wrapped logger methods again, which caused the log message to be run through ``str.format`` + once per call. As a result a field value containing ``{...}`` could be re-interpolated and pull in the value of + another field. Each logger is now patched only once. + - | + The patched ``findCaller`` used to determine the source of a log entry no longer prints to stdout and no longer + masks errors with a misleading ``NameError``, matching structlog's own ``findCaller`` implementation. +features: + - | + ``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the + formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to + ancestor loggers, and ``force`` to control whether an existing ``structlog`` configuration is replaced. Set + ``propagate=False`` to let Haystack fully own the output of its own logs and avoid duplicate log lines when the + host application also configures the root logger. diff --git a/test/test_logging.py b/test/test_logging.py index 20bdd5947f..3ae48ad6d7 100644 --- a/test/test_logging.py +++ b/test/test_logging.py @@ -7,13 +7,15 @@ import logging import os import sys -from collections.abc import Generator +from collections.abc import Callable, Generator from datetime import datetime, timezone from pathlib import Path from unittest.mock import ANY import pytest import structlog +import structlog._frames +import structlog.stdlib from _pytest.capture import CaptureFixture from _pytest.logging import LogCaptureFixture from _pytest.monkeypatch import MonkeyPatch @@ -25,10 +27,59 @@ @pytest.fixture(autouse=True) def reset_logging_config() -> None: - old_handlers = logging.root.handlers.copy() + # `configure_logging` attaches its handler to Haystack's own namespaces (and may flip `propagate`), so we snapshot + # and restore the root logger plus those namespaces to keep tests isolated. + names = ["haystack", "haystack_integrations", "haystack_experimental"] + root_handlers = logging.root.handlers.copy() + snapshots = {name: _snapshot_logger(name) for name in names} yield - # Reset the logging configuration after each test to avoid impacting other tests - logging.root.handlers = old_handlers + logging.root.handlers = root_handlers + for name, (handlers, propagate, level) in snapshots.items(): + logger = logging.getLogger(name) + logger.handlers = handlers + logger.propagate = propagate + logger.setLevel(level) + + +def _snapshot_logger(name: str) -> tuple[list[logging.Handler], bool, int]: + logger = logging.getLogger(name) + return logger.handlers.copy(), logger.propagate, logger.level + + +@pytest.fixture() +def restore_named_loggers() -> Generator[Callable[[str], logging.Logger], None, None]: + """Snapshot a named logger's handlers/propagate/level and restore them after the test.""" + snapshots: dict[str, tuple[list[logging.Handler], bool, int]] = {} + + def _snapshot(name: str) -> logging.Logger: + logger = logging.getLogger(name) + snapshots[name] = (logger.handlers.copy(), logger.propagate, logger.level) + return logger + + yield _snapshot + + for name, (handlers, propagate, level) in snapshots.items(): + logger = logging.getLogger(name) + logger.handlers = handlers + logger.propagate = propagate + logger.setLevel(level) + + +@pytest.fixture() +def restore_structlog_config() -> Generator[None, None, None]: + """Snapshot the global structlog configuration and restore it after the test.""" + was_configured = structlog.is_configured() + config = structlog.get_config() + yield + if was_configured: + structlog.configure(**config) + else: + structlog.reset_defaults() + + +def _sentinel_processor(logger: object, method_name: str, event_dict: dict) -> dict: + """A no-op processor used to detect whether an existing structlog config was left untouched.""" + return event_dict @pytest.fixture() @@ -47,7 +98,7 @@ def test_skip_logging_configuration( monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module @@ -61,7 +112,7 @@ class TestStructuredLoggingConsoleRendering: def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.setLevel(logging.INFO) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -72,7 +123,7 @@ def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -89,7 +140,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_python_config( haystack_logging.configure_logging(use_json=False) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -105,7 +156,7 @@ def test_console_rendered_structured_log_if_in_ipython( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -121,7 +172,7 @@ def test_console_rendered_structured_log_even_in_jupyter( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -137,7 +188,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -149,7 +200,7 @@ def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -170,7 +221,7 @@ def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: def test_logging_exceptions(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") def function_that_raises_and_adds_to_stack_trace(): raise ValueError("This is an error") @@ -188,7 +239,7 @@ def function_that_raises_and_adds_to_stack_trace(): def test_logging_of_contextvars(self, capfd: CaptureFixture, set_context_var_key: str) -> None: haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -202,7 +253,7 @@ def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: M monkeypatch.setattr(sys.stderr, "isatty", lambda: False) haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -216,13 +267,13 @@ def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: M "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_as_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -236,14 +287,14 @@ def test_logging_as_json(self, capfd: CaptureFixture) -> None: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -257,7 +308,7 @@ def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypat "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_of_contextvars( @@ -266,7 +317,7 @@ def test_logging_of_contextvars( monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result @@ -281,13 +332,13 @@ def test_logging_of_contextvars( "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") def function_that_raises_and_adds_to_stack_trace(): my_local_variable = "my_local_variable" # noqa: F841 @@ -306,7 +357,7 @@ def function_that_raises_and_adds_to_stack_trace(): "level": "error", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", "exception": [ { "exc_notes": [], @@ -340,7 +391,7 @@ def test_trace_log_correlation_python_logs_with_console_rendering( haystack_logging.configure_logging(use_json=False) with spying_tracer.trace("test-operation"): - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err @@ -350,7 +401,7 @@ def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, ca haystack_logging.configure_logging(use_json=True) with spying_tracer.trace("test-operation") as span: - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err @@ -365,13 +416,13 @@ def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, ca "trace_id": span.trace_id, "span_id": span.span_id, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -385,13 +436,13 @@ def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) @@ -405,7 +456,7 @@ def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: "level": "warning", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } @@ -416,12 +467,12 @@ def test_correct_stack_level_with_stdlib_rendering( monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() - logger = logging.getLogger(__name__) + logger = logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module assert caplog.messages == ["Hello, structured logging!"] - assert caplog.records[0].name == "test.test_logging" + assert caplog.records[0].name == "haystack.test_logging" # Nothing should be captured by capfd since structlog is not configured assert capfd.readouterr().err == "" @@ -429,11 +480,11 @@ def test_correct_stack_level_with_stdlib_rendering( def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err - assert "test.test_logging" in output + assert "haystack.test_logging" in output @pytest.mark.parametrize( "method, expected_level", @@ -450,7 +501,7 @@ def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) @@ -467,13 +518,13 @@ def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_le "level": expected_level, "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2") @@ -489,13 +540,13 @@ def test_log(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log_json_content(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, 'Hello, structured: {"key": "value"}', key="logging", key1="value1", key2="value2") @@ -511,13 +562,13 @@ def test_log_json_content(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2") @@ -533,7 +584,7 @@ def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: "level": "debug", "timestamp": ANY, "lineno": ANY, - "module": "test.test_logging", + "module": "haystack.test_logging", } @pytest.mark.parametrize( @@ -551,7 +602,7 @@ def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) with pytest.raises(TypeError): @@ -572,7 +623,7 @@ def test_haystack_logger_with_positional_args(self, method: str, expected_level: def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) - logger = haystack_logging.getLogger(__name__) + logger = haystack_logging.getLogger("haystack.test_logging") logger.setLevel(logging.DEBUG) # does not raise - hence we need to check this separately @@ -600,3 +651,233 @@ def test_that_haystack_logger_is_used(self) -> None: f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the " f"standard library logger and add plenty of keyword arguments." ) + + +class TestLoggingScope: + """ + Haystack is a library that usually runs next to other services in the same process (e.g. a web server, or another + app's logging setup). These tests pin down that `configure_logging` only touches Haystack's own loggers and does + not reformat or hijack the logs of everything else in the process. + """ + + def test_handler_is_attached_to_haystack_namespaces_and_not_root(self) -> None: + haystack_logging.configure_logging(use_json=True) + + # Haystack's own namespace and the ones used by integration and experimental packages get the handler. + for name in ["haystack", "haystack_integrations", "haystack_experimental"]: + logger = logging.getLogger(name) + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logger.handlers) + + # The root logger - shared by every other library/service in the process - is left untouched. + assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logging.root.handlers) + + def test_integrations_logs_are_formatted_by_haystack(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + logging.getLogger("haystack_integrations.components.demo").warning("a log line from an integration") + + output = capfd.readouterr().err + assert json.loads(output)["event"] == "a log line from an integration" + + def test_other_services_logs_are_not_reformatted_by_haystack( + self, capfd: CaptureFixture, restore_named_loggers: Callable[[str], logging.Logger] + ) -> None: + # Stand-in for another service that configured its own plain-text logging (e.g. uvicorn) before Haystack runs. + other_service = restore_named_loggers("some_other_service") + other_service.handlers = [logging.StreamHandler()] + other_service.setLevel(logging.INFO) + other_service.propagate = False + + haystack_logging.configure_logging(use_json=True) + + other_service.info("a log line from another service") + logging.getLogger("haystack.test_logging").warning("a log line from haystack") + + lines = [line for line in capfd.readouterr().err.splitlines() if line.strip()] + + # Haystack formats its own record as JSON ... + haystack_json = [json.loads(line) for line in lines if line.startswith("{") and "from haystack" in line] + assert any(record["event"] == "a log line from haystack" for record in haystack_json) + + # ... but the other service's record stays exactly as that service rendered it - plain text, not Haystack JSON. + plain_lines = [line for line in lines if "from another service" in line] + assert plain_lines + for line in plain_lines: + with pytest.raises(json.JSONDecodeError): + json.loads(line) + + def test_legacy_root_behavior_still_available_via_empty_logger_name(self, capfd: CaptureFixture) -> None: + # Opt back into the old behavior of formatting *every* record in the process. + haystack_logging.configure_logging(use_json=True, logger_name="") + + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in logging.root.handlers) + + # A non-Haystack logger is now formatted by Haystack because the handler sits on the root logger. + logging.getLogger("some_other_service").warning("formatted by haystack") + output = capfd.readouterr().err + assert json.loads(output)["event"] == "formatted by haystack" + + def test_propagate_is_true_by_default(self, caplog: LogCaptureFixture) -> None: + # The default keeps records flowing to ancestor loggers, so tooling that captures via the root logger (such as + # pytest's `caplog`) keeps working. + haystack_logging.configure_logging(use_json=True) + assert logging.getLogger("haystack").propagate is True + + with caplog.at_level(logging.WARNING, logger="haystack.test_logging"): + logging.getLogger("haystack.test_logging").warning("captured via propagation") + assert "captured via propagation" in caplog.text + + def test_propagate_false_stops_records_from_reaching_root(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True, propagate=False) + assert logging.getLogger("haystack").propagate is False + + # A plain handler on the root logger should NOT see Haystack's records when propagation is disabled. + root_handler = logging.StreamHandler() + root_handler.setFormatter(logging.Formatter("ROOT | %(message)s")) + logging.root.addHandler(root_handler) + try: + logging.getLogger("haystack.test_logging").warning("haystack owns this line") + finally: + logging.root.removeHandler(root_handler) + + output = capfd.readouterr().err + # Formatted once by Haystack (JSON), never by the root handler. + assert "ROOT |" not in output + assert json.loads(output)["event"] == "haystack owns this line" + + +class TestDynamicLogLevel: + """ + `configure_logging` runs at import time (in `haystack/__init__.py`), long before an application sets its desired + log level. These tests pin down that a log level set *after* `configure_logging` is still respected, instead of + being frozen to whatever the root level happened to be at import time. + """ + + def test_structlog_native_logger_respects_level_lowered_after_configure(self, capfd: CaptureFixture) -> None: + # Mirror the real ordering: configure while the level is high ... + logging.getLogger("haystack").setLevel(logging.WARNING) + haystack_logging.configure_logging(use_json=True) + + # ... then have the application lower the level afterwards. + logging.getLogger("haystack").setLevel(logging.DEBUG) + + structlog.get_logger("haystack.native_dynamic_level").debug("debug emitted after lowering the level") + + assert "debug emitted after lowering the level" in capfd.readouterr().err + + def test_structlog_native_logger_still_filters_below_level(self, capfd: CaptureFixture) -> None: + logging.getLogger("haystack").setLevel(logging.INFO) + haystack_logging.configure_logging(use_json=True) + + structlog.get_logger("haystack.native_filtered_level").debug("debug below the configured level") + + assert "debug below the configured level" not in capfd.readouterr().err + + +class TestStructlogConfigIsPreserved: + """ + `structlog.configure` writes to a single process-global configuration. These tests pin down that merely importing + Haystack (which calls `configure_logging(force=False)`) does not overwrite a structlog configuration that the host + application already set up, while an explicit call still takes over. + """ + + def test_not_forced_skips_when_structlog_already_configured(self, restore_structlog_config: None) -> None: + # Stand-in for the host application configuring structlog before Haystack is imported/configured. + structlog.reset_defaults() + structlog.configure(processors=[_sentinel_processor]) + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + + haystack_logging.configure_logging(force=False) + + # The application's structlog configuration is left untouched ... + assert structlog.get_config()["processors"] == [_sentinel_processor] + # ... and we did not attach our handler on top of their setup. + assert not any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + def test_forced_takes_over_existing_structlog_config(self, restore_structlog_config: None) -> None: + structlog.reset_defaults() + structlog.configure(processors=[_sentinel_processor]) + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + + haystack_logging.configure_logging(use_json=True, force=True) + + assert structlog.get_config()["processors"] != [_sentinel_processor] + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + def test_not_forced_still_configures_when_structlog_is_unconfigured(self, restore_structlog_config: None) -> None: + # This is the real import-time situation: nobody configured structlog yet, so we set up our nice defaults. + structlog.reset_defaults() + haystack_logger = logging.getLogger("haystack") + haystack_logger.handlers = [] + assert not structlog.is_configured() + + haystack_logging.configure_logging(force=False) + + assert structlog.is_configured() + assert any(getattr(h, "name", None) == "HaystackLoggingHandler" for h in haystack_logger.handlers) + + +class TestGetLoggerIsIdempotent: + """ + `logging.getLogger(name)` returns a process-wide singleton. `haystack.logging.getLogger` patches that shared + object in place, so calling it more than once for the same name (different modules, re-imports, ...) must not wrap + the already-wrapped methods again. The user-visible symptom of re-wrapping is that the message is run through + `str.format` once per wrap, so a field value that itself contains `{...}` gets re-interpolated. + """ + + def test_repeated_get_logger_interpolates_the_message_exactly_once(self, capfd: CaptureFixture) -> None: + haystack_logging.configure_logging(use_json=True) + + # Two modules grabbing the same logger name is the realistic trigger for re-wrapping. + haystack_logging.getLogger("haystack.idempotency_test") + logger = haystack_logging.getLogger("haystack.idempotency_test") + logger.setLevel(logging.INFO) + + # `a`'s value contains a `{b}` placeholder. With a single interpolation it must be left as-is; a second + # interpolation would expand it using `b` and leak "SECRET" into the message. + logger.info("Hello {a}", a="{b}", b="SECRET") + + parsed_output = json.loads(capfd.readouterr().err) + assert parsed_output["event"] == "Hello {b}" + assert "SECRET" not in parsed_output["event"] + + def test_repeated_get_logger_does_not_rewrap_methods(self) -> None: + haystack_logging.getLogger("haystack.idempotency_identity_test") + # Capture the patched methods after the first call, before the second one runs. + patched = logging.getLogger("haystack.idempotency_identity_test") + debug_after_first = patched.debug + make_record_after_first = patched.makeRecord + + haystack_logging.getLogger("haystack.idempotency_identity_test") + + # The second call must leave the already-patched methods in place, not wrap a fresh layer on top. + assert patched.debug is debug_after_first + assert patched.makeRecord is make_record_after_first + + +class TestFindCallerMatchesStructlog: + """ + `_patch_structlog_call_information` mirrors structlog's `_FixedFindCallerLogger.findCaller`, only adding + `haystack.logging` to the ignored frames. structlog itself does not guard the frame lookup, so neither do we: any + error must propagate as-is instead of being swallowed and printed to stdout. + """ + + def test_find_caller_does_not_print_or_mask_errors(self, capsys: CaptureFixture, monkeypatch: MonkeyPatch) -> None: + # Force the frame lookup to fail. It is imported inside `_patch_structlog_call_information`, so we patch the + # module attribute before patching the logger. + def boom(*args: object, **kwargs: object) -> tuple: + raise RuntimeError("frame lookup failed") + + monkeypatch.setattr(structlog._frames, "_find_first_app_frame_and_name", boom) + + logger = structlog.stdlib._FixedFindCallerLogger("haystack.find_caller_test") + haystack_logging._patch_structlog_call_information(logger) + + # The original error must propagate (not be masked by a NameError on an unbound `f`) ... + with pytest.raises(RuntimeError, match="frame lookup failed"): + logger.findCaller() + + # ... and nothing must be written to stdout. + assert capsys.readouterr().out == ""