-
Notifications
You must be signed in to change notification settings - Fork 2.9k
fix: Fix issues related to automatic logging set up in Haystack #11635
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
| import os | ||
| import sys | ||
| import typing | ||
| from collections.abc import Sequence | ||
| from typing import Any | ||
|
|
||
| if typing.TYPE_CHECKING: | ||
|
|
@@ -16,6 +17,10 @@ | |
| HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON" | ||
| HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG" | ||
|
|
||
| # Attribute set on a logger once we have patched its methods. `logging.getLogger` returns a shared singleton, so we | ||
| # use this marker to patch each logger only once and avoid wrapping the already-wrapped methods on repeated calls. | ||
| _PATCHED_MARKER = "_haystack_patched" | ||
|
|
||
|
|
||
| class PatchedLogger(typing.Protocol): | ||
| """Class which enables using type checkers to find wrong logger usage.""" | ||
|
|
@@ -220,16 +225,13 @@ def _patch_structlog_call_information(logger: logging.Logger) -> None: | |
| if not isinstance(logger, _FixedFindCallerLogger): | ||
| return | ||
|
|
||
| # completely copied from structlog. We only add `haystack.logging` to the list of ignored frames | ||
| # Copied from structlog's `_FixedFindCallerLogger.findCaller`, only adding `haystack.logging` to the list of | ||
| # ignored frames so our own logging wrappers don't show up as the caller. We deliberately do not forward | ||
| # `stacklevel` to `_find_first_app_frame_and_name`: that parameter only exists in structlog >= 25.5.0 and | ||
| # structlog is an optional dependency, so forwarding it would break logging on older versions. | ||
| def findCaller(stack_info: bool = False, stacklevel: int = 1) -> tuple[str, int, str, str | None]: # noqa: ARG001 | ||
| try: | ||
| sinfo: str | None | ||
| # we need to exclude `haystack.logging` from the stack | ||
| f, name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) | ||
| sinfo = _format_stack(f) if stack_info else None | ||
| except Exception as error: | ||
| print(f"Error in findCaller: {error}") | ||
|
|
||
| f, _name = _find_first_app_frame_and_name(["logging", "haystack.logging"]) | ||
| sinfo = _format_stack(f) if stack_info else None | ||
| return f.f_code.co_filename, f.f_lineno, f.f_code.co_name, sinfo | ||
|
|
||
| logger.findCaller = findCaller # type: ignore | ||
|
|
@@ -247,6 +249,11 @@ def getLogger(name: str) -> PatchedLogger: | |
| - it makes structure logging effective, not just an available feature | ||
| """ | ||
| logger = logging.getLogger(name) | ||
| if getattr(logger, _PATCHED_MARKER, False): | ||
| # Already patched: `logging.getLogger` returned the same singleton, so re-patching would stack the wrappers | ||
| # and interpolate the message more than once. | ||
| return typing.cast(PatchedLogger, logger) | ||
|
|
||
| logger.debug = patch_log_method_to_kwargs_only(logger.debug) # type: ignore | ||
| logger.info = patch_log_method_to_kwargs_only(logger.info) # type: ignore | ||
| logger.warn = patch_log_method_to_kwargs_only(logger.warn) # type: ignore | ||
|
|
@@ -262,6 +269,8 @@ def getLogger(name: str) -> PatchedLogger: | |
| # We also patch the `makeRecord` method to use keyword string interpolation | ||
| logger.makeRecord = patch_make_records_to_use_kwarg_string_interpolation(logger.makeRecord) # type: ignore | ||
|
|
||
| setattr(logger, _PATCHED_MARKER, True) | ||
|
|
||
| return typing.cast(PatchedLogger, logger) | ||
|
|
||
|
|
||
|
|
@@ -295,7 +304,12 @@ def correlate_logs_with_traces(_: "WrappedLogger", __: str, event_dict: "EventDi | |
| return event_dict | ||
|
|
||
|
|
||
| def configure_logging(use_json: bool | None = None) -> None: | ||
| def configure_logging( | ||
| use_json: bool | None = None, | ||
| logger_name: str | Sequence[str] = ("haystack", "haystack_integrations", "haystack_experimental"), | ||
| propagate: bool = True, | ||
| force: bool = True, | ||
| ) -> None: | ||
| """ | ||
| Configure logging for Haystack. | ||
|
|
||
|
|
@@ -306,6 +320,25 @@ def configure_logging(use_json: bool | None = None) -> None: | |
| - If `structlog` is installed, you can JSON format all logs. Enable this by | ||
| - setting the `use_json` parameter to `True` when calling this function | ||
| - setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` | ||
|
|
||
| :param use_json: Whether to format logs as JSON. If `None`, we try to guess based on the environment. | ||
| :param logger_name: | ||
| The name (or names) of the logger our formatting handler is attached to. Defaults to Haystack's own | ||
| namespaces (`"haystack"`, `"haystack_integrations"` and `"haystack_experimental"`), so that we only touch | ||
| Haystack's own loggers and leave the logging configuration of the host application and any other libraries | ||
| running in the same process untouched. Pass an empty string (`""`) to attach the handler to the root logger | ||
| instead - this restores the legacy behavior of formatting *every* log record in the process. | ||
| :param propagate: | ||
| Whether the configured loggers should propagate their records to ancestor loggers (ultimately the root | ||
| logger). The default (`True`) keeps records flowing to handlers configured by the host application and to | ||
| capturing tools such as `pytest`'s `caplog`. Set it to `False` to make Haystack fully own the output of its | ||
| own logs - this avoids duplicate log lines when the host application also configures the root logger. It has | ||
| no effect when `logger_name=""` (the root logger has no ancestors). | ||
| :param force: | ||
| Whether to (re)configure logging even if `structlog` has already been configured by someone else. The default | ||
| (`True`) means an explicit call always takes over. Pass `False` to make this a no-op when `structlog` is | ||
| already configured - this is used by the import-time call in `haystack/__init__.py` so that merely importing | ||
| Haystack does not overwrite a `structlog` configuration set up by the host application. | ||
| """ | ||
| import haystack.utils.jupyter # to avoid circular imports | ||
|
|
||
|
|
@@ -322,6 +355,11 @@ def configure_logging(use_json: bool | None = None) -> None: | |
| # If the user wants to ignore structlog, we don't configure it and fall back to standard logging | ||
| return | ||
|
|
||
| # When not forcing, skip configuration if structlog is already configured (e.g. by the host application) so we | ||
| # leave its configuration and handlers untouched. | ||
| if not force and structlog.is_configured(): | ||
| return | ||
|
|
||
| # We roughly follow the structlog documentation here: | ||
| # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging | ||
| # This means that we use structlog to format the log entries for entries emitted via `logging` and `structlog`. | ||
|
|
@@ -352,11 +390,16 @@ def configure_logging(use_json: bool | None = None) -> None: | |
| shared_processors.append(correlate_logs_with_traces) | ||
|
|
||
| structlog.configure( | ||
| processors=shared_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter], | ||
| # `filter_by_level` reads the effective level from the underlying stdlib logger on *every* call, so changes | ||
| # to the log level made after `configure_logging` runs (e.g. by the host application) are respected. | ||
| processors=[ | ||
| structlog.stdlib.filter_by_level, | ||
| *shared_processors, | ||
| structlog.stdlib.ProcessorFormatter.wrap_for_formatter, | ||
| ], | ||
| logger_factory=structlog.stdlib.LoggerFactory(ignore_frame_names=["haystack.logging"]), | ||
| cache_logger_on_first_use=True, | ||
| # This is a filter that will filter out log entries that are below the log level of the root logger. | ||
| wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()), | ||
| wrapper_class=structlog.stdlib.BoundLogger, | ||
| ) | ||
|
|
||
| renderers: list[Processor] | ||
|
|
@@ -392,12 +435,17 @@ def configure_logging(use_json: bool | None = None) -> None: | |
| # Use OUR `ProcessorFormatter` to format all `logging` entries. | ||
| handler.setFormatter(formatter) | ||
|
|
||
| root_logger = logging.getLogger() | ||
| # avoid adding our handler twice | ||
| old_handlers = [ | ||
| h | ||
| for h in root_logger.handlers | ||
| if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") | ||
| ] | ||
| new_handlers = [handler, *old_handlers] | ||
| root_logger.handlers = new_handlers | ||
| # By default we attach to Haystack's own namespaces so that we only format Haystack's log records and leave the | ||
| # loggers of the host application and other libraries in the same process untouched. Pass `logger_name=""` to | ||
| # attach to the root logger instead (legacy behavior - formats every record in the process). | ||
| logger_names = [logger_name] if isinstance(logger_name, str) else list(logger_name) | ||
| for name in logger_names: | ||
| target_logger = logging.getLogger(name) | ||
| # avoid adding our handler twice | ||
| old_handlers = [ | ||
| h | ||
| for h in target_logger.handlers | ||
| if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler") | ||
| ] | ||
| target_logger.handlers = [handler, *old_handlers] | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This may duplicate Haystack logs when users opt back into root formatting with
Not 100% sure that this happens, would try to verify with e.g.: And maybe extend this test to cover also the |
||
| target_logger.propagate = propagate | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| --- | ||
| upgrade: | ||
| - | | ||
| ``haystack.logging.configure_logging`` now attaches its formatting handler to Haystack's own logger namespaces | ||
| (``haystack``, ``haystack_integrations`` and ``haystack_experimental``) instead of the root logger. This makes | ||
| Haystack a better-behaved library when it runs next to other services in the same process: the log records of the | ||
| host application and other libraries are no longer reformatted or duplicated by Haystack. To restore the previous | ||
| behavior of formatting every log record in the process, call ``configure_logging(logger_name="")``. | ||
| fixes: | ||
| - | | ||
| ``haystack.logging.configure_logging`` no longer freezes the log level at import time. Previously the level was | ||
| captured once when the function ran (at ``import haystack``), so a log level set later by the application was | ||
| ignored for ``structlog``-native loggers. The effective level is now read on every log call. | ||
| - | | ||
| Fixed the logger in ``haystack.utils.requests_utils`` being named after the module's file path instead of | ||
| ``haystack.utils.requests_utils``, which kept its records outside the ``haystack`` logger namespace. | ||
| - | | ||
| Importing Haystack no longer overwrites a ``structlog`` configuration that the host application already set up. | ||
| The import-time call to ``configure_logging`` now skips configuration when ``structlog`` is already configured. | ||
| - | | ||
| ``haystack.logging.getLogger`` is now idempotent. Previously, calling it more than once for the same logger name | ||
| wrapped the already-wrapped logger methods again, which caused the log message to be run through ``str.format`` | ||
| once per call. As a result a field value containing ``{...}`` could be re-interpolated and pull in the value of | ||
| another field. Each logger is now patched only once. | ||
| - | | ||
| The patched ``findCaller`` used to determine the source of a log entry no longer prints to stdout and no longer | ||
| masks errors with a misleading ``NameError``, matching structlog's own ``findCaller`` implementation. | ||
| features: | ||
| - | | ||
| ``haystack.logging.configure_logging`` gained three parameters: ``logger_name`` to choose which logger(s) the | ||
| formatting handler is attached to, ``propagate`` to control whether Haystack's loggers propagate their records to | ||
| ancestor loggers, and ``force`` to control whether an existing ``structlog`` configuration is replaced. Set | ||
| ``propagate=False`` to let Haystack fully own the output of its own logs and avoid duplicate log lines when the | ||
| host application also configures the root logger. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This
structlog.configure(...)call is (still?) process-global.If Haystack is imported before the host app configures
structlog, any non-Haystackstructloglogger used while Haystack’s config is active will still run through Haystack’s processor chain (regardless of logger name).Maybe we could move
structlog.configure(...)intoconfigure_logging(...)calls.