# SPDX-FileCopyrightText: 2022-present deepset GmbH # # SPDX-License-Identifier: Apache-2.0 import builtins import json import logging import os import sys from datetime import datetime, timezone from pathlib import Path from typing import Generator import structlog from test.tracing.utils import SpyingTracer from unittest.mock import ANY import pytest from _pytest.capture import CaptureFixture from _pytest.logging import LogCaptureFixture from _pytest.monkeypatch import MonkeyPatch import haystack.utils.jupyter from haystack import logging as haystack_logging @pytest.fixture(autouse=True) def reset_logging_config() -> None: old_handlers = logging.root.handlers.copy() yield # Reset the logging configuration after each test to avoid impacting other tests logging.root.handlers = old_handlers @pytest.fixture() def set_context_var_key() -> Generator[str, None, None]: structlog.contextvars.bind_contextvars(context_var="value") yield "context_var" structlog.contextvars.unbind_contextvars("context_var") class TestSkipLoggingConfiguration: def test_skip_logging_configuration( self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture ) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module assert caplog.messages == ["Hello, structured logging!"] # Nothing should be captured by capfd since structlog is not configured assert capfd.readouterr().err == "" class TestStructuredLoggingConsoleRendering: def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert output == "" def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert "Hello, structured logging" in output assert "{" not in output, "Seems JSON rendering is enabled when it should not be" def test_console_rendered_structured_log_even_if_no_tty_but_python_config( self, capfd: CaptureFixture, monkeypatch: MonkeyPatch ) -> None: monkeypatch.setattr(sys.stderr, "isatty", lambda: False) haystack_logging.configure_logging(use_json=False) logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert "Hello, structured logging!" in output assert "{" not in output, "Seems JSON rendering is enabled when it should not be" def test_console_rendered_structured_log_if_in_ipython( self, capfd: CaptureFixture, monkeypatch: MonkeyPatch ) -> None: monkeypatch.setattr(builtins, "__IPYTHON__", "true", raising=False) haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert "Hello, structured logging!" in output assert "{" not in output, "Seems JSON rendering is enabled when it should not be" def test_console_rendered_structured_log_even_in_jupyter( self, capfd: CaptureFixture, monkeypatch: MonkeyPatch ) -> None: monkeypatch.setattr(haystack.utils.jupyter, haystack.utils.jupyter.is_in_jupyter.__name__, lambda: True) haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert "Hello, structured logging!" in output assert "{" not in output, "Seems JSON rendering is enabled when it should not be" def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env( self, capfd: CaptureFixture, monkeypatch: MonkeyPatch ) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "false") haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert "Hello, structured logging!" in output assert "{" not in output, "Seems JSON rendering is enabled when it should not be" def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err # Only check for the minute to be a bit more robust today = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "") assert today in output log_level = "warning" assert log_level in output assert "Hello, structured logging!" in output assert "key1" in output assert "value1" in output def test_logging_exceptions(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging() logger = logging.getLogger(__name__) def function_that_raises_and_adds_to_stack_trace(): raise ValueError("This is an error") try: function_that_raises_and_adds_to_stack_trace() except ValueError: logger.exception("An error happened") # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert "An error happened" in output def test_logging_of_contextvars(self, capfd: CaptureFixture, set_context_var_key: str) -> None: haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err assert set_context_var_key in output class TestStructuredLoggingJSONRendering: def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: monkeypatch.setattr(sys.stderr, "isatty", lambda: False) haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err parsed_output = json.loads(output) # should not raise an error assert parsed_output == { "event": "Hello, structured logging!", "key1": "value1", "key2": "value2", "level": "warning", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_logging_as_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err parsed_output = json.loads(output) # should not raise an error assert parsed_output == { "event": "Hello, structured logging!", "key1": "value1", "key2": "value2", "level": "warning", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err parsed_output = json.loads(output) # should not raise an error assert parsed_output == { "event": "Hello, structured logging!", "key1": "value1", "key2": "value2", "level": "warning", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_logging_of_contextvars( self, capfd: CaptureFixture, monkeypatch: MonkeyPatch, set_context_var_key: str ) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true") haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err parsed_output = json.loads(output) # should not raise an error assert parsed_output == { "event": "Hello, structured logging!", "key1": "value1", "key2": "value2", set_context_var_key: "value", "level": "warning", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) logger = logging.getLogger(__name__) def function_that_raises_and_adds_to_stack_trace(): my_local_variable = "my_local_variable" # noqa: F841 raise ValueError("This is an error") try: function_that_raises_and_adds_to_stack_trace() except ValueError: logger.exception("An error happened ") # Use `capfd` to capture the output of the final structlog rendering result output = capfd.readouterr().err parsed_output = json.loads(output) assert parsed_output == { "event": "An error happened ", "level": "error", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", "exception": [ { "exc_notes": [], "exc_type": "ValueError", "exc_value": "This is an error", "exceptions": [], "syntax_error": None, "is_cause": False, "is_group": False, "frames": [ { "filename": str(Path.cwd() / "test" / "test_logging.py"), "lineno": ANY, # otherwise the test breaks if you add a line :-) "name": "test_logging_exceptions_json", }, { "filename": str(Path.cwd() / "test" / "test_logging.py"), "lineno": ANY, # otherwise the test breaks if you add a line :-) "name": "function_that_raises_and_adds_to_stack_trace", }, ], } ], } class TestLogTraceCorrelation: def test_trace_log_correlation_python_logs_with_console_rendering( self, spying_tracer: SpyingTracer, capfd: CaptureFixture ) -> None: haystack_logging.configure_logging(use_json=False) with spying_tracer.trace("test-operation"): logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err assert "trace_id" not in output def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) with spying_tracer.trace("test-operation") as span: logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err parsed_output = json.loads(output) assert parsed_output == { "event": "Hello, structured logging!", "key1": "value1", "key2": "value2", "level": "warning", "timestamp": ANY, "trace_id": span.trace_id, "span_id": span.span_id, "lineno": ANY, "module": "test.test_logging", } def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err parsed_output = json.loads(output) assert parsed_output == { "event": "Hello, structured logging!", "key1": "value1", "key2": "value2", "level": "warning", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err parsed_output = json.loads(output) assert parsed_output == { "event": "Hello, structured logging!", "key1": "value1", "key2": "value2", "level": "warning", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } class TestCompositeLogger: def test_correct_stack_level_with_stdlib_rendering( self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture ) -> None: monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true") haystack_logging.configure_logging() logger = logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) # the pytest fixture caplog only captures logs being rendered from the stdlib logging module assert caplog.messages == ["Hello, structured logging!"] assert caplog.records[0].name == "test.test_logging" # Nothing should be captured by capfd since structlog is not configured assert capfd.readouterr().err == "" def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None: haystack_logging.configure_logging(use_json=False) logger = haystack_logging.getLogger(__name__) logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}) output = capfd.readouterr().err assert "test.test_logging" in output @pytest.mark.parametrize( "method, expected_level", [ ("debug", "debug"), ("info", "info"), ("warning", "warning"), ("error", "error"), ("fatal", "critical"), ("exception", "error"), ("critical", "critical"), ], ) def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) logger = haystack_logging.getLogger(__name__) logger.setLevel(logging.DEBUG) getattr(logger, method)("Hello, structured {key}!", key="logging", key1="value1", key2="value2") output = capfd.readouterr().err parsed_output = json.loads(output) # should not raise an error assert parsed_output == { "event": "Hello, structured logging!", "key": "logging", "key1": "value1", "key2": "value2", "level": expected_level, "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_log(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) logger = haystack_logging.getLogger(__name__) logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2") output = capfd.readouterr().err parsed_output = json.loads(output) assert parsed_output == { "event": "Hello, structured 'logging'!", "key": "logging", "key1": "value1", "key2": "value2", "level": "debug", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_log_json_content(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) logger = haystack_logging.getLogger(__name__) logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, 'Hello, structured: {"key": "value"}', key="logging", key1="value1", key2="value2") output = capfd.readouterr().err parsed_output = json.loads(output) assert parsed_output == { "event": 'Hello, structured: {"key": "value"}', "key": "logging", "key1": "value1", "key2": "value2", "level": "debug", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None: haystack_logging.configure_logging(use_json=True) logger = haystack_logging.getLogger(__name__) logger.setLevel(logging.DEBUG) logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2") output = capfd.readouterr().err parsed_output = json.loads(output) assert parsed_output == { "event": "Hello, structured ''!", "key": "", "key1": "value1", "key2": "value2", "level": "debug", "timestamp": ANY, "lineno": ANY, "module": "test.test_logging", } @pytest.mark.parametrize( "method, expected_level", [ ("debug", "debug"), ("info", "info"), ("warning", "warning"), ("error", "error"), ("fatal", "critical"), ("exception", "exception"), ("critical", "critical"), ], ) def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) logger = haystack_logging.getLogger(__name__) logger.setLevel(logging.DEBUG) with pytest.raises(TypeError): getattr(logger, method)("Hello, structured logging %s!", "logging") @pytest.mark.parametrize( "method, expected_level", [ ("debug", "debug"), ("info", "info"), ("warning", "warning"), ("error", "error"), ("fatal", "critical"), ("exception", "exception"), ("critical", "critical"), ], ) def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None: haystack_logging.configure_logging(use_json=True) logger = haystack_logging.getLogger(__name__) logger.setLevel(logging.DEBUG) # does not raise - hence we need to check this separately getattr(logger, method)("Hello, structured logging %s!", key="logging") def test_that_haystack_logger_is_used(self) -> None: """Forces the usage of the Haystack logger instead of the standard library logger.""" allowed_list = [Path("haystack") / "logging.py"] for root, dirs, files in os.walk("haystack"): for file in files: path = Path(root) / file if not path.suffix.endswith(".py"): continue if path in allowed_list: continue content = path.read_text(encoding="utf-8") # that looks like somebody is using our standard logger if " logging.getLogger" in content: haystack_logger_in_content = " haystack import logging" in content or ", logging" in content assert haystack_logger_in_content, ( f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the " f"standard library logger and add plenty of keyword arguments." )