haystack/test/test_logging.py
Sebastian Husch Lee 12e3de364a
Fix test (#9475)
2025-06-03 08:00:10 +00:00

605 lines
22 KiB
Python

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import builtins
import json
import logging
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Generator
import structlog
from test.tracing.utils import SpyingTracer
from unittest.mock import ANY
import pytest
from _pytest.capture import CaptureFixture
from _pytest.logging import LogCaptureFixture
from _pytest.monkeypatch import MonkeyPatch
import haystack.utils.jupyter
from haystack import logging as haystack_logging
@pytest.fixture(autouse=True)
def reset_logging_config() -> None:
old_handlers = logging.root.handlers.copy()
yield
# Reset the logging configuration after each test to avoid impacting other tests
logging.root.handlers = old_handlers
@pytest.fixture()
def set_context_var_key() -> Generator[str, None, None]:
structlog.contextvars.bind_contextvars(context_var="value")
yield "context_var"
structlog.contextvars.unbind_contextvars("context_var")
class TestSkipLoggingConfiguration:
def test_skip_logging_configuration(
self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# the pytest fixture caplog only captures logs being rendered from the stdlib logging module
assert caplog.messages == ["Hello, structured logging!"]
# Nothing should be captured by capfd since structlog is not configured
assert capfd.readouterr().err == ""
class TestStructuredLoggingConsoleRendering:
def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=False)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert output == ""
def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=False)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_even_if_no_tty_but_python_config(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setattr(sys.stderr, "isatty", lambda: False)
haystack_logging.configure_logging(use_json=False)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_if_in_ipython(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setattr(builtins, "__IPYTHON__", "true", raising=False)
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_even_in_jupyter(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setattr(haystack.utils.jupyter, haystack.utils.jupyter.is_in_jupyter.__name__, lambda: True)
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "false")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
# Only check for the minute to be a bit more robust
today = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "")
assert today in output
log_level = "warning"
assert log_level in output
assert "Hello, structured logging!" in output
assert "key1" in output
assert "value1" in output
def test_logging_exceptions(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
def function_that_raises_and_adds_to_stack_trace():
raise ValueError("This is an error")
try:
function_that_raises_and_adds_to_stack_trace()
except ValueError:
logger.exception("An error happened")
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "An error happened" in output
def test_logging_of_contextvars(self, capfd: CaptureFixture, set_context_var_key: str) -> None:
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert set_context_var_key in output
class TestStructuredLoggingJSONRendering:
def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
monkeypatch.setattr(sys.stderr, "isatty", lambda: False)
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_logging_as_json(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_logging_of_contextvars(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch, set_context_var_key: str
) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
set_context_var_key: "value",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
def function_that_raises_and_adds_to_stack_trace():
my_local_variable = "my_local_variable" # noqa: F841
raise ValueError("This is an error")
try:
function_that_raises_and_adds_to_stack_trace()
except ValueError:
logger.exception("An error happened ")
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "An error happened ",
"level": "error",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
"exception": [
{
"exc_notes": [],
"exc_type": "ValueError",
"exc_value": "This is an error",
"exceptions": [],
"syntax_error": None,
"is_cause": False,
"is_group": False,
"frames": [
{
"filename": str(Path.cwd() / "test" / "test_logging.py"),
"lineno": ANY, # otherwise the test breaks if you add a line :-)
"name": "test_logging_exceptions_json",
},
{
"filename": str(Path.cwd() / "test" / "test_logging.py"),
"lineno": ANY, # otherwise the test breaks if you add a line :-)
"name": "function_that_raises_and_adds_to_stack_trace",
},
],
}
],
}
class TestLogTraceCorrelation:
def test_trace_log_correlation_python_logs_with_console_rendering(
self, spying_tracer: SpyingTracer, capfd: CaptureFixture
) -> None:
haystack_logging.configure_logging(use_json=False)
with spying_tracer.trace("test-operation"):
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
assert "trace_id" not in output
def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
with spying_tracer.trace("test-operation") as span:
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"trace_id": span.trace_id,
"span_id": span.span_id,
"lineno": ANY,
"module": "test.test_logging",
}
def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
class TestCompositeLogger:
def test_correct_stack_level_with_stdlib_rendering(
self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# the pytest fixture caplog only captures logs being rendered from the stdlib logging module
assert caplog.messages == ["Hello, structured logging!"]
assert caplog.records[0].name == "test.test_logging"
# Nothing should be captured by capfd since structlog is not configured
assert capfd.readouterr().err == ""
def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=False)
logger = haystack_logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
assert "test.test_logging" in output
@pytest.mark.parametrize(
"method, expected_level",
[
("debug", "debug"),
("info", "info"),
("warning", "warning"),
("error", "error"),
("fatal", "critical"),
("exception", "error"),
("critical", "critical"),
],
)
def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
getattr(logger, method)("Hello, structured {key}!", key="logging", key1="value1", key2="value2")
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key": "logging",
"key1": "value1",
"key2": "value2",
"level": expected_level,
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_log(self, capfd: LogCaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2")
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured 'logging'!",
"key": "logging",
"key1": "value1",
"key2": "value2",
"level": "debug",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_log_json_content(self, capfd: LogCaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.log(logging.DEBUG, 'Hello, structured: {"key": "value"}', key="logging", key1="value1", key2="value2")
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": 'Hello, structured: {"key": "value"}',
"key": "logging",
"key1": "value1",
"key2": "value2",
"level": "debug",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2")
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured '<class '_pytest.logging.LogCaptureFixture'>'!",
"key": "<class '_pytest.logging.LogCaptureFixture'>",
"key1": "value1",
"key2": "value2",
"level": "debug",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
@pytest.mark.parametrize(
"method, expected_level",
[
("debug", "debug"),
("info", "info"),
("warning", "warning"),
("error", "error"),
("fatal", "critical"),
("exception", "exception"),
("critical", "critical"),
],
)
def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
with pytest.raises(TypeError):
getattr(logger, method)("Hello, structured logging %s!", "logging")
@pytest.mark.parametrize(
"method, expected_level",
[
("debug", "debug"),
("info", "info"),
("warning", "warning"),
("error", "error"),
("fatal", "critical"),
("exception", "exception"),
("critical", "critical"),
],
)
def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# does not raise - hence we need to check this separately
getattr(logger, method)("Hello, structured logging %s!", key="logging")
def test_that_haystack_logger_is_used(self) -> None:
"""Forces the usage of the Haystack logger instead of the standard library logger."""
allowed_list = [Path("haystack") / "logging.py"]
for root, dirs, files in os.walk("haystack"):
for file in files:
path = Path(root) / file
if not path.suffix.endswith(".py"):
continue
if path in allowed_list:
continue
content = path.read_text(encoding="utf-8")
# that looks like somebody is using our standard logger
if " logging.getLogger" in content:
haystack_logger_in_content = " haystack import logging" in content or ", logging" in content
assert haystack_logger_in_content, (
f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the "
f"standard library logger and add plenty of keyword arguments."
)