haystack/test/test_logging.py

535 lines
20 KiB
Python
Raw Normal View History

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import builtins
import json
import logging
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from test.tracing.utils import SpyingTracer
from unittest.mock import ANY
import pytest
from _pytest.capture import CaptureFixture
from _pytest.logging import LogCaptureFixture
from _pytest.monkeypatch import MonkeyPatch
import haystack.utils.jupyter
from haystack import logging as haystack_logging
@pytest.fixture(autouse=True)
def reset_logging_config() -> None:
old_handlers = logging.root.handlers.copy()
yield
# Reset the logging configuration after each test to avoid impacting other tests
logging.root.handlers = old_handlers
class TestSkipLoggingConfiguration:
def test_skip_logging_configuration(
self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# the pytest fixture caplog only captures logs being rendered from the stdlib logging module
assert caplog.messages == ["Hello, structured logging!"]
# Nothing should be captured by capfd since structlog is not configured
assert capfd.readouterr().err == ""
class TestStructuredLoggingConsoleRendering:
def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=False)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert output == ""
def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=False)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_even_if_no_tty_but_python_config(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setattr(sys.stderr, "isatty", lambda: False)
haystack_logging.configure_logging(use_json=False)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_if_in_ipython(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setattr(builtins, "__IPYTHON__", "true", raising=False)
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_even_in_jupyter(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setattr(haystack.utils.jupyter, haystack.utils.jupyter.is_in_jupyter.__name__, lambda: True)
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log_even_if_no_tty_but_forced_through_env(
self, capfd: CaptureFixture, monkeypatch: MonkeyPatch
) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "false")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "Hello, structured logging!" in output
assert "{" not in output, "Seems JSON rendering is enabled when it should not be"
def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
# Only check for the minute to be a bit more robust
today = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "")
assert today in output
log_level = "warning"
assert log_level in output
assert "Hello, structured logging!" in output
assert "key1" in output
assert "value1" in output
def test_logging_exceptions(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
def function_that_raises_and_adds_to_stack_trace():
raise ValueError("This is an error")
try:
function_that_raises_and_adds_to_stack_trace()
except ValueError:
logger.exception("An error happened")
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
assert "An error happened" in output
class TestStructuredLoggingJSONRendering:
def test_logging_as_json_if_not_atty(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
monkeypatch.setattr(sys.stderr, "isatty", lambda: False)
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_logging_as_json(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
def function_that_raises_and_adds_to_stack_trace():
my_local_variable = "my_local_variable" # noqa: F841
raise ValueError("This is an error")
try:
function_that_raises_and_adds_to_stack_trace()
except ValueError:
logger.exception("An error happened ")
# Use `capfd` to capture the output of the final structlog rendering result
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "An error happened ",
"level": "error",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
"exception": [
{
"exc_type": "ValueError",
"exc_value": "This is an error",
"syntax_error": None,
"is_cause": False,
"frames": [
{
"filename": str(Path.cwd() / "test" / "test_logging.py"),
"lineno": ANY, # otherwise the test breaks if you add a line :-)
"name": "test_logging_exceptions_json",
"line": "",
"locals": None,
},
{
"filename": str(Path.cwd() / "test" / "test_logging.py"),
"lineno": ANY, # otherwise the test breaks if you add a line :-)
"name": "function_that_raises_and_adds_to_stack_trace",
"line": "",
"locals": None,
},
],
}
],
}
class TestLogTraceCorrelation:
def test_trace_log_correlation_python_logs_with_console_rendering(
self, spying_tracer: SpyingTracer, capfd: CaptureFixture
) -> None:
haystack_logging.configure_logging(use_json=False)
with spying_tracer.trace("test-operation"):
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
assert "trace_id" not in output
def test_trace_log_correlation_python_logs(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
with spying_tracer.trace("test-operation") as span:
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"trace_id": span.trace_id,
"span_id": span.span_id,
"lineno": ANY,
"module": "test.test_logging",
}
def test_trace_log_correlation_no_span(self, spying_tracer: SpyingTracer, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_trace_log_correlation_no_tracer(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured logging!",
"key1": "value1",
"key2": "value2",
"level": "warning",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
class TestCompositeLogger:
def test_correct_stack_level_with_stdlib_rendering(
self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
) -> None:
monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
haystack_logging.configure_logging()
logger = logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
# the pytest fixture caplog only captures logs being rendered from the stdlib logging module
assert caplog.messages == ["Hello, structured logging!"]
assert caplog.records[0].name == "test.test_logging"
# Nothing should be captured by capfd since structlog is not configured
assert capfd.readouterr().err == ""
def test_correct_stack_level_with_consoler_rendering(self, capfd: CaptureFixture) -> None:
haystack_logging.configure_logging(use_json=False)
logger = haystack_logging.getLogger(__name__)
logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})
output = capfd.readouterr().err
assert "test.test_logging" in output
@pytest.mark.parametrize(
"method, expected_level",
[
("debug", "debug"),
("info", "info"),
("warning", "warning"),
("error", "error"),
("fatal", "critical"),
("exception", "error"),
("critical", "critical"),
],
)
def test_various_levels(self, capfd: LogCaptureFixture, method: str, expected_level: str) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
getattr(logger, method)("Hello, structured {key}!", key="logging", key1="value1", key2="value2")
output = capfd.readouterr().err
parsed_output = json.loads(output) # should not raise an error
assert parsed_output == {
"event": "Hello, structured logging!",
"key": "logging",
"key1": "value1",
"key2": "value2",
"level": expected_level,
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_log(self, capfd: LogCaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.log(logging.DEBUG, "Hello, structured '{key}'!", key="logging", key1="value1", key2="value2")
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured 'logging'!",
"key": "logging",
"key1": "value1",
"key2": "value2",
"level": "debug",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
def test_log_with_string_cast(self, capfd: LogCaptureFixture) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.log(logging.DEBUG, "Hello, structured '{key}'!", key=LogCaptureFixture, key1="value1", key2="value2")
output = capfd.readouterr().err
parsed_output = json.loads(output)
assert parsed_output == {
"event": "Hello, structured '<class '_pytest.logging.LogCaptureFixture'>'!",
"key": "<class '_pytest.logging.LogCaptureFixture'>",
"key1": "value1",
"key2": "value2",
"level": "debug",
"timestamp": ANY,
"lineno": ANY,
"module": "test.test_logging",
}
@pytest.mark.parametrize(
"method, expected_level",
[
("debug", "debug"),
("info", "info"),
("warning", "warning"),
("error", "error"),
("fatal", "critical"),
("exception", "exception"),
("critical", "critical"),
],
)
def test_haystack_logger_with_positional_args(self, method: str, expected_level: str) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
with pytest.raises(TypeError):
getattr(logger, method)("Hello, structured logging %s!", "logging")
@pytest.mark.parametrize(
"method, expected_level",
[
("debug", "debug"),
("info", "info"),
("warning", "warning"),
("error", "error"),
("fatal", "critical"),
("exception", "exception"),
("critical", "critical"),
],
)
def test_haystack_logger_with_old_interpolation(self, method: str, expected_level: str) -> None:
haystack_logging.configure_logging(use_json=True)
logger = haystack_logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# does not raise - hence we need to check this separately
getattr(logger, method)("Hello, structured logging %s!", key="logging")
def test_that_haystack_logger_is_used(self) -> None:
"""Forces the usage of the Haystack logger instead of the standard library logger."""
allowed_list = [Path("haystack") / "logging.py"]
for root, dirs, files in os.walk("haystack"):
for file in files:
path = Path(root) / file
if not path.suffix.endswith(".py"):
continue
if path in allowed_list:
continue
content = path.read_text(encoding="utf-8")
# that looks like somebody is using our standard logger
if " logging.getLogger" in content:
haystack_logger_in_content = " haystack import logging" in content or ", logging" in content
assert haystack_logger_in_content, (
f"{path} doesn't use the Haystack logger. Please use the Haystack logger instead of the "
f"standard library logger and add plenty of keyword arguments."
)