mirror of
https://github.com/deepset-ai/haystack.git
synced 2025-08-26 09:26:25 +00:00
feat: implement support for structured logging (#7126)
* feat: implement support for structured logging * docs: add release notes * style: add explanatory comment * chore: test + import fixes * tests: fix windows tests
This commit is contained in:
parent
e0b692467b
commit
2a591280ab
@ -3,6 +3,11 @@ from haystack.core.errors import ComponentError, DeserializationError
|
||||
from haystack.core.pipeline import Pipeline
|
||||
from haystack.core.serialization import default_from_dict, default_to_dict
|
||||
from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer
|
||||
import haystack.logging
|
||||
|
||||
# Initialize the logging configuration
|
||||
# This is a no-op unless `structlog` is installed
|
||||
haystack.logging.configure_logging()
|
||||
|
||||
__all__ = [
|
||||
"component",
|
||||
|
97
haystack/logging.py
Normal file
97
haystack/logging.py
Normal file
@ -0,0 +1,97 @@
|
||||
import logging
|
||||
import os
|
||||
import typing
|
||||
from typing import List
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
from structlog.typing import Processor
|
||||
|
||||
# Names of the environment variables that control logging behavior.
HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON"
HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG"


def configure_logging(use_json: bool = False) -> None:
    """Configure logging for Haystack.

    - If `structlog` is not installed, everything is left as it is. The user is
      responsible for configuring logging themselves.
    - If `structlog` is installed, it is configured to format log entries including
      their key-value data. To disable this behavior set the environment variable
      `HAYSTACK_LOGGING_IGNORE_STRUCTLOG` to `true`.
    - If `structlog` is installed, all logs can be formatted as JSON. Enable this by
        - setting the `use_json` parameter to `True` when calling this function
        - setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true`

    :param use_json: render every log entry as a JSON object instead of using the
        console renderer.
    """
    try:
        import structlog
        from structlog.processors import ExceptionRenderer
        from structlog.tracebacks import ExceptionDictTransformer
    except ImportError:
        # structlog is an optional dependency: without it we leave the standard
        # library logging configuration untouched.
        return

    if os.getenv(HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR, "false").lower() == "true":
        # Explicit opt-out: behave exactly as if structlog were not installed.
        return

    # Setup roughly follows the structlog documentation:
    # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging
    # Entries emitted via `logging` as well as via `structlog` all end up formatted
    # by the same structlog processor chain.

    common_processors: List["Processor"] = [
        # Expose the log level in the event dict for structlog to use
        structlog.stdlib.add_log_level,
        # Attach the current timestamp in ISO format to every entry
        structlog.processors.TimeStamper(fmt="iso"),
    ]

    structlog.configure(
        processors=common_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
        # Drop entries below the effective level of the root logger.
        wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()),
    )

    want_json = use_json or os.getenv(HAYSTACK_LOGGING_USE_JSON_ENV_VAR, "false").lower() == "true"
    if want_json:
        renderers: List["Processor"] = [
            ExceptionRenderer(
                # Locals can contain quite sensitive data - keep them out of production logs
                ExceptionDictTransformer(show_locals=False)
            ),
            structlog.processors.JSONRenderer(),
        ]
    else:
        renderers = [structlog.dev.ConsoleRenderer()]

    formatter = structlog.stdlib.ProcessorFormatter(
        # Applied ONLY to records created via plain `logging`, i.e. NOT
        # originating within structlog.
        foreign_pre_chain=common_processors
        + [
            # Merge the `extra={...}` kwargs of stdlib log calls into the event dict
            structlog.stdlib.ExtraAdder()
        ],
        # Applied to ALL records once the pre-chain has run.
        processors=[
            # Strip `_record` & `_from_structlog` so this metadata never reaches the final record
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            *renderers,
        ],
    )

    handler = logging.StreamHandler()
    handler.name = "HaystackLoggingHandler"
    # Route every stdlib `logging` record through OUR `ProcessorFormatter`.
    handler.setFormatter(formatter)

    root_logger = logging.getLogger()
    # Drop any handler we installed previously so repeated calls don't add it twice
    kept_handlers = [
        existing
        for existing in root_logger.handlers
        if not (isinstance(existing, logging.StreamHandler) and existing.name == "HaystackLoggingHandler")
    ]
    root_logger.handlers = [handler, *kept_handlers]
|
@ -122,6 +122,9 @@ extra-dependencies = [
|
||||
# Tracing
|
||||
"opentelemetry-sdk",
|
||||
"ddtrace",
|
||||
|
||||
# Structured logging
|
||||
"structlog",
|
||||
]
|
||||
|
||||
[tool.hatch.envs.test.scripts]
|
||||
|
18
releasenotes/notes/structured-logging-2d6cef3fee2b4f0e.yaml
Normal file
18
releasenotes/notes/structured-logging-2d6cef3fee2b4f0e.yaml
Normal file
@ -0,0 +1,18 @@
|
||||
---
|
||||
features:
|
||||
- |
|
||||
Haystack now supports structured logging out-of-the box.
|
||||
Logging can be separated into 3 categories:
|
||||
- If [`structlog`](https://www.structlog.org/en/stable/) is not installed, Haystack will
|
||||
use the standard Python logging library with whatever configuration is present.
|
||||
- If `structlog` is installed, Haystack will log through [`structlog`](https://www.structlog.org/en/stable/) using
|
||||
structlog's console renderer.
|
||||
To disable structlog, set the environment variable `HAYSTACK_LOGGING_IGNORE_STRUCTLOG` to `true`.
|
||||
- To log in JSON, install [`structlog`](https://www.structlog.org/en/stable/) and
|
||||
- set the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true` or
|
||||
- enable JSON logging from Python
|
||||
```python
|
||||
import haystack.logging
|
||||
|
||||
haystack.logging.configure_logging(use_json=True)
|
||||
```
|
206
test/test_logging.py
Normal file
206
test/test_logging.py
Normal file
@ -0,0 +1,206 @@
|
||||
import builtins
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import ANY, Mock
|
||||
|
||||
import pytest
|
||||
from _pytest.capture import CaptureFixture
|
||||
from _pytest.logging import LogCaptureFixture
|
||||
from _pytest.monkeypatch import MonkeyPatch
|
||||
|
||||
from haystack import logging as haystack_logging
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def reset_logging_config() -> None:
    """Snapshot the root logger's handlers and restore them after each test.

    Keeps handler (re)configuration performed by `configure_logging` from
    leaking into other tests.
    """
    saved_handlers = list(logging.root.handlers)
    yield
    # Undo any handler changes the test made to the root logger
    logging.root.handlers = saved_handlers
|
||||
|
||||
|
||||
class TestSkipLoggingConfiguration:
    """Cases where `configure_logging` must be a no-op."""

    def test_skip_logging_configuration(
        self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
    ) -> None:
        """Opting out via the env var leaves stdlib logging untouched."""
        monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
        haystack_logging.configure_logging()

        stdlib_logger = logging.getLogger(__name__)
        stdlib_logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # `caplog` only records entries rendered by the stdlib logging module
        assert caplog.messages == ["Hello, structured logging!"]

        # structlog was never configured, so nothing should reach stderr
        assert capfd.readouterr().err == ""

    def test_skip_logging_if_structlog_not_installed(
        self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
    ) -> None:
        """With structlog un-importable, configuration falls back to stdlib logging."""
        monkeypatch.delitem(sys.modules, "structlog", raising=False)
        monkeypatch.setattr(builtins, "__import__", Mock(side_effect=ImportError))

        haystack_logging.configure_logging()

        stdlib_logger = logging.getLogger(__name__)
        stdlib_logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # `caplog` only records entries rendered by the stdlib logging module
        assert caplog.messages == ["Hello, structured logging!"]

        # structlog was never configured, so nothing should reach stderr
        assert capfd.readouterr().err == ""
|
||||
|
||||
|
||||
class TestStructuredLoggingConsoleRendering:
    """Behavior of the default (console-renderer) structlog configuration."""

    def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None:
        """DEBUG entries below the configured level produce no output."""
        haystack_logging.configure_logging()

        logging.getLogger(__name__).debug(
            "Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}
        )

        # `capfd` captures the final structlog rendering result on stderr
        assert capfd.readouterr().err == ""

    def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None:
        """DEBUG entries are rendered once the logger level allows them."""
        haystack_logging.configure_logging()

        verbose_logger = logging.getLogger(__name__)
        verbose_logger.setLevel(logging.DEBUG)

        verbose_logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # `capfd` captures the final structlog rendering result on stderr
        assert capfd.readouterr().err != ""

    def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None:
        """Console output contains timestamp, level, message, and extra key-values."""
        haystack_logging.configure_logging()

        logging.getLogger(__name__).warning(
            "Hello, structured logging!", extra={"key1": "value1", "key2": "value2"}
        )

        # `capfd` captures the final structlog rendering result on stderr
        rendered = capfd.readouterr().err

        # Only compare down to the minute to be a bit more robust
        current_minute = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "")
        assert current_minute in rendered

        assert "warning" in rendered

        assert "Hello, structured logging!" in rendered

        assert "key1" in rendered
        assert "value1" in rendered

    def test_logging_exceptions(self, capfd: CaptureFixture) -> None:
        """`logger.exception` output includes the logged message."""
        haystack_logging.configure_logging()

        failing_logger = logging.getLogger(__name__)

        def function_that_raises_and_adds_to_stack_trace():
            raise ValueError("This is an error")

        try:
            function_that_raises_and_adds_to_stack_trace()
        except ValueError:
            failing_logger.exception("An error happened")

        # `capfd` captures the final structlog rendering result on stderr
        assert "An error happened" in capfd.readouterr().err
|
||||
|
||||
|
||||
class TestStructuredLoggingJSONRendering:
    """JSON rendering mode: each log entry is a single JSON object on stderr."""

    def test_logging_as_json(self, capfd: CaptureFixture) -> None:
        """Enabling JSON via the `use_json` parameter renders stdlib extras as JSON keys."""
        haystack_logging.configure_logging(use_json=True)

        logger = logging.getLogger(__name__)
        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        parsed_output = json.loads(output)  # should not raise an error

        assert parsed_output == {
            "event": "Hello, structured logging!",
            "key1": "value1",
            "key2": "value2",
            "level": "warning",
            "timestamp": ANY,
        }

    def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
        """Enabling JSON via the HAYSTACK_LOGGING_USE_JSON env var behaves the same."""
        monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true")
        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)
        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        parsed_output = json.loads(output)  # should not raise an error

        assert parsed_output == {
            "event": "Hello, structured logging!",
            "key1": "value1",
            "key2": "value2",
            "level": "warning",
            "timestamp": ANY,
        }

    def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None:
        """Exceptions are rendered as a structured dict; locals are excluded from frames."""
        haystack_logging.configure_logging(use_json=True)

        logger = logging.getLogger(__name__)

        def function_that_raises_and_adds_to_stack_trace():
            my_local_variable = "my_local_variable"  # noqa: F841
            raise ValueError("This is an error")

        try:
            function_that_raises_and_adds_to_stack_trace()
        except ValueError:
            logger.exception("An error happened ")

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        parsed_output = json.loads(output)
        assert parsed_output == {
            "event": "An error happened ",
            "level": "error",
            "timestamp": ANY,
            "exception": [
                {
                    "exc_type": "ValueError",
                    "exc_value": "This is an error",
                    "syntax_error": None,
                    "is_cause": False,
                    "frames": [
                        {
                            "filename": str(Path.cwd() / "test" / "test_logging.py"),
                            "lineno": ANY,  # otherwise the test breaks if you add a line :-)
                            "name": "test_logging_exceptions_json",
                            "line": "",
                            # `locals` must be None: show_locals=False keeps sensitive data out
                            "locals": None,
                        },
                        {
                            "filename": str(Path.cwd() / "test" / "test_logging.py"),
                            "lineno": ANY,  # otherwise the test breaks if you add a line :-)
                            "name": "function_that_raises_and_adds_to_stack_trace",
                            "line": "",
                            # `locals` must be None: show_locals=False keeps sensitive data out
                            "locals": None,
                        },
                    ],
                }
            ],
        }
|
@ -17,8 +17,6 @@ def datadog_tracer(monkeypatch: MonkeyPatch) -> ddtrace.Tracer:
|
||||
|
||||
tracer = ddtrace.Tracer()
|
||||
|
||||
# monkeypatch.setattr(ddtrace, "tracer", tracer)
|
||||
|
||||
return tracer
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user