diff --git a/haystack/tracing/datadog.py b/haystack/tracing/datadog.py
new file mode 100644
index 000000000..10d796a07
--- /dev/null
+++ b/haystack/tracing/datadog.py
@@ -0,0 +1,52 @@
+import contextlib
+from typing import Optional, Dict, Any, Iterator
+
+from haystack.lazy_imports import LazyImport
+from haystack.tracing import Tracer, Span
+from haystack.tracing import utils as tracing_utils
+
+with LazyImport("Run 'pip install ddtrace'") as ddtrace_import:
+    import ddtrace
+
+
+class DatadogSpan(Span):
+    """Adapter exposing a `ddtrace.Span` through Haystack's `Span` interface."""
+
+    def __init__(self, span: "ddtrace.Span") -> None:
+        self._span = span
+
+    def set_tag(self, key: str, value: Any) -> None:
+        """Set a tag on the wrapped span after coercing `value` with `coerce_tag_value`."""
+        coerced_value = tracing_utils.coerce_tag_value(value)
+        self._span.set_tag(key, coerced_value)
+
+    def raw_span(self) -> Any:
+        """Return the wrapped `ddtrace.Span`."""
+        return self._span
+
+
+class DatadogTracer(Tracer):
+    """Adapter exposing a `ddtrace.Tracer` through Haystack's `Tracer` interface."""
+
+    def __init__(self, tracer: "ddtrace.Tracer") -> None:
+        # presumably raises with the install hint above when `ddtrace` is missing; confirm in `LazyImport`
+        ddtrace_import.check()
+        self._tracer = tracer
+
+    @contextlib.contextmanager
+    def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> Iterator[Span]:
+        """Open a Datadog span named `operation_name`, apply `tags` if given, and yield it wrapped."""
+        with self._tracer.trace(operation_name) as raw_span:
+            span = DatadogSpan(raw_span)
+            if tags:
+                span.set_tags(tags)
+
+            yield span
+
+    def current_span(self) -> Optional[Span]:
+        """Return the tracer's current span wrapped as a `DatadogSpan`, or `None` if there is none."""
+        current_span = self._tracer.current_span()
+        if current_span is None:
+            return None
+
+        return DatadogSpan(current_span)
diff --git a/haystack/tracing/tracer.py b/haystack/tracing/tracer.py
index 64878afc0..8e320ed3b 100644
--- a/haystack/tracing/tracer.py
+++ b/haystack/tracing/tracer.py
@@ -137,7 +137,7 @@ def auto_enable_tracing() -> None:
     if is_tracing_enabled():
         return  # tracing already enabled
 
-    tracer = _auto_configured_opentelemetry_tracer()
+    tracer = _auto_configured_opentelemetry_tracer() or _auto_configured_datadog_tracer()
     if tracer:
         enable_tracing(tracer)
         logger.info("Tracing enabled via '%s'", tracer.__class__.__name__)
@@ -165,4 +165,19 @@ def _auto_configured_opentelemetry_tracer() -> Optional[Tracer]:
     return None
 
 
+def _auto_configured_datadog_tracer() -> Optional[Tracer]:
+    """Wrap the global `ddtrace` tracer if it is importable and enabled; otherwise return `None`."""
+    # we implement this here and not in the `datadog` module to avoid import warnings when Datadog is not installed
+    try:
+        from ddtrace import tracer
+        from haystack.tracing.datadog import DatadogTracer
+
+        if tracer.enabled:
+            return DatadogTracer(tracer=tracer)
+    except ImportError:
+        pass
+
+    return None
+
+
 auto_enable_tracing()
diff --git a/releasenotes/notes/datadog-tracer-b084cf64fcc575c6.yaml b/releasenotes/notes/datadog-tracer-b084cf64fcc575c6.yaml
new file mode 100644
index 000000000..58d24fe53
--- /dev/null
+++ b/releasenotes/notes/datadog-tracer-b084cf64fcc575c6.yaml
@@ -0,0 +1,21 @@
+---
+features:
+  - |
+    Added out-of-the-box support for the Datadog Tracer. This allows you to instrument pipeline and component
+    runs using Datadog and send traces to your preferred backend.
+
+    To use the Datadog Tracer you need to have the `ddtrace` package installed in your environment.
+    To instruct Haystack to use the Datadog tracer, you have multiple options:
+
+    * Run your Haystack application using the `ddtrace` command line tool as described in the
+      [ddtrace documentation](https://ddtrace.readthedocs.io/en/stable/installation_quickstart.html#tracing).
+      This behavior can be disabled by setting the `HAYSTACK_AUTO_TRACE_ENABLED_ENV_VAR` environment variable to `false`.
+    * Configure the tracer manually in your code using the `ddtrace` package:
+      ```python
+      from haystack.tracing.datadog import DatadogTracer
+      import haystack.tracing
+      import ddtrace
+
+      tracer = ddtrace.tracer
+      haystack.tracing.enable_tracing(DatadogTracer(tracer))
+      ```
diff --git a/test/test_requirements.txt b/test/test_requirements.txt
index 33576782c..2e1626729 100644
--- a/test/test_requirements.txt
+++ b/test/test_requirements.txt
@@ -27,3 +27,4 @@ jsonschema
 
 # Tracing
 opentelemetry-sdk
+ddtrace
diff --git a/test/tracing/test_datadog.py b/test/tracing/test_datadog.py
new file mode 100644
index 000000000..d7e647ae0
--- /dev/null
+++ b/test/tracing/test_datadog.py
@@ -0,0 +1,87 @@
+import itertools
+import json
+from typing import List, Dict
+
+import ddtrace
+import pytest
+from _pytest.capture import CaptureFixture
+from _pytest.monkeypatch import MonkeyPatch
+
+from haystack.tracing.datadog import DatadogTracer
+
+
+@pytest.fixture()
+def datadog_tracer(monkeypatch: MonkeyPatch) -> ddtrace.Tracer:
+    """Provide a fresh `ddtrace.Tracer` patched to use the log writer."""
+    # For the purpose of the tests we want to use the log writer
+    monkeypatch.setattr(ddtrace.Tracer, ddtrace.Tracer._use_log_writer.__name__, lambda *_: True)
+
+    tracer = ddtrace.Tracer()
+
+    return tracer
+
+
+def get_traces_from_console(capfd: CaptureFixture) -> List[Dict]:
+    """Parse the JSON captured on stdout and return the spans of all traces as one flat list."""
+    output = capfd.readouterr().out
+    parsed = json.loads(output)
+    nested_traces = parsed["traces"]
+    flattened = list(itertools.chain.from_iterable(nested_traces))
+
+    return flattened
+
+
+class TestDatadogTracer:
+    def test_datadog_tracer(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
+        tracer = DatadogTracer(datadog_tracer)
+
+        with tracer.trace("test") as span:
+            span.set_tag("key", "value")
+
+        traces = get_traces_from_console(capfd)
+        assert len(traces) == 1
+
+        trace = traces[0]
+
+        assert trace["name"] == "test"
+
+    def test_tagging(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
+        tracer = DatadogTracer(datadog_tracer)
+
+        with tracer.trace("test", tags={"key1": "value1"}) as span:
+            span.set_tag("key2", "value2")
+
+        spans = get_traces_from_console(capfd)
+        assert len(spans) == 1
+        assert spans[0]["meta"]["key1"] == "value1"
+        assert spans[0]["meta"]["key2"] == "value2"
+
+    def test_current_span(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
+        tracer = DatadogTracer(datadog_tracer)
+
+        with tracer.trace("test"):
+            current_span = tracer.current_span()
+            assert current_span is not None
+
+            current_span.set_tag("key1", "value1")
+
+            raw_span = current_span.raw_span()
+            assert raw_span is not None
+            assert isinstance(raw_span, ddtrace.Span)
+
+            raw_span.set_tag("key2", "value2")
+
+        spans = get_traces_from_console(capfd)
+        assert len(spans) == 1
+        assert spans[0]["meta"]["key1"] == "value1"
+        assert spans[0]["meta"]["key2"] == "value2"
+
+    def test_tracing_complex_values(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
+        tracer = DatadogTracer(datadog_tracer)
+
+        with tracer.trace("test") as span:
+            span.set_tag("key", {"a": 1, "b": [2, 3, 4]})
+
+        spans = get_traces_from_console(capfd)
+        assert len(spans) == 1
+        assert spans[0]["meta"]["key"] == '{"a": 1, "b": [2, 3, 4]}'
diff --git a/test/tracing/test_tracer.py b/test/tracing/test_tracer.py
index 93b317546..3d82acde5 100644
--- a/test/tracing/test_tracer.py
+++ b/test/tracing/test_tracer.py
@@ -2,6 +2,7 @@ import builtins
 import sys
 from unittest.mock import Mock
 
+import ddtrace
 import opentelemetry.trace
 import pytest
 from _pytest.monkeypatch import MonkeyPatch
@@ -10,6 +11,7 @@ from opentelemetry.sdk.trace.export import SimpleSpanProcessor
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
 
+from haystack.tracing.datadog import DatadogTracer
 from haystack.tracing.opentelemetry import OpenTelemetryTracer
 from haystack.tracing.tracer import (
     NullTracer,
@@ -93,6 +95,12 @@ class TestAutoEnableTracer:
         opentelemetry.trace._TRACER_PROVIDER = None
         disable_tracing()
 
+    @pytest.fixture()
+    def uninstalled_ddtrace_package(self, monkeypatch: MonkeyPatch) -> None:
+        """Disable the global ddtrace tracer so auto-configuration does not pick it up."""
+        # NOTE(review): no test in this diff requests this fixture; confirm it is used or remove it.
+        monkeypatch.setattr(ddtrace.tracer, "enabled", False)
+
     def test_skip_auto_enable_tracer_if_already_configured(self) -> None:
         my_tracker = Mock(spec=Tracer)  # anything else than `NullTracer` works for this test
         enable_tracing(my_tracker)
@@ -127,3 +135,23 @@ class TestAutoEnableTracer:
         activated_tracer = tracer.actual_tracer
         assert isinstance(activated_tracer, NullTracer)
         assert not is_tracing_enabled()
+
+    def test_skip_add_datadog_tracer_if_import_error(self, monkeypatch: MonkeyPatch) -> None:
+        """Auto-enabling must keep the `NullTracer` when `ddtrace` cannot be imported."""
+        monkeypatch.delitem(sys.modules, "ddtrace", raising=False)
+        monkeypatch.setattr(builtins, "__import__", Mock(side_effect=ImportError))
+        auto_enable_tracing()
+
+        activated_tracer = tracer.actual_tracer
+        assert isinstance(activated_tracer, NullTracer)
+        assert not is_tracing_enabled()
+
+    def test_add_datadog_tracer(self, monkeypatch: MonkeyPatch) -> None:
+        """Auto-enabling must activate the `DatadogTracer` when the ddtrace tracer is enabled."""
+        # Enable the global tracer explicitly so the test does not depend on the environment's ddtrace settings.
+        monkeypatch.setattr(ddtrace.tracer, "enabled", True)
+        auto_enable_tracing()
+
+        activated_tracer = tracer.actual_tracer
+        assert isinstance(activated_tracer, DatadogTracer)
+        assert is_tracing_enabled()