feat: datadog tracer (#7058)

* feat: implement datadog tracer

* feat: autoenable for ddtrace

* docs: add release notes

* ci: add missing test dependency

* chore: use forward references
This commit is contained in:
Tobias Wochinger 2024-02-23 09:18:32 +01:00 committed by GitHub
parent 08e97d874e
commit bc8a48cc3c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 189 additions and 1 deletions

View File

@ -0,0 +1,43 @@
import contextlib
from typing import Optional, Dict, Any, Iterator
from haystack.lazy_imports import LazyImport
from haystack.tracing import Tracer, Span
from haystack.tracing import utils as tracing_utils
with LazyImport("Run 'pip install ddtrace'") as ddtrace_import:
import ddtrace
class DatadogSpan(Span):
    """Haystack `Span` implementation that forwards to a wrapped `ddtrace.Span`."""

    def __init__(self, span: "ddtrace.Span") -> None:
        """
        Keep a reference to the Datadog span this wrapper delegates to.

        :param span: the Datadog span to wrap.
        """
        self._span = span

    def set_tag(self, key: str, value: Any) -> None:
        """
        Set a single tag on the wrapped Datadog span.

        The value is first passed through `tracing_utils.coerce_tag_value` and the result
        is what gets handed to the Datadog span.

        :param key: the name of the tag.
        :param value: the value of the tag.
        """
        self._span.set_tag(key, tracing_utils.coerce_tag_value(value))

    def raw_span(self) -> Any:
        """
        Give access to the underlying Datadog span.

        :return: the `ddtrace.Span` object passed to the constructor.
        """
        return self._span
class DatadogTracer(Tracer):
    """Haystack `Tracer` implementation that delegates to a `ddtrace.Tracer`."""

    def __init__(self, tracer: "ddtrace.Tracer") -> None:
        """
        Wrap an existing Datadog tracer.

        :param tracer: the Datadog tracer that creates and tracks the spans.
        """
        # NOTE(review): presumably raises if the lazy `ddtrace` import above failed - confirm in `LazyImport`
        ddtrace_import.check()
        self._tracer = tracer

    @contextlib.contextmanager
    def trace(self, operation_name: str, tags: Optional[Dict[str, Any]] = None) -> Iterator[Span]:
        """
        Open a Datadog span for the duration of the `with` block and yield it wrapped in a `DatadogSpan`.

        :param operation_name: the name passed to the Datadog tracer for the new span.
        :param tags: optional tags to set on the span before it is yielded; skipped when empty or `None`.
        :return: a context manager yielding the wrapped span.
        """
        with self._tracer.trace(operation_name) as datadog_span:
            wrapped_span = DatadogSpan(datadog_span)
            if tags:
                wrapped_span.set_tags(tags)

            yield wrapped_span

    def current_span(self) -> Optional[Span]:
        """
        Look up the span the Datadog tracer currently reports.

        :return: the current span wrapped in a `DatadogSpan`, or `None` if the Datadog tracer reports none.
        """
        active_span = self._tracer.current_span()
        return DatadogSpan(active_span) if active_span is not None else None

View File

@ -137,7 +137,7 @@ def auto_enable_tracing() -> None:
if is_tracing_enabled():
return # tracing already enabled
tracer = _auto_configured_opentelemetry_tracer()
tracer = _auto_configured_opentelemetry_tracer() or _auto_configured_datadog_tracer()
if tracer:
enable_tracing(tracer)
logger.info("Tracing enabled via '%s'", tracer.__class__.__name__)
@ -165,4 +165,18 @@ def _auto_configured_opentelemetry_tracer() -> Optional[Tracer]:
return None
def _auto_configured_datadog_tracer() -> Optional[Tracer]:
    """
    Build a `DatadogTracer` around ddtrace's global tracer if `ddtrace` is importable and its tracer is enabled.

    :return: a `DatadogTracer` wrapping `ddtrace.tracer`, or `None` when an `ImportError` occurs or the
        Datadog tracer is not enabled.
    """
    # we implement this here and not in the `datadog` module to avoid import warnings when Datadog is not installed
    try:
        from ddtrace import tracer as global_datadog_tracer

        from haystack.tracing.datadog import DatadogTracer

        if not global_datadog_tracer.enabled:
            return None

        return DatadogTracer(tracer=global_datadog_tracer)
    except ImportError:
        # Datadog is optional: a failed import simply means there is nothing to auto-configure
        return None
# NOTE(review): appears to be a module-level call, i.e. auto-configuration is attempted on import - confirm
auto_enable_tracing()

View File

@ -0,0 +1,21 @@
---
features:
- |
Added out-of-the-box support for the Datadog Tracer. This allows you to instrument pipeline and component
runs using Datadog and send traces to your preferred backend.
To use the Datadog Tracer you need to have the `ddtrace` package installed in your environment.
To instruct Haystack to use the Datadog tracer, you have multiple options:
* Run your Haystack application using the `ddtrace` command line tool as described in
the [ddtrace documentation](https://ddtrace.readthedocs.io/en/stable/installation_quickstart.html#tracing).
This behavior can be disabled by setting the `HAYSTACK_AUTO_TRACE_ENABLED` environment variable to `false`.
* Configure the tracer manually in your code using the `ddtrace` package:
```python
from haystack.tracing.datadog import DatadogTracer
import haystack.tracing
import ddtrace
tracer = ddtrace.tracer
haystack.tracing.enable_tracing(DatadogTracer(tracer))
```

View File

@ -27,3 +27,4 @@ jsonschema
# Tracing
opentelemetry-sdk
ddtrace

View File

@ -0,0 +1,87 @@
import functools
import json
from typing import List, Dict
import ddtrace
import pytest
from _pytest.capture import CaptureFixture
from _pytest.monkeypatch import MonkeyPatch
from haystack.tracing.datadog import DatadogTracer
@pytest.fixture()
def datadog_tracer(monkeypatch: MonkeyPatch) -> ddtrace.Tracer:
    """
    Provide a fresh `ddtrace.Tracer` for a test.

    Before the tracer is created, `ddtrace.Tracer._use_log_writer` is patched to always return `True`
    (the patch is undone by `monkeypatch` after the test).
    """
    # For the purpose of the tests we want to use the log writer
    log_writer_switch = ddtrace.Tracer._use_log_writer.__name__
    monkeypatch.setattr(ddtrace.Tracer, log_writer_switch, lambda *_: True)

    return ddtrace.Tracer()
def get_traces_from_console(capfd: CaptureFixture) -> List[Dict]:
    """
    Collect every span that was printed to stdout as JSON since the last capture.

    The captured stdout is expected to hold one or more JSON documents of the form
    `{"traces": [[span, ...], ...]}`, separated by whitespace (e.g. one document per line).
    All spans of all traces of all documents are returned as one flat list, in output order.

    :param capfd: pytest's capture fixture; its captured stdout is consumed by this call.
    :return: the flattened list of span dictionaries; empty if nothing was printed.
    :raises json.JSONDecodeError: if stdout contains something that is not valid JSON.
    :raises KeyError: if a document has no `"traces"` entry.
    """
    output = capfd.readouterr().out.strip()

    decoder = json.JSONDecoder()
    spans: List[Dict] = []
    position = 0
    while position < len(output):
        # `raw_decode` parses exactly one document and reports where it ended, which lets us
        # read several concatenated documents instead of failing on "extra data"
        payload, position = decoder.raw_decode(output, position)
        for trace in payload["traces"]:
            spans.extend(trace)

        # `raw_decode` does not skip leading whitespace, so step over the separator ourselves
        while position < len(output) and output[position].isspace():
            position += 1

    return spans
class TestDatadogTracer:
    """Tests for `DatadogTracer` that inspect the spans captured from stdout via `get_traces_from_console`."""

    def test_datadog_tracer(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
        """A traced operation shows up as exactly one span carrying the operation name."""
        # Renamed from `test_opentelemetry_tracer`: this suite exercises the Datadog tracer
        tracer = DatadogTracer(datadog_tracer)

        with tracer.trace("test") as span:
            span.set_tag("key", "value")

        traces = get_traces_from_console(capfd)
        assert len(traces) == 1

        trace = traces[0]
        assert trace["name"] == "test"

    def test_tagging(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
        """Tags passed to `trace()` and tags set on the yielded span both end up in the span's `meta`."""
        tracer = DatadogTracer(datadog_tracer)

        with tracer.trace("test", tags={"key1": "value1"}) as span:
            span.set_tag("key2", "value2")

        spans = get_traces_from_console(capfd)
        assert len(spans) == 1
        assert spans[0]["meta"]["key1"] == "value1"
        assert spans[0]["meta"]["key2"] == "value2"

    def test_current_span(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
        """`current_span()` returns the active span; tags set through it or its raw span are recorded."""
        tracer = DatadogTracer(datadog_tracer)

        with tracer.trace("test"):
            current_span = tracer.current_span()
            # Assert on the very object that is dereferenced below, not on a second lookup
            assert current_span is not None

            current_span.set_tag("key1", "value1")

            raw_span = current_span.raw_span()
            assert raw_span is not None
            assert isinstance(raw_span, ddtrace.Span)

            raw_span.set_tag("key2", "value2")

        spans = get_traces_from_console(capfd)
        assert len(spans) == 1
        assert spans[0]["meta"]["key1"] == "value1"
        assert spans[0]["meta"]["key2"] == "value2"

    def test_tracing_complex_values(self, datadog_tracer: ddtrace.Tracer, capfd: CaptureFixture) -> None:
        """A nested dict/list tag value is recorded as its JSON string representation."""
        tracer = DatadogTracer(datadog_tracer)

        with tracer.trace("test") as span:
            span.set_tag("key", {"a": 1, "b": [2, 3, 4]})

        spans = get_traces_from_console(capfd)
        assert len(spans) == 1
        assert spans[0]["meta"]["key"] == '{"a": 1, "b": [2, 3, 4]}'

View File

@ -2,6 +2,7 @@ import builtins
import sys
from unittest.mock import Mock
import ddtrace
import opentelemetry.trace
import pytest
from _pytest.monkeypatch import MonkeyPatch
@ -10,6 +11,7 @@ from opentelemetry.sdk.trace.export import SimpleSpanProcessor
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter
from haystack.tracing.datadog import DatadogTracer
from haystack.tracing.opentelemetry import OpenTelemetryTracer
from haystack.tracing.tracer import (
NullTracer,
@ -93,6 +95,10 @@ class TestAutoEnableTracer:
opentelemetry.trace._TRACER_PROVIDER = None
disable_tracing()
@pytest.fixture()
def uninstalled_ddtrace_package(self, monkeypatch: MonkeyPatch) -> None:
    """
    Switch off ddtrace's global tracer for the duration of a test.

    Despite the name, the package is not removed: only `ddtrace.tracer.enabled` is set to `False`
    (and restored by `monkeypatch` afterwards).
    """
    global_datadog_tracer = ddtrace.tracer
    monkeypatch.setattr(global_datadog_tracer, "enabled", False)
def test_skip_auto_enable_tracer_if_already_configured(self) -> None:
my_tracker = Mock(spec=Tracer) # anything else than `NullTracer` works for this test
enable_tracing(my_tracker)
@ -127,3 +133,19 @@ class TestAutoEnableTracer:
activated_tracer = tracer.actual_tracer
assert isinstance(activated_tracer, NullTracer)
assert not is_tracing_enabled()
def test_skip_add_datadog_tracer_if_import_error(self, monkeypatch: MonkeyPatch) -> None:
    """Auto-enabling leaves the `NullTracer` in place when every import raises `ImportError`."""
    # Forget any already imported `ddtrace` and make each subsequent import statement fail
    failing_import = Mock(side_effect=ImportError)
    monkeypatch.delitem(sys.modules, "ddtrace", raising=False)
    monkeypatch.setattr(builtins, "__import__", failing_import)

    auto_enable_tracing()

    assert isinstance(tracer.actual_tracer, NullTracer)
    assert not is_tracing_enabled()
def test_add_datadog_tracer(self) -> None:
    """Auto-enabling installs a `DatadogTracer` as the actual tracer and turns tracing on."""
    auto_enable_tracing()

    assert isinstance(tracer.actual_tracer, DatadogTracer)
    assert is_tracing_enabled()