haystack/test/test_telemetry.py
mathislucka e5b9bdeb66
feat: AsyncPipeline that can schedule components to run concurrently (#8812)
* add component checks

* pipeline should run deterministically

* add FIFOQueue

* add agent tests

* add order dependent tests

* run new tests

* remove code that is not needed

* test: intermediate from cycle outputs are available outside cycle

* add tests for component checks (Claude)

* adapt tests for component checks (o1 review)

* chore: format

* remove tests that aren't needed anymore

* add _calculate_priority tests

* revert accidental change in pyproject.toml

* test format conversion

* adapt to naming convention

* chore: proper docstrings and type hints for PQ

* format

* add more unit tests

* rm unneeded comments

* test input consumption

* lint

* fix: docstrings

* lint

* format

* format

* fix license header

* fix license header

* add component run tests

* fix: pass correct input format to tracing

* fix types

* format

* format

* types

* add defaults from Socket instead of signature

- otherwise components with dynamic inputs would fail

* fix test names

* still wait for optional inputs on greedy variadic sockets

- mirrors previous behavior

* fix format

* wip: warn for ambiguous running order

* wip: alternative warning

* fix license header

* make code more readable

Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com>

* Introduce content tracing to a behavioral test

* Fixing linting

* Remove debug print statements

* Fix tracer tests

* remove print

* test: test for component inputs

* test: remove testing for run order

* chore: update component checks from experimental

* chore: update pipeline and base from experimental

* refactor: remove unused method

* refactor: remove unused method

* refactor: outdated comment

* refactor: inputs state is updated as side effect

- to prepare for AsyncPipeline implementation

* format

* test: add file conversion test

* format

* fix: original implementation deepcopies outputs

* lint

* fix: from_dict was updated

* fix: format

* fix: test

* test: add test for thread safety

* remove unused imports

* format

* test: FIFOPriorityQueue

* chore: add release note

* feat: add AsyncPipeline

* chore: Add release notes

* fix: format

* debug: switch run order to debug ubuntu and windows tests

* fix: consider priorities of other components while waiting for DEFER

* refactor: simplify code

* fix: resolve merge conflict with mermaid changes

* fix: format

* fix: remove unused import

* refactor: rename to avoid accidental conflicts

* fix: track pipeline type

* fix: and extend test

* fix: format

* style: sort alphabetically

* Update test/core/pipeline/features/conftest.py

Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com>

* Update test/core/pipeline/features/conftest.py

Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com>

* Update releasenotes/notes/feat-async-pipeline-338856a142e1318c.yaml

* fix: indentation, do not close loop

* fix: use asyncio.run

* fix: format

---------

Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com>
Co-authored-by: David S. Batista <dsbatista@gmail.com>
2025-02-07 16:37:29 +01:00

116 lines
3.6 KiB
Python

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import datetime
import logging
from unittest.mock import Mock, patch
import pytest
from haystack import AsyncPipeline, Pipeline, component
from haystack.core.serialization import generate_qualified_class_name
from haystack.telemetry._telemetry import pipeline_running
from haystack.utils.auth import Secret, TokenSecret
@pytest.mark.parametrize("pipeline_class", [Pipeline, AsyncPipeline])
@patch("haystack.telemetry._telemetry.telemetry")
def test_pipeline_running(telemetry, pipeline_class):
telemetry.send_event = Mock()
@component
class Component:
def _get_telemetry_data(self):
return {"key": "values"}
@component.output_types(value=int)
def run(self):
pass
pipe = pipeline_class()
pipe.add_component("component", Component())
pipeline_running(pipe)
expected_type = generate_qualified_class_name(type(pipe))
# First run is always sent
telemetry.send_event.assert_called_once_with(
"Pipeline run (2.x)",
{
"pipeline_id": str(id(pipe)),
"pipeline_type": expected_type,
"runs": 1,
"components": {"test.test_telemetry.Component": [{"name": "component", "key": "values"}]},
},
)
# Running again before one minute has passed should not send another event
telemetry.send_event.reset_mock()
pipeline_running(pipe)
telemetry.send_event.assert_not_called()
# Set the last telemetry sent time to pretend one minute has passed
pipe._last_telemetry_sent = pipe._last_telemetry_sent - datetime.timedelta(minutes=1)
telemetry.send_event.reset_mock()
pipeline_running(pipe)
telemetry.send_event.assert_called_once_with(
"Pipeline run (2.x)",
{
"pipeline_id": str(id(pipe)),
"pipeline_type": expected_type,
"runs": 3,
"components": {"test.test_telemetry.Component": [{"name": "component", "key": "values"}]},
},
)
@patch("haystack.telemetry._telemetry.telemetry")
def test_pipeline_running_with_non_serializable_component(telemetry):
telemetry.send_event = Mock()
@component
class Component:
def __init__(self, api_key: Secret = TokenSecret("api_key")):
self.api_key = api_key
def _get_telemetry_data(self):
return {"key": "values"}
@component.output_types(value=int)
def run(self):
pass
pipe = Pipeline()
pipe.add_component("component", Component())
pipeline_running(pipe)
telemetry.send_event.assert_called_once_with(
"Pipeline run (2.x)",
{
"pipeline_id": str(id(pipe)),
"pipeline_type": "haystack.core.pipeline.pipeline.Pipeline",
"runs": 1,
"components": {"test.test_telemetry.Component": [{"name": "component", "key": "values"}]},
},
)
def test_pipeline_running_with_non_dict_telemetry_data(caplog):
@component
class Component:
def __init__(self, api_key: Secret = TokenSecret("api_key")):
self.api_key = api_key
# telemetry data should be a dictionary but is a list
def _get_telemetry_data(self):
return ["values"]
@component.output_types(value=int)
def run(self):
pass
pipe = Pipeline()
pipe.add_component("my_component", Component())
with caplog.at_level(logging.DEBUG):
pipeline_running(pipe)
assert "TypeError: Telemetry data for component my_component must be a dictionary" in caplog.text