haystack/test/conftest.py
David S. Batista 3b9b1ae802
feat: adding debugging breakpoints to Pipeline and Agent (#9611)
* wip: fixing tests

* wip: fixing tests

* wip: fixing tests

* wip: fixing tests

* fixing circular imports

* decoupling resume and initial run() for agent

* adding release notes

* re-raising BreakPointException from pipeline.run()

* fixing imports

* refactor: Refactor suggestions for Pipeline breakpoints (#9614)

* Refactoring

* Start adding debug_path into Breakpoint class

* Fully move debug_path into Breakpoint dataclass

* Simplifications in pipeline run logic

* More simplification

* lint

* More simplification

* Updates

* Rename resume_state to pipeline_snapshot

* PR comments

* Missed renaming of state in a few more places

* feat: Add dataclasses to represent a `PipelineSnapshot` and refactored to use it (#9619)

* Refactor to use dataclasses for PipelineSnapshot and AgentSnapshot

* Fix integration tests

* Mypy

* Fix mypy

* Fix lint

* Refactor AgentSnapshot to only contain needed info

* Fix mypy

* More refactoring

* removing unused import

---------

Co-authored-by: David S. Batista <dsbatista@gmail.com>

* feat: saving include_outputs_from intermediate results to `PipelineState` object (#9629)

* saving intermediate components results in include_outputs_from into the PipelineSnaptshot

* cleaning up

* fixing tests

* fixing tests

* extending tests

* Update haystack/dataclasses/breakpoints.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* Update haystack/dataclasses/breakpoints.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* linting

* moving intermediate results to pipeline state and adding pipeline outputs to state

* moving ordered_component_names and include_outputs_from to PipelineSnapshot

* moving original_input_data to PipelineSnapshot

* simplifying saving the intermediate results

* Update haystack/dataclasses/breakpoints.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* Update haystack/dataclasses/breakpoints.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* Update haystack/dataclasses/breakpoints.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* Update haystack/dataclasses/breakpoints.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

---------

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* linting

* cleaning up

* avoiding creating PipelineSnapshot for every component run

* removing unecessary code

* Update checks in Agent to not unecessarily create AgentSnapshot when not needed.

* Update haystack/components/agents/agent.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* Update haystack/components/agents/agent.py

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>

* cleaning up tests

* linting

---------

Co-authored-by: Sebastian Husch Lee <10526848+sjrl@users.noreply.github.com>
Co-authored-by: Sebastian Husch Lee <sjrl423@gmail.com>
2025-07-24 08:54:23 +00:00

119 lines
3.6 KiB
Python

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0
import asyncio
import time
from pathlib import Path
from typing import Dict, Generator
from unittest.mock import Mock
import pytest
from haystack import component, tracing
from haystack.core.pipeline.breakpoint import load_pipeline_snapshot
from haystack.testing.test_utils import set_all_seeds
from test.tracing.utils import SpyingTracer
set_all_seeds(0)
# Tracing is disable by default to avoid failures in CI
tracing.disable_tracing()
@pytest.fixture()
def waiting_component():
@component
class Waiter:
@component.output_types(waited_for=int)
def run(self, wait_for: int) -> Dict[str, int]:
time.sleep(wait_for)
return {"waited_for": wait_for}
@component.output_types(waited_for=int)
async def run_async(self, wait_for: int) -> Dict[str, int]:
await asyncio.sleep(wait_for)
return {"waited_for": wait_for}
return Waiter
@pytest.fixture()
def mock_tokenizer():
"""
Tokenizes the string by splitting on spaces.
"""
tokenizer = Mock()
tokenizer.encode = lambda text: text.split()
tokenizer.decode = lambda tokens: " ".join(tokens)
return tokenizer
@pytest.fixture()
def test_files_path():
return Path(__file__).parent / "test_files"
@pytest.fixture(autouse=True)
def request_blocker(request: pytest.FixtureRequest, monkeypatch):
"""
This fixture is applied automatically to all tests.
Those that are not marked as integration will have the requests module
monkeypatched to avoid making HTTP requests by mistake.
"""
marker = request.node.get_closest_marker("integration")
if marker is not None:
return
def urlopen_mock(self, method, url, *args, **kwargs):
raise RuntimeError(f"The test was about to {method} {self.scheme}://{self.host}{url}")
monkeypatch.setattr("urllib3.connectionpool.HTTPConnectionPool.urlopen", urlopen_mock)
@pytest.fixture()
def spying_tracer() -> Generator[SpyingTracer, None, None]:
tracer = SpyingTracer()
tracing.enable_tracing(tracer)
tracer.is_content_tracing_enabled = True
yield tracer
# Make sure to disable tracing after the test to avoid affecting other tests
tracing.disable_tracing()
def load_and_resume_pipeline_snapshot(pipeline, output_directory: Path, component_name: str, data: Dict = None) -> Dict:
"""
Utility function to load and resume pipeline snapshot from a breakpoint file.
:param pipeline: The pipeline instance to resume
:param output_directory: Directory containing the breakpoint files
:param component_name: Component name to look for in breakpoint files
:param data: Data to pass to the pipeline run (defaults to empty dict)
:returns:
Dict containing the pipeline run results
:raises:
ValueError: If no breakpoint file is found for the given component
"""
data = data or {}
all_files = list(output_directory.glob("*"))
file_found = False
for full_path in all_files:
f_name = Path(full_path).name
if str(f_name).startswith(component_name):
pipeline_snapshot = load_pipeline_snapshot(full_path)
return pipeline.run(data=data, pipeline_snapshot=pipeline_snapshot)
if not file_found:
msg = f"No files found for {component_name} in {output_directory}."
raise ValueError(msg)
@pytest.fixture()
def base64_image_string():
return "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mP8/x8AAwMCAO+ip1sAAAAASUVORK5CYII="