haystack/test/core/pipeline/test_utils.py

# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import pytest

from haystack.core.pipeline.utils import parse_connect_string, FIFOPriorityQueue


def test_parse_connection():
    """Check how parse_connect_string splits its input.

    A string without a dot comes back paired with ``None``; a dotted string
    comes back as the two parts around the dot.
    """
    expectations = {
        "foobar": ("foobar", None),
        "foo.bar": ("foo", "bar"),
    }
    for connect_string, parsed in expectations.items():
        assert parse_connect_string(connect_string) == parsed


@pytest.fixture
def empty_queue():
    """Create a new FIFOPriorityQueue with nothing in it and hand it to the test."""
    queue = FIFOPriorityQueue()
    return queue


def test_empty_queue_initialization(empty_queue):
    """A freshly created queue has length zero and evaluates to False."""
    size = len(empty_queue)
    assert size == 0
    assert bool(empty_queue) is False


def test_push_single_item(empty_queue):
    """After one push the queue holds one item, is truthy, and peek shows that item."""
    pushed_item, pushed_priority = "item1", 1
    empty_queue.push(pushed_item, pushed_priority)

    assert len(empty_queue) == 1
    assert bool(empty_queue) is True

    # peek reports the entry as a (priority, item) tuple
    head = empty_queue.peek()
    assert head == (pushed_priority, pushed_item)


def test_push_multiple_items_different_priorities(empty_queue):
    """Items pushed in scrambled order are popped by ascending priority value."""
    empty_queue.push("item3", 3)
    empty_queue.push("item1", 1)
    empty_queue.push("item2", 2)

    # The smallest priority value is expected first, regardless of push order
    for expected in [(1, "item1"), (2, "item2"), (3, "item3")]:
        assert empty_queue.pop() == expected


def test_push_multiple_items_same_priority(empty_queue):
    """Items sharing one priority are popped in the order they were pushed."""
    shared_priority = 1
    names = ["first", "second", "third"]
    for name in names:
        empty_queue.push(name, shared_priority)

    # With equal priorities, pop order must mirror push order
    for name in names:
        assert empty_queue.pop() == (shared_priority, name)


def test_mixed_priority_and_fifo(empty_queue):
    """Priority decides the pop order first; push order breaks ties."""
    pushes = [("medium1", 2), ("high", 1), ("medium2", 2), ("low", 3)]
    for name, priority in pushes:
        empty_queue.push(name, priority)

    # "medium1" was pushed before "medium2", so it must be popped first among the 2s
    expected_pops = [(1, "high"), (2, "medium1"), (2, "medium2"), (3, "low")]
    for expected in expected_pops:
        assert empty_queue.pop() == expected


def test_peek_behavior(empty_queue):
    """Repeated peeks keep returning the same head and leave the length unchanged."""
    for name, priority in (("item1", 1), ("item2", 2)):
        empty_queue.push(name, priority)

    # Three consecutive peeks must all see the same state
    attempts = 0
    while attempts < 3:
        assert empty_queue.peek() == (1, "item1")
        assert len(empty_queue) == 2
        attempts += 1


def test_get_behavior(empty_queue):
    """get() gives None on an empty queue and otherwise returns and consumes the head."""
    # Nothing has been pushed yet
    nothing = empty_queue.get()
    assert nothing is None

    # One item in, one item out
    empty_queue.push("item1", 1)
    retrieved = empty_queue.get()
    assert retrieved == (1, "item1")

    # The only item was consumed, so the queue is empty again
    assert empty_queue.get() is None


def test_pop_empty_queue(empty_queue):
    """pop() on an empty queue must raise IndexError matching the expected message."""
    expected_error = pytest.raises(IndexError, match="pop from empty queue")
    with expected_error:
        empty_queue.pop()


def test_peek_empty_queue(empty_queue):
    """peek() on an empty queue must raise IndexError matching the expected message."""
    expected_error = pytest.raises(IndexError, match="peek at empty queue")
    with expected_error:
        empty_queue.peek()


def test_length_updates(empty_queue):
    """len() follows every push and every pop."""
    assert len(empty_queue) == 0

    # Each push grows the queue by one
    for expected_len, (name, priority) in enumerate([("item1", 1), ("item2", 2)], start=1):
        empty_queue.push(name, priority)
        assert len(empty_queue) == expected_len

    # Each pop shrinks it by one
    for expected_len in (1, 0):
        empty_queue.pop()
        assert len(empty_queue) == expected_len


def test_bool_conversion(empty_queue):
    """bool() is False when empty, True with content, and False again once drained."""
    # Start: nothing inside
    assert bool(empty_queue) is False

    # One item inside
    empty_queue.push("item", 1)
    assert bool(empty_queue) is True

    # Item removed again
    empty_queue.pop()
    assert bool(empty_queue) is False


def test_large_number_of_items(empty_queue):
    """Push 1000 items over 10 priority levels and verify the pop order.

    Priorities must never decrease from one pop to the next, and within one
    priority the items must come out in the order they were pushed.
    """
    total_items = 1000
    priority_levels = 10
    for index in range(total_items):
        empty_queue.push(f"item{index}", index % priority_levels)

    # (priority, push index) of the previously popped entry; -1 sorts before any real value
    previous = (-1, -1)
    for _ in range(total_items):
        priority, item = empty_queue.pop()
        index = int(item[len("item"):])  # the digits after "item" are the push index
        prev_priority, prev_index = previous

        if priority != prev_priority:
            assert priority > prev_priority, "Priority order violated"
        else:
            assert index > prev_index, "FIFO order violated within same priority"

        previous = (priority, index)


@pytest.mark.parametrize(
    "items",
    [
        [(1, "A"), (1, "B"), (1, "C")],  # Same priority
        [(3, "A"), (2, "B"), (1, "C")],  # Different priorities
        [(2, "A"), (1, "B"), (2, "C")],  # Mixed priorities
    ],
)
def test_queue_ordering_parametrized(empty_queue, items):
    """Check the pop order for several (priority, item) push sequences.

    Every entry of ``items`` is pushed in list order. The queue must then
    return the entries sorted by priority, with entries of equal priority
    kept in their push order, and must be empty afterwards.
    """
    for priority, item in items:
        empty_queue.push(item, priority)

    # sorted() is stable, so sorting on priority alone already preserves push
    # order among ties; no per-element items.index() lookup is needed.
    expected_order = sorted(items, key=lambda entry: entry[0])
    for expected in expected_order:
        assert empty_queue.pop() == expected

    # Everything that was pushed must have been popped again.
    assert len(empty_queue) == 0
Move tests from test_connect.py in test_pipeline.py and test_utils.py (#7742) 2024-05-24 16:41:38 +02:00			`# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>`
			`#`
			`# SPDX-License-Identifier: Apache-2.0`
fix: pipeline run bugs in cyclic and acyclic pipelines (#8707) * add component checks * pipeline should run deterministically * add FIFOQueue * add agent tests * add order dependent tests * run new tests * remove code that is not needed * test: intermediate from cycle outputs are available outside cycle * add tests for component checks (Claude) * adapt tests for component checks (o1 review) * chore: format * remove tests that aren't needed anymore * add _calculate_priority tests * revert accidental change in pyproject.toml * test format conversion * adapt to naming convention * chore: proper docstrings and type hints for PQ * format * add more unit tests * rm unneeded comments * test input consumption * lint * fix: docstrings * lint * format * format * fix license header * fix license header * add component run tests * fix: pass correct input format to tracing * fix types * format * format * types * add defaults from Socket instead of signature - otherwise components with dynamic inputs would fail * fix test names * still wait for optional inputs on greedy variadic sockets - mirrors previous behavior * fix format * wip: warn for ambiguous running order * wip: alternative warning * fix license header * make code more readable Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com> * Introduce content tracing to a behavioral test * Fixing linting * Remove debug print statements * Fix tracer tests * remove print * test: test for component inputs * test: remove testing for run order * chore: update component checks from experimental * chore: update pipeline and base from experimental * refactor: remove unused method * refactor: remove unused method * refactor: outdated comment * refactor: inputs state is updated as side effect - to prepare for AsyncPipeline implementation * format * test: add file conversion test * format * fix: original implementation deepcopies outputs * lint * fix: from_dict was updated * fix: format * fix: test * test: add test for thread safety * 
remove unused imports * format * test: FIFOPriorityQueue * chore: add release note * fix: resolve merge conflict with mermaid changes * fix: format * fix: remove unused import * refactor: rename to avoid accidental conflicts * chore: remove unused inputs, add missing license header * chore: extend release notes * Update releasenotes/notes/fix-pipeline-run-2fefeafc705a6d91.yaml Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com> * fix: format * fix: format * Update release note --------- Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com> Co-authored-by: David S. Batista <dsbatista@gmail.com> 2025-02-06 15:19:47 +01:00
			`import pytest`

			`from haystack.core.pipeline.utils import parse_connect_string, FIFOPriorityQueue`
Move tests from test_connect.py in test_pipeline.py and test_utils.py (#7742) 2024-05-24 16:41:38 +02:00

			`def test_parse_connection():`
			`assert parse_connect_string("foobar") == ("foobar", None)`
			`assert parse_connect_string("foo.bar") == ("foo", "bar")`
fix: pipeline run bugs in cyclic and acyclic pipelines (#8707) * add component checks * pipeline should run deterministically * add FIFOQueue * add agent tests * add order dependent tests * run new tests * remove code that is not needed * test: intermediate from cycle outputs are available outside cycle * add tests for component checks (Claude) * adapt tests for component checks (o1 review) * chore: format * remove tests that aren't needed anymore * add _calculate_priority tests * revert accidental change in pyproject.toml * test format conversion * adapt to naming convention * chore: proper docstrings and type hints for PQ * format * add more unit tests * rm unneeded comments * test input consumption * lint * fix: docstrings * lint * format * format * fix license header * fix license header * add component run tests * fix: pass correct input format to tracing * fix types * format * format * types * add defaults from Socket instead of signature - otherwise components with dynamic inputs would fail * fix test names * still wait for optional inputs on greedy variadic sockets - mirrors previous behavior * fix format * wip: warn for ambiguous running order * wip: alternative warning * fix license header * make code more readable Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com> * Introduce content tracing to a behavioral test * Fixing linting * Remove debug print statements * Fix tracer tests * remove print * test: test for component inputs * test: remove testing for run order * chore: update component checks from experimental * chore: update pipeline and base from experimental * refactor: remove unused method * refactor: remove unused method * refactor: outdated comment * refactor: inputs state is updated as side effect - to prepare for AsyncPipeline implementation * format * test: add file conversion test * format * fix: original implementation deepcopies outputs * lint * fix: from_dict was updated * fix: format * fix: test * test: add test for thread safety * 
remove unused imports * format * test: FIFOPriorityQueue * chore: add release note * fix: resolve merge conflict with mermaid changes * fix: format * fix: remove unused import * refactor: rename to avoid accidental conflicts * chore: remove unused inputs, add missing license header * chore: extend release notes * Update releasenotes/notes/fix-pipeline-run-2fefeafc705a6d91.yaml Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com> * fix: format * fix: format * Update release note --------- Co-authored-by: Amna Mubashar <amnahkhan.ak@gmail.com> Co-authored-by: David S. Batista <dsbatista@gmail.com> 2025-02-06 15:19:47 +01:00

			`@pytest.fixture`
			`def empty_queue():`
			`"""Fixture providing a fresh empty queue for each test."""`
			`return FIFOPriorityQueue()`


			`def test_empty_queue_initialization(empty_queue):`
			`"""Test that a new queue is empty."""`
			`assert len(empty_queue) == 0`
			`assert not bool(empty_queue)`


			`def test_push_single_item(empty_queue):`
			`"""Test pushing a single item."""`
			`empty_queue.push("item1", 1)`
			`assert len(empty_queue) == 1`
			`assert bool(empty_queue)`
			`assert empty_queue.peek() == (1, "item1")`


			`def test_push_multiple_items_different_priorities(empty_queue):`
			`"""Test pushing multiple items with different priorities."""`
			`items = [("item3", 3), ("item1", 1), ("item2", 2)]`
			`for item, priority in items:`
			`empty_queue.push(item, priority)`

			`# Items should come out in priority order`
			`assert empty_queue.pop() == (1, "item1")`
			`assert empty_queue.pop() == (2, "item2")`
			`assert empty_queue.pop() == (3, "item3")`


			`def test_push_multiple_items_same_priority(empty_queue):`
			`"""Test FIFO behavior for items with equal priority."""`
			`items = [("first", 1), ("second", 1), ("third", 1)]`
			`for item, priority in items:`
			`empty_queue.push(item, priority)`

			`# Items should come out in insertion order`
			`assert empty_queue.pop() == (1, "first")`
			`assert empty_queue.pop() == (1, "second")`
			`assert empty_queue.pop() == (1, "third")`


			`def test_mixed_priority_and_fifo(empty_queue):`
			`"""Test mixed priority levels with some equal priorities."""`
			`empty_queue.push("medium1", 2)`
			`empty_queue.push("high", 1)`
			`empty_queue.push("medium2", 2)`
			`empty_queue.push("low", 3)`

			`# Check extraction order`
			`assert empty_queue.pop() == (1, "high")`
			`assert empty_queue.pop() == (2, "medium1")`
			`assert empty_queue.pop() == (2, "medium2")`
			`assert empty_queue.pop() == (3, "low")`


			`def test_peek_behavior(empty_queue):`
			`"""Test that peek returns items without removing them."""`
			`empty_queue.push("item1", 1)`
			`empty_queue.push("item2", 2)`

			`# Peek multiple times`
			`for _ in range(3):`
			`assert empty_queue.peek() == (1, "item1")`
			`assert len(empty_queue) == 2`


			`def test_get_behavior(empty_queue):`
			`"""Test the get method with both empty and non-empty queues."""`
			`# Test on empty queue`
			`assert empty_queue.get() is None`

			`# Test with items`
			`empty_queue.push("item1", 1)`
			`assert empty_queue.get() == (1, "item1")`
			`assert empty_queue.get() is None # Queue should be empty again`


			`def test_pop_empty_queue(empty_queue):`
			`"""Test that pop raises IndexError on empty queue."""`
			`with pytest.raises(IndexError, match="pop from empty queue"):`
			`empty_queue.pop()`


			`def test_peek_empty_queue(empty_queue):`
			`"""Test that peek raises IndexError on empty queue."""`
			`with pytest.raises(IndexError, match="peek at empty queue"):`
			`empty_queue.peek()`


			`def test_length_updates(empty_queue):`
			`"""Test that length updates correctly with pushes and pops."""`
			`initial_len = len(empty_queue)`
			`assert initial_len == 0`

			`# Test length increases`
			`empty_queue.push("item1", 1)`
			`assert len(empty_queue) == 1`
			`empty_queue.push("item2", 2)`
			`assert len(empty_queue) == 2`

			`# Test length decreases`
			`empty_queue.pop()`
			`assert len(empty_queue) == 1`
			`empty_queue.pop()`
			`assert len(empty_queue) == 0`


			`def test_bool_conversion(empty_queue):`
			`"""Test boolean conversion in various states."""`
			`# Empty queue should be False`
			`assert not bool(empty_queue)`

			`# Queue with items should be True`
			`empty_queue.push("item", 1)`
			`assert bool(empty_queue)`

			`# Queue should be False again after removing item`
			`empty_queue.pop()`
			`assert not bool(empty_queue)`


			`def test_large_number_of_items(empty_queue):`
			`"""Test handling of a large number of items with mixed priorities."""`
			`# Add 1000 items with 10 different priority levels`
			`for i in range(1000):`
			`priority = i % 10`
			`empty_queue.push(f"item{i}", priority)`

			`# Verify FIFO order within same priority`
			`last_priority = -1`
			`last_index = -1`
			`for _ in range(1000):`
			`priority, item = empty_queue.pop()`
			`current_index = int(item[4:]) # Extract index from "itemX"`

			`if priority == last_priority:`
			`assert current_index > last_index, "FIFO order violated within same priority"`
			`else:`
			`assert priority > last_priority, "Priority order violated"`

			`last_priority = priority`
			`last_index = current_index`


			`@pytest.mark.parametrize(`
			`"items",`
			`[`
			`[(1, "A"), (1, "B"), (1, "C")], # Same priority`
			`[(3, "A"), (2, "B"), (1, "C")], # Different priorities`
			`[(2, "A"), (1, "B"), (2, "C")], # Mixed priorities`
			`],`
			`)`
			`def test_queue_ordering_parametrized(empty_queue, items):`
			`"""Parametrized test for different ordering scenarios."""`
			`for priority, item in items:`
			`empty_queue.push(item, priority)`

			`sorted_items = sorted(items, key=lambda x: (x[0], items.index(x)))`
			`for priority, item in sorted_items:`
			`assert empty_queue.pop() == (priority, item)`