# datahub/datahub-actions/tests/unit/cli/test_actions.py

import logging
import os
import tempfile
from contextlib import contextmanager
from typing import Generator
from unittest.mock import Mock

import pytest
from click.testing import CliRunner

from datahub_actions.cli.actions import actions, pipeline_config_to_pipeline
from datahub_actions.pipeline.pipeline import Pipeline
from datahub_actions.pipeline.pipeline_manager import PipelineManager


@contextmanager
def local_monkeypatch(
    monkeypatch: "pytest.MonkeyPatch", target: str, replacement: object
) -> Generator[None, None, None]:
    """Replace ``target`` with ``replacement`` for the duration of a ``with`` block.

    The patch is applied through a child scope obtained from
    ``monkeypatch.context()``, so leaving the block reverts only this one
    patch. Calling ``monkeypatch.undo()`` on the shared fixture instead would
    also revert every other patch registered on it, including patches made by
    the test itself or by enclosing ``local_monkeypatch`` blocks.

    Args:
        monkeypatch: The pytest ``monkeypatch`` fixture.
        target: Dotted import path of the attribute to replace.
        replacement: Object installed in place of the target.

    Yields:
        None; the patch is active while the ``with`` body runs.
    """
    with monkeypatch.context() as scoped_patch:
        scoped_patch.setattr(target, replacement)
        # The child scope undoes its own patch on exit, including when the
        # body raises.
        yield


@pytest.fixture
def capture_logger(
    caplog: pytest.LogCaptureFixture,
) -> Generator[pytest.LogCaptureFixture, None, None]:
    """Yield pytest's ``caplog`` fixture with its level set to ``logging.INFO``."""
    caplog.set_level(level=logging.INFO)
    yield caplog


@pytest.fixture
def mock_pipeline() -> Mock:
    """Return a ``Mock`` specced on ``Pipeline`` whose ``name`` is "test_pipeline"."""
    pipeline_double = Mock(spec=Pipeline)
    # Set ``name`` as a plain attribute after construction.
    pipeline_double.configure_mock(name="test_pipeline")
    return pipeline_double


@pytest.fixture
def mock_pipeline_manager() -> Mock:
    """Return a ``Mock`` specced on ``PipelineManager``.

    ``start_pipeline`` and ``stop_all`` are each replaced by a fresh ``Mock``
    that returns ``None``.
    """
    manager_double = Mock(spec=PipelineManager)
    for method_name in ("start_pipeline", "stop_all"):
        setattr(manager_double, method_name, Mock(return_value=None))
    return manager_double


@pytest.fixture
def temp_config_file() -> Generator[str, None, None]:
    """Yield the path of a temporary YAML config for a pipeline named "test_pipeline".

    The file is created with ``delete=False`` so it survives closing the
    handle, and it is removed after the fixture resumes from the ``yield``.
    """
    yaml_body = """
name: "test_pipeline"
datahub:
    server: "https://test.datahub.io"
    token: "test-token"
source:
    type: "datahub-cloud"
action:
    type: "hello_world"
"""
    handle = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False)
    with handle:
        handle.write(yaml_body)

    yield handle.name
    # Teardown: delete the file that was kept on disk for the test.
    os.remove(handle.name)


@pytest.fixture
def disabled_config_file() -> Generator[str, None, None]:
    """Yield the path of a temporary YAML config that sets ``enabled: false``.

    The pipeline in the file is named "disabled_pipeline". The file is created
    with ``delete=False`` so it survives closing the handle, and it is removed
    after the fixture resumes from the ``yield``.
    """
    yaml_body = """
name: "disabled_pipeline"
enabled: false
datahub:
    server: "https://test.datahub.io"
    token: "test-token"
source:
    type: "datahub-cloud"
action:
    type: "hello_world"
"""
    handle = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False)
    with handle:
        handle.write(yaml_body)

    yield handle.name
    # Teardown: delete the file that was kept on disk for the test.
    os.remove(handle.name)


@pytest.fixture
def invalid_config_file() -> Generator[str, None, None]:
    """Yield the path of a temporary YAML config that references ``${UNDEFINED_VAR}``.

    The pipeline in the file is named "invalid_pipeline" and its token is the
    unresolved ``${UNDEFINED_VAR}`` placeholder. The file is created with
    ``delete=False`` so it survives closing the handle, and it is removed
    after the fixture resumes from the ``yield``.
    """
    yaml_body = """
name: "invalid_pipeline"
datahub:
    server: "https://test.datahub.io"
    token: ${UNDEFINED_VAR}
source:
    type: "datahub-cloud"
action:
    type: "hello_world"
"""
    handle = tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False)
    with handle:
        handle.write(yaml_body)

    yield handle.name
    # Teardown: delete the file that was kept on disk for the test.
    os.remove(handle.name)


def test_version_command() -> None:
    """Invoke ``version`` and check it exits 0 and prints both version labels."""
    outcome = CliRunner().invoke(actions, ["version"])
    assert outcome.exit_code == 0
    for expected_label in ("DataHub Actions version:", "Python version:"):
        assert expected_label in outcome.output


def test_disabled_pipeline_exits(
    disabled_config_file: str, capture_logger: pytest.LogCaptureFixture
) -> None:
    """Run a single disabled config; expect exit code 1 and a "Skipping" log entry."""
    cli_args = ["run", "-c", disabled_config_file]
    expected_log = "Skipping pipeline disabled_pipeline as it is not enabled"

    outcome = CliRunner().invoke(actions, cli_args)

    assert outcome.exit_code == 1
    assert any(expected_log in entry.message for entry in capture_logger.records)


def test_invalid_config_single(invalid_config_file: str) -> None:
    """Run one config with an unbound variable; expect a non-zero exit and a matching error."""
    cli_args = ["run", "-c", invalid_config_file]
    outcome = CliRunner().invoke(actions, cli_args)
    assert outcome.exit_code != 0
    failure_text = str(outcome.exception)
    assert "Unbound variable(s) provided in config YAML" in failure_text


def test_all_configs_invalid_or_disabled(
    invalid_config_file: str,
    disabled_config_file: str,
) -> None:
    """Run an invalid config followed by a disabled one.

    Expects exit code 1 and an exception whose text reports the unbound
    variable(s) in the config YAML.
    """
    cli_args = ["run", "-c", invalid_config_file, "-c", disabled_config_file]
    outcome = CliRunner().invoke(actions, cli_args)
    assert outcome.exit_code == 1
    failure_text = str(outcome.exception)
    assert (
        "Failed to load action configuration. Unbound variable(s) provided in config YAML."
        in failure_text
    )


def test_all_configs_multiple_disabled(
    disabled_config_file: str,
    capture_logger: pytest.LogCaptureFixture,
) -> None:
    """Pass the same disabled config twice.

    Expects exit code 1 and a log entry stating that no valid pipelines were
    started from 2 config(s).
    """
    cli_args = ["run", "-c", disabled_config_file, "-c", disabled_config_file]
    expected_log = "No valid pipelines were started from 2 config(s). Check that at least one pipeline is enabled and all required environment variables are set."

    outcome = CliRunner().invoke(actions, cli_args)

    assert outcome.exit_code == 1
    assert any(expected_log in entry.message for entry in capture_logger.records)


def test_mixed_valid_and_invalid_configs(
    temp_config_file: str,
    disabled_config_file: str,
    capture_logger: pytest.LogCaptureFixture,
    monkeypatch: pytest.MonkeyPatch,
    mock_pipeline: Mock,
    mock_pipeline_manager: Mock,
) -> None:
    """Run the "test_pipeline" config together with a disabled config.

    ``Pipeline.create``, ``time.sleep`` and the CLI module's
    ``pipeline_manager`` are patched while the command runs. The test expects
    exit code 1, a "Skipping" log entry for the disabled pipeline and a
    "now running" log entry for ``test_pipeline``.
    """
    sleep_calls: list = []

    def mock_create_pipeline(config: dict) -> Pipeline:
        """Stand in for ``Pipeline.create`` and always return the mock pipeline."""
        return mock_pipeline

    def mock_sleep(seconds: int) -> None:
        """Record the call and raise ``KeyboardInterrupt`` from the second call on."""
        sleep_calls.append(seconds)
        # The first sleep is let through so the log entries are captured.
        if len(sleep_calls) > 1:
            raise KeyboardInterrupt()

    runner = CliRunner()
    cli_args = ["run", "-c", temp_config_file, "-c", disabled_config_file]

    # Patches are entered in the same order as before: create, sleep, manager.
    with local_monkeypatch(
        monkeypatch,
        "datahub_actions.pipeline.pipeline.Pipeline.create",
        mock_create_pipeline,
    ):
        with local_monkeypatch(monkeypatch, "time.sleep", mock_sleep):
            with local_monkeypatch(
                monkeypatch,
                "datahub_actions.cli.actions.pipeline_manager",
                mock_pipeline_manager,
            ):
                result = runner.invoke(actions, cli_args)

    assert result.exit_code == 1
    for expected_log in (
        "Skipping pipeline disabled_pipeline as it is not enabled",
        "Action Pipeline with name 'test_pipeline' is now running.",
    ):
        assert any(expected_log in entry.message for entry in capture_logger.records)


def test_debug_mode_with_valid_config(
    temp_config_file: str,
    capture_logger: pytest.LogCaptureFixture,
    monkeypatch: pytest.MonkeyPatch,
    mock_pipeline: Mock,
    mock_pipeline_manager: Mock,
) -> None:
    """Run the "test_pipeline" config with the ``--debug`` flag.

    ``Pipeline.create``, ``time.sleep`` and the CLI module's
    ``pipeline_manager`` are patched while the command runs. The test expects
    exit code 1 and a "now running" log entry for ``test_pipeline``.
    """
    sleep_calls: list = []

    def mock_create_pipeline(config: dict) -> Pipeline:
        """Stand in for ``Pipeline.create`` and always return the mock pipeline."""
        return mock_pipeline

    def mock_sleep(seconds: int) -> None:
        """Record the call and raise ``KeyboardInterrupt`` from the second call on."""
        sleep_calls.append(seconds)
        # The first sleep is let through so the log entries are captured.
        if len(sleep_calls) > 1:
            raise KeyboardInterrupt()

    runner = CliRunner()
    cli_args = ["run", "-c", temp_config_file, "--debug"]
    expected_log = "Action Pipeline with name 'test_pipeline' is now running."

    # Patches are entered in the same order as before: create, sleep, manager.
    with local_monkeypatch(
        monkeypatch,
        "datahub_actions.pipeline.pipeline.Pipeline.create",
        mock_create_pipeline,
    ):
        with local_monkeypatch(monkeypatch, "time.sleep", mock_sleep):
            with local_monkeypatch(
                monkeypatch,
                "datahub_actions.cli.actions.pipeline_manager",
                mock_pipeline_manager,
            ):
                result = runner.invoke(actions, cli_args)

    assert result.exit_code == 1
    assert any(expected_log in entry.message for entry in capture_logger.records)


def test_pipeline_config_to_pipeline_error() -> None:
    """Check that a config holding only ``name`` makes pipeline creation raise.

    The raised exception's text must contain
    "Failed to instantiate Actions Pipeline".
    """
    # Only the name is supplied; every other field is deliberately left out.
    name_only_config = {"name": "invalid_pipeline"}

    with pytest.raises(Exception) as raised:
        pipeline_config_to_pipeline(name_only_config)

    failure_text = str(raised.value)
    assert "Failed to instantiate Actions Pipeline" in failure_text


def test_multiple_disabled_configs(
    disabled_config_file: str,
    capture_logger: pytest.LogCaptureFixture,
) -> None:
    """Pass the same disabled config twice and check both expected log entries.

    Expects exit code 1, the "No valid pipelines were started" summary and
    the "Skipping" entry for the disabled pipeline.
    """
    cli_args = ["run", "-c", disabled_config_file, "-c", disabled_config_file]
    outcome = CliRunner().invoke(actions, cli_args)
    assert outcome.exit_code == 1
    for expected_log in (
        "No valid pipelines were started from 2 config(s). Check that at least one pipeline is enabled and all required environment variables are set.",
        "Skipping pipeline disabled_pipeline as it is not enabled",
    ):
        assert any(expected_log in entry.message for entry in capture_logger.records)


# Static-typing smoke test: the annotated assignments are there for mypy.
def test_type_annotations() -> None:
    """Assign annotated sample values and confirm their runtime types."""
    # The annotations on these two assignments are what mypy inspects.
    sample_config: dict = {
        "name": "test_pipeline",
        "datahub": {"server": "https://test.datahub.io", "token": "test-token"},
        "source": {"type": "datahub-cloud"},
        "action": {"type": "hello_world"},
    }
    cli_runner: CliRunner = CliRunner()

    # Runtime counterpart of the static check above.
    assert isinstance(cli_runner, CliRunner)
    assert isinstance(sample_config, dict)
feat(actions): Moving datahub-actions into oss datahub (#13120) 2025-04-16 12:23:29 +02:00			`import logging`
			`import os`
			`import tempfile`
			`from contextlib import contextmanager`
			`from typing import Generator`
			`from unittest.mock import Mock`

			`import pytest`
			`from click.testing import CliRunner`

			`from datahub_actions.cli.actions import actions, pipeline_config_to_pipeline`
			`from datahub_actions.pipeline.pipeline import Pipeline`
			`from datahub_actions.pipeline.pipeline_manager import PipelineManager`


			`@contextmanager`
			`def local_monkeypatch(monkeypatch, target, replacement):`
			`"""Apply monkeypatch temporarily within a context."""`
			`monkeypatch.setattr(target, replacement)`
			`try:`
			`yield`
			`finally:`
			`monkeypatch.undo()`


			`@pytest.fixture`
			`def capture_logger(`
			`caplog: pytest.LogCaptureFixture,`
			`) -> Generator[pytest.LogCaptureFixture, None, None]:`
			`"""Fixture to capture logger output with the proper level."""`
			`caplog.set_level(logging.INFO)`
			`yield caplog`


			`@pytest.fixture`
			`def mock_pipeline() -> Mock:`
			`"""Create a mock pipeline object."""`
			`mock = Mock(spec=Pipeline)`
			`mock.name = "test_pipeline"`
			`return mock`


			`@pytest.fixture`
			`def mock_pipeline_manager() -> Mock:`
			`"""Create a mock pipeline manager."""`
			`mock = Mock(spec=PipelineManager)`
			`mock.start_pipeline = Mock(return_value=None)`
			`mock.stop_all = Mock(return_value=None)`
			`return mock`


			`@pytest.fixture`
			`def temp_config_file() -> Generator[str, None, None]:`
			`"""Creates a temporary YAML config file for testing."""`
			`with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:`
			`f.write(`
			`"""`
			`name: "test_pipeline"`
			`datahub:`
			`server: "https://test.datahub.io"`
			`token: "test-token"`
			`source:`
			`type: "datahub-cloud"`
			`action:`
			`type: "hello_world"`
			`"""`
			`)`
			`config_path = f.name`

			`yield config_path`
			`# Cleanup`
			`os.unlink(config_path)`


			`@pytest.fixture`
			`def disabled_config_file() -> Generator[str, None, None]:`
			`"""Creates a temporary YAML config file with disabled pipeline."""`
			`with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:`
			`f.write(`
			`"""`
			`name: "disabled_pipeline"`
			`enabled: false`
			`datahub:`
			`server: "https://test.datahub.io"`
			`token: "test-token"`
			`source:`
			`type: "datahub-cloud"`
			`action:`
			`type: "hello_world"`
			`"""`
			`)`
			`config_path = f.name`

			`yield config_path`
			`# Cleanup`
			`os.unlink(config_path)`


			`@pytest.fixture`
			`def invalid_config_file() -> Generator[str, None, None]:`
			`"""Creates a temporary YAML config file with unbound variables."""`
			`with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:`
			`f.write(`
			`"""`
			`name: "invalid_pipeline"`
			`datahub:`
			`server: "https://test.datahub.io"`
			`token: ${UNDEFINED_VAR}`
			`source:`
			`type: "datahub-cloud"`
			`action:`
			`type: "hello_world"`
			`"""`
			`)`
			`config_path = f.name`

			`yield config_path`
			`# Cleanup`
			`os.unlink(config_path)`


			`def test_version_command() -> None:`
			`"""Test the version command outputs version information."""`
			`runner = CliRunner()`
			`result = runner.invoke(actions, ["version"])`
			`assert result.exit_code == 0`
			`assert "DataHub Actions version:" in result.output`
			`assert "Python version:" in result.output`


			`def test_disabled_pipeline_exits(`
			`disabled_config_file: str, capture_logger: pytest.LogCaptureFixture`
			`) -> None:`
			`"""Test that disabled pipelines cause program exit."""`
			`runner = CliRunner()`
			`result = runner.invoke(actions, ["run", "-c", disabled_config_file])`

			`assert result.exit_code == 1`
			`assert any(`
			`"Skipping pipeline disabled_pipeline as it is not enabled" in record.message`
			`for record in capture_logger.records`
			`)`


			`def test_invalid_config_single(invalid_config_file: str) -> None:`
			`"""Test handling of single config with unbound variables."""`
			`runner = CliRunner()`
			`result = runner.invoke(actions, ["run", "-c", invalid_config_file])`
			`assert result.exit_code != 0`
			`assert "Unbound variable(s) provided in config YAML" in str(result.exception)`


			`def test_all_configs_invalid_or_disabled(`
			`invalid_config_file: str,`
			`disabled_config_file: str,`
			`) -> None:`
			`"""Test that program exits when all configs are invalid or disabled."""`
			`runner = CliRunner()`
			`result = runner.invoke(`
			`actions, ["run", "-c", invalid_config_file, "-c", disabled_config_file]`
			`)`
			`assert result.exit_code == 1`
			`assert (`
			`"Failed to load action configuration. Unbound variable(s) provided in config YAML."`
			`in str(result.exception)`
			`)`


			`def test_all_configs_multiple_disabled(`
			`disabled_config_file: str,`
			`capture_logger: pytest.LogCaptureFixture,`
			`) -> None:`
			`"""Test that program exits when all configs are invalid or disabled."""`
			`runner = CliRunner()`
			`result = runner.invoke(`
			`actions, ["run", "-c", disabled_config_file, "-c", disabled_config_file]`
			`)`
			`assert result.exit_code == 1`
			`assert any(`
			`"No valid pipelines were started from 2 config(s). Check that at least one pipeline is enabled and all required environment variables are set."`
			`in record.message`
			`for record in capture_logger.records`
			`)`


			`def test_mixed_valid_and_invalid_configs(`
			`temp_config_file: str,`
			`disabled_config_file: str,`
			`capture_logger: pytest.LogCaptureFixture,`
			`monkeypatch: pytest.MonkeyPatch,`
			`mock_pipeline: Mock,`
			`mock_pipeline_manager: Mock,`
			`) -> None:`
			`"""Test handling mix of valid and invalid configs."""`

			`def mock_create_pipeline(config: dict) -> Pipeline:`
			`"""Mock implementation of pipeline creation."""`
			`return mock_pipeline`

			`sleep_count = 0`

			`def mock_sleep(seconds: int) -> None:`
			`"""Mock sleep that raises KeyboardInterrupt after first call."""`
			`nonlocal sleep_count`
			`sleep_count += 1`
			`if sleep_count > 1: # Allow one sleep to ensure logs are captured`
			`raise KeyboardInterrupt()`

			`runner = CliRunner()`

			`# Use local_monkeypatch for tighter control`

			`with local_monkeypatch(`
			`monkeypatch,`
			`"datahub_actions.pipeline.pipeline.Pipeline.create",`
			`mock_create_pipeline,`
			`), local_monkeypatch(monkeypatch, "time.sleep", mock_sleep), local_monkeypatch(`
			`monkeypatch,`
			`"datahub_actions.cli.actions.pipeline_manager",`
			`mock_pipeline_manager,`
			`):`
			`result = runner.invoke(`
			`actions, ["run", "-c", temp_config_file, "-c", disabled_config_file]`
			`)`

			`assert result.exit_code == 1`
			`assert any(`
			`"Skipping pipeline disabled_pipeline as it is not enabled" in record.message`
			`for record in capture_logger.records`
			`)`
			`assert any(`
			`"Action Pipeline with name 'test_pipeline' is now running." in record.message`
			`for record in capture_logger.records`
			`)`


			`def test_debug_mode_with_valid_config(`
			`temp_config_file: str,`
			`capture_logger: pytest.LogCaptureFixture,`
			`monkeypatch: pytest.MonkeyPatch,`
			`mock_pipeline: Mock,`
			`mock_pipeline_manager: Mock,`
			`) -> None:`
			`"""Test debug mode with valid pipeline config."""`

			`def mock_create_pipeline(config: dict) -> Pipeline:`
			`"""Mock implementation of pipeline creation."""`
			`return mock_pipeline`

			`sleep_count = 0`

			`def mock_sleep(seconds: int) -> None:`
			`"""Mock sleep that raises KeyboardInterrupt after first call."""`
			`nonlocal sleep_count`
			`sleep_count += 1`
			`if sleep_count > 1: # Allow one sleep to ensure logs are captured`
			`raise KeyboardInterrupt()`

			`runner = CliRunner()`

			`# Use local_monkeypatch for tighter control`
			`with local_monkeypatch(`
			`monkeypatch,`
			`"datahub_actions.pipeline.pipeline.Pipeline.create",`
			`mock_create_pipeline,`
			`), local_monkeypatch(monkeypatch, "time.sleep", mock_sleep), local_monkeypatch(`
			`monkeypatch,`
			`"datahub_actions.cli.actions.pipeline_manager",`
			`mock_pipeline_manager,`
			`):`
			`result = runner.invoke(`
			`actions,`
			`["run", "-c", temp_config_file, "--debug"],`
			`)`

			`assert result.exit_code == 1`
			`assert any(`
			`"Action Pipeline with name 'test_pipeline' is now running." in record.message`
			`for record in capture_logger.records`
			`)`


			`def test_pipeline_config_to_pipeline_error() -> None:`
			`"""Test error handling in pipeline creation."""`
			`invalid_config = {`
			`"name": "invalid_pipeline",`
			`# Missing required fields`
			`}`

			`with pytest.raises(Exception) as exc_info:`
			`pipeline_config_to_pipeline(invalid_config)`
			`assert "Failed to instantiate Actions Pipeline" in str(exc_info.value)`


			`def test_multiple_disabled_configs(`
			`disabled_config_file: str,`
			`capture_logger: pytest.LogCaptureFixture,`
			`) -> None:`
			`"""Test handling of multiple disabled configs."""`
			`runner = CliRunner()`
			`result = runner.invoke(`
			`actions, ["run", "-c", disabled_config_file, "-c", disabled_config_file]`
			`)`
			`assert result.exit_code == 1`
			`assert any(`
			`"No valid pipelines were started from 2 config(s). Check that at least one pipeline is enabled and all required environment variables are set."`
			`in record.message`
			`for record in capture_logger.records`
			`)`
			`assert any(`
			`"Skipping pipeline disabled_pipeline as it is not enabled" in record.message`
			`for record in capture_logger.records`
			`)`


			`# Type checking with mypy annotations`
			`def test_type_annotations() -> None:`
			`"""Verify type hints are correct (this is a compile-time check)."""`
			`# These assignments will be checked by mypy`
			`runner: CliRunner = CliRunner()`
			`config_dict: dict = {`
			`"name": "test_pipeline",`
			`"datahub": {"server": "https://test.datahub.io", "token": "test-token"},`
			`"source": {"type": "datahub-cloud"},`
			`"action": {"type": "hello_world"},`
			`}`

			`# These assertions serve as runtime checks`
			`assert isinstance(runner, CliRunner)`
			`assert isinstance(config_dict, dict)`