2025-09-23 18:18:32 +00:00

161 lines
4.7 KiB
Python

import json
import pathlib
from typing import Any
from unittest.mock import patch
import time_machine
from datahub.ingestion.run.pipeline import Pipeline
from datahub.testing import mce_helpers
# Timestamp handed to ``time_machine.travel`` by both tests in this module, so
# every run sees the same clock value.
FROZEN_TIME = "2020-04-14 07:00:00"
# Organization name placed in the recipes' ``org_name`` setting and embedded in
# the mocked lineage URL.
ORG_NAME = "TEST_ORG"
def default_recipe(tmp_path, output_file_name="snaplogic_mces_default_config.json"):
    """Build the baseline SnapLogic ingestion recipe.

    Args:
        tmp_path: Directory in which the file sink writes its output.
        output_file_name: Name of the sink's output file inside ``tmp_path``.

    Returns:
        A recipe dict with a ``snaplogic`` source (stateful ingestion switched
        off) and a ``file`` sink pointing at ``tmp_path/output_file_name``.
    """
    source_config = {
        "username": "example@snaplogic.com",
        "password": "dummy_password",
        "base_url": "https://elastic.snaplogic.com",
        "org_name": ORG_NAME,
        "stateful_ingestion": {
            "enabled": False,
            "remove_stale_metadata": False,
        },
    }
    sink_config = {"filename": f"{tmp_path}/{output_file_name}"}

    recipe = {
        "run_id": "test-snaplogic",
        "pipeline_name": "snaplogic_ingest",
        "source": {"type": "snaplogic", "config": source_config},
        "sink": {"type": "file", "config": sink_config},
    }
    return recipe
def create_non_snaplogic_datasets_recipe(
    tmp_path,
    output_file_name="snaplogic_mces_create_non_snaplogic_datasets_config.json",
):
    """Build a SnapLogic recipe with ``create_non_snaplogic_datasets`` turned on.

    Args:
        tmp_path: Directory in which the file sink writes its output.
        output_file_name: Name of the sink's output file inside ``tmp_path``.

    Returns:
        A recipe dict with a ``snaplogic`` source (stateful ingestion switched
        off, ``create_non_snaplogic_datasets`` set to ``True``) and a ``file``
        sink pointing at ``tmp_path/output_file_name``.
    """
    source_config = {
        "username": "example@snaplogic.com",
        "password": "dummy_password",
        "base_url": "https://elastic.snaplogic.com",
        "org_name": ORG_NAME,
        "create_non_snaplogic_datasets": True,
        "stateful_ingestion": {
            "enabled": False,
            "remove_stale_metadata": False,
        },
    }
    sink_config = {"filename": f"{tmp_path}/{output_file_name}"}

    recipe = {
        "run_id": "test-snaplogic",
        "pipeline_name": "snaplogic_ingest",
        "source": {"type": "snaplogic", "config": source_config},
        "sink": {"type": "file", "config": sink_config},
    }
    return recipe
def register_mock_api(pytestconfig: Any, request_mock: Any) -> None:
    """Register the canned SnapLogic lineage response on ``request_mock``.

    Args:
        pytestconfig: Object exposing ``rootpath``; the fixture file
            ``snaplogic_base_response.json`` is read from
            ``tests/integration/snaplogic/test_data`` beneath it.
        request_mock: Object exposing ``register_uri`` (the tests pass the
            ``requests_mock`` fixture).
    """
    test_resources_dir: pathlib.Path = (
        pytestconfig.rootpath / "tests/integration/snaplogic/test_data"
    )

    # Load the mock response from snaplogic_base_response.json. The encoding is
    # pinned so the fixture is decoded the same way on every platform instead
    # of depending on the locale's default encoding.
    with open(
        test_resources_dir / "snaplogic_base_response.json", encoding="utf-8"
    ) as f:
        snaplogic_response = json.load(f)

    api_vs_response = {
        f"https://elastic.snaplogic.com/api/1/rest/public/catalog/{ORG_NAME}/lineage": {
            "method": "GET",
            "status_code": 200,
            "json": snaplogic_response,
        },
    }

    # Iterate key/value pairs directly rather than re-indexing the dict by URL.
    for url, response in api_vs_response.items():
        request_mock.register_uri(
            response["method"],
            url,
            json=response["json"],
            status_code=response["status_code"],
        )
def run_ingest(
    pytestconfig,
    mock_datahub_graph,
    recipe,
):
    """Create and run an ingestion pipeline from ``recipe``, then return it.

    While the pipeline runs, ``DataHubGraph`` in the checkpointing provider
    module is replaced by ``mock_datahub_graph``.

    Args:
        pytestconfig: Accepted for call-site symmetry; not used here.
        mock_datahub_graph: Replacement object patched in for ``DataHubGraph``.
        recipe: Recipe passed to ``Pipeline.create``.

    Returns:
        The pipeline, after ``run()`` and ``raise_from_status()`` were called.
    """
    with patch(
        "datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
        mock_datahub_graph,
    ) as mock_checkpoint:
        # Calling the patched name hands back the same mock graph object.
        mock_checkpoint.return_value = mock_datahub_graph

        # Build the pipeline from the recipe, execute it, and surface any
        # failure recorded in its status.
        ingestion = Pipeline.create(recipe)
        ingestion.run()
        ingestion.raise_from_status()
        return ingestion
@time_machine.travel(FROZEN_TIME, tick=False)
def test_snaplogic_source_default_configs(
    pytestconfig, mock_datahub_graph, tmp_path, requests_mock
):
    """Ingest with the default recipe and compare the output to its golden file."""
    test_resources_dir: pathlib.Path = (
        pytestconfig.rootpath / "tests/integration/snaplogic/test_data"
    )
    output_name = "snaplogic_mces_default_config.json"

    register_mock_api(pytestconfig, requests_mock)

    recipe = default_recipe(tmp_path, output_name)
    run_ingest(
        pytestconfig=pytestconfig,
        mock_datahub_graph=mock_datahub_graph,
        recipe=recipe,
    )

    produced = tmp_path / output_name
    expected = test_resources_dir / "snaplogic_base_golden.json"
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=produced,
        golden_path=expected,
    )
@time_machine.travel(FROZEN_TIME, tick=False)
def test_snaplogic_source_create_non_snaplogic_datasets(
    pytestconfig, mock_datahub_graph, tmp_path, requests_mock
):
    """Ingest with ``create_non_snaplogic_datasets`` on and compare to its golden file."""
    test_resources_dir: pathlib.Path = (
        pytestconfig.rootpath / "tests/integration/snaplogic/test_data"
    )
    output_name = "snaplogic_mces_create_non_snaplogic_datasets_config.json"

    register_mock_api(pytestconfig, requests_mock)

    recipe = create_non_snaplogic_datasets_recipe(tmp_path, output_name)
    run_ingest(
        pytestconfig=pytestconfig,
        mock_datahub_graph=mock_datahub_graph,
        recipe=recipe,
    )

    produced = tmp_path / output_name
    expected = (
        test_resources_dir / "snaplogic_create_non_snaplogic_datasets_golden.json"
    )
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=produced,
        golden_path=expected,
    )