2025-04-17 19:22:13 -05:00
|
|
|
import pytest
|
|
|
|
|
|
|
|
from datahub.ingestion.run.pipeline import Pipeline
|
2025-05-21 11:27:34 -05:00
|
|
|
from datahub.testing import mce_helpers
|
2025-04-17 19:22:13 -05:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.integration
def test_excel(pytestconfig, tmp_path, mock_time):
    """Ingest the sample ``.xlsx`` files and compare the result to the golden file.

    An ``excel`` source with profiling enabled is wired to a ``file`` sink
    that writes ``excel_file_test.json`` under ``tmp_path``. After the
    pipeline has run, the written file is checked against
    ``excel_file_test_golden.json`` in the test resources directory.

    ``mock_time`` is requested as a fixture only; it is never referenced in
    the body (presumably it pins timestamps so the output is reproducible --
    confirm against the fixture definition).
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/excel"
    # Relative glob pattern; unlike test_resources_dir it is not anchored to
    # pytestconfig.rootpath.
    xlsx_glob = "tests/integration/excel/data/*.xlsx"
    output_file = tmp_path / "excel_file_test.json"

    source_section = {
        "type": "excel",
        "config": {
            "path_list": [xlsx_glob],
            "profiling": {"enabled": True},
        },
    }
    sink_section = {
        "type": "file",
        "config": {"filename": f"{tmp_path}/excel_file_test.json"},
    }

    # Run the metadata ingestion pipeline.
    pipeline = Pipeline.create(
        {
            "run_id": "excel-test",
            "source": source_section,
            "sink": sink_section,
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    # The sampleValues entries of each field profile are left out of the
    # comparison (presumably because sampled values can differ between runs).
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=output_file,
        golden_path=test_resources_dir / "excel_file_test_golden.json",
        ignore_paths=[
            r"root\[\d+\]\['aspect'\]\['json'\]\['fieldProfiles'\]\[\d+\]\['sampleValues'\]",
        ],
    )
|