2021-04-01 12:15:05 -07:00
|
|
|
import pytest
|
2021-03-23 20:15:44 -07:00
|
|
|
|
|
|
|
from datahub.ingestion.run.pipeline import Pipeline
|
2025-05-19 08:39:53 +02:00
|
|
|
from datahub.testing import mce_helpers
|
2021-04-13 17:30:24 -07:00
|
|
|
from tests.test_helpers.docker_helpers import wait_for_port
|
2021-03-23 20:15:44 -07:00
|
|
|
|
|
|
|
|
2021-07-14 20:02:48 -07:00
|
|
|
def _ingest_and_check_golden(
    pytestconfig,
    tmp_path,
    test_resources_dir,
    *,
    run_id: str,
    extra_source_config: dict,
    output_name: str,
    golden_name: str,
):
    """Run one MongoDB ingestion pipeline and check its output file.

    Builds a pipeline with a ``mongodb`` source and a ``file`` sink, runs it,
    raises if the pipeline reported a failure status, and then hands the
    produced file and the golden file to ``mce_helpers.check_golden_file``.

    Args:
        pytestconfig: The pytest config object, forwarded to
            ``mce_helpers.check_golden_file``.
        tmp_path: Directory the sink writes its output file into.
        test_resources_dir: Directory that holds the golden files.
        run_id: Value used for the pipeline's ``run_id``.
        extra_source_config: Scenario-specific MongoDB source options, merged
            into the shared connection settings.
        output_name: File name of the sink output inside ``tmp_path``.
        golden_name: File name of the golden file inside
            ``test_resources_dir``.
    """
    pipeline = Pipeline.create(
        {
            "run_id": run_id,
            "source": {
                "type": "mongodb",
                "config": {
                    # NOTE(review): 57017 is presumably the host port mapped
                    # to the container's 27017 in docker-compose.yml - confirm.
                    "connect_uri": "mongodb://localhost:57017",
                    "username": "mongoadmin",
                    "password": "examplepass",
                    # Scenario options sit between the credentials and
                    # platform_instance, matching the original key order.
                    **extra_source_config,
                    "platform_instance": "instance",
                },
            },
            "sink": {
                "type": "file",
                "config": {
                    "filename": f"{tmp_path}/{output_name}",
                },
            },
        }
    )
    pipeline.run()
    pipeline.raise_from_status()

    # Verify the output.
    mce_helpers.check_golden_file(
        pytestconfig,
        output_path=tmp_path / output_name,
        golden_path=test_resources_dir / golden_name,
    )


@pytest.mark.integration
def test_mongodb_ingest(docker_compose_runner, pytestconfig, tmp_path, mock_time):
    """Ingest from a dockerized MongoDB under three source configurations.

    Starts the ``mongo`` docker-compose service, waits for the ``testmongodb``
    container's port 27017, then runs the ingestion pipeline once per
    scenario and checks each output file against its own golden file.
    """
    test_resources_dir = pytestconfig.rootpath / "tests/integration/mongodb"

    with docker_compose_runner(
        test_resources_dir / "docker-compose.yml", "mongo"
    ) as docker_services:
        wait_for_port(docker_services, "testmongodb", 27017)

        # Scenario 1: explicit document size limit, schemaSamplingSize unset.
        _ingest_and_check_golden(
            pytestconfig,
            tmp_path,
            test_resources_dir,
            run_id="mongodb-test",
            extra_source_config={
                "maxDocumentSize": 25000,
                "schemaSamplingSize": None,
            },
            output_name="mongodb_mces.json",
            golden_name="mongodb_mces_golden.json",
        )

        # Scenario 2: small maxSchemaSize.
        _ingest_and_check_golden(
            pytestconfig,
            tmp_path,
            test_resources_dir,
            run_id="mongodb-test-small-schema-size",
            extra_source_config={"maxSchemaSize": 10},
            output_name="mongodb_mces_small_schema_size.json",
            golden_name="mongodb_mces_small_schema_size_golden.json",
        )

        # Scenario 3: random sampling disabled.
        _ingest_and_check_golden(
            pytestconfig,
            tmp_path,
            test_resources_dir,
            run_id="mongodb-test-no-random-sampling",
            extra_source_config={"useRandomSampling": False},
            output_name="mongodb_mces_no_random_sampling.json",
            golden_name="mongodb_mces_no_random_sampling_golden.json",
        )
|