test(ingest): verify the output of mssql (#2120)

Harshal Sheth 2021-02-18 14:47:49 -08:00 committed by GitHub
parent b4430a534f
commit 9336cbc743
4 changed files with 174 additions and 18 deletions


@@ -1,4 +1,5 @@
 import os
+import time

 import pytest
@@ -22,8 +23,8 @@ def wait_for_db(docker_services, container_name, container_port):
         pause=0.1,
         check=lambda: is_responsive(container_name, container_port),
     )
-    import time

+    # TODO: this is an ugly hack.
     time.sleep(5)
     return port
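The is_responsive check polled above is defined elsewhere in the test helpers and is not part of this diff. A minimal sketch of what such a check could look like, assuming a TCP probe run inside the container is enough (the docker invocation below is illustrative, not the project's actual helper):

import subprocess


def is_responsive(container_name: str, container_port: int) -> bool:
    # Hypothetical sketch: probe the port from inside the container so the
    # check does not depend on which host port Docker mapped it to.
    cmd = (
        f"docker exec {container_name} bash -c "
        f"'cat < /dev/null > /dev/tcp/localhost/{container_port}'"
    )
    ret = subprocess.run(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    return ret.returncode == 0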


@@ -1,17 +1,22 @@
-import os
-
 import mce_helpers
+from click.testing import CliRunner
+from datahub.entrypoints import datahub


-def test_ingest(mysql, pytestconfig, tmp_path):
+def test_mysql_ingest(mysql, pytestconfig):
     test_resources_dir = pytestconfig.rootpath / "tests/integration/mysql"

     config_file = (test_resources_dir / "mysql_to_file.yml").resolve()

-    ingest_command = f'cd {tmp_path} && datahub ingest -c {config_file}'
-    ret = os.system(ingest_command)
-    assert ret == 0
-
-    output = mce_helpers.load_json_file(str(tmp_path / "mysql_mces.json"))
+    # Run the metadata ingestion pipeline.
+    runner = CliRunner()
+    with runner.isolated_filesystem():
+        result = runner.invoke(datahub, ["ingest", "-c", f"{config_file}"])
+        assert result.exit_code == 0
+        output = mce_helpers.load_json_file("mysql_mces.json")

+    # Verify the output.
     golden = mce_helpers.load_json_file(
         str(test_resources_dir / "mysql_mce_golden.json")
     )
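The mysql_to_file.yml recipe fed to datahub ingest -c above is not included in this diff. A rough sketch of the kind of recipe involved, driven through the same CLI entrypoint the test uses; the mysql source settings are placeholders, not values from the repository. The file sink writes relative to the working directory, which is why the test reads "mysql_mces.json" from inside isolated_filesystem().

import yaml
from click.testing import CliRunner

from datahub.entrypoints import datahub

# Hypothetical recipe: a mysql source feeding the file sink that emits
# mysql_mces.json. Connection details are placeholders.
recipe = {
    "source": {
        "type": "mysql",
        "config": {
            "host_port": "localhost:3306",
            "database": "db",
            "username": "root",
            "password": "example",
        },
    },
    "sink": {"type": "file", "config": {"filename": "./mysql_mces.json"}},
}

runner = CliRunner()
with runner.isolated_filesystem():
    with open("recipe.yml", "w") as f:
        yaml.safe_dump(recipe, f)
    result = runner.invoke(datahub, ["ingest", "-c", "recipe.yml"])
    print(result.exit_code, result.output)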


@@ -0,0 +1,136 @@
[
    {
        "auditHeader": null,
        "proposedSnapshot": {
            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
                "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.dbo.Products,PROD)",
                "aspects": [
                    {
                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
                            "schemaName": "DemoData.dbo.Products",
                            "platform": "urn:li:dataPlatform:mssql",
                            "version": 0,
                            "created": {
                                "time": 1613593691000,
                                "actor": "urn:li:corpuser:etl",
                                "impersonator": null
                            },
                            "lastModified": {
                                "time": 1613593691000,
                                "actor": "urn:li:corpuser:etl",
                                "impersonator": null
                            },
                            "deleted": null,
                            "dataset": null,
                            "cluster": null,
                            "hash": "",
                            "platformSchema": {
                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
                                    "tableSchema": ""
                                }
                            },
                            "fields": [
                                {
                                    "fieldPath": "ID",
                                    "jsonPath": null,
                                    "nullable": false,
                                    "description": null,
                                    "type": {
                                        "type": {
                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
                                        }
                                    },
                                    "nativeDataType": "INTEGER()",
                                    "recursive": false
                                },
                                {
                                    "fieldPath": "ProductName",
                                    "jsonPath": null,
                                    "nullable": false,
                                    "description": null,
                                    "type": {
                                        "type": {
                                            "com.linkedin.pegasus2avro.schema.StringType": {}
                                        }
                                    },
                                    "nativeDataType": "NVARCHAR()",
                                    "recursive": false
                                }
                            ],
                            "primaryKeys": null,
                            "foreignKeysSpecs": null
                        }
                    }
                ]
            }
        },
        "proposedDelta": null
    },
    {
        "auditHeader": null,
        "proposedSnapshot": {
            "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
                "urn": "urn:li:dataset:(urn:li:dataPlatform:mssql,DemoData.Foo.Items,PROD)",
                "aspects": [
                    {
                        "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
                            "schemaName": "DemoData.Foo.Items",
                            "platform": "urn:li:dataPlatform:mssql",
                            "version": 0,
                            "created": {
                                "time": 1613593691000,
                                "actor": "urn:li:corpuser:etl",
                                "impersonator": null
                            },
                            "lastModified": {
                                "time": 1613593691000,
                                "actor": "urn:li:corpuser:etl",
                                "impersonator": null
                            },
                            "deleted": null,
                            "dataset": null,
                            "cluster": null,
                            "hash": "",
                            "platformSchema": {
                                "com.linkedin.pegasus2avro.schema.MySqlDDL": {
                                    "tableSchema": ""
                                }
                            },
                            "fields": [
                                {
                                    "fieldPath": "ID",
                                    "jsonPath": null,
                                    "nullable": false,
                                    "description": null,
                                    "type": {
                                        "type": {
                                            "com.linkedin.pegasus2avro.schema.NumberType": {}
                                        }
                                    },
                                    "nativeDataType": "INTEGER()",
                                    "recursive": false
                                },
                                {
                                    "fieldPath": "ItemName",
                                    "jsonPath": null,
                                    "nullable": false,
                                    "description": null,
                                    "type": {
                                        "type": {
                                            "com.linkedin.pegasus2avro.schema.StringType": {}
                                        }
                                    },
                                    "nativeDataType": "NVARCHAR()",
                                    "recursive": false
                                }
                            ],
                            "primaryKeys": null,
                            "foreignKeysSpecs": null
                        }
                    }
                ]
            }
        },
        "proposedDelta": null
    }
]
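Both tests read the emitted MCE file and a golden file like the one above through mce_helpers, which lives in the test suite and is not part of this diff. A minimal sketch of the two helpers as the tests use them, assuming a plain structural comparison (the real helper may normalize ordering or volatile values such as the audit timestamps):

import json
from typing import Any


def load_json_file(filename: str) -> Any:
    # Load either the pipeline output or a golden file.
    with open(filename) as f:
        return json.load(f)


def assert_mces_equal(output: Any, golden: Any) -> None:
    # Straight equality check; pytest renders a structural diff on mismatch.
    assert output == golden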


@@ -1,19 +1,33 @@
-import os
 import subprocess

+import mce_helpers
+from click.testing import CliRunner

-def test_ingest(sql_server, pytestconfig):
+from datahub.entrypoints import datahub


+def test_mssql_ingest(sql_server, pytestconfig):
+    test_resources_dir = pytestconfig.rootpath / "tests/integration/sql_server"
+
+    # Run the setup.sql file to populate the database.
     docker = "docker"
     command = f"{docker} exec testsqlserver /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P 'test!Password' -d master -i /setup/setup.sql"
     ret = subprocess.run(
         command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
     )
     assert ret.returncode == 0

-    config_file = os.path.join(
-        str(pytestconfig.rootdir), "tests/integration/sql_server", "mssql_to_file.yml"
+    # Run the metadata ingestion pipeline.
+    config_file = (test_resources_dir / "mssql_to_file.yml").resolve()
+    runner = CliRunner()
+    with runner.isolated_filesystem():
+        result = runner.invoke(datahub, ["ingest", "-c", f"{config_file}"])
+        assert result.exit_code == 0
+        output = mce_helpers.load_json_file("mssql_mces.json")

+    # Verify the output.
+    golden = mce_helpers.load_json_file(
+        str(test_resources_dir / "mssql_mces_golden.json")
     )
-    ingest_command = f'datahub ingest -c {config_file}'
-    ret = os.system(ingest_command)
-    assert ret == 0
-
-    # TODO: move to a better way to create an output test fixture
-    os.system("rm ./mssql_mces.json")
+    mce_helpers.assert_mces_equal(output, golden)
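The sql_server fixture that test_mssql_ingest depends on is defined outside this diff. A sketch of how it might be wired to the wait_for_db helper patched in the first file, assuming the pytest-docker docker_services fixture; the import path, container name, and port below are illustrative assumptions:

import pytest

# Hypothetical import path for the helper shown earlier in this diff.
from tests.test_helpers.docker_helpers import wait_for_db


@pytest.fixture(scope="module")
def sql_server(docker_services):
    # "testsqlserver" matches the container targeted by the docker exec setup
    # step above; 1433 is SQL Server's default port (assumed, not in the diff).
    yield wait_for_db(docker_services, "testsqlserver", 1433)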