2021-04-21 11:34:24 -07:00
|
|
|
import json
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
import requests
|
|
|
|
|
2021-04-30 21:10:12 -07:00
|
|
|
import datahub.metadata.schema_classes as models
|
2021-04-21 11:34:24 -07:00
|
|
|
from datahub.emitter.rest_emitter import DatahubRestEmitter
|
|
|
|
|
|
|
|
# Base URL of the fake GMS server; every request to it is intercepted by requests-mock.
MOCK_GMS_ENDPOINT = "http://fakegmshost:8080"

# Audit stamp shared by all parametrized test cases below. `impersonator` is
# explicitly None, and the expected JSON payloads contain no such key.
basicAuditStamp = models.AuditStampClass(
    actor="urn:li:corpuser:datahub",
    time=1618987484580,
    impersonator=None,
)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize(
    "mce,snapshot",
    [
        (
            # Simple test.
            models.MetadataChangeEventClass(
                proposedSnapshot=models.DatasetSnapshotClass(
                    urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)",
                    aspects=[
                        models.UpstreamLineageClass(
                            upstreams=[
                                models.UpstreamClass(
                                    auditStamp=basicAuditStamp,
                                    dataset="urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream1,PROD)",
                                    type="TRANSFORMED",
                                ),
                                models.UpstreamClass(
                                    auditStamp=basicAuditStamp,
                                    dataset="urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream2,PROD)",
                                    type="TRANSFORMED",
                                ),
                            ]
                        )
                    ],
                ),
            ),
            {
                "entity": {
                    "value": {
                        "com.linkedin.metadata.snapshot.DatasetSnapshot": {
                            "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)",
                            "aspects": [
                                {
                                    "com.linkedin.dataset.UpstreamLineage": {
                                        "upstreams": [
                                            {
                                                "auditStamp": {
                                                    "time": 1618987484580,
                                                    "actor": "urn:li:corpuser:datahub",
                                                },
                                                "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream1,PROD)",
                                                "type": "TRANSFORMED",
                                            },
                                            {
                                                "auditStamp": {
                                                    "time": 1618987484580,
                                                    "actor": "urn:li:corpuser:datahub",
                                                },
                                                "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream2,PROD)",
                                                "type": "TRANSFORMED",
                                            },
                                        ]
                                    }
                                }
                            ],
                        }
                    }
                }
            },
        ),
        (
            # Verify the behavior of the fieldDiscriminator for primitive enums.
            models.MetadataChangeEventClass(
                proposedSnapshot=models.MLModelSnapshotClass(
                    urn="urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)",
                    aspects=[
                        models.CostClass(
                            costType=models.CostTypeClass.ORG_COST_TYPE,
                            cost=models.CostCostClass(
                                fieldDiscriminator=models.CostCostDiscriminatorClass.costCode,
                                costCode="sampleCostCode",
                            ),
                        )
                    ],
                )
            ),
            {
                "entity": {
                    "value": {
                        "com.linkedin.metadata.snapshot.MLModelSnapshot": {
                            "urn": "urn:li:mlModel:(urn:li:dataPlatform:science,scienceModel,PROD)",
                            "aspects": [
                                {
                                    "com.linkedin.common.Cost": {
                                        "costType": "ORG_COST_TYPE",
                                        "cost": {"costCode": "sampleCostCode"},
                                    }
                                }
                            ],
                        }
                    }
                }
            },
        ),
        (
            # Verify the serialization behavior with chart type enums.
            models.MetadataChangeEventClass(
                proposedSnapshot=models.ChartSnapshotClass(
                    urn="urn:li:chart:(superset,227)",
                    aspects=[
                        models.ChartInfoClass(
                            title="Weekly Messages",
                            description="",
                            lastModified=models.ChangeAuditStampsClass(
                                created=basicAuditStamp,
                                lastModified=basicAuditStamp,
                            ),
                            type=models.ChartTypeClass.SCATTER,
                        ),
                    ],
                )
            ),
            {
                "entity": {
                    "value": {
                        "com.linkedin.metadata.snapshot.ChartSnapshot": {
                            "urn": "urn:li:chart:(superset,227)",
                            "aspects": [
                                {
                                    "com.linkedin.chart.ChartInfo": {
                                        "customProperties": {},
                                        "title": "Weekly Messages",
                                        "description": "",
                                        "lastModified": {
                                            "created": {
                                                "time": 1618987484580,
                                                "actor": "urn:li:corpuser:datahub",
                                            },
                                            "lastModified": {
                                                "time": 1618987484580,
                                                "actor": "urn:li:corpuser:datahub",
                                            },
                                        },
                                        "type": "SCATTER",
                                    }
                                }
                            ],
                        }
                    }
                }
            },
        ),
        (
            # Verify that DataJobInfo is serialized properly (particularly its union type).
            models.MetadataChangeEventClass(
                proposedSnapshot=models.DataJobSnapshotClass(
                    urn="urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)",
                    aspects=[
                        models.DataJobInfoClass(
                            name="User Deletions",
                            description="Constructs the fct_users_deleted from logging_events",
                            type=models.AzkabanJobTypeClass.SQL,
                        )
                    ],
                )
            ),
            {
                "entity": {
                    "value": {
                        "com.linkedin.metadata.snapshot.DataJobSnapshot": {
                            "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)",
                            "aspects": [
                                {
                                    "com.linkedin.datajob.DataJobInfo": {
                                        "customProperties": {},
                                        "name": "User Deletions",
                                        "description": "Constructs the fct_users_deleted from logging_events",
                                        "type": {
                                            "com.linkedin.datajob.azkaban.AzkabanJobType": "SQL"
                                        },
                                    }
                                }
                            ],
                        }
                    }
                }
            },
        ),
    ],
)
def test_datahub_rest_emitter(requests_mock, mce, snapshot):
    """Check that ``DatahubRestEmitter.emit_mce`` POSTs the expected JSON body.

    Args:
        requests_mock: The requests-mock pytest fixture used to intercept HTTP calls.
        mce: The ``MetadataChangeEventClass`` instance handed to the emitter.
        snapshot: The JSON-compatible dict the emitted request body must equal.
    """

    def match_request_text(request: requests.Request) -> bool:
        # Compare the decoded JSON body with the expected payload. A mismatch
        # raises AssertionError with both payloads dumped for easy diffing.
        requested_snapshot = request.json()
        assert (
            requested_snapshot == snapshot
        ), f"Expected snapshot to be {json.dumps(snapshot)}, got {json.dumps(requested_snapshot)}"
        return True

    # Register the ingest endpoint and keep the returned matcher so that we can
    # verify afterwards that it was actually hit.
    ingest_matcher = requests_mock.post(
        f"{MOCK_GMS_ENDPOINT}/entities?action=ingest",
        request_headers={"X-RestLi-Protocol-Version": "2.0.0"},
        additional_matcher=match_request_text,
    )

    emitter = DatahubRestEmitter(MOCK_GMS_ENDPOINT)
    emitter.emit_mce(mce)

    # The payload check lives inside the matcher, so without this assertion the
    # test would pass vacuously if the emitter never sent the request.
    assert (
        ingest_matcher.called
    ), "Expected emit_mce to POST to the mocked ingest endpoint, but no request was made"
|