| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  | import json | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import pytest | 
					
						
							|  |  |  | import requests | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-30 21:10:12 -07:00
										 |  |  | import datahub.metadata.schema_classes as models | 
					
						
							| 
									
										
										
										
											2021-07-30 17:41:03 -07:00
										 |  |  | from datahub.emitter.mcp import MetadataChangeProposalWrapper | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  | from datahub.emitter.rest_emitter import DatahubRestEmitter | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | MOCK_GMS_ENDPOINT = "http://fakegmshost:8080" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | basicAuditStamp = models.AuditStampClass( | 
					
						
							|  |  |  |     time=1618987484580, | 
					
						
							|  |  |  |     actor="urn:li:corpuser:datahub", | 
					
						
							|  |  |  |     impersonator=None, | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.mark.parametrize( | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |     "record,path,snapshot", | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |     [ | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             # Simple test. | 
					
						
							|  |  |  |             models.MetadataChangeEventClass( | 
					
						
							|  |  |  |                 proposedSnapshot=models.DatasetSnapshotClass( | 
					
						
							|  |  |  |                     urn="urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)", | 
					
						
							|  |  |  |                     aspects=[ | 
					
						
							|  |  |  |                         models.UpstreamLineageClass( | 
					
						
							|  |  |  |                             upstreams=[ | 
					
						
							|  |  |  |                                 models.UpstreamClass( | 
					
						
							|  |  |  |                                     auditStamp=basicAuditStamp, | 
					
						
							|  |  |  |                                     dataset="urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream1,PROD)", | 
					
						
							|  |  |  |                                     type="TRANSFORMED", | 
					
						
							|  |  |  |                                 ), | 
					
						
							|  |  |  |                                 models.UpstreamClass( | 
					
						
							|  |  |  |                                     auditStamp=basicAuditStamp, | 
					
						
							|  |  |  |                                     dataset="urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream2,PROD)", | 
					
						
							|  |  |  |                                     type="TRANSFORMED", | 
					
						
							|  |  |  |                                 ), | 
					
						
							|  |  |  |                             ] | 
					
						
							|  |  |  |                         ) | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |             "/entities?action=ingest", | 
					
						
							| 
									
										
										
										
											2021-06-03 13:24:33 -07:00
										 |  |  |             { | 
					
						
							|  |  |  |                 "entity": { | 
					
						
							|  |  |  |                     "value": { | 
					
						
							|  |  |  |                         "com.linkedin.metadata.snapshot.DatasetSnapshot": { | 
					
						
							|  |  |  |                             "urn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,downstream,PROD)", | 
					
						
							|  |  |  |                             "aspects": [ | 
					
						
							|  |  |  |                                 { | 
					
						
							|  |  |  |                                     "com.linkedin.dataset.UpstreamLineage": { | 
					
						
							|  |  |  |                                         "upstreams": [ | 
					
						
							|  |  |  |                                             { | 
					
						
							|  |  |  |                                                 "auditStamp": { | 
					
						
							|  |  |  |                                                     "time": 1618987484580, | 
					
						
							|  |  |  |                                                     "actor": "urn:li:corpuser:datahub", | 
					
						
							|  |  |  |                                                 }, | 
					
						
							|  |  |  |                                                 "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream1,PROD)", | 
					
						
							|  |  |  |                                                 "type": "TRANSFORMED", | 
					
						
							|  |  |  |                                             }, | 
					
						
							|  |  |  |                                             { | 
					
						
							|  |  |  |                                                 "auditStamp": { | 
					
						
							|  |  |  |                                                     "time": 1618987484580, | 
					
						
							|  |  |  |                                                     "actor": "urn:li:corpuser:datahub", | 
					
						
							|  |  |  |                                                 }, | 
					
						
							|  |  |  |                                                 "dataset": "urn:li:dataset:(urn:li:dataPlatform:bigquery,upstream2,PROD)", | 
					
						
							|  |  |  |                                                 "type": "TRANSFORMED", | 
					
						
							|  |  |  |                                             }, | 
					
						
							|  |  |  |                                         ] | 
					
						
							|  |  |  |                                     } | 
					
						
							|  |  |  |                                 } | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                         } | 
					
						
							|  |  |  |                     } | 
					
						
							| 
									
										
										
										
											2021-07-29 20:04:40 -07:00
										 |  |  |                 }, | 
					
						
							|  |  |  |                 "systemMetadata": {}, | 
					
						
							| 
									
										
										
										
											2021-06-03 13:24:33 -07:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             # Verify the serialization behavior with chart type enums. | 
					
						
							|  |  |  |             models.MetadataChangeEventClass( | 
					
						
							|  |  |  |                 proposedSnapshot=models.ChartSnapshotClass( | 
					
						
							|  |  |  |                     urn="urn:li:chart:(superset,227)", | 
					
						
							|  |  |  |                     aspects=[ | 
					
						
							|  |  |  |                         models.ChartInfoClass( | 
					
						
							|  |  |  |                             title="Weekly Messages", | 
					
						
							|  |  |  |                             description="", | 
					
						
							|  |  |  |                             lastModified=models.ChangeAuditStampsClass( | 
					
						
							|  |  |  |                                 created=basicAuditStamp, | 
					
						
							|  |  |  |                                 lastModified=basicAuditStamp, | 
					
						
							|  |  |  |                             ), | 
					
						
							|  |  |  |                             type=models.ChartTypeClass.SCATTER, | 
					
						
							|  |  |  |                         ), | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |             "/entities?action=ingest", | 
					
						
							| 
									
										
										
										
											2021-04-23 00:18:39 -07:00
										 |  |  |             { | 
					
						
							| 
									
										
										
										
											2021-06-03 13:24:33 -07:00
										 |  |  |                 "entity": { | 
					
						
							|  |  |  |                     "value": { | 
					
						
							|  |  |  |                         "com.linkedin.metadata.snapshot.ChartSnapshot": { | 
					
						
							|  |  |  |                             "urn": "urn:li:chart:(superset,227)", | 
					
						
							|  |  |  |                             "aspects": [ | 
					
						
							|  |  |  |                                 { | 
					
						
							|  |  |  |                                     "com.linkedin.chart.ChartInfo": { | 
					
						
							|  |  |  |                                         "customProperties": {}, | 
					
						
							|  |  |  |                                         "title": "Weekly Messages", | 
					
						
							|  |  |  |                                         "description": "", | 
					
						
							|  |  |  |                                         "lastModified": { | 
					
						
							|  |  |  |                                             "created": { | 
					
						
							|  |  |  |                                                 "time": 1618987484580, | 
					
						
							|  |  |  |                                                 "actor": "urn:li:corpuser:datahub", | 
					
						
							|  |  |  |                                             }, | 
					
						
							|  |  |  |                                             "lastModified": { | 
					
						
							|  |  |  |                                                 "time": 1618987484580, | 
					
						
							|  |  |  |                                                 "actor": "urn:li:corpuser:datahub", | 
					
						
							|  |  |  |                                             }, | 
					
						
							|  |  |  |                                         }, | 
					
						
							|  |  |  |                                         "type": "SCATTER", | 
					
						
							|  |  |  |                                     } | 
					
						
							|  |  |  |                                 } | 
					
						
							|  |  |  |                             ], | 
					
						
							| 
									
										
										
										
											2021-04-23 00:18:39 -07:00
										 |  |  |                         } | 
					
						
							| 
									
										
										
										
											2021-06-03 13:24:33 -07:00
										 |  |  |                     } | 
					
						
							| 
									
										
										
										
											2021-07-29 20:04:40 -07:00
										 |  |  |                 }, | 
					
						
							|  |  |  |                 "systemMetadata": {}, | 
					
						
							| 
									
										
										
										
											2021-04-23 00:18:39 -07:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |         ), | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             # Verify that DataJobInfo is serialized properly (particularly it's union type). | 
					
						
							|  |  |  |             models.MetadataChangeEventClass( | 
					
						
							|  |  |  |                 proposedSnapshot=models.DataJobSnapshotClass( | 
					
						
							|  |  |  |                     urn="urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)", | 
					
						
							|  |  |  |                     aspects=[ | 
					
						
							|  |  |  |                         models.DataJobInfoClass( | 
					
						
							|  |  |  |                             name="User Deletions", | 
					
						
							|  |  |  |                             description="Constructs the fct_users_deleted from logging_events", | 
					
						
							|  |  |  |                             type=models.AzkabanJobTypeClass.SQL, | 
					
						
							|  |  |  |                         ) | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |             "/entities?action=ingest", | 
					
						
							| 
									
										
										
										
											2021-04-23 00:18:39 -07:00
										 |  |  |             { | 
					
						
							| 
									
										
										
										
											2021-06-03 13:24:33 -07:00
										 |  |  |                 "entity": { | 
					
						
							|  |  |  |                     "value": { | 
					
						
							|  |  |  |                         "com.linkedin.metadata.snapshot.DataJobSnapshot": { | 
					
						
							|  |  |  |                             "urn": "urn:li:dataJob:(urn:li:dataFlow:(airflow,dag_abc,PROD),task_456)", | 
					
						
							|  |  |  |                             "aspects": [ | 
					
						
							|  |  |  |                                 { | 
					
						
							|  |  |  |                                     "com.linkedin.datajob.DataJobInfo": { | 
					
						
							|  |  |  |                                         "customProperties": {}, | 
					
						
							|  |  |  |                                         "name": "User Deletions", | 
					
						
							|  |  |  |                                         "description": "Constructs the fct_users_deleted from logging_events", | 
					
						
							| 
									
										
										
										
											2021-07-08 16:16:16 -07:00
										 |  |  |                                         "type": {"string": "SQL"}, | 
					
						
							| 
									
										
										
										
											2021-06-03 13:24:33 -07:00
										 |  |  |                                     } | 
					
						
							|  |  |  |                                 } | 
					
						
							|  |  |  |                             ], | 
					
						
							| 
									
										
										
										
											2021-04-23 00:18:39 -07:00
										 |  |  |                         } | 
					
						
							| 
									
										
										
										
											2021-06-03 13:24:33 -07:00
										 |  |  |                     } | 
					
						
							| 
									
										
										
										
											2021-07-29 20:04:40 -07:00
										 |  |  |                 }, | 
					
						
							|  |  |  |                 "systemMetadata": {}, | 
					
						
							| 
									
										
										
										
											2021-04-23 00:18:39 -07:00
										 |  |  |             }, | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |         ), | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |         ( | 
					
						
							|  |  |  |             # Usage stats ingestion test. | 
					
						
							|  |  |  |             models.UsageAggregationClass( | 
					
						
							|  |  |  |                 bucket=1623826800000, | 
					
						
							|  |  |  |                 duration="DAY", | 
					
						
							|  |  |  |                 resource="urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", | 
					
						
							|  |  |  |                 metrics=models.UsageAggregationMetricsClass( | 
					
						
							|  |  |  |                     uniqueUserCount=2, | 
					
						
							|  |  |  |                     users=[ | 
					
						
							|  |  |  |                         models.UserUsageCountsClass( | 
					
						
							|  |  |  |                             user="urn:li:corpuser:jdoe", | 
					
						
							|  |  |  |                             count=5, | 
					
						
							|  |  |  |                         ), | 
					
						
							|  |  |  |                         models.UserUsageCountsClass( | 
					
						
							|  |  |  |                             user="urn:li:corpuser:unknown", | 
					
						
							|  |  |  |                             count=3, | 
					
						
							|  |  |  |                             userEmail="foo@example.com", | 
					
						
							|  |  |  |                         ), | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                     totalSqlQueries=1, | 
					
						
							|  |  |  |                     topSqlQueries=["SELECT * FROM foo"], | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             "/usageStats?action=batchIngest", | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "buckets": [ | 
					
						
							|  |  |  |                     { | 
					
						
							|  |  |  |                         "bucket": 1623826800000, | 
					
						
							|  |  |  |                         "duration": "DAY", | 
					
						
							|  |  |  |                         "resource": "urn:li:dataset:(urn:li:dataPlatform:kafka,SampleKafkaDataset,PROD)", | 
					
						
							|  |  |  |                         "metrics": { | 
					
						
							|  |  |  |                             "uniqueUserCount": 2, | 
					
						
							|  |  |  |                             "users": [ | 
					
						
							|  |  |  |                                 {"count": 5, "user": "urn:li:corpuser:jdoe"}, | 
					
						
							|  |  |  |                                 { | 
					
						
							|  |  |  |                                     "count": 3, | 
					
						
							|  |  |  |                                     "user": "urn:li:corpuser:unknown", | 
					
						
							|  |  |  |                                     "userEmail": "foo@example.com", | 
					
						
							|  |  |  |                                 }, | 
					
						
							|  |  |  |                             ], | 
					
						
							|  |  |  |                             "totalSqlQueries": 1, | 
					
						
							|  |  |  |                             "topSqlQueries": ["SELECT * FROM foo"], | 
					
						
							|  |  |  |                         }, | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 ] | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ), | 
					
						
							| 
									
										
										
										
											2021-07-30 17:41:03 -07:00
										 |  |  |         ( | 
					
						
							|  |  |  |             MetadataChangeProposalWrapper( | 
					
						
							|  |  |  |                 entityUrn="urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)", | 
					
						
							|  |  |  |                 aspect=models.OwnershipClass( | 
					
						
							|  |  |  |                     owners=[ | 
					
						
							|  |  |  |                         models.OwnerClass( | 
					
						
							|  |  |  |                             owner="urn:li:corpuser:fbar", | 
					
						
							|  |  |  |                             type=models.OwnershipTypeClass.DATAOWNER, | 
					
						
							|  |  |  |                         ) | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                     lastModified=models.AuditStampClass( | 
					
						
							|  |  |  |                         time=0, | 
					
						
							|  |  |  |                         actor="urn:li:corpuser:fbar", | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             "/aspects?action=ingestProposal", | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "proposal": { | 
					
						
							|  |  |  |                     "entityType": "dataset", | 
					
						
							|  |  |  |                     "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:foo,bar,PROD)", | 
					
						
							|  |  |  |                     "changeType": "UPSERT", | 
					
						
							|  |  |  |                     "aspectName": "ownership", | 
					
						
							|  |  |  |                     "aspect": { | 
					
						
							| 
									
										
										
										
											2024-02-28 16:57:26 -06:00
										 |  |  |                         "value": '{"owners": [{"owner": "urn:li:corpuser:fbar", "type": "DATAOWNER"}], "ownerTypes": {}, "lastModified": {"time": 0, "actor": "urn:li:corpuser:fbar"}}', | 
					
						
							| 
									
										
										
										
											2021-07-30 17:41:03 -07:00
										 |  |  |                         "contentType": "application/json", | 
					
						
							|  |  |  |                     }, | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ), | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |     ], | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  | def test_datahub_rest_emitter(requests_mock, record, path, snapshot): | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |     def match_request_text(request: requests.Request) -> bool: | 
					
						
							|  |  |  |         requested_snapshot = request.json() | 
					
						
							|  |  |  |         assert ( | 
					
						
							|  |  |  |             requested_snapshot == snapshot | 
					
						
							|  |  |  |         ), f"Expected snapshot to be {json.dumps(snapshot)}, got {json.dumps(requested_snapshot)}" | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     requests_mock.post( | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |         f"{MOCK_GMS_ENDPOINT}{path}", | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |         request_headers={"X-RestLi-Protocol-Version": "2.0.0"}, | 
					
						
							|  |  |  |         additional_matcher=match_request_text, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     emitter = DatahubRestEmitter(MOCK_GMS_ENDPOINT) | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |     emitter.emit(record) |