mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-17 11:53:06 +00:00
61 lines
2.1 KiB
Python
61 lines
2.1 KiB
Python
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph

# Example script: look up the entities related to one DataJob through
# "InstanceOf" relationships, print selected properties of the first five,
# then list the URNs returned by a wildcard search over the
# "dataProcessInstance" entity type.


def _print_instance_details(client: DataHubGraph, instance_urn: str) -> None:
    """Print the ``dataProcessInstanceProperties`` fields of one instance.

    The instance URN is always printed. The raw entity is then fetched with
    only the ``dataProcessInstanceProperties`` aspect requested; nothing more
    is printed when the entity, the aspect, or the aspect's ``value`` entry
    is missing or empty.

    Args:
        client: Graph client used to fetch the raw entity.
        instance_urn: URN of the entity to describe.
    """
    print(f"\n Instance URN: {instance_urn}")

    raw_entity = client.get_entity_raw(
        entity_urn=instance_urn, aspects=["dataProcessInstanceProperties"]
    )
    if not raw_entity:
        return

    aspect = raw_entity.get("aspects", {}).get("dataProcessInstanceProperties", {})
    if not aspect or "value" not in aspect:
        return

    details = aspect["value"]
    print(f" Name: {details.get('name')}")
    print(f" Type: {details.get('type')}")

    # The remaining fields are optional: each is printed only when truthy.
    created = details.get("created")
    if created:
        print(f" Created: {created.get('time')}")

    external_url = details.get("externalUrl")
    if external_url:
        print(f" URL: {external_url}")

    custom_properties = details.get("customProperties")
    if custom_properties:
        print(f" Custom Properties: {custom_properties}")


# Graph client configured for a DataHub server at http://localhost:8080.
graph = DataHubGraph(config=DatahubClientConfig(server="http://localhost:8080"))

# The DataJob whose related process instances are queried.
datajob_urn = (
    "urn:li:dataJob:(urn:li:dataFlow:(airflow,sales_pipeline,prod),process_sales_data)"
)
print(f"Querying process instances for DataJob: {datajob_urn}")

# Ask for "InstanceOf" relationships in the INCOMING direction and
# materialize the result so it can be both counted and sliced.
relationships = graph.get_related_entities(
    entity_urn=datajob_urn,
    relationship_types=["InstanceOf"],
    direction=DataHubGraph.RelationshipDirection.INCOMING,
)
relationships_list = list(relationships)
print(f"\nFound {len(relationships_list)} process instances:")

# Only the first 5 instances are described in detail.
for related in relationships_list[:5]:
    _print_instance_details(graph, related.urn)

# Wildcard search restricted to the "dataProcessInstance" entity type.
print("\n\nQuerying all DataProcessInstance entities (first 10):")

matching_urns = graph.get_urns_by_filter(
    entity_types=["dataProcessInstance"],
    query="*",
)
search_results = list(matching_urns)

# The total covers every returned URN; only the first 10 are listed.
print(f"Total instances found: {len(search_results)}")
for found_urn in search_results[:10]:
    print(f" - {found_urn}")