# datahub/metadata-ingestion/examples/library/dataflow_query_rest.py
#
# 48 lines
# 1.4 KiB
# Python

# metadata-ingestion/examples/library/dataflow_query_rest.py
import json
import urllib.parse
import requests
# Example script: fetch a DataFlow entity from the DataHub REST API and print
# the raw JSON plus a few selected aspects (dataFlowInfo, ownership,
# globalTags) when they are present in the response.

# DataHub REST API endpoint
DATAHUB_GMS_URL = "http://localhost:8080"

# Upper bound (seconds) on how long the HTTP call may wait for the server.
# Without a timeout, requests can block indefinitely on an unresponsive
# endpoint; with it, requests raises a Timeout exception instead.
REQUEST_TIMEOUT_SECONDS = 10

# Create the URN for the DataFlow
flow_urn = "urn:li:dataFlow:(airflow,daily_sales_pipeline,prod)"

# URL encode the URN. safe="" overrides the default safe="/" so that no
# reserved character is left unescaped inside the path segment.
encoded_urn = urllib.parse.quote(flow_urn, safe="")

# Fetch the entity
response = requests.get(
    f"{DATAHUB_GMS_URL}/entities/{encoded_urn}",
    timeout=REQUEST_TIMEOUT_SECONDS,
)

if response.status_code == 200:
    entity_data = response.json()
    print("DataFlow Entity:")
    print(json.dumps(entity_data, indent=2))

    # Extract specific aspects; a missing "aspects" key is treated as empty,
    # so every section below is simply skipped in that case.
    aspects = entity_data.get("aspects", {})

    if "dataFlowInfo" in aspects:
        info = aspects["dataFlowInfo"]
        print(f"\nFlow Name: {info.get('name')}")
        print(f"Description: {info.get('description')}")
        print(f"Project: {info.get('project')}")

    if "ownership" in aspects:
        ownership = aspects["ownership"]
        print(f"\nOwners: {len(ownership.get('owners', []))}")
        for owner in ownership.get("owners", []):
            print(f" - {owner.get('owner')} ({owner.get('type')})")

    if "globalTags" in aspects:
        tags = aspects["globalTags"]
        print(f"\nTags: {len(tags.get('tags', []))}")
        for tag in tags.get("tags", []):
            print(f" - {tag.get('tag')}")
else:
    # Any non-200 status: report the code and the raw body for debugging.
    print(f"Failed to fetch entity: {response.status_code}")
    print(response.text)