mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-12 01:11:41 +00:00
98 lines
3.2 KiB
Python
98 lines
3.2 KiB
Python
import json
|
|
import urllib.parse
|
|
|
|
import requests
|
|
|
|
# Settings: the entity to inspect and the GMS endpoint to query.
primary_key_urn = "urn:li:mlPrimaryKey:(users_feature_table,user_id)"
gms_server = "http://localhost:8080"

# Percent-encode the URN for use in a URL path. Passing an empty `safe`
# set means "/" is escaped as well, along with ":", "(", "," and ")".
encoded_urn = urllib.parse.quote(primary_key_urn, "")
|
|
|
|
# Fetch the MLPrimaryKey entity.
# An explicit timeout is passed because `requests` otherwise waits
# indefinitely; with it, an unreachable or stalled server raises a
# `requests` exception instead of hanging the script.
response = requests.get(f"{gms_server}/entities/{encoded_urn}", timeout=10)

if response.status_code == 200:
    entity_data = response.json()
    print("MLPrimaryKey Entity:")
    print(json.dumps(entity_data, indent=2))

    # Extract specific aspects. Each aspect's payload is read from its
    # "value" key; aspects missing from the response are simply skipped.
    if "aspects" in entity_data:
        aspects = entity_data["aspects"]

        # Get mlPrimaryKeyProperties
        if "mlPrimaryKeyProperties" in aspects:
            properties = aspects["mlPrimaryKeyProperties"]["value"]
            print("\nPrimary Key Properties:")
            print(f" Description: {properties.get('description', 'N/A')}")
            print(f" Data Type: {properties.get('dataType', 'N/A')}")
            # Sources are only printed when the key is present.
            if "sources" in properties:
                print(f" Sources: {properties['sources']}")

        # Get ownership
        if "ownership" in aspects:
            ownership = aspects["ownership"]["value"]
            print("\nOwnership:")
            for owner in ownership.get("owners", []):
                print(f" - {owner['owner']} ({owner['type']})")

        # Get tags
        if "globalTags" in aspects:
            tags = aspects["globalTags"]["value"]
            print("\nTags:")
            for tag in tags.get("tags", []):
                print(f" - {tag['tag']}")

        # Get glossary terms
        if "glossaryTerms" in aspects:
            terms = aspects["glossaryTerms"]["value"]
            print("\nGlossary Terms:")
            for term in terms.get("terms", []):
                print(f" - {term['urn']}")
else:
    # Any non-200 status: report the code and the raw body for debugging.
    print(f"Failed to fetch entity. Status code: {response.status_code}")
    print(f"Response: {response.text}")
|
|
|
|
# Find feature tables that use this primary key.
# Query the relationships endpoint for INCOMING "KeyedBy" edges on the
# primary key URN. The raw URN is passed via `params`, so `requests`
# handles the query-string encoding. An explicit timeout keeps the script
# from hanging indefinitely if the server does not answer.
relationships_response = requests.get(
    f"{gms_server}/relationships",
    params={
        "direction": "INCOMING",
        "urn": primary_key_urn,
        "types": "KeyedBy",
    },
    timeout=10,
)

if relationships_response.status_code == 200:
    relationships_data = relationships_response.json()
    print("\n\nFeature Tables using this Primary Key:")
    # A response without a "relationships" key prints only the heading.
    for relationship in relationships_data.get("relationships", []):
        print(f" - {relationship['entity']}")
else:
    print(
        f"\nFailed to fetch relationships. Status code: {relationships_response.status_code}"
    )
|
|
|
|
# Find upstream datasets that this primary key is derived from.
# Query the relationships endpoint for OUTGOING "DerivedFrom" edges on the
# primary key URN. An explicit timeout keeps the script from hanging
# indefinitely if the server does not answer.
upstream_response = requests.get(
    f"{gms_server}/relationships",
    params={
        "direction": "OUTGOING",
        "urn": primary_key_urn,
        "types": "DerivedFrom",
    },
    timeout=10,
)

if upstream_response.status_code == 200:
    upstream_data = upstream_response.json()
    print("\nUpstream Datasets (Sources):")
    # A response without a "relationships" key prints only the heading.
    for relationship in upstream_data.get("relationships", []):
        print(f" - {relationship['entity']}")
else:
    print(
        f"\nFailed to fetch upstream lineage. Status code: {upstream_response.status_code}"
    )
|