mirror of
https://github.com/datahub-project/datahub.git
synced 2025-07-24 18:10:11 +00:00
76 lines
1.9 KiB
Python
76 lines
1.9 KiB
Python
import json
|
|
import os
|
|
from typing import Any
|
|
|
|
import requests
|
|
from datahub.cli import cli_utils
|
|
from datahub.ingestion.run.pipeline import Pipeline
|
|
|
|
|
|
def get_gms_url():
|
|
return os.getenv("DATAHUB_GMS_URL") or "http://localhost:8080"
|
|
|
|
|
|
def get_frontend_url():
|
|
return os.getenv("DATAHUB_FRONTEND_URL") or "http://localhost:9002"
|
|
|
|
|
|
def get_kafka_broker_url():
|
|
return os.getenv("DATAHUB_KAFKA_URL") or "localhost:9092"
|
|
|
|
|
|
def get_sleep_info():
|
|
return (
|
|
os.environ.get("DATAHUB_TEST_SLEEP_BETWEEN") or 60,
|
|
os.environ.get("DATAHUB_TEST_SLEEP_TIMES") or 5,
|
|
)
|
|
|
|
|
|
def ingest_file_via_rest(filename: str) -> Any:
|
|
pipeline = Pipeline.create(
|
|
{
|
|
"source": {
|
|
"type": "file",
|
|
"config": {"filename": filename},
|
|
},
|
|
"sink": {
|
|
"type": "datahub-rest",
|
|
"config": {"server": get_gms_url()},
|
|
},
|
|
}
|
|
)
|
|
pipeline.run()
|
|
pipeline.raise_from_status()
|
|
|
|
return pipeline
|
|
|
|
|
|
def delete_urns_from_file(filename: str) -> None:
|
|
session = requests.Session()
|
|
session.headers.update(
|
|
{
|
|
"X-RestLi-Protocol-Version": "2.0.0",
|
|
"Content-Type": "application/json",
|
|
}
|
|
)
|
|
|
|
with open(filename) as f:
|
|
d = json.load(f)
|
|
for entry in d:
|
|
is_mcp = "entityUrn" in entry
|
|
urn = None
|
|
# Kill Snapshot
|
|
if is_mcp:
|
|
urn = entry["entityUrn"]
|
|
else:
|
|
snapshot_union = entry["proposedSnapshot"]
|
|
snapshot = list(snapshot_union.values())[0]
|
|
urn = snapshot["urn"]
|
|
payload_obj = {"urn": urn}
|
|
|
|
cli_utils.post_delete_endpoint_with_session_and_url(
|
|
session,
|
|
get_gms_url() + "/entities?action=delete",
|
|
payload_obj,
|
|
)
|