2021-11-02 12:42:53 -07:00
|
|
|
import json
|
2022-06-30 16:00:50 +05:30
|
|
|
import os
|
2022-07-14 22:04:06 +05:30
|
|
|
from typing import Any, Tuple
|
2021-11-02 12:42:53 -07:00
|
|
|
|
|
|
|
import requests
|
|
|
|
from datahub.cli import cli_utils
|
|
|
|
from datahub.ingestion.run.pipeline import Pipeline
|
2022-07-14 22:04:06 +05:30
|
|
|
from datahub.cli.docker import check_local_docker_containers
|
2021-11-02 12:42:53 -07:00
|
|
|
|
2022-06-30 16:00:50 +05:30
|
|
|
|
|
|
|
def get_gms_url():
    """Return the GMS URL used by the tests.

    The value comes from the ``DATAHUB_GMS_URL`` environment variable; when
    that variable is unset or empty, ``"http://localhost:8080"`` is returned.
    """
    configured = os.environ.get("DATAHUB_GMS_URL")
    if configured:
        return configured
    return "http://localhost:8080"
|
|
|
|
|
|
|
|
|
|
|
|
def get_frontend_url():
    """Return the frontend URL used by the tests.

    The value comes from the ``DATAHUB_FRONTEND_URL`` environment variable;
    when that variable is unset or empty, ``"http://localhost:9002"`` is
    returned.
    """
    configured = os.environ.get("DATAHUB_FRONTEND_URL")
    if configured:
        return configured
    return "http://localhost:9002"
|
|
|
|
|
|
|
|
|
|
|
|
def get_kafka_broker_url():
    """Return the Kafka broker address used by the tests.

    The value comes from the ``DATAHUB_KAFKA_URL`` environment variable; when
    that variable is unset or empty, ``"localhost:9092"`` is returned.
    """
    configured = os.environ.get("DATAHUB_KAFKA_URL")
    if configured:
        return configured
    return "localhost:9092"
|
|
|
|
|
|
|
|
|
2022-07-14 22:04:06 +05:30
|
|
|
def get_kafka_schema_registry():
    """Return the Kafka schema registry URL used by the tests.

    The value comes from the ``DATAHUB_KAFKA_SCHEMA_REGISTRY_URL`` environment
    variable; when that variable is unset or empty,
    ``"http://localhost:8081"`` is returned.
    """
    configured = os.environ.get("DATAHUB_KAFKA_SCHEMA_REGISTRY_URL")
    if configured:
        return configured
    return "http://localhost:8081"
|
|
|
|
|
|
|
|
|
|
|
|
def get_mysql_url():
    """Return the MySQL address used by the tests.

    The value comes from the ``DATAHUB_MYSQL_URL`` environment variable; when
    that variable is unset or empty, ``"localhost:3306"`` is returned.
    """
    configured = os.environ.get("DATAHUB_MYSQL_URL")
    if configured:
        return configured
    return "localhost:3306"
|
|
|
|
|
|
|
|
|
|
|
|
def get_mysql_username():
    """Return the MySQL user name used by the tests.

    The value comes from the ``DATAHUB_MYSQL_USERNAME`` environment variable;
    when that variable is unset or empty, ``"datahub"`` is returned.
    """
    configured = os.environ.get("DATAHUB_MYSQL_USERNAME")
    if configured:
        return configured
    return "datahub"
|
|
|
|
|
|
|
|
|
|
|
|
def get_mysql_password():
    """Return the MySQL password used by the tests.

    The value comes from the ``DATAHUB_MYSQL_PASSWORD`` environment variable;
    when that variable is unset or empty, ``"datahub"`` is returned.
    """
    configured = os.environ.get("DATAHUB_MYSQL_PASSWORD")
    if configured:
        return configured
    return "datahub"
|
|
|
|
|
|
|
|
|
|
|
|
def get_sleep_info() -> Tuple[int, int]:
    """Return the test sleep settings as a pair of integers.

    The first element is ``DATAHUB_TEST_SLEEP_BETWEEN`` (default 60) and the
    second is ``DATAHUB_TEST_SLEEP_TIMES`` (default 5), each converted with
    ``int``. A value that ``int`` cannot parse raises ``ValueError``.
    """
    # NOTE(review): presumably a delay between retries and a retry count;
    # the units are not visible here -- confirm against the callers.
    sleep_between = int(os.environ.get("DATAHUB_TEST_SLEEP_BETWEEN", 60))
    sleep_times = int(os.environ.get("DATAHUB_TEST_SLEEP_TIMES", 5))
    return sleep_between, sleep_times
|
|
|
|
|
2021-11-02 12:42:53 -07:00
|
|
|
|
2022-07-14 22:04:06 +05:30
|
|
|
def is_k8s_enabled():
    """Report whether the ``K8S_CLUSTER_ENABLED`` environment flag is set.

    Returns ``True`` when the variable, compared case-insensitively, equals
    ``"true"`` or ``"yes"``; any other value, or an unset variable, gives
    ``False``.
    """
    flag = os.environ.get("K8S_CLUSTER_ENABLED", "false")
    return flag.lower() in ("true", "yes")
|
|
|
|
|
|
|
|
|
|
|
|
def wait_for_healthcheck_util():
    """Assert once that the target deployment looks healthy; never waits.

    When ``is_k8s_enabled()`` is true, the frontend ``/admin`` and GMS
    ``/health`` endpoints are probed with ``check_k8s_endpoint`` and each
    result must be falsy. Otherwise the result of
    ``check_local_docker_containers()`` must be falsy.

    Raises:
        AssertionError: if any of the checks returns a truthy value.
    """
    if not is_k8s_enabled():
        # Docker path: a single check, no retry loop.
        assert not check_local_docker_containers()
        return

    # Kubernetes path: probe the endpoints in order, resolving each base URL
    # only when its turn comes.
    probes = (
        (get_frontend_url, "/admin"),
        (get_gms_url, "/health"),
    )
    for resolve_base_url, path in probes:
        assert not check_k8s_endpoint(f"{resolve_base_url()}{path}")
|
|
|
|
|
|
|
|
|
|
|
|
def check_k8s_endpoint(url, timeout=30):
    """Probe *url* with a single HTTP GET and report whether it returned 200.

    Args:
        url: The endpoint to request.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without it, a server that accepts the connection but never
            answers would block the caller indefinitely.

    Returns:
        ``None`` when the response status code is 200; otherwise a string
        describing the URL and the status code that was received.

    Raises:
        SystemExit: if the request fails with any
            ``requests.exceptions.RequestException`` (which includes a
            timeout).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Chain the original error so the root cause stays in the traceback.
        raise SystemExit(f"{url}: is Not reachable \nErr: {e}") from e

    if response.status_code == 200:
        return None
    return f"{url}: is Not reachable, status_code: {response.status_code}"
|
|
|
|
|
|
|
|
|
2022-03-15 19:05:52 +00:00
|
|
|
def ingest_file_via_rest(filename: str) -> Any:
    """Run an ingestion pipeline that sends *filename* to the REST sink.

    Builds a pipeline with a ``file`` source pointing at *filename* and a
    ``datahub-rest`` sink whose server is ``get_gms_url()``, runs it, then
    calls ``raise_from_status()`` on it.

    Args:
        filename: Path handed to the ``file`` source.

    Returns:
        The ``Pipeline`` object after it has run.
    """
    source_config = {
        "type": "file",
        "config": {"filename": filename},
    }
    sink_config = {
        "type": "datahub-rest",
        "config": {"server": get_gms_url()},
    }
    recipe = {"source": source_config, "sink": sink_config}

    pipeline = Pipeline.create(recipe)
    pipeline.run()
    # NOTE(review): presumably raises when the run reported failures --
    # confirm against the Pipeline API.
    pipeline.raise_from_status()
    return pipeline
|
|
|
|
|
2021-11-02 12:42:53 -07:00
|
|
|
|
2021-12-16 20:06:33 -08:00
|
|
|
def delete_urns_from_file(filename: str) -> None:
    """Send a delete request for every entity URN found in a JSON file.

    The file must contain a JSON array. For each entry the URN is taken from
    ``entry["entityUrn"]`` when that key is present; otherwise from the
    ``"urn"`` field of the first value inside ``entry["proposedSnapshot"]``.
    Each URN is posted to ``<gms-url>/entities?action=delete`` through
    ``cli_utils.post_delete_endpoint_with_session_and_url``.

    Args:
        filename: Path of the JSON file to read.

    Raises:
        KeyError: if an entry has neither ``"entityUrn"`` nor
            ``"proposedSnapshot"``, or its snapshot has no ``"urn"``.
        IndexError: if an entry's ``"proposedSnapshot"`` mapping is empty.
    """
    # Read everything first so the file handle is closed before any network
    # traffic starts. JSON text is UTF-8, so do not rely on the locale default.
    with open(filename, encoding="utf-8") as f:
        entries = json.load(f)

    # The endpoint does not change between entries; build it once.
    delete_url = get_gms_url() + "/entities?action=delete"

    # The context manager closes the session (and its pooled connections)
    # even when one of the delete calls raises.
    with requests.Session() as session:
        session.headers.update(
            {
                "X-RestLi-Protocol-Version": "2.0.0",
                "Content-Type": "application/json",
            }
        )

        for entry in entries:
            if "entityUrn" in entry:
                # Entry carries the URN directly.
                urn = entry["entityUrn"]
            else:
                # Snapshot entry: a one-key union wrapping the snapshot.
                snapshot_union = entry["proposedSnapshot"]
                snapshot = list(snapshot_union.values())[0]
                urn = snapshot["urn"]

            cli_utils.post_delete_endpoint_with_session_and_url(
                session,
                delete_url,
                {"urn": urn},
            )
|