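"""Shared utilities for DataHub smoke tests.

Provides an authenticated frontend session, environment-driven endpoint and
credential lookups, REST ingestion and deletion helpers, and synchronization
helpers that wait for asynchronous writes to be consumed before tests assert.
"""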
import json
import os
from datetime import datetime, timedelta
import subprocess
import time
from typing import Any, Dict, List, Tuple
from time import sleep
from joblib import Parallel, delayed
import requests_wrapper as requests
import logging
from datahub.cli import cli_utils
from datahub.cli.cli_utils import get_system_auth
from datahub.ingestion.run.pipeline import Pipeline
TIME: int = 1581407189000
logger = logging.getLogger(__name__)
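
# Authentication: obtain a requests.Session that is logged in to the DataHub
# frontend, using the pre-configured system auth token when available and
# falling back to the admin username/password otherwise.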
def get_frontend_session():
    session = requests.Session()
    headers = {
        "Content-Type": "application/json",
    }
    system_auth = get_system_auth()
    if system_auth is not None:
        session.headers.update({"Authorization": system_auth})
    else:
        username, password = get_admin_credentials()
        data = json.dumps({"username": username, "password": password})
        response = session.post(
            f"{get_frontend_url()}/logIn", headers=headers, data=data
        )
        response.raise_for_status()
    return session
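
# Credentials, URNs, and endpoint locations for the test deployment. Most values
# can be overridden via environment variables and fall back to the local
# quickstart defaults otherwise.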
def get_admin_username() -> str:
    return get_admin_credentials()[0]

def get_admin_credentials():
    return (
        os.getenv("ADMIN_USERNAME", "datahub"),
        os.getenv("ADMIN_PASSWORD", "datahub"),
    )

def get_root_urn():
    return "urn:li:corpuser:datahub"

def get_gms_url():
    return os.getenv("DATAHUB_GMS_URL") or "http://localhost:8080"

def get_frontend_url():
    return os.getenv("DATAHUB_FRONTEND_URL") or "http://localhost:9002"

def get_kafka_broker_url():
    return os.getenv("DATAHUB_KAFKA_URL") or "localhost:9092"

def get_kafka_schema_registry():
    # internal registry: "http://localhost:8080/schema-registry/api/"
    return os.getenv("DATAHUB_KAFKA_SCHEMA_REGISTRY_URL") or "http://localhost:8081"

def get_mysql_url():
    return os.getenv("DATAHUB_MYSQL_URL") or "localhost:3306"

def get_mysql_username():
    return os.getenv("DATAHUB_MYSQL_USERNAME") or "datahub"

def get_mysql_password():
    return os.getenv("DATAHUB_MYSQL_PASSWORD") or "datahub"

def get_sleep_info() -> Tuple[int, int]:
    return (
        int(os.getenv("DATAHUB_TEST_SLEEP_BETWEEN", 20)),
        int(os.getenv("DATAHUB_TEST_SLEEP_TIMES", 3)),
    )

def is_k8s_enabled():
    return os.getenv("K8S_CLUSTER_ENABLED", "false").lower() in ["true", "yes"]
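
# Reachability checks: check_endpoint returns None when the URL answers with
# HTTP 200 and an error message otherwise, so wait_for_healthcheck_util can
# simply assert on a falsy result.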
def wait_for_healthcheck_util():
    assert not check_endpoint(f"{get_frontend_url()}/admin")
    assert not check_endpoint(f"{get_gms_url()}/health")

def check_endpoint(url):
    try:
        get = requests.get(url)
        if get.status_code == 200:
            return
        else:
            return f"{url}: is not reachable, status_code: {get.status_code}"
    except requests.exceptions.RequestException as e:
        raise SystemExit(f"{url}: is not reachable \nErr: {e}")
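
# Ingest a local metadata file into GMS via the datahub-rest sink, then block
# until the async write path has caught up before returning the pipeline.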
def ingest_file_via_rest(filename: str) -> Pipeline:
    pipeline = Pipeline.create(
        {
            "source": {
                "type": "file",
                "config": {"filename": filename},
            },
            "sink": {
                "type": "datahub-rest",
                "config": {"server": get_gms_url()},
            },
        }
    )
    pipeline.run()
    pipeline.raise_from_status()
    wait_for_writes_to_sync()
    return pipeline
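
# Deletion helpers: issue deletes against the GMS /entities?action=delete
# endpoint, one URN at a time, or for every entity in an ingestion file.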
def delete_urn(urn: str) -> None:
    payload_obj = {"urn": urn}
    cli_utils.post_delete_endpoint_with_session_and_url(
        requests.Session(),
        get_gms_url() + "/entities?action=delete",
        payload_obj,
    )

def delete_urns(urns: List[str]) -> None:
    for urn in urns:
        delete_urn(urn)
def delete_urns_from_file(filename: str, shared_data: bool = False) -> None:
    if not cli_utils.get_boolean_env_variable("CLEANUP_DATA", True):
        print("Not cleaning data to save time")
        return
    session = requests.Session()
    session.headers.update(
        {
            "X-RestLi-Protocol-Version": "2.0.0",
            "Content-Type": "application/json",
        }
    )

    def delete(entry):
        is_mcp = "entityUrn" in entry
        urn = None
        # MCP entries carry the URN directly; snapshot entries nest it inside the proposed snapshot
        if is_mcp:
            urn = entry["entityUrn"]
        else:
            snapshot_union = entry["proposedSnapshot"]
            snapshot = list(snapshot_union.values())[0]
            urn = snapshot["urn"]
        delete_urn(urn)

    with open(filename) as f:
        d = json.load(f)

    Parallel(n_jobs=10)(delayed(delete)(entry) for entry in d)
    # Deletes need extra settle time when run between tests operating on common
    # data; otherwise the standard sync wait is enough.
    if shared_data:
        wait_for_writes_to_sync()
        # sleep(60)
    else:
        wait_for_writes_to_sync()
        # sleep(requests.ELASTICSEARCH_REFRESH_INTERVAL_SECONDS)

# Fixed "now" reference so relative-day calculations stay stable within a test run
NOW: datetime = datetime.now()
def get_timestampmillis_at_start_of_day(relative_day_num: int) -> int:
    """
    Returns the time in milliseconds from epoch at the start of the day
    corresponding to `now + relative_day_num`
    """
    dt: datetime = NOW + timedelta(days=float(relative_day_num))
    dt = datetime(
        year=dt.year,
        month=dt.month,
        day=dt.day,
        hour=0,
        minute=0,
        second=0,
        microsecond=0,
    )
    return int(dt.timestamp() * 1000)

def get_strftime_from_timestamp_millis(ts_millis: int) -> str:
    return datetime.fromtimestamp(ts_millis / 1000).strftime("%Y-%m-%d %H:%M:%S")
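
# Build a single MCP-style dict for the dataHubStepStateProperties aspect of a
# user's onboarding step, keyed by the user URN and the onboarding id.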
def create_datahub_step_state_aspect(
    username: str, onboarding_id: str
) -> Dict[str, Any]:
    entity_urn = f"urn:li:dataHubStepState:urn:li:corpuser:{username}-{onboarding_id}"
    print(f"Creating dataHubStepState aspect for {entity_urn}")
    return {
        "auditHeader": None,
        "entityType": "dataHubStepState",
        "entityUrn": entity_urn,
        "changeType": "UPSERT",
        "aspectName": "dataHubStepStateProperties",
        "aspect": {
            "value": f'{{"properties":{{}},"lastModified":{{"actor":"urn:li:corpuser:{username}","time":{TIME}}}}}',
            "contentType": "application/json",
        },
        "systemMetadata": None,
    }
def create_datahub_step_state_aspects(
    username: str, onboarding_ids: List[str], onboarding_filename: str
) -> None:
    """
    For a specific user, creates dataHubStepState aspects for each onboarding id
    in the list and writes them to `onboarding_filename` as a JSON array.
    """
    aspects_dict: List[Dict[str, Any]] = [
        create_datahub_step_state_aspect(username, onboarding_id)
        for onboarding_id in onboarding_ids
    ]
    with open(onboarding_filename, "w") as f:
        json.dump(aspects_dict, f, indent=2)
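
# Block until the async write path has been fully consumed: poll the Kafka
# consumer lag of the generic-mae-consumer-job-client group (via `docker exec`
# into the broker container) until it reaches zero or the timeout expires.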
def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None:
    start_time = time.time()
    # get offsets
    lag_zero = False
    while not lag_zero and (time.time() - start_time) < max_timeout_in_sec:
        time.sleep(1)  # micro-sleep
        completed_process = subprocess.run(
            "docker exec broker /bin/kafka-consumer-groups --bootstrap-server broker:29092 "
            "--group generic-mae-consumer-job-client --describe | grep -v LAG | awk '{print $6}'",
            capture_output=True,
            shell=True,
            text=True,
        )
        result = str(completed_process.stdout)
        lines = result.splitlines()
        lag_values = [int(l) for l in lines if l != ""]
        if not lag_values:
            # No output yet (e.g. the consumer group is not registered); keep polling.
            continue
        maximum_lag = max(lag_values)
        if maximum_lag == 0:
            lag_zero = True
    if not lag_zero:
        logger.warning(
            f"Exiting early from waiting for elastic to catch up due to a timeout. Current lag is {lag_values}"
        )
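
# Example of how these helpers are typically combined in a smoke test (the file
# names below are illustrative, not paths guaranteed to exist in this repo):
#
#   session = get_frontend_session()
#   ingest_file_via_rest("tests/data/sample_mces.json")
#   ...run assertions against get_gms_url() / get_frontend_url()...
#   delete_urns_from_file("tests/data/sample_mces.json")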