mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-08 07:23:34 +00:00
* Changes rollback behaviour to apply soft deletes by default Summary: Addresses feature request: Flag in delete command to only delete aspects touched by an ingestion run; add flag to nuke everything by modifying the default behaviour of a rollback operation which will not by default delete an entity if a keyAspect is being rolled-back. Instead the key aspect is kept and a StatusAspect is upserted with removed=true, effectively making a soft delete. Another PR will follow to perform garbage collection on these soft deleted entities. To keep old behaviour, a new parameter to the cli ingest rollback endpoint: --hard-delete was added. * Adds restli specs * Fixes deleteAspect endpoint & adds support for nested transactions * Enable regression test & fix docker-compose for local development * Add generated quickstart * Fix quickstart generation script * Adds missing var env to docker-compose-without-neo4j * Sets status removed=true when ingesting resources * Adds soft deletes for ElasticSearch + soft delete flags across ingestion sub-commands * Makes elastic search consistent * Update tests with new behaviour * apply review comments * apply review comment * Forces Elastic search to add documents with status removed false when ingesting * Reset gradle properties to default * Fix tests
66 lines
2.7 KiB
Python
66 lines
2.7 KiB
Python
import json
|
|
import pytest
|
|
from time import sleep
|
|
from datahub.cli import delete_cli, ingest_cli
|
|
from datahub.cli.cli_utils import guess_entity_type, post_entity, get_aspects_for_entity
|
|
from datahub.cli.ingest_cli import get_session_and_host
|
|
from datahub.cli.delete_cli import guess_entity_type, delete_one_urn_cmd
|
|
from tests.utils import ingest_file_via_rest, delete_urns_from_file
|
|
|
|
def delete_by_urn(urn, session, host):
    """Delete a single URN through ``delete_one_urn_cmd`` and then pause.

    The call is made with ``soft=False`` and ``dry_run=False``, and reuses the
    caller's ``(session, host)`` pair via ``cached_session_host``.

    Args:
        urn: URN of the entity to delete; also used to guess the entity type.
        session: Session object handed through to the delete command.
        host: Host value handed through to the delete command.
    """
    delete_one_urn_cmd(
        urn,
        soft=False,
        dry_run=False,
        entity_type=guess_entity_type(urn=urn),
        cached_session_host=(session, host),
    )
    # NOTE(review): presumably gives the backend time to process the delete
    # before the caller inspects state -- confirm the 5s figure is still needed.
    sleep(5)
    # Emit an empty line on stdout.
    print()
|
|
|
|
@pytest.mark.dependency()
def test_rollback_editable():
    """Roll back the first of two ingestion runs and check which aspects remain.

    Steps:
      1. Delete the test dataset so the run starts from a clean slate.
      2. Ingest ``tests/cli/cli_test_data.json`` (expected to add ``browsePaths``).
      3. Ingest ``tests/cli/cli_editable_test_data.json`` (expected to add
         ``editableDatasetProperties``).
      4. Roll back run 1 with ``hardDelete=False``.
      5. Assert ``editableDatasetProperties`` is still present and
         ``browsePaths`` is gone.
    """
    platform = "urn:li:dataPlatform:kafka"
    dataset_name = "test-rollback"
    env = "PROD"
    dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

    session, gms_host = get_session_and_host()

    def has_aspect(aspect_name):
        """Return True if ``aspect_name`` is a key in the aspects fetched for the dataset."""
        return aspect_name in get_aspects_for_entity(
            entity_urn=dataset_urn, aspects=[aspect_name], typed=False
        )

    # Clean slate.
    delete_by_urn(dataset_urn, session, gms_host)

    assert not has_aspect("browsePaths")
    assert not has_aspect("editableDatasetProperties")

    # Ingest dataset
    ingested_dataset_run_id = ingest_file_via_rest(
        "tests/cli/cli_test_data.json"
    ).config.run_id
    print("Ingested dataset id:", ingested_dataset_run_id)
    # Assert that first data ingestion worked
    assert has_aspect("browsePaths")

    # Sleep forces ingestion of files to have distinct run-ids.
    sleep(1)

    # Make editable change
    ingested_editable_run_id = ingest_file_via_rest(
        "tests/cli/cli_editable_test_data.json"
    ).config.run_id
    print("ingested editable id:", ingested_editable_run_id)
    # Assert that second data ingestion worked
    assert has_aspect("editableDatasetProperties")

    # rollback ingestion 1
    rollback_url = f"{gms_host}/runs?action=rollback"
    rollback_payload = {
        "runId": ingested_dataset_run_id,
        "dryRun": False,
        "hardDelete": False,
    }
    rollback_response = session.post(rollback_url, data=json.dumps(rollback_payload))
    # Surface an HTTP-level rollback failure here instead of as a confusing
    # assertion failure further down.
    # NOTE(review): assumes `session` is a requests-style session whose
    # responses expose raise_for_status() -- confirm against get_session_and_host.
    rollback_response.raise_for_status()

    # Allow async MCP processor to handle ingestions & rollbacks
    sleep(10)

    # EditableDatasetProperties should still be part of the entity that was soft deleted.
    assert has_aspect("editableDatasetProperties")
    # But first ingestion aspects should not be present
    assert not has_aspect("browsePaths")