feat(run): Create a describe run endpoint for fetching aspects created by the ingestion run (#4964)

This commit is contained in:
Dexter Lee 2022-05-24 14:46:36 -07:00 committed by GitHub
parent bbd0ab823d
commit 669160a677
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 325 additions and 163 deletions

View File

@ -242,6 +242,21 @@ def parse_run_restli_response(response: requests.Response) -> dict:
return summary
def format_aspect_summaries(summaries: list) -> typing.List[typing.List[str]]:
local_timezone = datetime.now().astimezone().tzinfo
return [
[
row.get("urn"),
row.get("aspectName"),
datetime.fromtimestamp(row.get("timestamp") / 1000).strftime(
"%Y-%m-%d %H:%M:%S"
)
+ f" ({local_timezone})",
]
for row in summaries
]
def post_rollback_endpoint(
payload_obj: dict,
path: str,
@ -266,19 +281,7 @@ def post_rollback_endpoint(
if len(rows) == 0:
click.secho(f"No entities found. Payload used: {payload}", fg="yellow")
local_timezone = datetime.now().astimezone().tzinfo
structured_rolled_back_results = [
[
row.get("urn"),
row.get("aspectName"),
datetime.fromtimestamp(row.get("timestamp") / 1000).strftime(
"%Y-%m-%d %H:%M:%S"
)
+ f" ({local_timezone})",
]
for row in rolled_back_aspects
]
structured_rolled_back_results = format_aspect_summaries(rolled_back_aspects)
return (
structured_rolled_back_results,
entities_affected,

View File

@ -15,6 +15,7 @@ import datahub as datahub_package
from datahub.cli import cli_utils
from datahub.cli.cli_utils import (
CONDENSED_DATAHUB_CONFIG_PATH,
format_aspect_summaries,
get_session_and_host,
post_rollback_endpoint,
)
@ -203,36 +204,44 @@ def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> N
@ingest.command()
@click.option("--run-id", required=True, type=str)
@click.option("--start", type=int, default=0)
@click.option("--count", type=int, default=100)
@click.option(
"--include-soft-deletes",
is_flag=True,
default=False,
help="If enabled, will include aspects that have been soft deleted",
)
@click.option("-a", "--show-aspect", required=False, is_flag=True)
@telemetry.with_telemetry
def show(run_id: str) -> None:
def show(
run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool
) -> None:
"""Describe a provided ingestion run to datahub"""
session, gms_host = get_session_and_host()
payload_obj = {"runId": run_id, "dryRun": True, "hardDelete": True}
(
structured_rows,
entities_affected,
aspects_modified,
aspects_affected,
unsafe_entity_count,
unsafe_entities,
) = post_rollback_endpoint(payload_obj, "/runs?action=rollback")
url = f"{gms_host}/runs?action=describe"
if aspects_modified >= ELASTIC_MAX_PAGE_SIZE:
payload_obj = {
"runId": run_id,
"start": start,
"count": count,
"includeSoft": include_soft_deletes,
"includeAspect": show_aspect,
}
payload = json.dumps(payload_obj)
response = session.post(url, data=payload)
rows = parse_restli_response(response)
if not show_aspect:
click.echo(
f"this run created at least {entities_affected} new entities and updated at least {aspects_modified} aspects"
tabulate(format_aspect_summaries(rows), RUN_TABLE_COLUMNS, tablefmt="grid")
)
else:
click.echo(
f"this run created {entities_affected} new entities and updated {aspects_modified} aspects"
)
click.echo(
"rolling back will delete the entities created and revert the updated aspects"
)
click.echo()
click.echo(
f"showing first {len(structured_rows)} of {aspects_modified} aspects touched by this run"
)
click.echo(tabulate(structured_rows, RUN_TABLE_COLUMNS, tablefmt="grid"))
for row in rows:
click.echo(json.dumps(row, indent=4))
@ingest.command()

View File

@ -90,10 +90,7 @@ public class ESSystemMetadataDAO {
return null;
}
public BulkByScrollResponse deleteByUrnAspect(
@Nonnull final String urn,
@Nonnull final String aspect
) {
public BulkByScrollResponse deleteByUrnAspect(@Nonnull final String urn, @Nonnull final String aspect) {
BoolQueryBuilder finalQuery = QueryBuilders.boolQuery();
finalQuery.must(QueryBuilders.termQuery("urn", urn));
finalQuery.must(QueryBuilders.termQuery("aspect", aspect));
@ -114,7 +111,7 @@ public class ESSystemMetadataDAO {
return null;
}
public SearchResponse findByParams(Map<String, String> searchParams, boolean includeSoftDeleted) {
public SearchResponse findByParams(Map<String, String> searchParams, boolean includeSoftDeleted, int from, int size) {
SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
@ -131,8 +128,8 @@ public class ESSystemMetadataDAO {
searchSourceBuilder.query(finalQuery);
// this is the max page size elastic will return
searchSourceBuilder.size(10000);
searchSourceBuilder.from(from);
searchSourceBuilder.size(size);
searchRequest.source(searchSourceBuilder);
@ -147,15 +144,16 @@ public class ESSystemMetadataDAO {
return null;
}
public SearchResponse findByRegistry(String registryName, String registryVersion, boolean includeSoftDeleted) {
public SearchResponse findByRegistry(String registryName, String registryVersion, boolean includeSoftDeleted,
int from, int size) {
Map<String, String> params = new HashMap<>();
params.put("registryName", registryName);
params.put("registryVersion", registryVersion);
return findByParams(params, includeSoftDeleted);
return findByParams(params, includeSoftDeleted, from, size);
}
public SearchResponse findByRunId(String runId, boolean includeSoftDeleted) {
return findByParams(Collections.singletonMap("runId", runId), includeSoftDeleted);
public SearchResponse findByRunId(String runId, boolean includeSoftDeleted, int from, int size) {
return findByParams(Collections.singletonMap("runId", runId), includeSoftDeleted, from, size);
}
public SearchResponse findRuns(Integer pageOffset, Integer pageSize) {

View File

@ -6,6 +6,7 @@ import com.google.common.collect.ImmutableMap;
import com.linkedin.metadata.run.AspectRowSummary;
import com.linkedin.metadata.run.IngestionRunSummary;
import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
import com.linkedin.metadata.search.utils.ESUtils;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import com.linkedin.mxe.SystemMetadata;
import java.io.IOException;
@ -100,7 +101,8 @@ public class ElasticSearchSystemMetadataService implements SystemMetadataService
// searchBy findByParams
// If status.removed -> false (from removed to not removed) --> get soft deleted entities.
// If status.removed -> true (from not removed to removed) --> do not get soft deleted entities.
final List<AspectRowSummary> aspectList = findByParams(ImmutableMap.of("urn", urn), !removed);
final List<AspectRowSummary> aspectList =
findByParams(ImmutableMap.of("urn", urn), !removed, 0, ESUtils.MAX_RESULT_SIZE);
// for each -> toDocId and set removed to true for all
aspectList.forEach(aspect -> {
final String docId = toDocId(aspect.getUrn(), aspect.getAspectName());
@ -123,18 +125,19 @@ public class ElasticSearchSystemMetadataService implements SystemMetadataService
}
@Override
public List<AspectRowSummary> findByRunId(String runId, boolean includeSoftDeleted) {
return findByParams(Collections.singletonMap(FIELD_RUNID, runId), includeSoftDeleted);
public List<AspectRowSummary> findByRunId(String runId, boolean includeSoftDeleted, int from, int size) {
return findByParams(Collections.singletonMap(FIELD_RUNID, runId), includeSoftDeleted, from, size);
}
@Override
public List<AspectRowSummary> findByUrn(String urn, boolean includeSoftDeleted) {
return findByParams(Collections.singletonMap(FIELD_URN, urn), includeSoftDeleted);
public List<AspectRowSummary> findByUrn(String urn, boolean includeSoftDeleted, int from, int size) {
return findByParams(Collections.singletonMap(FIELD_URN, urn), includeSoftDeleted, from, size);
}
@Override
public List<AspectRowSummary> findByParams(Map<String, String> systemMetaParams, boolean includeSoftDeleted) {
SearchResponse searchResponse = _esDAO.findByParams(systemMetaParams, includeSoftDeleted);
public List<AspectRowSummary> findByParams(Map<String, String> systemMetaParams, boolean includeSoftDeleted, int from,
int size) {
SearchResponse searchResponse = _esDAO.findByParams(systemMetaParams, includeSoftDeleted, from, size);
if (searchResponse != null) {
SearchHits hits = searchResponse.getHits();
List<AspectRowSummary> summaries = Arrays.stream(hits.getHits()).map(hit -> {
@ -159,11 +162,12 @@ public class ElasticSearchSystemMetadataService implements SystemMetadataService
}
@Override
public List<AspectRowSummary> findByRegistry(String registryName, String registryVersion, boolean includeSoftDeleted) {
public List<AspectRowSummary> findByRegistry(String registryName, String registryVersion, boolean includeSoftDeleted,
int from, int size) {
Map<String, String> registryParams = new HashMap<>();
registryParams.put(FIELD_REGISTRY_NAME, registryName);
registryParams.put(FIELD_REGISTRY_VERSION, registryVersion);
return findByParams(registryParams, includeSoftDeleted);
return findByParams(registryParams, includeSoftDeleted, from, size);
}
@Override

View File

@ -23,13 +23,13 @@ public interface SystemMetadataService {
void insert(@Nullable SystemMetadata systemMetadata, String urn, String aspect);
List<AspectRowSummary> findByRunId(String runId, boolean includeSoftDeleted);
List<AspectRowSummary> findByRunId(String runId, boolean includeSoftDeleted, int from, int size);
List<AspectRowSummary> findByUrn(String urn, boolean includeSoftDeleted);
List<AspectRowSummary> findByUrn(String urn, boolean includeSoftDeleted, int from, int size);
List<AspectRowSummary> findByParams(Map<String, String> systemMetaParams, boolean includeSoftDeleted);
List<AspectRowSummary> findByParams(Map<String, String> systemMetaParams, boolean includeSoftDeleted, int from, int size);
List<AspectRowSummary> findByRegistry(String registryName, String registryVersion, boolean includeSoftDeleted);
List<AspectRowSummary> findByRegistry(String registryName, String registryVersion, boolean includeSoftDeleted, int from, int size);
List<IngestionRunSummary> listRuns(Integer pageOffset, Integer pageSize, boolean includeSoftDeleted);

View File

@ -4,6 +4,7 @@ import com.linkedin.metadata.ElasticTestUtils;
import com.linkedin.metadata.run.AspectRowSummary;
import com.linkedin.metadata.run.IngestionRunSummary;
import com.linkedin.metadata.search.elasticsearch.ElasticSearchServiceTest;
import com.linkedin.metadata.search.utils.ESUtils;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
import com.linkedin.mxe.SystemMetadata;
@ -137,7 +138,7 @@ public class ElasticSearchSystemMetadataServiceTest {
syncAfterWrite(_searchClient, _indexName);
List<AspectRowSummary> rows = _client.findByRunId("abc-456", false);
List<AspectRowSummary> rows = _client.findByRunId("abc-456", false, 0, ESUtils.MAX_RESULT_SIZE);
assertEquals(rows.size(), 4);
rows.forEach(row -> assertEquals(row.getRunId(), "abc-456"));
@ -169,7 +170,7 @@ public class ElasticSearchSystemMetadataServiceTest {
syncAfterWrite(_searchClient, _indexName);
List<AspectRowSummary> rows = _client.findByRunId("abc-456", false);
List<AspectRowSummary> rows = _client.findByRunId("abc-456", false, 0, ESUtils.MAX_RESULT_SIZE);
assertEquals(rows.size(), 2);
rows.forEach(row -> assertEquals(row.getRunId(), "abc-456"));

View File

@ -1,5 +1,7 @@
namespace com.linkedin.metadata.run
import com.linkedin.entity.Aspect
record AspectRowSummary {
runId: string
aspectName: string
@ -8,4 +10,5 @@ record AspectRowSummary {
metadata: string
version: long
keyAspect: boolean
aspect: optional Aspect
}

View File

@ -11,6 +11,27 @@
},
"supports" : [ ],
"actions" : [ {
"name" : "describe",
"parameters" : [ {
"name" : "runId",
"type" : "string"
}, {
"name" : "start",
"type" : "int"
}, {
"name" : "count",
"type" : "int"
}, {
"name" : "includeSoft",
"type" : "boolean",
"optional" : true
}, {
"name" : "includeAspect",
"type" : "boolean",
"optional" : true
} ],
"returns" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.run.AspectRowSummary\" }"
}, {
"name" : "list",
"doc" : "Retrieves the value for an entity that is made up of latest versions of specified aspects.",
"parameters" : [ {

View File

@ -702,8 +702,10 @@
"doc" : "Instance of the data platform (e.g. db instance)",
"optional" : true,
"Searchable" : {
"addToFilters" : true,
"fieldName" : "platformInstance",
"fieldType" : "URN"
"fieldType" : "URN",
"filterNameOverride" : "Platform Instance"
}
} ],
"Aspect" : {
@ -804,6 +806,10 @@
}
},
"doc" : "Urn of the applied tag",
"Relationship" : {
"entityTypes" : [ "tag" ],
"name" : "TaggedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
@ -881,6 +887,10 @@
}
},
"doc" : "Urn of the applied glossary term",
"Relationship" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "TermedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
@ -2483,7 +2493,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "FieldTaggedWith"
"name" : "SchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -2501,7 +2511,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "FieldWithGlossaryTerm"
"name" : "SchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
@ -2670,7 +2680,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "EditableFieldTaggedWith"
"name" : "EditableSchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -2688,7 +2698,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "EditableFieldWithGlossaryTerm"
"name" : "EditableSchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {

View File

@ -581,8 +581,10 @@
"doc" : "Instance of the data platform (e.g. db instance)",
"optional" : true,
"Searchable" : {
"addToFilters" : true,
"fieldName" : "platformInstance",
"fieldType" : "URN"
"fieldType" : "URN",
"filterNameOverride" : "Platform Instance"
}
} ],
"Aspect" : {
@ -785,6 +787,10 @@
}
},
"doc" : "Urn of the applied tag",
"Relationship" : {
"entityTypes" : [ "tag" ],
"name" : "TaggedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
@ -862,6 +868,10 @@
}
},
"doc" : "Urn of the applied glossary term",
"Relationship" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "TermedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
@ -1933,6 +1943,12 @@
"Aspect" : {
"name" : "viewProperties"
}
}, {
"type" : "record",
"name" : "Aspect",
"namespace" : "com.linkedin.entity",
"doc" : "Placeholder PDL record to store the aspect in the response. By using this place-holder, metadata-service can return\nany type of aspect without having to define a single union class",
"fields" : [ ]
}, {
"type" : "record",
"name" : "Entity",
@ -2876,7 +2892,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "FieldTaggedWith"
"name" : "SchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -2894,7 +2910,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "FieldWithGlossaryTerm"
"name" : "SchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
@ -3063,7 +3079,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "EditableFieldTaggedWith"
"name" : "EditableSchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -3081,7 +3097,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "EditableFieldWithGlossaryTerm"
"name" : "EditableSchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
@ -4602,12 +4618,15 @@
"type" : "string",
"doc" : "Display name of the Policy",
"Searchable" : {
"fieldType" : "KEYWORD"
"fieldType" : "TEXT_PARTIAL"
}
}, {
"name" : "description",
"type" : "string",
"doc" : "Description of the Policy"
"doc" : "Description of the Policy",
"Searchable" : {
"fieldType" : "TEXT"
}
}, {
"name" : "type",
"type" : "string",
@ -5195,6 +5214,10 @@
}, {
"name" : "keyAspect",
"type" : "boolean"
}, {
"name" : "aspect",
"type" : "com.linkedin.entity.Aspect",
"optional" : true
} ]
}, {
"type" : "record",
@ -5223,7 +5246,7 @@
"fields" : [ {
"name" : "entity",
"type" : "com.linkedin.common.Urn",
"doc" : " Urn of the entity containing a related aspect"
"doc" : " Urn of the entity that is referenced by the aspect."
}, {
"name" : "aspect",
"type" : "string"

View File

@ -462,8 +462,10 @@
"doc" : "Instance of the data platform (e.g. db instance)",
"optional" : true,
"Searchable" : {
"addToFilters" : true,
"fieldName" : "platformInstance",
"fieldType" : "URN"
"fieldType" : "URN",
"filterNameOverride" : "Platform Instance"
}
} ],
"Aspect" : {
@ -564,6 +566,10 @@
}
},
"doc" : "Urn of the applied tag",
"Relationship" : {
"entityTypes" : [ "tag" ],
"name" : "TaggedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
@ -641,6 +647,10 @@
}
},
"doc" : "Urn of the applied glossary term",
"Relationship" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "TermedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
@ -1424,6 +1434,12 @@
"Aspect" : {
"name" : "upstreamLineage"
}
}, {
"type" : "record",
"name" : "Aspect",
"namespace" : "com.linkedin.entity",
"doc" : "Placeholder PDL record to store the aspect in the response. By using this place-holder, metadata-service can return\nany type of aspect without having to define a single union class",
"fields" : [ ]
}, {
"type" : "record",
"name" : "GlossaryNodeInfo",
@ -2230,7 +2246,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "FieldTaggedWith"
"name" : "SchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -2248,7 +2264,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "FieldWithGlossaryTerm"
"name" : "SchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
@ -2417,7 +2433,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "EditableFieldTaggedWith"
"name" : "EditableSchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -2435,7 +2451,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "EditableFieldWithGlossaryTerm"
"name" : "EditableSchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
@ -3204,6 +3220,10 @@
}, {
"name" : "keyAspect",
"type" : "boolean"
}, {
"name" : "aspect",
"type" : "com.linkedin.entity.Aspect",
"optional" : true
} ]
}, {
"type" : "record",
@ -3277,6 +3297,27 @@
},
"supports" : [ ],
"actions" : [ {
"name" : "describe",
"parameters" : [ {
"name" : "runId",
"type" : "string"
}, {
"name" : "start",
"type" : "int"
}, {
"name" : "count",
"type" : "int"
}, {
"name" : "includeSoft",
"type" : "boolean",
"optional" : true
}, {
"name" : "includeAspect",
"type" : "boolean",
"optional" : true
} ],
"returns" : "{ \"type\" : \"array\", \"items\" : \"com.linkedin.metadata.run.AspectRowSummary\" }"
}, {
"name" : "list",
"doc" : "Retrieves the value for an entity that is made up of latest versions of specified aspects.",
"parameters" : [ {

View File

@ -581,8 +581,10 @@
"doc" : "Instance of the data platform (e.g. db instance)",
"optional" : true,
"Searchable" : {
"addToFilters" : true,
"fieldName" : "platformInstance",
"fieldType" : "URN"
"fieldType" : "URN",
"filterNameOverride" : "Platform Instance"
}
} ],
"Aspect" : {
@ -785,6 +787,10 @@
}
},
"doc" : "Urn of the applied tag",
"Relationship" : {
"entityTypes" : [ "tag" ],
"name" : "TaggedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "tags",
@ -862,6 +868,10 @@
}
},
"doc" : "Urn of the applied glossary term",
"Relationship" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "TermedWith"
},
"Searchable" : {
"addToFilters" : true,
"fieldName" : "glossaryTerms",
@ -2876,7 +2886,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "FieldTaggedWith"
"name" : "SchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -2894,7 +2904,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "FieldWithGlossaryTerm"
"name" : "SchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
@ -3063,7 +3073,7 @@
"Relationship" : {
"/tags/*/tag" : {
"entityTypes" : [ "tag" ],
"name" : "EditableFieldTaggedWith"
"name" : "EditableSchemaFieldTaggedWith"
}
},
"Searchable" : {
@ -3081,7 +3091,7 @@
"Relationship" : {
"/terms/*/urn" : {
"entityTypes" : [ "glossaryTerm" ],
"name" : "EditableFieldWithGlossaryTerm"
"name" : "EditableSchemaFieldWithGlossaryTerm"
}
},
"Searchable" : {
@ -4602,12 +4612,15 @@
"type" : "string",
"doc" : "Display name of the Policy",
"Searchable" : {
"fieldType" : "KEYWORD"
"fieldType" : "TEXT_PARTIAL"
}
}, {
"name" : "description",
"type" : "string",
"doc" : "Description of the Policy"
"doc" : "Description of the Policy",
"Searchable" : {
"fieldType" : "TEXT"
}
}, {
"name" : "type",
"type" : "string",

View File

@ -1,18 +1,22 @@
package com.linkedin.metadata.resources.entity;
import com.codahale.metrics.MetricRegistry;
import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.entity.EnvelopedAspect;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.aspect.VersionedAspect;
import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.RollbackRunResult;
import com.linkedin.metadata.restli.RestliUtil;
import com.linkedin.metadata.run.UnsafeEntityInfo;
import com.linkedin.metadata.run.UnsafeEntityInfoArray;
import com.linkedin.metadata.run.AspectRowSummary;
import com.linkedin.metadata.run.AspectRowSummaryArray;
import com.linkedin.metadata.run.IngestionRunSummary;
import com.linkedin.metadata.run.IngestionRunSummaryArray;
import com.linkedin.metadata.run.RollbackResponse;
import com.linkedin.metadata.run.UnsafeEntityInfo;
import com.linkedin.metadata.run.UnsafeEntityInfoArray;
import com.linkedin.metadata.search.utils.ESUtils;
import com.linkedin.metadata.systemmetadata.SystemMetadataService;
import com.linkedin.parseq.Task;
import com.linkedin.restli.server.annotations.Action;
@ -21,16 +25,15 @@ import com.linkedin.restli.server.annotations.Optional;
import com.linkedin.restli.server.annotations.RestLiCollection;
import com.linkedin.restli.server.resources.CollectionResourceTaskTemplate;
import io.opentelemetry.extension.annotations.WithSpan;
import lombok.extern.slf4j.Slf4j;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.inject.Named;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.inject.Named;
import lombok.extern.slf4j.Slf4j;
/**
@ -63,9 +66,9 @@ public class BatchIngestionRunResource extends CollectionResourceTaskTemplate<St
@Nonnull
@WithSpan
public Task<RollbackResponse> rollback(@ActionParam("runId") @Nonnull String runId,
@ActionParam("dryRun") @Optional Boolean dryRun,
@Deprecated @ActionParam("hardDelete") @Optional Boolean hardDelete,
@ActionParam("safe") @Optional Boolean safe) {
@ActionParam("dryRun") @Optional Boolean dryRun,
@Deprecated @ActionParam("hardDelete") @Optional Boolean hardDelete,
@ActionParam("safe") @Optional Boolean safe) {
log.info("ROLLBACK RUN runId: {} dry run: {}", runId, dryRun);
boolean doHardDelete = safe != null ? !safe : hardDelete != null ? hardDelete : DEFAULT_HARD_DELETE;
@ -82,24 +85,24 @@ public class BatchIngestionRunResource extends CollectionResourceTaskTemplate<St
}
RollbackResponse response = new RollbackResponse();
List<AspectRowSummary> aspectRowsToDelete;
aspectRowsToDelete = _systemMetadataService.findByRunId(runId, doHardDelete);
aspectRowsToDelete = _systemMetadataService.findByRunId(runId, doHardDelete, 0, ESUtils.MAX_RESULT_SIZE);
log.info("found {} rows to delete...", stringifyRowCount(aspectRowsToDelete.size()));
if (dryRun) {
final Map<Boolean, List<AspectRowSummary>> aspectsSplitByIsKeyAspects = aspectRowsToDelete.stream()
.collect(Collectors.partitioningBy(AspectRowSummary::isKeyAspect));
final Map<Boolean, List<AspectRowSummary>> aspectsSplitByIsKeyAspects =
aspectRowsToDelete.stream().collect(Collectors.partitioningBy(AspectRowSummary::isKeyAspect));
final List<AspectRowSummary> keyAspects = aspectsSplitByIsKeyAspects.get(true);
long entitiesDeleted = keyAspects.size();
long aspectsReverted = aspectRowsToDelete.size();
final long affectedEntities = aspectRowsToDelete.stream()
.collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
final long affectedEntities =
aspectRowsToDelete.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
final AspectRowSummaryArray rowSummaries = new AspectRowSummaryArray(
aspectRowsToDelete.subList(0, Math.min(100, aspectRowsToDelete.size())));
final AspectRowSummaryArray rowSummaries =
new AspectRowSummaryArray(aspectRowsToDelete.subList(0, Math.min(100, aspectRowsToDelete.size())));
// If we are soft deleting, remove key aspects from count of aspects being deleted
if (!doHardDelete) {
@ -108,34 +111,33 @@ public class BatchIngestionRunResource extends CollectionResourceTaskTemplate<St
}
// Compute the aspects that exist referencing the key aspects we are deleting
final List<AspectRowSummary> affectedAspectsList = keyAspects.stream()
.map((AspectRowSummary urn) -> _systemMetadataService.findByUrn(urn.getUrn(), false))
.flatMap(List::stream)
.filter(row -> !row.getRunId().equals(runId) && !row.isKeyAspect()
&& !row.getAspectName().equals(Constants.STATUS_ASPECT_NAME))
.collect(Collectors.toList());
.map((AspectRowSummary urn) -> _systemMetadataService.findByUrn(urn.getUrn(), false, 0,
ESUtils.MAX_RESULT_SIZE))
.flatMap(List::stream)
.filter(row -> !row.getRunId().equals(runId) && !row.isKeyAspect() && !row.getAspectName()
.equals(Constants.STATUS_ASPECT_NAME))
.collect(Collectors.toList());
long affectedAspects = affectedAspectsList.size();
long unsafeEntitiesCount = affectedAspectsList.stream()
.collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
long unsafeEntitiesCount =
affectedAspectsList.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
final List<UnsafeEntityInfo> unsafeEntityInfos = affectedAspectsList.stream().map(AspectRowSummary::getUrn)
.distinct()
.map(urn -> {
UnsafeEntityInfo unsafeEntityInfo = new UnsafeEntityInfo();
unsafeEntityInfo.setUrn(urn);
return unsafeEntityInfo;
})
final List<UnsafeEntityInfo> unsafeEntityInfos =
affectedAspectsList.stream().map(AspectRowSummary::getUrn).distinct().map(urn -> {
UnsafeEntityInfo unsafeEntityInfo = new UnsafeEntityInfo();
unsafeEntityInfo.setUrn(urn);
return unsafeEntityInfo;
})
// Return at most 1 million rows
.limit(DEFAULT_UNSAFE_ENTITIES_PAGE_SIZE)
.collect(Collectors.toList());
.limit(DEFAULT_UNSAFE_ENTITIES_PAGE_SIZE).collect(Collectors.toList());
return response.setAspectsAffected(affectedAspects)
.setAspectsReverted(aspectsReverted)
.setEntitiesAffected(affectedEntities)
.setEntitiesDeleted(entitiesDeleted)
.setUnsafeEntitiesCount(unsafeEntitiesCount)
.setUnsafeEntities(new UnsafeEntityInfoArray(unsafeEntityInfos))
.setAspectRowSummaries(rowSummaries);
.setAspectsReverted(aspectsReverted)
.setEntitiesAffected(affectedEntities)
.setEntitiesDeleted(entitiesDeleted)
.setUnsafeEntitiesCount(unsafeEntitiesCount)
.setUnsafeEntities(new UnsafeEntityInfoArray(unsafeEntityInfos))
.setAspectRowSummaries(rowSummaries);
}
RollbackRunResult rollbackRunResult = _entityService.rollbackRun(aspectRowsToDelete, runId, doHardDelete);
@ -145,7 +147,7 @@ public class BatchIngestionRunResource extends CollectionResourceTaskTemplate<St
// since elastic limits how many rows we can access at once, we need to iteratively delete
while (aspectRowsToDelete.size() >= ELASTIC_MAX_PAGE_SIZE) {
sleep(ELASTIC_BATCH_DELETE_SLEEP_SEC);
aspectRowsToDelete = _systemMetadataService.findByRunId(runId, doHardDelete);
aspectRowsToDelete = _systemMetadataService.findByRunId(runId, doHardDelete, 0, ESUtils.MAX_RESULT_SIZE);
log.info("{} remaining rows to delete...", stringifyRowCount(aspectRowsToDelete.size()));
log.info("deleting...");
rollbackRunResult = _entityService.rollbackRun(aspectRowsToDelete, runId, doHardDelete);
@ -156,51 +158,50 @@ public class BatchIngestionRunResource extends CollectionResourceTaskTemplate<St
log.info("finished deleting {} rows", deletedRows.size());
int aspectsReverted = deletedRows.size() + rowsDeletedFromEntityDeletion;
final Map<Boolean, List<AspectRowSummary>> aspectsSplitByIsKeyAspects = aspectRowsToDelete.stream()
.collect(Collectors.partitioningBy(AspectRowSummary::isKeyAspect));
final Map<Boolean, List<AspectRowSummary>> aspectsSplitByIsKeyAspects =
aspectRowsToDelete.stream().collect(Collectors.partitioningBy(AspectRowSummary::isKeyAspect));
final List<AspectRowSummary> keyAspects = aspectsSplitByIsKeyAspects.get(true);
final long entitiesDeleted = keyAspects.size();
final long affectedEntities = deletedRows.stream()
.collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
final long affectedEntities =
deletedRows.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
final AspectRowSummaryArray rowSummaries = new AspectRowSummaryArray(
aspectRowsToDelete.subList(0, Math.min(100, aspectRowsToDelete.size())));
final AspectRowSummaryArray rowSummaries =
new AspectRowSummaryArray(aspectRowsToDelete.subList(0, Math.min(100, aspectRowsToDelete.size())));
log.info("computing aspects affected by this rollback...");
// Compute the aspects that exist referencing the key aspects we are deleting
final List<AspectRowSummary> affectedAspectsList = keyAspects.stream()
.map((AspectRowSummary urn) -> _systemMetadataService.findByUrn(urn.getUrn(), false))
.flatMap(List::stream)
.filter(row -> !row.getRunId().equals(runId) && !row.isKeyAspect()
&& !row.getAspectName().equals(Constants.STATUS_ASPECT_NAME))
.collect(Collectors.toList());
.map((AspectRowSummary urn) -> _systemMetadataService.findByUrn(urn.getUrn(), false, 0,
ESUtils.MAX_RESULT_SIZE))
.flatMap(List::stream)
.filter(row -> !row.getRunId().equals(runId) && !row.isKeyAspect() && !row.getAspectName()
.equals(Constants.STATUS_ASPECT_NAME))
.collect(Collectors.toList());
long affectedAspects = affectedAspectsList.size();
long unsafeEntitiesCount = affectedAspectsList.stream()
.collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
long unsafeEntitiesCount =
affectedAspectsList.stream().collect(Collectors.groupingBy(AspectRowSummary::getUrn)).keySet().size();
final List<UnsafeEntityInfo> unsafeEntityInfos = affectedAspectsList.stream().map(AspectRowSummary::getUrn)
.distinct()
.map(urn -> {
UnsafeEntityInfo unsafeEntityInfo = new UnsafeEntityInfo();
unsafeEntityInfo.setUrn(urn);
return unsafeEntityInfo;
})
final List<UnsafeEntityInfo> unsafeEntityInfos =
affectedAspectsList.stream().map(AspectRowSummary::getUrn).distinct().map(urn -> {
UnsafeEntityInfo unsafeEntityInfo = new UnsafeEntityInfo();
unsafeEntityInfo.setUrn(urn);
return unsafeEntityInfo;
})
// Return at most 1 million rows
.limit(DEFAULT_UNSAFE_ENTITIES_PAGE_SIZE)
.collect(Collectors.toList());
.limit(DEFAULT_UNSAFE_ENTITIES_PAGE_SIZE).collect(Collectors.toList());
log.info("calculation done.");
return response.setAspectsAffected(affectedAspects)
.setAspectsReverted(aspectsReverted)
.setEntitiesAffected(affectedEntities)
.setEntitiesDeleted(entitiesDeleted)
.setUnsafeEntitiesCount(unsafeEntitiesCount)
.setUnsafeEntities(new UnsafeEntityInfoArray(unsafeEntityInfos))
.setAspectRowSummaries(rowSummaries);
.setAspectsReverted(aspectsReverted)
.setEntitiesAffected(affectedEntities)
.setEntitiesDeleted(entitiesDeleted)
.setUnsafeEntitiesCount(unsafeEntitiesCount)
.setUnsafeEntities(new UnsafeEntityInfoArray(unsafeEntityInfos))
.setAspectRowSummaries(rowSummaries);
}, MetricRegistry.name(this.getClass(), "rollback"));
}
@ -232,12 +233,45 @@ public class BatchIngestionRunResource extends CollectionResourceTaskTemplate<St
log.info("LIST RUNS offset: {} size: {}", pageOffset, pageSize);
return RestliUtil.toTask(() -> {
List<IngestionRunSummary> summaries = _systemMetadataService.listRuns(
pageOffset != null ? pageOffset : DEFAULT_OFFSET,
pageSize != null ? pageSize : DEFAULT_PAGE_SIZE,
includeSoft != null ? includeSoft : DEFAULT_INCLUDE_SOFT_DELETED);
List<IngestionRunSummary> summaries =
_systemMetadataService.listRuns(pageOffset != null ? pageOffset : DEFAULT_OFFSET,
pageSize != null ? pageSize : DEFAULT_PAGE_SIZE,
includeSoft != null ? includeSoft : DEFAULT_INCLUDE_SOFT_DELETED);
return new IngestionRunSummaryArray(summaries);
}, MetricRegistry.name(this.getClass(), "list"));
return new IngestionRunSummaryArray(summaries);
}, MetricRegistry.name(this.getClass(), "list"));
}
@Action(name = "describe")
@Nonnull
@WithSpan
public Task<AspectRowSummaryArray> describe(@ActionParam("runId") @Nonnull String runId,
@ActionParam("start") Integer start, @ActionParam("count") Integer count,
@ActionParam("includeSoft") @Optional @Nullable Boolean includeSoft,
@ActionParam("includeAspect") @Optional @Nullable Boolean includeAspect) {
log.info("DESCRIBE RUN runId: {}, start: {}, count: {}", runId, start, count);
return RestliUtil.toTask(() -> {
List<AspectRowSummary> summaries =
_systemMetadataService.findByRunId(runId, includeSoft != null && includeSoft, start, count);
if (includeAspect != null && includeAspect) {
summaries.forEach(summary -> {
Urn urn = UrnUtils.getUrn(summary.getUrn());
try {
EnvelopedAspect aspect =
_entityService.getLatestEnvelopedAspect(urn.getEntityType(), urn, summary.getAspectName());
if (aspect == null) {
log.error("Aspect for summary {} not found", summary);
} else {
summary.setAspect(aspect.getValue());
}
} catch (Exception e) {
log.error("Error while fetching aspect for summary {}", summary, e);
}
});
}
return new AspectRowSummaryArray(summaries);
}, MetricRegistry.name(this.getClass(), "describe"));
}
}

View File

@ -33,6 +33,7 @@ import com.linkedin.metadata.search.LineageSearchService;
import com.linkedin.metadata.search.SearchEntity;
import com.linkedin.metadata.search.SearchResult;
import com.linkedin.metadata.search.SearchService;
import com.linkedin.metadata.search.utils.ESUtils;
import com.linkedin.metadata.systemmetadata.SystemMetadataService;
import com.linkedin.mxe.SystemMetadata;
import com.linkedin.parseq.Task;
@ -369,7 +370,8 @@ public class EntityResource extends CollectionResourceTaskTemplate<String, Entit
return RestliUtil.toTask(() -> {
RollbackResponse response = new RollbackResponse();
List<AspectRowSummary> aspectRowsToDelete =
_systemMetadataService.findByRegistry(finalRegistryName, finalRegistryVersion.toString(), false);
_systemMetadataService.findByRegistry(finalRegistryName, finalRegistryVersion.toString(), false, 0,
ESUtils.MAX_RESULT_SIZE);
log.info("found {} rows to delete...", stringifyRowCount(aspectRowsToDelete.size()));
response.setAspectsAffected(aspectRowsToDelete.size());
response.setEntitiesAffected(