2021-06-03 13:24:33 -07:00
|
|
|
package com.linkedin.metadata.entity;
|
|
|
|
|
2021-12-14 11:18:02 +09:00
|
|
|
import com.codahale.metrics.Timer;
|
2023-09-06 10:51:03 -07:00
|
|
|
import com.linkedin.data.template.GetMode;
|
|
|
|
import com.linkedin.data.template.SetMode;
|
2023-09-30 22:47:59 -05:00
|
|
|
import com.linkedin.entity.client.SystemEntityClient;
|
2023-07-19 20:09:14 -05:00
|
|
|
import com.linkedin.metadata.config.PreProcessHooks;
|
2022-01-05 19:32:31 -06:00
|
|
|
import com.datahub.util.RecordUtils;
|
|
|
|
import com.datahub.util.exception.ModelConversionException;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.google.common.collect.ImmutableList;
|
2022-04-26 01:29:24 +01:00
|
|
|
import com.google.common.collect.ImmutableSet;
|
2022-05-24 20:40:42 +01:00
|
|
|
import com.google.common.collect.Iterators;
|
2021-07-29 20:04:40 -07:00
|
|
|
import com.google.common.collect.Streams;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.common.AuditStamp;
|
2021-10-21 11:15:10 -07:00
|
|
|
import com.linkedin.common.BrowsePaths;
|
2023-06-21 04:10:07 -04:00
|
|
|
import com.linkedin.common.BrowsePathsV2;
|
2022-03-15 19:05:52 +00:00
|
|
|
import com.linkedin.common.Status;
|
2022-05-24 20:40:42 +01:00
|
|
|
import com.linkedin.common.UrnArray;
|
2022-04-29 14:04:38 -05:00
|
|
|
import com.linkedin.common.VersionedUrn;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.common.urn.Urn;
|
2022-04-26 01:29:24 +01:00
|
|
|
import com.linkedin.common.urn.UrnUtils;
|
2022-05-24 20:40:42 +01:00
|
|
|
import com.linkedin.common.urn.VersionedUrnUtils;
|
2021-07-30 17:41:03 -07:00
|
|
|
import com.linkedin.data.schema.TyperefDataSchema;
|
2022-05-24 20:40:42 +01:00
|
|
|
import com.linkedin.data.template.DataTemplateUtil;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.data.template.RecordTemplate;
|
2022-09-07 13:32:38 -07:00
|
|
|
import com.linkedin.data.template.StringArray;
|
2023-03-21 08:43:56 -07:00
|
|
|
import com.linkedin.data.template.StringMap;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.data.template.UnionTemplate;
|
2022-09-07 13:32:38 -07:00
|
|
|
import com.linkedin.dataplatform.DataPlatformInfo;
|
2022-05-24 20:40:42 +01:00
|
|
|
import com.linkedin.entity.AspectType;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.entity.Entity;
|
2022-01-04 00:19:46 +09:00
|
|
|
import com.linkedin.entity.EntityResponse;
|
|
|
|
import com.linkedin.entity.EnvelopedAspect;
|
|
|
|
import com.linkedin.entity.EnvelopedAspectMap;
|
2021-11-08 16:22:24 -08:00
|
|
|
import com.linkedin.events.metadata.ChangeType;
|
2022-05-24 20:40:42 +01:00
|
|
|
import com.linkedin.metadata.Constants;
|
|
|
|
import com.linkedin.metadata.aspect.Aspect;
|
2021-06-16 10:03:21 -07:00
|
|
|
import com.linkedin.metadata.aspect.VersionedAspect;
|
2022-09-02 15:24:40 +05:30
|
|
|
import com.linkedin.metadata.entity.ebean.EbeanAspectV2;
|
2023-09-02 19:25:44 -05:00
|
|
|
import com.linkedin.metadata.entity.ebean.transactions.AspectsBatchImpl;
|
|
|
|
import com.linkedin.metadata.entity.transactions.AbstractBatchItem;
|
|
|
|
import com.linkedin.metadata.entity.ebean.transactions.PatchBatchItem;
|
|
|
|
import com.linkedin.metadata.entity.ebean.transactions.UpsertBatchItem;
|
2022-09-02 15:24:40 +05:30
|
|
|
import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs;
|
|
|
|
import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult;
|
2022-09-30 16:11:39 +05:30
|
|
|
import com.linkedin.metadata.entity.retention.BulkApplyRetentionArgs;
|
|
|
|
import com.linkedin.metadata.entity.retention.BulkApplyRetentionResult;
|
2023-09-02 19:25:44 -05:00
|
|
|
import com.linkedin.metadata.entity.transactions.AspectsBatch;
|
2022-03-29 18:32:04 -07:00
|
|
|
import com.linkedin.metadata.event.EventProducer;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.metadata.models.AspectSpec;
|
|
|
|
import com.linkedin.metadata.models.EntitySpec;
|
2023-03-21 08:43:56 -07:00
|
|
|
import com.linkedin.metadata.models.RelationshipFieldSpec;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.metadata.models.registry.EntityRegistry;
|
2021-09-02 19:05:13 -07:00
|
|
|
import com.linkedin.metadata.query.ListUrnsResult;
|
2021-07-29 20:04:40 -07:00
|
|
|
import com.linkedin.metadata.run.AspectRowSummary;
|
2023-06-21 04:10:07 -04:00
|
|
|
import com.linkedin.metadata.search.utils.BrowsePathV2Utils;
|
2023-06-15 06:17:13 -05:00
|
|
|
import com.linkedin.metadata.service.UpdateIndicesService;
|
2021-06-03 13:24:33 -07:00
|
|
|
import com.linkedin.metadata.snapshot.Snapshot;
|
2021-10-07 11:41:29 -07:00
|
|
|
import com.linkedin.metadata.utils.DataPlatformInstanceUtils;
|
|
|
|
import com.linkedin.metadata.utils.EntityKeyUtils;
|
2022-03-29 18:32:04 -07:00
|
|
|
import com.linkedin.metadata.utils.GenericRecordUtils;
|
2022-01-05 19:32:31 -06:00
|
|
|
import com.linkedin.metadata.utils.PegasusUtils;
|
2021-12-14 11:18:02 +09:00
|
|
|
import com.linkedin.metadata.utils.metrics.MetricUtils;
|
2021-07-29 20:04:40 -07:00
|
|
|
import com.linkedin.mxe.MetadataAuditOperation;
|
2022-05-24 20:40:42 +01:00
|
|
|
import com.linkedin.mxe.MetadataChangeLog;
|
2022-06-21 18:00:16 -05:00
|
|
|
import com.linkedin.mxe.MetadataChangeProposal;
|
2021-07-29 20:04:40 -07:00
|
|
|
import com.linkedin.mxe.SystemMetadata;
|
2021-07-30 17:41:03 -07:00
|
|
|
import com.linkedin.util.Pair;
|
2023-02-02 15:30:49 -08:00
|
|
|
import io.ebean.PagedList;
|
2023-09-02 19:25:44 -05:00
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
import java.net.URISyntaxException;
|
2022-09-23 01:52:18 -05:00
|
|
|
import java.nio.charset.StandardCharsets;
|
2022-05-24 20:40:42 +01:00
|
|
|
import java.sql.Timestamp;
|
2021-10-21 11:15:10 -07:00
|
|
|
import java.util.ArrayList;
|
2022-05-24 20:40:42 +01:00
|
|
|
import java.util.Arrays;
|
2021-06-03 13:24:33 -07:00
|
|
|
import java.util.Collections;
|
2022-05-24 20:40:42 +01:00
|
|
|
import java.util.HashMap;
|
2022-02-15 16:37:05 -06:00
|
|
|
import java.util.HashSet;
|
2023-09-02 19:25:44 -05:00
|
|
|
import java.util.LinkedList;
|
2021-06-03 13:24:33 -07:00
|
|
|
import java.util.List;
|
|
|
|
import java.util.Map;
|
2023-02-20 14:00:14 -08:00
|
|
|
import java.util.Objects;
|
2021-11-08 16:22:24 -08:00
|
|
|
import java.util.Optional;
|
2021-06-03 13:24:33 -07:00
|
|
|
import java.util.Set;
|
2023-09-02 19:25:44 -05:00
|
|
|
import java.util.concurrent.ExecutionException;
|
|
|
|
import java.util.concurrent.Future;
|
2022-05-24 20:40:42 +01:00
|
|
|
import java.util.concurrent.atomic.AtomicInteger;
|
2022-09-02 15:24:40 +05:30
|
|
|
import java.util.function.Consumer;
|
2023-03-15 23:26:02 +03:00
|
|
|
import java.util.concurrent.TimeUnit;
|
2021-06-03 13:24:33 -07:00
|
|
|
import java.util.stream.Collectors;
|
2023-09-02 19:25:44 -05:00
|
|
|
import java.util.stream.Stream;
|
2022-06-21 18:00:16 -05:00
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
import javax.annotation.Nullable;
|
2022-12-29 11:26:42 -06:00
|
|
|
import javax.persistence.EntityNotFoundException;
|
2023-09-02 19:25:44 -05:00
|
|
|
|
|
|
|
import io.ebean.Transaction;
|
2023-09-30 22:47:59 -05:00
|
|
|
import lombok.Getter;
|
2022-06-21 18:00:16 -05:00
|
|
|
import lombok.extern.slf4j.Slf4j;
|
2021-06-03 13:24:33 -07:00
|
|
|
|
2022-06-21 18:00:16 -05:00
|
|
|
import static com.linkedin.metadata.Constants.*;
|
2022-09-07 13:32:38 -07:00
|
|
|
import static com.linkedin.metadata.search.utils.BrowsePathUtils.*;
|
2022-06-21 18:00:16 -05:00
|
|
|
import static com.linkedin.metadata.utils.PegasusUtils.*;
|
2021-06-03 13:24:33 -07:00
|
|
|
|
|
|
|
|
|
|
|
/**
|
2022-05-24 20:40:42 +01:00
|
|
|
* A class specifying create, update, and read operations against metadata entities and aspects
|
2021-06-03 13:24:33 -07:00
|
|
|
* by primary key (urn).
|
|
|
|
*
|
|
|
|
* This interface is meant to abstract away the storage concerns of these pieces of metadata, permitting
|
|
|
|
* any underlying storage system to be used in materializing GMS domain objects, which are implemented using Pegasus
|
|
|
|
* {@link RecordTemplate}s.
|
|
|
|
*
|
2022-05-24 20:40:42 +01:00
|
|
|
* Internal versioning semantics
|
|
|
|
* =============================
|
2021-06-03 13:24:33 -07:00
|
|
|
*
|
2022-05-24 20:40:42 +01:00
|
|
|
* The latest version of any aspect is set to 0 for efficient retrieval; in most cases the latest state of an aspect
|
|
|
|
* will be the only one fetched.
|
2021-06-03 13:24:33 -07:00
|
|
|
*
|
|
|
|
* As such, 0 is treated as a special number. Once an aspect is no longer the latest, versions will increment
|
|
|
|
* monotonically, starting from 1. Thus, the second-to-last version of an aspect will be equal to total # versions
|
|
|
|
* of the aspect - 1.
|
|
|
|
*
|
|
|
|
* For example, if there are 5 instances of a single aspect, the latest will have version 0, and the second-to-last
|
|
|
|
* will have version 4. The "true" latest version of an aspect is always equal to the highest stored version
|
|
|
|
* of a given aspect + 1.
|
|
|
|
*
|
2023-09-02 19:25:44 -05:00
|
|
|
* Note that currently, implementations of this interface are responsible for producing Metadata Change Log on
|
|
|
|
* ingestion using {@link #conditionallyProduceMCLAsync(RecordTemplate, SystemMetadata, RecordTemplate, SystemMetadata,
|
|
|
|
* MetadataChangeProposal, Urn, AuditStamp, AspectSpec)}.
|
2021-06-03 13:24:33 -07:00
|
|
|
*
|
|
|
|
* TODO: Consider whether we can abstract away virtual versioning semantics to subclasses of this class.
|
|
|
|
*/
|
2021-06-25 10:56:45 -07:00
|
|
|
@Slf4j
|
2023-07-19 20:09:14 -05:00
|
|
|
public class EntityServiceImpl implements EntityService {
|
2021-06-03 13:24:33 -07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* As described above, the latest version of an aspect should <b>always</b> take the value 0, with
|
|
|
|
* monotonically increasing version incrementing as usual once the latest version is replaced.
|
|
|
|
*/
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
private static final int DEFAULT_MAX_TRANSACTION_RETRY = 3;
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
protected final AspectDao _aspectDao;
|
2022-03-29 18:32:04 -07:00
|
|
|
private final EventProducer _producer;
|
2021-06-03 13:24:33 -07:00
|
|
|
private final EntityRegistry _entityRegistry;
|
|
|
|
private final Map<String, Set<String>> _entityToValidAspects;
|
2022-05-24 20:40:42 +01:00
|
|
|
private RetentionService _retentionService;
|
2023-02-22 15:52:51 +00:00
|
|
|
private final Boolean _alwaysEmitChangeLog;
|
2023-09-30 22:47:59 -05:00
|
|
|
@Getter
|
2023-06-15 06:17:13 -05:00
|
|
|
private final UpdateIndicesService _updateIndicesService;
|
|
|
|
private final PreProcessHooks _preProcessHooks;
|
2022-04-26 01:29:24 +01:00
|
|
|
protected static final int MAX_KEYS_PER_QUERY = 500;
|
2021-06-03 13:24:33 -07:00
|
|
|
|
2023-07-28 14:09:44 -05:00
|
|
|
private final Integer ebeanMaxTransactionRetry;
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
/**
 * Creates a service that uses {@link #DEFAULT_MAX_TRANSACTION_RETRY} as its transaction retry count.
 *
 * @param aspectDao data access object for aspects
 * @param producer event producer
 * @param entityRegistry registry of entity and aspect specifications
 * @param alwaysEmitChangeLog change log emission flag
 * @param updateIndicesService index update service
 * @param preProcessHooks pre-process hook configuration
 */
public EntityServiceImpl(
    @Nonnull final AspectDao aspectDao,
    @Nonnull final EventProducer producer,
    @Nonnull final EntityRegistry entityRegistry,
    final boolean alwaysEmitChangeLog,
    final UpdateIndicesService updateIndicesService,
    final PreProcessHooks preProcessHooks) {
  // Delegate to the full constructor, filling in the default retry count.
  this(
      aspectDao,
      producer,
      entityRegistry,
      alwaysEmitChangeLog,
      updateIndicesService,
      preProcessHooks,
      DEFAULT_MAX_TRANSACTION_RETRY);
}
|
|
|
|
|
|
|
|
/**
 * Creates a service with an explicit transaction retry count.
 *
 * @param aspectDao data access object for aspects
 * @param producer event producer
 * @param entityRegistry registry of entity and aspect specifications; also used to derive the
 *     entity-to-valid-aspects lookup
 * @param alwaysEmitChangeLog change log emission flag
 * @param updateIndicesService index update service
 * @param preProcessHooks pre-process hook configuration
 * @param retry transaction retry count; when null, {@link #DEFAULT_MAX_TRANSACTION_RETRY} is used
 */
public EntityServiceImpl(
    @Nonnull final AspectDao aspectDao,
    @Nonnull final EventProducer producer,
    @Nonnull final EntityRegistry entityRegistry,
    final boolean alwaysEmitChangeLog,
    final UpdateIndicesService updateIndicesService,
    final PreProcessHooks preProcessHooks,
    final Integer retry) {

  this._aspectDao = aspectDao;
  this._producer = producer;
  this._entityRegistry = entityRegistry;
  this._entityToValidAspects = buildEntityToValidAspects(entityRegistry);
  this._alwaysEmitChangeLog = alwaysEmitChangeLog;
  this._updateIndicesService = updateIndicesService;
  this._preProcessHooks = preProcessHooks;

  // A null retry count means "use the default".
  if (retry == null) {
    this.ebeanMaxTransactionRetry = DEFAULT_MAX_TRANSACTION_RETRY;
  } else {
    this.ebeanMaxTransactionRetry = retry;
  }
}
|
|
|
|
|
2023-09-30 22:47:59 -05:00
|
|
|
@Override
|
|
|
|
public void setSystemEntityClient(SystemEntityClient systemEntityClient) {
|
|
|
|
this._updateIndicesService.setSystemEntityClient(systemEntityClient);
|
|
|
|
}
|
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
/**
|
|
|
|
* Retrieves the latest aspects corresponding to a batch of {@link Urn}s based on a provided
|
|
|
|
* set of aspect names.
|
|
|
|
*
|
|
|
|
* @param urns set of urns to fetch aspects for
|
|
|
|
* @param aspectNames aspects to fetch for each urn in urns set
|
|
|
|
* @return a map of provided {@link Urn} to a List containing the requested aspects.
|
|
|
|
*/
|
2023-09-02 19:25:44 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public Map<Urn, List<RecordTemplate>> getLatestAspects(
|
2022-01-04 00:19:46 +09:00
|
|
|
@Nonnull final Set<Urn> urns,
|
2022-05-24 20:40:42 +01:00
|
|
|
@Nonnull final Set<String> aspectNames) {
|
|
|
|
|
|
|
|
Map<EntityAspectIdentifier, EntityAspect> batchGetResults = getLatestAspect(urns, aspectNames);
|
|
|
|
|
|
|
|
// Fetch from db and populate urn -> aspect map.
|
|
|
|
final Map<Urn, List<RecordTemplate>> urnToAspects = new HashMap<>();
|
2021-06-03 13:24:33 -07:00
|
|
|
|
2022-05-24 20:40:42 +01:00
|
|
|
// Each urn should have some result, regardless of whether aspects are found in the DB.
|
|
|
|
for (Urn urn : urns) {
|
|
|
|
urnToAspects.putIfAbsent(urn, new ArrayList<>());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add "key" aspects for each urn. TODO: Replace this with a materialized key aspect.
|
|
|
|
urnToAspects.keySet().forEach(key -> {
|
2023-09-02 19:25:44 -05:00
|
|
|
final RecordTemplate keyAspect = EntityUtils.buildKeyAspect(_entityRegistry, key);
|
2022-05-24 20:40:42 +01:00
|
|
|
urnToAspects.get(key).add(keyAspect);
|
|
|
|
});
|
|
|
|
|
|
|
|
batchGetResults.forEach((key, aspectEntry) -> {
|
|
|
|
final Urn urn = toUrn(key.getUrn());
|
|
|
|
final String aspectName = key.getAspect();
|
|
|
|
// for now, don't add the key aspect here- we have already added it above
|
|
|
|
if (aspectName.equals(getKeyAspectName(urn))) {
|
|
|
|
return;
|
|
|
|
}
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2022-05-24 20:40:42 +01:00
|
|
|
final RecordTemplate aspectRecord =
|
|
|
|
EntityUtils.toAspectRecord(urn, aspectName, aspectEntry.getMetadata(), getEntityRegistry());
|
|
|
|
urnToAspects.putIfAbsent(urn, new ArrayList<>());
|
|
|
|
urnToAspects.get(urn).add(aspectRecord);
|
|
|
|
});
|
|
|
|
|
|
|
|
return urnToAspects;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public Map<String, RecordTemplate> getLatestAspectsForUrn(@Nonnull final Urn urn, @Nonnull final Set<String> aspectNames) {
|
|
|
|
Map<EntityAspectIdentifier, EntityAspect> batchGetResults = getLatestAspect(new HashSet<>(Arrays.asList(urn)), aspectNames);
|
|
|
|
|
|
|
|
final Map<String, RecordTemplate> result = new HashMap<>();
|
|
|
|
batchGetResults.forEach((key, aspectEntry) -> {
|
|
|
|
final String aspectName = key.getAspect();
|
|
|
|
final RecordTemplate aspectRecord = EntityUtils.toAspectRecord(urn, aspectName, aspectEntry.getMetadata(), getEntityRegistry());
|
|
|
|
result.put(aspectName, aspectRecord);
|
|
|
|
});
|
|
|
|
return result;
|
|
|
|
}
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
/**
|
|
|
|
* Retrieves an aspect having a specific {@link Urn}, name, & version.
|
|
|
|
*
|
|
|
|
* Note that once we drop support for legacy aspect-specific resources,
|
|
|
|
* we should make this a protected method. Only visible for backwards compatibility.
|
|
|
|
*
|
|
|
|
* @param urn an urn associated with the requested aspect
|
|
|
|
* @param aspectName name of the aspect requested
|
|
|
|
* @param version specific version of the aspect being requests
|
2021-11-22 16:33:14 -08:00
|
|
|
* @return the {@link RecordTemplate} representation of the requested aspect object, or null if one cannot be found
|
2021-06-03 13:24:33 -07:00
|
|
|
*/
|
2021-11-22 16:33:14 -08:00
|
|
|
@Nullable
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public RecordTemplate getAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull long version) {
|
|
|
|
|
|
|
|
log.debug("Invoked getAspect with urn: {}, aspectName: {}, version: {}", urn, aspectName, version);
|
|
|
|
|
|
|
|
version = calculateVersionNumber(urn, aspectName, version);
|
|
|
|
final EntityAspectIdentifier primaryKey = new EntityAspectIdentifier(urn.toString(), aspectName, version);
|
|
|
|
final Optional<EntityAspect> maybeAspect = Optional.ofNullable(_aspectDao.getAspect(primaryKey));
|
|
|
|
return maybeAspect.map(
|
|
|
|
aspect -> EntityUtils.toAspectRecord(urn, aspectName, aspect.getMetadata(), getEntityRegistry())).orElse(null);
|
|
|
|
}
|
2021-06-03 13:24:33 -07:00
|
|
|
|
2022-03-29 12:02:22 -07:00
|
|
|
/**
|
|
|
|
* Retrieves the latest aspects for the given urn as dynamic aspect objects
|
|
|
|
* (Without having to define union objects)
|
|
|
|
*
|
|
|
|
* @param entityName name of the entity to fetch
|
|
|
|
* @param urn urn of entity to fetch
|
|
|
|
* @param aspectNames set of aspects to fetch
|
|
|
|
* @return a map of {@link Urn} to {@link Entity} object
|
|
|
|
*/
|
|
|
|
@Nullable
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-03-29 12:02:22 -07:00
|
|
|
public EntityResponse getEntityV2(
|
|
|
|
@Nonnull final String entityName,
|
|
|
|
@Nonnull final Urn urn,
|
|
|
|
@Nonnull final Set<String> aspectNames) throws URISyntaxException {
|
|
|
|
return getEntitiesV2(entityName, Collections.singleton(urn), aspectNames).get(urn);
|
|
|
|
}
|
|
|
|
|
2022-01-04 00:19:46 +09:00
|
|
|
/**
|
|
|
|
* Retrieves the latest aspects for the given set of urns as dynamic aspect objects
|
|
|
|
* (Without having to define union objects)
|
|
|
|
*
|
|
|
|
* @param entityName name of the entity to fetch
|
|
|
|
* @param urns set of urns to fetch
|
|
|
|
* @param aspectNames set of aspects to fetch
|
|
|
|
* @return a map of {@link Urn} to {@link Entity} object
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-01-04 00:19:46 +09:00
|
|
|
public Map<Urn, EntityResponse> getEntitiesV2(
|
|
|
|
@Nonnull final String entityName,
|
|
|
|
@Nonnull final Set<Urn> urns,
|
2022-02-15 16:37:05 -06:00
|
|
|
@Nonnull final Set<String> aspectNames) throws URISyntaxException {
|
2022-01-04 00:19:46 +09:00
|
|
|
return getLatestEnvelopedAspects(entityName, urns, aspectNames)
|
|
|
|
.entrySet()
|
|
|
|
.stream()
|
|
|
|
.collect(Collectors.toMap(Map.Entry::getKey, entry -> toEntityResponse(entry.getKey(), entry.getValue())));
|
|
|
|
}
|
|
|
|
|
2022-04-29 14:04:38 -05:00
|
|
|
/**
|
|
|
|
* Retrieves the aspects for the given set of urns and versions as dynamic aspect objects
|
|
|
|
* (Without having to define union objects)
|
|
|
|
*
|
|
|
|
* @param versionedUrns set of urns to fetch with versions of aspects specified in a specialized string
|
|
|
|
* @param aspectNames set of aspects to fetch
|
|
|
|
* @return a map of {@link Urn} to {@link Entity} object
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-04-29 14:04:38 -05:00
|
|
|
public Map<Urn, EntityResponse> getEntitiesVersionedV2(
|
|
|
|
@Nonnull final Set<VersionedUrn> versionedUrns,
|
|
|
|
@Nonnull final Set<String> aspectNames) throws URISyntaxException {
|
|
|
|
return getVersionedEnvelopedAspects(versionedUrns, aspectNames)
|
|
|
|
.entrySet()
|
|
|
|
.stream()
|
|
|
|
.collect(Collectors.toMap(Map.Entry::getKey, entry -> toEntityResponse(entry.getKey(), entry.getValue())));
|
|
|
|
}
|
|
|
|
|
2022-01-04 00:19:46 +09:00
|
|
|
/**
|
|
|
|
* Retrieves the latest aspects for the given set of urns as a list of enveloped aspects
|
|
|
|
*
|
|
|
|
* @param entityName name of the entity to fetch
|
|
|
|
* @param urns set of urns to fetch
|
|
|
|
* @param aspectNames set of aspects to fetch
|
|
|
|
* @return a map of {@link Urn} to {@link EnvelopedAspect} object
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public Map<Urn, List<EnvelopedAspect>> getLatestEnvelopedAspects(
|
2023-02-02 15:30:49 -08:00
|
|
|
// TODO: entityName is unused, can we remove this as a param?
|
|
|
|
@Nonnull String entityName,
|
|
|
|
@Nonnull Set<Urn> urns,
|
|
|
|
@Nonnull Set<String> aspectNames) throws URISyntaxException {
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-02-02 15:30:49 -08:00
|
|
|
final Set<EntityAspectIdentifier> dbKeys = urns.stream()
|
|
|
|
.map(urn -> aspectNames.stream()
|
|
|
|
.map(aspectName -> new EntityAspectIdentifier(urn.toString(), aspectName, ASPECT_LATEST_VERSION))
|
|
|
|
.collect(Collectors.toList()))
|
|
|
|
.flatMap(List::stream)
|
|
|
|
.collect(Collectors.toSet());
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-02-02 15:30:49 -08:00
|
|
|
return getCorrespondingAspects(dbKeys, urns);
|
|
|
|
}
|
2022-01-04 00:19:46 +09:00
|
|
|
|
2022-04-29 14:04:38 -05:00
|
|
|
/**
|
|
|
|
* Retrieves the latest aspects for the given set of urns as a list of enveloped aspects
|
|
|
|
*
|
|
|
|
* @param versionedUrns set of urns to fetch with versions of aspects specified in a specialized string
|
|
|
|
* @param aspectNames set of aspects to fetch
|
|
|
|
* @return a map of {@link Urn} to {@link EnvelopedAspect} object
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public Map<Urn, List<EnvelopedAspect>> getVersionedEnvelopedAspects(
|
2023-02-02 15:30:49 -08:00
|
|
|
@Nonnull Set<VersionedUrn> versionedUrns,
|
|
|
|
@Nonnull Set<String> aspectNames) throws URISyntaxException {
|
|
|
|
|
|
|
|
Map<String, Map<String, Long>> urnAspectVersionMap = versionedUrns.stream()
|
|
|
|
.collect(Collectors.toMap(versionedUrn -> versionedUrn.getUrn().toString(),
|
|
|
|
versionedUrn -> VersionedUrnUtils.convertVersionStamp(versionedUrn.getVersionStamp())));
|
|
|
|
|
|
|
|
// Cover full/partial versionStamp
|
|
|
|
final Set<EntityAspectIdentifier> dbKeys = urnAspectVersionMap.entrySet().stream()
|
|
|
|
.filter(entry -> !entry.getValue().isEmpty())
|
|
|
|
.map(entry -> aspectNames.stream()
|
|
|
|
.filter(aspectName -> entry.getValue().containsKey(aspectName))
|
|
|
|
.map(aspectName -> new EntityAspectIdentifier(entry.getKey(), aspectName,
|
|
|
|
entry.getValue().get(aspectName)))
|
|
|
|
.collect(Collectors.toList()))
|
|
|
|
.flatMap(List::stream)
|
|
|
|
.collect(Collectors.toSet());
|
|
|
|
|
|
|
|
// Cover empty versionStamp
|
|
|
|
dbKeys.addAll(urnAspectVersionMap.entrySet().stream()
|
|
|
|
.filter(entry -> entry.getValue().isEmpty())
|
|
|
|
.map(entry -> aspectNames.stream()
|
|
|
|
.map(aspectName -> new EntityAspectIdentifier(entry.getKey(), aspectName, 0L))
|
|
|
|
.collect(Collectors.toList()))
|
|
|
|
.flatMap(List::stream)
|
|
|
|
.collect(Collectors.toSet()));
|
|
|
|
|
|
|
|
return getCorrespondingAspects(dbKeys, versionedUrns.stream()
|
|
|
|
.map(versionedUrn -> versionedUrn.getUrn().toString())
|
|
|
|
.map(UrnUtils::getUrn).collect(Collectors.toSet()));
|
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
private Map<Urn, List<EnvelopedAspect>> getCorrespondingAspects(Set<EntityAspectIdentifier> dbKeys, Set<Urn> urns) {
|
2023-02-02 15:30:49 -08:00
|
|
|
|
|
|
|
final Map<EntityAspectIdentifier, EnvelopedAspect> envelopedAspectMap = getEnvelopedAspects(dbKeys);
|
|
|
|
|
|
|
|
// Group result by Urn
|
|
|
|
final Map<String, List<EnvelopedAspect>> urnToAspects = envelopedAspectMap.entrySet()
|
|
|
|
.stream()
|
|
|
|
.collect(Collectors.groupingBy(entry -> entry.getKey().getUrn(),
|
|
|
|
Collectors.mapping(Map.Entry::getValue, Collectors.toList())));
|
|
|
|
|
|
|
|
final Map<Urn, List<EnvelopedAspect>> result = new HashMap<>();
|
|
|
|
for (Urn urn : urns) {
|
|
|
|
List<EnvelopedAspect> aspects = urnToAspects.getOrDefault(urn.toString(), Collections.emptyList());
|
|
|
|
EnvelopedAspect keyAspect = getKeyEnvelopedAspect(urn);
|
|
|
|
// Add key aspect if it does not exist in the returned aspects
|
|
|
|
if (aspects.isEmpty() || aspects.stream().noneMatch(aspect -> keyAspect.getName().equals(aspect.getName()))) {
|
|
|
|
result.put(urn, ImmutableList.<EnvelopedAspect>builder().addAll(aspects).add(keyAspect).build());
|
|
|
|
} else {
|
|
|
|
result.put(urn, aspects);
|
|
|
|
}
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
2023-02-02 15:30:49 -08:00
|
|
|
return result;
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
2022-04-29 14:04:38 -05:00
|
|
|
|
2022-01-04 00:19:46 +09:00
|
|
|
/**
|
|
|
|
* Retrieves the latest aspect for the given urn as a list of enveloped aspects
|
|
|
|
*
|
|
|
|
* @param entityName name of the entity to fetch
|
|
|
|
* @param urn urn to fetch
|
|
|
|
* @param aspectName name of the aspect to fetch
|
|
|
|
* @return {@link EnvelopedAspect} object, or null if one cannot be found
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-04-26 01:29:24 +01:00
|
|
|
public EnvelopedAspect getLatestEnvelopedAspect(
|
2022-01-04 00:19:46 +09:00
|
|
|
@Nonnull final String entityName,
|
|
|
|
@Nonnull final Urn urn,
|
2022-04-26 01:29:24 +01:00
|
|
|
@Nonnull final String aspectName) throws Exception {
|
|
|
|
return getLatestEnvelopedAspects(entityName, ImmutableSet.of(urn), ImmutableSet.of(aspectName)).getOrDefault(urn, Collections.emptyList())
|
|
|
|
.stream()
|
|
|
|
.filter(envelopedAspect -> envelopedAspect.getName().equals(aspectName))
|
|
|
|
.findFirst()
|
|
|
|
.orElse(null);
|
|
|
|
}
|
2022-01-04 00:19:46 +09:00
|
|
|
|
2021-11-22 16:33:14 -08:00
|
|
|
/**
|
|
|
|
* Retrieves an {@link VersionedAspect}, or null if one cannot be found.
|
|
|
|
*/
|
|
|
|
@Nullable
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public VersionedAspect getVersionedAspect(@Nonnull Urn urn, @Nonnull String aspectName, long version) {
|
|
|
|
|
|
|
|
log.debug("Invoked getVersionedAspect with urn: {}, aspectName: {}, version: {}", urn, aspectName, version);
|
|
|
|
|
|
|
|
VersionedAspect result = new VersionedAspect();
|
|
|
|
|
|
|
|
version = calculateVersionNumber(urn, aspectName, version);
|
|
|
|
|
|
|
|
final EntityAspectIdentifier primaryKey = new EntityAspectIdentifier(urn.toString(), aspectName, version);
|
|
|
|
final Optional<EntityAspect> maybeAspect = Optional.ofNullable(_aspectDao.getAspect(primaryKey));
|
|
|
|
RecordTemplate aspectRecord =
|
|
|
|
maybeAspect.map(aspect -> EntityUtils.toAspectRecord(urn, aspectName, aspect.getMetadata(), getEntityRegistry()))
|
|
|
|
.orElse(null);
|
|
|
|
|
|
|
|
if (aspectRecord == null) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
Aspect resultAspect = new Aspect();
|
|
|
|
|
|
|
|
RecordUtils.setSelectedRecordTemplateInUnion(resultAspect, aspectRecord);
|
|
|
|
result.setAspect(resultAspect);
|
|
|
|
result.setVersion(version);
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
2021-06-16 10:03:21 -07:00
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
/**
|
2021-06-17 19:52:50 -07:00
|
|
|
* Retrieves a list of all aspects belonging to an entity of a particular type, sorted by urn.
|
2021-06-03 13:24:33 -07:00
|
|
|
*
|
|
|
|
* Note that once we drop support for legacy 'getAllDataPlatforms' endpoint,
|
|
|
|
* we can drop support for this unless otherwise required. Only visible for backwards compatibility.
|
|
|
|
*
|
2021-06-17 19:52:50 -07:00
|
|
|
* @param entityName name of the entity type the aspect belongs to, e.g. 'dataset'
|
|
|
|
* @param aspectName name of the aspect requested, e.g. 'ownership'
|
2021-06-03 13:24:33 -07:00
|
|
|
* @param start the starting index of the returned aspects, used in pagination
|
|
|
|
* @param count the count of the aspects to be returned, used in pagination
|
|
|
|
* @return a {@link ListResult} of {@link RecordTemplate}s representing the requested aspect.
|
|
|
|
*/
|
2022-04-26 01:29:24 +01:00
|
|
|
@Nonnull
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public ListResult<RecordTemplate> listLatestAspects(
|
|
|
|
@Nonnull final String entityName,
|
|
|
|
@Nonnull final String aspectName,
|
|
|
|
final int start,
|
|
|
|
final int count) {
|
|
|
|
|
|
|
|
log.debug("Invoked listLatestAspects with entityName: {}, aspectName: {}, start: {}, count: {}", entityName,
|
|
|
|
aspectName, start, count);
|
|
|
|
|
|
|
|
final ListResult<String> aspectMetadataList =
|
|
|
|
_aspectDao.listLatestAspectMetadata(entityName, aspectName, start, count);
|
|
|
|
|
|
|
|
final List<RecordTemplate> aspects = new ArrayList<>();
|
|
|
|
for (int i = 0; i < aspectMetadataList.getValues().size(); i++) {
|
|
|
|
aspects.add(EntityUtils.toAspectRecord(aspectMetadataList.getMetadata().getExtraInfos().get(i).getUrn(), aspectName,
|
|
|
|
aspectMetadataList.getValues().get(i), getEntityRegistry()));
|
|
|
|
}
|
|
|
|
|
|
|
|
return new ListResult<>(aspects, aspectMetadataList.getMetadata(), aspectMetadataList.getNextStart(),
|
|
|
|
aspectMetadataList.isHasNext(), aspectMetadataList.getTotalCount(), aspectMetadataList.getTotalPageCount(),
|
|
|
|
aspectMetadataList.getPageSize());
|
|
|
|
}
|
2021-06-03 13:24:33 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
/**
|
|
|
|
* Common batch-like pattern used primarily in tests.
|
|
|
|
* @param entityUrn the entity urn
|
|
|
|
* @param pairList list of aspects in pairs of aspect name and record template
|
|
|
|
* @param auditStamp audit stamp
|
|
|
|
* @param systemMetadata system metadata
|
|
|
|
* @return update result
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public List<UpdateAspectResult> ingestAspects(@Nonnull Urn entityUrn,
|
|
|
|
List<Pair<String, RecordTemplate>> pairList,
|
|
|
|
@Nonnull final AuditStamp auditStamp,
|
|
|
|
SystemMetadata systemMetadata) {
|
|
|
|
List<? extends AbstractBatchItem> items = pairList.stream()
|
|
|
|
.map(pair -> UpsertBatchItem.builder()
|
|
|
|
.urn(entityUrn)
|
|
|
|
.aspectName(pair.getKey())
|
|
|
|
.aspect(pair.getValue())
|
|
|
|
.systemMetadata(systemMetadata)
|
|
|
|
.build(_entityRegistry))
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
return ingestAspects(AspectsBatchImpl.builder().items(items).build(), auditStamp, true, true);
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
|
|
|
|
2021-12-14 11:18:02 +09:00
|
|
|
/**
|
2023-09-02 19:25:44 -05:00
|
|
|
* Ingests (inserts) a new version of an entity aspect & emits a {@link com.linkedin.mxe.MetadataChangeLog}.
|
2021-12-14 11:18:02 +09:00
|
|
|
*
|
2023-09-02 19:25:44 -05:00
|
|
|
* @param aspectsBatch aspects to write
|
|
|
|
* @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time
|
|
|
|
* @param emitMCL whether a {@link com.linkedin.mxe.MetadataChangeLog} should be emitted in correspondence upon
|
|
|
|
* successful update
|
|
|
|
* @return the {@link RecordTemplate} representation of the written aspect object
|
2021-12-14 11:18:02 +09:00
|
|
|
*/
|
2023-09-02 19:25:44 -05:00
|
|
|
@Override
|
|
|
|
public List<UpdateAspectResult> ingestAspects(@Nonnull final AspectsBatch aspectsBatch,
|
|
|
|
@Nonnull final AuditStamp auditStamp,
|
|
|
|
boolean emitMCL,
|
|
|
|
boolean overwrite) {
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
Timer.Context ingestToLocalDBTimer = MetricUtils.timer(this.getClass(), "ingestAspectsToLocalDB").time();
|
|
|
|
List<UpdateAspectResult> ingestResults = ingestAspectsToLocalDB(aspectsBatch, auditStamp, overwrite);
|
|
|
|
List<UpdateAspectResult> mclResults = emitMCL(ingestResults, emitMCL);
|
|
|
|
ingestToLocalDBTimer.stop();
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return mclResults;
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2022-09-23 01:52:18 -05:00
|
|
|
/**
|
2023-09-02 19:25:44 -05:00
|
|
|
* Checks whether there is an actual update to the aspect by applying the updateLambda
|
|
|
|
* If there is an update, push the new version into the local DB.
|
|
|
|
* Otherwise, do not push the new version, but just update the system metadata.
|
2022-09-23 01:52:18 -05:00
|
|
|
*
|
2023-09-02 19:25:44 -05:00
|
|
|
* @param aspectsBatch Collection of the following: an urn associated with the new aspect, name of the aspect being
|
|
|
|
* inserted, and a function to apply to the latest version of the aspect to get the updated version
|
|
|
|
* @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time
|
2022-09-23 01:52:18 -05:00
|
|
|
* @return Details about the new and old version of the aspect
|
|
|
|
*/
|
|
|
|
@Nonnull
|
2023-09-02 19:25:44 -05:00
|
|
|
private List<UpdateAspectResult> ingestAspectsToLocalDB(@Nonnull final AspectsBatch aspectsBatch,
|
|
|
|
@Nonnull final AuditStamp auditStamp,
|
|
|
|
boolean overwrite) {
|
2023-06-16 11:16:59 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
if (aspectsBatch.containsDuplicateAspects()) {
|
|
|
|
log.warn(String.format("Batch contains duplicates: %s", aspectsBatch));
|
|
|
|
}
|
2022-09-23 01:52:18 -05:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return _aspectDao.runInTransactionWithRetry((tx) -> {
|
|
|
|
// Read before write is unfortunate, however batch it
|
|
|
|
Map<String, Set<String>> urnAspects = aspectsBatch.getUrnAspectsMap();
|
|
|
|
// read #1
|
|
|
|
Map<String, Map<String, EntityAspect>> latestAspects = _aspectDao.getLatestAspects(urnAspects);
|
|
|
|
// read #2
|
|
|
|
Map<String, Map<String, Long>> nextVersions = _aspectDao.getNextVersions(urnAspects);
|
|
|
|
|
|
|
|
List<UpsertBatchItem> items = aspectsBatch.getItems().stream()
|
|
|
|
.map(item -> {
|
|
|
|
if (item instanceof UpsertBatchItem) {
|
|
|
|
return (UpsertBatchItem) item;
|
|
|
|
} else {
|
|
|
|
// patch to upsert
|
|
|
|
PatchBatchItem patchBatchItem = (PatchBatchItem) item;
|
|
|
|
final String urnStr = patchBatchItem.getUrn().toString();
|
|
|
|
final EntityAspect latest = latestAspects.getOrDefault(urnStr, Map.of()).get(patchBatchItem.getAspectName());
|
|
|
|
final RecordTemplate currentValue = latest != null
|
|
|
|
? EntityUtils.toAspectRecord(patchBatchItem.getUrn(), patchBatchItem.getAspectName(), latest.getMetadata(), _entityRegistry) : null;
|
|
|
|
return patchBatchItem.applyPatch(_entityRegistry, currentValue);
|
|
|
|
}
|
|
|
|
})
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
|
|
|
// Database Upsert results
|
|
|
|
List<UpdateAspectResult> upsertResults = items.stream()
|
|
|
|
.map(item -> {
|
|
|
|
final String urnStr = item.getUrn().toString();
|
|
|
|
final EntityAspect latest = latestAspects.getOrDefault(urnStr, Map.of()).get(item.getAspectName());
|
|
|
|
final long nextVersion = nextVersions.getOrDefault(urnStr, Map.of()).getOrDefault(item.getAspectName(), 0L);
|
|
|
|
|
|
|
|
final UpdateAspectResult result;
|
|
|
|
if (overwrite || latest == null) {
|
|
|
|
result = ingestAspectToLocalDB(tx, item.getUrn(), item.getAspectName(), item.getAspect(),
|
|
|
|
auditStamp, item.getSystemMetadata(), latest, nextVersion).toBuilder().request(item).build();
|
|
|
|
|
|
|
|
// support inner-batch upserts
|
|
|
|
latestAspects.computeIfAbsent(urnStr, key -> new HashMap<>()).put(item.getAspectName(), item.toLatestEntityAspect(auditStamp));
|
|
|
|
nextVersions.computeIfAbsent(urnStr, key -> new HashMap<>()).put(item.getAspectName(), nextVersion + 1);
|
|
|
|
} else {
|
|
|
|
RecordTemplate oldValue = EntityUtils.toAspectRecord(item.getUrn().getEntityType(), item.getAspectName(),
|
|
|
|
latest.getMetadata(), getEntityRegistry());
|
|
|
|
SystemMetadata oldMetadata = EntityUtils.parseSystemMetadata(latest.getSystemMetadata());
|
|
|
|
result = UpdateAspectResult.builder()
|
|
|
|
.urn(item.getUrn())
|
|
|
|
.request(item)
|
|
|
|
.oldValue(oldValue)
|
|
|
|
.newValue(oldValue)
|
|
|
|
.oldSystemMetadata(oldMetadata)
|
|
|
|
.newSystemMetadata(oldMetadata)
|
|
|
|
.operation(MetadataAuditOperation.UPDATE)
|
|
|
|
.auditStamp(auditStamp)
|
|
|
|
.maxVersion(latest.getVersion())
|
|
|
|
.build();
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}).collect(Collectors.toList());
|
|
|
|
|
|
|
|
// commit upserts prior to retention or kafka send, if supported by impl
|
|
|
|
if (tx != null) {
|
|
|
|
tx.commitAndContinue();
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
// Retention optimization and tx
|
|
|
|
if (_retentionService != null) {
|
|
|
|
List<RetentionService.RetentionContext> retentionBatch = upsertResults.stream()
|
|
|
|
// Only consider retention when there was a previous version
|
|
|
|
.filter(result -> latestAspects.containsKey(result.getUrn().toString())
|
|
|
|
&& latestAspects.get(result.getUrn().toString()).containsKey(result.getRequest().getAspectName()))
|
|
|
|
.filter(result -> {
|
|
|
|
RecordTemplate oldAspect = result.getOldValue();
|
|
|
|
RecordTemplate newAspect = result.getNewValue();
|
|
|
|
// Apply retention policies if there was an update to existing aspect value
|
|
|
|
return oldAspect != newAspect && oldAspect != null && _retentionService != null;
|
|
|
|
})
|
|
|
|
.map(result -> RetentionService.RetentionContext.builder()
|
|
|
|
.urn(result.getUrn())
|
|
|
|
.aspectName(result.getRequest().getAspectName())
|
|
|
|
.maxVersion(Optional.of(result.getMaxVersion()))
|
|
|
|
.build())
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
_retentionService.applyRetentionWithPolicyDefaults(retentionBatch);
|
|
|
|
} else {
|
|
|
|
log.warn("Retention service is missing!");
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
2023-04-18 17:44:45 -05:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return upsertResults;
|
|
|
|
}, aspectsBatch, DEFAULT_MAX_TRANSACTION_RETRY);
|
2022-01-06 21:37:16 +05:30
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
@Nonnull
|
|
|
|
private List<UpdateAspectResult> emitMCL(List<UpdateAspectResult> sqlResults, boolean emitMCL) {
|
|
|
|
List<UpdateAspectResult> withEmitMCL = sqlResults.stream()
|
|
|
|
.map(result -> emitMCL ? conditionallyProduceMCLAsync(result) : result)
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
|
|
|
// join futures messages, capture error state
|
|
|
|
List<Pair<Boolean, UpdateAspectResult>> statusPairs = withEmitMCL.stream()
|
|
|
|
.filter(result -> result.getMclFuture() != null)
|
|
|
|
.map(result -> {
|
|
|
|
try {
|
|
|
|
result.getMclFuture().get();
|
|
|
|
return Pair.of(true, result);
|
|
|
|
} catch (InterruptedException | ExecutionException e) {
|
|
|
|
return Pair.of(false, result);
|
|
|
|
}
|
|
|
|
}).collect(Collectors.toList());
|
|
|
|
|
|
|
|
if (statusPairs.stream().anyMatch(p -> !p.getFirst())) {
|
|
|
|
log.error("Failed to produce MCLs: {}", statusPairs.stream()
|
|
|
|
.filter(p -> !p.getFirst())
|
|
|
|
.map(Pair::getValue)
|
|
|
|
.map(v -> v.getRequest().toString())
|
|
|
|
.collect(Collectors.toList()));
|
|
|
|
// TODO restoreIndices?
|
|
|
|
throw new RuntimeException("Failed to produce MCLs");
|
2022-01-06 21:37:16 +05:30
|
|
|
}
|
2021-12-14 11:18:02 +09:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return withEmitMCL;
|
2022-01-06 21:37:16 +05:30
|
|
|
}
|
|
|
|
|
2022-05-17 13:37:45 -05:00
|
|
|
/**
|
|
|
|
* Ingests (inserts) a new version of an entity aspect & emits a {@link com.linkedin.mxe.MetadataAuditEvent}.
|
|
|
|
*
|
|
|
|
* This method runs a read -> write atomically in a single transaction, this is to prevent multiple IDs from being created.
|
|
|
|
*
|
|
|
|
* Note that in general, this should not be used externally. It is currently serving upgrade scripts and
|
|
|
|
* is as such public.
|
|
|
|
*
|
|
|
|
* @param urn an urn associated with the new aspect
|
|
|
|
* @param aspectName name of the aspect being inserted
|
|
|
|
* @param newValue value of the aspect being inserted
|
|
|
|
* @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time
|
|
|
|
* @param systemMetadata
|
|
|
|
* @return the {@link RecordTemplate} representation of the written aspect object
|
|
|
|
*/
|
|
|
|
@Nullable
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2023-09-02 19:25:44 -05:00
|
|
|
public RecordTemplate ingestAspectIfNotPresent(@Nonnull Urn urn,
|
|
|
|
@Nonnull String aspectName,
|
|
|
|
@Nonnull RecordTemplate newValue,
|
|
|
|
@Nonnull AuditStamp auditStamp,
|
|
|
|
@Nonnull SystemMetadata systemMetadata) {
|
2022-05-24 20:40:42 +01:00
|
|
|
log.debug("Invoked ingestAspectIfNotPresent with urn: {}, aspectName: {}, newValue: {}", urn, aspectName, newValue);
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder()
|
|
|
|
.one(UpsertBatchItem.builder()
|
|
|
|
.urn(urn)
|
|
|
|
.aspectName(aspectName)
|
|
|
|
.aspect(newValue)
|
|
|
|
.systemMetadata(systemMetadata)
|
|
|
|
.build(_entityRegistry))
|
|
|
|
.build();
|
|
|
|
List<UpdateAspectResult> ingested = ingestAspects(aspectsBatch, auditStamp, true, false);
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return ingested.stream().findFirst().get().getNewValue();
|
2021-12-14 11:18:02 +09:00
|
|
|
}
|
2021-07-29 20:04:40 -07:00
|
|
|
|
2022-06-29 22:41:41 -04:00
|
|
|
/**
|
2023-09-02 19:25:44 -05:00
|
|
|
* Wrapper around batch method for single item
|
|
|
|
* @param proposal the proposal
|
|
|
|
* @param auditStamp an audit stamp representing the time and actor proposing the change
|
|
|
|
* @param async a flag to control whether we commit to primary store or just write to proposal log before returning
|
|
|
|
* @return an {@link IngestResult} containing the results
|
2022-06-29 22:41:41 -04:00
|
|
|
*/
|
2023-09-02 19:25:44 -05:00
|
|
|
@Override
|
|
|
|
public IngestResult ingestProposal(MetadataChangeProposal proposal, AuditStamp auditStamp, final boolean async) {
|
|
|
|
return ingestProposal(AspectsBatchImpl.builder().mcps(List.of(proposal), getEntityRegistry()).build(), auditStamp,
|
|
|
|
async).stream().findFirst().get();
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
|
|
|
|
2023-02-02 15:30:49 -08:00
|
|
|
/**
|
|
|
|
* Ingest a new {@link MetadataChangeProposal}. Note that this method does NOT include any additional aspects or do any
|
|
|
|
* enrichment, instead it changes only those which are provided inside the metadata change proposal.
|
|
|
|
*
|
|
|
|
* Do not use this method directly for creating new entities, as it DOES NOT create an Entity Key aspect in the DB. Instead,
|
|
|
|
* use an Entity Client.
|
|
|
|
*
|
2023-09-02 19:25:44 -05:00
|
|
|
* @param aspectsBatch the proposals to ingest
|
2023-02-02 15:30:49 -08:00
|
|
|
* @param auditStamp an audit stamp representing the time and actor proposing the change
|
|
|
|
* @param async a flag to control whether we commit to primary store or just write to proposal log before returning
|
2023-09-02 19:25:44 -05:00
|
|
|
* @return an {@link IngestResult} containing the results
|
2023-02-02 15:30:49 -08:00
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2023-09-02 19:25:44 -05:00
|
|
|
public Set<IngestResult> ingestProposal(AspectsBatch aspectsBatch, AuditStamp auditStamp, final boolean async) {
|
2022-09-23 01:52:18 -05:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
Stream<IngestResult> timeseriesIngestResults = ingestTimeseriesProposal(aspectsBatch, auditStamp);
|
|
|
|
Stream<IngestResult> nonTimeseriesIngestResults = async ? ingestProposalAsync(aspectsBatch)
|
|
|
|
: ingestProposalSync(aspectsBatch, auditStamp);
|
2022-09-23 01:52:18 -05:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return Stream.concat(timeseriesIngestResults, nonTimeseriesIngestResults).collect(Collectors.toSet());
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
/**
|
|
|
|
* Timeseries is pass through to MCL, no MCP
|
|
|
|
* @param aspectsBatch timeseries upserts batch
|
|
|
|
* @param auditStamp provided audit information
|
|
|
|
* @return returns ingest proposal result, however was never in the MCP topic
|
|
|
|
*/
|
|
|
|
private Stream<IngestResult> ingestTimeseriesProposal(AspectsBatch aspectsBatch, AuditStamp auditStamp) {
|
|
|
|
List<? extends AbstractBatchItem> unsupported = aspectsBatch.getItems().stream()
|
|
|
|
.filter(item -> item.getAspectSpec().isTimeseries() && item.getChangeType() != ChangeType.UPSERT)
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
if (!unsupported.isEmpty()) {
|
|
|
|
throw new UnsupportedOperationException("ChangeType not supported: " + unsupported.stream()
|
|
|
|
.map(AbstractBatchItem::getChangeType).collect(Collectors.toSet()));
|
2021-12-14 11:18:02 +09:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
List<Pair<UpsertBatchItem, Optional<Pair<Future<?>, Boolean>>>> timeseriesResults = aspectsBatch.getItems().stream()
|
|
|
|
.filter(item -> item.getAspectSpec().isTimeseries())
|
|
|
|
.map(item -> (UpsertBatchItem) item)
|
|
|
|
.map(item -> Pair.of(item, conditionallyProduceMCLAsync(null, null, item.getAspect(), item.getSystemMetadata(),
|
|
|
|
item.getMetadataChangeProposal(), item.getUrn(), auditStamp, item.getAspectSpec())))
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
|
|
|
return timeseriesResults.stream().map(result -> {
|
|
|
|
Optional<Pair<Future<?>, Boolean>> emissionStatus = result.getSecond();
|
|
|
|
|
|
|
|
emissionStatus.ifPresent(status -> {
|
|
|
|
try {
|
|
|
|
status.getFirst().get();
|
|
|
|
} catch (InterruptedException | ExecutionException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
UpsertBatchItem request = result.getFirst();
|
|
|
|
return IngestResult.builder()
|
|
|
|
.urn(request.getUrn())
|
|
|
|
.request(request)
|
|
|
|
.publishedMCL(emissionStatus.map(status -> status.getFirst() != null).orElse(false))
|
|
|
|
.processedMCL(emissionStatus.map(Pair::getSecond).orElse(false))
|
|
|
|
.build();
|
|
|
|
});
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
/**
|
|
|
|
* For async ingestion of non-timeseries, any change type
|
|
|
|
* @param aspectsBatch non-timeseries ingest aspects
|
|
|
|
* @return produced items to the MCP topic
|
|
|
|
*/
|
|
|
|
private Stream<IngestResult> ingestProposalAsync(AspectsBatch aspectsBatch) {
|
|
|
|
List<? extends AbstractBatchItem> nonTimeseries = aspectsBatch.getItems().stream()
|
|
|
|
.filter(item -> !item.getAspectSpec().isTimeseries())
|
|
|
|
.collect(Collectors.toList());
|
2022-09-23 01:52:18 -05:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
List<Future<?>> futures = nonTimeseries.stream().map(item ->
|
|
|
|
// When async is turned on, we write to proposal log and return without waiting
|
|
|
|
_producer.produceMetadataChangeProposal(item.getUrn(), item.getMetadataChangeProposal()))
|
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.collect(Collectors.toList());
|
2021-12-14 11:18:02 +09:00
|
|
|
|
|
|
|
try {
|
2023-09-02 19:25:44 -05:00
|
|
|
return nonTimeseries.stream().map(item ->
|
|
|
|
IngestResult.builder()
|
|
|
|
.urn(item.getUrn())
|
|
|
|
.request(item)
|
|
|
|
.publishedMCP(true)
|
|
|
|
.build());
|
|
|
|
} finally {
|
|
|
|
futures.forEach(f -> {
|
|
|
|
try {
|
|
|
|
f.get();
|
|
|
|
} catch (InterruptedException | ExecutionException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
});
|
2021-12-14 11:18:02 +09:00
|
|
|
}
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
2021-12-14 11:18:02 +09:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
private Stream<IngestResult> ingestProposalSync(AspectsBatch aspectsBatch, AuditStamp auditStamp) {
|
|
|
|
AspectsBatchImpl nonTimeseries = AspectsBatchImpl.builder()
|
|
|
|
.items(aspectsBatch.getItems().stream()
|
|
|
|
.filter(item -> !item.getAspectSpec().isTimeseries())
|
|
|
|
.collect(Collectors.toList()))
|
|
|
|
.build();
|
|
|
|
|
|
|
|
List<? extends AbstractBatchItem> unsupported = nonTimeseries.getItems().stream()
|
|
|
|
.filter(item -> item.getMetadataChangeProposal().getChangeType() != ChangeType.PATCH
|
|
|
|
&& item.getMetadataChangeProposal().getChangeType() != ChangeType.UPSERT)
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
if (!unsupported.isEmpty()) {
|
|
|
|
throw new UnsupportedOperationException("ChangeType not supported: " + unsupported.stream()
|
|
|
|
.map(item -> item.getMetadataChangeProposal().getChangeType()).collect(Collectors.toSet()));
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
2021-12-14 11:18:02 +09:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
List<UpdateAspectResult> upsertResults = ingestAspects(nonTimeseries, auditStamp, true, true);
|
2021-12-14 11:18:02 +09:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return upsertResults.stream().map(result -> {
|
|
|
|
AbstractBatchItem item = result.getRequest();
|
|
|
|
|
|
|
|
return IngestResult.builder()
|
|
|
|
.urn(item.getUrn())
|
|
|
|
.request(item)
|
|
|
|
.publishedMCL(result.getMclFuture() != null)
|
|
|
|
.sqlCommitted(true)
|
|
|
|
.isUpdate(result.getOldValue() != null)
|
|
|
|
.build();
|
|
|
|
});
|
2022-09-23 01:52:18 -05:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-09-30 16:11:39 +05:30
|
|
|
public String batchApplyRetention(Integer start, Integer count, Integer attemptWithVersion, String aspectName,
|
2023-02-02 15:30:49 -08:00
|
|
|
String urn) {
|
2022-09-30 16:11:39 +05:30
|
|
|
BulkApplyRetentionArgs args = new BulkApplyRetentionArgs();
|
|
|
|
if (start == null) {
|
|
|
|
start = 0;
|
|
|
|
}
|
|
|
|
args.start = start;
|
|
|
|
if (count == null) {
|
|
|
|
count = 100;
|
|
|
|
}
|
|
|
|
args.count = count;
|
|
|
|
if (attemptWithVersion == null) {
|
|
|
|
attemptWithVersion = 21;
|
|
|
|
}
|
|
|
|
args.attemptWithVersion = attemptWithVersion;
|
|
|
|
args.aspectName = aspectName;
|
|
|
|
args.urn = urn;
|
|
|
|
BulkApplyRetentionResult result = _retentionService.batchApplyRetentionEntities(args);
|
|
|
|
return result.toString();
|
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
private boolean preprocessEvent(MetadataChangeLog metadataChangeLog) {
|
2023-06-15 06:17:13 -05:00
|
|
|
if (_preProcessHooks.isUiEnabled()) {
|
|
|
|
if (metadataChangeLog.getSystemMetadata() != null) {
|
|
|
|
if (metadataChangeLog.getSystemMetadata().getProperties() != null) {
|
|
|
|
if (UI_SOURCE.equals(metadataChangeLog.getSystemMetadata().getProperties().get(APP_SOURCE))) {
|
|
|
|
// Pre-process the update indices hook for UI updates to avoid perceived lag from Kafka
|
|
|
|
_updateIndicesService.handleChangeEvent(metadataChangeLog);
|
2023-09-02 19:25:44 -05:00
|
|
|
return true;
|
2023-06-15 06:17:13 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-09-02 19:25:44 -05:00
|
|
|
return false;
|
2023-06-15 06:17:13 -05:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-09-02 15:24:40 +05:30
|
|
|
public Integer getCountAspect(@Nonnull String aspectName, @Nullable String urnLike) {
|
|
|
|
return _aspectDao.countAspect(aspectName, urnLike);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-09-02 15:24:40 +05:30
|
|
|
public RestoreIndicesResult restoreIndices(@Nonnull RestoreIndicesArgs args, @Nonnull Consumer<String> logger) {
|
|
|
|
RestoreIndicesResult result = new RestoreIndicesResult();
|
|
|
|
int ignored = 0;
|
|
|
|
int rowsMigrated = 0;
|
|
|
|
logger.accept(String.format("Args are %s", args));
|
|
|
|
logger.accept(String.format(
|
2023-02-02 15:30:49 -08:00
|
|
|
"Reading rows %s through %s from the aspects table started.", args.start, args.start + args.batchSize));
|
2022-09-02 15:24:40 +05:30
|
|
|
long startTime = System.currentTimeMillis();
|
|
|
|
PagedList<EbeanAspectV2> rows = _aspectDao.getPagedAspects(args);
|
|
|
|
result.timeSqlQueryMs = System.currentTimeMillis() - startTime;
|
|
|
|
startTime = System.currentTimeMillis();
|
|
|
|
logger.accept(String.format(
|
2023-02-02 15:30:49 -08:00
|
|
|
"Reading rows %s through %s from the aspects table completed.", args.start, args.start + args.batchSize));
|
2022-09-02 15:24:40 +05:30
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
LinkedList<Future<?>> futures = new LinkedList<>();
|
|
|
|
|
2023-03-21 16:53:04 -05:00
|
|
|
for (EbeanAspectV2 aspect : rows != null ? rows.getList() : List.<EbeanAspectV2>of()) {
|
2022-09-02 15:24:40 +05:30
|
|
|
// 1. Extract an Entity type from the entity Urn
|
|
|
|
result.timeGetRowMs = System.currentTimeMillis() - startTime;
|
|
|
|
startTime = System.currentTimeMillis();
|
|
|
|
Urn urn;
|
|
|
|
try {
|
|
|
|
urn = Urn.createFromString(aspect.getKey().getUrn());
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.accept(String.format("Failed to bind Urn with value %s into Urn object: %s. Ignoring row.",
|
2023-02-02 15:30:49 -08:00
|
|
|
aspect.getKey().getUrn(), e));
|
2022-09-02 15:24:40 +05:30
|
|
|
ignored = ignored + 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
result.timeUrnMs += System.currentTimeMillis() - startTime;
|
|
|
|
startTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
// 2. Verify that the entity associated with the aspect is found in the registry.
|
|
|
|
final String entityName = urn.getEntityType();
|
|
|
|
final EntitySpec entitySpec;
|
|
|
|
try {
|
|
|
|
entitySpec = _entityRegistry.getEntitySpec(entityName);
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.accept(String.format("Failed to find entity with name %s in Entity Registry: %s. Ignoring row.",
|
2023-02-02 15:30:49 -08:00
|
|
|
entityName, e));
|
2022-09-02 15:24:40 +05:30
|
|
|
ignored = ignored + 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
result.timeEntityRegistryCheckMs += System.currentTimeMillis() - startTime;
|
|
|
|
startTime = System.currentTimeMillis();
|
|
|
|
final String aspectName = aspect.getKey().getAspect();
|
|
|
|
|
|
|
|
// 3. Verify that the aspect is a valid aspect associated with the entity
|
|
|
|
AspectSpec aspectSpec = entitySpec.getAspectSpec(aspectName);
|
|
|
|
if (aspectSpec == null) {
|
|
|
|
logger.accept(String.format("Failed to find aspect with name %s associated with entity named %s", aspectName,
|
2023-02-02 15:30:49 -08:00
|
|
|
entityName));
|
2022-09-02 15:24:40 +05:30
|
|
|
ignored = ignored + 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
result.aspectCheckMs += System.currentTimeMillis() - startTime;
|
|
|
|
startTime = System.currentTimeMillis();
|
|
|
|
|
|
|
|
// 4. Create record from json aspect
|
|
|
|
final RecordTemplate aspectRecord;
|
|
|
|
try {
|
|
|
|
aspectRecord = EntityUtils.toAspectRecord(entityName, aspectName, aspect.getMetadata(), _entityRegistry);
|
|
|
|
} catch (Exception e) {
|
|
|
|
logger.accept(String.format("Failed to deserialize row %s for entity %s, aspect %s: %s. Ignoring row.",
|
2023-02-02 15:30:49 -08:00
|
|
|
aspect.getMetadata(), entityName, aspectName, e));
|
2022-09-02 15:24:40 +05:30
|
|
|
ignored = ignored + 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
result.createRecordMs += System.currentTimeMillis() - startTime;
|
|
|
|
startTime = System.currentTimeMillis();
|
|
|
|
|
2023-03-21 08:43:56 -07:00
|
|
|
// Force indexing to skip diff mode and fix error states
|
2022-09-02 15:24:40 +05:30
|
|
|
SystemMetadata latestSystemMetadata = EntityUtils.parseSystemMetadata(aspect.getSystemMetadata());
|
2023-03-21 08:43:56 -07:00
|
|
|
StringMap properties = latestSystemMetadata.getProperties() != null ? latestSystemMetadata.getProperties()
|
|
|
|
: new StringMap();
|
|
|
|
properties.put(FORCE_INDEXING_KEY, Boolean.TRUE.toString());
|
|
|
|
latestSystemMetadata.setProperties(properties);
|
2022-09-02 15:24:40 +05:30
|
|
|
|
|
|
|
// 5. Produce MAE events for the aspect record
|
2023-09-02 19:25:44 -05:00
|
|
|
futures.add(alwaysProduceMCLAsync(urn, entityName, aspectName, aspectSpec, null, aspectRecord, null,
|
2023-02-02 15:30:49 -08:00
|
|
|
latestSystemMetadata,
|
|
|
|
new AuditStamp().setActor(UrnUtils.getUrn(SYSTEM_ACTOR)).setTime(System.currentTimeMillis()),
|
2023-09-02 19:25:44 -05:00
|
|
|
ChangeType.RESTATE).getFirst());
|
2022-09-02 15:24:40 +05:30
|
|
|
result.sendMessageMs += System.currentTimeMillis() - startTime;
|
|
|
|
|
|
|
|
rowsMigrated++;
|
|
|
|
}
|
2023-09-02 19:25:44 -05:00
|
|
|
futures.stream().filter(Objects::nonNull).forEach(f -> {
|
|
|
|
try {
|
|
|
|
f.get();
|
|
|
|
} catch (InterruptedException | ExecutionException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
});
|
2023-03-15 23:26:02 +03:00
|
|
|
try {
|
|
|
|
TimeUnit.MILLISECONDS.sleep(args.batchDelayMs);
|
|
|
|
} catch (InterruptedException e) {
|
|
|
|
throw new RuntimeException("Thread interrupted while sleeping after successful batch migration.");
|
|
|
|
}
|
2022-09-02 15:24:40 +05:30
|
|
|
result.ignored = ignored;
|
|
|
|
result.rowsMigrated = rowsMigrated;
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2021-09-02 19:05:13 -07:00
|
|
|
/**
|
|
|
|
* Lists the entity URNs found in storage.
|
|
|
|
*
|
|
|
|
* @param entityName the name associated with the entity
|
|
|
|
* @param start the start offset
|
|
|
|
* @param count the count
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public ListUrnsResult listUrns(@Nonnull final String entityName, final int start, final int count) {
|
|
|
|
log.debug("Invoked listUrns with entityName: {}, start: {}, count: {}", entityName, start, count);
|
|
|
|
|
|
|
|
// If a keyAspect exists, the entity exists.
|
|
|
|
final String keyAspectName = getEntityRegistry().getEntitySpec(entityName).getKeyAspectSpec().getName();
|
|
|
|
final ListResult<String> keyAspectList = _aspectDao.listUrns(entityName, keyAspectName, start, count);
|
|
|
|
|
|
|
|
final ListUrnsResult result = new ListUrnsResult();
|
|
|
|
result.setStart(start);
|
|
|
|
result.setCount(keyAspectList.getValues().size());
|
|
|
|
result.setTotal(keyAspectList.getTotalCount());
|
|
|
|
|
|
|
|
// Extract urns
|
|
|
|
final UrnArray entityUrns = new UrnArray();
|
|
|
|
for (String urn : keyAspectList.getValues()) {
|
|
|
|
try {
|
|
|
|
entityUrns.add(Urn.createFromString(urn));
|
|
|
|
} catch (URISyntaxException e) {
|
|
|
|
throw new IllegalArgumentException(String.format("Failed to convert urn %s found in db to Urn object.", urn),
|
|
|
|
e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result.setEntities(entityUrns);
|
|
|
|
return result;
|
|
|
|
}
|
2021-09-02 19:05:13 -07:00
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
/**
|
|
|
|
* Default implementations. Subclasses should feel free to override if it's more efficient to do so.
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-06-03 13:24:33 -07:00
|
|
|
public Entity getEntity(@Nonnull final Urn urn, @Nonnull final Set<String> aspectNames) {
|
2021-07-30 17:41:03 -07:00
|
|
|
return getEntities(Collections.singleton(urn), aspectNames).values().stream().findFirst().orElse(null);
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
2021-06-04 11:28:53 -07:00
|
|
|
/**
|
2022-04-19 20:04:14 -07:00
|
|
|
* Deprecated! Use getEntitiesV2 instead.
|
|
|
|
*
|
2021-06-04 11:28:53 -07:00
|
|
|
* Retrieves multiple entities.
|
|
|
|
*
|
|
|
|
* @param urns set of urns to fetch
|
|
|
|
* @param aspectNames set of aspects to fetch
|
|
|
|
* @return a map of {@link Urn} to {@link Entity} object
|
|
|
|
*/
|
2022-04-19 20:04:14 -07:00
|
|
|
@Deprecated
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-06-04 11:28:53 -07:00
|
|
|
public Map<Urn, Entity> getEntities(@Nonnull final Set<Urn> urns, @Nonnull Set<String> aspectNames) {
|
2022-01-06 21:37:16 +05:30
|
|
|
log.debug("Invoked getEntities with urns {}, aspects {}", urns, aspectNames);
|
2021-06-04 11:28:53 -07:00
|
|
|
if (urns.isEmpty()) {
|
|
|
|
return Collections.emptyMap();
|
|
|
|
}
|
2021-07-30 17:41:03 -07:00
|
|
|
return getSnapshotUnions(urns, aspectNames).entrySet()
|
|
|
|
.stream()
|
2021-06-03 13:24:33 -07:00
|
|
|
.collect(Collectors.toMap(Map.Entry::getKey, entry -> toEntity(entry.getValue())));
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2023-09-02 19:25:44 -05:00
|
|
|
public Pair<Future<?>, Boolean> alwaysProduceMCLAsync(@Nonnull final Urn urn, @Nonnull final AspectSpec aspectSpec,
|
|
|
|
@Nonnull final MetadataChangeLog metadataChangeLog) {
|
|
|
|
Future<?> future = _producer.produceMetadataChangeLog(urn, aspectSpec, metadataChangeLog);
|
|
|
|
return Pair.of(future, preprocessEvent(metadataChangeLog));
|
2022-01-05 11:24:30 +09:00
|
|
|
}
|
2021-06-30 16:49:02 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
@Override
|
|
|
|
public Pair<Future<?>, Boolean> alwaysProduceMCLAsync(@Nonnull final Urn urn, @Nonnull String entityName, @Nonnull String aspectName,
|
|
|
|
@Nonnull final AspectSpec aspectSpec, @Nullable final RecordTemplate oldAspectValue,
|
|
|
|
@Nullable final RecordTemplate newAspectValue, @Nullable final SystemMetadata oldSystemMetadata,
|
|
|
|
@Nullable final SystemMetadata newSystemMetadata, @Nonnull AuditStamp auditStamp,
|
|
|
|
@Nonnull final ChangeType changeType) {
|
|
|
|
final MetadataChangeLog metadataChangeLog = constructMCL(null, entityName, urn, changeType, aspectName, auditStamp,
|
|
|
|
newAspectValue, newSystemMetadata, oldAspectValue, oldSystemMetadata);
|
|
|
|
return alwaysProduceMCLAsync(urn, aspectSpec, metadataChangeLog);
|
2022-01-05 11:24:30 +09:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
public Optional<Pair<Future<?>, Boolean>> conditionallyProduceMCLAsync(@Nullable RecordTemplate oldAspect,
|
|
|
|
@Nullable SystemMetadata oldSystemMetadata,
|
|
|
|
RecordTemplate newAspect, SystemMetadata newSystemMetadata,
|
|
|
|
@Nullable MetadataChangeProposal mcp, Urn entityUrn,
|
|
|
|
AuditStamp auditStamp, AspectSpec aspectSpec) {
|
|
|
|
boolean isNoOp = oldAspect == newAspect;
|
|
|
|
if (!isNoOp || _alwaysEmitChangeLog || shouldAspectEmitChangeLog(aspectSpec)) {
|
|
|
|
log.debug("Producing MetadataChangeLog for ingested aspect {}, urn {}", aspectSpec.getName(), entityUrn);
|
2022-01-05 11:24:30 +09:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
final MetadataChangeLog metadataChangeLog = constructMCL(mcp, urnToEntityName(entityUrn), entityUrn,
|
|
|
|
isNoOp ? ChangeType.RESTATE : ChangeType.UPSERT, aspectSpec.getName(), auditStamp, newAspect, newSystemMetadata,
|
|
|
|
oldAspect, oldSystemMetadata);
|
2022-01-05 11:24:30 +09:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
log.debug("Serialized MCL event: {}", metadataChangeLog);
|
|
|
|
Pair<Future<?>, Boolean> emissionStatus = alwaysProduceMCLAsync(entityUrn, aspectSpec, metadataChangeLog);
|
|
|
|
return emissionStatus.getFirst() != null ? Optional.of(emissionStatus) : Optional.empty();
|
|
|
|
} else {
|
|
|
|
log.debug(
|
|
|
|
"Skipped producing MetadataChangeLog for ingested aspect {}, urn {}. Aspect has not changed.",
|
|
|
|
aspectSpec.getName(), entityUrn);
|
|
|
|
return Optional.empty();
|
|
|
|
}
|
2021-06-30 16:49:02 -07:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
private UpdateAspectResult conditionallyProduceMCLAsync(UpdateAspectResult result) {
|
|
|
|
AbstractBatchItem request = result.getRequest();
|
|
|
|
Optional<Pair<Future<?>, Boolean>> emissionStatus = conditionallyProduceMCLAsync(result.getOldValue(), result.getOldSystemMetadata(),
|
|
|
|
result.getNewValue(), result.getNewSystemMetadata(),
|
|
|
|
request.getMetadataChangeProposal(), result.getUrn(), result.getAuditStamp(), request.getAspectSpec());
|
2021-07-30 17:41:03 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return emissionStatus.map(status ->
|
|
|
|
result.toBuilder()
|
|
|
|
.mclFuture(status.getFirst())
|
|
|
|
.processedMCL(status.getSecond())
|
|
|
|
.build()
|
|
|
|
).orElse(result);
|
2021-11-08 16:22:24 -08:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-06-03 13:24:33 -07:00
|
|
|
public RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName) {
|
2022-01-06 21:37:16 +05:30
|
|
|
log.debug("Invoked getLatestAspect with urn {}, aspect {}", urn, aspectName);
|
2021-09-02 19:05:13 -07:00
|
|
|
return getAspect(urn, aspectName, ASPECT_LATEST_VERSION);
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-07-30 17:41:03 -07:00
|
|
|
public void ingestEntities(@Nonnull final List<Entity> entities, @Nonnull final AuditStamp auditStamp,
|
2021-07-29 20:04:40 -07:00
|
|
|
@Nonnull final List<SystemMetadata> systemMetadata) {
|
2022-01-06 21:37:16 +05:30
|
|
|
log.debug("Invoked ingestEntities with entities {}, audit stamp {}", entities, auditStamp);
|
2021-07-30 17:41:03 -07:00
|
|
|
Streams.zip(entities.stream(), systemMetadata.stream(), (a, b) -> new Pair<Entity, SystemMetadata>(a, b))
|
|
|
|
.forEach(pair -> ingestEntity(pair.getFirst(), auditStamp, pair.getSecond()));
|
2021-07-29 20:04:40 -07:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2023-09-07 17:09:52 -05:00
|
|
|
public SystemMetadata ingestEntity(Entity entity, AuditStamp auditStamp) {
|
2021-07-29 20:04:40 -07:00
|
|
|
SystemMetadata generatedSystemMetadata = new SystemMetadata();
|
|
|
|
generatedSystemMetadata.setRunId(DEFAULT_RUN_ID);
|
|
|
|
generatedSystemMetadata.setLastObserved(System.currentTimeMillis());
|
|
|
|
|
|
|
|
ingestEntity(entity, auditStamp, generatedSystemMetadata);
|
2023-09-07 17:09:52 -05:00
|
|
|
return generatedSystemMetadata;
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-07-30 17:41:03 -07:00
|
|
|
public void ingestEntity(@Nonnull Entity entity, @Nonnull AuditStamp auditStamp,
|
2023-09-02 19:25:44 -05:00
|
|
|
@Nonnull SystemMetadata systemMetadata) {
|
2022-01-06 21:37:16 +05:30
|
|
|
log.debug("Invoked ingestEntity with entity {}, audit stamp {} systemMetadata {}", entity, auditStamp, systemMetadata.toString());
|
2021-07-29 20:04:40 -07:00
|
|
|
ingestSnapshotUnion(entity.getValue(), auditStamp, systemMetadata);
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
protected Map<Urn, Snapshot> getSnapshotUnions(@Nonnull final Set<Urn> urns, @Nonnull final Set<String> aspectNames) {
|
|
|
|
return getSnapshotRecords(urns, aspectNames).entrySet()
|
|
|
|
.stream()
|
|
|
|
.collect(Collectors.toMap(Map.Entry::getKey, entry -> toSnapshotUnion(entry.getValue())));
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2021-07-30 17:41:03 -07:00
|
|
|
protected Map<Urn, RecordTemplate> getSnapshotRecords(@Nonnull final Set<Urn> urns,
|
|
|
|
@Nonnull final Set<String> aspectNames) {
|
2021-06-03 13:24:33 -07:00
|
|
|
return getLatestAspectUnions(urns, aspectNames).entrySet()
|
|
|
|
.stream()
|
|
|
|
.collect(Collectors.toMap(Map.Entry::getKey, entry -> toSnapshotRecord(entry.getKey(), entry.getValue())));
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2022-01-04 00:19:46 +09:00
|
|
|
protected Map<Urn, List<UnionTemplate>> getLatestAspectUnions(
|
|
|
|
@Nonnull final Set<Urn> urns,
|
2021-07-30 17:41:03 -07:00
|
|
|
@Nonnull final Set<String> aspectNames) {
|
|
|
|
return getLatestAspects(urns, aspectNames).entrySet()
|
|
|
|
.stream()
|
|
|
|
.collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue()
|
|
|
|
.stream()
|
|
|
|
.map(aspectRecord -> toAspectUnion(entry.getKey(), aspectRecord))
|
|
|
|
.collect(Collectors.toList())));
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
2022-01-06 21:37:16 +05:30
|
|
|
/**
|
2023-02-02 15:30:49 -08:00
|
|
|
Returns true if entityType should have some aspect as per its definition
|
|
|
|
but aspects given does not have that aspect
|
2022-01-06 21:37:16 +05:30
|
|
|
*/
|
2022-06-29 22:41:41 -04:00
|
|
|
private boolean isAspectMissing(String entityType, String aspectName, Set<String> aspects) {
|
2022-01-06 21:37:16 +05:30
|
|
|
return _entityRegistry.getEntitySpec(entityType).getAspectSpecMap().containsKey(aspectName)
|
|
|
|
&& !aspects.contains(aspectName);
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2023-09-02 19:25:44 -05:00
|
|
|
public Pair<Boolean, List<Pair<String, RecordTemplate>>> generateDefaultAspectsOnFirstWrite(@Nonnull final Urn urn,
|
|
|
|
Map<String, RecordTemplate> includedAspects) {
|
|
|
|
List<Pair<String, RecordTemplate>> returnAspects = new ArrayList<>();
|
2021-10-21 11:15:10 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
final String keyAspectName = getKeyAspectName(urn);
|
|
|
|
final Map<String, RecordTemplate> latestAspects = new HashMap<>(getLatestAspectsForUrn(urn, Set.of(keyAspectName)));
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
// key aspect: does not exist in database && is being written
|
|
|
|
boolean generateDefaults = !latestAspects.containsKey(keyAspectName) && includedAspects.containsKey(keyAspectName);
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
// conditionally generate defaults
|
|
|
|
if (generateDefaults) {
|
|
|
|
String entityType = urnToEntityName(urn);
|
|
|
|
Set<String> aspectsToGet = new HashSet<>();
|
2023-06-21 04:10:07 -04:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
boolean shouldCheckBrowsePath = isAspectMissing(entityType, BROWSE_PATHS_ASPECT_NAME, includedAspects.keySet());
|
|
|
|
if (shouldCheckBrowsePath) {
|
|
|
|
aspectsToGet.add(BROWSE_PATHS_ASPECT_NAME);
|
|
|
|
}
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
boolean shouldCheckBrowsePathV2 = isAspectMissing(entityType, BROWSE_PATHS_V2_ASPECT_NAME, includedAspects.keySet());
|
|
|
|
if (shouldCheckBrowsePathV2) {
|
|
|
|
aspectsToGet.add(BROWSE_PATHS_V2_ASPECT_NAME);
|
|
|
|
}
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
boolean shouldCheckDataPlatform = isAspectMissing(entityType, DATA_PLATFORM_INSTANCE_ASPECT_NAME, includedAspects.keySet());
|
|
|
|
if (shouldCheckDataPlatform) {
|
|
|
|
aspectsToGet.add(DATA_PLATFORM_INSTANCE_ASPECT_NAME);
|
|
|
|
}
|
2022-01-06 21:37:16 +05:30
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
// fetch additional aspects
|
|
|
|
latestAspects.putAll(getLatestAspectsForUrn(urn, aspectsToGet));
|
2021-10-07 11:41:29 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
if (shouldCheckBrowsePath && latestAspects.get(BROWSE_PATHS_ASPECT_NAME) == null
|
|
|
|
&& !includedAspects.containsKey(BROWSE_PATHS_ASPECT_NAME)) {
|
|
|
|
try {
|
|
|
|
BrowsePaths generatedBrowsePath = buildDefaultBrowsePath(urn);
|
|
|
|
returnAspects.add(Pair.of(BROWSE_PATHS_ASPECT_NAME, generatedBrowsePath));
|
|
|
|
} catch (URISyntaxException e) {
|
|
|
|
log.error("Failed to parse urn: {}", urn);
|
|
|
|
}
|
2021-10-07 11:41:29 -07:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
if (shouldCheckBrowsePathV2 && latestAspects.get(BROWSE_PATHS_V2_ASPECT_NAME) == null
|
|
|
|
&& !includedAspects.containsKey(BROWSE_PATHS_V2_ASPECT_NAME)) {
|
|
|
|
try {
|
|
|
|
BrowsePathsV2 generatedBrowsePathV2 = buildDefaultBrowsePathV2(urn, false);
|
|
|
|
returnAspects.add(Pair.of(BROWSE_PATHS_V2_ASPECT_NAME, generatedBrowsePathV2));
|
|
|
|
} catch (URISyntaxException e) {
|
|
|
|
log.error("Failed to parse urn: {}", urn);
|
|
|
|
}
|
2023-06-21 04:10:07 -04:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
if (shouldCheckDataPlatform && latestAspects.get(DATA_PLATFORM_INSTANCE_ASPECT_NAME) == null
|
|
|
|
&& !includedAspects.containsKey(DATA_PLATFORM_INSTANCE_ASPECT_NAME)) {
|
|
|
|
RecordTemplate keyAspect = includedAspects.get(keyAspectName);
|
|
|
|
DataPlatformInstanceUtils.buildDataPlatformInstance(entityType, keyAspect)
|
|
|
|
.ifPresent(aspect -> returnAspects.add(Pair.of(DATA_PLATFORM_INSTANCE_ASPECT_NAME, aspect)));
|
|
|
|
}
|
2021-10-07 11:41:29 -07:00
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return Pair.of(latestAspects.containsKey(keyAspectName), returnAspects);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public List<Pair<String, RecordTemplate>> generateDefaultAspectsIfMissing(@Nonnull final Urn urn,
|
|
|
|
Map<String, RecordTemplate> includedAspects) {
|
|
|
|
|
|
|
|
final String keyAspectName = getKeyAspectName(urn);
|
|
|
|
|
|
|
|
if (includedAspects.containsKey(keyAspectName)) {
|
|
|
|
return generateDefaultAspectsOnFirstWrite(urn, includedAspects).getValue();
|
|
|
|
} else {
|
|
|
|
// No key aspect being written, generate it and potentially suggest writing it later
|
|
|
|
HashMap<String, RecordTemplate> includedWithKeyAspect = new HashMap<>(includedAspects);
|
|
|
|
Pair<String, RecordTemplate> keyAspect = Pair.of(keyAspectName, EntityUtils.buildKeyAspect(_entityRegistry, urn));
|
|
|
|
includedWithKeyAspect.put(keyAspect.getKey(), keyAspect.getValue());
|
|
|
|
|
|
|
|
Pair<Boolean, List<Pair<String, RecordTemplate>>> returnAspects = generateDefaultAspectsOnFirstWrite(urn, includedWithKeyAspect);
|
|
|
|
|
|
|
|
// missing key aspect in database, add it
|
|
|
|
if (!returnAspects.getFirst()) {
|
|
|
|
returnAspects.getValue().add(keyAspect);
|
|
|
|
}
|
|
|
|
|
|
|
|
return returnAspects.getValue();
|
|
|
|
}
|
2021-10-07 11:41:29 -07:00
|
|
|
}
|
|
|
|
|
2021-11-08 16:22:24 -08:00
|
|
|
private void ingestSnapshotUnion(@Nonnull final Snapshot snapshotUnion, @Nonnull final AuditStamp auditStamp,
|
2021-07-30 17:41:03 -07:00
|
|
|
SystemMetadata systemMetadata) {
|
2021-06-03 13:24:33 -07:00
|
|
|
final RecordTemplate snapshotRecord = RecordUtils.getSelectedRecordTemplateFromUnion(snapshotUnion);
|
2022-01-05 19:32:31 -06:00
|
|
|
final Urn urn = com.datahub.util.ModelUtils.getUrnFromSnapshot(snapshotRecord);
|
2021-10-21 11:15:10 -07:00
|
|
|
final List<Pair<String, RecordTemplate>> aspectRecordsToIngest =
|
|
|
|
NewModelUtils.getAspectsFromSnapshot(snapshotRecord);
|
2021-06-03 13:24:33 -07:00
|
|
|
|
2021-10-07 11:41:29 -07:00
|
|
|
log.info("INGEST urn {} with system metadata {}", urn.toString(), systemMetadata.toString());
|
2021-11-08 16:22:24 -08:00
|
|
|
aspectRecordsToIngest.addAll(generateDefaultAspectsIfMissing(urn,
|
2023-09-02 19:25:44 -05:00
|
|
|
aspectRecordsToIngest.stream().collect(Collectors.toMap(Pair::getKey, Pair::getValue))));
|
2021-07-29 20:04:40 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
AspectsBatchImpl aspectsBatch = AspectsBatchImpl.builder()
|
|
|
|
.items(aspectRecordsToIngest.stream().map(pair -> UpsertBatchItem.builder()
|
|
|
|
.urn(urn)
|
|
|
|
.aspectName(pair.getKey())
|
|
|
|
.aspect(pair.getValue())
|
|
|
|
.systemMetadata(systemMetadata)
|
|
|
|
.build(_entityRegistry)).collect(Collectors.toList()))
|
|
|
|
.build();
|
2021-06-03 13:24:33 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
ingestAspects(aspectsBatch, auditStamp, true, true);
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-09-02 19:05:13 -07:00
|
|
|
public AspectSpec getKeyAspectSpec(@Nonnull final Urn urn) {
|
|
|
|
return getKeyAspectSpec(urnToEntityName(urn));
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-09-02 19:05:13 -07:00
|
|
|
public AspectSpec getKeyAspectSpec(@Nonnull final String entityName) {
|
|
|
|
final EntitySpec spec = _entityRegistry.getEntitySpec(entityName);
|
|
|
|
return spec.getKeyAspectSpec();
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-11-08 16:22:24 -08:00
|
|
|
public Optional<AspectSpec> getAspectSpec(@Nonnull final String entityName, @Nonnull final String aspectName) {
|
|
|
|
final EntitySpec entitySpec = _entityRegistry.getEntitySpec(entityName);
|
|
|
|
return Optional.ofNullable(entitySpec.getAspectSpec(aspectName));
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-07-29 20:04:40 -07:00
|
|
|
public String getKeyAspectName(@Nonnull final Urn urn) {
|
|
|
|
final EntitySpec spec = _entityRegistry.getEntitySpec(urnToEntityName(urn));
|
|
|
|
final AspectSpec keySpec = spec.getKeyAspectSpec();
|
|
|
|
return keySpec.getName();
|
|
|
|
}
|
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
protected Entity toEntity(@Nonnull final Snapshot snapshot) {
|
|
|
|
return new Entity().setValue(snapshot);
|
|
|
|
}
|
|
|
|
|
|
|
|
protected Snapshot toSnapshotUnion(@Nonnull final RecordTemplate snapshotRecord) {
|
|
|
|
final Snapshot snapshot = new Snapshot();
|
2021-07-30 17:41:03 -07:00
|
|
|
RecordUtils.setSelectedRecordTemplateInUnion(snapshot, snapshotRecord);
|
2021-06-03 13:24:33 -07:00
|
|
|
return snapshot;
|
|
|
|
}
|
|
|
|
|
2021-07-30 17:41:03 -07:00
|
|
|
protected RecordTemplate toSnapshotRecord(@Nonnull final Urn urn,
|
2021-06-03 13:24:33 -07:00
|
|
|
@Nonnull final List<UnionTemplate> aspectUnionTemplates) {
|
|
|
|
final String entityName = urnToEntityName(urn);
|
|
|
|
final EntitySpec entitySpec = _entityRegistry.getEntitySpec(entityName);
|
2022-01-05 19:32:31 -06:00
|
|
|
return com.datahub.util.ModelUtils.newSnapshot(
|
2021-07-30 17:41:03 -07:00
|
|
|
getDataTemplateClassFromSchema(entitySpec.getSnapshotSchema(), RecordTemplate.class), urn,
|
2021-06-03 13:24:33 -07:00
|
|
|
aspectUnionTemplates);
|
|
|
|
}
|
|
|
|
|
2021-07-30 17:41:03 -07:00
|
|
|
protected UnionTemplate toAspectUnion(@Nonnull final Urn urn, @Nonnull final RecordTemplate aspectRecord) {
|
2021-06-03 13:24:33 -07:00
|
|
|
final EntitySpec entitySpec = _entityRegistry.getEntitySpec(urnToEntityName(urn));
|
2021-07-30 17:41:03 -07:00
|
|
|
final TyperefDataSchema aspectSchema = entitySpec.getAspectTyperefSchema();
|
|
|
|
if (aspectSchema == null) {
|
|
|
|
throw new RuntimeException(
|
|
|
|
String.format("Aspect schema for %s is null: v4 operation is not supported on this entity registry",
|
|
|
|
entitySpec.getName()));
|
|
|
|
}
|
2022-01-05 19:32:31 -06:00
|
|
|
return com.datahub.util.ModelUtils.newAspectUnion(
|
2021-07-30 17:41:03 -07:00
|
|
|
getDataTemplateClassFromSchema(entitySpec.getAspectTyperefSchema(), UnionTemplate.class), aspectRecord);
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
protected Urn toUrn(final String urnStr) {
|
|
|
|
try {
|
|
|
|
return Urn.createFromString(urnStr);
|
|
|
|
} catch (URISyntaxException e) {
|
2022-01-06 21:37:16 +05:30
|
|
|
log.error("Failed to convert urn string {} into Urn object", urnStr);
|
2021-06-03 13:24:33 -07:00
|
|
|
throw new ModelConversionException(String.format("Failed to convert urn string %s into Urn object ", urnStr), e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-04 00:19:46 +09:00
|
|
|
private EntityResponse toEntityResponse(final Urn urn, final List<EnvelopedAspect> envelopedAspects) {
|
|
|
|
final EntityResponse response = new EntityResponse();
|
|
|
|
response.setUrn(urn);
|
|
|
|
response.setEntityName(urnToEntityName(urn));
|
|
|
|
response.setAspects(new EnvelopedAspectMap(
|
|
|
|
envelopedAspects.stream().collect(Collectors.toMap(EnvelopedAspect::getName, aspect -> aspect))
|
|
|
|
));
|
|
|
|
return response;
|
|
|
|
}
|
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
private Map<String, Set<String>> buildEntityToValidAspects(final EntityRegistry entityRegistry) {
|
|
|
|
return entityRegistry.getEntitySpecs()
|
2021-07-30 17:41:03 -07:00
|
|
|
.values()
|
2021-06-03 13:24:33 -07:00
|
|
|
.stream()
|
|
|
|
.collect(Collectors.toMap(EntitySpec::getName,
|
2021-07-30 17:41:03 -07:00
|
|
|
entry -> entry.getAspectSpecs().stream().map(AspectSpec::getName).collect(Collectors.toSet())));
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2021-09-02 19:05:13 -07:00
|
|
|
public EntityRegistry getEntityRegistry() {
|
2021-06-03 13:24:33 -07:00
|
|
|
return _entityRegistry;
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public void setRetentionService(RetentionService retentionService) {
|
|
|
|
_retentionService = retentionService;
|
|
|
|
}
|
|
|
|
|
2021-06-03 13:24:33 -07:00
|
|
|
protected Set<String> getEntityAspectNames(final Urn entityUrn) {
|
|
|
|
return getEntityAspectNames(urnToEntityName(entityUrn));
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-01-04 00:19:46 +09:00
|
|
|
public Set<String> getEntityAspectNames(final String entityName) {
|
2021-06-03 13:24:33 -07:00
|
|
|
return _entityToValidAspects.get(entityName);
|
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public void setWritable(boolean canWrite) {
|
|
|
|
log.debug("Setting writable to {}", canWrite);
|
|
|
|
_aspectDao.setWritable(canWrite);
|
|
|
|
}
|
2021-07-29 20:04:40 -07:00
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-03-15 19:05:52 +00:00
|
|
|
public RollbackRunResult rollbackRun(List<AspectRowSummary> aspectRows, String runId, boolean hardDelete) {
|
|
|
|
return rollbackWithConditions(aspectRows, Collections.singletonMap("runId", runId), hardDelete);
|
2021-11-28 21:06:27 -08:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public RollbackRunResult rollbackWithConditions(List<AspectRowSummary> aspectRows, Map<String, String> conditions, boolean hardDelete) {
|
|
|
|
List<AspectRowSummary> removedAspects = new ArrayList<>();
|
|
|
|
AtomicInteger rowsDeletedFromEntityDeletion = new AtomicInteger(0);
|
2021-07-29 20:04:40 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
List<Future<?>> futures = aspectRows.stream().map(aspectToRemove -> {
|
2022-05-24 20:40:42 +01:00
|
|
|
RollbackResult result = deleteAspect(aspectToRemove.getUrn(), aspectToRemove.getAspectName(),
|
|
|
|
conditions, hardDelete);
|
|
|
|
if (result != null) {
|
|
|
|
Optional<AspectSpec> aspectSpec = getAspectSpec(result.entityName, result.aspectName);
|
|
|
|
if (!aspectSpec.isPresent()) {
|
|
|
|
log.error("Issue while rolling back: unknown aspect {} for entity {}", result.entityName, result.aspectName);
|
2023-09-02 19:25:44 -05:00
|
|
|
return null;
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
2021-12-14 11:18:02 +09:00
|
|
|
|
2022-05-24 20:40:42 +01:00
|
|
|
rowsDeletedFromEntityDeletion.addAndGet(result.additionalRowsAffected);
|
|
|
|
removedAspects.add(aspectToRemove);
|
2023-09-02 19:25:44 -05:00
|
|
|
return alwaysProduceMCLAsync(result.getUrn(), result.getEntityName(), result.getAspectName(), aspectSpec.get(),
|
2022-05-24 20:40:42 +01:00
|
|
|
result.getOldValue(), result.getNewValue(), result.getOldSystemMetadata(), result.getNewSystemMetadata(),
|
|
|
|
// TODO: use properly attributed audit stamp.
|
|
|
|
createSystemAuditStamp(),
|
2023-09-02 19:25:44 -05:00
|
|
|
result.getChangeType()).getFirst();
|
|
|
|
}
|
|
|
|
|
|
|
|
return null;
|
|
|
|
}).filter(Objects::nonNull).collect(Collectors.toList());
|
|
|
|
|
|
|
|
futures.forEach(f -> {
|
|
|
|
try {
|
|
|
|
f.get();
|
|
|
|
} catch (InterruptedException | ExecutionException e) {
|
|
|
|
throw new RuntimeException(e);
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion.get());
|
2021-12-14 11:18:02 +09:00
|
|
|
}
|
|
|
|
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public RollbackRunResult deleteUrn(Urn urn) {
|
|
|
|
List<AspectRowSummary> removedAspects = new ArrayList<>();
|
|
|
|
Integer rowsDeletedFromEntityDeletion = 0;
|
|
|
|
|
|
|
|
final EntitySpec spec = getEntityRegistry().getEntitySpec(PegasusUtils.urnToEntityName(urn));
|
|
|
|
final AspectSpec keySpec = spec.getKeyAspectSpec();
|
|
|
|
String keyAspectName = getKeyAspectName(urn);
|
|
|
|
|
2022-12-29 11:26:42 -06:00
|
|
|
EntityAspect latestKey = null;
|
|
|
|
try {
|
|
|
|
latestKey = _aspectDao.getLatestAspect(urn.toString(), keyAspectName);
|
|
|
|
} catch (EntityNotFoundException e) {
|
|
|
|
log.warn("Entity to delete does not exist. {}", urn.toString());
|
|
|
|
}
|
2022-05-24 20:40:42 +01:00
|
|
|
if (latestKey == null || latestKey.getSystemMetadata() == null) {
|
|
|
|
return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion);
|
|
|
|
}
|
|
|
|
|
|
|
|
SystemMetadata latestKeySystemMetadata = EntityUtils.parseSystemMetadata(latestKey.getSystemMetadata());
|
|
|
|
RollbackResult result = deleteAspect(urn.toString(), keyAspectName, Collections.singletonMap("runId", latestKeySystemMetadata.getRunId()), true);
|
|
|
|
|
|
|
|
if (result != null) {
|
|
|
|
AspectRowSummary summary = new AspectRowSummary();
|
|
|
|
summary.setUrn(urn.toString());
|
|
|
|
summary.setKeyAspect(true);
|
|
|
|
summary.setAspectName(keyAspectName);
|
|
|
|
summary.setVersion(0);
|
|
|
|
summary.setTimestamp(latestKey.getCreatedOn().getTime());
|
|
|
|
|
|
|
|
rowsDeletedFromEntityDeletion = result.additionalRowsAffected;
|
|
|
|
removedAspects.add(summary);
|
2023-09-02 19:25:44 -05:00
|
|
|
Future<?> future = alwaysProduceMCLAsync(result.getUrn(), result.getEntityName(), result.getAspectName(), keySpec,
|
2022-05-24 20:40:42 +01:00
|
|
|
result.getOldValue(), result.getNewValue(), result.getOldSystemMetadata(), result.getNewSystemMetadata(),
|
|
|
|
// TODO: Use a proper inferred audit stamp
|
|
|
|
createSystemAuditStamp(),
|
2023-09-02 19:25:44 -05:00
|
|
|
result.getChangeType()).getFirst();
|
|
|
|
|
|
|
|
if (future != null) {
|
|
|
|
try {
|
|
|
|
future.get();
|
|
|
|
} catch (InterruptedException | ExecutionException e) {
|
|
|
|
throw new RuntimeException(e);
|
|
|
|
}
|
|
|
|
}
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion);
|
|
|
|
}
|
|
|
|
|
2023-02-20 14:00:14 -08:00
|
|
|
/**
|
|
|
|
* Returns true if the entity exists (has materialized aspects)
|
|
|
|
*
|
|
|
|
* @param urn the urn of the entity to check
|
|
|
|
* @return true if the entity exists, false otherwise
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-05-24 20:40:42 +01:00
|
|
|
public Boolean exists(Urn urn) {
|
|
|
|
final Set<String> aspectsToFetch = getEntityAspectNames(urn);
|
|
|
|
final List<EntityAspectIdentifier> dbKeys = aspectsToFetch.stream()
|
|
|
|
.map(aspectName -> new EntityAspectIdentifier(urn.toString(), aspectName, ASPECT_LATEST_VERSION))
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
|
|
|
|
Map<EntityAspectIdentifier, EntityAspect> aspects = _aspectDao.batchGet(new HashSet(dbKeys));
|
|
|
|
return aspects.values().stream().anyMatch(aspect -> aspect != null);
|
|
|
|
}
|
|
|
|
|
2023-02-20 14:00:14 -08:00
|
|
|
/**
|
|
|
|
* Returns true if an entity is soft-deleted.
|
|
|
|
*
|
|
|
|
* @param urn the urn to check
|
|
|
|
* @return true is the entity is soft deleted, false otherwise.
|
|
|
|
*/
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2023-02-20 14:00:14 -08:00
|
|
|
public Boolean isSoftDeleted(@Nonnull final Urn urn) {
|
|
|
|
Objects.requireNonNull(urn, "urn is required");
|
|
|
|
final RecordTemplate statusAspect = getLatestAspect(urn, STATUS_ASPECT_NAME);
|
|
|
|
return statusAspect != null && ((Status) statusAspect).isRemoved();
|
|
|
|
}
|
|
|
|
|
2023-09-07 17:09:52 -05:00
|
|
|
@Override
|
|
|
|
public Boolean exists(Urn urn, String aspectName) {
|
|
|
|
EntityAspectIdentifier dbKey = new EntityAspectIdentifier(urn.toString(), aspectName, ASPECT_LATEST_VERSION);
|
|
|
|
Map<EntityAspectIdentifier, EntityAspect> aspects = _aspectDao.batchGet(Set.of(dbKey));
|
|
|
|
return aspects.values().stream().anyMatch(Objects::nonNull);
|
|
|
|
}
|
|
|
|
|
2022-05-24 20:40:42 +01:00
|
|
|
@Nullable
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-06-21 18:00:16 -05:00
|
|
|
public RollbackResult deleteAspect(String urn, String aspectName, @Nonnull Map<String, String> conditions, boolean hardDelete) {
|
2022-05-24 20:40:42 +01:00
|
|
|
// Validate pre-conditions before running queries
|
|
|
|
Urn entityUrn;
|
|
|
|
EntitySpec entitySpec;
|
|
|
|
try {
|
|
|
|
entityUrn = Urn.createFromString(urn);
|
|
|
|
String entityName = PegasusUtils.urnToEntityName(entityUrn);
|
|
|
|
entitySpec = getEntityRegistry().getEntitySpec(entityName);
|
|
|
|
} catch (URISyntaxException uriSyntaxException) {
|
|
|
|
// don't expect this to happen, so raising RuntimeException here
|
|
|
|
throw new RuntimeException(String.format("Failed to extract urn from %s", urn));
|
|
|
|
}
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
final RollbackResult result = _aspectDao.runInTransactionWithRetry((tx) -> {
|
2022-05-24 20:40:42 +01:00
|
|
|
Integer additionalRowsDeleted = 0;
|
|
|
|
|
|
|
|
// 1. Fetch the latest existing version of the aspect.
|
|
|
|
final EntityAspect latest = _aspectDao.getLatestAspect(urn, aspectName);
|
|
|
|
|
|
|
|
// 1.1 If no latest exists, skip this aspect
|
|
|
|
if (latest == null) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
// 2. Compare the match conditions, if they don't match, ignore.
|
|
|
|
SystemMetadata latestSystemMetadata = EntityUtils.parseSystemMetadata(latest.getSystemMetadata());
|
|
|
|
if (!filterMatch(latestSystemMetadata, conditions)) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
String latestMetadata = latest.getMetadata();
|
|
|
|
|
|
|
|
// 3. Check if this is a key aspect
|
|
|
|
Boolean isKeyAspect = false;
|
|
|
|
try {
|
|
|
|
isKeyAspect = getKeyAspectName(Urn.createFromString(urn)).equals(aspectName);
|
|
|
|
} catch (URISyntaxException e) {
|
2022-06-21 18:00:16 -05:00
|
|
|
log.error("Error occurred while parsing urn: {}", urn, e);
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// 4. Fetch all preceding aspects, that match
|
|
|
|
List<EntityAspect> aspectsToDelete = new ArrayList<>();
|
|
|
|
long maxVersion = _aspectDao.getMaxVersion(urn, aspectName);
|
|
|
|
EntityAspect survivingAspect = null;
|
|
|
|
String previousMetadata = null;
|
|
|
|
boolean filterMatch = true;
|
|
|
|
while (maxVersion > 0 && filterMatch) {
|
|
|
|
EntityAspect candidateAspect = _aspectDao.getAspect(urn, aspectName, maxVersion);
|
|
|
|
SystemMetadata previousSysMetadata = EntityUtils.parseSystemMetadata(candidateAspect.getSystemMetadata());
|
|
|
|
filterMatch = filterMatch(previousSysMetadata, conditions);
|
|
|
|
if (filterMatch) {
|
|
|
|
aspectsToDelete.add(candidateAspect);
|
|
|
|
maxVersion = maxVersion - 1;
|
|
|
|
} else {
|
|
|
|
survivingAspect = candidateAspect;
|
|
|
|
previousMetadata = survivingAspect.getMetadata();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// 5. Apply deletes and fix up latest row
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
aspectsToDelete.forEach(aspect -> _aspectDao.deleteAspect(tx, aspect));
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
if (survivingAspect != null) {
|
|
|
|
// if there was a surviving aspect, copy its information into the latest row
|
|
|
|
// eBean does not like us updating a pkey column (version) for the surviving aspect
|
|
|
|
// as a result we copy information from survivingAspect to latest and delete survivingAspect
|
|
|
|
latest.setMetadata(survivingAspect.getMetadata());
|
|
|
|
latest.setSystemMetadata(survivingAspect.getSystemMetadata());
|
|
|
|
latest.setCreatedOn(survivingAspect.getCreatedOn());
|
|
|
|
latest.setCreatedBy(survivingAspect.getCreatedBy());
|
|
|
|
latest.setCreatedFor(survivingAspect.getCreatedFor());
|
2023-09-02 19:25:44 -05:00
|
|
|
_aspectDao.saveAspect(tx, latest, false);
|
2023-07-31 09:10:40 -05:00
|
|
|
// metrics
|
|
|
|
_aspectDao.incrementWriteMetrics(aspectName, 1, latest.getAspect().getBytes(StandardCharsets.UTF_8).length);
|
2023-09-02 19:25:44 -05:00
|
|
|
_aspectDao.deleteAspect(tx, survivingAspect);
|
2022-05-24 20:40:42 +01:00
|
|
|
} else {
|
|
|
|
if (isKeyAspect) {
|
|
|
|
if (hardDelete) {
|
|
|
|
// If this is the key aspect, delete the entity entirely.
|
2023-09-02 19:25:44 -05:00
|
|
|
additionalRowsDeleted = _aspectDao.deleteUrn(tx, urn);
|
2022-05-24 20:40:42 +01:00
|
|
|
} else if (entitySpec.hasAspect(Constants.STATUS_ASPECT_NAME)) {
|
|
|
|
// soft delete by setting status.removed=true (if applicable)
|
|
|
|
final Status statusAspect = new Status();
|
|
|
|
statusAspect.setRemoved(true);
|
|
|
|
|
|
|
|
final MetadataChangeProposal gmce = new MetadataChangeProposal();
|
|
|
|
gmce.setEntityUrn(entityUrn);
|
|
|
|
gmce.setChangeType(ChangeType.UPSERT);
|
|
|
|
gmce.setEntityType(entityUrn.getEntityType());
|
|
|
|
gmce.setAspectName(Constants.STATUS_ASPECT_NAME);
|
|
|
|
gmce.setAspect(GenericRecordUtils.serializeAspect(statusAspect));
|
|
|
|
final AuditStamp auditStamp = new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis());
|
|
|
|
|
2022-10-03 19:56:19 -05:00
|
|
|
this.ingestProposal(gmce, auditStamp, false);
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Else, only delete the specific aspect.
|
2023-09-02 19:25:44 -05:00
|
|
|
_aspectDao.deleteAspect(tx, latest);
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// 6. Emit the Update
|
|
|
|
try {
|
|
|
|
final RecordTemplate latestValue = latest == null ? null
|
|
|
|
: EntityUtils.toAspectRecord(Urn.createFromString(latest.getUrn()), latest.getAspect(),
|
2023-02-02 15:30:49 -08:00
|
|
|
latestMetadata, getEntityRegistry());
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
final RecordTemplate previousValue = survivingAspect == null ? null
|
|
|
|
: EntityUtils.toAspectRecord(Urn.createFromString(survivingAspect.getUrn()),
|
2023-02-02 15:30:49 -08:00
|
|
|
survivingAspect.getAspect(), previousMetadata, getEntityRegistry());
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
final Urn urnObj = Urn.createFromString(urn);
|
|
|
|
// We are not deleting key aspect if hardDelete has not been set so do not return a rollback result
|
|
|
|
if (isKeyAspect && !hardDelete) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
return new RollbackResult(urnObj, urnObj.getEntityType(), latest.getAspect(), latestValue,
|
|
|
|
previousValue, latestSystemMetadata,
|
|
|
|
previousValue == null ? null : EntityUtils.parseSystemMetadata(survivingAspect.getSystemMetadata()),
|
|
|
|
survivingAspect == null ? ChangeType.DELETE : ChangeType.UPSERT, isKeyAspect, additionalRowsDeleted);
|
|
|
|
} catch (URISyntaxException e) {
|
|
|
|
throw new RuntimeException(String.format("Failed to emit the update for urn %s", urn));
|
2022-06-21 18:00:16 -05:00
|
|
|
} catch (IllegalStateException e) {
|
|
|
|
log.warn("Unable to find aspect, rollback result will not be sent. Error: {}", e.getMessage());
|
|
|
|
return null;
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
|
|
|
}, DEFAULT_MAX_TRANSACTION_RETRY);
|
|
|
|
|
|
|
|
return result;
|
2021-12-14 11:18:02 +09:00
|
|
|
}
|
2022-04-26 01:29:24 +01:00
|
|
|
|
|
|
|
protected boolean filterMatch(SystemMetadata systemMetadata, Map<String, String> conditions) {
|
|
|
|
String runIdCondition = conditions.getOrDefault("runId", null);
|
|
|
|
if (runIdCondition != null) {
|
|
|
|
if (!runIdCondition.equals(systemMetadata.getRunId())) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
String registryNameCondition = conditions.getOrDefault("registryName", null);
|
|
|
|
if (registryNameCondition != null) {
|
|
|
|
if (!registryNameCondition.equals(systemMetadata.getRegistryName())) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
String registryVersionCondition = conditions.getOrDefault("registryVersion", null);
|
|
|
|
if (registryVersionCondition != null) {
|
|
|
|
if (!registryVersionCondition.equals(systemMetadata.getRegistryVersion())) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
protected AuditStamp createSystemAuditStamp() {
|
|
|
|
return new AuditStamp()
|
|
|
|
.setActor(UrnUtils.getUrn(SYSTEM_ACTOR))
|
|
|
|
.setTime(System.currentTimeMillis());
|
|
|
|
}
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
@Nonnull
|
|
|
|
private Map<EntityAspectIdentifier, EntityAspect> getLatestAspect(@Nonnull final Set<Urn> urns, @Nonnull final Set<String> aspectNames) {
|
|
|
|
|
|
|
|
log.debug("Invoked getLatestAspects with urns: {}, aspectNames: {}", urns, aspectNames);
|
|
|
|
|
|
|
|
// Create DB keys
|
|
|
|
final Set<EntityAspectIdentifier> dbKeys = urns.stream().map(urn -> {
|
|
|
|
final Set<String> aspectsToFetch = aspectNames.isEmpty() ? getEntityAspectNames(urn) : aspectNames;
|
|
|
|
return aspectsToFetch.stream()
|
|
|
|
.map(aspectName -> new EntityAspectIdentifier(urn.toString(), aspectName, ASPECT_LATEST_VERSION))
|
|
|
|
.collect(Collectors.toList());
|
|
|
|
}).flatMap(List::stream).collect(Collectors.toSet());
|
|
|
|
|
|
|
|
Map<EntityAspectIdentifier, EntityAspect> batchGetResults = new HashMap<>();
|
|
|
|
Iterators.partition(dbKeys.iterator(), MAX_KEYS_PER_QUERY)
|
|
|
|
.forEachRemaining(batch -> batchGetResults.putAll(_aspectDao.batchGet(ImmutableSet.copyOf(batch))));
|
|
|
|
return batchGetResults;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When a user tries to fetch a negative version, we want to index most recent to least recent snapshots.
|
|
|
|
* To do this, we want to fetch the maximum version and subtract the negative version from that. Since -1 represents
|
|
|
|
* the maximum version, we need to add 1 to the final result.
|
|
|
|
*/
|
|
|
|
private long calculateVersionNumber(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull long version) {
|
|
|
|
if (version < 0) {
|
|
|
|
return _aspectDao.getMaxVersion(urn.toString(), aspectName) + version + 1;
|
|
|
|
}
|
|
|
|
return version;
|
|
|
|
}
|
|
|
|
|
|
|
|
private Map<EntityAspectIdentifier, EnvelopedAspect> getEnvelopedAspects(final Set<EntityAspectIdentifier> dbKeys) {
|
|
|
|
final Map<EntityAspectIdentifier, EnvelopedAspect> result = new HashMap<>();
|
|
|
|
final Map<EntityAspectIdentifier, EntityAspect> dbEntries = _aspectDao.batchGet(dbKeys);
|
|
|
|
|
|
|
|
for (EntityAspectIdentifier currKey : dbKeys) {
|
|
|
|
|
|
|
|
final EntityAspect currAspectEntry = dbEntries.get(currKey);
|
|
|
|
|
|
|
|
if (currAspectEntry == null) {
|
|
|
|
// No aspect found.
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Aspect found. Now turn it into an EnvelopedAspect
|
|
|
|
final com.linkedin.entity.Aspect aspect = RecordUtils.toRecordTemplate(com.linkedin.entity.Aspect.class, currAspectEntry
|
|
|
|
.getMetadata());
|
|
|
|
final EnvelopedAspect envelopedAspect = new EnvelopedAspect();
|
|
|
|
envelopedAspect.setName(currAspectEntry.getAspect());
|
|
|
|
envelopedAspect.setVersion(currAspectEntry.getVersion());
|
|
|
|
// TODO: I think we can assume this here, adding as it's a required field so object mapping barfs when trying to access it,
|
|
|
|
// since nowhere else is using it should be safe for now at least
|
|
|
|
envelopedAspect.setType(AspectType.VERSIONED);
|
|
|
|
envelopedAspect.setValue(aspect);
|
2022-07-07 14:31:01 -05:00
|
|
|
|
|
|
|
try {
|
|
|
|
if (currAspectEntry.getSystemMetadata() != null) {
|
|
|
|
final SystemMetadata systemMetadata = RecordUtils.toRecordTemplate(SystemMetadata.class, currAspectEntry.getSystemMetadata());
|
|
|
|
envelopedAspect.setSystemMetadata(systemMetadata);
|
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
|
|
|
log.warn("Exception encountered when setting system metadata on enveloped aspect {}. Error: {}", envelopedAspect.getName(), e);
|
|
|
|
}
|
|
|
|
|
2022-05-24 20:40:42 +01:00
|
|
|
envelopedAspect.setCreated(new AuditStamp()
|
|
|
|
.setActor(UrnUtils.getUrn(currAspectEntry.getCreatedBy()))
|
|
|
|
.setTime(currAspectEntry.getCreatedOn().getTime())
|
|
|
|
);
|
|
|
|
result.put(currKey, envelopedAspect);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
private EnvelopedAspect getKeyEnvelopedAspect(final Urn urn) {
|
|
|
|
final EntitySpec spec = getEntityRegistry().getEntitySpec(PegasusUtils.urnToEntityName(urn));
|
|
|
|
final AspectSpec keySpec = spec.getKeyAspectSpec();
|
|
|
|
final com.linkedin.entity.Aspect aspect =
|
2022-09-19 11:47:20 -04:00
|
|
|
new com.linkedin.entity.Aspect(EntityKeyUtils.convertUrnToEntityKey(urn, keySpec).data());
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
final EnvelopedAspect envelopedAspect = new EnvelopedAspect();
|
|
|
|
envelopedAspect.setName(keySpec.getName());
|
|
|
|
envelopedAspect.setVersion(ASPECT_LATEST_VERSION);
|
|
|
|
envelopedAspect.setValue(aspect);
|
|
|
|
// TODO: I think we can assume this here, adding as it's a required field so object mapping barfs when trying to access it,
|
|
|
|
// since nowhere else is using it should be safe for now at least
|
|
|
|
envelopedAspect.setType(AspectType.VERSIONED);
|
|
|
|
envelopedAspect.setCreated(
|
2023-02-02 15:30:49 -08:00
|
|
|
new AuditStamp().setActor(UrnUtils.getUrn(SYSTEM_ACTOR)).setTime(System.currentTimeMillis()));
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
return envelopedAspect;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nonnull
|
2023-09-02 19:25:44 -05:00
|
|
|
private UpdateAspectResult ingestAspectToLocalDB(
|
|
|
|
@Nullable Transaction tx,
|
|
|
|
@Nonnull final Urn urn,
|
|
|
|
@Nonnull final String aspectName,
|
|
|
|
@Nonnull final RecordTemplate newValue,
|
|
|
|
@Nonnull final AuditStamp auditStamp,
|
|
|
|
@Nonnull final SystemMetadata providedSystemMetadata,
|
|
|
|
@Nullable final EntityAspect latest,
|
|
|
|
@Nonnull final Long nextVersion) {
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-09-06 10:51:03 -07:00
|
|
|
// Set the "last run id" to be the run id provided with the new system metadata. This will be stored in index
|
|
|
|
// for all aspects that have a run id, regardless of whether they change.
|
|
|
|
providedSystemMetadata.setLastRunId(providedSystemMetadata.getRunId(GetMode.NULL), SetMode.IGNORE_NULL);
|
|
|
|
|
2022-05-24 20:40:42 +01:00
|
|
|
// 2. Compare the latest existing and new.
|
|
|
|
final RecordTemplate oldValue =
|
|
|
|
latest == null ? null : EntityUtils.toAspectRecord(urn, aspectName, latest.getMetadata(), getEntityRegistry());
|
|
|
|
|
|
|
|
// 3. If there is no difference between existing and new, we just update
|
|
|
|
// the lastObserved in system metadata. RunId should stay as the original runId
|
|
|
|
if (oldValue != null && DataTemplateUtil.areEqual(oldValue, newValue)) {
|
|
|
|
SystemMetadata latestSystemMetadata = EntityUtils.parseSystemMetadata(latest.getSystemMetadata());
|
|
|
|
latestSystemMetadata.setLastObserved(providedSystemMetadata.getLastObserved());
|
2023-09-06 10:51:03 -07:00
|
|
|
latestSystemMetadata.setLastRunId(providedSystemMetadata.getLastRunId(GetMode.NULL), SetMode.IGNORE_NULL);
|
2022-05-24 20:40:42 +01:00
|
|
|
|
|
|
|
latest.setSystemMetadata(RecordUtils.toJsonString(latestSystemMetadata));
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
log.info("Ingesting aspect with name {}, urn {}", aspectName, urn);
|
|
|
|
_aspectDao.saveAspect(tx, latest, false);
|
2022-05-24 20:40:42 +01:00
|
|
|
|
2023-07-31 09:10:40 -05:00
|
|
|
// metrics
|
|
|
|
_aspectDao.incrementWriteMetrics(aspectName, 1, latest.getAspect().getBytes(StandardCharsets.UTF_8).length);
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return UpdateAspectResult.builder()
|
|
|
|
.urn(urn)
|
|
|
|
.oldValue(oldValue)
|
|
|
|
.newValue(oldValue)
|
|
|
|
.oldSystemMetadata(EntityUtils.parseSystemMetadata(latest.getSystemMetadata()))
|
|
|
|
.newSystemMetadata(latestSystemMetadata)
|
|
|
|
.operation(MetadataAuditOperation.UPDATE)
|
|
|
|
.auditStamp(auditStamp)
|
|
|
|
.maxVersion(0)
|
|
|
|
.build();
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// 4. Save the newValue as the latest version
|
|
|
|
log.debug("Ingesting aspect with name {}, urn {}", aspectName, urn);
|
2023-07-31 09:10:40 -05:00
|
|
|
String newValueStr = EntityUtils.toJsonAspect(newValue);
|
2023-09-02 19:25:44 -05:00
|
|
|
long versionOfOld = _aspectDao.saveLatestAspect(tx, urn.toString(), aspectName, latest == null ? null : EntityUtils.toJsonAspect(oldValue),
|
2022-05-24 20:40:42 +01:00
|
|
|
latest == null ? null : latest.getCreatedBy(), latest == null ? null : latest.getCreatedFor(),
|
|
|
|
latest == null ? null : latest.getCreatedOn(), latest == null ? null : latest.getSystemMetadata(),
|
2023-07-31 09:10:40 -05:00
|
|
|
newValueStr, auditStamp.getActor().toString(),
|
2022-05-24 20:40:42 +01:00
|
|
|
auditStamp.hasImpersonator() ? auditStamp.getImpersonator().toString() : null,
|
|
|
|
new Timestamp(auditStamp.getTime()), EntityUtils.toJsonAspect(providedSystemMetadata), nextVersion);
|
|
|
|
|
2023-07-31 09:10:40 -05:00
|
|
|
// metrics
|
|
|
|
_aspectDao.incrementWriteMetrics(aspectName, 1, newValueStr.getBytes(StandardCharsets.UTF_8).length);
|
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
return UpdateAspectResult.builder()
|
|
|
|
.urn(urn)
|
|
|
|
.oldValue(oldValue)
|
|
|
|
.newValue(newValue)
|
|
|
|
.oldSystemMetadata(latest == null ? null : EntityUtils.parseSystemMetadata(latest.getSystemMetadata()))
|
|
|
|
.newSystemMetadata(providedSystemMetadata)
|
|
|
|
.operation(MetadataAuditOperation.UPDATE)
|
|
|
|
.auditStamp(auditStamp)
|
|
|
|
.maxVersion(versionOfOld)
|
|
|
|
.build();
|
2022-05-24 20:40:42 +01:00
|
|
|
}
|
2022-09-07 13:32:38 -07:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Builds the default browse path aspects for a subset of well-supported entities.
|
|
|
|
*
|
|
|
|
* This method currently supports datasets, charts, dashboards, data flows, data jobs, and glossary terms.
|
|
|
|
*/
|
|
|
|
@Nonnull
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2022-09-07 13:32:38 -07:00
|
|
|
public BrowsePaths buildDefaultBrowsePath(final @Nonnull Urn urn) throws URISyntaxException {
|
|
|
|
Character dataPlatformDelimiter = getDataPlatformDelimiter(urn);
|
|
|
|
String defaultBrowsePath = getDefaultBrowsePath(urn, this.getEntityRegistry(), dataPlatformDelimiter);
|
|
|
|
StringArray browsePaths = new StringArray();
|
|
|
|
browsePaths.add(defaultBrowsePath);
|
|
|
|
BrowsePaths browsePathAspect = new BrowsePaths();
|
|
|
|
browsePathAspect.setPaths(browsePaths);
|
|
|
|
return browsePathAspect;
|
|
|
|
}
|
|
|
|
|
2023-06-21 04:10:07 -04:00
|
|
|
/**
|
|
|
|
* Builds the default browse path V2 aspects for all entities.
|
|
|
|
*
|
|
|
|
* This method currently supports datasets, charts, dashboards, and data jobs best. Everything else
|
|
|
|
* will have a basic "Default" folder added to their browsePathV2.
|
|
|
|
*/
|
|
|
|
@Nonnull
|
2023-07-19 20:09:14 -05:00
|
|
|
@Override
|
2023-06-21 04:10:07 -04:00
|
|
|
public BrowsePathsV2 buildDefaultBrowsePathV2(final @Nonnull Urn urn, boolean useContainerPaths) throws URISyntaxException {
|
|
|
|
Character dataPlatformDelimiter = getDataPlatformDelimiter(urn);
|
|
|
|
return BrowsePathV2Utils.getDefaultBrowsePathV2(urn, this.getEntityRegistry(), dataPlatformDelimiter, this, useContainerPaths);
|
|
|
|
}
|
|
|
|
|
2022-09-07 13:32:38 -07:00
|
|
|
/**
|
|
|
|
* Returns a delimiter on which the name of an asset may be split.
|
|
|
|
*/
|
|
|
|
private Character getDataPlatformDelimiter(Urn urn) {
|
|
|
|
// Attempt to construct the appropriate Data Platform URN
|
|
|
|
Urn dataPlatformUrn = buildDataPlatformUrn(urn, this.getEntityRegistry());
|
|
|
|
if (dataPlatformUrn != null) {
|
|
|
|
// Attempt to resolve the delimiter from Data Platform Info
|
|
|
|
DataPlatformInfo dataPlatformInfo = getDataPlatformInfo(dataPlatformUrn);
|
|
|
|
if (dataPlatformInfo != null && dataPlatformInfo.hasDatasetNameDelimiter()) {
|
|
|
|
return dataPlatformInfo.getDatasetNameDelimiter().charAt(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Else, fallback to a default delimiter (period) if one cannot be resolved.
|
|
|
|
return '.';
|
|
|
|
}
|
|
|
|
|
|
|
|
@Nullable
|
|
|
|
private DataPlatformInfo getDataPlatformInfo(Urn urn) {
|
|
|
|
try {
|
|
|
|
final EntityResponse entityResponse = getEntityV2(
|
|
|
|
Constants.DATA_PLATFORM_ENTITY_NAME,
|
|
|
|
urn,
|
|
|
|
ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME)
|
|
|
|
);
|
2023-03-21 08:43:56 -07:00
|
|
|
if (entityResponse != null && entityResponse.hasAspects() && entityResponse.getAspects()
|
|
|
|
.containsKey(Constants.DATA_PLATFORM_INFO_ASPECT_NAME)) {
|
|
|
|
return new DataPlatformInfo(
|
|
|
|
entityResponse.getAspects().get(Constants.DATA_PLATFORM_INFO_ASPECT_NAME).getValue().data());
|
2022-09-07 13:32:38 -07:00
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
|
|
|
log.warn(String.format("Failed to find Data Platform Info for urn %s", urn));
|
|
|
|
}
|
|
|
|
return null;
|
|
|
|
}
|
2023-03-21 08:43:56 -07:00
|
|
|
|
2023-09-02 19:25:44 -05:00
|
|
|
private static boolean shouldAspectEmitChangeLog(@Nonnull final AspectSpec aspectSpec) {
|
2023-03-21 08:43:56 -07:00
|
|
|
final List<RelationshipFieldSpec> relationshipFieldSpecs = aspectSpec.getRelationshipFieldSpecs();
|
|
|
|
return relationshipFieldSpecs.stream().anyMatch(RelationshipFieldSpec::isLineageRelationship);
|
|
|
|
}
|
2021-06-03 13:24:33 -07:00
|
|
|
}
|