mirror of https://github.com/datahub-project/datahub.git
synced 2025-12-16 12:38:13 +00:00

Refactor MIE consumer to be used in Samza (#1203)

parent 8655c407c9
commit fdc3ba03d1
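Instead of marking removed datasets in the database through DictDatasetDao, processEvent now returns the DELETE MetadataChangeEvents it derives from the inventory diff; the Kafka consumer path sends them with sendMessage, and a Samza job can reuse the same method and emit the MCEs itself. Construction of the DELETE MCE moves into a shared ProcessorUtil.mceDelete helper with a unit test, and hibernate-core and HikariCP are bumped to 5.2.16.Final and 2.7.8.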
build.gradle

@@ -42,8 +42,8 @@ ext.externalDependency = [

   "parquet_avro"    : "org.apache.parquet:parquet-avro:1.8.1",

-  "hibernate_core"  : "org.hibernate:hibernate-core:5.2.5.Final",
-  "hikaricp"        : "com.zaxxer:HikariCP:2.6.3",
+  "hibernate_core"  : "org.hibernate:hibernate-core:5.2.16.Final",
+  "hikaricp"        : "com.zaxxer:HikariCP:2.7.8",

   "lombok"          : "org.projectlombok:lombok:1.16.20",
   "guava"           : "com.google.guava:guava:19.0",
MetadataInventoryProcessor.java

@@ -17,6 +17,7 @@ import com.linkedin.events.metadata.ChangeAuditStamp;
 import com.linkedin.events.metadata.DataOrigin;
 import com.linkedin.events.metadata.DatasetIdentifier;
 import com.linkedin.events.metadata.FailedMetadataInventoryEvent;
+import com.linkedin.events.metadata.MetadataChangeEvent;
 import com.linkedin.events.metadata.MetadataInventoryEvent;
 import com.typesafe.config.Config;
 import java.util.List;
@@ -27,13 +28,13 @@ import lombok.extern.slf4j.Slf4j;
 import org.apache.avro.generic.IndexedRecord;
 import org.apache.commons.lang3.exception.ExceptionUtils;
 import org.apache.kafka.clients.producer.KafkaProducer;
-import wherehows.dao.DaoFactory;
-import wherehows.dao.table.DictDatasetDao;
-import wherehows.dao.view.DatasetViewDao;
 import wherehows.common.exceptions.UnauthorizedException;
+import wherehows.dao.DaoFactory;
+import wherehows.dao.view.DatasetViewDao;
 import wherehows.utils.ProcessorUtil;

 import static wherehows.util.UrnUtil.*;
+import static wherehows.utils.ProcessorUtil.*;


 @Slf4j
@@ -43,14 +44,11 @@ public class MetadataInventoryProcessor extends KafkaMessageProcessor {

   private final DatasetViewDao _datasetViewDao;

-  private final DictDatasetDao _dictDatasetDao;
-
   public MetadataInventoryProcessor(Config config, DaoFactory daoFactory, String producerTopic,
       KafkaProducer<String, IndexedRecord> producer) {
     super(producerTopic, producer);

     _datasetViewDao = daoFactory.getDatasetViewDao();
-    _dictDatasetDao = daoFactory.getDictDatasetDao();

     _whitelistActors = ProcessorUtil.getWhitelistedActors(config, "whitelist.mie");

@@ -70,27 +68,26 @@ public class MetadataInventoryProcessor extends KafkaMessageProcessor {

     final MetadataInventoryEvent event = (MetadataInventoryEvent) indexedRecord;
     try {
-      processEvent(event);
+      for (MetadataChangeEvent mce : processEvent(event)) {
+        sendMessage(mce);
+        log.info("set " + mce.datasetIdentifier + " removed");
+      }
     } catch (Exception exception) {
       log.error("MIE Processor Error:", exception);
       log.error("Message content: {}", event.toString());
-      sendMessage(newFailedEvent(event, exception));
     }
   }

-  public void processEvent(MetadataInventoryEvent event) throws Exception {
+  public List<MetadataChangeEvent> processEvent(MetadataInventoryEvent event) throws Exception {
     final ChangeAuditStamp changeAuditStamp = event.changeAuditStamp;
-    String actorUrn = changeAuditStamp.actorUrn == null ? null : changeAuditStamp.actorUrn.toString();
+    final String actorUrn = changeAuditStamp.actorUrn == null ? null : changeAuditStamp.actorUrn.toString();
     if (_whitelistActors != null && !_whitelistActors.contains(actorUrn)) {
       throw new UnauthorizedException("Actor " + actorUrn + " not in whitelist, skip processing");
     }

     final String platformUrn = event.dataPlatformUrn.toString();
-
     final String platform = getUrnEntity(platformUrn);
-
     final DataOrigin origin = event.dataOrigin;
-
     final String namespace = event.namespace.toString();

     log.info("Processing MIE for " + platform + " " + origin + " " + namespace);
@@ -99,24 +96,21 @@ public class MetadataInventoryProcessor extends KafkaMessageProcessor {
         event.exclusionPatterns.stream().map(s -> Pattern.compile(s.toString())).collect(Collectors.toList());

     final List<String> names = event.nativeNames.stream().map(CharSequence::toString).collect(Collectors.toList());
-    log.info("new datasets: " + names);
+    log.debug("new datasets: " + names);

     final List<String> existingDatasets = _datasetViewDao.listFullNames(platform, origin.name(), namespace);
-    log.info("existing datasets: " + existingDatasets);
+    log.debug("existing datasets: " + existingDatasets);

-    for (String removedDataset : ProcessorUtil.listDiffWithExclusion(existingDatasets, names, exclusions)) {
-      try {
+    // find removed datasets by diff
+    return ProcessorUtil.listDiffWithExclusion(existingDatasets, names, exclusions).stream().map(datasetName -> {
+      // send MCE to DELETE dataset
       DatasetIdentifier identifier = new DatasetIdentifier();
       identifier.dataPlatformUrn = platformUrn;
       identifier.dataOrigin = origin;
-      identifier.nativeName = removedDataset;
+      identifier.nativeName = datasetName;

-      _dictDatasetDao.setDatasetRemoved(identifier, true, changeAuditStamp);
-      log.info("set " + removedDataset + " removed");
-      } catch (Exception e) {
-        log.error("Fail to mark dataset " + removedDataset + " as removed", e);
-      }
-    }
+      return mceDelete(identifier, actorUrn);
+    }).collect(Collectors.toList());
   }

   public FailedMetadataInventoryEvent newFailedEvent(MetadataInventoryEvent event, Throwable throwable) {
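The reason this refactor helps Samza is visible in the new processEvent signature: it only computes the DELETE MCEs and leaves emission to the caller, so any runtime can decide how to send them. A minimal sketch of a Samza task driving it, assuming Samza's low-level StreamTask API; the task class, output stream name, and processor wiring below are hypothetical and not part of this commit:

import com.linkedin.events.metadata.MetadataChangeEvent;
import com.linkedin.events.metadata.MetadataInventoryEvent;
import org.apache.samza.system.IncomingMessageEnvelope;
import org.apache.samza.system.OutgoingMessageEnvelope;
import org.apache.samza.system.SystemStream;
import org.apache.samza.task.MessageCollector;
import org.apache.samza.task.StreamTask;
import org.apache.samza.task.TaskCoordinator;
// import for MetadataInventoryProcessor omitted: its package is not shown in this diff

public class MetadataInventoryTask implements StreamTask {
  // Output stream name is a placeholder; a real job would read it from config.
  private static final SystemStream OUTPUT = new SystemStream("kafka", "MetadataChangeEvent");

  private final MetadataInventoryProcessor _processor;

  public MetadataInventoryTask(MetadataInventoryProcessor processor) {
    _processor = processor;
  }

  @Override
  public void process(IncomingMessageEnvelope envelope, MessageCollector collector,
      TaskCoordinator coordinator) throws Exception {
    final MetadataInventoryEvent event = (MetadataInventoryEvent) envelope.getMessage();
    // processEvent only computes the DELETE MCEs; emitting them is left to the caller,
    // so here they go out through Samza's collector instead of a raw KafkaProducer.
    for (MetadataChangeEvent mce : _processor.processEvent(event)) {
      collector.send(new OutgoingMessageEnvelope(OUTPUT, mce));
    }
  }
}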
ProcessorUtil.java

@@ -13,6 +13,10 @@
  */
 package wherehows.utils;

+import com.linkedin.events.metadata.ChangeAuditStamp;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.events.metadata.DatasetIdentifier;
+import com.linkedin.events.metadata.MetadataChangeEvent;
 import com.typesafe.config.Config;
 import java.util.Arrays;
 import java.util.HashSet;
@@ -61,4 +65,20 @@ public class ProcessorUtil {

     return new HashSet<>(Arrays.asList(actors.split(";")));
   }
+
+  /**
+   * Create MCE to DELETE the dataset
+   */
+  public static MetadataChangeEvent mceDelete(@Nonnull DatasetIdentifier dataset, String actor) {
+    MetadataChangeEvent mce = new MetadataChangeEvent();
+    mce.datasetIdentifier = dataset;
+
+    ChangeAuditStamp auditStamp = new ChangeAuditStamp();
+    auditStamp.actorUrn = actor;
+    auditStamp.time = System.currentTimeMillis();
+    auditStamp.type = ChangeType.DELETE;
+    mce.changeAuditStamp = auditStamp;
+
+    return mce;
+  }
 }
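The new helper pairs with ProcessorUtil.listDiffWithExclusion to reproduce the whole diff-then-delete flow outside the processor. A standalone sketch with made-up inventory contents; the dataset names, exclusion pattern, platform URN, and actor URN below are illustrative only:

import com.linkedin.events.metadata.DataOrigin;
import com.linkedin.events.metadata.DatasetIdentifier;
import com.linkedin.events.metadata.MetadataChangeEvent;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
import wherehows.utils.ProcessorUtil;

public class MceDeleteSketch {
  public static void main(String[] args) {
    // Previous inventory snapshot vs. the names reported in the current MIE:
    // "db.b" disappeared, so it should produce exactly one DELETE MCE.
    List<String> existing = Arrays.asList("db.a", "db.b", "db.c");
    List<String> incoming = Arrays.asList("db.a", "db.c");
    List<Pattern> exclusions = Collections.singletonList(Pattern.compile("tmp\\..*"));

    for (String removed : ProcessorUtil.listDiffWithExclusion(existing, incoming, exclusions)) {
      DatasetIdentifier identifier = new DatasetIdentifier();
      identifier.dataPlatformUrn = "urn:li:dataPlatform:hive"; // illustrative values
      identifier.dataOrigin = DataOrigin.DEV;
      identifier.nativeName = removed;

      MetadataChangeEvent mce = ProcessorUtil.mceDelete(identifier, "urn:li:corpuser:tester");
      System.out.println("DELETE MCE for " + mce.datasetIdentifier.nativeName);
    }
  }
}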
ProcessorUtilTest.java

@@ -14,6 +14,10 @@
 package wherehows.util;

 import com.google.common.collect.ImmutableSet;
+import com.linkedin.events.metadata.ChangeType;
+import com.linkedin.events.metadata.DataOrigin;
+import com.linkedin.events.metadata.DatasetIdentifier;
+import com.linkedin.events.metadata.MetadataChangeEvent;
 import com.typesafe.config.Config;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -74,4 +78,23 @@ public class ProcessorUtilTest {

     assertEquals(actors, null);
   }
+
+  @Test
+  public void testMceDelete() {
+    String actor = "tester";
+    DatasetIdentifier dataset = makeDataset("test");
+    MetadataChangeEvent mce = ProcessorUtil.mceDelete(dataset, actor);
+
+    assertEquals(mce.datasetIdentifier, dataset);
+    assertEquals(mce.changeAuditStamp.type, ChangeType.DELETE);
+    assertEquals(mce.changeAuditStamp.actorUrn, actor);
+  }
+
+  private DatasetIdentifier makeDataset(String datasetName) {
+    DatasetIdentifier identifier = new DatasetIdentifier();
+    identifier.nativeName = datasetName;
+    identifier.dataPlatformUrn = "urn:li:dataPlatform:hive";
+    identifier.dataOrigin = DataOrigin.DEV;
+    return identifier;
+  }
 }