feat(gms): Merge MAE, MCE consumers into GMS (#2690)

John Joyce 2021-06-15 08:44:15 -07:00 committed by GitHub
parent 7591c8994a
commit cc95916201
64 changed files with 300 additions and 323 deletions

View File

@@ -20,11 +20,11 @@ dependencies:
- name: datahub-mae-consumer
version: 0.2.1
repository: file://./charts/datahub-mae-consumer
condition: datahub-mae-consumer.enabled
condition: global.datahub_standalone_consumers_enabled
- name: datahub-mce-consumer
version: 0.2.1
repository: file://./charts/datahub-mce-consumer
condition: datahub-mce-consumer.enabled
condition: global.datahub_standalone_consumers_enabled
- name: datahub-ingestion-cron
version: 0.2.1
repository: file://./charts/datahub-ingestion-cron
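With the consumers folded into GMS, both standalone charts are now gated by a single global flag instead of per-chart enabled switches. For operators who still want the consumers as separate deployments, a minimal values override might look like the sketch below (only the flag name is taken from this change; everything else is illustrative):

    # hypothetical values.yaml override
    global:
      # true  -> deploy datahub-mae-consumer and datahub-mce-consumer separately
      # false -> (new default) GMS runs both consumers in-process
      datahub_standalone_consumers_enabled: true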

View File

@@ -73,6 +73,14 @@ spec:
periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
env:
{{- if not .Values.global.datahub_standalone_consumers_enabled }}
- name: MCE_CONSUMER_ENABLED
value: "true"
- name: MAE_CONSUMER_ENABLED
value: "true"
{{- end }}
- name: DATAHUB_ANALYTICS_ENABLED
value: "{{ .Values.global.datahub_analytics_enabled }}"
- name: EBEAN_DATASOURCE_USERNAME
value: "{{ .Values.global.sql.datasource.username }}"
- name: EBEAN_DATASOURCE_PASSWORD
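When datahub_standalone_consumers_enabled is left at its new default of false, the template above injects the consumer toggles straight into the GMS container, so the rendered pod spec carries roughly the following env entries (a sketch; other variables omitted):

    env:
      - name: MCE_CONSUMER_ENABLED
        value: "true"
      - name: MAE_CONSUMER_ENABLED
        value: "true"
      - name: DATAHUB_ANALYTICS_ENABLED
        value: "true"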

View File

@@ -146,6 +146,8 @@ readinessProbe:
#This section is useful if we are installing this chart separately for testing
# helm install datahub-gms datahub-gms/
global:
datahub_analytics_enabled: true
elasticsearch:
host: "elasticsearch"
port: "9200"

View File

@@ -73,6 +73,8 @@ spec:
periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
env:
- name: MAE_CONSUMER_ENABLED
value: "true"
- name: GMS_HOST
value: {{ printf "%s-%s" .Release.Name "datahub-gms" }}
- name: GMS_PORT

View File

@@ -69,6 +69,8 @@ spec:
periodSeconds: {{ .Values.readinessProbe.periodSeconds }}
failureThreshold: {{ .Values.readinessProbe.failureThreshold }}
env:
- name: MCE_CONSUMER_ENABLED
value: "true"
- name: KAFKA_BOOTSTRAP_SERVER
value: "{{ .Values.global.kafka.bootstrap.server }}"
- name: KAFKA_SCHEMAREGISTRY_URL

View File

@@ -15,18 +15,6 @@ datahub-frontend:
ingress:
enabled: false
datahub-mae-consumer:
enabled: true
image:
repository: linkedin/datahub-mae-consumer
tag: "v0.8.1"
datahub-mce-consumer:
enabled: true
image:
repository: linkedin/datahub-mce-consumer
tag: "v0.8.1"
elasticsearchSetupJob:
enabled: true
image:

View File

@@ -58,7 +58,9 @@ datahubUpgrade:
tag: "v0.8.1"
global:
datahub_analytics_enabled: true
datahub_standalone_consumers_enabled: false
elasticsearch:
host: "elasticsearch-master"

View File

@@ -1,87 +0,0 @@
package com.linkedin.datahub.upgrade.nocode;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.linkedin.datahub.upgrade.UpgradeContext;
import com.linkedin.datahub.upgrade.UpgradeStep;
import com.linkedin.datahub.upgrade.UpgradeStepResult;
import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.function.Function;
public class MAEQualificationStep implements UpgradeStep {
private static String convertStreamToString(InputStream is) {
BufferedReader reader = new BufferedReader(new InputStreamReader(is));
StringBuilder sb = new StringBuilder();
String line = null;
try {
while ((line = reader.readLine()) != null) {
sb.append(line + "\n");
}
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
is.close();
} catch (IOException e) {
e.printStackTrace();
}
}
return sb.toString();
}
MAEQualificationStep() { }
@Override
public String id() {
return "MAEQualificationStep";
}
@Override
public int retryCount() {
return 2;
}
@Override
public Function<UpgradeContext, UpgradeStepResult> executable() {
return (context) -> {
String maeHost = System.getenv("DATAHUB_MAE_CONSUMER_HOST") == null ? "localhost" : System.getenv("DATAHUB_MAE_CONSUMER_HOST");
String maePort = System.getenv("DATAHUB_MAE_CONSUMER_PORT") == null ? "9091" : System.getenv("DATAHUB_MAE_CONSUMER_PORT");
try {
String spec = String.format("http://%s:%s/config", maeHost, maePort);
URLConnection gmsConnection = new URL(spec).openConnection();
InputStream response = gmsConnection.getInputStream();
String responseString = convertStreamToString(response);
ObjectMapper mapper = new ObjectMapper();
JsonNode configJson = mapper.readTree(responseString);
if (configJson.get("noCode").asBoolean()) {
context.report().addLine("MAE Consumer is running and up to date. Proceeding with upgrade...");
return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.SUCCEEDED);
} else {
context.report().addLine(String.format("Failed to qualify MAE Consumer. It is not running on the latest version."
+ "Re-run MAE Consumer on the latest datahub release"));
return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.FAILED);
}
} catch (Exception e) {
e.printStackTrace();
context.report().addLine(String.format(
"ERROR: Cannot connect to MAE Consumer"
+ "at host %s port %s. Make sure MAE Consumer is on the latest version "
+ "and is running at that host before starting the migration.",
maeHost, maePort));
return new DefaultUpgradeStepResult(id(), UpgradeStepResult.Result.FAILED);
}
};
}
}

View File

@@ -58,7 +58,6 @@ public class NoCodeUpgrade implements Upgrade {
final List<UpgradeStep> steps = new ArrayList<>();
steps.add(new RemoveAspectV2TableStep(server));
steps.add(new GMSQualificationStep());
steps.add(new MAEQualificationStep());
steps.add(new UpgradeQualificationStep(server));
steps.add(new CreateAspectTableStep(server));
steps.add(new IngestDataPlatformsStep(entityService));

View File

@@ -13,6 +13,12 @@ NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
MAE_CONSUMER_ENABLED=true
MCE_CONSUMER_ENABLED=true
# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false
# Uncomment to configure kafka topic names
# Make sure these names are consistent across the whole deployment
# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4

View File

@@ -11,3 +11,5 @@ NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
MAE_CONSUMER_ENABLED=true
MCE_CONSUMER_ENABLED=true

View File

@@ -11,3 +11,6 @@ NEO4J_HOST=http://neo4j:7474
NEO4J_URI=bolt://neo4j
NEO4J_USERNAME=neo4j
NEO4J_PASSWORD=datahub
MAE_CONSUMER_ENABLED=true
MCE_CONSUMER_ENABLED=true

View File

@@ -1,3 +1,4 @@
MAE_CONSUMER_ENABLED=true
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
ELASTICSEARCH_HOST=elasticsearch

View File

@@ -1,3 +1,4 @@
MCE_CONSUMER_ENABLED=true
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
GMS_HOST=datahub-gms

View File

@@ -0,0 +1,23 @@
version: '3.8'
services:
datahub-mae-consumer:
image: linkedin/datahub-mae-consumer:debug
build:
context: datahub-mae-consumer
dockerfile: Dockerfile
args:
APP_ENV: dev
volumes:
- ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh
- ../metadata-jobs/mae-consumer-job/build/libs/:/datahub/datahub-mae-consumer/bin/
datahub-mce-consumer:
image: linkedin/datahub-mce-consumer:debug
build:
context: datahub-mce-consumer
dockerfile: Dockerfile
args:
APP_ENV: dev
volumes:
- ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh
- ../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin

View File

@@ -0,0 +1,31 @@
# Service definitions for standalone Kafka consumer containers.
version: '3.8'
services:
datahub-mae-consumer:
build:
context: ../
dockerfile: docker/datahub-mae-consumer/Dockerfile
image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-latest}
env_file: datahub-mae-consumer/env/docker.env
hostname: datahub-mae-consumer
container_name: datahub-mae-consumer
ports:
- "9091:9091"
depends_on:
- kafka-setup
- elasticsearch-setup
- neo4j
datahub-mce-consumer:
build:
context: ../
dockerfile: docker/datahub-mce-consumer/Dockerfile
image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
env_file: datahub-mce-consumer/env/docker.env
hostname: datahub-mce-consumer
container_name: datahub-mce-consumer
ports:
- "9090:9090"
depends_on:
- kafka-setup
- datahub-gms
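Since this new file defines only the two consumer services, it is intended to be layered on top of the base stack when the consumers are run outside of GMS. A hypothetical invocation (the compose file names and paths are assumptions, not shown in this diff):

    # file names are illustrative; substitute the repo's actual compose files
    docker-compose -f docker-compose.yml -f docker-compose.consumers.yml up -d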

View File

@@ -50,25 +50,3 @@ services:
APP_ENV: dev
volumes:
- ../datahub-frontend/build/stage/datahub-frontend:/datahub-frontend
datahub-mae-consumer:
image: linkedin/datahub-mae-consumer:debug
build:
context: datahub-mae-consumer
dockerfile: Dockerfile
args:
APP_ENV: dev
volumes:
- ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh
- ../metadata-jobs/mae-consumer-job/build/libs/:/datahub/datahub-mae-consumer/bin/
datahub-mce-consumer:
image: linkedin/datahub-mce-consumer:debug
build:
context: datahub-mce-consumer
dockerfile: Dockerfile
args:
APP_ENV: dev
volumes:
- ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh
- ../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin

View File

@@ -160,35 +160,6 @@ services:
depends_on:
- datahub-gms
datahub-mae-consumer:
build:
context: ../
dockerfile: docker/datahub-mae-consumer/Dockerfile
image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-latest}
env_file: datahub-mae-consumer/env/docker.env
hostname: datahub-mae-consumer
container_name: datahub-mae-consumer
ports:
- "9091:9091"
depends_on:
- kafka-setup
- elasticsearch-setup
- neo4j
datahub-mce-consumer:
build:
context: ../
dockerfile: docker/datahub-mce-consumer/Dockerfile
image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
env_file: datahub-mce-consumer/env/docker.env
hostname: datahub-mce-consumer
container_name: datahub-mce-consumer
ports:
- "9090:9090"
depends_on:
- kafka-setup
- datahub-gms
networks:
default:
name: datahub_network

View File

@@ -55,48 +55,13 @@ services:
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
hostname: datahub-gms
image: linkedin/datahub-gms:${DATAHUB_VERSION:-latest}
mem_limit: 850m
ports:
- 8080:8080
datahub-mae-consumer:
container_name: datahub-mae-consumer
depends_on:
- kafka-setup
- elasticsearch-setup
- neo4j
environment:
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- ELASTICSEARCH_HOST=elasticsearch
- ELASTICSEARCH_PORT=9200
- NEO4J_HOST=http://neo4j:7474
- NEO4J_URI=bolt://neo4j
- NEO4J_USERNAME=neo4j
- NEO4J_PASSWORD=datahub
- GMS_HOST=datahub-gms
- GMS_PORT=8080
hostname: datahub-mae-consumer
image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-latest}
mem_limit: 256m
ports:
- 9091:9091
datahub-mce-consumer:
container_name: datahub-mce-consumer
depends_on:
- kafka-setup
- datahub-gms
environment:
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- GMS_HOST=datahub-gms
- GMS_PORT=8080
hostname: datahub-mce-consumer
image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-latest}
mem_limit: 384m
ports:
- 9090:9090
elasticsearch:
container_name: elasticsearch
environment:

View File

@@ -4,6 +4,8 @@ ext.apiProject = project(':gms:api')
dependencies {
runtime project(':gms:factories')
runtime project(':metadata-jobs:mce-consumer')
runtime project(':metadata-jobs:mae-consumer')
runtime externalDependency.h2
runtime externalDependency.logbackClassic

View File

@@ -7,9 +7,7 @@ REQUIRED_CONTAINERS = [
"elasticsearch-setup",
"elasticsearch",
"datahub-gms",
"datahub-mce-consumer",
"datahub-frontend-react",
"datahub-mae-consumer",
"kafka-setup",
"schema-registry",
"broker",
@@ -21,6 +19,8 @@ REQUIRED_CONTAINERS = [
# "schema-registry-ui",
# "kibana",
# "kafka-rest-proxy",
# "datahub-mce-consumer",
# "datahub-mae-consumer"
]
ENSURE_EXIT_SUCCESS = [

View File

@@ -3,64 +3,15 @@ plugins {
id 'java'
}
apply plugin: 'pegasus'
configurations {
avro
}
dependencies {
avro project(path: ':metadata-models', configuration: 'avroSchema')
compile project(':li-utils')
compile (project(':gms:factories')) {
exclude group: 'org.neo4j.test'
}
compile project(':metadata-utils')
compile project(":entity-registry")
compile project(':metadata-builders')
compile project(':metadata-dao-impl:restli-dao')
compile project(':metadata-events:mxe-schemas')
compile project(':metadata-events:mxe-avro-1.7')
compile project(':metadata-events:mxe-registration')
compile project(':metadata-events:mxe-utils-avro-1.7')
compile externalDependency.elasticSearchRest
compile externalDependency.gmaDaoApi
compile externalDependency.gmaNeo4jDao
compile externalDependency.kafkaAvroSerde
compile externalDependency.neo4jJavaDriver
compile (externalDependency.springBootStarterWeb) {
compile project(':metadata-jobs:mae-consumer')
compile(externalDependency.springBootStarterWeb) {
exclude module: "spring-boot-starter-tomcat"
}
compile externalDependency.springBootStarterJetty
compile externalDependency.springKafka
compile externalDependency.springActuator
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
runtime externalDependency.logbackClassic
}
task avroSchemaSources(type: Copy) {
dependsOn configurations.avro
from { // use of closure defers evaluation until execution time
configurations.avro.collect { zipTree(it) }
}
into("src/main/resources/")
include("avro/com/linkedin/mxe/")
}
compileJava.dependsOn avroSchemaSources
clean {
project.delete("src/main/resources/avro")
}
bootJar {
mainClassName = 'com.linkedin.metadata.kafka.MaeConsumerApplication'
}
}

View File

@@ -8,8 +8,8 @@ import org.springframework.boot.autoconfigure.elasticsearch.rest.RestClientAutoConfiguration;
@SpringBootApplication(exclude = {RestClientAutoConfiguration.class})
public class MaeConsumerApplication {
public static void main(String[] args) {
Class<?>[] primarySources = {MaeConsumerApplication.class, MaeConsumerConfig.class};
SpringApplication.run(primarySources, args);
}
}
public static void main(String[] args) {
Class<?>[] primarySources = {MaeConsumerApplication.class, com.linkedin.metadata.kafka.MaeConsumerConfig.class};
SpringApplication.run(primarySources, args);
}
}

View File

@@ -0,0 +1,57 @@
plugins {
id 'java'
}
apply plugin: 'pegasus'
configurations {
avro
}
dependencies {
avro project(path: ':metadata-models', configuration: 'avroSchema')
compile project(':li-utils')
compile (project(':gms:factories')) {
exclude group: 'org.neo4j.test'
}
compile project(':metadata-utils')
compile project(":entity-registry")
compile project(':metadata-builders')
compile project(':metadata-dao-impl:restli-dao')
compile project(':metadata-events:mxe-schemas')
compile project(':metadata-events:mxe-avro-1.7')
compile project(':metadata-events:mxe-registration')
compile project(':metadata-events:mxe-utils-avro-1.7')
compile externalDependency.elasticSearchRest
compile externalDependency.gmaDaoApi
compile externalDependency.gmaNeo4jDao
compile externalDependency.kafkaAvroSerde
compile externalDependency.neo4jJavaDriver
compile externalDependency.springKafka
compile externalDependency.springActuator
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
runtime externalDependency.logbackClassic
}
task avroSchemaSources(type: Copy) {
dependsOn configurations.avro
from { // use of closure defers evaluation until execution time
configurations.avro.collect { zipTree(it) }
}
into("src/main/resources/")
include("avro/com/linkedin/mxe/")
}
compileJava.dependsOn avroSchemaSources
clean {
project.delete("src/main/resources/avro")
}

View File

@@ -1,6 +1,7 @@
package com.linkedin.metadata.kafka;
import com.linkedin.events.metadata.ChangeType;
import com.linkedin.metadata.kafka.config.DataHubUsageEventsProcessorCondition;
import com.linkedin.metadata.kafka.elasticsearch.ElasticsearchConnector;
import com.linkedin.metadata.kafka.elasticsearch.JsonElasticEvent;
import com.linkedin.metadata.kafka.transformer.DataHubUsageEventTransformer;
@@ -11,7 +12,7 @@ import java.net.URLEncoder;
import java.util.Optional;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.context.annotation.Conditional;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.stereotype.Component;
@@ -19,16 +20,18 @@ import org.springframework.stereotype.Component;
@Slf4j
@Component
@ConditionalOnProperty(value = "DATAHUB_ANALYTICS_ENABLED", havingValue = "true", matchIfMissing = true)
@EnableKafka
@Conditional(DataHubUsageEventsProcessorCondition.class)
public class DataHubUsageEventsProcessor {
private final ElasticsearchConnector elasticSearchConnector;
private final DataHubUsageEventTransformer dataHubUsageEventTransformer;
private final String indexName;
public DataHubUsageEventsProcessor(ElasticsearchConnector elasticSearchConnector,
DataHubUsageEventTransformer dataHubUsageEventTransformer, IndexConvention indexConvention) {
public DataHubUsageEventsProcessor(
ElasticsearchConnector elasticSearchConnector,
DataHubUsageEventTransformer dataHubUsageEventTransformer,
IndexConvention indexConvention) {
this.elasticSearchConnector = elasticSearchConnector;
this.dataHubUsageEventTransformer = dataHubUsageEventTransformer;
this.indexName = indexConvention.getIndexName("datahub_usage_event");

View File

@@ -10,6 +10,7 @@ import com.linkedin.metadata.dao.utils.RecordUtils;
import com.linkedin.metadata.extractor.FieldExtractor;
import com.linkedin.metadata.graph.Edge;
import com.linkedin.metadata.graph.GraphService;
import com.linkedin.metadata.kafka.config.MetadataAuditEventsProcessorCondition;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.RelationshipFieldSpec;
import com.linkedin.metadata.models.registry.SnapshotEntityRegistry;
@@ -34,6 +35,7 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.avro.generic.GenericRecord;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Conditional;
import org.springframework.context.annotation.Import;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.annotation.KafkaListener;
@@ -44,6 +46,7 @@ import static com.linkedin.metadata.dao.Neo4jUtil.createRelationshipFilter;
@Slf4j
@Component
@Conditional(MetadataAuditEventsProcessorCondition.class)
@Import({GraphServiceFactory.class, SearchServiceFactory.class})
@EnableKafka
public class MetadataAuditEventsProcessor {

View File

@@ -0,0 +1,19 @@
package com.linkedin.metadata.kafka.config;
import org.springframework.context.annotation.Condition;
import org.springframework.context.annotation.ConditionContext;
import org.springframework.core.env.Environment;
import org.springframework.core.type.AnnotatedTypeMetadata;
public class DataHubUsageEventsProcessorCondition implements Condition {
@Override
public boolean matches(
ConditionContext context,
AnnotatedTypeMetadata metadata) {
Environment env = context.getEnvironment();
return "true".equals(env.getProperty("MAE_CONSUMER_ENABLED")) && (
env.getProperty("DATAHUB_ANALYTICS_ENABLED") == null
|| "true".equals(env.getProperty("DATAHUB_ANALYTICS_ENABLED")));
}
}
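Because the gate is now a plain Spring Condition rather than a @ConditionalOnProperty annotation, it can be exercised without starting an application context. A minimal sketch of such a check, assuming Mockito and spring-test are on the test classpath (the class below is hypothetical, not part of this commit):

    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.when;

    import com.linkedin.metadata.kafka.config.DataHubUsageEventsProcessorCondition;
    import org.springframework.context.annotation.ConditionContext;
    import org.springframework.mock.env.MockEnvironment;

    public class UsageEventsConditionSketch {
      public static void main(String[] args) {
        DataHubUsageEventsProcessorCondition condition = new DataHubUsageEventsProcessorCondition();
        ConditionContext context = mock(ConditionContext.class);

        // MAE consumer enabled, analytics flag unset: analytics defaults to on, so the condition matches.
        when(context.getEnvironment()).thenReturn(
            new MockEnvironment().withProperty("MAE_CONSUMER_ENABLED", "true"));
        System.out.println(condition.matches(context, null)); // true

        // Analytics explicitly disabled: the usage event processor should not be registered.
        when(context.getEnvironment()).thenReturn(
            new MockEnvironment()
                .withProperty("MAE_CONSUMER_ENABLED", "true")
                .withProperty("DATAHUB_ANALYTICS_ENABLED", "false"));
        System.out.println(condition.matches(context, null)); // false
      }
    }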

View File

@@ -15,8 +15,6 @@ import com.linkedin.metadata.builders.search.TagIndexBuilder;
import com.linkedin.metadata.builders.search.GlossaryTermInfoIndexBuilder;
import com.linkedin.metadata.builders.search.GlossaryNodeInfoIndexBuilder;
import com.linkedin.metadata.restli.DefaultRestliClientFactory;
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl;
import com.linkedin.restli.client.Client;
import java.util.HashSet;
import java.util.Set;
@@ -71,12 +69,4 @@ public class IndexBuildersConfig {
public Client restliClient() {
return DefaultRestliClientFactory.getRestLiClient(gmsHost, gmsPort);
}
/**
* Convention for naming search indices
*/
@Bean
public IndexConvention indexConvention() {
return new IndexConventionImpl(indexPrefix);
}
}

View File

@@ -20,7 +20,7 @@ import org.springframework.kafka.core.DefaultKafkaConsumerFactory;
@Slf4j
@Configuration
public class KafkaConfig {
public class MaeKafkaConfig {
@Value("${KAFKA_BOOTSTRAP_SERVER:http://localhost:9092}")
private String kafkaBootstrapServer;
@Value("${KAFKA_SCHEMAREGISTRY_URL:http://localhost:8081}")

View File

@@ -0,0 +1,17 @@
package com.linkedin.metadata.kafka.config;
import org.springframework.context.annotation.Condition;
import org.springframework.context.annotation.ConditionContext;
import org.springframework.core.env.Environment;
import org.springframework.core.type.AnnotatedTypeMetadata;
public class MetadataAuditEventsProcessorCondition implements Condition {
@Override
public boolean matches(
ConditionContext context,
AnnotatedTypeMetadata metadata) {
Environment env = context.getEnvironment();
return "true".equals(env.getProperty("MAE_CONSUMER_ENABLED"));
}
}

View File

@@ -21,7 +21,7 @@ public class ElasticsearchConnectorFactory {
@Bean(name = "elasticsearchConnector")
@Nonnull
public ElasticsearchConnector createInstance(RestHighLevelClient elasticSearchRestHighLevelClient) {
public ElasticsearchConnector createInstance(@Nonnull RestHighLevelClient elasticSearchRestHighLevelClient) {
return new ElasticsearchConnector(elasticSearchRestHighLevelClient, bulkRequestsLimit, bulkFlushPeriod);
}

View File

@@ -9,6 +9,7 @@ import com.linkedin.metadata.snapshot.ChartSnapshot;
import com.linkedin.restli.client.Client;
import java.net.URISyntaxException;
import java.util.Optional;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
@@ -20,7 +21,7 @@ public class ChartHydrator implements Hydrator {
private static final String DASHBOARD_TOOL = "dashboardTool";
private static final String TITLE = "title";
public ChartHydrator(Client restliClient) {
public ChartHydrator(@Nonnull Client restliClient) {
_restliClient = restliClient;
_remoteDAO = new RestliRemoteDAO<>(ChartSnapshot.class, ChartAspect.class, _restliClient);
}

View File

@@ -9,6 +9,7 @@ import com.linkedin.metadata.snapshot.CorpUserSnapshot;
import com.linkedin.restli.client.Client;
import java.net.URISyntaxException;
import java.util.Optional;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
@@ -20,7 +21,7 @@ public class CorpUserHydrator implements Hydrator {
private static final String USER_NAME = "username";
private static final String NAME = "name";
public CorpUserHydrator(Client restliClient) {
public CorpUserHydrator(@Nonnull Client restliClient) {
_restliClient = restliClient;
_remoteDAO = new RestliRemoteDAO<>(CorpUserSnapshot.class, CorpUserAspect.class, _restliClient);
}

View File

@@ -9,6 +9,7 @@ import com.linkedin.metadata.snapshot.DashboardSnapshot;
import com.linkedin.restli.client.Client;
import java.net.URISyntaxException;
import java.util.Optional;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
@@ -20,7 +21,7 @@ public class DashboardHydrator implements Hydrator {
private static final String DASHBOARD_TOOL = "dashboardTool";
private static final String TITLE = "title";
public DashboardHydrator(Client restliClient) {
public DashboardHydrator(@Nonnull Client restliClient) {
_restliClient = restliClient;
_remoteDAO = new RestliRemoteDAO<>(DashboardSnapshot.class, DashboardAspect.class, _restliClient);
}

View File

@@ -9,6 +9,7 @@ import com.linkedin.metadata.snapshot.DataFlowSnapshot;
import com.linkedin.restli.client.Client;
import java.net.URISyntaxException;
import java.util.Optional;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
@@ -20,7 +21,7 @@ public class DataFlowHydrator implements Hydrator {
private static final String ORCHESTRATOR = "orchestrator";
private static final String NAME = "name";
public DataFlowHydrator(Client restliClient) {
public DataFlowHydrator(@Nonnull Client restliClient) {
_restliClient = restliClient;
_remoteDAO = new RestliRemoteDAO<>(DataFlowSnapshot.class, DataFlowAspect.class, _restliClient);
}

View File

@@ -9,6 +9,7 @@ import com.linkedin.metadata.snapshot.DataJobSnapshot;
import com.linkedin.restli.client.Client;
import java.net.URISyntaxException;
import java.util.Optional;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
@@ -20,7 +21,7 @@ public class DataJobHydrator implements Hydrator {
private static final String ORCHESTRATOR = "orchestrator";
private static final String NAME = "name";
public DataJobHydrator(Client restliClient) {
public DataJobHydrator(@Nonnull Client restliClient) {
_restliClient = restliClient;
_remoteDAO = new RestliRemoteDAO<>(DataJobSnapshot.class, DataJobAspect.class, _restliClient);
}

View File

@@ -6,6 +6,7 @@ import com.linkedin.restli.client.Client;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nonnull;
public class HydratorFactory {
@@ -14,7 +15,7 @@ public class HydratorFactory {
public static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
public HydratorFactory(Client restliClient) {
public HydratorFactory(@Nonnull Client restliClient) {
_restliClient = restliClient;
_hydratorMap = new HashMap<>();
_hydratorMap.put(EntityType.DATASET, new DatasetHydrator());

View File

@@ -9,6 +9,7 @@ import com.linkedin.metadata.kafka.hydrator.EntityType;
import com.linkedin.metadata.kafka.hydrator.HydratorFactory;
import java.util.Optional;
import java.util.Set;
import javax.annotation.Nonnull;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
@@ -36,7 +37,7 @@ public class DataHubUsageEventTransformer {
String document;
}
public DataHubUsageEventTransformer(HydratorFactory hydratorFactory) {
public DataHubUsageEventTransformer(@Nonnull HydratorFactory hydratorFactory) {
this.hydratorFactory = hydratorFactory;
}

View File

@@ -3,59 +3,15 @@ plugins {
id 'java'
}
apply plugin: 'pegasus'
configurations {
avro
}
dependencies {
avro project(path: ':metadata-models', configuration: 'avroSchema')
compile project(':li-utils')
compile project(':metadata-utils')
compile project(':metadata-builders')
compile project(':metadata-events:mxe-schemas')
compile project(':metadata-events:mxe-avro-1.7')
compile project(':metadata-events:mxe-registration')
compile project(':metadata-events:mxe-utils-avro-1.7')
compile project(':metadata-dao-impl:restli-dao')
compile project(':metadata-io')
compile project(':gms:client')
compile spec.product.pegasus.restliClient
compile spec.product.pegasus.restliCommon
compile externalDependency.elasticSearchRest
compile externalDependency.kafkaAvroSerde
compile (externalDependency.springBootStarterWeb) {
compile project(':metadata-jobs:mce-consumer')
compile(externalDependency.springBootStarterWeb) {
exclude module: "spring-boot-starter-tomcat"
}
compile externalDependency.springBootStarterJetty
compile externalDependency.springKafka
compile externalDependency.springActuator
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
}
task avroSchemaSources(type: Copy) {
dependsOn configurations.avro
from { // use of closure defers evaluation until execution time
configurations.avro.collect { zipTree(it) }
}
into("src/main/resources/")
include("avro/com/linkedin/mxe/")
}
compileJava.dependsOn avroSchemaSources
clean {
project.delete("src/main/resources/avro")
}
bootJar {
mainClassName = 'com.linkedin.metadata.kafka.MceConsumerApplication'
}
}

View File

@@ -12,4 +12,4 @@ public class MceConsumerApplication {
public static void main(String[] args) {
SpringApplication.run(MceConsumerApplication.class, args);
}
}
}

View File

@@ -0,0 +1,52 @@
plugins {
id 'java'
}
apply plugin: 'pegasus'
configurations {
avro
}
dependencies {
avro project(path: ':metadata-models', configuration: 'avroSchema')
compile project(':li-utils')
compile project(':metadata-utils')
compile project(':metadata-builders')
compile project(':metadata-events:mxe-schemas')
compile project(':metadata-events:mxe-avro-1.7')
compile project(':metadata-events:mxe-registration')
compile project(':metadata-events:mxe-utils-avro-1.7')
compile project(':metadata-dao-impl:restli-dao')
compile project(':metadata-io')
compile project(':gms:client')
compile spec.product.pegasus.restliClient
compile spec.product.pegasus.restliCommon
compile externalDependency.elasticSearchRest
compile externalDependency.kafkaAvroSerde
compile externalDependency.springKafka
compile externalDependency.springActuator
compileOnly externalDependency.lombok
annotationProcessor externalDependency.lombok
}
task avroSchemaSources(type: Copy) {
dependsOn configurations.avro
from { // use of closure defers evaluation until execution time
configurations.avro.collect { zipTree(it) }
}
into("src/main/resources/")
include("avro/com/linkedin/mxe/")
}
compileJava.dependsOn avroSchemaSources
clean {
project.delete("src/main/resources/avro")
}

View File

@@ -3,6 +3,7 @@ package com.linkedin.metadata.kafka;
import com.linkedin.entity.client.EntityClient;
import com.linkedin.entity.Entity;
import com.linkedin.metadata.EventUtils;
import com.linkedin.metadata.kafka.config.MetadataChangeEventsProcessorCondition;
import com.linkedin.metadata.snapshot.Snapshot;
import com.linkedin.mxe.FailedMetadataChangeEvent;
import com.linkedin.mxe.MetadataChangeEvent;
@@ -15,6 +16,7 @@ import org.apache.avro.generic.GenericRecord;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Conditional;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.core.KafkaTemplate;
@@ -23,6 +25,7 @@ import org.springframework.stereotype.Component;
@Slf4j
@Component
@Conditional(MetadataChangeEventsProcessorCondition.class)
@EnableKafka
public class MetadataChangeEventsProcessor {
@@ -40,7 +43,7 @@ public class MetadataChangeEventsProcessor {
}
@KafkaListener(id = "${KAFKA_CONSUMER_GROUP_ID:mce-consumer-job-client}", topics = "${KAFKA_MCE_TOPIC_NAME:"
+ Topics.METADATA_CHANGE_EVENT + "}")
+ Topics.METADATA_CHANGE_EVENT + "}", containerFactory = "mceKafkaContainerFactory")
public void consume(final ConsumerRecord<String, GenericRecord> consumerRecord) {
final GenericRecord record = consumerRecord.value();
log.debug("Record ", record);

View File

@@ -24,13 +24,14 @@ import org.springframework.kafka.listener.ErrorHandler;
@Slf4j
@Configuration
public class KafkaConfig {
public class MceKafkaConfig {
@Value("${KAFKA_BOOTSTRAP_SERVER:http://localhost:9092}")
private String kafkaBootstrapServers;
@Value("${KAFKA_SCHEMAREGISTRY_URL:http://localhost:8081}")
private String kafkaSchemaRegistryUrl;
@Bean
@Bean(name = "mceKafkaContainerFactory")
public KafkaListenerContainerFactory<?> kafkaListenerContainerFactory(KafkaProperties properties) {
KafkaProperties.Consumer consumerProps = properties.getConsumer();

View File

@@ -0,0 +1,17 @@
package com.linkedin.metadata.kafka.config;
import org.springframework.context.annotation.Condition;
import org.springframework.context.annotation.ConditionContext;
import org.springframework.core.env.Environment;
import org.springframework.core.type.AnnotatedTypeMetadata;
public class MetadataChangeEventsProcessorCondition implements Condition {
@Override
public boolean matches(
ConditionContext context,
AnnotatedTypeMetadata metadata) {
Environment env = context.getEnvironment();
return "true".equals(env.getProperty("MCE_CONSUMER_ENABLED"));
}
}

View File

@@ -21,6 +21,8 @@ include 'metadata-ingestion'
include 'metadata-ingestion-examples:common'
include 'metadata-ingestion-examples:kafka-etl'
include 'metadata-ingestion-examples:mce-cli'
include 'metadata-jobs:mae-consumer'
include 'metadata-jobs:mce-consumer'
include 'metadata-jobs:mae-consumer-job'
include 'metadata-jobs:mce-consumer-job'
include 'metadata-models'