diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml
index 7afa33c1ea..20194bd650 100644
--- a/.github/workflows/docker-unified.yml
+++ b/.github/workflows/docker-unified.yml
@@ -128,7 +128,7 @@ jobs:
       publish: ${{ needs.setup.outputs.publish }}
       context: .
       file: ./docker/datahub-mae-consumer/Dockerfile
-      platforms: linux/amd64
+      platforms: linux/amd64,linux/arm64
   mae_consumer_scan:
     name: "[Monitoring] Scan MAE consumer images for vulnerabilities"
     runs-on: ubuntu-latest
@@ -171,7 +171,7 @@
       publish: ${{ needs.setup.outputs.publish }}
       context: .
       file: ./docker/datahub-mce-consumer/Dockerfile
-      platforms: linux/amd64
+      platforms: linux/amd64,linux/arm64
   mce_consumer_scan:
     name: "[Monitoring] Scan MCE consumer images for vulnerabilities"
     runs-on: ubuntu-latest
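Note: with the workflow now targeting both platforms, the multi-arch build can be reproduced locally with Docker buildx. A minimal sketch (the builder step and image tag are illustrative, and it assumes QEMU/binfmt emulation is available for the non-native platform):

    # One-off local check of the multi-arch build (hypothetical tag)
    docker buildx create --use
    docker buildx build \
      --platform linux/amd64,linux/arm64 \
      -f docker/datahub-mae-consumer/Dockerfile \
      -t datahub-mae-consumer:multiarch-test \
      .
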
diff --git a/docker/datahub-mae-consumer/Dockerfile b/docker/datahub-mae-consumer/Dockerfile
index b8b28244dd..f4e3a6946f 100644
--- a/docker/datahub-mae-consumer/Dockerfile
+++ b/docker/datahub-mae-consumer/Dockerfile
@@ -1,15 +1,30 @@
 # Defining environment
 ARG APP_ENV=prod
 
-FROM adoptopenjdk/openjdk8:alpine-jre as base
+FROM alpine:3.14 AS base
+
 ENV DOCKERIZE_VERSION v0.6.1
-RUN apk --no-cache add curl tar wget bash coreutils \
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && if [ $(arch) = "aarch64" ]; then \
+         DOCKERIZE_ARCH='aarch64'; \
+       elif [ $(arch) = "x86_64" ]; then \
+         DOCKERIZE_ARCH='amd64'; \
+       else \
+         echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
+       fi \
+    && apk --no-cache add tar curl bash openjdk8-jre \
     && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar \
     && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
-    && curl -sSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+    && curl -sSL https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+
+FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && apk --no-cache add openjdk8 perl
 
-FROM adoptopenjdk/openjdk8:alpine-slim as prod-build
-RUN apk --no-cache add openjdk8-jre perl
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build -x test
 RUN cd datahub-src && cp metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar ../mae-consumer-job.jar
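The if/elif block above maps the output of arch to the suffix used by the dockerize release tarballs (aarch64 vs. amd64). A quick way to sanity-check that mapping on any host with Docker installed — the snippet mirrors the Dockerfile logic and is not part of the image build:

    docker run --rm alpine:3.14 sh -c '
      if [ "$(arch)" = "aarch64" ]; then DOCKERIZE_ARCH=aarch64;
      elif [ "$(arch)" = "x86_64" ]; then DOCKERIZE_ARCH=amd64;
      else echo >&2 "Unsupported architecture $(arch)"; exit 1; fi
      echo "would fetch dockerize-linux-$DOCKERIZE_ARCH"'
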
diff --git a/docker/datahub-mce-consumer/Dockerfile b/docker/datahub-mce-consumer/Dockerfile
index f15af8426d..3626bb8f7a 100644
--- a/docker/datahub-mce-consumer/Dockerfile
+++ b/docker/datahub-mce-consumer/Dockerfile
@@ -1,15 +1,31 @@
 # Defining environment
 ARG APP_ENV=prod
 
-FROM adoptopenjdk/openjdk8:alpine-jre as base
+FROM alpine:3.14 AS base
+
 ENV DOCKERIZE_VERSION v0.6.1
-RUN apk --no-cache add curl tar wget openjdk8-jre bash \
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && if [ $(arch) = "aarch64" ]; then \
+         DOCKERIZE_ARCH='aarch64'; \
+       elif [ $(arch) = "x86_64" ]; then \
+         DOCKERIZE_ARCH='amd64'; \
+       else \
+         echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
+       fi \
+    && apk --no-cache add tar curl bash openjdk8-jre \
     && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar \
     && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
     && cp /usr/lib/jvm/java-1.8-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks \
-    && curl -sSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+    && curl -sSL https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+
+FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && apk --no-cache add openjdk8 perl
 
-FROM openjdk:8 as prod-build
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build
 RUN cd datahub-src && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar ../mce-consumer-job.jar
diff --git a/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml
new file mode 100644
index 0000000000..966e354207
--- /dev/null
+++ b/docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml
@@ -0,0 +1,41 @@
+services:
+  datahub-gms:
+    environment:
+    - MAE_CONSUMER_ENABLED=false
+    - MCE_CONSUMER_ENABLED=false
+  datahub-mae-consumer:
+    container_name: datahub-mae-consumer
+    depends_on:
+    - kafka-setup
+    - elasticsearch-setup
+    environment:
+    - DATAHUB_GMS_HOST=datahub-gms
+    - DATAHUB_GMS_PORT=8080
+    - MAE_CONSUMER_ENABLED=true
+    - PE_CONSUMER_ENABLED=true
+    - KAFKA_BOOTSTRAP_SERVER=broker:29092
+    - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+    - ELASTICSEARCH_HOST=elasticsearch
+    - ELASTICSEARCH_PORT=9200
+    - GRAPH_SERVICE_IMPL=elasticsearch
+    - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
+    hostname: datahub-mae-consumer
+    image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
+    ports:
+    - 9091:9091
+  datahub-mce-consumer:
+    container_name: datahub-mce-consumer
+    depends_on:
+    - kafka-setup
+    - datahub-gms
+    environment:
+    - MCE_CONSUMER_ENABLED=true
+    - KAFKA_BOOTSTRAP_SERVER=broker:29092
+    - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+    - DATAHUB_GMS_HOST=datahub-gms
+    - DATAHUB_GMS_PORT=8080
+    hostname: datahub-mce-consumer
+    image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-head}
+    ports:
+    - 9090:9090
+version: '2.3'
diff --git a/docker/quickstart/docker-compose.consumers.quickstart.yml b/docker/quickstart/docker-compose.consumers.quickstart.yml
new file mode 100644
index 0000000000..700f48d42d
--- /dev/null
+++ b/docker/quickstart/docker-compose.consumers.quickstart.yml
@@ -0,0 +1,46 @@
+services:
+  datahub-gms:
+    environment:
+    - MAE_CONSUMER_ENABLED=false
+    - MCE_CONSUMER_ENABLED=false
+  datahub-mae-consumer:
+    container_name: datahub-mae-consumer
+    depends_on:
+    - kafka-setup
+    - elasticsearch-setup
+    - neo4j
+    environment:
+    - DATAHUB_GMS_HOST=datahub-gms
+    - DATAHUB_GMS_PORT=8080
+    - MAE_CONSUMER_ENABLED=true
+    - PE_CONSUMER_ENABLED=true
+    - KAFKA_BOOTSTRAP_SERVER=broker:29092
+    - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+    - ELASTICSEARCH_HOST=elasticsearch
+    - ELASTICSEARCH_PORT=9200
+    - NEO4J_HOST=http://neo4j:7474
+    - NEO4J_URI=bolt://neo4j
+    - NEO4J_USERNAME=neo4j
+    - NEO4J_PASSWORD=datahub
+    - GRAPH_SERVICE_IMPL=neo4j
+    - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
+    hostname: datahub-mae-consumer
+    image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
+    ports:
+    - 9091:9091
+  datahub-mce-consumer:
+    container_name: datahub-mce-consumer
+    depends_on:
+    - kafka-setup
+    - datahub-gms
+    environment:
+    - MCE_CONSUMER_ENABLED=true
+    - KAFKA_BOOTSTRAP_SERVER=broker:29092
+    - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+    - DATAHUB_GMS_HOST=datahub-gms
+    - DATAHUB_GMS_PORT=8080
+    hostname: datahub-mce-consumer
+    image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-head}
+    ports:
+    - 9090:9090
+version: '2.3'
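The generated files above are consumed by the datahub CLI (see the docker.py changes below), but they can also be layered onto a running quickstart by hand, since docker compose merges multiple -f files. A sketch, assuming both quickstart files have already been generated or downloaded into docker/quickstart/ and DATAHUB_VERSION is set as usual:

    cd docker/quickstart
    docker compose \
      -f docker-compose.quickstart.yml \
      -f docker-compose.consumers.quickstart.yml \
      up -d
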
diff --git a/docker/quickstart/generate_and_compare.sh b/docker/quickstart/generate_and_compare.sh
index 6939eae7c2..4e7c26b265 100755
--- a/docker/quickstart/generate_and_compare.sh
+++ b/docker/quickstart/generate_and_compare.sh
@@ -15,6 +15,8 @@ pip install -r requirements.txt
 python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml
 python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers.yml temp.consumers.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml temp.consumers-without-neo4j.quickstart.yml
 
 for flavour in "${FLAVOURS[@]}"
 do
diff --git a/docker/quickstart/generate_docker_quickstart.sh b/docker/quickstart/generate_docker_quickstart.sh
index d8427a67b5..aa3c767430 100755
--- a/docker/quickstart/generate_docker_quickstart.sh
+++ b/docker/quickstart/generate_docker_quickstart.sh
@@ -12,3 +12,5 @@ pip install -r requirements.txt
 python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml
 python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers.yml docker-compose.consumers.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml docker-compose.consumers-without-neo4j.quickstart.yml
diff --git a/metadata-ingestion/src/datahub/cli/docker.py b/metadata-ingestion/src/datahub/cli/docker.py
index 63fc4c6144..51c5197f56 100644
--- a/metadata-ingestion/src/datahub/cli/docker.py
+++ b/metadata-ingestion/src/datahub/cli/docker.py
@@ -36,10 +36,17 @@ ELASTIC_QUICKSTART_COMPOSE_FILE = (
 M1_QUICKSTART_COMPOSE_FILE = (
     "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
 )
+CONSUMERS_QUICKSTART_COMPOSE_FILE = (
+    "docker/quickstart/docker-compose.consumers.quickstart.yml"
+)
+ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
+    "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
+)
 BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
 
 GITHUB_BASE_URL = "https://raw.githubusercontent.com/datahub-project/datahub/master"
+
 GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
     f"{GITHUB_BASE_URL}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
 )
@@ -188,7 +195,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
     if compose_files_for_stopping:
         # docker-compose stop
         base_command: List[str] = [
-            "docker-compose",
+            "docker",
+            "compose",
             *itertools.chain.from_iterable(
                 ("-f", f"{path}") for path in compose_files_for_stopping
             ),
@@ -473,6 +481,13 @@ DATAHUB_MAE_CONSUMER_PORT=9091
     default=False,
     help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
 )
+@click.option(
+    "--standalone_consumers",
+    required=False,
+    is_flag=True,
+    default=False,
+    help="Launches MAE & MCE consumers as standalone Docker containers",
+)
 @upgrade.check_upgrade
 @telemetry.with_telemetry
 def quickstart(
@@ -493,6 +508,7 @@ def quickstart(
     restore_file: str,
     restore_indices: bool,
     no_restore_indices: bool,
+    standalone_consumers: bool,
 ) -> None:
     """Start an instance of DataHub locally using docker-compose.
@@ -570,6 +586,32 @@ def quickstart(
             tmp_file.write(quickstart_download_response.content)
             logger.debug(f"Copied to {path}")
 
+    if standalone_consumers:
+        consumer_github_file = (
+            f"{GITHUB_BASE_URL}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+            if should_use_neo4j
+            else f"{GITHUB_BASE_URL}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+        )
+
+        default_consumer_compose_file = (
+            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
+        )
+        with open(
+            default_consumer_compose_file, "wb"
+        ) if default_consumer_compose_file else tempfile.NamedTemporaryFile(
+            suffix=".yml", delete=False
+        ) as tmp_file:
+            path = pathlib.Path(tmp_file.name)
+            quickstart_compose_file.append(path)
+            click.echo(
+                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
+            )
+            # Download the quickstart docker-compose file from GitHub.
+            quickstart_download_response = requests.get(consumer_github_file)
+            quickstart_download_response.raise_for_status()
+            tmp_file.write(quickstart_download_response.content)
+            logger.debug(f"Copied to {path}")
+
     # set version
     _set_environment_variables(
         version=version,
@@ -581,7 +623,8 @@
     )
 
     base_command: List[str] = [
-        "docker-compose",
+        "docker",
+        "compose",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in quickstart_compose_file
         ),
@@ -597,7 +640,7 @@
         )
     except subprocess.CalledProcessError:
         click.secho(
-            "Error while pulling images. Going to attempt to move on to docker-compose up assuming the images have "
+            "Error while pulling images. Going to attempt to move on to docker compose up assuming the images have "
            "been built locally",
             fg="red",
         )
@@ -623,7 +666,7 @@
     up_interval = datetime.timedelta(seconds=30)
     up_attempts = 0
     while (datetime.datetime.now() - start_time) < max_wait_time:
-        # Attempt to run docker-compose up every minute.
+        # Attempt to run docker compose up every minute.
         if (datetime.datetime.now() - start_time) > up_attempts * up_interval:
             click.echo()
             subprocess.run(base_command + ["up", "-d", "--remove-orphans"])
@@ -651,7 +694,7 @@
 
     if dump_logs_on_failure:
         with open(log_file.name, "r") as logs:
-            click.echo("Dumping docker-compose logs:")
+            click.echo("Dumping docker compose logs:")
             click.echo(logs.read())
 
     click.echo()
diff --git a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java
index cd856399c4..cf04c5bfb6 100644
--- a/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java
+++ b/metadata-jobs/mae-consumer-job/src/main/java/com/linkedin/metadata/kafka/MaeConsumerApplication.java
@@ -2,6 +2,7 @@ package com.linkedin.metadata.kafka;
 
 import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
 import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration;
 import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration;
@@ -10,7 +11,8 @@ import org.springframework.context.annotation.FilterType;
 
 @SuppressWarnings("checkstyle:HideUtilityClassConstructor")
-@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class})
+@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class,
+    SolrHealthContributorAutoConfiguration.class})
 @ComponentScan(excludeFilters = {
     @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = ScheduledAnalyticsFactory.class)})
 public class MaeConsumerApplication {
diff --git a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java
index ba605a59ab..840abedc20 100644
--- a/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java
+++ b/metadata-jobs/mce-consumer-job/src/main/java/com/linkedin/metadata/kafka/MceConsumerApplication.java
@@ -2,6 +2,7 @@ package com.linkedin.metadata.kafka;
 
 import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
 import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration;
 import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration;
@@ -10,7 +11,8 @@ import org.springframework.context.annotation.FilterType;
 
 @SuppressWarnings("checkstyle:HideUtilityClassConstructor")
-@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class})
+@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class,
+    SolrHealthContributorAutoConfiguration.class})
 @ComponentScan(excludeFilters = {
     @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = ScheduledAnalyticsFactory.class)})
 public class MceConsumerApplication {
diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh
index 141f0c70e1..133ed51dda 100755
--- a/smoke-test/smoke.sh
+++ b/smoke-test/smoke.sh
@@ -21,6 +21,7 @@ pip install -r requirements.txt
 echo "DATAHUB_VERSION = $DATAHUB_VERSION"
 
 DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --quickstart-compose-file ../docker/quickstart/docker-compose-without-neo4j.quickstart.yml --dump-logs-on-failure
+#DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --standalone_consumers --build-locally --dump-logs-on-failure
 
 (cd ..; ./gradlew :smoke-test:yarnInstall)
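
The commented-out smoke-test line above would exercise the new flag end to end once enabled. The flag introduced in docker.py can also be tried manually:

    # Launch DataHub with the MAE/MCE consumers as separate containers
    DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --standalone_consumers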