fix(standalone-consumers): Exclude Solr from Spring Boot application config & make the standalone consumers run on M1 (#5827)

* fix(standalone-consumers): Removes Solr from Spring Boot application config

* Adds standalone consumer option to datahub quickstart cli

* Rename files

* Make dockerize platform agnostic & change docker compose utility to work with M1

* Fix MAE/MCE dockerfiles for ARM & make smoke tests use standalone consumers

* Fix indentation

* Use master code

* Adds ARM image publishing for consumers

* Fix linter

* Fix lint
Pedro Silva 2022-09-06 19:55:31 +01:00 committed by GitHub
parent aa146db611
commit 20138a32e5
11 changed files with 188 additions and 18 deletions

View File

@@ -128,7 +128,7 @@ jobs:
       publish: ${{ needs.setup.outputs.publish }}
       context: .
       file: ./docker/datahub-mae-consumer/Dockerfile
-      platforms: linux/amd64
+      platforms: linux/amd64,linux/arm64
   mae_consumer_scan:
     name: "[Monitoring] Scan MAE consumer images for vulnerabilities"
     runs-on: ubuntu-latest
@@ -171,7 +171,7 @@ jobs:
       publish: ${{ needs.setup.outputs.publish }}
       context: .
       file: ./docker/datahub-mce-consumer/Dockerfile
-      platforms: linux/amd64
+      platforms: linux/amd64,linux/arm64
   mce_consumer_scan:
     name: "[Monitoring] Scan MCE consumer images for vulnerabilities"
     runs-on: ubuntu-latest
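
Both consumer images can now be produced for amd64 and arm64 from the same workflow. For reference, a roughly equivalent local build — a sketch assuming Docker Buildx with a multi-platform builder is configured; the image tag is illustrative and not part of this commit:

# Build the MAE consumer image for both architectures from the repo root
# (add --push to publish the multi-arch manifest to a registry).
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  -f ./docker/datahub-mae-consumer/Dockerfile \
  -t example/datahub-mae-consumer:dev \
  .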

View File

@@ -1,15 +1,30 @@
 # Defining environment
 ARG APP_ENV=prod

-FROM adoptopenjdk/openjdk8:alpine-jre as base
+FROM alpine:3.14 AS base
 ENV DOCKERIZE_VERSION v0.6.1
-RUN apk --no-cache add curl tar wget bash coreutils \
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && if [ $(arch) = "aarch64" ]; then \
+           DOCKERIZE_ARCH='aarch64'; \
+       elif [ $(arch) = "x86_64" ]; then \
+           DOCKERIZE_ARCH='amd64'; \
+       else \
+           echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
+       fi \
+    && apk --no-cache add tar curl bash openjdk8-jre \
     && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar \
     && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
-    && curl -sSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+    && curl -sSL https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv

-FROM adoptopenjdk/openjdk8:alpine-slim as prod-build
-RUN apk --no-cache add openjdk8-jre perl
+FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && apk --no-cache add openjdk8 perl
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build -x test
 RUN cd datahub-src && cp metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar ../mae-consumer-job.jar
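
The arch-detection branch above is what makes the base stage work on M1: it maps the machine name reported inside Alpine to the architecture suffix used in the dockerize release tarballs. The same mapping can be exercised on any Linux host — a minimal sketch, written as a case statement instead of the Dockerfile's if/elif chain, assuming uname -m reports aarch64 or x86_64:

#!/bin/sh
# Map the machine name to the dockerize artifact suffix.
case "$(uname -m)" in
  aarch64) DOCKERIZE_ARCH='aarch64' ;;
  x86_64)  DOCKERIZE_ARCH='amd64' ;;
  *)       echo >&2 "Unsupported architecture $(uname -m)"; exit 1 ;;
esac
echo "dockerize-linux-${DOCKERIZE_ARCH}-v0.6.1.tar.gz"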

View File

@@ -1,15 +1,31 @@
 # Defining environment
 ARG APP_ENV=prod

-FROM adoptopenjdk/openjdk8:alpine-jre as base
+FROM alpine:3.14 AS base
 ENV DOCKERIZE_VERSION v0.6.1
-RUN apk --no-cache add curl tar wget openjdk8-jre bash \
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && if [ $(arch) = "aarch64" ]; then \
+           DOCKERIZE_ARCH='aarch64'; \
+       elif [ $(arch) = "x86_64" ]; then \
+           DOCKERIZE_ARCH='amd64'; \
+       else \
+           echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
+       fi \
+    && apk --no-cache add tar curl bash openjdk8-jre \
     && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar \
     && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
     && cp /usr/lib/jvm/java-1.8-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks \
-    && curl -sSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+    && curl -sSL https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv

-FROM openjdk:8 as prod-build
+FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && apk --no-cache add openjdk8 perl
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build
 RUN cd datahub-src && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar ../mce-consumer-job.jar
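
Since the published tags are now multi-arch manifests, an M1 host pulls the arm64 variant automatically. A quick way to confirm which variant was resolved — a sketch using standard docker CLI commands against the image names from the compose files below:

docker pull linkedin/datahub-mce-consumer:head
docker image inspect --format '{{.Os}}/{{.Architecture}}' linkedin/datahub-mce-consumer:head
# Expected output on an M1 host: linux/arm64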

View File

@@ -0,0 +1,41 @@
+services:
+  datahub-gms:
+    environment:
+      - MAE_CONSUMER_ENABLED=false
+      - MCE_CONSUMER_ENABLED=false
+  datahub-mae-consumer:
+    container_name: datahub-mae-consumer
+    depends_on:
+      - kafka-setup
+      - elasticsearch-setup
+    environment:
+      - DATAHUB_GMS_HOST=datahub-gms
+      - DATAHUB_GMS_PORT=8080
+      - MAE_CONSUMER_ENABLED=true
+      - PE_CONSUMER_ENABLED=true
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
+      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+      - ELASTICSEARCH_HOST=elasticsearch
+      - ELASTICSEARCH_PORT=9200
+      - GRAPH_SERVICE_IMPL=elasticsearch
+      - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
+    hostname: datahub-mae-consumer
+    image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
+    ports:
+      - 9091:9091
+  datahub-mce-consumer:
+    container_name: datahub-mce-consumer
+    depends_on:
+      - kafka-setup
+      - datahub-gms
+    environment:
+      - MCE_CONSUMER_ENABLED=true
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
+      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+      - DATAHUB_GMS_HOST=datahub-gms
+      - DATAHUB_GMS_PORT=8080
+    hostname: datahub-mce-consumer
+    image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-head}
+    ports:
+      - 9090:9090
+version: '2.3'
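
This file is an overlay rather than a complete stack: it flips the in-process consumers off in datahub-gms and adds the two standalone containers. Layering it by hand over the base files from the docker/ directory might look like the following sketch (standard docker compose flags; the file names are the ones referenced by the generation scripts further down):

docker compose \
  -f docker-compose-without-neo4j.yml \
  -f docker-compose-without-neo4j.override.yml \
  -f docker-compose.consumers-without-neo4j.yml \
  up -d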

View File

@@ -0,0 +1,46 @@
+services:
+  datahub-gms:
+    environment:
+      - MAE_CONSUMER_ENABLED=false
+      - MCE_CONSUMER_ENABLED=false
+  datahub-mae-consumer:
+    container_name: datahub-mae-consumer
+    depends_on:
+      - kafka-setup
+      - elasticsearch-setup
+      - neo4j
+    environment:
+      - DATAHUB_GMS_HOST=datahub-gms
+      - DATAHUB_GMS_PORT=8080
+      - MAE_CONSUMER_ENABLED=true
+      - PE_CONSUMER_ENABLED=true
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
+      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+      - ELASTICSEARCH_HOST=elasticsearch
+      - ELASTICSEARCH_PORT=9200
+      - NEO4J_HOST=http://neo4j:7474
+      - NEO4J_URI=bolt://neo4j
+      - NEO4J_USERNAME=neo4j
+      - NEO4J_PASSWORD=datahub
+      - GRAPH_SERVICE_IMPL=neo4j
+      - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
+    hostname: datahub-mae-consumer
+    image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
+    ports:
+      - 9091:9091
+  datahub-mce-consumer:
+    container_name: datahub-mce-consumer
+    depends_on:
+      - kafka-setup
+      - datahub-gms
+    environment:
+      - MCE_CONSUMER_ENABLED=true
+      - KAFKA_BOOTSTRAP_SERVER=broker:29092
+      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+      - DATAHUB_GMS_HOST=datahub-gms
+      - DATAHUB_GMS_PORT=8080
+    hostname: datahub-mce-consumer
+    image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-head}
+    ports:
+      - 9090:9090
+version: '2.3'
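
Once either variant is up, the mapped ports allow probing the consumers directly — a sketch assuming the standard Spring Boot actuator health endpoint is enabled on those ports:

curl -s http://localhost:9091/actuator/health   # MAE consumer
curl -s http://localhost:9090/actuator/health   # MCE consumer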

View File

@@ -15,6 +15,8 @@ pip install -r requirements.txt
 python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml
 python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers.yml temp.consumers.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml temp.consumers-without-neo4j.quickstart.yml

 for flavour in "${FLAVOURS[@]}"
 do

View File

@@ -12,3 +12,5 @@ pip install -r requirements.txt
 python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml
 python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers.yml docker-compose.consumers.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml docker-compose.consumers-without-neo4j.quickstart.yml

View File

@@ -36,10 +36,17 @@ ELASTIC_QUICKSTART_COMPOSE_FILE = (
 M1_QUICKSTART_COMPOSE_FILE = (
     "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
 )
+CONSUMERS_QUICKSTART_COMPOSE_FILE = (
+    "docker/quickstart/docker-compose.consumers.quickstart.yml"
+)
+ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
+    "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
+)
 BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"

 GITHUB_BASE_URL = "https://raw.githubusercontent.com/datahub-project/datahub/master"

 GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
     f"{GITHUB_BASE_URL}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
 )
@@ -188,7 +195,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
     if compose_files_for_stopping:
         # docker-compose stop
         base_command: List[str] = [
-            "docker-compose",
+            "docker",
+            "compose",
             *itertools.chain.from_iterable(
                 ("-f", f"{path}") for path in compose_files_for_stopping
             ),
@@ -473,6 +481,13 @@ DATAHUB_MAE_CONSUMER_PORT=9091
     default=False,
     help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
 )
+@click.option(
+    "--standalone_consumers",
+    required=False,
+    is_flag=True,
+    default=False,
+    help="Launches MAE & MCE consumers as stand alone docker containers",
+)
 @upgrade.check_upgrade
 @telemetry.with_telemetry
 def quickstart(
@@ -493,6 +508,7 @@ def quickstart(
     restore_file: str,
     restore_indices: bool,
     no_restore_indices: bool,
+    standalone_consumers: bool,
 ) -> None:
     """Start an instance of DataHub locally using docker-compose.
@@ -570,6 +586,32 @@ def quickstart(
             tmp_file.write(quickstart_download_response.content)
             logger.debug(f"Copied to {path}")

+    if standalone_consumers:
+        consumer_github_file = (
+            f"{GITHUB_BASE_URL}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+            if should_use_neo4j
+            else f"{GITHUB_BASE_URL}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+        )
+
+        default_consumer_compose_file = (
+            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
+        )
+        with open(
+            default_consumer_compose_file, "wb"
+        ) if default_consumer_compose_file else tempfile.NamedTemporaryFile(
+            suffix=".yml", delete=False
+        ) as tmp_file:
+            path = pathlib.Path(tmp_file.name)
+            quickstart_compose_file.append(path)
+            click.echo(
+                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
+            )
+            # Download the quickstart docker-compose file from GitHub.
+            quickstart_download_response = requests.get(consumer_github_file)
+            quickstart_download_response.raise_for_status()
+            tmp_file.write(quickstart_download_response.content)
+            logger.debug(f"Copied to {path}")
+
     # set version
     _set_environment_variables(
         version=version,
@@ -581,7 +623,8 @@ def quickstart(
     )

     base_command: List[str] = [
-        "docker-compose",
+        "docker",
+        "compose",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in quickstart_compose_file
         ),
@@ -597,7 +640,7 @@ def quickstart(
         )
     except subprocess.CalledProcessError:
         click.secho(
-            "Error while pulling images. Going to attempt to move on to docker-compose up assuming the images have "
+            "Error while pulling images. Going to attempt to move on to docker compose up assuming the images have "
             "been built locally",
             fg="red",
         )
@@ -623,7 +666,7 @@ def quickstart(
     up_interval = datetime.timedelta(seconds=30)
     up_attempts = 0
     while (datetime.datetime.now() - start_time) < max_wait_time:
-        # Attempt to run docker-compose up every minute.
+        # Attempt to run docker compose up every minute.
         if (datetime.datetime.now() - start_time) > up_attempts * up_interval:
             click.echo()
             subprocess.run(base_command + ["up", "-d", "--remove-orphans"])
@@ -651,7 +694,7 @@ def quickstart(
         if dump_logs_on_failure:
             with open(log_file.name, "r") as logs:
-                click.echo("Dumping docker-compose logs:")
+                click.echo("Dumping docker compose logs:")
                 click.echo(logs.read())
         click.echo()
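
Taken together, the CLI changes download one extra compose file and append it to the same docker compose invocation. Typical usage — the flag is the one added above; disabling telemetry mirrors the smoke-test setup and is optional:

# Quickstart with MAE/MCE consumers as standalone containers:
DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --standalone_consumers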

View File

@@ -2,6 +2,7 @@ package com.linkedin.metadata.kafka;

 import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
 import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration;
 import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration;
@@ -10,7 +11,8 @@ import org.springframework.context.annotation.FilterType;

 @SuppressWarnings("checkstyle:HideUtilityClassConstructor")
-@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class})
+@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class,
+    SolrHealthContributorAutoConfiguration.class})
 @ComponentScan(excludeFilters = {
     @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = ScheduledAnalyticsFactory.class)})
 public class MaeConsumerApplication {

View File

@@ -2,6 +2,7 @@ package com.linkedin.metadata.kafka;

 import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
 import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration;
 import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration;
@@ -10,7 +11,8 @@ import org.springframework.context.annotation.FilterType;

 @SuppressWarnings("checkstyle:HideUtilityClassConstructor")
-@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class})
+@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class,
+    SolrHealthContributorAutoConfiguration.class})
 @ComponentScan(excludeFilters = {
     @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = ScheduledAnalyticsFactory.class)})
 public class MceConsumerApplication {
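
With SolrHealthContributorAutoConfiguration excluded, the consumers no longer instantiate a Solr health contributor that could never report up in this stack. A quick negative check — a sketch assuming the standalone containers from the compose files above are running under their default container names:

# Startup logs should no longer mention Solr after this change:
docker logs datahub-mce-consumer 2>&1 | grep -i solr || echo "no Solr references in MCE consumer logs"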

View File

@@ -21,6 +21,7 @@ pip install -r requirements.txt
 echo "DATAHUB_VERSION = $DATAHUB_VERSION"
 DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --quickstart-compose-file ../docker/quickstart/docker-compose-without-neo4j.quickstart.yml --dump-logs-on-failure
+#DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --standalone_consumers --build-locally --dump-logs-on-failure

 (cd ..; ./gradlew :smoke-test:yarnInstall)