Mirror of https://github.com/datahub-project/datahub.git (synced 2025-10-10 00:13:49 +00:00)
fix(standalone-consumers): Exclude Solr from spring boot application config & make them run on M1 (#5827)
* fix(standalone-consumers): Removes Solr from the Spring Boot application config
* Adds a standalone-consumers option to the datahub quickstart CLI
* Renames files
* Makes dockerize platform-agnostic & changes the docker compose utility invocation to work on M1
* Fixes the MAE/MCE Dockerfiles for ARM & makes smoke tests use standalone consumers
* Fixes indentation
* Uses master code
* Adds ARM image publishing for the consumers
* Fixes linter / lint issues
This commit is contained in: commit 20138a32e5 (parent aa146db611)
.github/workflows/docker-unified.yml (vendored, 4 changed lines)
@@ -128,7 +128,7 @@ jobs:
           publish: ${{ needs.setup.outputs.publish }}
           context: .
           file: ./docker/datahub-mae-consumer/Dockerfile
-          platforms: linux/amd64
+          platforms: linux/amd64,linux/arm64
   mae_consumer_scan:
     name: "[Monitoring] Scan MAE consumer images for vulnerabilities"
     runs-on: ubuntu-latest
@@ -171,7 +171,7 @@ jobs:
           publish: ${{ needs.setup.outputs.publish }}
           context: .
           file: ./docker/datahub-mce-consumer/Dockerfile
-          platforms: linux/amd64
+          platforms: linux/amd64,linux/arm64
   mce_consumer_scan:
     name: "[Monitoring] Scan MCE consumer images for vulnerabilities"
     runs-on: ubuntu-latest
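The workflow change above just asks Buildx for an additional target architecture. A rough local equivalent of what CI now does for the MAE consumer image, as a sketch (assumes a Buildx builder with QEMU emulation is configured; the tag is illustrative):

    docker buildx build \
      --platform linux/amd64,linux/arm64 \
      -f ./docker/datahub-mae-consumer/Dockerfile \
      -t datahub-mae-consumer:multiarch-test \
      .

Without --push or --load the result only lands in the build cache, which is enough to verify that both platforms build.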
docker/datahub-mae-consumer/Dockerfile

@@ -1,15 +1,30 @@
 # Defining environment
 ARG APP_ENV=prod
 
-FROM adoptopenjdk/openjdk8:alpine-jre as base
+FROM alpine:3.14 AS base
+
 ENV DOCKERIZE_VERSION v0.6.1
-RUN apk --no-cache add curl tar wget bash coreutils \
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && if [ $(arch) = "aarch64" ]; then \
+           DOCKERIZE_ARCH='aarch64';\
+       elif [ $(arch) = "x86_64" ]; then \
+           DOCKERIZE_ARCH='amd64'; \
+       else \
+           echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
+       fi \
+    && apk --no-cache add tar curl bash openjdk8-jre \
     && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar \
     && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
-    && curl -sSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+    && curl -sSL https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
-FROM adoptopenjdk/openjdk8:alpine-slim as prod-build
-RUN apk --no-cache add openjdk8-jre perl
+FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && apk --no-cache add openjdk8 perl
+
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mae-consumer-job:build -x test
 RUN cd datahub-src && cp metadata-jobs/mae-consumer-job/build/libs/mae-consumer-job.jar ../mae-consumer-job.jar
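The DOCKERIZE_ARCH branch keys off what `arch` prints inside the base image; a quick way to see both values (assumes Docker can run the non-native platform via QEMU/binfmt):

    docker run --rm --platform linux/amd64 alpine:3.14 arch   # prints x86_64
    docker run --rm --platform linux/arm64 alpine:3.14 arch   # prints aarch64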
docker/datahub-mce-consumer/Dockerfile

@@ -1,15 +1,31 @@
 # Defining environment
 ARG APP_ENV=prod
 
-FROM adoptopenjdk/openjdk8:alpine-jre as base
+FROM alpine:3.14 AS base
+
 ENV DOCKERIZE_VERSION v0.6.1
-RUN apk --no-cache add curl tar wget openjdk8-jre bash \
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && if [ $(arch) = "aarch64" ]; then \
+           DOCKERIZE_ARCH='aarch64';\
+       elif [ $(arch) = "x86_64" ]; then \
+           DOCKERIZE_ARCH='amd64'; \
+       else \
+           echo >&2 "Unsupported architecture $(arch)" ; exit 1; \
+       fi \
+    && apk --no-cache add tar curl bash openjdk8-jre \
     && wget --no-verbose https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar \
     && wget --no-verbose https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
     && cp /usr/lib/jvm/java-1.8-openjdk/jre/lib/security/cacerts /tmp/kafka.client.truststore.jks \
-    && curl -sSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
+    && curl -sSL https://github.com/treff7es/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-${DOCKERIZE_ARCH}-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv
 
-FROM openjdk:8 as prod-build
+FROM --platform=$BUILDPLATFORM alpine:3.14.2 AS prod-build
+
+# Upgrade Alpine and base packages
+RUN apk --no-cache --update-cache --available upgrade \
+    && apk --no-cache add openjdk8 perl
+
 COPY . datahub-src
 RUN cd datahub-src && ./gradlew :metadata-jobs:mce-consumer-job:build
 RUN cd datahub-src && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-job.jar ../mce-consumer-job.jar
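Once the arm64 consumer images are published by the workflow above, the multi-arch manifest can be spot-checked with Buildx (tag shown is the quickstart default, ${DATAHUB_VERSION:-head}):

    docker buildx imagetools inspect linkedin/datahub-mce-consumer:head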
docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml (new file, 41 lines)

@@ -0,0 +1,41 @@
services:
  datahub-gms:
    environment:
      - MAE_CONSUMER_ENABLED=false
      - MCE_CONSUMER_ENABLED=false
  datahub-mae-consumer:
    container_name: datahub-mae-consumer
    depends_on:
      - kafka-setup
      - elasticsearch-setup
    environment:
      - DATAHUB_GMS_HOST=datahub-gms
      - DATAHUB_GMS_PORT=8080
      - MAE_CONSUMER_ENABLED=true
      - PE_CONSUMER_ENABLED=true
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
      - ELASTICSEARCH_HOST=elasticsearch
      - ELASTICSEARCH_PORT=9200
      - GRAPH_SERVICE_IMPL=elasticsearch
      - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
    hostname: datahub-mae-consumer
    image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
    ports:
      - 9091:9091
  datahub-mce-consumer:
    container_name: datahub-mce-consumer
    depends_on:
      - kafka-setup
      - datahub-gms
    environment:
      - MCE_CONSUMER_ENABLED=true
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
      - DATAHUB_GMS_HOST=datahub-gms
      - DATAHUB_GMS_PORT=8080
    hostname: datahub-mce-consumer
    image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-head}
    ports:
      - 9090:9090
version: '2.3'
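These consumer files are meant to be layered on top of the base quickstart compose file; the CLI does this automatically when --standalone_consumers is passed, but a hand-rolled sketch of the Elasticsearch-only variant would look roughly like this (run from docker/quickstart, file names as above):

    docker compose \
      -f docker-compose-without-neo4j.quickstart.yml \
      -f docker-compose.consumers-without-neo4j.quickstart.yml \
      up -d --remove-orphans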
docker/quickstart/docker-compose.consumers.quickstart.yml (new file, 46 lines)

@@ -0,0 +1,46 @@
services:
  datahub-gms:
    environment:
      - MAE_CONSUMER_ENABLED=false
      - MCE_CONSUMER_ENABLED=false
  datahub-mae-consumer:
    container_name: datahub-mae-consumer
    depends_on:
      - kafka-setup
      - elasticsearch-setup
      - neo4j
    environment:
      - DATAHUB_GMS_HOST=datahub-gms
      - DATAHUB_GMS_PORT=8080
      - MAE_CONSUMER_ENABLED=true
      - PE_CONSUMER_ENABLED=true
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
      - ELASTICSEARCH_HOST=elasticsearch
      - ELASTICSEARCH_PORT=9200
      - NEO4J_HOST=http://neo4j:7474
      - NEO4J_URI=bolt://neo4j
      - NEO4J_USERNAME=neo4j
      - NEO4J_PASSWORD=datahub
      - GRAPH_SERVICE_IMPL=neo4j
      - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-mae-consumer/resources/entity-registry.yml
    hostname: datahub-mae-consumer
    image: linkedin/datahub-mae-consumer:${DATAHUB_VERSION:-head}
    ports:
      - 9091:9091
  datahub-mce-consumer:
    container_name: datahub-mce-consumer
    depends_on:
      - kafka-setup
      - datahub-gms
    environment:
      - MCE_CONSUMER_ENABLED=true
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
      - DATAHUB_GMS_HOST=datahub-gms
      - DATAHUB_GMS_PORT=8080
    hostname: datahub-mce-consumer
    image: linkedin/datahub-mce-consumer:${DATAHUB_VERSION:-head}
    ports:
      - 9090:9090
version: '2.3'
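After the stack is up, a quick check that the consumers run as their own containers (plain docker / compose commands; service and container names exactly as defined above):

    docker compose -f docker-compose.quickstart.yml -f docker-compose.consumers.quickstart.yml ps
    docker logs datahub-mae-consumer --tail 20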
Quickstart compose generation scripts (shell):

@@ -15,6 +15,8 @@ pip install -r requirements.txt
 python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml temp.quickstart.yml
 python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml temp-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml temp.monitoring.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers.yml temp.consumers.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml temp.consumers-without-neo4j.quickstart.yml
 
 for flavour in "${FLAVOURS[@]}"
 do

@@ -12,3 +12,5 @@ pip install -r requirements.txt
 python generate_docker_quickstart.py ../docker-compose.yml ../docker-compose.override.yml docker-compose.quickstart.yml
 python generate_docker_quickstart.py ../docker-compose-without-neo4j.yml ../docker-compose-without-neo4j.override.yml docker-compose-without-neo4j.quickstart.yml
 python generate_docker_quickstart.py ../monitoring/docker-compose.monitoring.yml docker-compose.monitoring.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers.yml docker-compose.consumers.quickstart.yml
+python generate_docker_quickstart.py ../docker-compose.consumers-without-neo4j.yml docker-compose.consumers-without-neo4j.quickstart.yml
DataHub quickstart CLI (Python):

@@ -36,10 +36,17 @@ ELASTIC_QUICKSTART_COMPOSE_FILE = (
 M1_QUICKSTART_COMPOSE_FILE = (
     "docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml"
 )
+CONSUMERS_QUICKSTART_COMPOSE_FILE = (
+    "docker/quickstart/docker-compose.consumers.quickstart.yml"
+)
+ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE = (
+    "docker/quickstart/docker-compose.consumers-without-neo4j.quickstart.yml"
+)
 
 BOOTSTRAP_MCES_FILE = "metadata-ingestion/examples/mce_files/bootstrap_mce.json"
 
 GITHUB_BASE_URL = "https://raw.githubusercontent.com/datahub-project/datahub/master"
 
 GITHUB_NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_URL = (
     f"{GITHUB_BASE_URL}/{NEO4J_AND_ELASTIC_QUICKSTART_COMPOSE_FILE}"
 )
@@ -188,7 +195,8 @@ def _attempt_stop(quickstart_compose_file: List[pathlib.Path]) -> None:
     if compose_files_for_stopping:
         # docker-compose stop
         base_command: List[str] = [
-            "docker-compose",
+            "docker",
+            "compose",
             *itertools.chain.from_iterable(
                 ("-f", f"{path}") for path in compose_files_for_stopping
             ),
@@ -473,6 +481,13 @@ DATAHUB_MAE_CONSUMER_PORT=9091
     default=False,
     help="Disables the restoration of indices of a running quickstart instance when used in conjunction with --restore.",
 )
+@click.option(
+    "--standalone_consumers",
+    required=False,
+    is_flag=True,
+    default=False,
+    help="Launches MAE & MCE consumers as stand alone docker containers",
+)
 @upgrade.check_upgrade
 @telemetry.with_telemetry
 def quickstart(
@@ -493,6 +508,7 @@ def quickstart(
     restore_file: str,
     restore_indices: bool,
     no_restore_indices: bool,
+    standalone_consumers: bool,
 ) -> None:
     """Start an instance of DataHub locally using docker-compose.
 
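With the new flag wired through, standalone consumers can be requested straight from the CLI; a usage sketch (flag exactly as defined above, everything else left at its default):

    datahub docker quickstart --standalone_consumers

This pulls the matching consumers compose file from GitHub and disables the embedded MAE/MCE consumers in GMS, per the compose overrides shown earlier.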
@@ -570,6 +586,32 @@
             tmp_file.write(quickstart_download_response.content)
             logger.debug(f"Copied to {path}")
 
+    if standalone_consumers:
+        consumer_github_file = (
+            f"{GITHUB_BASE_URL}/{CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+            if should_use_neo4j
+            else f"{GITHUB_BASE_URL}/{ELASTIC_CONSUMERS_QUICKSTART_COMPOSE_FILE}"
+        )
+
+        default_consumer_compose_file = (
+            Path(DATAHUB_ROOT_FOLDER) / "quickstart/docker-compose.consumers.yml"
+        )
+        with open(
+            default_consumer_compose_file, "wb"
+        ) if default_consumer_compose_file else tempfile.NamedTemporaryFile(
+            suffix=".yml", delete=False
+        ) as tmp_file:
+            path = pathlib.Path(tmp_file.name)
+            quickstart_compose_file.append(path)
+            click.echo(
+                f"Fetching consumer docker-compose file {consumer_github_file} from GitHub"
+            )
+            # Download the quickstart docker-compose file from GitHub.
+            quickstart_download_response = requests.get(consumer_github_file)
+            quickstart_download_response.raise_for_status()
+            tmp_file.write(quickstart_download_response.content)
+            logger.debug(f"Copied to {path}")
+
     # set version
     _set_environment_variables(
         version=version,
@@ -581,7 +623,8 @@
     )
 
     base_command: List[str] = [
-        "docker-compose",
+        "docker",
+        "compose",
         *itertools.chain.from_iterable(
             ("-f", f"{path}") for path in quickstart_compose_file
         ),
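Since base_command now shells out to the Compose V2 plugin ("docker compose ...") rather than the standalone docker-compose binary, older Docker installs need the plugin present; a quick preflight check:

    docker compose version   # should print something like "Docker Compose version v2.x.x"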
@@ -597,7 +640,7 @@
         )
     except subprocess.CalledProcessError:
         click.secho(
-            "Error while pulling images. Going to attempt to move on to docker-compose up assuming the images have "
+            "Error while pulling images. Going to attempt to move on to docker compose up assuming the images have "
             "been built locally",
             fg="red",
         )
@@ -623,7 +666,7 @@
     up_interval = datetime.timedelta(seconds=30)
     up_attempts = 0
     while (datetime.datetime.now() - start_time) < max_wait_time:
-        # Attempt to run docker-compose up every minute.
+        # Attempt to run docker compose up every minute.
         if (datetime.datetime.now() - start_time) > up_attempts * up_interval:
             click.echo()
             subprocess.run(base_command + ["up", "-d", "--remove-orphans"])
@@ -651,7 +694,7 @@
 
     if dump_logs_on_failure:
         with open(log_file.name, "r") as logs:
-            click.echo("Dumping docker-compose logs:")
+            click.echo("Dumping docker compose logs:")
             click.echo(logs.read())
         click.echo()
 
MaeConsumerApplication.java:

@@ -2,6 +2,7 @@ package com.linkedin.metadata.kafka;
 
 import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
 import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration;
 import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration;
@@ -10,7 +11,8 @@ import org.springframework.context.annotation.FilterType;
 
 
 @SuppressWarnings("checkstyle:HideUtilityClassConstructor")
-@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class})
+@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class,
+    SolrHealthContributorAutoConfiguration.class})
 @ComponentScan(excludeFilters = {
     @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = ScheduledAnalyticsFactory.class)})
 public class MaeConsumerApplication {
MceConsumerApplication.java:

@@ -2,6 +2,7 @@ package com.linkedin.metadata.kafka;
 
 import com.linkedin.gms.factory.telemetry.ScheduledAnalyticsFactory;
 import org.springframework.boot.SpringApplication;
+import org.springframework.boot.actuate.autoconfigure.solr.SolrHealthContributorAutoConfiguration;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
 import org.springframework.boot.autoconfigure.cassandra.CassandraAutoConfiguration;
 import org.springframework.boot.autoconfigure.elasticsearch.ElasticsearchRestClientAutoConfiguration;
@@ -10,7 +11,8 @@ import org.springframework.context.annotation.FilterType;
 
 
 @SuppressWarnings("checkstyle:HideUtilityClassConstructor")
-@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class})
+@SpringBootApplication(exclude = {ElasticsearchRestClientAutoConfiguration.class, CassandraAutoConfiguration.class,
+    SolrHealthContributorAutoConfiguration.class})
 @ComponentScan(excludeFilters = {
     @ComponentScan.Filter(type = FilterType.ASSIGNABLE_TYPE, classes = ScheduledAnalyticsFactory.class)})
 public class MceConsumerApplication {
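Excluding SolrHealthContributorAutoConfiguration keeps the consumers' Spring Boot health reporting from probing a Solr instance that is not part of this deployment. Assuming the actuator is what is served on the ports mapped in the compose files above (9091 for MAE, 9090 for MCE; an assumption, not spelled out in this diff), a standalone consumer can be spot-checked with:

    curl -s http://localhost:9091/actuator/health   # MAE consumer (assumed actuator port)
    curl -s http://localhost:9090/actuator/health   # MCE consumer (assumed actuator port)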
Smoke-test setup script (shell):

@@ -21,6 +21,7 @@ pip install -r requirements.txt
 
 echo "DATAHUB_VERSION = $DATAHUB_VERSION"
 DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --quickstart-compose-file ../docker/quickstart/docker-compose-without-neo4j.quickstart.yml --dump-logs-on-failure
+#DATAHUB_TELEMETRY_ENABLED=false datahub docker quickstart --standalone_consumers --build-locally --dump-logs-on-failure
 
 (cd ..; ./gradlew :smoke-test:yarnInstall)