feat(ingest): switch quickstart to Python ingestion (#2158)

This commit is contained in:
Harshal Sheth 2021-03-02 11:48:26 -08:00 committed by GitHub
parent 6c7668115f
commit dced25fef7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 26 additions and 63 deletions

View File

@ -1,19 +0,0 @@
# Image for the example MCE ingestion CLI (mce-cli).
#
# Defining environment: APP_ENV selects which "<APP_ENV>-install" stage becomes
# the final image.
#   prod (default) - the jar is built from source inside the prod-build stage.
#   dev            - empty stage; the jar and sample file are expected to be
#                    mounted into the container at runtime.
ARG APP_ENV=prod

FROM openjdk:8-jre-alpine AS base

FROM openjdk:8 AS prod-build
# WORKDIR replaces the former `RUN cd datahub-src && ...`; the sources still
# land in /datahub-src, exactly where the COPY --from lines below read them.
WORKDIR /datahub-src
COPY . .
RUN ./gradlew :metadata-ingestion-examples:mce-cli:build

FROM base AS prod-install
COPY --from=prod-build /datahub-src/metadata-ingestion-examples/mce-cli/build/libs/mce-cli.jar /datahub/ingestion/bin/mce-cli.jar
COPY --from=prod-build /datahub-src/metadata-ingestion-examples/mce-cli/example-bootstrap.json /datahub/ingestion/example-bootstrap.json

FROM base AS dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
# See this excellent thread https://github.com/docker/cli/issues/1134

FROM ${APP_ENV}-install AS final
# Exec form: java runs as PID 1 and receives SIGTERM from `docker stop`
# instead of being wrapped in `/bin/sh -c`.
CMD ["java", "-jar", "/datahub/ingestion/bin/mce-cli.jar", "-m", "produce", "/datahub/ingestion/example-bootstrap.json"]

View File

@ -1,5 +1,3 @@
# DataHub MetadataChangeEvent (MCE) Ingestion Docker Image # DataHub Ingestion Quickstart
Ingests data into [GMA](../../docs/what/gma.md) using the [example ingestion CLI]( Ingests some [sample data](../../metadata-ingestion/examples/mce_files/bootstrap_mce.json) into [GMA](../../docs/what/gma.md) using the [Python ingestion framework](../../metadata-ingestion). Recommended for use with [quickstart](../../docs/quickstart.md) to quickly get a DataHub instance up and running with some fake data.
../../metadata-ingestion-examples/mce-cli/README.md). Recommended to use with [quickstart](../../docs/quickstart.md) to
quickly get a DataHub instance up and running with some fake data.

View File

@ -1,18 +0,0 @@
---
# Development compose file for the ingestion image: instead of building the
# jar inside the image, the locally built mce-cli output and the sample
# bootstrap file are bind-mounted into the container.
version: '3.5'
services:
  ingestion:
    image: datahub-ingestion:debug
    env_file: env/docker.env
    build:
      context: .
      dockerfile: Dockerfile
      args:
        # Build argument passed to the Dockerfile; "dev" is expected to pick
        # its dev-install stage.
        APP_ENV: dev
    volumes:
      - ../../metadata-ingestion-examples/mce-cli/build/libs/:/datahub/ingestion/bin
      - ../../metadata-ingestion-examples/mce-cli/example-bootstrap.json:/datahub/ingestion/example-bootstrap.json
networks:
  default:
    name: datahub_network

View File

@ -2,17 +2,18 @@
version: '3.5' version: '3.5'
services: services:
ingestion: ingestion:
image: datahub-ingestion
env_file: env/docker.env
build: build:
context: ../../ context: ../../
dockerfile: docker/ingestion/Dockerfile dockerfile: docker/datahub-ingestion/Dockerfile
image: linkedin/datahub-ingestion:${DATAHUB_VERSION:-latest}
hostname: ingestion hostname: ingestion
container_name: ingestion container_name: ingestion
environment: command: "ingest -c /sample_recipe.yml"
- KAFKA_BOOTSTRAP_SERVER=broker:29092 volumes:
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 # Most of the config is embedded inside the sample recipe file.
- ./sample_recipe.yml:/sample_recipe.yml:ro
- ../../metadata-ingestion/examples/mce_files/bootstrap_mce.json:/bootstrap_mce.json:ro
networks: networks:
default: default:
name: datahub_network name: datahub_network

View File

@ -1,2 +0,0 @@
# Kafka connection settings.
# NOTE(review): presumably loaded through `env_file` by the ingestion compose
# service — confirm against the compose file that references this file.
# Kafka bootstrap server, as host:port.
KAFKA_BOOTSTRAP_SERVER=broker:29092
# Schema registry base URL.
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081

View File

@ -1,6 +0,0 @@
#!/bin/bash
# Runs the ingestion image using your locally built mce-cli.
# The Gradle build must have been run before this script.

# Resolve the directory that contains this script, so docker-compose picks up
# docker-compose.dev.yml no matter where the script is invoked from.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# "$DIR" is quoted so a checkout path containing spaces or glob characters is
# not word-split before being handed to cd.
cd "$DIR" && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -f docker-compose.dev.yml -p datahub up

View File

@ -1,4 +1,4 @@
#!/bin/bash #!/bin/bash
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
cd $DIR && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub up --build cd $DIR && docker-compose pull && docker-compose -p datahub up

View File

@ -0,0 +1,9 @@
# Sample ingestion recipe: reads events from the /bootstrap_mce.json file and
# sends them to the REST endpoint configured under `sink`.
source:
  type: "file"
  config:
    filename: "/bootstrap_mce.json"

sink:
  type: "datahub-rest"
  config:
    server: 'http://datahub-gms:8080'

View File

@ -52,7 +52,4 @@ Where `my-file.json` is some file that contains a
### Producing the Example Events with Docker ### Producing the Example Events with Docker
We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example or in a We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example.
docker environment using `docker/ingestion/ingestion.sh`. We also have a developer image
(`docker/ingestion/ingestion-dev.sh`) which uses your locally built jar rather than building on the docker image itself,
which may be faster if you have already built code locally.

View File

@ -54,9 +54,12 @@ setuptools.setup(
], ],
python_requires=">=3.6", python_requires=">=3.6",
package_dir={"": "src"}, package_dir={"": "src"},
packages=setuptools.find_packages(where="./src"), packages=setuptools.find_namespace_packages(where="./src"),
include_package_data=True, include_package_data=True,
package_data={"datahub": ["py.typed"]}, package_data={
"datahub": ["py.typed"],
"datahub.metadata": ["schema.avsc"],
},
entry_points={ entry_points={
"console_scripts": ["datahub = datahub.entrypoints:datahub"], "console_scripts": ["datahub = datahub.entrypoints:datahub"],
}, },