feat(ingest): switch quickstart to Python ingestion (#2158)

This commit is contained in:
Harshal Sheth 2021-03-02 11:48:26 -08:00 committed by GitHub
parent 6c7668115f
commit dced25fef7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 26 additions and 63 deletions

View File

@ -1,19 +0,0 @@
# Image for the example MCE ingestion CLI (mce-cli).
# APP_ENV selects which "<env>-install" stage becomes the final image:
#   prod (default) - the jar is built inside the image by the prod-build stage
#   dev            - nothing is built; the jar is expected to be mounted in
ARG APP_ENV=prod

# Runtime base shared by both install stages.
FROM openjdk:8-jre-alpine AS base

# Build stage: copies the build context and builds the mce-cli jar with the Gradle wrapper.
FROM openjdk:8 AS prod-build
# WORKDIR replaces the former `RUN cd datahub-src && ...`; the sources still land in /datahub-src.
WORKDIR /datahub-src
COPY . .
RUN ./gradlew :metadata-ingestion-examples:mce-cli:build

# Production install: only the built jar and the example events file are copied over.
FROM base AS prod-install
COPY --from=prod-build /datahub-src/metadata-ingestion-examples/mce-cli/build/libs/mce-cli.jar /datahub/ingestion/bin/mce-cli.jar
COPY --from=prod-build /datahub-src/metadata-ingestion-examples/mce-cli/example-bootstrap.json /datahub/ingestion/example-bootstrap.json

FROM base AS dev-install
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
# See this excellent thread https://github.com/docker/cli/issues/1134

FROM ${APP_ENV}-install AS final
# Exec form: java is started directly instead of through `/bin/sh -c`.
CMD ["java", "-jar", "/datahub/ingestion/bin/mce-cli.jar", "-m", "produce", "/datahub/ingestion/example-bootstrap.json"]

View File

@ -1,5 +1,3 @@
# DataHub MetadataChangeEvent (MCE) Ingestion Docker Image
# DataHub Ingestion Quickstart
Ingests data into [GMA](../../docs/what/gma.md) using the [example ingestion CLI](
../../metadata-ingestion-examples/mce-cli/README.md). Recommended to use with [quickstart](../../docs/quickstart.md) to
quickly get a DataHub instance up and running with some fake data.
Ingests some [sample data](../../metadata-ingestion/examples/mce_files/bootstrap_mce.json) into [GMA](../../docs/what/gma.md) using the [Python ingestion framework](../../metadata-ingestion). Recommended for use with [quickstart](../../docs/quickstart.md) to quickly get a DataHub instance up and running with some fake data.

View File

@ -1,18 +0,0 @@
---
# Development compose file for the ingestion image.
# Builds the local Dockerfile with APP_ENV=dev and bind-mounts the mce-cli
# build output directory and the example events file from the host.
version: '3.5'

services:
  ingestion:
    image: datahub-ingestion:debug
    env_file: env/docker.env
    build:
      context: .
      dockerfile: Dockerfile
      args:
        APP_ENV: dev
    volumes:
      - ../../metadata-ingestion-examples/mce-cli/build/libs/:/datahub/ingestion/bin
      - ../../metadata-ingestion-examples/mce-cli/example-bootstrap.json:/datahub/ingestion/example-bootstrap.json

networks:
  default:
    name: datahub_network

View File

@ -2,17 +2,18 @@
# Compose definition for the `ingestion` service on the `datahub_network` network.
# NOTE(review): this view appears to show removed and added lines together
# (two `dockerfile:`, two `image:` and two `name:` entries) and has lost its
# indentation - confirm against the actual file before reusing it.
version: '3.5'
services:
ingestion:
image: datahub-ingestion
env_file: env/docker.env
build:
context: ../../
dockerfile: docker/ingestion/Dockerfile
dockerfile: docker/datahub-ingestion/Dockerfile
# Image tag comes from DATAHUB_VERSION and falls back to `latest` when it is unset or empty.
image: linkedin/datahub-ingestion:${DATAHUB_VERSION:-latest}
hostname: ingestion
container_name: ingestion
environment:
- KAFKA_BOOTSTRAP_SERVER=broker:29092
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
# Points the container command at the recipe file mounted below.
command: "ingest -c /sample_recipe.yml"
volumes:
# Most of the config is embedded inside the sample recipe file.
# Both files are mounted read-only (`:ro`).
- ./sample_recipe.yml:/sample_recipe.yml:ro
- ../../metadata-ingestion/examples/mce_files/bootstrap_mce.json:/bootstrap_mce.json:ro
networks:
default:
name: datahub_network
name: datahub_network

View File

@ -1,2 +0,0 @@
# Kafka bootstrap server and schema registry URL for the ingestion container.
# NOTE(review): the `broker` and `schema-registry` hostnames are presumably compose service names - confirm.
KAFKA_BOOTSTRAP_SERVER=broker:29092
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081

View File

@ -1,6 +0,0 @@
#!/bin/bash
# Runs the ingestion image using your locally built mce-cli. Gradle build must have been run before this script.

# Absolute path of the directory containing this script, so docker-compose.dev.yml
# is found regardless of the caller's working directory.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# "$DIR" is quoted so a path containing spaces or glob characters is not word-split.
cd "$DIR" && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -f docker-compose.dev.yml -p datahub up

View File

@ -1,4 +1,4 @@
#!/bin/bash
# Starts the ingestion compose project from the directory containing this script.

# Absolute path of the directory containing this script, so the compose file
# is found regardless of the caller's working directory.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

# NOTE(review): the two commands below look like the before/after versions of
# the same line in this change view - confirm which one the script should keep.
# "$DIR" is quoted in both so a path containing spaces is not word-split.
cd "$DIR" && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub up --build
cd "$DIR" && docker-compose pull && docker-compose -p datahub up

View File

@ -0,0 +1,9 @@
# Ingestion recipe with a "file" source and a "datahub-rest" sink.
source:
  type: "file"
  config:
    # Path of the input file as seen by the process running the recipe.
    filename: "/bootstrap_mce.json"

sink:
  type: "datahub-rest"
  config:
    server: 'http://datahub-gms:8080'

View File

@ -52,7 +52,4 @@ Where `my-file.json` is some file that contains a
### Producing the Example Events with Docker
We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example or in a
docker environment using `docker/ingestion/ingestion.sh`. We also have a developer image
(`docker/ingestion/ingestion-dev.sh`) which uses your locally built jar rather than building on the docker image itself,
which may be faster if you have already built code locally.
We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example.

View File

@ -54,9 +54,12 @@ setuptools.setup(
],
python_requires=">=3.6",
package_dir={"": "src"},
packages=setuptools.find_packages(where="./src"),
packages=setuptools.find_namespace_packages(where="./src"),
include_package_data=True,
package_data={"datahub": ["py.typed"]},
package_data={
"datahub": ["py.typed"],
"datahub.metadata": ["schema.avsc"],
},
entry_points={
"console_scripts": ["datahub = datahub.entrypoints:datahub"],
},