diff --git a/docker/ingestion/Dockerfile b/docker/ingestion/Dockerfile deleted file mode 100644 index d3d875c5dd..0000000000 --- a/docker/ingestion/Dockerfile +++ /dev/null @@ -1,19 +0,0 @@ -# Defining environment -ARG APP_ENV=prod - -FROM openjdk:8-jre-alpine as base - -FROM openjdk:8 as prod-build -COPY . datahub-src -RUN cd datahub-src && ./gradlew :metadata-ingestion-examples:mce-cli:build - -FROM base as prod-install -COPY --from=prod-build datahub-src/metadata-ingestion-examples/mce-cli/build/libs/mce-cli.jar /datahub/ingestion/bin/mce-cli.jar -COPY --from=prod-build datahub-src/metadata-ingestion-examples/mce-cli/example-bootstrap.json /datahub/ingestion/example-bootstrap.json - -FROM base as dev-install -# Dummy stage for development. Assumes code is built on your machine and mounted to this image. -# See this excellent thread https://github.com/docker/cli/issues/1134 - -FROM ${APP_ENV}-install as final -CMD java -jar /datahub/ingestion/bin/mce-cli.jar -m produce /datahub/ingestion/example-bootstrap.json \ No newline at end of file diff --git a/docker/ingestion/README.md b/docker/ingestion/README.md index 8776019cb5..677e2b8ff8 100644 --- a/docker/ingestion/README.md +++ b/docker/ingestion/README.md @@ -1,5 +1,3 @@ -# DataHub MetadataChangeEvent (MCE) Ingestion Docker Image +# DataHub Ingestion Quickstart -Ingests data into [GMA](../../docs/what/gma.md) using the [example ingestion CLI]( -../../metadata-ingestion-examples/mce-cli/README.md). Recommended to use with [quickstart](../../docs/quickstart.md) to -quickly get a DataHub instance up and running with some fake data. +Ingests some [sample data](../../metadata-ingestion/examples/mce_files/bootstrap_mce.json) into [GMA](../../docs/what/gma.md) using the [Python ingestion framework](../../metadata-ingestion). Recommended for use with [quickstart](../../docs/quickstart.md) to quickly get a DataHub instance up and running with some fake data. diff --git a/docker/ingestion/docker-compose.dev.yml b/docker/ingestion/docker-compose.dev.yml deleted file mode 100644 index 39ce3ab2a2..0000000000 --- a/docker/ingestion/docker-compose.dev.yml +++ /dev/null @@ -1,18 +0,0 @@ ---- -version: '3.5' -services: - ingestion: - image: datahub-ingestion:debug - env_file: env/docker.env - build: - context: . - dockerfile: Dockerfile - args: - APP_ENV: dev - volumes: - - ../../metadata-ingestion-examples/mce-cli/build/libs/:/datahub/ingestion/bin - - ../../metadata-ingestion-examples/mce-cli/example-bootstrap.json:/datahub/ingestion/example-bootstrap.json - -networks: - default: - name: datahub_network \ No newline at end of file diff --git a/docker/ingestion/docker-compose.yml b/docker/ingestion/docker-compose.yml index 9cedae7e1e..175788a5ab 100644 --- a/docker/ingestion/docker-compose.yml +++ b/docker/ingestion/docker-compose.yml @@ -2,17 +2,18 @@ version: '3.5' services: ingestion: - image: datahub-ingestion - env_file: env/docker.env build: context: ../../ - dockerfile: docker/ingestion/Dockerfile + dockerfile: docker/datahub-ingestion/Dockerfile + image: linkedin/datahub-ingestion:${DATAHUB_VERSION:-latest} hostname: ingestion container_name: ingestion - environment: - - KAFKA_BOOTSTRAP_SERVER=broker:29092 - - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 + command: "ingest -c /sample_recipe.yml" + volumes: + # Most of the config is embedded inside the sample recipe file. + - ./sample_recipe.yml:/sample_recipe.yml:ro + - ../../metadata-ingestion/examples/mce_files/bootstrap_mce.json:/bootstrap_mce.json:ro networks: default: - name: datahub_network \ No newline at end of file + name: datahub_network diff --git a/docker/ingestion/env/docker.env b/docker/ingestion/env/docker.env deleted file mode 100644 index 0e2f58bf87..0000000000 --- a/docker/ingestion/env/docker.env +++ /dev/null @@ -1,2 +0,0 @@ -KAFKA_BOOTSTRAP_SERVER=broker:29092 -KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 diff --git a/docker/ingestion/ingestion-dev.sh b/docker/ingestion/ingestion-dev.sh deleted file mode 100755 index 72072156bf..0000000000 --- a/docker/ingestion/ingestion-dev.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -# Runs the ingestion image using your locally built mce-cli. Gradle build must have been run before this script. - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd $DIR && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -f docker-compose.dev.yml -p datahub up \ No newline at end of file diff --git a/docker/ingestion/ingestion.sh b/docker/ingestion/ingestion.sh index 339158a6cc..2ac49f6e7e 100755 --- a/docker/ingestion/ingestion.sh +++ b/docker/ingestion/ingestion.sh @@ -1,4 +1,4 @@ #!/bin/bash DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -cd $DIR && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub up --build \ No newline at end of file +cd $DIR && docker-compose pull && docker-compose -p datahub up diff --git a/docker/ingestion/sample_recipe.yml b/docker/ingestion/sample_recipe.yml new file mode 100644 index 0000000000..e1315bb87b --- /dev/null +++ b/docker/ingestion/sample_recipe.yml @@ -0,0 +1,9 @@ +source: + type: "file" + config: + filename: "/bootstrap_mce.json" + +sink: + type: "datahub-rest" + config: + server: 'http://datahub-gms:8080' diff --git a/metadata-ingestion-examples/mce-cli/README.md b/metadata-ingestion-examples/mce-cli/README.md index 5ada2b8b1c..de524ad19f 100644 --- a/metadata-ingestion-examples/mce-cli/README.md +++ b/metadata-ingestion-examples/mce-cli/README.md @@ -52,7 +52,4 @@ Where `my-file.json` is some file that contains a ### Producing the Example Events with Docker -We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example or in a -docker environment using `docker/ingestion/ingestion.sh`. We also have a developer image -(`docker/ingestion/ingestion-dev.sh`) which uses your locally built jar rather than building on the docker image itself, -which may be faster if you have already built code locally. \ No newline at end of file +We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example. \ No newline at end of file diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index ff8513708f..4d957038a2 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -54,9 +54,12 @@ setuptools.setup( ], python_requires=">=3.6", package_dir={"": "src"}, - packages=setuptools.find_packages(where="./src"), + packages=setuptools.find_namespace_packages(where="./src"), include_package_data=True, - package_data={"datahub": ["py.typed"]}, + package_data={ + "datahub": ["py.typed"], + "datahub.metadata": ["schema.avsc"], + }, entry_points={ "console_scripts": ["datahub = datahub.entrypoints:datahub"], }, diff --git a/metadata-ingestion/src/datahub/ingestion/extractor/__init__.py b/metadata-ingestion/src/datahub/ingestion/extractor/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/metadata-ingestion/src/datahub/ingestion/sink/__init__.py b/metadata-ingestion/src/datahub/ingestion/sink/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/metadata-ingestion/src/datahub/ingestion/source/__init__.py b/metadata-ingestion/src/datahub/ingestion/source/__init__.py new file mode 100644 index 0000000000..e69de29bb2