mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
feat(ingest): switch quickstart to Python ingestion (#2158)
This commit is contained in:
parent
6c7668115f
commit
dced25fef7
@ -1,19 +0,0 @@
|
||||
# Defining environment
|
||||
ARG APP_ENV=prod
|
||||
|
||||
FROM openjdk:8-jre-alpine as base
|
||||
|
||||
FROM openjdk:8 as prod-build
|
||||
COPY . datahub-src
|
||||
RUN cd datahub-src && ./gradlew :metadata-ingestion-examples:mce-cli:build
|
||||
|
||||
FROM base as prod-install
|
||||
COPY --from=prod-build datahub-src/metadata-ingestion-examples/mce-cli/build/libs/mce-cli.jar /datahub/ingestion/bin/mce-cli.jar
|
||||
COPY --from=prod-build datahub-src/metadata-ingestion-examples/mce-cli/example-bootstrap.json /datahub/ingestion/example-bootstrap.json
|
||||
|
||||
FROM base as dev-install
|
||||
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
|
||||
# See this excellent thread https://github.com/docker/cli/issues/1134
|
||||
|
||||
FROM ${APP_ENV}-install as final
|
||||
CMD java -jar /datahub/ingestion/bin/mce-cli.jar -m produce /datahub/ingestion/example-bootstrap.json
|
@ -1,5 +1,3 @@
|
||||
# DataHub MetadataChangeEvent (MCE) Ingestion Docker Image
|
||||
# DataHub Ingestion Quickstart
|
||||
|
||||
Ingests data into [GMA](../../docs/what/gma.md) using the [example ingestion CLI](
|
||||
../../metadata-ingestion-examples/mce-cli/README.md). Recommended to use with [quickstart](../../docs/quickstart.md) to
|
||||
quickly get a DataHub instance up and running with some fake data.
|
||||
Ingests some [sample data](../../metadata-ingestion/examples/mce_files/bootstrap_mce.json) into [GMA](../../docs/what/gma.md) using the [Python ingestion framework](../../metadata-ingestion). Recommended for use with [quickstart](../../docs/quickstart.md) to quickly get a DataHub instance up and running with some fake data.
|
||||
|
@ -1,18 +0,0 @@
|
||||
---
|
||||
version: '3.5'
|
||||
services:
|
||||
ingestion:
|
||||
image: datahub-ingestion:debug
|
||||
env_file: env/docker.env
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile
|
||||
args:
|
||||
APP_ENV: dev
|
||||
volumes:
|
||||
- ../../metadata-ingestion-examples/mce-cli/build/libs/:/datahub/ingestion/bin
|
||||
- ../../metadata-ingestion-examples/mce-cli/example-bootstrap.json:/datahub/ingestion/example-bootstrap.json
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: datahub_network
|
@ -2,17 +2,18 @@
|
||||
version: '3.5'
|
||||
services:
|
||||
ingestion:
|
||||
image: datahub-ingestion
|
||||
env_file: env/docker.env
|
||||
build:
|
||||
context: ../../
|
||||
dockerfile: docker/ingestion/Dockerfile
|
||||
dockerfile: docker/datahub-ingestion/Dockerfile
|
||||
image: linkedin/datahub-ingestion:${DATAHUB_VERSION:-latest}
|
||||
hostname: ingestion
|
||||
container_name: ingestion
|
||||
environment:
|
||||
- KAFKA_BOOTSTRAP_SERVER=broker:29092
|
||||
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
|
||||
command: "ingest -c /sample_recipe.yml"
|
||||
volumes:
|
||||
# Most of the config is embedded inside the sample recipe file.
|
||||
- ./sample_recipe.yml:/sample_recipe.yml:ro
|
||||
- ../../metadata-ingestion/examples/mce_files/bootstrap_mce.json:/bootstrap_mce.json:ro
|
||||
|
||||
networks:
|
||||
default:
|
||||
name: datahub_network
|
||||
name: datahub_network
|
||||
|
2
docker/ingestion/env/docker.env
vendored
2
docker/ingestion/env/docker.env
vendored
@ -1,2 +0,0 @@
|
||||
KAFKA_BOOTSTRAP_SERVER=broker:29092
|
||||
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
|
@ -1,6 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Runs the ingestion image using your locally built mce-cli. Gradle build must have been run before this script.
|
||||
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
cd $DIR && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -f docker-compose.dev.yml -p datahub up
|
@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
cd $DIR && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub up --build
|
||||
cd $DIR && docker-compose pull && docker-compose -p datahub up
|
||||
|
9
docker/ingestion/sample_recipe.yml
Normal file
9
docker/ingestion/sample_recipe.yml
Normal file
@ -0,0 +1,9 @@
|
||||
source:
|
||||
type: "file"
|
||||
config:
|
||||
filename: "/bootstrap_mce.json"
|
||||
|
||||
sink:
|
||||
type: "datahub-rest"
|
||||
config:
|
||||
server: 'http://datahub-gms:8080'
|
@ -52,7 +52,4 @@ Where `my-file.json` is some file that contains a
|
||||
|
||||
### Producing the Example Events with Docker
|
||||
|
||||
We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example or in a
|
||||
docker environment using `docker/ingestion/ingestion.sh`. We also have a developer image
|
||||
(`docker/ingestion/ingestion-dev.sh`) which uses your locally built jar rather than building on the docker image itself,
|
||||
which may be faster if you have already built code locally.
|
||||
We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example.
|
@ -54,9 +54,12 @@ setuptools.setup(
|
||||
],
|
||||
python_requires=">=3.6",
|
||||
package_dir={"": "src"},
|
||||
packages=setuptools.find_packages(where="./src"),
|
||||
packages=setuptools.find_namespace_packages(where="./src"),
|
||||
include_package_data=True,
|
||||
package_data={"datahub": ["py.typed"]},
|
||||
package_data={
|
||||
"datahub": ["py.typed"],
|
||||
"datahub.metadata": ["schema.avsc"],
|
||||
},
|
||||
entry_points={
|
||||
"console_scripts": ["datahub = datahub.entrypoints:datahub"],
|
||||
},
|
||||
|
Loading…
x
Reference in New Issue
Block a user