mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
feat(ingest): switch quickstart to Python ingestion (#2158)
This commit is contained in:
parent
6c7668115f
commit
dced25fef7
@ -1,19 +0,0 @@
|
|||||||
# Defining environment
|
|
||||||
ARG APP_ENV=prod
|
|
||||||
|
|
||||||
FROM openjdk:8-jre-alpine as base
|
|
||||||
|
|
||||||
FROM openjdk:8 as prod-build
|
|
||||||
COPY . datahub-src
|
|
||||||
# Build the mce-cli jar from inside the copied source tree; WORKDIR replaces
# the `cd … &&` chain so the working directory is explicit.
WORKDIR /datahub-src
RUN ./gradlew :metadata-ingestion-examples:mce-cli:build
|
|
||||||
|
|
||||||
FROM base as prod-install
|
|
||||||
COPY --from=prod-build datahub-src/metadata-ingestion-examples/mce-cli/build/libs/mce-cli.jar /datahub/ingestion/bin/mce-cli.jar
|
|
||||||
COPY --from=prod-build datahub-src/metadata-ingestion-examples/mce-cli/example-bootstrap.json /datahub/ingestion/example-bootstrap.json
|
|
||||||
|
|
||||||
FROM base as dev-install
|
|
||||||
# Dummy stage for development. Assumes code is built on your machine and mounted to this image.
|
|
||||||
# See this excellent thread https://github.com/docker/cli/issues/1134
|
|
||||||
|
|
||||||
FROM ${APP_ENV}-install as final
|
|
||||||
# Exec (JSON-array) form: java runs directly as PID 1 instead of under
# `/bin/sh -c`, so it receives SIGTERM from `docker stop`.
CMD ["java", "-jar", "/datahub/ingestion/bin/mce-cli.jar", "-m", "produce", "/datahub/ingestion/example-bootstrap.json"]
|
|
@ -1,5 +1,3 @@
|
|||||||
# DataHub MetadataChangeEvent (MCE) Ingestion Docker Image
|
# DataHub Ingestion Quickstart
|
||||||
|
|
||||||
Ingests data into [GMA](../../docs/what/gma.md) using the [example ingestion CLI](
|
Ingests some [sample data](../../metadata-ingestion/examples/mce_files/bootstrap_mce.json) into [GMA](../../docs/what/gma.md) using the [Python ingestion framework](../../metadata-ingestion). Recommended for use with [quickstart](../../docs/quickstart.md) to quickly get a DataHub instance up and running with some fake data.
|
||||||
../../metadata-ingestion-examples/mce-cli/README.md). Recommended to use with [quickstart](../../docs/quickstart.md) to
|
|
||||||
quickly get a DataHub instance up and running with some fake data.
|
|
||||||
|
@ -1,18 +0,0 @@
|
|||||||
---
|
|
||||||
version: '3.5'
|
|
||||||
services:
|
|
||||||
ingestion:
|
|
||||||
image: datahub-ingestion:debug
|
|
||||||
env_file: env/docker.env
|
|
||||||
build:
|
|
||||||
context: .
|
|
||||||
dockerfile: Dockerfile
|
|
||||||
args:
|
|
||||||
APP_ENV: dev
|
|
||||||
volumes:
|
|
||||||
- ../../metadata-ingestion-examples/mce-cli/build/libs/:/datahub/ingestion/bin
|
|
||||||
- ../../metadata-ingestion-examples/mce-cli/example-bootstrap.json:/datahub/ingestion/example-bootstrap.json
|
|
||||||
|
|
||||||
networks:
|
|
||||||
default:
|
|
||||||
name: datahub_network
|
|
@ -2,17 +2,18 @@
|
|||||||
version: '3.5'
|
version: '3.5'
|
||||||
services:
|
services:
|
||||||
ingestion:
|
ingestion:
|
||||||
image: datahub-ingestion
|
|
||||||
env_file: env/docker.env
|
|
||||||
build:
|
build:
|
||||||
context: ../../
|
context: ../../
|
||||||
dockerfile: docker/ingestion/Dockerfile
|
dockerfile: docker/datahub-ingestion/Dockerfile
|
||||||
|
image: linkedin/datahub-ingestion:${DATAHUB_VERSION:-latest}
|
||||||
hostname: ingestion
|
hostname: ingestion
|
||||||
container_name: ingestion
|
container_name: ingestion
|
||||||
environment:
|
command: "ingest -c /sample_recipe.yml"
|
||||||
- KAFKA_BOOTSTRAP_SERVER=broker:29092
|
volumes:
|
||||||
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
|
# Most of the config is embedded inside the sample recipe file.
|
||||||
|
- ./sample_recipe.yml:/sample_recipe.yml:ro
|
||||||
|
- ../../metadata-ingestion/examples/mce_files/bootstrap_mce.json:/bootstrap_mce.json:ro
|
||||||
|
|
||||||
networks:
|
networks:
|
||||||
default:
|
default:
|
||||||
name: datahub_network
|
name: datahub_network
|
||||||
|
2
docker/ingestion/env/docker.env
vendored
2
docker/ingestion/env/docker.env
vendored
@ -1,2 +0,0 @@
|
|||||||
KAFKA_BOOTSTRAP_SERVER=broker:29092
|
|
||||||
KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
|
|
@ -1,6 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
|
|
||||||
# Runs the ingestion image using your locally built mce-cli. Gradle build must have been run before this script.
|
|
||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
|
||||||
# Quote $DIR so the script still works when the checkout path contains spaces.
cd "$DIR" && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -f docker-compose.dev.yml -p datahub up
|
|
@ -1,4 +1,4 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
# Quote $DIR so the script still works when the checkout path contains spaces.
cd "$DIR" && COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub up --build
|
# Quote $DIR so the script still works when the checkout path contains spaces.
# Pull the published image first, then start the ingestion container.
cd "$DIR" && docker-compose pull && docker-compose -p datahub up
|
||||||
|
9
docker/ingestion/sample_recipe.yml
Normal file
9
docker/ingestion/sample_recipe.yml
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
source:
|
||||||
|
type: "file"
|
||||||
|
config:
|
||||||
|
filename: "/bootstrap_mce.json"
|
||||||
|
|
||||||
|
sink:
|
||||||
|
type: "datahub-rest"
|
||||||
|
config:
|
||||||
|
server: 'http://datahub-gms:8080'
|
@ -52,7 +52,4 @@ Where `my-file.json` is some file that contains a
|
|||||||
|
|
||||||
### Producing the Example Events with Docker
|
### Producing the Example Events with Docker
|
||||||
|
|
||||||
We have some example events in the `example-bootstrap.json` file, which can be invoked via the above example or in a
|
We have some example events in the `example-bootstrap.json` file, which can be produced via the above example.
|
||||||
docker environment using `docker/ingestion/ingestion.sh`. We also have a developer image
|
|
||||||
(`docker/ingestion/ingestion-dev.sh`) which uses your locally built jar rather than building on the docker image itself,
|
|
||||||
which may be faster if you have already built code locally.
|
|
@ -54,9 +54,12 @@ setuptools.setup(
|
|||||||
],
|
],
|
||||||
python_requires=">=3.6",
|
python_requires=">=3.6",
|
||||||
package_dir={"": "src"},
|
package_dir={"": "src"},
|
||||||
packages=setuptools.find_packages(where="./src"),
|
packages=setuptools.find_namespace_packages(where="./src"),
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
package_data={"datahub": ["py.typed"]},
|
package_data={
|
||||||
|
"datahub": ["py.typed"],
|
||||||
|
"datahub.metadata": ["schema.avsc"],
|
||||||
|
},
|
||||||
entry_points={
|
entry_points={
|
||||||
"console_scripts": ["datahub = datahub.entrypoints:datahub"],
|
"console_scripts": ["datahub = datahub.entrypoints:datahub"],
|
||||||
},
|
},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user