feat: Refactor openmetadata/ingestion docker image (#7456)

* centralize openmetadata/ingestion docker image

* update volume mappings; fix ingestion tag!

* fix PR comments

* fix cypress mysql tests!
This commit is contained in:
Akash Jain 2022-09-19 09:20:54 +05:30 committed by GitHub
parent 78bd0c40c6
commit fc8312c5fb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 153 additions and 2405 deletions

View File

@ -1,56 +0,0 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the multi-arch openmetadata/airflow image from ./docker/airflow/Dockerfile
# and pushes it to Docker Hub when a release is published.
name: docker-openmetadata-airflow docker

on:
  # Manual run: the caller must supply the image tag.
  workflow_dispatch:
    inputs:
      tag:
        description: "Input tag"
        required: true
  # Automatic run whenever a GitHub release is published.
  release:
    types: [published]

jobs:
  push_to_docker_hub:
    runs-on: ubuntu-latest
    env:
      # Tag supplied on manual dispatch; empty when no input was given.
      input: ${{ github.event.inputs.tag }}
    steps:
      # Fall back to the hard-coded release version when no tag input exists.
      # NOTE(review): this literal has to be bumped by hand for each release.
      - name: Check trigger type
        if: ${{ env.input == '' }}
        run: echo "input=0.12.0" >> $GITHUB_ENV

      - name: Check out the Repo
        uses: actions/checkout@v2

      # QEMU + Buildx are set up ahead of the linux/amd64 + linux/arm64 build below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      - name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          username: ${{ secrets.DOCKERHUB_OPENMETADATA_USERNAME }}
          password: ${{ secrets.DOCKERHUB_OPENMETADATA_TOKEN }}

      - name: Build and push
        uses: docker/build-push-action@v2
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          # Only release-triggered runs push; manual runs build without publishing.
          push: ${{ github.event_name == 'release' }}
          # Update tags before every release
          tags: 'openmetadata/airflow:${{ env.input }},openmetadata/airflow:latest'
          file: ./docker/airflow/Dockerfile

View File

@ -32,8 +32,7 @@ services:
ports: ports:
- "5432:5432" - "5432:5432"
networks: networks:
local_app_net: - local_app_net
ipv4_address: 172.16.239.10
healthcheck: healthcheck:
test: psql -U postgres -tAc 'select 1' -d openmetadata_db test: psql -U postgres -tAc 'select 1' -d openmetadata_db
interval: 15s interval: 15s
@ -47,8 +46,7 @@ services:
- discovery.type=single-node - discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks: networks:
local_app_net: - local_app_net
ipv4_address: 172.16.239.11
expose: expose:
- 9200 - 9200
- 9300 - 9300
@ -121,9 +119,6 @@ services:
expose: expose:
- 8585 - 8585
- 8586 - 8586
- 9200
- 9300
- 5432
ports: ports:
- "8585:8585" - "8585:8585"
- "8586:8586" - "8586:8586"
@ -133,15 +128,14 @@ services:
postgresql: postgresql:
condition: service_healthy condition: service_healthy
networks: networks:
local_app_net: - local_app_net
ipv4_address: 172.16.239.13
healthcheck: healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ] test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
ingestion: ingestion:
build: build:
context: ../../. context: ../../.
dockerfile: ingestion/Dockerfile_local dockerfile: ingestion/Dockerfile.ci
args: args:
INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all} INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all}
container_name: openmetadata_ingestion container_name: openmetadata_ingestion
@ -153,12 +147,22 @@ services:
openmetadata-server: openmetadata-server:
condition: service_healthy condition: service_healthy
environment: environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-postgresql} DB_HOST: ${DB_HOST:-postgresql}
DB_PORT: ${DB_PORT:-5432} DB_PORT: ${DB_PORT:-5432}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
DB_USER: ${DB_USER:-airflow_user} DB_USER: ${DB_USER:-airflow_user}
DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2} DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass} DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
expose: expose:
- 8080 - 8080
ports: ports:
@ -166,9 +170,8 @@ services:
networks: networks:
- local_app_net - local_app_net
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock - ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs - ingestion-volume-dags:/opt/airflow/airflow/dags
- ingestion-volume-dags:/ingestion/examples/airflow/dags
- ingestion-volume-tmp:/tmp - ingestion-volume-tmp:/tmp
networks: networks:

View File

@ -31,8 +31,7 @@ services:
ports: ports:
- "3306:3306" - "3306:3306"
networks: networks:
local_app_net: - local_app_net
ipv4_address: 172.16.239.10
healthcheck: healthcheck:
test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db" test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
interval: 15s interval: 15s
@ -46,8 +45,7 @@ services:
- discovery.type=single-node - discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks: networks:
local_app_net: - local_app_net
ipv4_address: 172.16.239.11
expose: expose:
- 9200 - 9200
- 9300 - 9300
@ -119,9 +117,6 @@ services:
expose: expose:
- 8585 - 8585
- 8586 - 8586
- 9200
- 9300
- 3306
ports: ports:
- "8585:8585" - "8585:8585"
- "8586:8586" - "8586:8586"
@ -131,18 +126,34 @@ services:
mysql: mysql:
condition: service_healthy condition: service_healthy
networks: networks:
local_app_net: - local_app_net
ipv4_address: 172.16.239.13
healthcheck: healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ] test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
ingestion: ingestion:
build: build:
context: ../../. context: ../../.
dockerfile: ingestion/Dockerfile_local dockerfile: ingestion/Dockerfile.ci
args: args:
INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all} INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all}
container_name: openmetadata_ingestion container_name: openmetadata_ingestion
environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-mysql}
DB_PORT: ${DB_PORT:-3306}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
DB_USER: ${DB_USER:-airflow_user}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
depends_on: depends_on:
elasticsearch: elasticsearch:
condition: service_started condition: service_started
@ -157,9 +168,8 @@ services:
networks: networks:
- local_app_net - local_app_net
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock - ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs - ingestion-volume-dags:/opt/airflow/airflow/dags
- ingestion-volume-dags:/ingestion/examples/airflow/dags
- ingestion-volume-tmp:/tmp - ingestion-volume-tmp:/tmp
networks: networks:

View File

@ -28,8 +28,7 @@ services:
ports: ports:
- "5432:5432" - "5432:5432"
networks: networks:
app_net: - app_net
ipv4_address: 172.16.240.10
healthcheck: healthcheck:
test: psql -U postgres -tAc 'select 1' -d openmetadata_db test: psql -U postgres -tAc 'select 1' -d openmetadata_db
interval: 15s interval: 15s
@ -43,8 +42,7 @@ services:
- discovery.type=single-node - discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks: networks:
app_net: - app_net
ipv4_address: 172.16.240.11
ports: ports:
- "9200:9200" - "9200:9200"
- "9300:9300" - "9300:9300"
@ -52,7 +50,7 @@ services:
openmetadata-server: openmetadata-server:
container_name: openmetadata_server container_name: openmetadata_server
restart: always restart: always
image: openmetadata/server:0.11.4 image: openmetadata/server:0.12.0
environment: environment:
ELASTICSEARCH_HOST: elasticsearch ELASTICSEARCH_HOST: elasticsearch
# OpenMetadata Server Authentication Configuration # OpenMetadata Server Authentication Configuration
@ -111,9 +109,6 @@ services:
expose: expose:
- 8585 - 8585
- 8586 - 8586
- 9200
- 9300
- 5432
ports: ports:
- "8585:8585" - "8585:8585"
- "8586:8586" - "8586:8586"
@ -123,17 +118,13 @@ services:
postgresql: postgresql:
condition: service_healthy condition: service_healthy
networks: networks:
app_net: - app_net
ipv4_address: 172.16.240.13
extra_hosts:
- "postgresql:172.16.240.10"
- "elasticsearch:172.16.240.11"
healthcheck: healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ] test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
ingestion: ingestion:
container_name: openmetadata_ingestion container_name: openmetadata_ingestion
image: openmetadata/ingestion:0.11.4 image: openmetadata/ingestion:0.12.0
depends_on: depends_on:
elasticsearch: elasticsearch:
condition: service_started condition: service_started
@ -142,26 +133,31 @@ services:
openmetadata-server: openmetadata-server:
condition: service_healthy condition: service_healthy
environment: environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-postgresql} DB_HOST: ${DB_HOST:-postgresql}
DB_PORT: ${DB_PORT:-5432} DB_PORT: ${DB_PORT:-5432}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-postgresql+psycopg2}
DB_USER: ${DB_USER:-airflow_user} DB_USER: ${DB_USER:-airflow_user}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2} DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
expose: expose:
- 8080 - 8080
ports: ports:
- "8080:8080" - "8080:8080"
networks: networks:
- app_net - app_net
extra_hosts:
- "postgresql:172.16.240.10"
- "localhost:172.16.240.11"
- "localhost:172.16.240.13"
volumes: volumes:
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs - ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dags:/ingestion/examples/airflow/dags - ingestion-volume-dags:/opt/airflow/dags
- ingestion-volume-tmp:/tmp - ingestion-volume-tmp:/tmp
networks: networks:

View File

@ -25,8 +25,7 @@ services:
expose: expose:
- 3306 - 3306
networks: networks:
app_net: - app_net
ipv4_address: 172.16.240.10
healthcheck: healthcheck:
test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db" test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
interval: 15s interval: 15s
@ -40,8 +39,7 @@ services:
- discovery.type=single-node - discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m - ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks: networks:
app_net: - app_net
ipv4_address: 172.16.240.11
ports: ports:
- "9200:9200" - "9200:9200"
- "9300:9300" - "9300:9300"
@ -120,11 +118,7 @@ services:
mysql: mysql:
condition: service_healthy condition: service_healthy
networks: networks:
app_net: - app_net
ipv4_address: 172.16.240.13
extra_hosts:
- "localhost:172.16.240.10"
- "elasticsearch:172.16.240.11"
healthcheck: healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ] test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
@ -139,25 +133,31 @@ services:
openmetadata-server: openmetadata-server:
condition: service_healthy condition: service_healthy
environment: environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-mysql} DB_HOST: ${DB_HOST:-mysql}
DB_PORT: ${DB_PORT:-3306} DB_PORT: ${DB_PORT:-3306}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql} AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
DB_USER: ${DB_USER:-airflow_user} DB_USER: ${DB_USER:-airflow_user}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass} DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
expose: expose:
- 8080 - 8080
ports: ports:
- "8080:8080" - "8080:8080"
networks: networks:
- app_net - app_net
extra_hosts:
- "localhost:172.16.240.10"
- "localhost:172.16.240.11"
- "localhost:172.16.240.13"
volumes: volumes:
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs - ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dags:/ingestion/examples/airflow/dags - ingestion-volume-dags:/opt/airflow/dags
- ingestion-volume-tmp:/tmp - ingestion-volume-tmp:/tmp
networks: networks:

View File

@ -1,58 +1,54 @@
FROM python:3.9-slim as base FROM apache/airflow:2.3.3-python3.9
ENV AIRFLOW_HOME=/airflow USER root
RUN apt-get update && \ RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib python3.9-dev tdsodbc unixodbc unixodbc-dev wget vim --no-install-recommends && \ RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
rm -rf /var/lib/apt/lists/* # Install Dependencies (listed in alphabetical order)
RUN apt-get update \
# Manually fix security vulnerability from curl && apt-get install -y build-essential \
# - https://security.snyk.io/vuln/SNYK-DEBIAN11-CURL-2936229 default-libmysqlclient-dev \
# Add it back to the usual apt-get install once a fix for Debian is released freetds-bin \
RUN wget https://curl.se/download/curl-7.84.0.tar.gz && \ freetds-dev \
tar -xvf curl-7.84.0.tar.gz && cd curl-7.84.0 && \ gcc \
./configure --with-openssl && make && make install gnupg \
libevent-dev \
libffi-dev \
FROM base as airflow libpq-dev \
ENV AIRFLOW_VERSION=2.3.3 librdkafka-dev \
libsasl2-dev \
# install odbc driver libsasl2-modules \
RUN apt-get update && \ libssl-dev \
apt-get install -y gnupg && \ libxml2 \
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \ openjdk-11-jre \
curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list && \ openssl \
apt-get update && \ postgresql \
ACCEPT_EULA=Y apt-get install -y msodbcsql18 && \ postgresql-contrib \
rm -rf /var/lib/apt/lists/* tdsodbc \
unixodbc \
unixodbc-dev \
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt" wget --no-install-recommends \
# Add docker provider for the DockerOperator # Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
&& rm -rf /var/lib/apt/lists/*
# Required for Starting Ingestion Container in Docker Compose
COPY --chown=airflow:airflow ingestion/ingestion_dependency.sh /opt/airflow
# Required for Ingesting Sample Data
COPY --chown=airflow:airflow ingestion/examples/sample_data /home/airflow/ingestion/examples/sample_data
# Required for Airflow DAGs of Sample Data
COPY --chown=airflow:airflow ingestion/examples/airflow/dags /opt/airflow/dags
# Provide Execute Permissions to shell script
RUN chmod +x /opt/airflow/ingestion_dependency.sh
USER airflow
# Argument to provide for Ingestion Dependencies to install. Defaults to all
ARG INGESTION_DEPENDENCY="all"
RUN pip install --upgrade pip RUN pip install --upgrade pip
RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}" RUN pip install --upgrade openmetadata-managed-apis --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
RUN pip install --upgrade openmetadata-ingestion[${INGESTION_DEPENDENCY}]
FROM airflow as apis
WORKDIR /openmetadata-airflow-apis
COPY openmetadata-airflow-apis /openmetadata-airflow-apis
RUN pip install "."
FROM apis as ingestion
WORKDIR /ingestion
COPY ingestion /ingestion
ARG INGESTION_DEPENDENCY=all
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
# Uninstalling psycopg2-binary and installing psycopg2 instead # Uninstalling psycopg2-binary and installing psycopg2 instead
# because the psycopg2-binary generates an architecture-specific error # because the psycopg2-binary generates an architecture-specific error
# while authenticating the connection with Airflow; psycopg2 solves this error # while authenticating the connection with Airflow; psycopg2 solves this error
RUN pip uninstall psycopg2-binary -y RUN pip uninstall psycopg2-binary -y
RUN pip install psycopg2 RUN pip install psycopg2 mysqlclient
# Make required folders for openmetadata-airflow-apis
RUN mkdir -p /opt/airflow/dag_generated_configs
RUN airflow db init # This is required as it's responsible to create airflow.cfg file
RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg RUN airflow db init && rm -f /opt/airflow/airflow.db
RUN chmod 755 ingestion_dependency.sh
EXPOSE 8080
CMD [ "./ingestion_dependency.sh" ]

View File

@ -5,39 +5,61 @@ RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/so
# Install Dependencies (listed in alphabetical order) # Install Dependencies (listed in alphabetical order)
RUN apt-get update \ RUN apt-get update \
&& apt-get install -y build-essential \ && apt-get install -y build-essential \
default-libmysqlclient-dev \
freetds-bin \ freetds-bin \
freetds-dev \ freetds-dev \
gcc \ gcc \
gnupg \ gnupg \
libevent-dev \ libevent-dev \
libffi-dev \ libffi-dev \
default-libmysqlclient-dev \
libpq-dev \ libpq-dev \
librdkafka-dev \ librdkafka-dev \
libsasl2-dev \ libsasl2-dev \
libsasl2-modules \ libsasl2-modules \
libssl-dev \ libssl-dev \
libxml2 \ libxml2 \
netcat \
openjdk-11-jre \ openjdk-11-jre \
openssl \ openssl \
postgresql \ postgresql \
postgresql-contrib \ postgresql-contrib \
tdsodbc \ tdsodbc \
unixodbc \ unixodbc \
unixodbc-dev --no-install-recommends \ unixodbc-dev \
vim \
wget --no-install-recommends \
# Accept MSSQL ODBC License # Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \ && ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Required for Starting Ingestion Container in Docker Compose
COPY --chown=airflow:airflow ingestion/ingestion_dependency.sh /opt/airflow
# Required for Ingesting Sample Data
COPY --chown=airflow:airflow ingestion /home/airflow/ingestion
COPY --chown=airflow:airflow openmetadata-airflow-apis /home/airflow/openmetadata-airflow-apis
# Required for Airflow DAGs of Sample Data
COPY --chown=airflow:airflow ingestion/examples/airflow/dags /opt/airflow/dags
# Provide Execute Permissions to shell script
RUN chmod +x /opt/airflow/ingestion_dependency.sh
USER airflow USER airflow
ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
# Argument to provide for Ingestion Dependencies to install. Defaults to all # Argument to provide for Ingestion Dependencies to install. Defaults to all
ARG INGESTION_DEPENDENCY="all"
RUN pip install --upgrade pip RUN pip install --upgrade pip
RUN pip install --upgrade openmetadata-managed-apis --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
RUN pip install --upgrade openmetadata-ingestion[${INGESTION_DEPENDENCY}] WORKDIR /home/airflow/openmetadata-airflow-apis
RUN pip install "."
WORKDIR /home/airflow/ingestion
ARG INGESTION_DEPENDENCY="all"
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
# Uninstalling psycopg2-binary and installing psycopg2 instead # Uninstalling psycopg2-binary and installing psycopg2 instead
# because the psycopg2-binary generates an architecture-specific error # because the psycopg2-binary generates an architecture-specific error
# while authenticating the connection with Airflow; psycopg2 solves this error # while authenticating the connection with Airflow; psycopg2 solves this error
RUN pip uninstall psycopg2-binary -y RUN pip uninstall psycopg2-binary -y
RUN pip install psycopg2 mysqlclient RUN pip install psycopg2 mysqlclient
# Make required folders for openmetadata-airflow-apis # Make required folders for openmetadata-airflow-apis
RUN mkdir -p /opt/airflow/dag_generated_configs RUN mkdir -p /opt/airflow/dag_generated_configs
EXPOSE 8080
# This is required as it's responsible to create airflow.cfg file
RUN airflow db init && rm -f /opt/airflow/airflow.db

View File

@ -1,57 +0,0 @@
# Local-build image for the ingestion container, assembled as a chain of four
# stages (base -> airflow -> apis -> ingestion); each stage extends the previous one.

# Stage "base": Python 3.9 slim image plus the system packages listed below.
FROM python:3.9-slim as base
# Airflow home directory; airflow.cfg is copied into it near the end of this file.
ENV AIRFLOW_HOME=/airflow
# Install the OS-level packages, then delete the apt package lists.
RUN apt-get update && \
apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev \
libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib \
python3.9-dev tdsodbc unixodbc unixodbc-dev wget vim --no-install-recommends && \
rm -rf /var/lib/apt/lists/*
# Manually fix security vulnerability from curl
# - https://security.snyk.io/vuln/SNYK-DEBIAN11-CURL-2936229
# Add it back to the usual apt-get install once a fix for Debian is released
# curl 7.84.0 is downloaded, built from source against OpenSSL and installed.
RUN wget https://curl.se/download/curl-7.84.0.tar.gz && \
tar -xvf curl-7.84.0.tar.gz && cd curl-7.84.0 && \
./configure --with-openssl && make && make install
# Stage "airflow": adds the Microsoft ODBC driver and Apache Airflow.
FROM base as airflow
# Airflow release to install; also selects the matching pip constraints file below.
ENV AIRFLOW_VERSION=2.3.3
# install odbc driver
# Registers the Microsoft apt key and the Debian 11 package list, then installs
# msodbcsql18 with the EULA accepted non-interactively.
RUN apt-get update && \
apt-get install -y gnupg && \
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
apt-get update && \
ACCEPT_EULA=Y apt-get install -y msodbcsql18 && \
rm -rf /var/lib/apt/lists/*
# Airflow constraints file for AIRFLOW_VERSION on Python 3.9.
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
# Add docker provider for the DockerOperator
RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
# Stage "apis": installs the openmetadata-airflow-apis package from the build context.
FROM airflow as apis
WORKDIR /openmetadata-airflow-apis
COPY openmetadata-airflow-apis /openmetadata-airflow-apis
RUN pip install "."
# Stage "ingestion": installs the ingestion package from the build context.
FROM apis as ingestion
WORKDIR /ingestion
COPY ingestion /ingestion
# Extras to install with the ingestion package. No default is declared here, so
# the value is empty unless it is passed as a build argument.
ARG INGESTION_DEPENDENCY
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
# Uninstalling psycopg2-binary and installing psycopg2 instead
# because the psycopg2-binary generates an architecture-specific error
# while authenticating the connection with Airflow; psycopg2 solves this error
RUN pip uninstall psycopg2-binary -y
RUN pip install psycopg2
# Run Airflow's database initialisation at build time.
RUN airflow db init
# Overwrite the config in AIRFLOW_HOME with the one shipped in the ingestion directory.
RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg
# Path is relative to WORKDIR /ingestion; makes the startup script executable.
RUN chmod 755 ingestion_dependency.sh
# NOTE(review): presumably the Airflow webserver port — confirm.
EXPOSE 8080
# Container start command: the startup script in /ingestion.
CMD [ "./ingestion_dependency.sh" ]

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -39,7 +39,7 @@ source:
serviceConnection: serviceConnection:
config: config:
type: SampleData type: SampleData
sampleDataFolder: "./examples/sample_data" sampleDataFolder: "/home/airflow/ingestion/examples/sample_data"
sourceConfig: {} sourceConfig: {}
sink: sink:
type: metadata-rest type: metadata-rest

View File

@ -40,7 +40,7 @@ config = """
"serviceConnection": { "serviceConnection": {
"config": { "config": {
"type": "SampleData", "type": "SampleData",
"sampleDataFolder": "./examples/sample_data" "sampleDataFolder": "/home/airflow/ingestion/examples/sample_data"
} }
}, },
"sourceConfig": { "sourceConfig": {

View File

@ -25,7 +25,7 @@ AIRFLOW_ADMIN_PASSWORD=${AIRFLOW_ADMIN_PASSWORD:-admin}
OPENMETADATA_SERVER=${OPENMETADATA_SERVER:-"http://openmetadata-server:8585"} OPENMETADATA_SERVER=${OPENMETADATA_SERVER:-"http://openmetadata-server:8585"}
sed -i "s#\(sql_alchemy_conn = \).*#\1${DB_CONN}#" /airflow/airflow.cfg sed -i "s#\(sql_alchemy_conn = \).*#\1${DB_CONN}#" /opt/airflow/airflow.cfg
airflow db init airflow db init

View File

@ -23,7 +23,7 @@ describe('MySQL Ingestion', () => {
const connectionInput = () => { const connectionInput = () => {
cy.get('#root_username').type('openmetadata_user'); cy.get('#root_username').type('openmetadata_user');
cy.get('#root_password').type('openmetadata_password'); cy.get('#root_password').type('openmetadata_password');
cy.get('#root_hostPort').type('172.16.239.10:3306'); cy.get('#root_hostPort').type('mysql:3306');
cy.get('#root_databaseSchema').type('openmetadata_db'); cy.get('#root_databaseSchema').type('openmetadata_db');
}; };