mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-16 12:33:08 +00:00
feat: Refactor openmetadata/ingestion docker image (#7456)
* centralize openmetadata/ingestion docker image * update volume mappings; fix ingestion tag! * fix PR comments * fix cypress mysql tests!
This commit is contained in:
parent
78bd0c40c6
commit
fc8312c5fb
@ -1,56 +0,0 @@
|
||||
# Copyright 2021 Collate
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
name: docker-openmetadata-airflow docker
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag:
|
||||
description: "Input tag"
|
||||
required: true
|
||||
release:
|
||||
types: [published]
|
||||
|
||||
jobs:
|
||||
push_to_docker_hub:
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
input: ${{ github.event.inputs.tag }}
|
||||
|
||||
steps:
|
||||
- name: Check trigger type
|
||||
if: ${{ env.input == '' }}
|
||||
run: echo "input=0.12.0" >> $GITHUB_ENV
|
||||
|
||||
- name: Check out the Repo
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v1
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v1
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_OPENMETADATA_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_OPENMETADATA_TOKEN }}
|
||||
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64,linux/arm64
|
||||
push: ${{ github.event_name == 'release' }}
|
||||
# Update tags before every release
|
||||
tags: 'openmetadata/airflow:${{ env.input }},openmetadata/airflow:latest'
|
||||
file: ./docker/airflow/Dockerfile
|
@ -32,8 +32,7 @@ services:
|
||||
ports:
|
||||
- "5432:5432"
|
||||
networks:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.10
|
||||
- local_app_net
|
||||
healthcheck:
|
||||
test: psql -U postgres -tAc 'select 1' -d openmetadata_db
|
||||
interval: 15s
|
||||
@ -47,8 +46,7 @@ services:
|
||||
- discovery.type=single-node
|
||||
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||
networks:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.11
|
||||
- local_app_net
|
||||
expose:
|
||||
- 9200
|
||||
- 9300
|
||||
@ -121,9 +119,6 @@ services:
|
||||
expose:
|
||||
- 8585
|
||||
- 8586
|
||||
- 9200
|
||||
- 9300
|
||||
- 5432
|
||||
ports:
|
||||
- "8585:8585"
|
||||
- "8586:8586"
|
||||
@ -133,15 +128,14 @@ services:
|
||||
postgresql:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.13
|
||||
- local_app_net
|
||||
healthcheck:
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
|
||||
|
||||
ingestion:
|
||||
build:
|
||||
context: ../../.
|
||||
dockerfile: ingestion/Dockerfile_local
|
||||
dockerfile: ingestion/Dockerfile.ci
|
||||
args:
|
||||
INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all}
|
||||
container_name: openmetadata_ingestion
|
||||
@ -153,12 +147,22 @@ services:
|
||||
openmetadata-server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
|
||||
AIRFLOW__CORE__EXECUTOR: LocalExecutor
|
||||
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
|
||||
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
|
||||
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
|
||||
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
|
||||
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
|
||||
DB_HOST: ${DB_HOST:-postgresql}
|
||||
DB_PORT: ${DB_PORT:-5432}
|
||||
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
|
||||
DB_USER: ${DB_USER:-airflow_user}
|
||||
DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2}
|
||||
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
|
||||
entrypoint: /bin/bash
|
||||
command:
|
||||
- "/opt/airflow/ingestion_dependency.sh"
|
||||
expose:
|
||||
- 8080
|
||||
ports:
|
||||
@ -166,9 +170,8 @@ services:
|
||||
networks:
|
||||
- local_app_net
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/ingestion/examples/airflow/dags
|
||||
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/opt/airflow/airflow/dags
|
||||
- ingestion-volume-tmp:/tmp
|
||||
|
||||
networks:
|
||||
|
@ -31,8 +31,7 @@ services:
|
||||
ports:
|
||||
- "3306:3306"
|
||||
networks:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.10
|
||||
- local_app_net
|
||||
healthcheck:
|
||||
test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
|
||||
interval: 15s
|
||||
@ -46,8 +45,7 @@ services:
|
||||
- discovery.type=single-node
|
||||
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||
networks:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.11
|
||||
- local_app_net
|
||||
expose:
|
||||
- 9200
|
||||
- 9300
|
||||
@ -119,9 +117,6 @@ services:
|
||||
expose:
|
||||
- 8585
|
||||
- 8586
|
||||
- 9200
|
||||
- 9300
|
||||
- 3306
|
||||
ports:
|
||||
- "8585:8585"
|
||||
- "8586:8586"
|
||||
@ -131,18 +126,34 @@ services:
|
||||
mysql:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
local_app_net:
|
||||
ipv4_address: 172.16.239.13
|
||||
- local_app_net
|
||||
healthcheck:
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
|
||||
|
||||
ingestion:
|
||||
build:
|
||||
context: ../../.
|
||||
dockerfile: ingestion/Dockerfile_local
|
||||
dockerfile: ingestion/Dockerfile.ci
|
||||
args:
|
||||
INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all}
|
||||
container_name: openmetadata_ingestion
|
||||
environment:
|
||||
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
|
||||
AIRFLOW__CORE__EXECUTOR: LocalExecutor
|
||||
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
|
||||
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
|
||||
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
|
||||
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
|
||||
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
|
||||
DB_HOST: ${DB_HOST:-mysql}
|
||||
DB_PORT: ${DB_PORT:-3306}
|
||||
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
|
||||
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
|
||||
DB_USER: ${DB_USER:-airflow_user}
|
||||
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
|
||||
entrypoint: /bin/bash
|
||||
command:
|
||||
- "/opt/airflow/ingestion_dependency.sh"
|
||||
depends_on:
|
||||
elasticsearch:
|
||||
condition: service_started
|
||||
@ -157,9 +168,8 @@ services:
|
||||
networks:
|
||||
- local_app_net
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/ingestion/examples/airflow/dags
|
||||
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/opt/airflow/airflow/dags
|
||||
- ingestion-volume-tmp:/tmp
|
||||
|
||||
networks:
|
||||
|
@ -28,8 +28,7 @@ services:
|
||||
ports:
|
||||
- "5432:5432"
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.240.10
|
||||
- app_net
|
||||
healthcheck:
|
||||
test: psql -U postgres -tAc 'select 1' -d openmetadata_db
|
||||
interval: 15s
|
||||
@ -43,8 +42,7 @@ services:
|
||||
- discovery.type=single-node
|
||||
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.240.11
|
||||
- app_net
|
||||
ports:
|
||||
- "9200:9200"
|
||||
- "9300:9300"
|
||||
@ -52,7 +50,7 @@ services:
|
||||
openmetadata-server:
|
||||
container_name: openmetadata_server
|
||||
restart: always
|
||||
image: openmetadata/server:0.11.4
|
||||
image: openmetadata/server:0.12.0
|
||||
environment:
|
||||
ELASTICSEARCH_HOST: elasticsearch
|
||||
# OpenMetadata Server Authentication Configuration
|
||||
@ -111,9 +109,6 @@ services:
|
||||
expose:
|
||||
- 8585
|
||||
- 8586
|
||||
- 9200
|
||||
- 9300
|
||||
- 5432
|
||||
ports:
|
||||
- "8585:8585"
|
||||
- "8586:8586"
|
||||
@ -123,17 +118,13 @@ services:
|
||||
postgresql:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.240.13
|
||||
extra_hosts:
|
||||
- "postgresql:172.16.240.10"
|
||||
- "elasticsearch:172.16.240.11"
|
||||
- app_net
|
||||
healthcheck:
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
|
||||
|
||||
ingestion:
|
||||
container_name: openmetadata_ingestion
|
||||
image: openmetadata/ingestion:0.11.4
|
||||
image: openmetadata/ingestion:0.12.0
|
||||
depends_on:
|
||||
elasticsearch:
|
||||
condition: service_started
|
||||
@ -142,26 +133,31 @@ services:
|
||||
openmetadata-server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
|
||||
AIRFLOW__CORE__EXECUTOR: LocalExecutor
|
||||
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
|
||||
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
|
||||
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
|
||||
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
|
||||
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
|
||||
DB_HOST: ${DB_HOST:-postgresql}
|
||||
DB_PORT: ${DB_PORT:-5432}
|
||||
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
|
||||
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-postgresql+psycopg2}
|
||||
DB_USER: ${DB_USER:-airflow_user}
|
||||
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
|
||||
DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2}
|
||||
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
|
||||
entrypoint: /bin/bash
|
||||
command:
|
||||
- "/opt/airflow/ingestion_dependency.sh"
|
||||
expose:
|
||||
- 8080
|
||||
ports:
|
||||
- "8080:8080"
|
||||
networks:
|
||||
- app_net
|
||||
extra_hosts:
|
||||
- "postgresql:172.16.240.10"
|
||||
- "localhost:172.16.240.11"
|
||||
- "localhost:172.16.240.13"
|
||||
volumes:
|
||||
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/ingestion/examples/airflow/dags
|
||||
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/opt/airflow/dags
|
||||
- ingestion-volume-tmp:/tmp
|
||||
|
||||
networks:
|
||||
|
@ -25,8 +25,7 @@ services:
|
||||
expose:
|
||||
- 3306
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.240.10
|
||||
- app_net
|
||||
healthcheck:
|
||||
test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
|
||||
interval: 15s
|
||||
@ -40,8 +39,7 @@ services:
|
||||
- discovery.type=single-node
|
||||
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.240.11
|
||||
- app_net
|
||||
ports:
|
||||
- "9200:9200"
|
||||
- "9300:9300"
|
||||
@ -120,11 +118,7 @@ services:
|
||||
mysql:
|
||||
condition: service_healthy
|
||||
networks:
|
||||
app_net:
|
||||
ipv4_address: 172.16.240.13
|
||||
extra_hosts:
|
||||
- "localhost:172.16.240.10"
|
||||
- "elasticsearch:172.16.240.11"
|
||||
- app_net
|
||||
healthcheck:
|
||||
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
|
||||
|
||||
@ -139,25 +133,31 @@ services:
|
||||
openmetadata-server:
|
||||
condition: service_healthy
|
||||
environment:
|
||||
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
|
||||
AIRFLOW__CORE__EXECUTOR: LocalExecutor
|
||||
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
|
||||
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
|
||||
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
|
||||
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
|
||||
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
|
||||
DB_HOST: ${DB_HOST:-mysql}
|
||||
DB_PORT: ${DB_PORT:-3306}
|
||||
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
|
||||
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
|
||||
DB_USER: ${DB_USER:-airflow_user}
|
||||
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
|
||||
entrypoint: /bin/bash
|
||||
command:
|
||||
- "/opt/airflow/ingestion_dependency.sh"
|
||||
expose:
|
||||
- 8080
|
||||
ports:
|
||||
- "8080:8080"
|
||||
networks:
|
||||
- app_net
|
||||
extra_hosts:
|
||||
- "localhost:172.16.240.10"
|
||||
- "localhost:172.16.240.11"
|
||||
- "localhost:172.16.240.13"
|
||||
volumes:
|
||||
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/ingestion/examples/airflow/dags
|
||||
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
|
||||
- ingestion-volume-dags:/opt/airflow/dags
|
||||
- ingestion-volume-tmp:/tmp
|
||||
|
||||
networks:
|
||||
|
@ -1,58 +1,54 @@
|
||||
FROM python:3.9-slim as base
|
||||
ENV AIRFLOW_HOME=/airflow
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib python3.9-dev tdsodbc unixodbc unixodbc-dev wget vim --no-install-recommends && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Manually fix security vulnerability from curl
|
||||
# - https://security.snyk.io/vuln/SNYK-DEBIAN11-CURL-2936229
|
||||
# Add it back to the usual apt-get install once a fix for Debian is released
|
||||
RUN wget https://curl.se/download/curl-7.84.0.tar.gz && \
|
||||
tar -xvf curl-7.84.0.tar.gz && cd curl-7.84.0 && \
|
||||
./configure --with-openssl && make && make install
|
||||
|
||||
|
||||
FROM base as airflow
|
||||
ENV AIRFLOW_VERSION=2.3.3
|
||||
|
||||
# install odbc driver
|
||||
RUN apt-get update && \
|
||||
apt-get install -y gnupg && \
|
||||
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
|
||||
curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
|
||||
apt-get update && \
|
||||
ACCEPT_EULA=Y apt-get install -y msodbcsql18 && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
|
||||
# Add docker provider for the DockerOperator
|
||||
FROM apache/airflow:2.3.3-python3.9
|
||||
USER root
|
||||
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
|
||||
RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
|
||||
# Install Dependencies (listed in alphabetical order)
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y build-essential \
|
||||
default-libmysqlclient-dev \
|
||||
freetds-bin \
|
||||
freetds-dev \
|
||||
gcc \
|
||||
gnupg \
|
||||
libevent-dev \
|
||||
libffi-dev \
|
||||
libpq-dev \
|
||||
librdkafka-dev \
|
||||
libsasl2-dev \
|
||||
libsasl2-modules \
|
||||
libssl-dev \
|
||||
libxml2 \
|
||||
openjdk-11-jre \
|
||||
openssl \
|
||||
postgresql \
|
||||
postgresql-contrib \
|
||||
tdsodbc \
|
||||
unixodbc \
|
||||
unixodbc-dev \
|
||||
wget --no-install-recommends \
|
||||
# Accept MSSQL ODBC License
|
||||
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
# Required for Starting Ingestion Container in Docker Compose
|
||||
COPY --chown=airflow:airflow ingestion/ingestion_dependency.sh /opt/airflow
|
||||
# Required for Ingesting Sample Data
|
||||
COPY --chown=airflow:airflow ingestion/examples/sample_data /home/airflow/ingestion/examples/sample_data
|
||||
# Required for Airflow DAGs of Sample Data
|
||||
COPY --chown=airflow:airflow ingestion/examples/airflow/dags /opt/airflow/dags
|
||||
# Provide Execute Permissions to shell script
|
||||
RUN chmod +x /opt/airflow/ingestion_dependency.sh
|
||||
USER airflow
|
||||
# Argument to provide for Ingestion Dependencies to install. Defaults to all
|
||||
ARG INGESTION_DEPENDENCY="all"
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
|
||||
|
||||
|
||||
FROM airflow as apis
|
||||
WORKDIR /openmetadata-airflow-apis
|
||||
COPY openmetadata-airflow-apis /openmetadata-airflow-apis
|
||||
|
||||
RUN pip install "."
|
||||
|
||||
FROM apis as ingestion
|
||||
WORKDIR /ingestion
|
||||
COPY ingestion /ingestion
|
||||
|
||||
ARG INGESTION_DEPENDENCY=all
|
||||
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
|
||||
|
||||
RUN pip install --upgrade openmetadata-managed-apis --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
|
||||
RUN pip install --upgrade openmetadata-ingestion[${INGESTION_DEPENDENCY}]
|
||||
# Uninstalling psycopg2-binary and installing psycopg2 instead
|
||||
# because psycopg2-binary generates an architecture-specific error
|
||||
# while authenticating the connection with Airflow; psycopg2 solves this error
|
||||
RUN pip uninstall psycopg2-binary -y
|
||||
RUN pip install psycopg2
|
||||
|
||||
|
||||
RUN airflow db init
|
||||
RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg
|
||||
RUN chmod 755 ingestion_dependency.sh
|
||||
EXPOSE 8080
|
||||
CMD [ "./ingestion_dependency.sh" ]
|
||||
RUN pip install psycopg2 mysqlclient
|
||||
# Make required folders for openmetadata-airflow-apis
|
||||
RUN mkdir -p /opt/airflow/dag_generated_configs
|
||||
# This is required because it creates the airflow.cfg file
|
||||
RUN airflow db init && rm -f /opt/airflow/airflow.db
|
||||
|
@ -5,35 +5,54 @@ RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/so
|
||||
# Install Dependencies (listed in alphabetical order)
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y build-essential \
|
||||
default-libmysqlclient-dev \
|
||||
freetds-bin \
|
||||
freetds-dev \
|
||||
gcc \
|
||||
gnupg \
|
||||
libevent-dev \
|
||||
libffi-dev \
|
||||
default-libmysqlclient-dev \
|
||||
libpq-dev \
|
||||
librdkafka-dev \
|
||||
libsasl2-dev \
|
||||
libsasl2-modules \
|
||||
libssl-dev \
|
||||
libxml2 \
|
||||
netcat \
|
||||
openjdk-11-jre \
|
||||
openssl \
|
||||
postgresql \
|
||||
postgresql-contrib \
|
||||
tdsodbc \
|
||||
unixodbc \
|
||||
unixodbc-dev --no-install-recommends \
|
||||
unixodbc-dev \
|
||||
vim \
|
||||
wget --no-install-recommends \
|
||||
# Accept MSSQL ODBC License
|
||||
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
# Required for Starting Ingestion Container in Docker Compose
|
||||
COPY --chown=airflow:airflow ingestion/ingestion_dependency.sh /opt/airflow
|
||||
# Required for Ingesting Sample Data
|
||||
COPY --chown=airflow:airflow ingestion /home/airflow/ingestion
|
||||
|
||||
COPY --chown=airflow:airflow openmetadata-airflow-apis /home/airflow/openmetadata-airflow-apis
|
||||
# Required for Airflow DAGs of Sample Data
|
||||
COPY --chown=airflow:airflow ingestion/examples/airflow/dags /opt/airflow/dags
|
||||
# Provide Execute Permissions to shell script
|
||||
RUN chmod +x /opt/airflow/ingestion_dependency.sh
|
||||
USER airflow
|
||||
ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
|
||||
# Argument to provide for Ingestion Dependencies to install. Defaults to all
|
||||
ARG INGESTION_DEPENDENCY="all"
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install --upgrade openmetadata-managed-apis --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
|
||||
RUN pip install --upgrade openmetadata-ingestion[${INGESTION_DEPENDENCY}]
|
||||
|
||||
WORKDIR /home/airflow/openmetadata-airflow-apis
|
||||
RUN pip install "."
|
||||
|
||||
WORKDIR /home/airflow/ingestion
|
||||
ARG INGESTION_DEPENDENCY="all"
|
||||
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
|
||||
|
||||
# Uninstalling psycopg2-binary and installing psycopg2 instead
|
||||
# because psycopg2-binary generates an architecture-specific error
|
||||
# while authenticating the connection with Airflow; psycopg2 solves this error
|
||||
@ -41,3 +60,6 @@ RUN pip uninstall psycopg2-binary -y
|
||||
RUN pip install psycopg2 mysqlclient
|
||||
# Make required folders for openmetadata-airflow-apis
|
||||
RUN mkdir -p /opt/airflow/dag_generated_configs
|
||||
EXPOSE 8080
|
||||
# This is required because it creates the airflow.cfg file
|
||||
RUN airflow db init && rm -f /opt/airflow/airflow.db
|
@ -1,57 +0,0 @@
|
||||
FROM python:3.9-slim as base
|
||||
ENV AIRFLOW_HOME=/airflow
|
||||
RUN apt-get update && \
|
||||
apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev \
|
||||
libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib \
|
||||
python3.9-dev tdsodbc unixodbc unixodbc-dev wget vim --no-install-recommends && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Manually fix security vulnerability from curl
|
||||
# - https://security.snyk.io/vuln/SNYK-DEBIAN11-CURL-2936229
|
||||
# Add it back to the usual apt-get install once a fix for Debian is released
|
||||
RUN wget https://curl.se/download/curl-7.84.0.tar.gz && \
|
||||
tar -xvf curl-7.84.0.tar.gz && cd curl-7.84.0 && \
|
||||
./configure --with-openssl && make && make install
|
||||
|
||||
|
||||
FROM base as airflow
|
||||
ENV AIRFLOW_VERSION=2.3.3
|
||||
|
||||
# install odbc driver
|
||||
RUN apt-get update && \
|
||||
apt-get install -y gnupg && \
|
||||
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
|
||||
curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
|
||||
apt-get update && \
|
||||
ACCEPT_EULA=Y apt-get install -y msodbcsql18 && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
|
||||
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
|
||||
# Add docker provider for the DockerOperator
|
||||
RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
|
||||
|
||||
FROM airflow as apis
|
||||
WORKDIR /openmetadata-airflow-apis
|
||||
COPY openmetadata-airflow-apis /openmetadata-airflow-apis
|
||||
|
||||
RUN pip install "."
|
||||
|
||||
FROM apis as ingestion
|
||||
WORKDIR /ingestion
|
||||
COPY ingestion /ingestion
|
||||
|
||||
ARG INGESTION_DEPENDENCY
|
||||
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
|
||||
|
||||
# Uninstalling psycopg2-binary and installing psycopg2 instead
|
||||
# because psycopg2-binary generates an architecture-specific error
|
||||
# while authenticating the connection with Airflow; psycopg2 solves this error
|
||||
RUN pip uninstall psycopg2-binary -y
|
||||
RUN pip install psycopg2
|
||||
|
||||
RUN airflow db init
|
||||
RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg
|
||||
RUN chmod 755 ingestion_dependency.sh
|
||||
EXPOSE 8080
|
||||
CMD [ "./ingestion_dependency.sh" ]
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -39,7 +39,7 @@ source:
|
||||
serviceConnection:
|
||||
config:
|
||||
type: SampleData
|
||||
sampleDataFolder: "./examples/sample_data"
|
||||
sampleDataFolder: "/home/airflow/ingestion/examples/sample_data"
|
||||
sourceConfig: {}
|
||||
sink:
|
||||
type: metadata-rest
|
||||
|
@ -40,7 +40,7 @@ config = """
|
||||
"serviceConnection": {
|
||||
"config": {
|
||||
"type": "SampleData",
|
||||
"sampleDataFolder": "./examples/sample_data"
|
||||
"sampleDataFolder": "/home/airflow/ingestion/examples/sample_data"
|
||||
}
|
||||
},
|
||||
"sourceConfig": {
|
||||
|
@ -25,7 +25,7 @@ AIRFLOW_ADMIN_PASSWORD=${AIRFLOW_ADMIN_PASSWORD:-admin}
|
||||
|
||||
OPENMETADATA_SERVER=${OPENMETADATA_SERVER:-"http://openmetadata-server:8585"}
|
||||
|
||||
sed -i "s#\(sql_alchemy_conn = \).*#\1${DB_CONN}#" /airflow/airflow.cfg
|
||||
sed -i "s#\(sql_alchemy_conn = \).*#\1${DB_CONN}#" /opt/airflow/airflow.cfg
|
||||
|
||||
airflow db init
|
||||
|
||||
|
@ -23,7 +23,7 @@ describe('MySQL Ingestion', () => {
|
||||
const connectionInput = () => {
|
||||
cy.get('#root_username').type('openmetadata_user');
|
||||
cy.get('#root_password').type('openmetadata_password');
|
||||
cy.get('#root_hostPort').type('172.16.239.10:3306');
|
||||
cy.get('#root_hostPort').type('mysql:3306');
|
||||
cy.get('#root_databaseSchema').type('openmetadata_db');
|
||||
};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user