feat: Refactor openmetadata/ingestion docker image (#7456)

* centralize openmetadata/ingestion docker image

* update volume mappings; fix ingestion tag!

* fix PR comments

* fix cypress mysql tests!
Akash Jain 2022-09-19 09:20:54 +05:30 committed by GitHub
parent 78bd0c40c6
commit fc8312c5fb
14 changed files with 153 additions and 2405 deletions
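
To exercise the refactored image locally, a minimal shell sketch (the compose file path is an assumption inferred from the build contexts in the diffs below; only the INGESTION_DEPENDENCY build argument and the Dockerfile.ci path come from this commit):

# Build the centralized ingestion image and start the local stack.
# INGESTION_DEPENDENCY selects which optional ingestion plugins are installed (defaults to "all").
INGESTION_DEPENDENCY=mysql \
  docker compose -f docker/local-metadata/docker-compose.yml up --build -d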


@@ -1,56 +0,0 @@
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: docker-openmetadata-airflow docker
on:
workflow_dispatch:
inputs:
tag:
description: "Input tag"
required: true
release:
types: [published]
jobs:
push_to_docker_hub:
runs-on: ubuntu-latest
env:
input: ${{ github.event.inputs.tag }}
steps:
- name: Check trigger type
if: ${{ env.input == '' }}
run: echo "input=0.12.0" >> $GITHUB_ENV
- name: Check out the Repo
uses: actions/checkout@v2
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_OPENMETADATA_USERNAME }}
password: ${{ secrets.DOCKERHUB_OPENMETADATA_TOKEN }}
- name: Build and push
uses: docker/build-push-action@v2
with:
context: .
platforms: linux/amd64,linux/arm64
push: ${{ github.event_name == 'release' }}
# Update tags before every release
tags: 'openmetadata/airflow:${{ env.input }},openmetadata/airflow:latest'
file: ./docker/airflow/Dockerfile


@@ -32,8 +32,7 @@ services:
ports:
- "5432:5432"
networks:
local_app_net:
ipv4_address: 172.16.239.10
- local_app_net
healthcheck:
test: psql -U postgres -tAc 'select 1' -d openmetadata_db
interval: 15s
@@ -47,8 +46,7 @@ services:
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks:
local_app_net:
ipv4_address: 172.16.239.11
- local_app_net
expose:
- 9200
- 9300
@@ -121,9 +119,6 @@ services:
expose:
- 8585
- 8586
- 9200
- 9300
- 5432
ports:
- "8585:8585"
- "8586:8586"
@@ -133,15 +128,14 @@ services:
postgresql:
condition: service_healthy
networks:
local_app_net:
ipv4_address: 172.16.239.13
- local_app_net
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
ingestion:
build:
context: ../../.
dockerfile: ingestion/Dockerfile_local
dockerfile: ingestion/Dockerfile.ci
args:
INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all}
container_name: openmetadata_ingestion
@@ -153,12 +147,22 @@ services:
openmetadata-server:
condition: service_healthy
environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-postgresql}
DB_PORT: ${DB_PORT:-5432}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
DB_USER: ${DB_USER:-airflow_user}
DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
expose:
- 8080
ports:
@@ -166,9 +170,8 @@ services:
networks:
- local_app_net
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
- ingestion-volume-dags:/ingestion/examples/airflow/dags
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dags:/opt/airflow/airflow/dags
- ingestion-volume-tmp:/tmp
networks:


@@ -31,8 +31,7 @@ services:
ports:
- "3306:3306"
networks:
local_app_net:
ipv4_address: 172.16.239.10
- local_app_net
healthcheck:
test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
interval: 15s
@@ -46,8 +45,7 @@ services:
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks:
local_app_net:
ipv4_address: 172.16.239.11
- local_app_net
expose:
- 9200
- 9300
@@ -119,9 +117,6 @@ services:
expose:
- 8585
- 8586
- 9200
- 9300
- 3306
ports:
- "8585:8585"
- "8586:8586"
@@ -131,18 +126,34 @@ services:
mysql:
condition: service_healthy
networks:
local_app_net:
ipv4_address: 172.16.239.13
- local_app_net
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
ingestion:
build:
context: ../../.
dockerfile: ingestion/Dockerfile_local
dockerfile: ingestion/Dockerfile.ci
args:
INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all}
container_name: openmetadata_ingestion
environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-mysql}
DB_PORT: ${DB_PORT:-3306}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
DB_USER: ${DB_USER:-airflow_user}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
depends_on:
elasticsearch:
condition: service_started
@@ -157,9 +168,8 @@ services:
networks:
- local_app_net
volumes:
- /var/run/docker.sock:/var/run/docker.sock
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
- ingestion-volume-dags:/ingestion/examples/airflow/dags
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dags:/opt/airflow/airflow/dags
- ingestion-volume-tmp:/tmp
networks:


@@ -28,8 +28,7 @@ services:
ports:
- "5432:5432"
networks:
app_net:
ipv4_address: 172.16.240.10
- app_net
healthcheck:
test: psql -U postgres -tAc 'select 1' -d openmetadata_db
interval: 15s
@@ -43,8 +42,7 @@ services:
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks:
app_net:
ipv4_address: 172.16.240.11
- app_net
ports:
- "9200:9200"
- "9300:9300"
@@ -52,7 +50,7 @@ services:
openmetadata-server:
container_name: openmetadata_server
restart: always
image: openmetadata/server:0.11.4
image: openmetadata/server:0.12.0
environment:
ELASTICSEARCH_HOST: elasticsearch
# OpenMetadata Server Authentication Configuration
@@ -111,9 +109,6 @@ services:
expose:
- 8585
- 8586
- 9200
- 9300
- 5432
ports:
- "8585:8585"
- "8586:8586"
@@ -123,17 +118,13 @@ services:
postgresql:
condition: service_healthy
networks:
app_net:
ipv4_address: 172.16.240.13
extra_hosts:
- "postgresql:172.16.240.10"
- "elasticsearch:172.16.240.11"
- app_net
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
ingestion:
container_name: openmetadata_ingestion
image: openmetadata/ingestion:0.11.4
image: openmetadata/ingestion:0.12.0
depends_on:
elasticsearch:
condition: service_started
@@ -142,26 +133,31 @@ services:
openmetadata-server:
condition: service_healthy
environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-postgresql}
DB_PORT: ${DB_PORT:-5432}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-postgresql+psycopg2}
DB_USER: ${DB_USER:-airflow_user}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
expose:
- 8080
ports:
- "8080:8080"
networks:
- app_net
extra_hosts:
- "postgresql:172.16.240.10"
- "localhost:172.16.240.11"
- "localhost:172.16.240.13"
volumes:
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
- ingestion-volume-dags:/ingestion/examples/airflow/dags
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dags:/opt/airflow/dags
- ingestion-volume-tmp:/tmp
networks:


@@ -25,8 +25,7 @@ services:
expose:
- 3306
networks:
app_net:
ipv4_address: 172.16.240.10
- app_net
healthcheck:
test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db"
interval: 15s
@@ -40,8 +39,7 @@ services:
- discovery.type=single-node
- ES_JAVA_OPTS=-Xms1024m -Xmx1024m
networks:
app_net:
ipv4_address: 172.16.240.11
- app_net
ports:
- "9200:9200"
- "9300:9300"
@@ -120,11 +118,7 @@ services:
mysql:
condition: service_healthy
networks:
app_net:
ipv4_address: 172.16.240.13
extra_hosts:
- "localhost:172.16.240.10"
- "elasticsearch:172.16.240.11"
- app_net
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ]
@@ -139,25 +133,31 @@ services:
openmetadata-server:
condition: service_healthy
environment:
AIRFLOW__API__AUTH_BACKENDS: airflow.api.auth.backend.basic_auth
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__LINEAGE__BACKEND: airflow_provider_openmetadata.lineage.openmetadata.OpenMetadataLineageBackend
AIRFLOW__LINEAGE__AIRFLOW_SERVICE_NAME: airflow_docker
AIRFLOW__LINEAGE__OPENMETADATA_API_ENDPOINT: http://openmetadata-server:8585/api
AIRFLOW__LINEAGE__AUTH_PROVIDER_TYPE: no-auth # Update this if you are using SSO
AIRFLOW__OPENMETADATA_AIRFLOW_APIS__DAG_GENERATED_CONFIGS: "/opt/airflow/dag_generated_configs"
DB_HOST: ${DB_HOST:-mysql}
DB_PORT: ${DB_PORT:-3306}
AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db}
AIRFLOW_DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql}
DB_USER: ${DB_USER:-airflow_user}
DB_PASSWORD: ${DB_PASSWORD:-airflow_pass}
entrypoint: /bin/bash
command:
- "/opt/airflow/ingestion_dependency.sh"
expose:
- 8080
ports:
- "8080:8080"
networks:
- app_net
extra_hosts:
- "localhost:172.16.240.10"
- "localhost:172.16.240.11"
- "localhost:172.16.240.13"
volumes:
- ingestion-volume-dag-airflow:/airflow/dag_generated_configs
- ingestion-volume-dags:/ingestion/examples/airflow/dags
- ingestion-volume-dag-airflow:/opt/airflow/dag_generated_configs
- ingestion-volume-dags:/opt/airflow/dags
- ingestion-volume-tmp:/tmp
networks:


@@ -1,58 +1,54 @@
FROM python:3.9-slim as base
ENV AIRFLOW_HOME=/airflow
RUN apt-get update && \
apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib python3.9-dev tdsodbc unixodbc unixodbc-dev wget vim --no-install-recommends && \
rm -rf /var/lib/apt/lists/*
# Manually fix security vulnerability from curl
# - https://security.snyk.io/vuln/SNYK-DEBIAN11-CURL-2936229
# Add it back to the usual apt-get install once a fix for Debian is released
RUN wget https://curl.se/download/curl-7.84.0.tar.gz && \
tar -xvf curl-7.84.0.tar.gz && cd curl-7.84.0 && \
./configure --with-openssl && make && make install
FROM base as airflow
ENV AIRFLOW_VERSION=2.3.3
# install odbc driver
RUN apt-get update && \
apt-get install -y gnupg && \
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
apt-get update && \
ACCEPT_EULA=Y apt-get install -y msodbcsql18 && \
rm -rf /var/lib/apt/lists/*
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
# Add docker provider for the DockerOperator
FROM apache/airflow:2.3.3-python3.9
USER root
RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add -
RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list
# Install Dependencies (listed in alphabetical order)
RUN apt-get update \
&& apt-get install -y build-essential \
default-libmysqlclient-dev \
freetds-bin \
freetds-dev \
gcc \
gnupg \
libevent-dev \
libffi-dev \
libpq-dev \
librdkafka-dev \
libsasl2-dev \
libsasl2-modules \
libssl-dev \
libxml2 \
openjdk-11-jre \
openssl \
postgresql \
postgresql-contrib \
tdsodbc \
unixodbc \
unixodbc-dev \
wget --no-install-recommends \
# Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
&& rm -rf /var/lib/apt/lists/*
# Required for Starting Ingestion Container in Docker Compose
COPY --chown=airflow:airflow ingestion/ingestion_dependency.sh /opt/airflow
# Required for Ingesting Sample Data
COPY --chown=airflow:airflow ingestion/examples/sample_data /home/airflow/ingestion/examples/sample_data
# Required for Airflow DAGs of Sample Data
COPY --chown=airflow:airflow ingestion/examples/airflow/dags /opt/airflow/dags
# Provide Execute Permissions to shell script
RUN chmod +x /opt/airflow/ingestion_dependency.sh
USER airflow
# Argument to provide for Ingestion Dependencies to install. Defaults to all
ARG INGESTION_DEPENDENCY="all"
RUN pip install --upgrade pip
RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
FROM airflow as apis
WORKDIR /openmetadata-airflow-apis
COPY openmetadata-airflow-apis /openmetadata-airflow-apis
RUN pip install "."
FROM apis as ingestion
WORKDIR /ingestion
COPY ingestion /ingestion
ARG INGESTION_DEPENDENCY=all
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
RUN pip install --upgrade openmetadata-managed-apis --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
RUN pip install --upgrade openmetadata-ingestion[${INGESTION_DEPENDENCY}]
# Uninstalling psycopg2-binary and installing psycopg2 instead
# because psycopg2-binary generates an architecture-specific error
# while authenticating the connection with Airflow; psycopg2 solves this error
RUN pip uninstall psycopg2-binary -y
RUN pip install psycopg2
RUN airflow db init
RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg
RUN chmod 755 ingestion_dependency.sh
EXPOSE 8080
CMD [ "./ingestion_dependency.sh" ]
RUN pip install psycopg2 mysqlclient
# Make required folders for openmetadata-airflow-apis
RUN mkdir -p /opt/airflow/dag_generated_configs
# This is required as it's responsible for creating the airflow.cfg file
RUN airflow db init && rm -f /opt/airflow/airflow.db


@@ -5,39 +5,61 @@ RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/so
# Install Dependencies (listed in alphabetical order)
RUN apt-get update \
&& apt-get install -y build-essential \
default-libmysqlclient-dev \
freetds-bin \
freetds-dev \
gcc \
gnupg \
libevent-dev \
libffi-dev \
default-libmysqlclient-dev \
libpq-dev \
librdkafka-dev \
libsasl2-dev \
libsasl2-modules \
libssl-dev \
libxml2 \
netcat \
openjdk-11-jre \
openssl \
postgresql \
postgresql-contrib \
tdsodbc \
unixodbc \
unixodbc-dev --no-install-recommends \
unixodbc-dev \
vim \
wget --no-install-recommends \
# Accept MSSQL ODBC License
&& ACCEPT_EULA=Y apt-get install -y msodbcsql18 \
&& rm -rf /var/lib/apt/lists/*
# Required for Starting Ingestion Container in Docker Compose
COPY --chown=airflow:airflow ingestion/ingestion_dependency.sh /opt/airflow
# Required for Ingesting Sample Data
COPY --chown=airflow:airflow ingestion /home/airflow/ingestion
COPY --chown=airflow:airflow openmetadata-airflow-apis /home/airflow/openmetadata-airflow-apis
# Required for Airflow DAGs of Sample Data
COPY --chown=airflow:airflow ingestion/examples/airflow/dags /opt/airflow/dags
# Provide Execute Permissions to shell script
RUN chmod +x /opt/airflow/ingestion_dependency.sh
USER airflow
ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
# Argument to provide for Ingestion Dependencies to install. Defaults to all
ARG INGESTION_DEPENDENCY="all"
RUN pip install --upgrade pip
RUN pip install --upgrade openmetadata-managed-apis --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.3.3/constraints-3.9.txt"
RUN pip install --upgrade openmetadata-ingestion[${INGESTION_DEPENDENCY}]
WORKDIR /home/airflow/openmetadata-airflow-apis
RUN pip install "."
WORKDIR /home/airflow/ingestion
ARG INGESTION_DEPENDENCY="all"
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
# Uninstalling psycopg2-binary and installing psycopg2 instead
# because psycopg2-binary generates an architecture-specific error
# while authenticating the connection with Airflow; psycopg2 solves this error
RUN pip uninstall psycopg2-binary -y
RUN pip install psycopg2 mysqlclient
# Make required folders for openmetadata-airflow-apis
RUN mkdir -p /opt/airflow/dag_generated_configs
EXPOSE 8080
# This is required as it's responsible for creating the airflow.cfg file
RUN airflow db init && rm -f /opt/airflow/airflow.db
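
For building this image by hand from the repository root, a rough sketch (the image tag is illustrative; the Dockerfile path and build argument are taken from the compose files above):

# Build the ingestion image with a restricted plugin set instead of the default "all".
docker build -f ingestion/Dockerfile.ci --build-arg INGESTION_DEPENDENCY=mysql -t openmetadata/ingestion:local .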


@@ -1,57 +0,0 @@
FROM python:3.9-slim as base
ENV AIRFLOW_HOME=/airflow
RUN apt-get update && \
apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev \
libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib \
python3.9-dev tdsodbc unixodbc unixodbc-dev wget vim --no-install-recommends && \
rm -rf /var/lib/apt/lists/*
# Manually fix security vulnerability from curl
# - https://security.snyk.io/vuln/SNYK-DEBIAN11-CURL-2936229
# Add it back to the usual apt-get install once a fix for Debian is released
RUN wget https://curl.se/download/curl-7.84.0.tar.gz && \
tar -xvf curl-7.84.0.tar.gz && cd curl-7.84.0 && \
./configure --with-openssl && make && make install
FROM base as airflow
ENV AIRFLOW_VERSION=2.3.3
# install odbc driver
RUN apt-get update && \
apt-get install -y gnupg && \
curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
apt-get update && \
ACCEPT_EULA=Y apt-get install -y msodbcsql18 && \
rm -rf /var/lib/apt/lists/*
ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt"
# Add docker provider for the DockerOperator
RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}"
FROM airflow as apis
WORKDIR /openmetadata-airflow-apis
COPY openmetadata-airflow-apis /openmetadata-airflow-apis
RUN pip install "."
FROM apis as ingestion
WORKDIR /ingestion
COPY ingestion /ingestion
ARG INGESTION_DEPENDENCY
RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]"
# Uninstalling psycopg2-binary and installing psycopg2 instead
# because the psycopg2-binary generates a architecture specific error
# while authrenticating connection with the airflow, psycopg2 solves this error
RUN pip uninstall psycopg2-binary -y
RUN pip install psycopg2
RUN airflow db init
RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg
RUN chmod 755 ingestion_dependency.sh
EXPOSE 8080
CMD [ "./ingestion_dependency.sh" ]

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -39,7 +39,7 @@ source:
serviceConnection:
config:
type: SampleData
sampleDataFolder: "./examples/sample_data"
sampleDataFolder: "/home/airflow/ingestion/examples/sample_data"
sourceConfig: {}
sink:
type: metadata-rest
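
If this workflow definition is run manually inside the ingestion container, the call would look roughly like the sketch below (the config file name and location are illustrative; only the absolute sampleDataFolder comes from this diff):

# Run the sample-data workflow with the openmetadata-ingestion CLI from wherever the
# config file lives; the folder it reads now matches the path the Dockerfile copies the examples to.
metadata ingest -c sample_data.yaml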


@@ -40,7 +40,7 @@ config = """
"serviceConnection": {
"config": {
"type": "SampleData",
"sampleDataFolder": "./examples/sample_data"
"sampleDataFolder": "/home/airflow/ingestion/examples/sample_data"
}
},
"sourceConfig": {


@@ -25,7 +25,7 @@ AIRFLOW_ADMIN_PASSWORD=${AIRFLOW_ADMIN_PASSWORD:-admin}
OPENMETADATA_SERVER=${OPENMETADATA_SERVER:-"http://openmetadata-server:8585"}
sed -i "s#\(sql_alchemy_conn = \).*#\1${DB_CONN}#" /airflow/airflow.cfg
sed -i "s#\(sql_alchemy_conn = \).*#\1${DB_CONN}#" /opt/airflow/airflow.cfg
airflow db init
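
The sed call above points Airflow at the database configured through the compose environment; a rough sketch of how DB_CONN is presumably assembled earlier in the script from the DB_* variables the compose files set (the assembly itself is not shown in this hunk, so treat it as an assumption):

# Assumed assembly of the SQLAlchemy connection string that the sed call writes into airflow.cfg.
DB_SCHEME="${AIRFLOW_DB_SCHEME:-${DB_SCHEME:-mysql+pymysql}}"
DB_CONN="${DB_SCHEME}://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${AIRFLOW_DB}"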


@@ -23,7 +23,7 @@ describe('MySQL Ingestion', () => {
const connectionInput = () => {
cy.get('#root_username').type('openmetadata_user');
cy.get('#root_password').type('openmetadata_password');
cy.get('#root_hostPort').type('172.16.239.10:3306');
cy.get('#root_hostPort').type('mysql:3306');
cy.get('#root_databaseSchema').type('openmetadata_db');
};
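
Because the services now share the compose network without static IPs, the test reaches MySQL by service name; a quick way to confirm that resolution from inside the ingestion container (service names assumed from the compose files above):

# Resolve the "mysql" service name from within the "ingestion" service container.
docker compose exec ingestion python -c "import socket; print(socket.gethostbyname('mysql'))"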