diff --git a/conf/openmetadata.yaml b/conf/openmetadata.yaml index bbae9b754d0..6cf5b9bcf0b 100644 --- a/conf/openmetadata.yaml +++ b/conf/openmetadata.yaml @@ -119,10 +119,11 @@ database: # the name of the JDBC driver, mysql in our case driverClass: ${DB_DRIVER_CLASS:-com.mysql.cj.jdbc.Driver} # the username and password - user: ${MYSQL_USER:-openmetadata_user} - password: ${MYSQL_USER_PASSWORD:-openmetadata_password} + user: ${DB_USER:-openmetadata_user} + password: ${DB_USER_PASSWORD:-openmetadata_password} # the JDBC URL; the database is called openmetadata_db - url: jdbc:${DB_SCHEME:-mysql}://${MYSQL_HOST:-localhost}:${MYSQL_PORT:-3306}/${MYSQL_DATABASE:-openmetadata_db}?allowPublicKeyRetrieval=true&useSSL=${DB_USE_SSL:-false}&serverTimezone=UTC + url: jdbc:${DB_SCHEME:-mysql}://${DB_HOST:-localhost}:${DB_PORT:-3306}/${OM_DATABASE:-openmetadata_db}?allowPublicKeyRetrieval=true&useSSL=${DB_USE_SSL:-false}&serverTimezone=UTC + migrationConfiguration: path: "./bootstrap/sql" diff --git a/docker/airflow/Dockerfile b/docker/airflow/Dockerfile index 6d996846c43..a65b0509ebe 100644 --- a/docker/airflow/Dockerfile +++ b/docker/airflow/Dockerfile @@ -3,11 +3,11 @@ USER root RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list RUN apt-get update \ - && apt-get install -y \ - gcc libsasl2-dev gnupg build-essential libssl-dev libffi-dev \ - librdkafka-dev unixodbc-dev libxml2 libevent-dev --no-install-recommends \ + && apt-get install -y build-essential freetds-bin freetds-dev gcc gnupg libevent-dev libffi-dev libpq-dev \ + librdkafka-dev libsasl2-dev libsasl2-modules libssl-dev libxml2 openjdk-11-jre openssl \ + postgresql postgresql-contrib tdsodbc unixodbc unixodbc-dev --no-install-recommends \ && ACCEPT_EULA=Y apt-get install -y msodbcsql18 \ && rm -rf /var/lib/apt/lists/* USER airflow # Download openmetadata airflow plugins @@ -16,5
+16,12 @@ RUN curl -LJO https://github.com/open-metadata/OpenMetadata/releases/download/0. # Argument to provide for Ingestion Dependencies to install. Defaults to all ARG INGESTION_DEPENDENCY="all" RUN pip install --upgrade openmetadata-airflow-managed-apis openmetadata-ingestion[${INGESTION_DEPENDENCY}] + +# Uninstalling psycopg2-binary and installing psycopg2 instead +# because the psycopg2-binary generates an architecture specific error +# while authenticating connection with the airflow, psycopg2 solves this error +RUN pip uninstall psycopg2-binary -y +RUN pip install psycopg2 + # Make and copy required folders for openmetadata-airflow-apis RUN mkdir -p /opt/airflow/dag_generated_configs && cp -r plugins/dag_* /opt/airflow diff --git a/docker/local-metadata/docker-compose-dev-postgres.yml b/docker/local-metadata/docker-compose-dev-postgres.yml index 0daed71cd06..bcf3936c4aa 100644 --- a/docker/local-metadata/docker-compose-dev-postgres.yml +++ b/docker/local-metadata/docker-compose-dev-postgres.yml @@ -11,18 +11,11 @@ version: "3.9" services: - postgres: - build: - context: ../../. - dockerfile: docker/local-metadata/Dockerfile_postgres - container_name: openmetadata_postgres + postgresql: + image: openmetadata/postgresql:latest restart: always - depends_on: - - elasticsearch environment: POSTGRES_PASSWORD: password - expose: - - 5432 ports: - 5432:5432 networks: diff --git a/docker/local-metadata/docker-compose-postgres.yml b/docker/local-metadata/docker-compose-postgres.yml new file mode 100644 index 00000000000..a2e23aeec72 --- /dev/null +++ b/docker/local-metadata/docker-compose-postgres.yml @@ -0,0 +1,181 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +version: "3.9" +volumes: + ingestion-volume-dag-airflow: + ingestion-volume-dags: + ingestion-volume-tmp: +services: + postgresql: + build: + context: ../../. + dockerfile: docker/local-metadata/Dockerfile_postgres + container_name: openmetadata_postgresql + restart: always + depends_on: + - elasticsearch + environment: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: password + expose: + - 5432 + ports: + - 5432:5432 + networks: + local_app_net: + ipv4_address: 172.16.239.10 + healthcheck: + test: psql -U postgres -tAc 'select 1' -d openmetadata_db + interval: 15s + timeout: 10s + retries: 10 + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2 + container_name: openmetadata_elasticsearch + environment: + - discovery.type=single-node + - ES_JAVA_OPTS=-Xms1024m -Xmx1024m + networks: + local_app_net: + ipv4_address: 172.16.239.11 + expose: + - 9200 + - 9300 + ports: + - 9200:9200 + - 9300:9300 + + + openmetadata-server: + build: + context: ../../.
+ dockerfile: docker/local-metadata/Dockerfile + container_name: openmetadata_server + environment: + ELASTICSEARCH_HOST: elasticsearch + # OpenMetadata Server Authentication Configuration + AUTHORIZER_CLASS_NAME: ${AUTHORIZER_CLASS_NAME:-org.openmetadata.catalog.security.NoopAuthorizer} + AUTHORIZER_REQUEST_FILTER: ${AUTHORIZER_REQUEST_FILTER:-org.openmetadata.catalog.security.NoopFilter} + AUTHORIZER_ADMIN_PRINCIPALS: ${AUTHORIZER_ADMIN_PRINCIPALS:-[admin]} + AUTHORIZER_INGESTION_PRINCIPALS: ${AUTHORIZER_INGESTION_PRINCIPALS:-[ingestion-bot]} + AUTHORIZER_PRINCIPAL_DOMAIN: ${AUTHORIZER_PRINCIPAL_DOMAIN:-""} + AUTHORIZER_ENFORCE_PRINCIPAL_DOMAIN: ${AUTHORIZER_ENFORCE_PRINCIPAL_DOMAIN:-false} + AUTHORIZER_ENABLE_SECURE_SOCKET: ${AUTHORIZER_ENABLE_SECURE_SOCKET:-false} + AUTHENTICATION_PROVIDER: ${AUTHENTICATION_PROVIDER:-no-auth} + CUSTOM_OIDC_AUTHENTICATION_PROVIDER_NAME: ${CUSTOM_OIDC_AUTHENTICATION_PROVIDER_NAME:-""} + AUTHENTICATION_PUBLIC_KEYS: ${AUTHENTICATION_PUBLIC_KEYS:-[https://www.googleapis.com/oauth2/v3/certs]} + AUTHENTICATION_AUTHORITY: ${AUTHENTICATION_AUTHORITY:-https://accounts.google.com} + AUTHENTICATION_CLIENT_ID: ${AUTHENTICATION_CLIENT_ID:-""} + AUTHENTICATION_CALLBACK_URL: ${AUTHENTICATION_CALLBACK_URL:-""} + AUTHENTICATION_JWT_PRINCIPAL_CLAIMS: ${AUTHENTICATION_JWT_PRINCIPAL_CLAIMS:-[email,preferred_username,sub]} + # OpenMetadata Server Airflow Configuration + AIRFLOW_HOST: ${AIRFLOW_HOST:-http://ingestion:8080} + SERVER_HOST_API_URL: ${SERVER_HOST_API_URL:-http://localhost:8585/api} + # OpenMetadata Airflow Azure SSO Configuration + AIRFLOW_AUTH_PROVIDER: ${AIRFLOW_AUTH_PROVIDER:-no-auth} + OM_AUTH_AIRFLOW_AZURE_CLIENT_SECRET: ${OM_AUTH_AIRFLOW_AZURE_CLIENT_SECRET:-""} + OM_AUTH_AIRFLOW_AZURE_AUTHORITY_URL: ${OM_AUTH_AIRFLOW_AZURE_AUTHORITY_URL:-""} + OM_AUTH_AIRFLOW_AZURE_SCOPES: ${OM_AUTH_AIRFLOW_AZURE_SCOPES:-[]} + OM_AUTH_AIRFLOW_AZURE_CLIENT_ID: ${OM_AUTH_AIRFLOW_AZURE_CLIENT_ID:-""} + # OpenMetadata Airflow Google SSO Configuration
+ OM_AUTH_AIRFLOW_GOOGLE_SECRET_KEY_PATH: ${OM_AUTH_AIRFLOW_GOOGLE_SECRET_KEY_PATH:-""} + OM_AUTH_AIRFLOW_GOOGLE_AUDIENCE: ${OM_AUTH_AIRFLOW_GOOGLE_AUDIENCE:-"https://www.googleapis.com/oauth2/v4/token"} + # OpenMetadata Airflow Okta SSO Configuration + OM_AUTH_AIRFLOW_OKTA_CLIENT_ID: ${OM_AUTH_AIRFLOW_OKTA_CLIENT_ID:-""} + OM_AUTH_AIRFLOW_OKTA_ORGANIZATION_URL: ${OM_AUTH_AIRFLOW_OKTA_ORGANIZATION_URL:-""} + OM_AUTH_AIRFLOW_OKTA_PRIVATE_KEY: ${OM_AUTH_AIRFLOW_OKTA_PRIVATE_KEY:-""} + OM_AUTH_AIRFLOW_OKTA_SA_EMAIL: ${OM_AUTH_AIRFLOW_OKTA_SA_EMAIL:-""} + OM_AUTH_AIRFLOW_OKTA_SCOPES: ${OM_AUTH_AIRFLOW_OKTA_SCOPES:-[]} + # OpenMetadata Airflow Auth0 SSO Configuration + OM_AUTH_AIRFLOW_AUTH0_CLIENT_ID: ${OM_AUTH_AIRFLOW_AUTH0_CLIENT_ID:-""} + OM_AUTH_AIRFLOW_AUTH0_CLIENT_SECRET: ${OM_AUTH_AIRFLOW_AUTH0_CLIENT_SECRET:-""} + OM_AUTH_AIRFLOW_AUTH0_DOMAIN_URL: ${OM_AUTH_AIRFLOW_AUTH0_DOMAIN_URL:-""} + # OpenMetadata Airflow Custom OIDC SSO Configuration + OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID:-""} + OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY:-""} + OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL:-""} + # Database configuration for Postgres + DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-org.postgresql.Driver} + DB_SCHEME: ${DB_SCHEME:-postgresql} + DB_USE_SSL: ${DB_USE_SSL:-false} + DB_USER: ${DB_USER:-openmetadata_user} + DB_USER_PASSWORD: ${DB_USER_PASSWORD:-openmetadata_password} + DB_HOST: ${DB_HOST:-postgresql} + DB_PORT: ${DB_PORT:-5432} + OM_DATABASE: ${OM_DATABASE:-openmetadata_db} + expose: + - 8585 + - 8586 + - 9200 + - 9300 + - 5432 + ports: + - 8585:8585 + - 8586:8586 + depends_on: + elasticsearch: + condition: service_started + postgresql: + condition: service_healthy + networks: + local_app_net: + ipv4_address: 172.16.239.13 + extra_hosts: + - "postgresql:172.16.239.10" + - "elasticsearch:172.16.239.11" + healthcheck: + test: [ "CMD", "curl",
"-f", "http://localhost:8586/healthcheck" ] + + ingestion: + build: + context: ../../. + dockerfile: ingestion/Dockerfile_local + args: + INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all} + container_name: openmetadata_ingestion + depends_on: + elasticsearch: + condition: service_started + postgresql: + condition: service_healthy + openmetadata-server: + condition: service_healthy + environment: + DB_HOST: ${DB_HOST:-postgresql} + DB_PORT: ${DB_PORT:-5432} + AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} + DB_USER: ${DB_USER:-airflow_user} + DB_SCHEME: ${DB_SCHEME:-postgresql+psycopg2} + DB_PASSWORD: ${DB_PASSWORD:-airflow_pass} + expose: + - 8080 + ports: + - 8080:8080 + networks: + - local_app_net + extra_hosts: + - "postgresql:172.16.239.10" + - "localhost:172.16.239.11" + - "localhost:172.16.239.13" + volumes: + - /var/run/docker.sock:/var/run/docker.sock + - ingestion-volume-dag-airflow:/airflow/dag_generated_configs + - ingestion-volume-dags:/airflow/dags + - ingestion-volume-tmp:/tmp +networks: + local_app_net: + name: ometa_network + ipam: + driver: default + config: + - subnet: "172.16.239.0/24" diff --git a/docker/local-metadata/docker-compose.yml b/docker/local-metadata/docker-compose.yml index 04200a41ab7..e2279050b51 100644 --- a/docker/local-metadata/docker-compose.yml +++ b/docker/local-metadata/docker-compose.yml @@ -14,6 +14,7 @@ volumes: ingestion-volume-dag-airflow: ingestion-volume-dags: ingestion-volume-tmp: + services: mysql: build: @@ -32,6 +33,11 @@ services: networks: local_app_net: ipv4_address: 172.16.239.10 + healthcheck: + test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db" + interval: 15s + timeout: 10s + retries: 10 elasticsearch: image: docker.elastic.co/elasticsearch/elasticsearch:7.10.2 @@ -39,7 +45,6 @@ services: environment: - discovery.type=single-node - ES_JAVA_OPTS=-Xms1024m -Xmx1024m - networks: local_app_net: ipv4_address: 172.16.239.11 @@ -109,15 +114,18 @@ services: - 8585:8585 -
8586:8586 depends_on: - - elasticsearch - - mysql - - ingestion + elasticsearch: + condition: service_started + mysql: + condition: service_healthy networks: local_app_net: ipv4_address: 172.16.239.13 extra_hosts: - "localhost:172.16.239.10" - "elasticsearch:172.16.239.11" + healthcheck: + test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ] ingestion: build: @@ -127,8 +135,12 @@ services: INGESTION_DEPENDENCY: ${INGESTION_DEPENDENCY:-all} container_name: openmetadata_ingestion depends_on: - - mysql - - elasticsearch + elasticsearch: + condition: service_started + mysql: + condition: service_healthy + openmetadata-server: + condition: service_healthy expose: - 8080 ports: @@ -144,6 +156,7 @@ services: - ingestion-volume-dag-airflow:/airflow/dag_generated_configs - ingestion-volume-dags:/ingestion/examples/airflow/dags - ingestion-volume-tmp:/tmp + networks: local_app_net: name: ometa_network diff --git a/docker/metadata/docker-compose-postgres.yml b/docker/metadata/docker-compose-postgres.yml index f749fd72f7d..cc45ee27ceb 100644 --- a/docker/metadata/docker-compose-postgres.yml +++ b/docker/metadata/docker-compose-postgres.yml @@ -14,10 +14,11 @@ volumes: ingestion-volume-dag-airflow: ingestion-volume-dags: ingestion-volume-tmp: + services: - postgressql: - container_name: openmetadata_postgressql - image: openmetadata/postgressql:0.11.4 + postgresql: + container_name: openmetadata_postgresql + image: openmetadata/postgresql:0.11.4 restart: always environment: POSTGRES_USER: postgres @@ -27,6 +28,11 @@ services: networks: app_net: ipv4_address: 172.16.240.10 + healthcheck: + test: psql -U postgres -tAc 'select 1' -d openmetadata_db + interval: 15s + timeout: 10s + retries: 10 elasticsearch: container_name: openmetadata_elasticsearch @@ -34,7 +40,6 @@ services: environment: - discovery.type=single-node - ES_JAVA_OPTS=-Xms1024m -Xmx1024m - networks: app_net: ipv4_address: 172.16.240.11 @@ -89,15 +94,15 @@ services:
OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_CLIENT_ID:-""} OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_SECRET_KEY:-""} OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL: ${OM_AUTH_AIRFLOW_CUSTOM_OIDC_TOKEN_ENDPOINT_URL:-""} - #Database configuration for Postgressql + #Database configuration for postgresql DB_DRIVER_CLASS: ${DB_DRIVER_CLASS:-org.postgresql.Driver} DB_SCHEME: ${DB_SCHEME:-postgresql} DB_USE_SSL: ${DB_USE_SSL:-false} DB_USER: ${DB_USER:-openmetadata_user} DB_USER_PASSWORD: ${DB_USER_PASSWORD:-openmetadata_password} - DB_HOST: ${DB_HOST:-postgres} + DB_HOST: ${DB_HOST:-postgresql} DB_PORT: ${DB_PORT:-5432} - OM_DATABASE: ${OM_DATABASE:-openmetadata_db} + OM_DATABASE: ${OM_DATABASE:-openmetadata_db} expose: - 8585 - 8586 @@ -108,21 +113,31 @@ services: - 8585:8585 - 8586:8586 depends_on: - - postgressql + elasticsearch: + condition: service_started + postgresql: + condition: service_healthy networks: app_net: ipv4_address: 172.16.240.13 extra_hosts: - - "postgres:172.16.240.10" + - "postgresql:172.16.240.10" - "elasticsearch:172.16.240.11" + healthcheck: + test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ] ingestion: container_name: openmetadata_ingestion image: openmetadata/ingestion:0.11.4 depends_on: - - potgressql + elasticsearch: + condition: service_started + postgresql: + condition: service_healthy + openmetadata-server: + condition: service_healthy environment: - DB_HOST: ${DB_HOST:-postgres} + DB_HOST: ${DB_HOST:-postgresql} DB_PORT: ${DB_PORT:-5432} AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} DB_USER: ${DB_USER:-airflow_user} @@ -135,7 +150,7 @@ services: networks: - app_net extra_hosts: - - "postgres:172.16.240.10" + - "postgresql:172.16.240.10" - "localhost:172.16.240.11" - "localhost:172.16.240.13" volumes: diff --git a/docker/metadata/docker-compose.yml b/docker/metadata/docker-compose.yml index ada552fdff0..eabcd2dc3af 100644 --- a/docker/metadata/docker-compose.yml +++ b/docker/metadata/docker-compose.yml @@ -14,6 +14,7 @@ volumes: ingestion-volume-dag-airflow:
ingestion-volume-dags: ingestion-volume-tmp: + services: mysql: container_name: openmetadata_mysql @@ -26,6 +27,11 @@ services: networks: app_net: ipv4_address: 172.16.240.10 + healthcheck: + test: mysql --user=root --password=$$MYSQL_ROOT_PASSWORD --silent --execute "use openmetadata_db" + interval: 15s + timeout: 10s + retries: 10 elasticsearch: container_name: openmetadata_elasticsearch @@ -33,7 +39,6 @@ services: environment: - discovery.type=single-node - ES_JAVA_OPTS=-Xms1024m -Xmx1024m - networks: app_net: ipv4_address: 172.16.240.11 @@ -98,19 +103,29 @@ services: - 8585:8585 - 8586:8586 depends_on: - - mysql + elasticsearch: + condition: service_started + mysql: + condition: service_healthy networks: app_net: ipv4_address: 172.16.240.13 extra_hosts: - "localhost:172.16.240.10" - "elasticsearch:172.16.240.11" + healthcheck: + test: [ "CMD", "curl", "-f", "http://localhost:8586/healthcheck" ] ingestion: container_name: openmetadata_ingestion image: openmetadata/ingestion:0.11.5 depends_on: - - mysql + elasticsearch: + condition: service_started + mysql: + condition: service_healthy + openmetadata-server: + condition: service_healthy expose: - 8080 ports: diff --git a/docker/metadata/openmetadata-start.sh b/docker/metadata/openmetadata-start.sh index 8313bcb4807..02fe7cd7c14 100644 --- a/docker/metadata/openmetadata-start.sh +++ b/docker/metadata/openmetadata-start.sh @@ -10,10 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -MYSQL="${MYSQL_HOST:-mysql}":"${MYSQL_PORT:-3306}" -while !
nc -z -w 5 "${MYSQL}"; - do echo "Trying to connect to ${MYSQL}"; sleep 5; -done +echo "Starting OpenMetadata Server"; cd /openmetadata-*/ ./bootstrap/bootstrap_storage.sh migrate-all ./bin/openmetadata-server-start.sh conf/openmetadata.yaml diff --git a/ingestion/Dockerfile b/ingestion/Dockerfile index e71bbe7d68d..2f091c3ed48 100644 --- a/ingestion/Dockerfile +++ b/ingestion/Dockerfile @@ -1,7 +1,7 @@ FROM python:3.9-slim as base ENV AIRFLOW_HOME=/airflow RUN apt-get update && \ - apt-get install -y gcc libsasl2-modules libxml2 libsasl2-dev build-essential libssl-dev libffi-dev librdkafka-dev unixodbc-dev python3.9-dev openjdk-11-jre unixodbc freetds-dev freetds-bin tdsodbc libevent-dev wget openssl --no-install-recommends && \ + apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib python3.9-dev tdsodbc unixodbc unixodbc-dev wget --no-install-recommends && \ rm -rf /var/lib/apt/lists/* # Manually fix security vulnerability from curl @@ -27,6 +27,7 @@ RUN apt-get update && \ ENV CONSTRAINT_URL="https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.9.txt" # Add docker provider for the DockerOperator +RUN pip install --upgrade pip RUN pip install "apache-airflow[docker]==${AIRFLOW_VERSION}" --constraint "${CONSTRAINT_URL}" @@ -43,6 +44,13 @@ COPY ingestion /ingestion ARG INGESTION_DEPENDENCY=all RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]" +# Uninstalling psycopg2-binary and installing psycopg2 instead +# because the psycopg2-binary generates an architecture specific error +# while authenticating connection with the airflow, psycopg2 solves this error +RUN pip uninstall psycopg2-binary -y +RUN pip install psycopg2 + + RUN airflow db init RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg RUN chmod 755 ingestion_dependency.sh diff --git
a/ingestion/Dockerfile_local b/ingestion/Dockerfile_local index ea2e0199da6..f3cdb8b93b3 100644 --- a/ingestion/Dockerfile_local +++ b/ingestion/Dockerfile_local @@ -1,7 +1,9 @@ FROM python:3.9-slim as base ENV AIRFLOW_HOME=/airflow RUN apt-get update && \ - apt-get install -y gcc libsasl2-modules libxml2 libsasl2-dev build-essential libssl-dev libffi-dev librdkafka-dev unixodbc-dev python3.9-dev openjdk-11-jre unixodbc freetds-dev freetds-bin tdsodbc libevent-dev wget openssl --no-install-recommends && \ + apt-get install -y build-essential freetds-bin freetds-dev gcc libevent-dev libffi-dev libpq-dev librdkafka-dev \ + libsasl2-dev libsasl2-modules libssl-dev libxml2 netcat openjdk-11-jre openssl postgresql postgresql-contrib \ + python3.9-dev tdsodbc unixodbc unixodbc-dev wget --no-install-recommends && \ rm -rf /var/lib/apt/lists/* # Manually fix security vulnerability from curl @@ -45,6 +47,12 @@ COPY ingestion /ingestion ARG INGESTION_DEPENDENCY=all RUN pip install --upgrade ".[${INGESTION_DEPENDENCY}]" +# Uninstalling psycopg2-binary and installing psycopg2 instead +# because the psycopg2-binary generates an architecture specific error +# while authenticating connection with the airflow, psycopg2 solves this error +RUN pip uninstall psycopg2-binary -y +RUN pip install psycopg2 + RUN airflow db init RUN cp -r /ingestion/airflow.cfg /airflow/airflow.cfg RUN chmod 755 ingestion_dependency.sh diff --git a/ingestion/ingestion_dependency.sh b/ingestion/ingestion_dependency.sh index 01caca4f164..dcbc99c94b6 100755 --- a/ingestion/ingestion_dependency.sh +++ b/ingestion/ingestion_dependency.sh @@ -10,23 +10,22 @@ # See the License for the specific language governing permissions and # limitations under the License.
-MYSQL_HOST=${MYSQL_HOST:-mysql} -MYSQL_PORT=${MYSQL_PORT:-3306} +DB_HOST=${DB_HOST:-mysql} +DB_PORT=${DB_PORT:-3306} -MYSQL_DB=${MYSQL_DB:-airflow_db} -MYSQL_USER=${MYSQL_USER:-airflow_user} -MYSQL_PASSWORD=${MYSQL_PASSWORD:-airflow_pass} +AIRFLOW_DB=${AIRFLOW_DB:-airflow_db} +DB_USER=${DB_USER:-airflow_user} +DB_SCHEME=${DB_SCHEME:-mysql+pymysql} +DB_PASSWORD=${DB_PASSWORD:-airflow_pass} -MYSQL_CONN="${MYSQL_USER}:${MYSQL_PASSWORD}@${MYSQL_HOST}:${MYSQL_PORT}/${MYSQL_DB}" +DB_CONN="${DB_SCHEME}://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${AIRFLOW_DB}" AIRFLOW_ADMIN_USER=${AIRFLOW_ADMIN_USER:-admin} AIRFLOW_ADMIN_PASSWORD=${AIRFLOW_ADMIN_PASSWORD:-admin} OPENMETADATA_SERVER=${OPENMETADATA_SERVER:-"http://openmetadata-server:8585"} -sed -i "s#\(sql_alchemy_conn = \).*#\1mysql+pymysql://${MYSQL_CONN}#" /airflow/airflow.cfg - -while ! wget -O /dev/null -o /dev/null $MYSQL_HOST:$MYSQL_PORT; do sleep 5; done +sed -i "s#\(sql_alchemy_conn = \).*#\1${DB_CONN}#" /airflow/airflow.cfg airflow db init