From 0e92a975e3cb8c3595df6cc07686500a23ba77ef Mon Sep 17 00:00:00 2001 From: Pere Miquel Brull Date: Tue, 19 Dec 2023 11:09:38 +0100 Subject: [PATCH] #14425 - Create ingestion-base-slim image (#14426) * #14425 - Create ingestion-base-slim image * Format * Bump airflow * Bump constraints --- ...ocker-openmetadata-ingestion-base-slim.yml | 83 +++++++++++++++++++ ingestion/Dockerfile | 6 +- ingestion/Dockerfile.ci | 6 +- ingestion/operators/docker/Dockerfile | 2 +- ingestion/operators/docker/Dockerfile-dev | 2 +- ingestion/setup.py | 31 ++++--- 6 files changed, 111 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/docker-openmetadata-ingestion-base-slim.yml diff --git a/.github/workflows/docker-openmetadata-ingestion-base-slim.yml b/.github/workflows/docker-openmetadata-ingestion-base-slim.yml new file mode 100644 index 00000000000..39f912bebdd --- /dev/null +++ b/.github/workflows/docker-openmetadata-ingestion-base-slim.yml @@ -0,0 +1,83 @@ +# Copyright 2021 Collate +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: docker-openmetadata-ingestion-base-slim docker +on: + workflow_dispatch: + inputs: + tag: + description: "Input tag" + required: true + push_latest_tag_to_release: + description: "Do you want to update docker image latest tag as well ?" 
+ type: boolean + release: + types: [published] + +jobs: + push_to_docker_hub: + runs-on: ubuntu-latest + env: + input: ${{ github.event.inputs.tag }} + + steps: + - name: Check trigger type + if: ${{ env.input == '' }} + run: echo "input=1.3.0-SNAPSHOT" >> $GITHUB_ENV + + - name: Check out the Repo + uses: actions/checkout@v3 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + + - name: Login to DockerHub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_OPENMETADATA_USERNAME }} + password: ${{ secrets.DOCKERHUB_OPENMETADATA_TOKEN }} + + - name: Install Ubuntu dependencies + run: | + sudo apt-get install -y python3-venv + + - name: Install open-metadata dependencies + run: | + python3 -m venv env + source env/bin/activate + pip install --upgrade pip + sudo make install_antlr_cli + make install_dev generate + + - name: Process Docker Tags + id: input_check + run: | + if ${{ github.event_name == 'release' }}; then + echo "tags=openmetadata/ingestion-base-slim:${{ env.input }},openmetadata/ingestion-base-slim:latest" >> $GITHUB_OUTPUT + elif ${{ github.event_name == 'workflow_dispatch' && inputs.push_latest_tag_to_release }}; then + echo "tags=openmetadata/ingestion-base-slim:${{inputs.tag}},openmetadata/ingestion-base-slim:latest" >> $GITHUB_OUTPUT + else + echo "tags=openmetadata/ingestion-base-slim:${{ inputs.tag }}" >> $GITHUB_OUTPUT + fi + + - name: Build and push if event is workflow_dispatch and input is checked + uses: docker/build-push-action@v3 + with: + context: . 
+ platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name == 'release' || github.event_name == 'workflow_dispatch' }} + tags: ${{ steps.input_check.outputs.tags }} + file: ./ingestion/operators/docker/Dockerfile + build-args: | + INGESTION_DEPENDENCY=slim \ No newline at end of file diff --git a/ingestion/Dockerfile b/ingestion/Dockerfile index a24608c3277..b6d9e8d609b 100644 --- a/ingestion/Dockerfile +++ b/ingestion/Dockerfile @@ -1,4 +1,4 @@ -FROM apache/airflow:2.6.3-python3.10 +FROM apache/airflow:2.7.3-python3.10 USER root RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list @@ -83,7 +83,7 @@ ENV PIP_NO_CACHE_DIR=1 ENV PIP_QUIET=1 ARG RI_VERSION="1.3.0.0.dev0" RUN pip install --upgrade pip -RUN pip install "openmetadata-managed-apis~=${RI_VERSION}" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.9.txt" +RUN pip install "openmetadata-managed-apis~=${RI_VERSION}" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt" RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=${RI_VERSION}" # Temporary workaround for https://github.com/open-metadata/OpenMetadata/issues/9593 @@ -99,7 +99,7 @@ RUN pip install "python-daemon>=3.0.0" RUN pip freeze | grep "apache-airflow-providers" | grep -v "docker\|http" | xargs pip uninstall -y # Uninstalling psycopg2-binary and installing psycopg2 instead # because the psycopg2-binary generates a architecture specific error -# while authrenticating connection with the airflow, psycopg2 solves this error +# while authenticating connection with the airflow, psycopg2 solves this error RUN pip uninstall psycopg2-binary -y RUN pip install psycopg2 mysqlclient==2.1.1 # Make required folders for openmetadata-airflow-apis diff --git a/ingestion/Dockerfile.ci b/ingestion/Dockerfile.ci 
index 6e8eafca034..a4ac018505b 100644 --- a/ingestion/Dockerfile.ci +++ b/ingestion/Dockerfile.ci @@ -1,4 +1,4 @@ -FROM apache/airflow:2.6.3-python3.10 +FROM apache/airflow:2.7.3-python3.10 USER root RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list @@ -76,7 +76,7 @@ COPY --chown=airflow:0 openmetadata-airflow-apis /home/airflow/openmetadata-airf COPY --chown=airflow:0 ingestion/examples/airflow/dags /opt/airflow/dags USER airflow -ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.6.3/constraints-3.9.txt" +ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.7.3/constraints-3.10.txt" # Disable pip cache dir # https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching @@ -110,7 +110,7 @@ RUN pip freeze | grep "apache-airflow-providers" | grep -v "docker\|http" | xarg # Uninstalling psycopg2-binary and installing psycopg2 instead # because the psycopg2-binary generates a architecture specific error -# while authrenticating connection with the airflow, psycopg2 solves this error +# while authenticating connection with the airflow, psycopg2 solves this error RUN pip uninstall psycopg2-binary -y RUN pip install psycopg2 mysqlclient==2.1.1 # Make required folders for openmetadata-airflow-apis diff --git a/ingestion/operators/docker/Dockerfile b/ingestion/operators/docker/Dockerfile index 2fca6c00304..2221394cecf 100644 --- a/ingestion/operators/docker/Dockerfile +++ b/ingestion/operators/docker/Dockerfile @@ -101,6 +101,6 @@ RUN if [[ $(uname -m) == "arm64" ]]; \ # Uninstalling psycopg2-binary and installing psycopg2 instead # because the psycopg2-binary generates a architecture specific error -# while authrenticating connection with the airflow, psycopg2 solves this error +# while authenticating connection with the airflow, psycopg2 
solves this error RUN pip uninstall psycopg2-binary -y RUN pip install psycopg2 mysqlclient==2.1.1 diff --git a/ingestion/operators/docker/Dockerfile-dev b/ingestion/operators/docker/Dockerfile-dev index e25fb50ab75..b5483a9f739 100644 --- a/ingestion/operators/docker/Dockerfile-dev +++ b/ingestion/operators/docker/Dockerfile-dev @@ -103,6 +103,6 @@ RUN if [[ $(uname -m) == "arm64" ]]; \ # Uninstalling psycopg2-binary and installing psycopg2 instead # because the psycopg2-binary generates a architecture specific error -# while authrenticating connection with the airflow, psycopg2 solves this error +# while authenticating connection with the airflow, psycopg2 solves this error RUN pip uninstall psycopg2-binary -y RUN pip install psycopg2 mysqlclient==2.1.1 diff --git a/ingestion/setup.py b/ingestion/setup.py index efc5bb14109..b2a80fff4ee 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -13,13 +13,13 @@ Python Dependencies """ -from typing import Dict, Set +from typing import Dict, List, Set from setuptools import setup # Add here versions required for multiple plugins VERSIONS = { - "airflow": "apache-airflow==2.6.3", + "airflow": "apache-airflow==2.7.3", "avro": "avro~=1.11", "boto3": "boto3>=1.20,<2.0", # No need to add botocore separately. 
It's a dep from boto3 "geoalchemy2": "GeoAlchemy2~=0.12", @@ -29,7 +29,7 @@ VERSIONS = { "msal": "msal~=1.2", "neo4j": "neo4j~=5.3.0", "pandas": "pandas<=2,<3", - "pyarrow": "pyarrow~=10.0", + "pyarrow": "pyarrow~=14.0", "pydomo": "pydomo~=0.3", "pymysql": "pymysql>=1.0.2", "pyodbc": "pyodbc>=4.0.35,<5", @@ -304,6 +304,20 @@ e2e_test = { "pytest-base-url", } + +def filter_requirements(filtered: Set[str]) -> List[str]: + """Return base_requirements plus the requirements of plugins not in `filtered`""" + return list( + base_requirements.union( + *[ + requirements + for plugin, requirements in plugins.items() + if plugin not in filtered + ] + ) + ) + + setup( install_requires=list(base_requirements), extras_require={ @@ -313,14 +327,9 @@ setup( "e2e_test": list(e2e_test), "data-insight": list(plugins["elasticsearch"]), **{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()}, - "all": list( - base_requirements.union( - *[ - requirements - for plugin, requirements in plugins.items() - if plugin not in {"airflow", "db2", "great-expectations"} - ] - ) + "all": filter_requirements({"airflow", "db2", "great-expectations"}), + "slim": filter_requirements( + {"airflow", "db2", "great-expectations", "deltalake", "sklearn"} ), }, )