diff --git a/docker/development/docker-compose.yml b/docker/development/docker-compose.yml index b5509137a52..ceaef3cecec 100644 --- a/docker/development/docker-compose.yml +++ b/docker/development/docker-compose.yml @@ -492,7 +492,7 @@ services: DB_HOST: ${AIRFLOW_DB_HOST:-mysql} DB_PORT: ${AIRFLOW_DB_PORT:-3306} AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} - DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql} + DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+mysqldb} DB_USER: ${AIRFLOW_DB_USER:-airflow_user} DB_PASSWORD: ${AIRFLOW_DB_PASSWORD:-airflow_pass} diff --git a/docker/docker-compose-ingestion/docker-compose-ingestion.yml b/docker/docker-compose-ingestion/docker-compose-ingestion.yml index 072745d51b0..d20b7d6e11f 100644 --- a/docker/docker-compose-ingestion/docker-compose-ingestion.yml +++ b/docker/docker-compose-ingestion/docker-compose-ingestion.yml @@ -26,7 +26,7 @@ services: DB_HOST: ${AIRFLOW_DB_HOST:-mysql} DB_PORT: ${AIRFLOW_DB_PORT:-3306} AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} - DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql} + DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+mysqldb} DB_USER: ${AIRFLOW_DB_USER:-airflow_user} DB_PASSWORD: ${AIRFLOW_DB_PASSWORD:-airflow_pass} # extra connection-string properties for the database diff --git a/docker/docker-compose-quickstart/docker-compose.yml b/docker/docker-compose-quickstart/docker-compose.yml index ab8927baf7d..187e05dad99 100644 --- a/docker/docker-compose-quickstart/docker-compose.yml +++ b/docker/docker-compose-quickstart/docker-compose.yml @@ -497,7 +497,7 @@ services: DB_HOST: ${AIRFLOW_DB_HOST:-mysql} DB_PORT: ${AIRFLOW_DB_PORT:-3306} AIRFLOW_DB: ${AIRFLOW_DB:-airflow_db} - DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+pymysql} + DB_SCHEME: ${AIRFLOW_DB_SCHEME:-mysql+mysqldb} DB_USER: ${AIRFLOW_DB_USER:-airflow_user} DB_PASSWORD: ${AIRFLOW_DB_PASSWORD:-airflow_pass} # extra connection-string properties for the database diff --git a/ingestion/Dockerfile b/ingestion/Dockerfile index 493909bcaa8..ef4b874f8b5 100644 --- 
a/ingestion/Dockerfile +++ b/ingestion/Dockerfile @@ -1,6 +1,6 @@ FROM mysql:8.3 as mysql -FROM apache/airflow:2.9.1-python3.10 +FROM apache/airflow:2.9.3-python3.10 USER root RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list @@ -78,7 +78,7 @@ ENV PIP_NO_CACHE_DIR=1 ENV PIP_QUIET=1 ARG RI_VERSION="1.6.4.0" RUN pip install --upgrade pip -RUN pip install "openmetadata-managed-apis~=${RI_VERSION}" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.9.1/constraints-3.10.txt" +RUN pip install "openmetadata-managed-apis~=${RI_VERSION}" --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.10.txt" RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=${RI_VERSION}" # Temporary workaround for https://github.com/open-metadata/OpenMetadata/issues/9593 diff --git a/ingestion/Dockerfile.ci b/ingestion/Dockerfile.ci index f8433f776ca..6e7c6e2a640 100644 --- a/ingestion/Dockerfile.ci +++ b/ingestion/Dockerfile.ci @@ -1,6 +1,6 @@ FROM mysql:8.3 as mysql -FROM apache/airflow:2.9.1-python3.10 +FROM apache/airflow:2.9.3-python3.10 USER root RUN curl -sS https://packages.microsoft.com/keys/microsoft.asc | apt-key add - RUN curl -sS https://packages.microsoft.com/config/debian/11/prod.list > /etc/apt/sources.list.d/mssql-release.list @@ -73,7 +73,7 @@ COPY --chown=airflow:0 openmetadata-airflow-apis /home/airflow/openmetadata-airf COPY --chown=airflow:0 ingestion/examples/airflow/dags /opt/airflow/dags USER airflow -ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.1/constraints-3.10.txt" +ARG AIRFLOW_CONSTRAINTS_LOCATION="https://raw.githubusercontent.com/apache/airflow/constraints-2.9.3/constraints-3.10.txt" # Disable pip cache dir # https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching diff --git 
a/ingestion/ingestion_dependency.sh b/ingestion/ingestion_dependency.sh index ee54d6f6ac9..2b8372f852f 100755 --- a/ingestion/ingestion_dependency.sh +++ b/ingestion/ingestion_dependency.sh @@ -15,7 +15,7 @@ DB_PORT=${DB_PORT:-3306} AIRFLOW_DB=${AIRFLOW_DB:-airflow_db} DB_USER=${DB_USER:-airflow_user} -DB_SCHEME=${DB_SCHEME:-mysql+pymysql} +DB_SCHEME=${DB_SCHEME:-mysql+mysqldb} DB_PASSWORD=${DB_PASSWORD:-airflow_pass} DB_PROPERTIES=${DB_PROPERTIES:-""} diff --git a/ingestion/setup.py b/ingestion/setup.py index 7d81e0a592d..b35f1f7e340 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -19,7 +19,7 @@ from setuptools import setup # Add here versions required for multiple plugins VERSIONS = { - "airflow": "apache-airflow==2.9.1", + "airflow": "apache-airflow==2.9.3", "adlfs": "adlfs>=2023.1.0", "avro": "avro>=1.11.3,<1.12", "boto3": "boto3>=1.20,<2.0", # No need to add botocore separately. It's a dep from boto3 @@ -56,8 +56,13 @@ VERSIONS = { "elasticsearch8": "elasticsearch8~=8.9.0", "giturlparse": "giturlparse", "validators": "validators~=0.22.0", - "teradata": "teradatasqlalchemy>=20.0.0.0", - "collate-data-diff": "collate-data-diff<=0.11.3", + "teradata": "teradatasqlalchemy==20.0.0.2", + "cockroach": "sqlalchemy-cockroachdb~=2.0", + "cassandra": "cassandra-driver>=3.28.0", + "pydoris": "pydoris==1.0.2", + "pyiceberg": "pyiceberg==0.5.1", + "google-cloud-bigtable": "google-cloud-bigtable>=2.0.0", + "pyathena": "pyathena~=3.0", } COMMONS = { @@ -95,9 +100,9 @@ COMMONS = { } DATA_DIFF = { - driver: f"collate-data-diff[{driver}]<=0.11.3" + driver: f"collate-data-diff[{driver}]" # data-diff uses different drivers out-of-the-box than OpenMetadata - # the exrtas are described here: + # the extras are described here: # https://github.com/open-metadata/collate-data-diff/blob/main/pyproject.toml#L68 # install all data diffs with "pip install collate-data-diff[all-dbs]" for driver in [ @@ -142,8 +147,13 @@ base_requirements = { "tabulate==0.9.0", "typing-inspect", 
"packaging", # For version parsing + "setuptools~=70.0", "shapely", - VERSIONS["collate-data-diff"], + "collate-data-diff", + # TODO: Remove one once we have updated datadiff version + "snowflake-connector-python>=3.13.1,<4.0.0", + "mysql-connector-python>=8.0.29;python_version<'3.9'", + "mysql-connector-python>=9.1;python_version>='3.9'", } plugins: Dict[str, Set[str]] = { @@ -307,7 +317,7 @@ plugins: Dict[str, Set[str]] = { VERSIONS["geoalchemy2"], }, "sagemaker": {VERSIONS["boto3"]}, - "salesforce": {"simple_salesforce~=1.11"}, + "salesforce": {"simple_salesforce~=1.11", "authlib>=1.3.1"}, "sample-data": {VERSIONS["avro"], VERSIONS["grpc-tools"]}, "sap-hana": {"hdbcli", "sqlalchemy-hana"}, "sas": {}, diff --git a/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md b/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md index f29fc2e2002..3852522895b 100644 --- a/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md +++ b/openmetadata-docs/content/partials/v1.6/deployment/upgrade/upgrade-prerequisites.md @@ -86,6 +86,18 @@ After the migration is finished, you can revert this changes. # Backward Incompatible Changes +## 1.6.4 + +### Airflow 2.9.3 + +We are upgrading the Ingestion Airflow version to 2.9.3. + +The upgrade from 2.9.1 to 2.9.3 should happen transparently. The only thing to note is that there is +an ongoing issue with Airflow migrations and the `pymysql` driver, which we used before. If you set the +`DB_SCHEME` environment variable yourself in the ingestion image, make sure it is now set to `mysql+mysqldb`. + +We have updated the default values accordingly. + ## 1.6.2 ### Executable Logical Test Suites