| 
									
										
										
										
											2025-06-16 08:03:38 +02:00
										 |  |  | #  Copyright 2025 Collate | 
					
						
							| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  Licensed under the Collate Community License, Version 1.0 (the "License"); | 
					
						
							| 
									
										
										
										
											2021-12-01 12:46:28 +05:30
										 |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2025-04-03 10:39:47 +05:30
										 |  |  | #  https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE | 
					
						
							| 
									
										
										
										
											2021-08-02 15:08:30 +05:30
										 |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-13 16:07:29 +05:30
										 |  |  | """
 | 
					
						
							|  |  |  | Python Dependencies | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-24 16:08:38 +02:00
										 |  |  | import sys | 
					
						
							| 
									
										
										
										
											2023-12-19 11:09:38 +01:00
										 |  |  | from typing import Dict, List, Set | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-22 07:10:37 +01:00
										 |  |  | from setuptools import setup | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | # Add here versions required for multiple plugins | 
					
						
							|  |  |  | VERSIONS = { | 
					
						
							| 
									
										
										
										
											2025-02-20 17:11:38 +01:00
										 |  |  |     "airflow": "apache-airflow==2.10.5", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "adlfs": "adlfs>=2023.1.0", | 
					
						
							| 
									
										
										
										
											2024-04-18 17:41:09 +02:00
										 |  |  |     "avro": "avro>=1.11.3,<1.12", | 
					
						
							| 
									
										
										
										
											2023-01-18 08:20:40 +01:00
										 |  |  |     "boto3": "boto3>=1.20,<2.0",  # No need to add botocore separately. It's a dep from boto3 | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "geoalchemy2": "GeoAlchemy2~=0.12", | 
					
						
							| 
									
										
										
										
											2024-07-10 08:03:28 -04:00
										 |  |  |     "google-cloud-monitoring": "google-cloud-monitoring>=2.0.0", | 
					
						
							| 
									
										
										
										
											2025-01-17 06:34:34 +01:00
										 |  |  |     "google-cloud-storage": "google-cloud-storage>=1.43.0", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "gcsfs": "gcsfs>=2023.1.0", | 
					
						
							| 
									
										
										
										
											2025-04-24 11:55:04 +02:00
										 |  |  |     "great-expectations": "great-expectations~=0.18.0", | 
					
						
							|  |  |  |     "great-expectations-1xx": "great-expectations~=1.0", | 
					
						
							| 
									
										
										
										
											2023-02-20 13:37:27 +01:00
										 |  |  |     "grpc-tools": "grpcio-tools>=1.47.2", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "msal": "msal~=1.2", | 
					
						
							| 
									
										
										
										
											2025-04-09 11:05:44 +02:00
										 |  |  |     "neo4j": "neo4j~=5.3", | 
					
						
							| 
									
										
										
										
											2024-02-24 18:42:22 +01:00
										 |  |  |     "pandas": "pandas~=2.0.0", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "pyarrow": "pyarrow~=16.0", | 
					
						
							| 
									
										
										
										
											2024-11-13 10:14:06 +01:00
										 |  |  |     "pydantic": "pydantic~=2.0,>=2.7.0", | 
					
						
							| 
									
										
										
										
											2025-03-31 12:06:33 +02:00
										 |  |  |     "pydantic-settings": "pydantic-settings~=2.0,>=2.7.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "pydomo": "pydomo~=0.3", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "pymysql": "pymysql~=1.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "pyodbc": "pyodbc>=4.0.35,<5", | 
					
						
							| 
									
										
										
										
											2024-06-18 17:03:35 +05:30
										 |  |  |     "numpy": "numpy<2", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "scikit-learn": "scikit-learn~=1.0",  # Python 3.7 only goes up to 1.0.2 | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "packaging": "packaging", | 
					
						
							| 
									
										
										
										
											2023-06-22 10:58:38 +05:30
										 |  |  |     "azure-storage-blob": "azure-storage-blob~=12.14", | 
					
						
							|  |  |  |     "azure-identity": "azure-identity~=1.12", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "sqlalchemy-databricks": "sqlalchemy-databricks~=0.1", | 
					
						
							| 
									
										
										
										
											2025-06-06 00:44:25 +03:00
										 |  |  |     "databricks-sdk": "databricks-sdk~=0.20.0", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "trino": "trino[sqlalchemy]", | 
					
						
							| 
									
										
										
										
											2024-09-12 11:42:53 +02:00
										 |  |  |     "spacy": "spacy<3.8", | 
					
						
							| 
									
										
										
										
											2024-10-11 20:47:43 +02:00
										 |  |  |     "looker-sdk": "looker-sdk>=22.20.0,!=24.18.0", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "lkml": "lkml~=1.3", | 
					
						
							| 
									
										
										
										
											2025-05-15 17:48:39 +05:30
										 |  |  |     "tableau": "tableauserverclient==0.25",  # higher versions require urllib3>2.0 which conflicts other libs | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |     "pyhive": "pyhive[hive_pure_sasl]~=0.7", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "mongo": "pymongo~=4.3", | 
					
						
							|  |  |  |     "redshift": "sqlalchemy-redshift==0.8.12", | 
					
						
							|  |  |  |     "snowflake": "snowflake-sqlalchemy~=1.4", | 
					
						
							|  |  |  |     "elasticsearch8": "elasticsearch8~=8.9.0", | 
					
						
							| 
									
										
										
										
											2023-10-05 10:02:57 +02:00
										 |  |  |     "giturlparse": "giturlparse", | 
					
						
							| 
									
										
										
										
											2024-01-08 11:03:05 +05:30
										 |  |  |     "validators": "validators~=0.22.0", | 
					
						
							| 
									
										
										
										
											2025-02-05 20:30:17 +05:30
										 |  |  |     "teradata": "teradatasqlalchemy==20.0.0.2", | 
					
						
							| 
									
										
										
										
											2025-01-02 13:07:55 +05:30
										 |  |  |     "cockroach": "sqlalchemy-cockroachdb~=2.0", | 
					
						
							| 
									
										
										
										
											2024-12-12 15:12:55 +05:30
										 |  |  |     "cassandra": "cassandra-driver>=3.28.0", | 
					
						
							| 
									
										
										
										
											2025-03-18 06:15:25 -07:00
										 |  |  |     "opensearch": "opensearch-py~=2.4.0", | 
					
						
							| 
									
										
										
										
											2025-02-12 07:01:41 -08:00
										 |  |  |     "pydoris": "pydoris==1.0.2", | 
					
						
							|  |  |  |     "pyiceberg": "pyiceberg==0.5.1", | 
					
						
							|  |  |  |     "google-cloud-bigtable": "google-cloud-bigtable>=2.0.0", | 
					
						
							|  |  |  |     "pyathena": "pyathena~=3.0", | 
					
						
							| 
									
										
										
										
											2025-04-16 11:34:26 +05:30
										 |  |  |     "sqlalchemy-bigquery": "sqlalchemy-bigquery>=1.2.2", | 
					
						
							| 
									
										
										
										
											2025-05-20 15:32:21 +02:00
										 |  |  |     "presidio-analyzer": "presidio-analyzer==2.2.358", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | COMMONS = { | 
					
						
							| 
									
										
										
										
											2023-04-05 12:12:47 +05:30
										 |  |  |     "datalake": { | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |         VERSIONS["avro"], | 
					
						
							| 
									
										
										
										
											2023-04-05 12:12:47 +05:30
										 |  |  |         VERSIONS["boto3"], | 
					
						
							|  |  |  |         VERSIONS["pandas"], | 
					
						
							|  |  |  |         VERSIONS["pyarrow"], | 
					
						
							| 
									
										
										
										
											2024-08-20 12:19:05 +02:00
										 |  |  |         VERSIONS["numpy"], | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |         # python-snappy does not work well on 3.11 https://github.com/aio-libs/aiokafka/discussions/931 | 
					
						
							|  |  |  |         # Using this as an alternative | 
					
						
							|  |  |  |         "cramjam~=2.7", | 
					
						
							| 
									
										
										
										
											2023-04-05 12:12:47 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "hive": { | 
					
						
							|  |  |  |         "presto-types-parser>=0.0.2", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |         VERSIONS["pyhive"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     }, | 
					
						
							|  |  |  |     "kafka": { | 
					
						
							| 
									
										
										
										
											2023-03-15 15:15:57 +01:00
										 |  |  |         VERSIONS["avro"], | 
					
						
							| 
									
										
										
										
											2025-01-08 14:42:24 +05:30
										 |  |  |         "confluent_kafka>=2.1.1,<=2.6.1", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "fastavro>=1.2.0", | 
					
						
							|  |  |  |         # Due to https://github.com/grpc/grpc/issues/30843#issuecomment-1303816925 | 
					
						
							| 
									
										
										
										
											2023-02-20 13:37:27 +01:00
										 |  |  |         # use >= v1.47.2 https://github.com/grpc/grpc/blob/v1.47.2/tools/distrib/python/grpcio_tools/grpc_version.py#L17 | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS[ | 
					
						
							|  |  |  |             "grpc-tools" | 
					
						
							|  |  |  |         ],  # grpcio-tools already depends on grpcio. No need to add separately | 
					
						
							|  |  |  |         "protobuf", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-05-09 08:57:25 +05:30
										 |  |  |     "postgres": { | 
					
						
							|  |  |  |         VERSIONS["pymysql"], | 
					
						
							|  |  |  |         "psycopg2-binary", | 
					
						
							|  |  |  |         VERSIONS["geoalchemy2"], | 
					
						
							|  |  |  |         VERSIONS["packaging"], | 
					
						
							|  |  |  |     },  # Adding as Postgres SQL & GreenPlum are using common packages. | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  | DATA_DIFF = { | 
					
						
							|  |  |  |     driver: f"collate-data-diff[{driver}]" | 
					
						
							|  |  |  |     # data-diff uses different drivers out-of-the-box than OpenMetadata | 
					
						
							| 
									
										
										
										
											2025-02-18 07:56:46 +01:00
										 |  |  |     # the extras are described here: | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     # https://github.com/open-metadata/collate-data-diff/blob/main/pyproject.toml#L68 | 
					
						
							|  |  |  |     # install all data diffs with "pip install collate-data-diff[all-dbs]" | 
					
						
							|  |  |  |     for driver in [ | 
					
						
							|  |  |  |         "clickhouse", | 
					
						
							|  |  |  |         # "duckdb", # Not supported by OpenMetadata | 
					
						
							|  |  |  |         "mssql", | 
					
						
							|  |  |  |         "mysql", | 
					
						
							|  |  |  |         "oracle", | 
					
						
							|  |  |  |         # "postgresql", we dont use this as it installs psycopg2 which interferes with psycopg2-binary | 
					
						
							|  |  |  |         "presto", | 
					
						
							|  |  |  |         "redshift", | 
					
						
							|  |  |  |         "snowflake", | 
					
						
							|  |  |  |         "trino", | 
					
						
							|  |  |  |         "vertica", | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | base_requirements = { | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "antlr4-python3-runtime==4.9.2", | 
					
						
							| 
									
										
										
										
											2024-02-20 07:18:35 +01:00
										 |  |  |     VERSIONS["azure-identity"], | 
					
						
							|  |  |  |     "azure-keyvault-secrets",  # Azure Key Vault SM | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["boto3"],  # Required in base for the secrets manager | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     "cached-property==1.5.2",  # LineageParser | 
					
						
							|  |  |  |     "chardet==4.0.0",  # Used in the profiler | 
					
						
							| 
									
										
										
										
											2024-04-18 17:41:09 +02:00
										 |  |  |     "cryptography>=42.0.0", | 
					
						
							| 
									
										
										
										
											2025-06-06 00:44:25 +03:00
										 |  |  |     "google-cloud-secret-manager==2.22.1", | 
					
						
							| 
									
										
										
										
											2024-06-29 13:09:02 +09:00
										 |  |  |     "google-crc32c", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "email-validator>=2.0",  # For the pydantic generated models for Email | 
					
						
							| 
									
										
										
										
											2023-09-15 12:03:47 +05:30
										 |  |  |     "importlib-metadata>=4.13.0",  # From airflow constraints | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "Jinja2>=2.11.3", | 
					
						
							| 
									
										
										
										
											2024-01-17 00:17:11 -06:00
										 |  |  |     "jsonpatch<2.0, >=1.24", | 
					
						
							| 
									
										
										
										
											2025-07-24 03:40:51 -07:00
										 |  |  |     "kubernetes>=21.0.0",  # Kubernetes client for secrets manager | 
					
						
							| 
									
										
										
										
											2023-06-19 12:09:09 +02:00
										 |  |  |     "memory-profiler", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "mypy_extensions>=0.4.3", | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |     VERSIONS["pydantic"], | 
					
						
							| 
									
										
										
										
											2025-03-31 12:06:33 +02:00
										 |  |  |     VERSIONS["pydantic-settings"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["pymysql"], | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  |     "python-dateutil>=2.8.1", | 
					
						
							| 
									
										
										
										
											2025-06-19 17:57:10 +05:30
										 |  |  |     "python-dotenv>=0.19.0",  # For environment variable support in dbt ingestion | 
					
						
							| 
									
										
										
										
											2023-09-27 11:49:21 +02:00
										 |  |  |     "PyYAML~=6.0", | 
					
						
							| 
									
										
										
										
											2024-05-22 17:12:00 +02:00
										 |  |  |     "requests>=2.23", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "requests-aws4auth~=1.1",  # Only depends on requests as external package. Leaving as base. | 
					
						
							| 
									
										
										
										
											2023-01-27 15:26:30 +01:00
										 |  |  |     "sqlalchemy>=1.4.0,<2", | 
					
						
							| 
									
										
										
										
											2025-01-06 11:39:34 +05:30
										 |  |  |     "collate-sqllineage~=1.6.0", | 
					
						
							| 
									
										
										
										
											2023-03-24 17:59:06 +01:00
										 |  |  |     "tabulate==0.9.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "typing-inspect", | 
					
						
							| 
									
										
										
										
											2024-05-17 07:56:07 +02:00
										 |  |  |     "packaging",  # For version parsing | 
					
						
							| 
									
										
										
										
											2025-02-18 07:56:46 +01:00
										 |  |  |     "setuptools~=70.0", | 
					
						
							| 
									
										
										
										
											2024-11-06 11:17:13 +01:00
										 |  |  |     "shapely", | 
					
						
							| 
									
										
										
										
											2025-06-27 07:58:48 +02:00
										 |  |  |     "collate-data-diff>=0.11.6", | 
					
						
							| 
									
										
										
										
											2025-06-23 13:55:43 +05:30
										 |  |  |     "jaraco.functools<4.2.0",  # above 4.2 breaks the build | 
					
						
							| 
									
										
										
										
											2025-02-18 07:56:46 +01:00
										 |  |  |     # TODO: Remove one once we have updated datadiff version | 
					
						
							|  |  |  |     "snowflake-connector-python>=3.13.1,<4.0.0", | 
					
						
							|  |  |  |     "mysql-connector-python>=8.0.29;python_version<'3.9'", | 
					
						
							|  |  |  |     "mysql-connector-python>=9.1;python_version>='3.9'", | 
					
						
							| 
									
										
										
										
											2021-09-19 13:59:14 +05:30
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | plugins: Dict[str, Set[str]] = { | 
					
						
							| 
									
										
										
										
											2023-12-01 06:29:44 +01:00
										 |  |  |     "airflow": { | 
					
						
							| 
									
										
										
										
											2024-11-06 19:28:48 +05:30
										 |  |  |         "opentelemetry-exporter-otlp==1.27.0", | 
					
						
							| 
									
										
										
										
											2024-11-06 15:51:43 +05:30
										 |  |  |         "protobuf<5", | 
					
						
							| 
									
										
										
										
											2023-12-01 06:29:44 +01:00
										 |  |  |         "attrs", | 
					
						
							| 
									
										
										
										
											2024-11-06 19:28:48 +05:30
										 |  |  |         VERSIONS["airflow"], | 
					
						
							| 
									
										
										
										
											2023-12-01 06:29:44 +01:00
										 |  |  |     },  # Same as ingestion container. For development. | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "amundsen": {VERSIONS["neo4j"]}, | 
					
						
							| 
									
										
										
										
											2025-02-12 07:01:41 -08:00
										 |  |  |     "athena": {VERSIONS["pyathena"]}, | 
					
						
							| 
									
										
										
										
											2022-03-01 11:50:14 +05:30
										 |  |  |     "atlas": {}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "azuresql": {VERSIONS["pyodbc"]}, | 
					
						
							|  |  |  |     "azure-sso": {VERSIONS["msal"]}, | 
					
						
							| 
									
										
										
										
											2024-02-20 07:18:35 +01:00
										 |  |  |     "backup": {VERSIONS["boto3"], VERSIONS["azure-identity"], "azure-storage-blob"}, | 
					
						
							| 
									
										
										
										
											2022-02-07 00:06:10 +05:30
										 |  |  |     "bigquery": { | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "cachetools", | 
					
						
							| 
									
										
										
										
											2023-07-31 18:14:25 +05:30
										 |  |  |         "google-cloud-datacatalog>=3.6.2", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "google-cloud-logging", | 
					
						
							|  |  |  |         VERSIONS["pyarrow"], | 
					
						
							| 
									
										
										
										
											2024-06-18 17:03:35 +05:30
										 |  |  |         VERSIONS["numpy"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "sqlalchemy-bigquery>=1.2.2", | 
					
						
							| 
									
										
										
										
											2022-02-07 00:06:10 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-02-12 07:01:41 -08:00
										 |  |  |     "bigtable": { | 
					
						
							|  |  |  |         VERSIONS["google-cloud-bigtable"], | 
					
						
							|  |  |  |         VERSIONS["pandas"], | 
					
						
							|  |  |  |         VERSIONS["numpy"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "clickhouse": { | 
					
						
							|  |  |  |         "clickhouse-driver~=0.2", | 
					
						
							| 
									
										
										
										
											2025-04-24 16:08:38 +02:00
										 |  |  |         "clickhouse-sqlalchemy~=0.2.0", | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |         DATA_DIFF["clickhouse"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "dagster": { | 
					
						
							| 
									
										
										
										
											2024-07-27 18:08:42 +05:30
										 |  |  |         "croniter<3", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS["pymysql"], | 
					
						
							|  |  |  |         "psycopg2-binary", | 
					
						
							|  |  |  |         VERSIONS["geoalchemy2"], | 
					
						
							| 
									
										
										
										
											2025-04-24 16:08:38 +02:00
										 |  |  |         "dagster_graphql>=1.8.0", | 
					
						
							| 
									
										
										
										
											2022-11-17 10:11:54 +01:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-30 01:17:39 +05:30
										 |  |  |     "dbt": { | 
					
						
							|  |  |  |         "google-cloud", | 
					
						
							|  |  |  |         VERSIONS["boto3"], | 
					
						
							|  |  |  |         VERSIONS["google-cloud-storage"], | 
					
						
							| 
									
										
										
										
											2025-02-04 11:57:39 +05:30
										 |  |  |         "collate-dbt-artifacts-parser", | 
					
						
							| 
									
										
										
										
											2023-06-22 10:58:38 +05:30
										 |  |  |         VERSIONS["azure-storage-blob"], | 
					
						
							|  |  |  |         VERSIONS["azure-identity"], | 
					
						
							| 
									
										
										
										
											2023-01-30 01:17:39 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-06-09 19:52:35 +05:30
										 |  |  |     "db2": {"ibm-db-sa~=0.4.1", "ibm-db>=3.2.6"}, | 
					
						
							| 
									
										
										
										
											2024-01-11 12:35:52 +05:30
										 |  |  |     "db2-ibmi": {"sqlalchemy-ibmi~=0.9.3"}, | 
					
						
							| 
									
										
										
										
											2024-04-15 15:37:07 +05:30
										 |  |  |     "databricks": { | 
					
						
							|  |  |  |         VERSIONS["sqlalchemy-databricks"], | 
					
						
							|  |  |  |         VERSIONS["databricks-sdk"], | 
					
						
							|  |  |  |         "ndg-httpsclient~=0.5.1", | 
					
						
							|  |  |  |         "pyOpenSSL~=24.1.0", | 
					
						
							|  |  |  |         "pyasn1~=0.6.0", | 
					
						
							| 
									
										
										
										
											2024-07-09 12:59:23 +05:30
										 |  |  |         # databricks has a dependency on pyhive for metadata as well as profiler | 
					
						
							|  |  |  |         VERSIONS["pyhive"], | 
					
						
							| 
									
										
										
										
											2024-04-15 15:37:07 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "datalake-azure": { | 
					
						
							| 
									
										
										
										
											2023-06-22 10:58:38 +05:30
										 |  |  |         VERSIONS["azure-storage-blob"], | 
					
						
							|  |  |  |         VERSIONS["azure-identity"], | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         VERSIONS["adlfs"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         *COMMONS["datalake"], | 
					
						
							| 
									
										
										
										
											2022-06-15 12:27:21 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "datalake-gcs": { | 
					
						
							| 
									
										
										
										
											2024-07-10 08:03:28 -04:00
										 |  |  |         VERSIONS["google-cloud-monitoring"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS["google-cloud-storage"], | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |         VERSIONS["gcsfs"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         *COMMONS["datalake"], | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "datalake-s3": { | 
					
						
							|  |  |  |         *COMMONS["datalake"], | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-09-19 08:25:19 +02:00
										 |  |  |     "deltalake": { | 
					
						
							| 
									
										
										
										
											2025-06-25 11:45:01 +02:00
										 |  |  |         "delta-spark>=3.0.0,<4.0.0", | 
					
						
							|  |  |  |         "deltalake>=0.19.0,<0.20", | 
					
						
							|  |  |  |         "pyspark==3.5.6", | 
					
						
							| 
									
										
										
										
											2024-09-19 08:25:19 +02:00
										 |  |  |     },  # TODO: remove pinning to under 0.20 after https://github.com/open-metadata/OpenMetadata/issues/17909 | 
					
						
							| 
									
										
										
										
											2025-06-25 11:45:01 +02:00
										 |  |  |     "deltalake-storage": {"deltalake>=0.19.0,<0.20"}, | 
					
						
							|  |  |  |     "deltalake-spark": {"delta-spark>=3.0.0,<4.0.0", "pyspark==3.5.6"}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "domo": {VERSIONS["pydomo"]}, | 
					
						
							| 
									
										
										
										
											2023-11-28 16:27:52 +08:00
										 |  |  |     "doris": {"pydoris==1.0.2"}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "druid": {"pydruid>=0.6.5"}, | 
					
						
							|  |  |  |     "dynamodb": {VERSIONS["boto3"]}, | 
					
						
							|  |  |  |     "elasticsearch": { | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |         VERSIONS["elasticsearch8"], | 
					
						
							| 
									
										
										
										
											2025-03-18 06:15:25 -07:00
										 |  |  |         "httpx>=0.23.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     },  # also requires requests-aws4auth which is in base | 
					
						
							| 
									
										
										
										
											2025-03-18 06:15:25 -07:00
										 |  |  |     "opensearch": {VERSIONS["opensearch"]}, | 
					
						
							| 
									
										
										
										
											2025-08-06 20:19:38 +02:00
										 |  |  |     "exasol": { | 
					
						
							|  |  |  |         "sqlalchemy_exasol>=5,<6", | 
					
						
							|  |  |  |         "exasol-integration-test-docker-environment>=3.1.0,<4", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "glue": {VERSIONS["boto3"]}, | 
					
						
							|  |  |  |     "great-expectations": {VERSIONS["great-expectations"]}, | 
					
						
							| 
									
										
										
										
											2025-04-24 11:55:04 +02:00
										 |  |  |     "great-expectations-1xx": {VERSIONS["great-expectations-1xx"]}, | 
					
						
							| 
									
										
										
										
											2024-05-09 08:57:25 +05:30
										 |  |  |     "greenplum": {*COMMONS["postgres"]}, | 
					
						
							| 
									
										
										
										
											2025-01-02 13:07:55 +05:30
										 |  |  |     "cockroach": { | 
					
						
							|  |  |  |         VERSIONS["cockroach"], | 
					
						
							|  |  |  |         "psycopg2-binary", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "hive": { | 
					
						
							|  |  |  |         *COMMONS["hive"], | 
					
						
							|  |  |  |         "thrift>=0.13,<1", | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |         # Replacing sasl with pure-sasl based on https://github.com/cloudera/python-sasl/issues/30 for py 3.11 | 
					
						
							|  |  |  |         "pure-sasl", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "thrift-sasl~=0.4", | 
					
						
							| 
									
										
										
										
											2023-02-22 16:54:56 +05:30
										 |  |  |         "impyla~=0.18.0", | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |     "iceberg": { | 
					
						
							| 
									
										
										
										
											2025-02-12 07:01:41 -08:00
										 |  |  |         VERSIONS["pyiceberg"], | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |         # Forcing the version of a few packages so it plays nicely with other requirements. | 
					
						
							|  |  |  |         VERSIONS["pydantic"], | 
					
						
							|  |  |  |         VERSIONS["adlfs"], | 
					
						
							|  |  |  |         VERSIONS["gcsfs"], | 
					
						
							|  |  |  |         VERSIONS["pyarrow"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-04-21 02:57:13 -05:00
										 |  |  |     "impala": { | 
					
						
							|  |  |  |         "presto-types-parser>=0.0.2", | 
					
						
							|  |  |  |         "impyla[kerberos]~=0.18.0", | 
					
						
							|  |  |  |         "thrift>=0.13,<1", | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |         "pure-sasl", | 
					
						
							| 
									
										
										
										
											2023-04-21 02:57:13 -05:00
										 |  |  |         "thrift-sasl~=0.4", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "kafka": {*COMMONS["kafka"]}, | 
					
						
							| 
									
										
										
										
											2024-05-10 14:29:45 +05:30
										 |  |  |     "kafkaconnect": {"kafka-connect-py==0.10.11"}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "kinesis": {VERSIONS["boto3"]}, | 
					
						
							| 
									
										
										
										
											2023-10-04 20:16:21 +07:00
										 |  |  |     "looker": { | 
					
						
							|  |  |  |         VERSIONS["looker-sdk"], | 
					
						
							|  |  |  |         VERSIONS["lkml"], | 
					
						
							|  |  |  |         "gitpython~=3.1.34", | 
					
						
							| 
									
										
										
										
											2023-10-05 10:02:57 +02:00
										 |  |  |         VERSIONS["giturlparse"], | 
					
						
							| 
									
										
										
										
											2024-09-27 07:55:15 -04:00
										 |  |  |         "python-liquid", | 
					
						
							| 
									
										
										
										
											2023-10-04 20:16:21 +07:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-06-06 00:44:25 +03:00
										 |  |  |     "mlflow": {"mlflow-skinny~=2.22.0"}, | 
					
						
							| 
									
										
										
										
											2024-08-20 12:19:05 +02:00
										 |  |  |     "mongo": {VERSIONS["mongo"], VERSIONS["pandas"], VERSIONS["numpy"]}, | 
					
						
							| 
									
										
										
										
											2024-12-12 15:12:55 +05:30
										 |  |  |     "cassandra": {VERSIONS["cassandra"]}, | 
					
						
							| 
									
										
										
										
											2023-08-29 11:16:32 +05:30
										 |  |  |     "couchbase": {"couchbase~=4.1"}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "mssql": { | 
					
						
							|  |  |  |         "sqlalchemy-pytds~=0.3", | 
					
						
							|  |  |  |         DATA_DIFF["mssql"], | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     "mssql-odbc": { | 
					
						
							|  |  |  |         VERSIONS["pyodbc"], | 
					
						
							|  |  |  |         DATA_DIFF["mssql"], | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     "mysql": { | 
					
						
							|  |  |  |         VERSIONS["pymysql"], | 
					
						
							|  |  |  |         DATA_DIFF["mysql"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "nifi": {},  # uses requests | 
					
						
							| 
									
										
										
										
											2024-03-12 08:39:25 +01:00
										 |  |  |     "openlineage": {*COMMONS["kafka"]}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "oracle": {"cx_Oracle>=8.3.0,<9", "oracledb~=1.2", DATA_DIFF["oracle"]}, | 
					
						
							| 
									
										
										
										
											2023-07-05 16:18:59 +09:00
										 |  |  |     "pgspider": {"psycopg2-binary", "sqlalchemy-pgspider"}, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "pinotdb": {"pinotdb~=5.0"}, | 
					
						
							| 
									
										
										
										
											2024-05-09 08:57:25 +05:30
										 |  |  |     "postgres": {*COMMONS["postgres"]}, | 
					
						
							| 
									
										
										
										
											2024-04-29 14:55:06 +05:30
										 |  |  |     "powerbi": { | 
					
						
							|  |  |  |         VERSIONS["msal"], | 
					
						
							|  |  |  |         VERSIONS["boto3"], | 
					
						
							|  |  |  |         VERSIONS["google-cloud-storage"], | 
					
						
							|  |  |  |         VERSIONS["azure-storage-blob"], | 
					
						
							|  |  |  |         VERSIONS["azure-identity"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-08-11 12:28:05 +05:30
										 |  |  |     "qliksense": {"websocket-client~=1.6.1"}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "presto": {*COMMONS["hive"], DATA_DIFF["presto"]}, | 
					
						
							| 
									
										
										
										
											2023-10-10 16:21:52 +05:30
										 |  |  |     "pymssql": {"pymssql~=2.2.0"}, | 
					
						
							| 
									
										
										
										
											2023-01-31 20:47:40 +05:30
										 |  |  |     "quicksight": {VERSIONS["boto3"]}, | 
					
						
							| 
									
										
										
										
											2023-05-15 11:48:03 +05:30
										 |  |  |     "redash": {VERSIONS["packaging"]}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "redpanda": {*COMMONS["kafka"]}, | 
					
						
							|  |  |  |     "redshift": { | 
					
						
							| 
									
										
										
										
											2023-06-19 12:09:09 +02:00
										 |  |  |         # Going higher has memory and performance issues | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |         VERSIONS["redshift"], | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |         "psycopg2-binary", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS["geoalchemy2"], | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "sagemaker": {VERSIONS["boto3"]}, | 
					
						
							| 
									
										
										
										
											2025-02-18 07:56:46 +01:00
										 |  |  |     "salesforce": {"simple_salesforce~=1.11", "authlib>=1.3.1"}, | 
					
						
							| 
									
										
										
										
											2025-04-16 11:34:26 +05:30
										 |  |  |     "sample-data": { | 
					
						
							|  |  |  |         VERSIONS["avro"], | 
					
						
							|  |  |  |         VERSIONS["grpc-tools"], | 
					
						
							|  |  |  |         VERSIONS["sqlalchemy-bigquery"], | 
					
						
							| 
									
										
										
										
											2025-05-20 17:40:44 +05:30
										 |  |  |         VERSIONS["presidio-analyzer"], | 
					
						
							| 
									
										
										
										
											2025-04-16 11:34:26 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-05-31 16:00:31 +02:00
										 |  |  |     "sap-hana": {"hdbcli", "sqlalchemy-hana"}, | 
					
						
							| 
									
										
										
										
											2024-01-11 09:46:57 -05:00
										 |  |  |     "sas": {}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "singlestore": {VERSIONS["pymysql"]}, | 
					
						
							|  |  |  |     "sklearn": {VERSIONS["scikit-learn"]}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "snowflake": {VERSIONS["snowflake"], DATA_DIFF["snowflake"]}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "superset": {},  # uses requests | 
					
						
							| 
									
										
										
										
											2024-01-08 11:03:05 +05:30
										 |  |  |     "tableau": {VERSIONS["tableau"], VERSIONS["validators"], VERSIONS["packaging"]}, | 
					
						
							| 
									
										
										
										
											2024-05-28 07:40:22 +03:00
										 |  |  |     "teradata": {VERSIONS["teradata"]}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "trino": {VERSIONS["trino"], DATA_DIFF["trino"]}, | 
					
						
							|  |  |  |     "vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5", DATA_DIFF["vertica"]}, | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     "pii-processor": { | 
					
						
							|  |  |  |         VERSIONS["spacy"], | 
					
						
							|  |  |  |         VERSIONS["pandas"], | 
					
						
							| 
									
										
										
										
											2024-08-20 12:19:05 +02:00
										 |  |  |         VERSIONS["numpy"], | 
					
						
							| 
									
										
										
										
											2025-05-20 17:40:44 +05:30
										 |  |  |         VERSIONS["presidio-analyzer"], | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-05-20 17:40:44 +05:30
										 |  |  |     "presidio-analyzer": {VERSIONS["presidio-analyzer"]}, | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | dev = { | 
					
						
							| 
									
										
										
										
											2022-05-18 10:55:39 +05:30
										 |  |  |     "black==22.3.0", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "datamodel-code-generator==0.25.6", | 
					
						
							|  |  |  |     "boto3-stubs", | 
					
						
							|  |  |  |     "mypy-boto3-glue", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "isort", | 
					
						
							|  |  |  |     "pre-commit", | 
					
						
							|  |  |  |     "pycln", | 
					
						
							| 
									
										
										
										
											2024-09-20 18:50:28 +02:00
										 |  |  |     "pylint~=3.2.0",  # 3.3.0+ breaks our current linting | 
					
						
							| 
									
										
										
										
											2023-11-22 07:10:37 +01:00
										 |  |  |     # For publishing | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  |     "twine", | 
					
						
							| 
									
										
										
										
											2023-11-22 07:10:37 +01:00
										 |  |  |     "build", | 
					
						
							| 
									
										
										
										
											2024-04-25 09:45:26 +05:30
										 |  |  |     *plugins["sample-data"], | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-26 10:38:17 +02:00
										 |  |  | # Dependencies for unit testing in addition to dev dependencies and plugins | 
					
						
							|  |  |  | test_unit = { | 
					
						
							| 
									
										
										
										
											2025-06-16 08:03:38 +02:00
										 |  |  |     "pytest==7.0.1", | 
					
						
							| 
									
										
										
										
											2025-05-26 10:38:17 +02:00
										 |  |  |     "pytest-cov", | 
					
						
							|  |  |  |     "pytest-order", | 
					
						
							|  |  |  |     "dirty-equals", | 
					
						
							|  |  |  |     "faker==37.1.0",  # The version needs to be fixed to prevent flaky tests! | 
					
						
							| 
									
										
										
										
											2025-05-27 10:56:52 +02:00
										 |  |  |     # TODO: Remove once no unit test requires testcontainers | 
					
						
							|  |  |  |     "testcontainers", | 
					
						
							| 
									
										
										
										
											2025-05-26 10:38:17 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | test = { | 
					
						
							| 
									
										
										
										
											2023-02-01 10:20:26 +01:00
										 |  |  |     # Install Airflow as it's not part of `all` plugin | 
					
						
							| 
									
										
										
										
											2024-11-06 19:28:48 +05:30
										 |  |  |     "opentelemetry-exporter-otlp==1.27.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["airflow"], | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "boto3-stubs", | 
					
						
							|  |  |  |     "mypy-boto3-glue", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "coverage", | 
					
						
							| 
									
										
										
										
											2023-02-01 10:20:26 +01:00
										 |  |  |     # Install GE because it's not in the `all` plugin | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["great-expectations"], | 
					
						
							| 
									
										
										
										
											2024-07-18 11:52:56 +02:00
										 |  |  |     "basedpyright~=1.14", | 
					
						
							| 
									
										
										
										
											2025-06-16 08:03:38 +02:00
										 |  |  |     "pytest==7.0.1", | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  |     "pytest-cov", | 
					
						
							| 
									
										
										
										
											2022-10-10 11:36:20 +02:00
										 |  |  |     "pytest-order", | 
					
						
							| 
									
										
										
										
											2024-11-11 10:07:23 +01:00
										 |  |  |     "dirty-equals", | 
					
						
							| 
									
										
										
										
											2023-03-01 08:20:38 +01:00
										 |  |  |     # install dbt dependency | 
					
						
							| 
									
										
										
										
											2025-02-04 11:57:39 +05:30
										 |  |  |     "collate-dbt-artifacts-parser", | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     "freezegun", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     VERSIONS["sqlalchemy-databricks"], | 
					
						
							|  |  |  |     VERSIONS["databricks-sdk"], | 
					
						
							|  |  |  |     VERSIONS["scikit-learn"], | 
					
						
							|  |  |  |     VERSIONS["pyarrow"], | 
					
						
							|  |  |  |     VERSIONS["trino"], | 
					
						
							|  |  |  |     VERSIONS["spacy"], | 
					
						
							|  |  |  |     VERSIONS["pydomo"], | 
					
						
							|  |  |  |     VERSIONS["looker-sdk"], | 
					
						
							|  |  |  |     VERSIONS["lkml"], | 
					
						
							|  |  |  |     VERSIONS["tableau"], | 
					
						
							|  |  |  |     VERSIONS["pyhive"], | 
					
						
							|  |  |  |     VERSIONS["mongo"], | 
					
						
							| 
									
										
										
										
											2024-12-12 15:12:55 +05:30
										 |  |  |     VERSIONS["cassandra"], | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     VERSIONS["redshift"], | 
					
						
							|  |  |  |     VERSIONS["snowflake"], | 
					
						
							|  |  |  |     VERSIONS["elasticsearch8"], | 
					
						
							| 
									
										
										
										
											2023-10-05 10:02:57 +02:00
										 |  |  |     VERSIONS["giturlparse"], | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     VERSIONS["avro"],  # Sample Data | 
					
						
							|  |  |  |     VERSIONS["grpc-tools"], | 
					
						
							| 
									
										
										
										
											2024-07-16 11:01:43 +02:00
										 |  |  |     VERSIONS["neo4j"], | 
					
						
							| 
									
										
										
										
											2025-01-02 13:07:55 +05:30
										 |  |  |     VERSIONS["cockroach"], | 
					
						
							| 
									
										
										
										
											2025-02-12 07:01:41 -08:00
										 |  |  |     VERSIONS["pydoris"], | 
					
						
							|  |  |  |     VERSIONS["pyiceberg"], | 
					
						
							| 
									
										
										
										
											2024-04-17 12:19:37 +02:00
										 |  |  |     "testcontainers==3.7.1;python_version<'3.9'", | 
					
						
							| 
									
										
										
										
											2024-09-24 09:18:36 +02:00
										 |  |  |     "testcontainers~=4.8.0;python_version>='3.9'", | 
					
						
							| 
									
										
										
										
											2024-04-22 15:50:44 +02:00
										 |  |  |     "minio==7.2.5", | 
					
						
							| 
									
										
										
										
											2024-05-16 10:03:27 +02:00
										 |  |  |     *plugins["mlflow"], | 
					
						
							|  |  |  |     *plugins["datalake-s3"], | 
					
						
							| 
									
										
										
										
											2024-06-14 14:08:59 +05:30
										 |  |  |     *plugins["kafka"], | 
					
						
							|  |  |  |     "kafka-python==2.0.2", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     *plugins["pii-processor"], | 
					
						
							| 
									
										
										
										
											2024-05-22 17:12:00 +02:00
										 |  |  |     "requests==2.31.0", | 
					
						
							| 
									
										
										
										
											2024-09-26 13:55:17 +02:00
										 |  |  |     f"{DATA_DIFF['mysql']}", | 
					
						
							| 
									
										
										
										
											2024-06-25 07:51:22 +02:00
										 |  |  |     *plugins["deltalake"], | 
					
						
							| 
									
										
										
										
											2024-07-16 11:01:43 +02:00
										 |  |  |     *plugins["datalake-gcs"], | 
					
						
							|  |  |  |     *plugins["pgspider"], | 
					
						
							|  |  |  |     *plugins["clickhouse"], | 
					
						
							|  |  |  |     *plugins["mssql"], | 
					
						
							|  |  |  |     *plugins["dagster"], | 
					
						
							|  |  |  |     *plugins["oracle"], | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     *plugins["mssql"], | 
					
						
							| 
									
										
										
										
											2025-02-12 07:01:41 -08:00
										 |  |  |     VERSIONS["validators"], | 
					
						
							|  |  |  |     VERSIONS["pyathena"], | 
					
						
							|  |  |  |     VERSIONS["pyiceberg"], | 
					
						
							|  |  |  |     VERSIONS["pydoris"], | 
					
						
							|  |  |  |     "python-liquid", | 
					
						
							|  |  |  |     VERSIONS["google-cloud-bigtable"], | 
					
						
							|  |  |  |     *plugins["bigquery"], | 
					
						
							| 
									
										
										
										
											2025-05-27 10:56:52 +02:00
										 |  |  |     "faker==37.1.0",  # The version needs to be fixed to prevent flaky tests! | 
					
						
							| 
									
										
										
										
											2025-08-06 20:19:38 +02:00
										 |  |  |     *plugins["exasol"], | 
					
						
							|  |  |  |     VERSIONS["opensearch"], | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-24 16:08:38 +02:00
										 |  |  | if sys.version_info >= (3, 9): | 
					
						
							|  |  |  |     test.add("locust~=2.32.0") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-19 14:44:36 +02:00
										 |  |  | e2e_test = { | 
					
						
							|  |  |  |     # playwright dependencies | 
					
						
							|  |  |  |     "pytest-playwright", | 
					
						
							|  |  |  |     "pytest-base-url", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-01 23:42:31 +05:30
										 |  |  | # Define playwright_dependencies as a set of packages required for Playwright tests | 
					
						
							|  |  |  | # These packages correspond to the ingestion connectors used in Playwright tests | 
					
						
							|  |  |  | playwright_dependencies = { | 
					
						
							|  |  |  |     *plugins["mysql"], | 
					
						
							|  |  |  |     *plugins["bigquery"], | 
					
						
							|  |  |  |     *plugins["kafka"], | 
					
						
							|  |  |  |     *plugins["mlflow"], | 
					
						
							|  |  |  |     *plugins["snowflake"], | 
					
						
							|  |  |  |     *plugins["superset"], | 
					
						
							|  |  |  |     *plugins["postgres"], | 
					
						
							|  |  |  |     *plugins["redshift"], | 
					
						
							|  |  |  |     *plugins["airflow"], | 
					
						
							|  |  |  |     *plugins["datalake-s3"], | 
					
						
							| 
									
										
										
										
											2025-04-03 19:11:33 +05:30
										 |  |  |     *plugins["dbt"], | 
					
						
							| 
									
										
										
										
											2025-05-20 17:40:44 +05:30
										 |  |  |     *plugins["presidio-analyzer"], | 
					
						
							| 
									
										
										
										
											2025-06-06 00:44:25 +03:00
										 |  |  |     *e2e_test, | 
					
						
							| 
									
										
										
										
											2025-04-01 23:42:31 +05:30
										 |  |  |     # Add other plugins as needed for Playwright tests | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-19 11:09:38 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def filter_requirements(filtered: Set[str]) -> List[str]: | 
					
						
							|  |  |  |     """Filter out requirements from base_requirements""" | 
					
						
							|  |  |  |     return list( | 
					
						
							|  |  |  |         base_requirements.union( | 
					
						
							|  |  |  |             *[ | 
					
						
							|  |  |  |                 requirements | 
					
						
							|  |  |  |                 for plugin, requirements in plugins.items() | 
					
						
							|  |  |  |                 if plugin not in filtered | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | setup( | 
					
						
							|  |  |  |     install_requires=list(base_requirements), | 
					
						
							|  |  |  |     extras_require={ | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  |         "dev": list(dev), | 
					
						
							|  |  |  |         "test": list(test), | 
					
						
							| 
									
										
										
										
											2025-05-26 10:38:17 +02:00
										 |  |  |         "test-unit": list(test_unit), | 
					
						
							| 
									
										
										
										
											2023-09-19 14:44:36 +02:00
										 |  |  |         "e2e_test": list(e2e_test), | 
					
						
							| 
									
										
										
										
											2022-11-15 05:44:25 +01:00
										 |  |  |         "data-insight": list(plugins["elasticsearch"]), | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |         **{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()}, | 
					
						
							| 
									
										
										
										
											2025-05-26 10:38:17 +02:00
										 |  |  |         # FIXME: all-dev-env is a temporary solution to install all dependencies except | 
					
						
							| 
									
										
										
										
											2025-05-27 10:56:52 +02:00
										 |  |  |         #   those that might conflict with each other or cause issues in the dev environment | 
					
						
							|  |  |  |         #   This covers all development cases where none of the plugins are used | 
					
						
							| 
									
										
										
										
											2025-05-26 10:38:17 +02:00
										 |  |  |         "all-dev-env": filter_requirements( | 
					
						
							|  |  |  |             {"airflow", "db2", "great-expectations", "pymssql"} | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         # enf-of-fixme | 
					
						
							| 
									
										
										
										
											2023-12-19 11:09:38 +01:00
										 |  |  |         "all": filter_requirements({"airflow", "db2", "great-expectations"}), | 
					
						
							| 
									
										
										
										
											2025-04-01 23:42:31 +05:30
										 |  |  |         "playwright": list(playwright_dependencies), | 
					
						
							| 
									
										
										
										
											2023-12-19 11:09:38 +01:00
										 |  |  |         "slim": filter_requirements( | 
					
						
							| 
									
										
										
										
											2024-06-20 08:38:21 +02:00
										 |  |  |             { | 
					
						
							|  |  |  |                 "airflow", | 
					
						
							|  |  |  |                 "db2", | 
					
						
							|  |  |  |                 "great-expectations", | 
					
						
							|  |  |  |                 "deltalake", | 
					
						
							|  |  |  |                 "deltalake-spark", | 
					
						
							|  |  |  |                 "sklearn", | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |         ), | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | ) |