| 
									
										
										
										
											2021-12-01 12:46:28 +05:30
										 |  |  | #  Copyright 2021 Collate | 
					
						
							|  |  |  | #  Licensed under the Apache License, Version 2.0 (the "License"); | 
					
						
							|  |  |  | #  you may not use this file except in compliance with the License. | 
					
						
							|  |  |  | #  You may obtain a copy of the License at | 
					
						
							| 
									
										
										
										
											2021-08-02 15:08:30 +05:30
										 |  |  | #  http://www.apache.org/licenses/LICENSE-2.0 | 
					
						
							|  |  |  | #  Unless required by applicable law or agreed to in writing, software | 
					
						
							|  |  |  | #  distributed under the License is distributed on an "AS IS" BASIS, | 
					
						
							|  |  |  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
					
						
							|  |  |  | #  See the License for the specific language governing permissions and | 
					
						
							|  |  |  | #  limitations under the License. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-13 16:07:29 +05:30
										 |  |  | """
 | 
					
						
							|  |  |  | Python Dependencies | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-19 11:09:38 +01:00
										 |  |  | from typing import Dict, List, Set | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-22 07:10:37 +01:00
										 |  |  | from setuptools import setup | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | # Add here versions required for multiple plugins | 
					
						
							|  |  |  | VERSIONS = { | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "airflow": "apache-airflow==2.9.1", | 
					
						
							|  |  |  |     "adlfs": "adlfs>=2023.1.0", | 
					
						
							| 
									
										
										
										
											2024-04-18 17:41:09 +02:00
										 |  |  |     "avro": "avro>=1.11.3,<1.12", | 
					
						
							| 
									
										
										
										
											2023-01-18 08:20:40 +01:00
										 |  |  |     "boto3": "boto3>=1.20,<2.0",  # No need to add botocore separately. It's a dep from boto3 | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "geoalchemy2": "GeoAlchemy2~=0.12", | 
					
						
							| 
									
										
										
										
											2024-07-10 08:03:28 -04:00
										 |  |  |     "google-cloud-monitoring": "google-cloud-monitoring>=2.0.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "google-cloud-storage": "google-cloud-storage==1.43.0", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "gcsfs": "gcsfs>=2023.1.0", | 
					
						
							| 
									
										
										
										
											2024-05-22 21:25:42 +02:00
										 |  |  |     "great-expectations": "great-expectations>=0.18.0,<0.18.14", | 
					
						
							| 
									
										
										
										
											2023-02-20 13:37:27 +01:00
										 |  |  |     "grpc-tools": "grpcio-tools>=1.47.2", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "msal": "msal~=1.2", | 
					
						
							| 
									
										
										
										
											2023-01-11 18:28:25 +05:30
										 |  |  |     "neo4j": "neo4j~=5.3.0", | 
					
						
							| 
									
										
										
										
											2024-02-24 18:42:22 +01:00
										 |  |  |     "pandas": "pandas~=2.0.0", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "pyarrow": "pyarrow~=16.0", | 
					
						
							|  |  |  |     "pydantic": "pydantic~=2.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "pydomo": "pydomo~=0.3", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "pymysql": "pymysql~=1.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "pyodbc": "pyodbc>=4.0.35,<5", | 
					
						
							| 
									
										
										
										
											2024-06-18 17:03:35 +05:30
										 |  |  |     "numpy": "numpy<2", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "scikit-learn": "scikit-learn~=1.0",  # Python 3.7 only goes up to 1.0.2 | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "packaging": "packaging", | 
					
						
							| 
									
										
										
										
											2023-06-22 10:58:38 +05:30
										 |  |  |     "azure-storage-blob": "azure-storage-blob~=12.14", | 
					
						
							|  |  |  |     "azure-identity": "azure-identity~=1.12", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "sqlalchemy-databricks": "sqlalchemy-databricks~=0.1", | 
					
						
							| 
									
										
										
										
											2024-02-06 10:42:53 +05:30
										 |  |  |     "databricks-sdk": "databricks-sdk>=0.18.0,<0.20.0", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "trino": "trino[sqlalchemy]", | 
					
						
							| 
									
										
										
										
											2024-09-12 11:42:53 +02:00
										 |  |  |     "spacy": "spacy<3.8", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "looker-sdk": "looker-sdk>=22.20.0", | 
					
						
							|  |  |  |     "lkml": "lkml~=1.3", | 
					
						
							|  |  |  |     "tableau": "tableau-api-lib~=0.1", | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |     "pyhive": "pyhive[hive_pure_sasl]~=0.7", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     "mongo": "pymongo~=4.3", | 
					
						
							|  |  |  |     "redshift": "sqlalchemy-redshift==0.8.12", | 
					
						
							|  |  |  |     "snowflake": "snowflake-sqlalchemy~=1.4", | 
					
						
							|  |  |  |     "elasticsearch8": "elasticsearch8~=8.9.0", | 
					
						
							| 
									
										
										
										
											2023-10-05 10:02:57 +02:00
										 |  |  |     "giturlparse": "giturlparse", | 
					
						
							| 
									
										
										
										
											2024-01-08 11:03:05 +05:30
										 |  |  |     "validators": "validators~=0.22.0", | 
					
						
							| 
									
										
										
										
											2024-05-28 07:40:22 +03:00
										 |  |  |     "teradata": "teradatasqlalchemy>=20.0.0.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | COMMONS = { | 
					
						
							| 
									
										
										
										
											2023-04-05 12:12:47 +05:30
										 |  |  |     "datalake": { | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |         VERSIONS["avro"], | 
					
						
							| 
									
										
										
										
											2023-04-05 12:12:47 +05:30
										 |  |  |         VERSIONS["boto3"], | 
					
						
							|  |  |  |         VERSIONS["pandas"], | 
					
						
							|  |  |  |         VERSIONS["pyarrow"], | 
					
						
							| 
									
										
										
										
											2024-08-20 12:19:05 +02:00
										 |  |  |         VERSIONS["numpy"], | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |         # python-snappy does not work well on 3.11 https://github.com/aio-libs/aiokafka/discussions/931 | 
					
						
							|  |  |  |         # Using this as an alternative | 
					
						
							|  |  |  |         "cramjam~=2.7", | 
					
						
							| 
									
										
										
										
											2023-04-05 12:12:47 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "hive": { | 
					
						
							|  |  |  |         "presto-types-parser>=0.0.2", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |         VERSIONS["pyhive"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     }, | 
					
						
							|  |  |  |     "kafka": { | 
					
						
							| 
									
										
										
										
											2023-03-15 15:15:57 +01:00
										 |  |  |         VERSIONS["avro"], | 
					
						
							| 
									
										
										
										
											2023-05-23 22:29:12 +05:30
										 |  |  |         "confluent_kafka==2.1.1", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "fastavro>=1.2.0", | 
					
						
							|  |  |  |         # Due to https://github.com/grpc/grpc/issues/30843#issuecomment-1303816925 | 
					
						
							| 
									
										
										
										
											2023-02-20 13:37:27 +01:00
										 |  |  |         # use >= v1.47.2 https://github.com/grpc/grpc/blob/v1.47.2/tools/distrib/python/grpcio_tools/grpc_version.py#L17 | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS[ | 
					
						
							|  |  |  |             "grpc-tools" | 
					
						
							|  |  |  |         ],  # grpcio-tools already depends on grpcio. No need to add separately | 
					
						
							|  |  |  |         "protobuf", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-05-09 08:57:25 +05:30
										 |  |  |     "postgres": { | 
					
						
							|  |  |  |         VERSIONS["pymysql"], | 
					
						
							|  |  |  |         "psycopg2-binary", | 
					
						
							|  |  |  |         VERSIONS["geoalchemy2"], | 
					
						
							|  |  |  |         VERSIONS["packaging"], | 
					
						
							|  |  |  |     },  # Adding as Postgres SQL & GreenPlum are using common packages. | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  | DATA_DIFF = { | 
					
						
							|  |  |  |     driver: f"collate-data-diff[{driver}]" | 
					
						
							|  |  |  |     # data-diff uses different drivers out-of-the-box than OpenMetadata | 
					
						
							|  |  |  |     # the exrtas are described here: | 
					
						
							|  |  |  |     # https://github.com/open-metadata/collate-data-diff/blob/main/pyproject.toml#L68 | 
					
						
							|  |  |  |     # install all data diffs with "pip install collate-data-diff[all-dbs]" | 
					
						
							|  |  |  |     for driver in [ | 
					
						
							|  |  |  |         "clickhouse", | 
					
						
							|  |  |  |         # "duckdb", # Not supported by OpenMetadata | 
					
						
							|  |  |  |         "mssql", | 
					
						
							|  |  |  |         "mysql", | 
					
						
							|  |  |  |         "oracle", | 
					
						
							|  |  |  |         # "postgresql", we dont use this as it installs psycopg2 which interferes with psycopg2-binary | 
					
						
							|  |  |  |         "presto", | 
					
						
							|  |  |  |         "redshift", | 
					
						
							|  |  |  |         "snowflake", | 
					
						
							|  |  |  |         "trino", | 
					
						
							|  |  |  |         "vertica", | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | base_requirements = { | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "antlr4-python3-runtime==4.9.2", | 
					
						
							| 
									
										
										
										
											2024-02-20 07:18:35 +01:00
										 |  |  |     VERSIONS["azure-identity"], | 
					
						
							|  |  |  |     "azure-keyvault-secrets",  # Azure Key Vault SM | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["boto3"],  # Required in base for the secrets manager | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     "cached-property==1.5.2",  # LineageParser | 
					
						
							|  |  |  |     "chardet==4.0.0",  # Used in the profiler | 
					
						
							| 
									
										
										
										
											2024-04-18 17:41:09 +02:00
										 |  |  |     "cryptography>=42.0.0", | 
					
						
							| 
									
										
										
										
											2024-06-29 13:09:02 +09:00
										 |  |  |     "google-cloud-secret-manager==2.19.0", | 
					
						
							|  |  |  |     "google-crc32c", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "email-validator>=2.0",  # For the pydantic generated models for Email | 
					
						
							| 
									
										
										
										
											2023-09-15 12:03:47 +05:30
										 |  |  |     "importlib-metadata>=4.13.0",  # From airflow constraints | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "Jinja2>=2.11.3", | 
					
						
							| 
									
										
										
										
											2024-01-17 00:17:11 -06:00
										 |  |  |     "jsonpatch<2.0, >=1.24", | 
					
						
							| 
									
										
										
										
											2023-06-19 12:09:09 +02:00
										 |  |  |     "memory-profiler", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "mypy_extensions>=0.4.3", | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |     VERSIONS["pydantic"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["pymysql"], | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  |     "python-dateutil>=2.8.1", | 
					
						
							| 
									
										
										
										
											2023-09-27 11:49:21 +02:00
										 |  |  |     "PyYAML~=6.0", | 
					
						
							| 
									
										
										
										
											2024-05-22 17:12:00 +02:00
										 |  |  |     "requests>=2.23", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "requests-aws4auth~=1.1",  # Only depends on requests as external package. Leaving as base. | 
					
						
							| 
									
										
										
										
											2023-01-27 15:26:30 +01:00
										 |  |  |     "sqlalchemy>=1.4.0,<2", | 
					
						
							| 
									
										
										
										
											2024-05-17 12:09:37 +05:30
										 |  |  |     "collate-sqllineage~=1.4.0", | 
					
						
							| 
									
										
										
										
											2023-03-24 17:59:06 +01:00
										 |  |  |     "tabulate==0.9.0", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "typing-inspect", | 
					
						
							| 
									
										
										
										
											2024-05-17 07:56:07 +02:00
										 |  |  |     "packaging",  # For version parsing | 
					
						
							| 
									
										
										
										
											2021-09-19 13:59:14 +05:30
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | plugins: Dict[str, Set[str]] = { | 
					
						
							| 
									
										
										
										
											2023-12-01 06:29:44 +01:00
										 |  |  |     "airflow": { | 
					
						
							|  |  |  |         VERSIONS["airflow"], | 
					
						
							|  |  |  |         "attrs", | 
					
						
							|  |  |  |     },  # Same as ingestion container. For development. | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "amundsen": {VERSIONS["neo4j"]}, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "athena": {"pyathena~=3.0"}, | 
					
						
							| 
									
										
										
										
											2022-03-01 11:50:14 +05:30
										 |  |  |     "atlas": {}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "azuresql": {VERSIONS["pyodbc"]}, | 
					
						
							|  |  |  |     "azure-sso": {VERSIONS["msal"]}, | 
					
						
							| 
									
										
										
										
											2024-02-20 07:18:35 +01:00
										 |  |  |     "backup": {VERSIONS["boto3"], VERSIONS["azure-identity"], "azure-storage-blob"}, | 
					
						
							| 
									
										
										
										
											2022-02-07 00:06:10 +05:30
										 |  |  |     "bigquery": { | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "cachetools", | 
					
						
							| 
									
										
										
										
											2023-07-31 18:14:25 +05:30
										 |  |  |         "google-cloud-datacatalog>=3.6.2", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "google-cloud-logging", | 
					
						
							|  |  |  |         VERSIONS["pyarrow"], | 
					
						
							| 
									
										
										
										
											2024-06-18 17:03:35 +05:30
										 |  |  |         VERSIONS["numpy"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "sqlalchemy-bigquery>=1.2.2", | 
					
						
							| 
									
										
										
										
											2022-02-07 00:06:10 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-08-20 12:19:05 +02:00
										 |  |  |     "bigtable": {"google-cloud-bigtable>=2.0.0", VERSIONS["pandas"], VERSIONS["numpy"]}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "clickhouse": { | 
					
						
							|  |  |  |         "clickhouse-driver~=0.2", | 
					
						
							|  |  |  |         "clickhouse-sqlalchemy~=0.2", | 
					
						
							|  |  |  |         DATA_DIFF["clickhouse"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "dagster": { | 
					
						
							| 
									
										
										
										
											2024-07-27 18:08:42 +05:30
										 |  |  |         "croniter<3", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS["pymysql"], | 
					
						
							|  |  |  |         "psycopg2-binary", | 
					
						
							|  |  |  |         VERSIONS["geoalchemy2"], | 
					
						
							|  |  |  |         "dagster_graphql~=1.1", | 
					
						
							| 
									
										
										
										
											2022-11-17 10:11:54 +01:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-30 01:17:39 +05:30
										 |  |  |     "dbt": { | 
					
						
							|  |  |  |         "google-cloud", | 
					
						
							|  |  |  |         VERSIONS["boto3"], | 
					
						
							|  |  |  |         VERSIONS["google-cloud-storage"], | 
					
						
							|  |  |  |         "dbt-artifacts-parser", | 
					
						
							| 
									
										
										
										
											2023-06-22 10:58:38 +05:30
										 |  |  |         VERSIONS["azure-storage-blob"], | 
					
						
							|  |  |  |         VERSIONS["azure-identity"], | 
					
						
							| 
									
										
										
										
											2023-01-30 01:17:39 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-05-11 18:33:26 +05:30
										 |  |  |     "db2": {"ibm-db-sa~=0.3"}, | 
					
						
							| 
									
										
										
										
											2024-01-11 12:35:52 +05:30
										 |  |  |     "db2-ibmi": {"sqlalchemy-ibmi~=0.9.3"}, | 
					
						
							| 
									
										
										
										
											2024-04-15 15:37:07 +05:30
										 |  |  |     "databricks": { | 
					
						
							|  |  |  |         VERSIONS["sqlalchemy-databricks"], | 
					
						
							|  |  |  |         VERSIONS["databricks-sdk"], | 
					
						
							|  |  |  |         "ndg-httpsclient~=0.5.1", | 
					
						
							|  |  |  |         "pyOpenSSL~=24.1.0", | 
					
						
							|  |  |  |         "pyasn1~=0.6.0", | 
					
						
							| 
									
										
										
										
											2024-07-09 12:59:23 +05:30
										 |  |  |         # databricks has a dependency on pyhive for metadata as well as profiler | 
					
						
							|  |  |  |         VERSIONS["pyhive"], | 
					
						
							| 
									
										
										
										
											2024-04-15 15:37:07 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "datalake-azure": { | 
					
						
							| 
									
										
										
										
											2023-06-22 10:58:38 +05:30
										 |  |  |         VERSIONS["azure-storage-blob"], | 
					
						
							|  |  |  |         VERSIONS["azure-identity"], | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         VERSIONS["adlfs"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         *COMMONS["datalake"], | 
					
						
							| 
									
										
										
										
											2022-06-15 12:27:21 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "datalake-gcs": { | 
					
						
							| 
									
										
										
										
											2024-07-10 08:03:28 -04:00
										 |  |  |         VERSIONS["google-cloud-monitoring"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS["google-cloud-storage"], | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |         VERSIONS["gcsfs"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         *COMMONS["datalake"], | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "datalake-s3": { | 
					
						
							| 
									
										
										
										
											2024-09-12 07:13:01 +02:00
										 |  |  |         # vendoring 'boto3' to keep all dependencies aligned (s3fs, boto3, botocore, aiobotocore) | 
					
						
							|  |  |  |         "s3fs[boto3]", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         *COMMONS["datalake"], | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-06-20 08:38:21 +02:00
										 |  |  |     "deltalake": {"delta-spark<=2.3.0", "deltalake~=0.17"}, | 
					
						
							|  |  |  |     "deltalake-storage": {"deltalake~=0.17"}, | 
					
						
							|  |  |  |     "deltalake-spark": {"delta-spark<=2.3.0"}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "domo": {VERSIONS["pydomo"]}, | 
					
						
							| 
									
										
										
										
											2023-11-28 16:27:52 +08:00
										 |  |  |     "doris": {"pydoris==1.0.2"}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "druid": {"pydruid>=0.6.5"}, | 
					
						
							|  |  |  |     "dynamodb": {VERSIONS["boto3"]}, | 
					
						
							|  |  |  |     "elasticsearch": { | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |         VERSIONS["elasticsearch8"], | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     },  # also requires requests-aws4auth which is in base | 
					
						
							|  |  |  |     "glue": {VERSIONS["boto3"]}, | 
					
						
							|  |  |  |     "great-expectations": {VERSIONS["great-expectations"]}, | 
					
						
							| 
									
										
										
										
											2024-05-09 08:57:25 +05:30
										 |  |  |     "greenplum": {*COMMONS["postgres"]}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "hive": { | 
					
						
							|  |  |  |         *COMMONS["hive"], | 
					
						
							|  |  |  |         "thrift>=0.13,<1", | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |         # Replacing sasl with pure-sasl based on https://github.com/cloudera/python-sasl/issues/30 for py 3.11 | 
					
						
							|  |  |  |         "pure-sasl", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         "thrift-sasl~=0.4", | 
					
						
							| 
									
										
										
										
											2023-02-22 16:54:56 +05:30
										 |  |  |         "impyla~=0.18.0", | 
					
						
							| 
									
										
										
										
											2022-11-11 16:35:09 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |     "iceberg": { | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |         "pyiceberg>=0.5", | 
					
						
							| 
									
										
										
										
											2024-01-29 06:32:58 +01:00
										 |  |  |         # Forcing the version of a few packages so it plays nicely with other requirements. | 
					
						
							|  |  |  |         VERSIONS["pydantic"], | 
					
						
							|  |  |  |         VERSIONS["adlfs"], | 
					
						
							|  |  |  |         VERSIONS["gcsfs"], | 
					
						
							|  |  |  |         VERSIONS["pyarrow"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-04-21 02:57:13 -05:00
										 |  |  |     "impala": { | 
					
						
							|  |  |  |         "presto-types-parser>=0.0.2", | 
					
						
							|  |  |  |         "impyla[kerberos]~=0.18.0", | 
					
						
							|  |  |  |         "thrift>=0.13,<1", | 
					
						
							| 
									
										
										
										
											2023-12-14 15:46:58 +01:00
										 |  |  |         "pure-sasl", | 
					
						
							| 
									
										
										
										
											2023-04-21 02:57:13 -05:00
										 |  |  |         "thrift-sasl~=0.4", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "kafka": {*COMMONS["kafka"]}, | 
					
						
							| 
									
										
										
										
											2024-05-10 14:29:45 +05:30
										 |  |  |     "kafkaconnect": {"kafka-connect-py==0.10.11"}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "kinesis": {VERSIONS["boto3"]}, | 
					
						
							| 
									
										
										
										
											2023-10-04 20:16:21 +07:00
										 |  |  |     "looker": { | 
					
						
							|  |  |  |         VERSIONS["looker-sdk"], | 
					
						
							|  |  |  |         VERSIONS["lkml"], | 
					
						
							|  |  |  |         "gitpython~=3.1.34", | 
					
						
							| 
									
										
										
										
											2023-10-05 10:02:57 +02:00
										 |  |  |         VERSIONS["giturlparse"], | 
					
						
							| 
									
										
										
										
											2023-10-04 20:16:21 +07:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "mlflow": {"mlflow-skinny>=2.3.0"}, | 
					
						
							| 
									
										
										
										
											2024-08-20 12:19:05 +02:00
										 |  |  |     "mongo": {VERSIONS["mongo"], VERSIONS["pandas"], VERSIONS["numpy"]}, | 
					
						
							| 
									
										
										
										
											2023-08-29 11:16:32 +05:30
										 |  |  |     "couchbase": {"couchbase~=4.1"}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "mssql": { | 
					
						
							|  |  |  |         "sqlalchemy-pytds~=0.3", | 
					
						
							|  |  |  |         DATA_DIFF["mssql"], | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     "mssql-odbc": { | 
					
						
							|  |  |  |         VERSIONS["pyodbc"], | 
					
						
							|  |  |  |         DATA_DIFF["mssql"], | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     "mysql": { | 
					
						
							|  |  |  |         VERSIONS["pymysql"], | 
					
						
							|  |  |  |         DATA_DIFF["mysql"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "nifi": {},  # uses requests | 
					
						
							| 
									
										
										
										
											2024-03-12 08:39:25 +01:00
										 |  |  |     "openlineage": {*COMMONS["kafka"]}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "oracle": {"cx_Oracle>=8.3.0,<9", "oracledb~=1.2", DATA_DIFF["oracle"]}, | 
					
						
							| 
									
										
										
										
											2023-07-05 16:18:59 +09:00
										 |  |  |     "pgspider": {"psycopg2-binary", "sqlalchemy-pgspider"}, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "pinotdb": {"pinotdb~=5.0"}, | 
					
						
							| 
									
										
										
										
											2024-05-09 08:57:25 +05:30
										 |  |  |     "postgres": {*COMMONS["postgres"]}, | 
					
						
							| 
									
										
										
										
											2024-04-29 14:55:06 +05:30
										 |  |  |     "powerbi": { | 
					
						
							|  |  |  |         VERSIONS["msal"], | 
					
						
							|  |  |  |         VERSIONS["boto3"], | 
					
						
							|  |  |  |         VERSIONS["google-cloud-storage"], | 
					
						
							|  |  |  |         VERSIONS["azure-storage-blob"], | 
					
						
							|  |  |  |         VERSIONS["azure-identity"], | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-08-11 12:28:05 +05:30
										 |  |  |     "qliksense": {"websocket-client~=1.6.1"}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "presto": {*COMMONS["hive"], DATA_DIFF["presto"]}, | 
					
						
							| 
									
										
										
										
											2023-10-10 16:21:52 +05:30
										 |  |  |     "pymssql": {"pymssql~=2.2.0"}, | 
					
						
							| 
									
										
										
										
											2023-01-31 20:47:40 +05:30
										 |  |  |     "quicksight": {VERSIONS["boto3"]}, | 
					
						
							| 
									
										
										
										
											2023-05-15 11:48:03 +05:30
										 |  |  |     "redash": {VERSIONS["packaging"]}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "redpanda": {*COMMONS["kafka"]}, | 
					
						
							|  |  |  |     "redshift": { | 
					
						
							| 
									
										
										
										
											2023-06-19 12:09:09 +02:00
										 |  |  |         # Going higher has memory and performance issues | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |         VERSIONS["redshift"], | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |         "psycopg2-binary", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |         VERSIONS["geoalchemy2"], | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "sagemaker": {VERSIONS["boto3"]}, | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "salesforce": {"simple_salesforce~=1.11"}, | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     "sample-data": {VERSIONS["avro"], VERSIONS["grpc-tools"]}, | 
					
						
							| 
									
										
										
										
											2023-05-31 16:00:31 +02:00
										 |  |  |     "sap-hana": {"hdbcli", "sqlalchemy-hana"}, | 
					
						
							| 
									
										
										
										
											2024-01-11 09:46:57 -05:00
										 |  |  |     "sas": {}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "singlestore": {VERSIONS["pymysql"]}, | 
					
						
							|  |  |  |     "sklearn": {VERSIONS["scikit-learn"]}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "snowflake": {VERSIONS["snowflake"], DATA_DIFF["snowflake"]}, | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "superset": {},  # uses requests | 
					
						
							| 
									
										
										
										
											2024-01-08 11:03:05 +05:30
										 |  |  |     "tableau": {VERSIONS["tableau"], VERSIONS["validators"], VERSIONS["packaging"]}, | 
					
						
							| 
									
										
										
										
											2024-05-28 07:40:22 +03:00
										 |  |  |     "teradata": {VERSIONS["teradata"]}, | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     "trino": {VERSIONS["trino"], DATA_DIFF["trino"]}, | 
					
						
							|  |  |  |     "vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5", DATA_DIFF["vertica"]}, | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     "pii-processor": { | 
					
						
							|  |  |  |         VERSIONS["spacy"], | 
					
						
							|  |  |  |         VERSIONS["pandas"], | 
					
						
							| 
									
										
										
										
											2024-08-20 12:19:05 +02:00
										 |  |  |         VERSIONS["numpy"], | 
					
						
							| 
									
										
										
										
											2024-09-06 16:07:08 +02:00
										 |  |  |         "presidio-analyzer==2.2.355", | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | dev = { | 
					
						
							| 
									
										
										
										
											2022-05-18 10:55:39 +05:30
										 |  |  |     "black==22.3.0", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "datamodel-code-generator==0.25.6", | 
					
						
							|  |  |  |     "boto3-stubs", | 
					
						
							|  |  |  |     "mypy-boto3-glue", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "isort", | 
					
						
							|  |  |  |     "pre-commit", | 
					
						
							|  |  |  |     "pycln", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "pylint~=3.0", | 
					
						
							| 
									
										
										
										
											2023-11-22 07:10:37 +01:00
										 |  |  |     # For publishing | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  |     "twine", | 
					
						
							| 
									
										
										
										
											2023-11-22 07:10:37 +01:00
										 |  |  |     "build", | 
					
						
							| 
									
										
										
										
											2024-04-25 09:45:26 +05:30
										 |  |  |     *plugins["sample-data"], | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | test = { | 
					
						
							| 
									
										
										
										
											2023-02-01 10:20:26 +01:00
										 |  |  |     # Install Airflow as it's not part of `all` plugin | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["airflow"], | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     "boto3-stubs", | 
					
						
							|  |  |  |     "mypy-boto3-glue", | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     "coverage", | 
					
						
							| 
									
										
										
										
											2023-02-01 10:20:26 +01:00
										 |  |  |     # Install GE because it's not in the `all` plugin | 
					
						
							| 
									
										
										
										
											2023-01-11 07:05:12 +01:00
										 |  |  |     VERSIONS["great-expectations"], | 
					
						
							| 
									
										
										
										
											2024-07-18 11:52:56 +02:00
										 |  |  |     "basedpyright~=1.14", | 
					
						
							| 
									
										
										
										
											2022-02-06 23:41:56 +01:00
										 |  |  |     "pytest==7.0.0", | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  |     "pytest-cov", | 
					
						
							| 
									
										
										
										
											2022-10-10 11:36:20 +02:00
										 |  |  |     "pytest-order", | 
					
						
							| 
									
										
										
										
											2023-03-01 08:20:38 +01:00
										 |  |  |     # install dbt dependency | 
					
						
							|  |  |  |     "dbt-artifacts-parser", | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     "freezegun", | 
					
						
							| 
									
										
										
										
											2023-10-02 12:05:30 +02:00
										 |  |  |     VERSIONS["sqlalchemy-databricks"], | 
					
						
							|  |  |  |     VERSIONS["databricks-sdk"], | 
					
						
							|  |  |  |     VERSIONS["scikit-learn"], | 
					
						
							|  |  |  |     VERSIONS["pyarrow"], | 
					
						
							|  |  |  |     VERSIONS["trino"], | 
					
						
							|  |  |  |     VERSIONS["spacy"], | 
					
						
							|  |  |  |     VERSIONS["pydomo"], | 
					
						
							|  |  |  |     VERSIONS["looker-sdk"], | 
					
						
							|  |  |  |     VERSIONS["lkml"], | 
					
						
							|  |  |  |     VERSIONS["tableau"], | 
					
						
							|  |  |  |     VERSIONS["pyhive"], | 
					
						
							|  |  |  |     VERSIONS["mongo"], | 
					
						
							|  |  |  |     VERSIONS["redshift"], | 
					
						
							|  |  |  |     VERSIONS["snowflake"], | 
					
						
							|  |  |  |     VERSIONS["elasticsearch8"], | 
					
						
							| 
									
										
										
										
											2023-10-05 10:02:57 +02:00
										 |  |  |     VERSIONS["giturlparse"], | 
					
						
							| 
									
										
										
										
											2024-04-11 14:30:40 +02:00
										 |  |  |     VERSIONS["avro"],  # Sample Data | 
					
						
							|  |  |  |     VERSIONS["grpc-tools"], | 
					
						
							| 
									
										
										
										
											2024-07-16 11:01:43 +02:00
										 |  |  |     VERSIONS["neo4j"], | 
					
						
							| 
									
										
										
										
											2024-04-17 12:19:37 +02:00
										 |  |  |     "testcontainers==3.7.1;python_version<'3.9'", | 
					
						
							| 
									
										
										
										
											2024-08-21 12:35:09 +02:00
										 |  |  |     "testcontainers==4.8.0;python_version>='3.9'", | 
					
						
							| 
									
										
										
										
											2024-04-22 15:50:44 +02:00
										 |  |  |     "minio==7.2.5", | 
					
						
							| 
									
										
										
										
											2024-05-16 10:03:27 +02:00
										 |  |  |     *plugins["mlflow"], | 
					
						
							|  |  |  |     *plugins["datalake-s3"], | 
					
						
							| 
									
										
										
										
											2024-06-14 14:08:59 +05:30
										 |  |  |     *plugins["kafka"], | 
					
						
							|  |  |  |     "kafka-python==2.0.2", | 
					
						
							| 
									
										
										
										
											2024-06-05 21:18:37 +02:00
										 |  |  |     *plugins["pii-processor"], | 
					
						
							| 
									
										
										
										
											2024-05-22 17:12:00 +02:00
										 |  |  |     "requests==2.31.0", | 
					
						
							| 
									
										
										
										
											2024-06-20 16:54:12 +02:00
										 |  |  |     f"{DATA_DIFF['mysql']}==0.11.2", | 
					
						
							| 
									
										
										
										
											2024-06-25 07:51:22 +02:00
										 |  |  |     *plugins["deltalake"], | 
					
						
							| 
									
										
										
										
											2024-07-16 11:01:43 +02:00
										 |  |  |     *plugins["datalake-gcs"], | 
					
						
							|  |  |  |     *plugins["pgspider"], | 
					
						
							|  |  |  |     *plugins["clickhouse"], | 
					
						
							|  |  |  |     *plugins["mssql"], | 
					
						
							|  |  |  |     *plugins["dagster"], | 
					
						
							|  |  |  |     *plugins["oracle"], | 
					
						
							| 
									
										
										
										
											2024-07-17 08:11:34 +02:00
										 |  |  |     *plugins["mssql"], | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-19 14:44:36 +02:00
										 |  |  | e2e_test = { | 
					
						
							|  |  |  |     # playwright dependencies | 
					
						
							|  |  |  |     "pytest-playwright", | 
					
						
							|  |  |  |     "pytest-base-url", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-11 14:23:33 +05:30
										 |  |  | extended_testing = { | 
					
						
							|  |  |  |     "Faker",  # For Sample Data Generation | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-19 11:09:38 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def filter_requirements(filtered: Set[str]) -> List[str]: | 
					
						
							|  |  |  |     """Filter out requirements from base_requirements""" | 
					
						
							|  |  |  |     return list( | 
					
						
							|  |  |  |         base_requirements.union( | 
					
						
							|  |  |  |             *[ | 
					
						
							|  |  |  |                 requirements | 
					
						
							|  |  |  |                 for plugin, requirements in plugins.items() | 
					
						
							|  |  |  |                 if plugin not in filtered | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | setup( | 
					
						
							|  |  |  |     install_requires=list(base_requirements), | 
					
						
							|  |  |  |     extras_require={ | 
					
						
							|  |  |  |         "base": list(base_requirements), | 
					
						
							| 
									
										
										
										
											2022-01-07 10:28:38 +01:00
										 |  |  |         "dev": list(dev), | 
					
						
							|  |  |  |         "test": list(test), | 
					
						
							| 
									
										
										
										
											2023-09-19 14:44:36 +02:00
										 |  |  |         "e2e_test": list(e2e_test), | 
					
						
							| 
									
										
										
										
											2024-01-11 14:23:33 +05:30
										 |  |  |         "extended_testing": list(extended_testing), | 
					
						
							| 
									
										
										
										
											2022-11-15 05:44:25 +01:00
										 |  |  |         "data-insight": list(plugins["elasticsearch"]), | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |         **{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()}, | 
					
						
							| 
									
										
										
										
											2023-12-19 11:09:38 +01:00
										 |  |  |         "all": filter_requirements({"airflow", "db2", "great-expectations"}), | 
					
						
							|  |  |  |         "slim": filter_requirements( | 
					
						
							| 
									
										
										
										
											2024-06-20 08:38:21 +02:00
										 |  |  |             { | 
					
						
							|  |  |  |                 "airflow", | 
					
						
							|  |  |  |                 "db2", | 
					
						
							|  |  |  |                 "great-expectations", | 
					
						
							|  |  |  |                 "deltalake", | 
					
						
							|  |  |  |                 "deltalake-spark", | 
					
						
							|  |  |  |                 "sklearn", | 
					
						
							|  |  |  |             } | 
					
						
							| 
									
										
										
										
											2021-10-26 21:44:24 +05:30
										 |  |  |         ), | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-08-01 14:27:44 -07:00
										 |  |  | ) |