2021-12-01 12:46:28 +05:30
|
|
|
# Copyright 2021 Collate
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
|
|
|
|
|
2023-01-13 16:07:29 +05:30
|
|
|
"""
|
|
|
|
Python Dependencies
|
|
|
|
"""
|
|
|
|
|
2023-12-19 11:09:38 +01:00
|
|
|
from typing import Dict, List, Set
|
2021-08-01 14:27:44 -07:00
|
|
|
|
2023-11-22 07:10:37 +01:00
|
|
|
from setuptools import setup
|
2021-08-01 14:27:44 -07:00
|
|
|
|
2023-01-11 07:05:12 +01:00
|
|
|
# Add here versions required for multiple plugins.
# Each key is a short alias and each value is the full pip requirement
# specifier. Referencing VERSIONS[alias] instead of repeating the string keeps
# every user of a shared dependency on the same pin.
VERSIONS = {
    "airflow": "apache-airflow==2.9.1",
    "adlfs": "adlfs>=2023.1.0",
    "avro": "avro>=1.11.3,<1.12",
    "boto3": "boto3>=1.20,<2.0",  # No need to add botocore separately. It's a dep from boto3
    "geoalchemy2": "GeoAlchemy2~=0.12",
    "google-cloud-monitoring": "google-cloud-monitoring>=2.0.0",
    "google-cloud-storage": "google-cloud-storage>=1.43.0",
    "gcsfs": "gcsfs>=2023.1.0",
    "great-expectations": "great-expectations>=0.18.0,<0.18.14",
    "grpc-tools": "grpcio-tools>=1.47.2",
    "msal": "msal~=1.2",
    "neo4j": "neo4j~=5.3.0",
    "pandas": "pandas~=2.0.0",
    "pyarrow": "pyarrow~=16.0",
    "pydantic": "pydantic~=2.0,>=2.7.0",
    "pydomo": "pydomo~=0.3",
    "pymysql": "pymysql~=1.0",
    "pyodbc": "pyodbc>=4.0.35,<5",
    "numpy": "numpy<2",
    "scikit-learn": "scikit-learn~=1.0",  # Python 3.7 only goes up to 1.0.2
    "packaging": "packaging",
    "azure-storage-blob": "azure-storage-blob~=12.14",
    "azure-identity": "azure-identity~=1.12",
    "sqlalchemy-databricks": "sqlalchemy-databricks~=0.1",
    "databricks-sdk": "databricks-sdk>=0.18.0,<0.20.0",
    "trino": "trino[sqlalchemy]",
    "spacy": "spacy<3.8",
    "looker-sdk": "looker-sdk>=22.20.0,!=24.18.0",
    "lkml": "lkml~=1.3",
    "tableau": "tableau-api-lib~=0.1",
    "pyhive": "pyhive[hive_pure_sasl]~=0.7",
    "mongo": "pymongo~=4.3",
    "redshift": "sqlalchemy-redshift==0.8.12",
    "snowflake": "snowflake-sqlalchemy~=1.4",
    "elasticsearch8": "elasticsearch8~=8.9.0",
    "giturlparse": "giturlparse",
    "validators": "validators~=0.22.0",
    "teradata": "teradatasqlalchemy>=20.0.0.0",
    "cockroach": "sqlalchemy-cockroachdb~=2.0",
    "cassandra": "cassandra-driver>=3.28.0",
}
|
|
|
|
|
|
|
|
# Requirement sets that several plugins have in common. Each value is a set of
# pip requirement strings that plugin definitions unpack with `*COMMONS[...]`.
COMMONS = {
    "datalake": {
        VERSIONS["avro"],
        VERSIONS["boto3"],
        VERSIONS["pandas"],
        VERSIONS["pyarrow"],
        VERSIONS["numpy"],
        # python-snappy does not work well on 3.11 https://github.com/aio-libs/aiokafka/discussions/931
        # Using this as an alternative
        "cramjam~=2.7",
    },
    "hive": {
        "presto-types-parser>=0.0.2",
        VERSIONS["pyhive"],
    },
    "kafka": {
        VERSIONS["avro"],
        "confluent_kafka>=2.1.1,<=2.6.1",
        "fastavro>=1.2.0",
        # Due to https://github.com/grpc/grpc/issues/30843#issuecomment-1303816925
        # use >= v1.47.2 https://github.com/grpc/grpc/blob/v1.47.2/tools/distrib/python/grpcio_tools/grpc_version.py#L17
        VERSIONS[
            "grpc-tools"
        ],  # grpcio-tools already depends on grpcio. No need to add separately
        "protobuf",
    },
    "postgres": {
        VERSIONS["pymysql"],
        "psycopg2-binary",
        VERSIONS["geoalchemy2"],
        VERSIONS["packaging"],
    },  # Adding as Postgres SQL & GreenPlum are using common packages.
}
|
|
|
|
|
2024-06-20 16:54:12 +02:00
|
|
|
# Maps a driver name to the matching `collate-data-diff[<driver>]` extra.
DATA_DIFF = {
    driver: f"collate-data-diff[{driver}]"
    # data-diff uses different drivers out-of-the-box than OpenMetadata
    # the extras are described here:
    # https://github.com/open-metadata/collate-data-diff/blob/main/pyproject.toml#L68
    # install all data diffs with "pip install collate-data-diff[all-dbs]"
    for driver in [
        "clickhouse",
        # "duckdb", # Not supported by OpenMetadata
        "mssql",
        "mysql",
        "oracle",
        # "postgresql", we dont use this as it installs psycopg2 which interferes with psycopg2-binary
        "presto",
        "redshift",
        "snowflake",
        "trino",
        "vertica",
    ]
}
|
2023-01-11 07:05:12 +01:00
|
|
|
|
2021-08-01 14:27:44 -07:00
|
|
|
# Requirements installed with the package itself (passed to `install_requires`)
# and merged into the aggregate extras.
base_requirements = {
    "antlr4-python3-runtime==4.9.2",
    VERSIONS["azure-identity"],
    "azure-keyvault-secrets",  # Azure Key Vault SM
    VERSIONS["boto3"],  # Required in base for the secrets manager
    "cached-property==1.5.2",  # LineageParser
    "chardet==4.0.0",  # Used in the profiler
    "cryptography>=42.0.0",
    "google-cloud-secret-manager==2.19.0",
    "google-crc32c",
    "email-validator>=2.0",  # For the pydantic generated models for Email
    "importlib-metadata>=4.13.0",  # From airflow constraints
    "Jinja2>=2.11.3",
    "jsonpatch<2.0, >=1.24",
    "memory-profiler",
    "mypy_extensions>=0.4.3",
    VERSIONS["pydantic"],
    VERSIONS["pymysql"],
    "python-dateutil>=2.8.1",
    "PyYAML~=6.0",
    "requests>=2.23",
    "requests-aws4auth~=1.1",  # Only depends on requests as external package. Leaving as base.
    "sqlalchemy>=1.4.0,<2",
    "collate-sqllineage~=1.6.0",
    "tabulate==0.9.0",
    "typing-inspect",
    VERSIONS["packaging"],  # For version parsing
    "shapely",
    "collate-data-diff",
}
|
|
|
|
|
2021-08-01 14:27:44 -07:00
|
|
|
# One entry per installable extra: plugin name -> set of pip requirement strings.
# Plugins without extra requirements use `set()` (not `{}`, which is an empty
# dict) so every value matches the declared `Set[str]` type.
plugins: Dict[str, Set[str]] = {
    "airflow": {
        "opentelemetry-exporter-otlp==1.27.0",
        "protobuf<5",
        "attrs",
        VERSIONS["airflow"],
    },  # Same as ingestion container. For development.
    "amundsen": {VERSIONS["neo4j"]},
    "athena": {"pyathena~=3.0"},
    "atlas": set(),
    "azuresql": {VERSIONS["pyodbc"]},
    "azure-sso": {VERSIONS["msal"]},
    # NOTE(review): "azure-storage-blob" is unpinned here while other plugins use
    # VERSIONS["azure-storage-blob"] - confirm whether that is intentional.
    "backup": {VERSIONS["boto3"], VERSIONS["azure-identity"], "azure-storage-blob"},
    "bigquery": {
        "cachetools",
        "google-cloud-datacatalog>=3.6.2",
        "google-cloud-logging",
        VERSIONS["pyarrow"],
        VERSIONS["numpy"],
        "sqlalchemy-bigquery>=1.2.2",
    },
    "bigtable": {"google-cloud-bigtable>=2.0.0", VERSIONS["pandas"], VERSIONS["numpy"]},
    "clickhouse": {
        "clickhouse-driver~=0.2",
        "clickhouse-sqlalchemy~=0.2",
        DATA_DIFF["clickhouse"],
    },
    "dagster": {
        "croniter<3",
        VERSIONS["pymysql"],
        "psycopg2-binary",
        VERSIONS["geoalchemy2"],
        "dagster_graphql~=1.1",
    },
    "dbt": {
        "google-cloud",
        VERSIONS["boto3"],
        VERSIONS["google-cloud-storage"],
        "collate-dbt-artifacts-parser",
        VERSIONS["azure-storage-blob"],
        VERSIONS["azure-identity"],
    },
    "db2": {"ibm-db-sa~=0.4.1", "ibm-db>=2.0.0"},
    "db2-ibmi": {"sqlalchemy-ibmi~=0.9.3"},
    "databricks": {
        VERSIONS["sqlalchemy-databricks"],
        VERSIONS["databricks-sdk"],
        "ndg-httpsclient~=0.5.1",
        "pyOpenSSL~=24.1.0",
        "pyasn1~=0.6.0",
        # databricks has a dependency on pyhive for metadata as well as profiler
        VERSIONS["pyhive"],
    },
    "datalake-azure": {
        VERSIONS["azure-storage-blob"],
        VERSIONS["azure-identity"],
        VERSIONS["adlfs"],
        *COMMONS["datalake"],
    },
    "datalake-gcs": {
        VERSIONS["google-cloud-monitoring"],
        VERSIONS["google-cloud-storage"],
        VERSIONS["gcsfs"],
        *COMMONS["datalake"],
    },
    "datalake-s3": {
        # vendoring 'boto3' to keep all dependencies aligned (s3fs, boto3, botocore, aiobotocore)
        "s3fs[boto3]",
        *COMMONS["datalake"],
    },
    "deltalake": {
        "delta-spark<=2.3.0",
        "deltalake~=0.17,<0.20",
    },  # TODO: remove pinning to under 0.20 after https://github.com/open-metadata/OpenMetadata/issues/17909
    "deltalake-storage": {"deltalake~=0.17"},
    "deltalake-spark": {"delta-spark<=2.3.0"},
    "domo": {VERSIONS["pydomo"]},
    "doris": {"pydoris==1.0.2"},
    "druid": {"pydruid>=0.6.5"},
    "dynamodb": {VERSIONS["boto3"]},
    "elasticsearch": {
        VERSIONS["elasticsearch8"],
    },  # also requires requests-aws4auth which is in base
    "exasol": {"sqlalchemy_exasol>=5,<6"},
    "glue": {VERSIONS["boto3"]},
    "great-expectations": {VERSIONS["great-expectations"]},
    "greenplum": {*COMMONS["postgres"]},
    "cockroach": {
        VERSIONS["cockroach"],
        "psycopg2-binary",
    },
    "hive": {
        *COMMONS["hive"],
        "thrift>=0.13,<1",
        # Replacing sasl with pure-sasl based on https://github.com/cloudera/python-sasl/issues/30 for py 3.11
        "pure-sasl",
        "thrift-sasl~=0.4",
        "impyla~=0.18.0",
    },
    "iceberg": {
        "pyiceberg==0.5.1",
        # Forcing the version of a few packages so it plays nicely with other requirements.
        VERSIONS["pydantic"],
        VERSIONS["adlfs"],
        VERSIONS["gcsfs"],
        VERSIONS["pyarrow"],
    },
    "impala": {
        "presto-types-parser>=0.0.2",
        "impyla[kerberos]~=0.18.0",
        "thrift>=0.13,<1",
        "pure-sasl",
        "thrift-sasl~=0.4",
    },
    "kafka": {*COMMONS["kafka"]},
    "kafkaconnect": {"kafka-connect-py==0.10.11"},
    "kinesis": {VERSIONS["boto3"]},
    "looker": {
        VERSIONS["looker-sdk"],
        VERSIONS["lkml"],
        "gitpython~=3.1.34",
        VERSIONS["giturlparse"],
        "python-liquid",
    },
    "mlflow": {"mlflow-skinny>=2.3.0"},
    "mongo": {VERSIONS["mongo"], VERSIONS["pandas"], VERSIONS["numpy"]},
    "cassandra": {VERSIONS["cassandra"]},
    "couchbase": {"couchbase~=4.1"},
    "mssql": {
        "sqlalchemy-pytds~=0.3",
        DATA_DIFF["mssql"],
    },
    "mssql-odbc": {
        VERSIONS["pyodbc"],
        DATA_DIFF["mssql"],
    },
    "mysql": {
        VERSIONS["pymysql"],
        DATA_DIFF["mysql"],
    },
    "nifi": set(),  # uses requests
    "openlineage": {*COMMONS["kafka"]},
    "oracle": {"cx_Oracle>=8.3.0,<9", "oracledb~=1.2", DATA_DIFF["oracle"]},
    "pgspider": {"psycopg2-binary", "sqlalchemy-pgspider"},
    "pinotdb": {"pinotdb~=5.0"},
    "postgres": {*COMMONS["postgres"]},
    "powerbi": {
        VERSIONS["msal"],
        VERSIONS["boto3"],
        VERSIONS["google-cloud-storage"],
        VERSIONS["azure-storage-blob"],
        VERSIONS["azure-identity"],
    },
    "qliksense": {"websocket-client~=1.6.1"},
    "presto": {*COMMONS["hive"], DATA_DIFF["presto"]},
    "pymssql": {"pymssql~=2.2.0"},
    "quicksight": {VERSIONS["boto3"]},
    "redash": {VERSIONS["packaging"]},
    "redpanda": {*COMMONS["kafka"]},
    "redshift": {
        # Going higher has memory and performance issues
        VERSIONS["redshift"],
        "psycopg2-binary",
        VERSIONS["geoalchemy2"],
    },
    "sagemaker": {VERSIONS["boto3"]},
    "salesforce": {"simple_salesforce~=1.11"},
    "sample-data": {VERSIONS["avro"], VERSIONS["grpc-tools"]},
    "sap-hana": {"hdbcli", "sqlalchemy-hana"},
    "sas": set(),
    "singlestore": {VERSIONS["pymysql"]},
    "sklearn": {VERSIONS["scikit-learn"]},
    "snowflake": {VERSIONS["snowflake"], DATA_DIFF["snowflake"]},
    "superset": set(),  # uses requests
    "tableau": {VERSIONS["tableau"], VERSIONS["validators"], VERSIONS["packaging"]},
    "teradata": {VERSIONS["teradata"]},
    "trino": {VERSIONS["trino"], DATA_DIFF["trino"]},
    "vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5", DATA_DIFF["vertica"]},
    "pii-processor": {
        VERSIONS["spacy"],
        VERSIONS["pandas"],
        VERSIONS["numpy"],
        "presidio-analyzer==2.2.355",
    },
}
|
2023-01-11 07:05:12 +01:00
|
|
|
|
2022-01-07 10:28:38 +01:00
|
|
|
# Requirements for the "dev" extra: formatters, linters, code generation and
# publishing tools, plus everything the "sample-data" plugin needs.
dev = {
    "black==22.3.0",
    "datamodel-code-generator==0.25.6",
    "boto3-stubs",
    "mypy-boto3-glue",
    "isort",
    "pre-commit",
    "pycln",
    "pylint~=3.2.0",  # 3.3.0+ breaks our current linting
    # For publishing
    "twine",
    "build",
    *plugins["sample-data"],
}
|
2023-01-11 07:05:12 +01:00
|
|
|
|
2022-01-07 10:28:38 +01:00
|
|
|
# Requirements for the "test" extra. This is a set, so repeated entries
# collapse; each plugin is therefore unpacked exactly once.
test = {
    # Install Airflow as it's not part of `all` plugin
    "opentelemetry-exporter-otlp==1.27.0",
    VERSIONS["airflow"],
    "boto3-stubs",
    "mypy-boto3-glue",
    "coverage",
    # Install GE because it's not in the `all` plugin
    VERSIONS["great-expectations"],
    "basedpyright~=1.14",
    "pytest==7.0.0",
    "pytest-cov",
    "pytest-order",
    "dirty-equals",
    # install dbt dependency
    "collate-dbt-artifacts-parser",
    "freezegun",
    VERSIONS["sqlalchemy-databricks"],
    VERSIONS["databricks-sdk"],
    VERSIONS["scikit-learn"],
    VERSIONS["pyarrow"],
    VERSIONS["trino"],
    VERSIONS["spacy"],
    VERSIONS["pydomo"],
    VERSIONS["looker-sdk"],
    VERSIONS["lkml"],
    VERSIONS["tableau"],
    VERSIONS["pyhive"],
    VERSIONS["mongo"],
    VERSIONS["cassandra"],
    VERSIONS["redshift"],
    VERSIONS["snowflake"],
    VERSIONS["elasticsearch8"],
    VERSIONS["giturlparse"],
    VERSIONS["avro"],  # Sample Data
    VERSIONS["grpc-tools"],
    VERSIONS["neo4j"],
    VERSIONS["cockroach"],
    "testcontainers==3.7.1;python_version<'3.9'",
    "testcontainers~=4.8.0;python_version>='3.9'",
    "minio==7.2.5",
    *plugins["mlflow"],
    *plugins["datalake-s3"],
    *plugins["kafka"],
    "kafka-python==2.0.2",
    *plugins["pii-processor"],
    "requests==2.31.0",
    DATA_DIFF["mysql"],
    *plugins["deltalake"],
    *plugins["datalake-gcs"],
    *plugins["pgspider"],
    *plugins["clickhouse"],
    *plugins["mssql"],
    *plugins["dagster"],
    *plugins["oracle"],
}
|
2021-08-01 14:27:44 -07:00
|
|
|
|
2023-09-19 14:44:36 +02:00
|
|
|
# Requirements for the "e2e_test" extra.
e2e_test = {
    # playwright dependencies
    "pytest-playwright",
    "pytest-base-url",
}
|
|
|
|
|
2024-01-11 14:23:33 +05:30
|
|
|
# Requirements for the "extended_testing" extra.
extended_testing = {
    "Faker",  # For Sample Data Generation
}
|
|
|
|
|
2023-12-19 11:09:38 +01:00
|
|
|
|
|
|
|
def filter_requirements(filtered: Set[str]) -> List[str]:
    """Build the requirement list for an aggregate extra.

    Combines ``base_requirements`` with the requirements of every entry in
    ``plugins`` whose name is not listed in ``filtered``.

    Args:
        filtered: Plugin names whose requirements must be left out.

    Returns:
        The de-duplicated requirement strings, sorted so the result does not
        depend on set iteration order.
    """
    selected = (
        requirements
        for plugin, requirements in plugins.items()
        if plugin not in filtered
    )
    return sorted(base_requirements.union(*selected))
|
|
|
|
|
|
|
|
|
2021-08-01 14:27:44 -07:00
|
|
|
# Register the package requirements. Requirement sets are passed through
# `sorted` so the generated metadata has a stable order.
setup(
    install_requires=sorted(base_requirements),
    extras_require={
        "base": sorted(base_requirements),
        "dev": sorted(dev),
        "test": sorted(test),
        "e2e_test": sorted(e2e_test),
        "extended_testing": sorted(extended_testing),
        "data-insight": sorted(plugins["elasticsearch"]),
        # One extra per plugin, named after the plugin key
        **{plugin: sorted(dependencies) for (plugin, dependencies) in plugins.items()},
        # Aggregates: base requirements plus every plugin not listed here
        "all": filter_requirements({"airflow", "db2", "great-expectations"}),
        "slim": filter_requirements(
            {
                "airflow",
                "db2",
                "great-expectations",
                "deltalake",
                "deltalake-spark",
                "sklearn",
            }
        ),
    },
)
|