| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  | from typing import Dict, Set | 
					
						
							| 
									
										
										
										
											2021-02-11 23:14:20 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  | import setuptools | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  | package_metadata: dict = {} | 
					
						
							| 
									
										
										
										
											2025-01-24 11:35:39 -08:00
										 |  |  | with open("./src/datahub/_version.py") as fp: | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     exec(fp.read(), package_metadata) | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-26 15:02:47 -08:00
										 |  |  | _version: str = package_metadata["__version__"] | 
					
						
							|  |  |  | _self_pin = ( | 
					
						
							| 
									
										
										
										
											2024-08-21 21:43:36 +05:30
										 |  |  |     f"=={_version}" | 
					
						
							|  |  |  |     if not (_version.endswith(("dev0", "dev1")) or "docker" in _version) | 
					
						
							|  |  |  |     else "" | 
					
						
							| 
									
										
										
										
											2024-02-26 15:02:47 -08:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | base_requirements = { | 
					
						
							| 
									
										
										
										
											2024-11-28 21:41:53 -05:00
										 |  |  |     # Our min version of typing_extensions is somewhat constrained by Airflow. | 
					
						
							| 
									
										
										
										
											2025-03-26 11:32:46 -07:00
										 |  |  |     "typing_extensions>=4.5.0", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     # Actual dependencies. | 
					
						
							| 
									
										
										
										
											2021-04-01 12:15:05 -07:00
										 |  |  |     "typing-inspect", | 
					
						
							| 
									
										
										
										
											2023-12-18 18:26:33 -05:00
										 |  |  |     # pydantic 1.8.2 is incompatible with mypy 0.910. | 
					
						
							|  |  |  |     # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910. | 
					
						
							| 
									
										
										
										
											2023-01-04 19:31:04 -05:00
										 |  |  |     # pydantic 1.10.3 is incompatible with typing-extensions 4.1.1 - https://github.com/pydantic/pydantic/issues/4885 | 
					
						
							| 
									
										
										
										
											2023-12-18 18:26:33 -05:00
										 |  |  |     "pydantic>=1.10.0,!=1.10.3", | 
					
						
							| 
									
										
										
										
											2022-02-24 15:35:48 -05:00
										 |  |  |     "mixpanel>=4.9.0", | 
					
						
							| 
									
										
										
										
											2024-02-27 12:36:24 -08:00
										 |  |  |     # Airflow depends on fairly old versions of sentry-sdk, so we want to be loose with our constraints. | 
					
						
							|  |  |  |     "sentry-sdk", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | framework_common = { | 
					
						
							| 
									
										
										
										
											2025-05-14 23:26:18 -07:00
										 |  |  |     # Avoiding click 8.2.0 due to https://github.com/pallets/click/issues/2894 | 
					
						
							|  |  |  |     "click>=7.1.2, !=8.2.0", | 
					
						
							| 
									
										
										
										
											2021-07-29 20:04:40 -07:00
										 |  |  |     "click-default-group", | 
					
						
							| 
									
										
										
										
											2021-04-12 17:40:15 -07:00
										 |  |  |     "PyYAML", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     "toml>=0.10.0", | 
					
						
							| 
									
										
										
										
											2023-09-22 16:43:58 -07:00
										 |  |  |     # In Python 3.10+, importlib_metadata is included in the standard library. | 
					
						
							|  |  |  |     "importlib_metadata>=4.0.0; python_version < '3.10'", | 
					
						
							| 
									
										
										
										
											2021-04-12 17:40:15 -07:00
										 |  |  |     "docker", | 
					
						
							| 
									
										
										
										
											2021-03-26 21:57:05 -07:00
										 |  |  |     "expandvars>=0.6.5", | 
					
						
							| 
									
										
										
										
											2024-09-09 00:02:05 -07:00
										 |  |  |     "avro-gen3==0.7.16", | 
					
						
							| 
									
										
										
										
											2022-08-29 23:34:58 +00:00
										 |  |  |     # "avro-gen3 @ git+https://github.com/acryldata/avro_gen@master#egg=avro-gen3", | 
					
						
							| 
									
										
										
										
											2025-02-26 09:24:38 -08:00
										 |  |  |     # avro has historically made breaking changes, so we have a cautious upper bound. | 
					
						
							|  |  |  |     "avro>=1.11.3,<1.13", | 
					
						
							| 
									
										
										
										
											2021-08-21 18:41:28 -07:00
										 |  |  |     "python-dateutil>=2.8.0", | 
					
						
							| 
									
										
										
										
											2021-07-29 20:04:40 -07:00
										 |  |  |     "tabulate", | 
					
						
							| 
									
										
										
										
											2021-11-07 22:13:50 -08:00
										 |  |  |     "progressbar2", | 
					
						
							| 
									
										
										
										
											2022-02-08 23:25:12 -08:00
										 |  |  |     "psutil>=5.8.0", | 
					
						
							| 
									
										
										
										
											2022-03-02 11:36:37 +05:30
										 |  |  |     "Deprecated", | 
					
						
							| 
									
										
										
										
											2022-06-04 09:34:17 -07:00
										 |  |  |     "humanfriendly", | 
					
						
							| 
									
										
										
										
											2022-06-06 16:34:23 -07:00
										 |  |  |     "packaging", | 
					
						
							| 
									
										
										
										
											2022-07-16 17:38:33 -07:00
										 |  |  |     "aiohttp<4", | 
					
						
							| 
									
										
										
										
											2022-08-19 09:08:17 -07:00
										 |  |  |     "cached_property", | 
					
						
							| 
									
										
										
										
											2022-08-21 01:48:22 -07:00
										 |  |  |     "ijson", | 
					
						
							| 
									
										
										
										
											2022-08-30 13:25:03 -07:00
										 |  |  |     "click-spinner", | 
					
						
							| 
									
										
										
										
											2022-11-16 12:29:24 -06:00
										 |  |  |     "requests_file", | 
					
						
							| 
									
										
										
										
											2023-02-19 08:43:13 -08:00
										 |  |  |     "jsonref", | 
					
						
							| 
									
										
										
										
											2024-01-29 10:50:47 -08:00
										 |  |  |     "jsonschema", | 
					
						
							| 
									
										
										
										
											2023-05-23 15:25:28 +02:00
										 |  |  |     "ruamel.yaml", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-18 18:26:33 -05:00
										 |  |  | pydantic_no_v2 = { | 
					
						
							|  |  |  |     # pydantic 2 makes major, backwards-incompatible changes - https://github.com/pydantic/pydantic/issues/4887 | 
					
						
							|  |  |  |     # Tags sources that require the pydantic v2 API. | 
					
						
							|  |  |  |     "pydantic<2", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-02-02 09:24:28 -05:00
										 |  |  | rest_common = {"requests", "requests_file"} | 
					
						
							| 
									
										
										
										
											2022-11-11 15:04:36 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | kafka_common = { | 
					
						
							| 
									
										
										
										
											2023-12-21 20:28:45 -05:00
										 |  |  |     # Note that confluent_kafka 1.9.0 introduced a hard compatibility break, and | 
					
						
							|  |  |  |     # requires librdkafka >=1.9.0. This is generally not an issue, since they | 
					
						
							|  |  |  |     # now provide prebuilt wheels for most platforms, including M1 Macs and | 
					
						
							|  |  |  |     # Linux aarch64 (e.g. Docker's linux/arm64). Installing confluent_kafka | 
					
						
							|  |  |  |     # from source remains a pain. | 
					
						
							| 
									
										
										
										
											2025-02-28 10:14:07 -08:00
										 |  |  |     # With the release of 2.8.1, confluent-kafka only released a source distribution, | 
					
						
							|  |  |  |     # and no prebuilt wheels. | 
					
						
							|  |  |  |     # See https://github.com/confluentinc/confluent-kafka-python/issues/1927 | 
					
						
							|  |  |  |     "confluent_kafka[schemaregistry,avro]>=1.9.0, != 2.8.1", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     # We currently require both Avro libraries. The codegen uses avro-python3 (above) | 
					
						
							|  |  |  |     # schema parsers at runtime for generating and reading JSON into Python objects. | 
					
						
							|  |  |  |     # At the same time, we use Kafka's AvroSerializer, which internally relies on | 
					
						
							|  |  |  |     # fastavro for serialization. We do not use confluent_kafka[avro], since it | 
					
						
							|  |  |  |     # is incompatible with its own dep on avro-python3. | 
					
						
							| 
									
										
										
										
											2021-04-12 17:40:15 -07:00
										 |  |  |     "fastavro>=1.2.0", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-10 22:00:31 +00:00
										 |  |  | kafka_protobuf = { | 
					
						
							|  |  |  |     "networkx>=2.6.2", | 
					
						
							|  |  |  |     # Required to generate protobuf python modules from the schema downloaded from the schema registry | 
					
						
							| 
									
										
										
										
											2022-11-03 14:23:19 -04:00
										 |  |  |     # NOTE: potential conflict with feast also depending on grpcio | 
					
						
							|  |  |  |     "grpcio>=1.44.0,<2", | 
					
						
							|  |  |  |     "grpcio-tools>=1.44.0,<2", | 
					
						
							| 
									
										
										
										
											2022-08-10 22:00:31 +00:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2022-05-04 17:07:01 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-26 21:44:32 +05:30
										 |  |  | usage_common = { | 
					
						
							|  |  |  |     "sqlparse", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | sqlglot_lib = { | 
					
						
							| 
									
										
										
										
											2024-10-22 19:57:46 -07:00
										 |  |  |     # We heavily monkeypatch sqlglot. | 
					
						
							| 
									
										
										
										
											2025-02-22 11:35:43 -08:00
										 |  |  |     # We used to maintain an acryl-sqlglot fork: https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 | 
					
						
							|  |  |  |     # but not longer do. | 
					
						
							| 
									
										
										
										
											2025-06-10 11:09:37 -07:00
										 |  |  |     "sqlglot[rs]==26.26.0", | 
					
						
							| 
									
										
										
										
											2024-10-22 19:57:46 -07:00
										 |  |  |     "patchy==2.8.0", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-12 21:53:20 +05:30
										 |  |  | classification_lib = { | 
					
						
							| 
									
										
										
										
											2024-07-03 19:32:46 -04:00
										 |  |  |     "acryl-datahub-classify==0.0.11", | 
					
						
							| 
									
										
										
										
											2025-07-09 13:24:33 +05:30
										 |  |  |     # schwifty is needed for the classify plugin | 
					
						
							|  |  |  |     "schwifty", | 
					
						
							| 
									
										
										
										
											2024-04-19 12:52:51 -07:00
										 |  |  |     # This is a bit of a hack. Because we download the SpaCy model at runtime in the classify plugin, | 
					
						
							|  |  |  |     # we need pip to be available. | 
					
						
							|  |  |  |     "pip", | 
					
						
							| 
									
										
										
										
											2024-06-17 13:50:08 -07:00
										 |  |  |     # We were seeing an error like this `numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject` | 
					
						
							|  |  |  |     # with numpy 2.0. This likely indicates a mismatch between scikit-learn and numpy versions. | 
					
						
							|  |  |  |     # https://stackoverflow.com/questions/40845304/runtimewarning-numpy-dtype-size-changed-may-indicate-binary-incompatibility | 
					
						
							|  |  |  |     "numpy<2", | 
					
						
							| 
									
										
										
										
											2024-03-12 21:53:20 +05:30
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-25 09:31:19 -07:00
										 |  |  | dbt_common = { | 
					
						
							|  |  |  |     *sqlglot_lib, | 
					
						
							|  |  |  |     "more_itertools", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-22 11:49:57 -07:00
										 |  |  | cachetools_lib = { | 
					
						
							|  |  |  |     "cachetools", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-02 13:04:53 -07:00
										 |  |  | great_expectations_lib = { | 
					
						
							|  |  |  |     # 1. Our original dep was this: | 
					
						
							|  |  |  |     # "great-expectations>=0.15.12, <=0.15.50", | 
					
						
							|  |  |  |     # 2. For hive, we had additional restrictions: | 
					
						
							|  |  |  |     #    Due to https://github.com/great-expectations/great_expectations/issues/6146, | 
					
						
							|  |  |  |     #    we cannot allow 0.15.{23-26}. This was fixed in 0.15.27 by | 
					
						
							|  |  |  |     #    https://github.com/great-expectations/great_expectations/pull/6149. | 
					
						
							|  |  |  |     # "great-expectations != 0.15.23, != 0.15.24, != 0.15.25, != 0.15.26", | 
					
						
							|  |  |  |     # 3. Since then, we've ended up forking great-expectations in order to | 
					
						
							|  |  |  |     #    add pydantic 2.x support. The fork is pretty simple | 
					
						
							|  |  |  |     #    https://github.com/great-expectations/great_expectations/compare/0.15.50...hsheth2:great_expectations:0.15.50-pydantic-2-patch?expand=1 | 
					
						
							|  |  |  |     #    This was derived from work done by @jskrzypek in | 
					
						
							|  |  |  |     #    https://github.com/datahub-project/datahub/issues/8115#issuecomment-2264219783 | 
					
						
							|  |  |  |     "acryl-great-expectations==0.15.50.1", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-02 19:54:01 -07:00
										 |  |  | sqlalchemy_lib = { | 
					
						
							| 
									
										
										
										
											2025-04-30 19:39:35 -07:00
										 |  |  |     # Required for all SQL sources. | 
					
						
							|  |  |  |     # This is temporary lower bound that we're open to loosening/tightening as requirements show up | 
					
						
							|  |  |  |     "sqlalchemy>=1.4.39, <2", | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2023-10-26 21:44:32 +05:30
										 |  |  | sql_common = ( | 
					
						
							|  |  |  |     { | 
					
						
							| 
									
										
										
										
											2025-05-02 19:54:01 -07:00
										 |  |  |         *sqlalchemy_lib, | 
					
						
							| 
									
										
										
										
											2023-10-26 21:44:32 +05:30
										 |  |  |         # Required for SQL profiling. | 
					
						
							| 
									
										
										
										
											2025-05-02 13:04:53 -07:00
										 |  |  |         *great_expectations_lib, | 
					
						
							| 
									
										
										
										
											2023-10-26 21:44:32 +05:30
										 |  |  |         # scipy version restricted to reduce backtracking, used by great-expectations, | 
					
						
							|  |  |  |         "scipy>=1.7.2", | 
					
						
							|  |  |  |         # GE added handling for higher version of jinja2 | 
					
						
							|  |  |  |         # https://github.com/great-expectations/great_expectations/pull/5382/files | 
					
						
							|  |  |  |         # datahub does not depend on traitlets directly but great expectations does. | 
					
						
							|  |  |  |         # https://github.com/ipython/traitlets/issues/741 | 
					
						
							| 
									
										
										
										
											2024-10-29 01:21:20 -07:00
										 |  |  |         "traitlets!=5.2.2", | 
					
						
							| 
									
										
										
										
											2024-12-04 17:50:56 -05:00
										 |  |  |         # GE depends on IPython - we have no direct dependency on it. | 
					
						
							|  |  |  |         # IPython 8.22.0 added a dependency on traitlets 5.13.x, but only declared a | 
					
						
							|  |  |  |         # version requirement of traitlets>5. | 
					
						
							|  |  |  |         # See https://github.com/ipython/ipython/issues/14352. | 
					
						
							|  |  |  |         # This issue was fixed by https://github.com/ipython/ipython/pull/14353, | 
					
						
							|  |  |  |         # which first appeared in IPython 8.22.1. | 
					
						
							|  |  |  |         # As such, we just need to avoid that version in order to get the | 
					
						
							|  |  |  |         # dependencies that we need. IPython probably should've yanked 8.22.0. | 
					
						
							|  |  |  |         "IPython!=8.22.0", | 
					
						
							| 
									
										
										
										
											2023-10-26 21:44:32 +05:30
										 |  |  |         "greenlet", | 
					
						
							| 
									
										
										
										
											2024-10-22 11:49:57 -07:00
										 |  |  |         *cachetools_lib, | 
					
						
							| 
									
										
										
										
											2023-10-26 21:44:32 +05:30
										 |  |  |     } | 
					
						
							|  |  |  |     | usage_common | 
					
						
							|  |  |  |     | sqlglot_lib | 
					
						
							| 
									
										
										
										
											2024-03-12 21:53:20 +05:30
										 |  |  |     | classification_lib | 
					
						
							| 
									
										
										
										
											2023-10-26 21:44:32 +05:30
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-29 19:43:31 -07:00
										 |  |  | aws_common = { | 
					
						
							|  |  |  |     # AWS Python SDK | 
					
						
							| 
									
										
										
										
											2021-11-08 16:23:25 -08:00
										 |  |  |     "boto3", | 
					
						
							|  |  |  |     # Deal with a version incompatibility between botocore (used by boto3) and urllib3. | 
					
						
							|  |  |  |     # See https://github.com/boto/botocore/pull/2563. | 
					
						
							|  |  |  |     "botocore!=1.23.0", | 
					
						
							| 
									
										
										
										
											2021-06-29 19:43:31 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-29 16:25:48 +02:00
										 |  |  | path_spec_common = { | 
					
						
							|  |  |  |     "parse>=1.19.0", | 
					
						
							|  |  |  |     "wcmatch", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-16 23:09:45 -07:00
										 |  |  | looker_common = { | 
					
						
							|  |  |  |     # Looker Python SDK | 
					
						
							| 
									
										
										
										
											2024-10-31 15:50:41 -07:00
										 |  |  |     "looker-sdk>=23.0.0", | 
					
						
							| 
									
										
										
										
											2022-10-23 23:31:48 -07:00
										 |  |  |     # This version of lkml contains a fix for parsing lists in | 
					
						
							|  |  |  |     # LookML files with spaces between an item and the following comma. | 
					
						
							|  |  |  |     # See https://github.com/joshtemple/lkml/issues/73. | 
					
						
							| 
									
										
										
										
											2024-01-22 16:23:56 -08:00
										 |  |  |     "lkml>=1.3.4", | 
					
						
							| 
									
										
										
										
											2024-07-10 01:21:08 +05:30
										 |  |  |     *sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2022-10-23 23:31:48 -07:00
										 |  |  |     "GitPython>2", | 
					
						
							| 
									
										
										
										
											2025-03-17 13:51:04 -07:00
										 |  |  |     # python-liquid 2 includes a bunch of breaking changes. | 
					
						
							|  |  |  |     # See https://jg-rp.github.io/liquid/migration/ | 
					
						
							|  |  |  |     # Eventually we should fully upgrade to v2, but that will require | 
					
						
							|  |  |  |     # us to drop Python 3.8 support first. | 
					
						
							|  |  |  |     "python-liquid<2", | 
					
						
							| 
									
										
										
										
											2024-08-21 21:43:36 +05:30
										 |  |  |     "deepmerge>=1.1.1", | 
					
						
							| 
									
										
										
										
											2021-09-16 23:09:45 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-25 17:50:07 -07:00
										 |  |  | bigquery_common = { | 
					
						
							|  |  |  |     # Google cloud logging library | 
					
						
							| 
									
										
										
										
											2023-02-21 13:03:00 +05:30
										 |  |  |     "google-cloud-logging<=3.5.0", | 
					
						
							| 
									
										
										
										
											2022-04-13 17:59:31 +05:30
										 |  |  |     "google-cloud-bigquery", | 
					
						
							| 
									
										
										
										
											2024-06-14 16:43:12 +05:30
										 |  |  |     "google-cloud-datacatalog>=1.5.0", | 
					
						
							| 
									
										
										
										
											2024-08-21 00:12:00 +05:30
										 |  |  |     "google-cloud-resource-manager", | 
					
						
							| 
									
										
										
										
											2022-01-13 22:12:11 -08:00
										 |  |  |     "more-itertools>=8.12.0", | 
					
						
							| 
									
										
										
										
											2024-01-11 00:48:36 +05:30
										 |  |  |     "sqlalchemy-bigquery>=1.4.1", | 
					
						
							| 
									
										
										
										
											2024-10-30 03:13:15 -07:00
										 |  |  |     *path_spec_common, | 
					
						
							| 
									
										
										
										
											2021-10-25 17:50:07 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-11 15:04:36 -05:00
										 |  |  | clickhouse_common = { | 
					
						
							| 
									
										
										
										
											2023-09-13 00:00:24 +05:30
										 |  |  |     # Clickhouse 0.2.0 adds support for SQLAlchemy 1.4.x | 
					
						
							| 
									
										
										
										
											2023-10-30 14:18:48 -07:00
										 |  |  |     # Disallow 0.2.5 because of https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/272. | 
					
						
							|  |  |  |     # Note that there's also a known issue around nested map types: https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/269. | 
					
						
							| 
									
										
										
										
											2025-05-06 23:07:47 +02:00
										 |  |  |     # zstd needs to be pinned because the latest version causes issues on arm | 
					
						
							|  |  |  |     "zstd<1.5.6.8", | 
					
						
							| 
									
										
										
										
											2023-10-30 14:18:48 -07:00
										 |  |  |     "clickhouse-sqlalchemy>=0.2.0,<0.2.5", | 
					
						
							| 
									
										
										
										
											2022-11-11 15:04:36 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-29 16:25:48 +02:00
										 |  |  | redshift_common = { | 
					
						
							| 
									
										
										
										
											2023-09-13 00:00:24 +05:30
										 |  |  |     # Clickhouse 0.8.3 adds support for SQLAlchemy 1.4.x | 
					
						
							|  |  |  |     "sqlalchemy-redshift>=0.8.3", | 
					
						
							| 
									
										
										
										
											2022-06-29 16:25:48 +02:00
										 |  |  |     "GeoAlchemy2", | 
					
						
							| 
									
										
										
										
											2025-01-03 13:33:45 +01:00
										 |  |  |     "redshift-connector>=2.1.5", | 
					
						
							| 
									
										
										
										
											2022-06-29 16:25:48 +02:00
										 |  |  |     *path_spec_common, | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-30 13:47:53 -06:00
										 |  |  | snowflake_common = { | 
					
						
							| 
									
										
										
										
											2025-06-11 20:37:00 -07:00
										 |  |  |     # Lower bound due to https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Upper bound <1.7.4: Version 1.7.4 of snowflake-sqlalchemy introduced a bug that breaks | 
					
						
							|  |  |  |     # table column name reflection for non-uppercase table names. While we do not | 
					
						
							|  |  |  |     # use this method directly, it is used by great-expectations during profiling. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # See: https://github.com/snowflakedb/snowflake-sqlalchemy/compare/v1.7.3...v1.7.4 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # The exact cause of the breakage in v1.7.4 is unclear, but it may be related to | 
					
						
							|  |  |  |     # changes in the _get_table_columns function. I initially suspected PR #541 | 
					
						
							|  |  |  |     # (https://github.com/snowflakedb/snowflake-sqlalchemy/pull/541), but that has been | 
					
						
							|  |  |  |     # present since v1.7.0 and does not appear to cause issues. | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # Reflection failures for case-sensitive object names are a known issue: | 
					
						
							|  |  |  |     # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/388 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     # As of May 2025, snowflake-sqlalchemy is in maintenance mode. I have commented on the | 
					
						
							|  |  |  |     # above issue and we are pinning to a safe version. | 
					
						
							|  |  |  |     "snowflake-sqlalchemy>=1.4.3, <1.7.4", | 
					
						
							| 
									
										
										
										
											2024-05-08 10:42:30 +02:00
										 |  |  |     "snowflake-connector-python>=3.4.0", | 
					
						
							| 
									
										
										
										
											2022-11-23 11:13:30 +05:30
										 |  |  |     "pandas", | 
					
						
							| 
									
										
										
										
											2022-02-08 23:25:12 -08:00
										 |  |  |     "cryptography", | 
					
						
							| 
									
										
										
										
											2022-06-06 17:49:49 +05:30
										 |  |  |     "msal", | 
					
						
							| 
									
										
										
										
											2024-10-22 11:49:57 -07:00
										 |  |  |     *cachetools_lib, | 
					
						
							| 
									
										
										
										
											2025-04-30 19:39:35 -07:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2022-01-30 13:47:53 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-03-31 03:50:26 +02:00
										 |  |  | trino = { | 
					
						
							| 
									
										
										
										
											2023-09-13 00:00:24 +05:30
										 |  |  |     "trino[sqlalchemy]>=0.308", | 
					
						
							| 
									
										
										
										
											2022-03-31 03:50:26 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-09 10:04:36 -07:00
										 |  |  | pyhive_common = { | 
					
						
							| 
									
										
										
										
											2025-04-28 23:34:33 +09:00
										 |  |  |     # DataHub maintains a fork of PyHive | 
					
						
							| 
									
										
										
										
											2023-08-09 10:04:36 -07:00
										 |  |  |     # - 0.6.11 adds support for table comments and column comments, | 
					
						
							|  |  |  |     #   and also releases HTTP and HTTPS transport schemes | 
					
						
							|  |  |  |     # - 0.6.12 adds support for Spark Thrift Server | 
					
						
							|  |  |  |     # - 0.6.13 adds a small fix for Databricks | 
					
						
							|  |  |  |     # - 0.6.14 uses pure-sasl instead of sasl so it builds on Python 3.11 | 
					
						
							| 
									
										
										
										
											2023-11-16 17:19:25 -05:00
										 |  |  |     # - 0.6.15 adds support for thrift > 0.14 (cherry-picked from https://github.com/apache/thrift/pull/2491) | 
					
						
							| 
									
										
										
										
											2023-11-29 04:25:33 -05:00
										 |  |  |     # - 0.6.16 fixes a regression in 0.6.15 (https://github.com/acryldata/PyHive/pull/9) | 
					
						
							|  |  |  |     "acryl-pyhive[hive-pure-sasl]==0.6.16", | 
					
						
							| 
									
										
										
										
											2023-08-09 10:04:36 -07:00
										 |  |  |     # As per https://github.com/datahub-project/datahub/issues/8405 | 
					
						
							| 
									
										
										
										
											2023-11-16 17:19:25 -05:00
										 |  |  |     # and https://github.com/dropbox/PyHive/issues/417, version 0.14.0 | 
					
						
							|  |  |  |     # of thrift broke PyHive's hive+http transport. | 
					
						
							|  |  |  |     # Fixed by https://github.com/apache/thrift/pull/2491 in version 0.17.0 | 
					
						
							|  |  |  |     # which is unfortunately not on PyPi. | 
					
						
							|  |  |  |     # Instead, we put the fix in our PyHive fork, so no thrift pin is needed. | 
					
						
							| 
									
										
										
										
											2023-08-09 10:04:36 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-01 20:22:47 -05:00
										 |  |  | microsoft_common = { | 
					
						
							|  |  |  |     "msal>=1.31.1", | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2022-03-01 07:07:22 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-16 05:30:40 +00:00
										 |  |  | iceberg_common = { | 
					
						
							|  |  |  |     # Iceberg Python SDK | 
					
						
							| 
									
										
										
										
											2024-11-28 16:34:20 +01:00
										 |  |  |     # Kept at 0.4.0 due to higher versions requiring pydantic>2, as soon as we are fine with it, bump this dependency | 
					
						
							|  |  |  |     "pyiceberg>=0.4.0", | 
					
						
							| 
									
										
										
										
											2025-01-30 00:56:35 -05:00
										 |  |  |     *cachetools_lib, | 
					
						
							| 
									
										
										
										
											2022-08-16 05:30:40 +00:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-24 13:00:45 -05:00
										 |  |  | mssql_common = { | 
					
						
							|  |  |  |     "sqlalchemy-pytds>=0.3", | 
					
						
							|  |  |  |     "pyOpenSSL", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-10 04:36:51 +03:00
										 |  |  | postgres_common = { | 
					
						
							|  |  |  |     "psycopg2-binary", | 
					
						
							|  |  |  |     "GeoAlchemy2", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-16 05:30:40 +00:00
										 |  |  | s3_base = { | 
					
						
							| 
									
										
										
										
											2022-02-25 02:26:06 -05:00
										 |  |  |     *aws_common, | 
					
						
							| 
									
										
										
										
											2023-05-11 21:14:25 +02:00
										 |  |  |     "more-itertools>=8.12.0", | 
					
						
							| 
									
										
										
										
											2022-02-25 02:26:06 -05:00
										 |  |  |     "parse>=1.19.0", | 
					
						
							|  |  |  |     "pyarrow>=6.0.1", | 
					
						
							|  |  |  |     "tableschema>=1.20.2", | 
					
						
							| 
									
										
										
										
											2022-10-13 19:48:05 +00:00
										 |  |  |     # ujson 5.2.0 has the JSONDecodeError exception type, which we need for error handling. | 
					
						
							|  |  |  |     "ujson>=5.2.0", | 
					
						
							| 
									
										
										
										
											2022-04-01 00:15:09 +02:00
										 |  |  |     "smart-open[s3]>=5.2.1", | 
					
						
							| 
									
										
										
										
											2024-01-29 15:12:30 +01:00
										 |  |  |     # moto 5.0.0 drops support for Python 3.7 | 
					
						
							|  |  |  |     "moto[s3]<5.0.0", | 
					
						
							| 
									
										
										
										
											2022-08-16 05:30:40 +00:00
										 |  |  |     *path_spec_common, | 
					
						
							| 
									
										
										
										
											2022-02-25 02:26:06 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-30 11:40:45 -07:00
										 |  |  | threading_timeout_common = { | 
					
						
							|  |  |  |     "stopit==1.1.2", | 
					
						
							| 
									
										
										
										
											2024-10-31 16:27:45 -07:00
										 |  |  |     # stopit uses pkg_resources internally, which means there's an implied | 
					
						
							|  |  |  |     # dependency on setuptools. | 
					
						
							|  |  |  |     "setuptools", | 
					
						
							| 
									
										
										
										
											2024-10-30 11:40:45 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-17 11:06:05 +02:00
										 |  |  | abs_base = { | 
					
						
							| 
									
										
										
										
											2025-05-01 20:22:47 -05:00
										 |  |  |     "azure-core>=1.31.0", | 
					
						
							|  |  |  |     "azure-identity>=1.21.0", | 
					
						
							| 
									
										
										
										
											2024-07-17 11:06:05 +02:00
										 |  |  |     "azure-storage-blob>=12.19.0", | 
					
						
							|  |  |  |     "azure-storage-file-datalake>=12.14.0", | 
					
						
							| 
									
										
										
										
											2024-07-19 15:30:43 +02:00
										 |  |  |     "more-itertools>=8.12.0", | 
					
						
							|  |  |  |     "pyarrow>=6.0.1", | 
					
						
							|  |  |  |     "smart-open[azure]>=5.2.1", | 
					
						
							|  |  |  |     "tableschema>=1.20.2", | 
					
						
							|  |  |  |     "ujson>=5.2.0", | 
					
						
							|  |  |  |     *path_spec_common, | 
					
						
							| 
									
										
										
										
											2024-07-17 11:06:05 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-02-25 02:26:06 -05:00
										 |  |  | data_lake_profiling = { | 
					
						
							| 
									
										
										
										
											2024-12-27 13:46:49 -05:00
										 |  |  |     "pydeequ>=1.1.0", | 
					
						
							|  |  |  |     "pyspark~=3.5.0", | 
					
						
							| 
									
										
										
										
											2025-01-31 05:05:57 -08:00
										 |  |  |     # cachetools is used by the profiling config | 
					
						
							|  |  |  |     *cachetools_lib, | 
					
						
							| 
									
										
										
										
											2022-02-25 02:26:06 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-06-28 04:46:33 +05:30
										 |  |  | delta_lake = { | 
					
						
							|  |  |  |     *s3_base, | 
					
						
							| 
									
										
										
										
											2024-07-17 11:06:05 +02:00
										 |  |  |     *abs_base, | 
					
						
							| 
									
										
										
										
											2024-06-11 11:44:13 -07:00
										 |  |  |     # Version 0.18.0 broken on ARM Macs: https://github.com/delta-io/delta-rs/issues/2577 | 
					
						
							| 
									
										
										
										
											2025-04-04 20:19:13 +01:00
										 |  |  |     "deltalake>=0.6.3, != 0.6.4, != 0.18.0; platform_system == 'Darwin' and platform_machine == 'arm64'", | 
					
						
							| 
									
										
										
										
											2024-06-11 11:44:13 -07:00
										 |  |  |     "deltalake>=0.6.3, != 0.6.4; platform_system != 'Darwin' or platform_machine != 'arm64'", | 
					
						
							| 
									
										
										
										
											2022-06-28 04:46:33 +05:30
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-02 01:05:42 +02:00
										 |  |  | powerbi_report_server = {"requests", "requests_ntlm"} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-24 11:35:39 -08:00
										 |  |  | slack = { | 
					
						
							|  |  |  |     "slack-sdk==3.18.1", | 
					
						
							|  |  |  |     "tenacity>=8.0.1", | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-02-08 14:05:26 +05:30
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-06-23 08:31:05 -04:00
										 |  |  | databricks = { | 
					
						
							| 
									
										
										
										
											2023-06-27 13:38:55 -04:00
										 |  |  |     # 0.1.11 appears to have authentication issues with azure databricks | 
					
						
							| 
									
										
										
										
											2024-09-02 19:09:45 +05:30
										 |  |  |     # 0.22.0 has support for `include_browse` in metadata list apis | 
					
						
							| 
									
										
										
										
											2024-08-20 16:07:30 +02:00
										 |  |  |     "databricks-sdk>=0.30.0", | 
					
						
							| 
									
										
										
										
											2024-12-27 13:46:49 -05:00
										 |  |  |     "pyspark~=3.5.0", | 
					
						
							| 
									
										
										
										
											2023-05-09 16:30:11 -04:00
										 |  |  |     "requests", | 
					
						
							| 
									
										
										
										
											2023-12-06 13:59:23 -05:00
										 |  |  |     # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes | 
					
						
							| 
									
										
										
										
											2023-12-14 21:05:06 +05:30
										 |  |  |     # Version 3.0.0 required SQLAlchemy > 2.0.21 | 
					
						
							|  |  |  |     "databricks-sql-connector>=2.8.0,<3.0.0", | 
					
						
							| 
									
										
										
										
											2024-04-04 22:06:44 +05:30
										 |  |  |     # Due to https://github.com/databricks/databricks-sql-python/issues/326 | 
					
						
							|  |  |  |     # databricks-sql-connector<3.0.0 requires pandas<2.2.0 | 
					
						
							|  |  |  |     "pandas<2.2.0", | 
					
						
							| 
									
										
										
										
											2023-05-09 16:30:11 -04:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2022-06-29 16:25:48 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-05-14 09:51:11 -07:00
										 |  |  | mysql = {"pymysql>=1.0.2"} | 
					
						
							| 
									
										
										
										
											2023-08-15 17:49:20 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-26 20:29:15 +02:00
										 |  |  | sac = { | 
					
						
							|  |  |  |     "requests", | 
					
						
							|  |  |  |     "pyodata>=1.11.1", | 
					
						
							|  |  |  |     "Authlib", | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-10-16 04:53:01 -07:00
										 |  |  | superset_common = { | 
					
						
							|  |  |  |     "requests", | 
					
						
							| 
									
										
										
										
											2025-03-17 10:29:02 -07:00
										 |  |  |     *sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2024-10-16 04:53:01 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | # Note: for all of these, framework_common will be added. | 
					
						
							|  |  |  | plugins: Dict[str, Set[str]] = { | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     # Sink plugins. | 
					
						
							|  |  |  |     "datahub-kafka": kafka_common, | 
					
						
							| 
									
										
										
										
											2022-11-11 15:04:36 -05:00
										 |  |  |     "datahub-rest": rest_common, | 
					
						
							| 
									
										
										
										
											2023-10-04 06:53:15 -04:00
										 |  |  |     "sync-file-emitter": {"filelock"}, | 
					
						
							| 
									
										
										
										
											2023-01-20 01:21:24 -08:00
										 |  |  |     "datahub-lite": { | 
					
						
							| 
									
										
										
										
											2025-05-22 16:19:31 -07:00
										 |  |  |         "duckdb>=1.0.0", | 
					
						
							| 
									
										
										
										
											2023-01-20 01:21:24 -08:00
										 |  |  |         "fastapi", | 
					
						
							|  |  |  |         "uvicorn", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     # Integrations. | 
					
						
							| 
									
										
										
										
											2021-08-11 16:20:01 -04:00
										 |  |  |     "airflow": { | 
					
						
							| 
									
										
										
										
											2024-02-26 15:02:47 -08:00
										 |  |  |         f"acryl-datahub-airflow-plugin{_self_pin}", | 
					
						
							| 
									
										
										
										
											2021-08-11 16:20:01 -04:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2022-07-13 19:17:38 +02:00
										 |  |  |     "circuit-breaker": { | 
					
						
							|  |  |  |         "gql>=3.3.0", | 
					
						
							|  |  |  |         "gql[requests]>=3.3.0", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-05-14 09:51:11 -07:00
										 |  |  |     # TODO: Eventually we should reorganize our imports so that this depends on sqlalchemy_lib | 
					
						
							|  |  |  |     # but not the full sql_common. | 
					
						
							|  |  |  |     "datahub": sql_common | mysql | kafka_common, | 
					
						
							| 
									
										
										
										
											2024-08-21 21:43:36 +05:30
										 |  |  |     "great-expectations": { | 
					
						
							|  |  |  |         f"acryl-datahub-gx-plugin{_self_pin}", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-07-07 16:24:35 -07:00
										 |  |  |     # Misc plugins. | 
					
						
							|  |  |  |     "sql-parser": sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     # Source plugins | 
					
						
							| 
									
										
										
										
											2023-10-18 18:39:59 +02:00
										 |  |  |     # sqlalchemy-bigquery is included here since it provides an implementation of | 
					
						
							|  |  |  |     # a SQLalchemy-conform STRUCT type definition | 
					
						
							| 
									
										
										
										
											2023-10-24 00:09:41 -07:00
										 |  |  |     "athena": sql_common | 
					
						
							| 
									
										
										
										
											2024-07-20 00:00:40 +02:00
    # We need to exclude tenacity 8.4.0, as
    # that version has a missing dependency on asyncio:
    # https://github.com/jd/tenacity/issues/471
					
						
							|  |  |  |     | { | 
					
						
							|  |  |  |         "PyAthena[SQLAlchemy]>=2.6.0,<3.0.0", | 
					
						
							|  |  |  |         "sqlalchemy-bigquery>=1.4.1", | 
					
						
							|  |  |  |         "tenacity!=8.4.0", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-09-02 12:24:10 -04:00
										 |  |  |     "azure-ad": set(), | 
					
						
							| 
									
										
										
										
											2022-11-28 16:55:15 -05:00
										 |  |  |     "bigquery": sql_common | 
					
						
							|  |  |  |     | bigquery_common | 
					
						
							| 
									
										
										
										
											2024-10-30 03:13:15 -07:00
										 |  |  |     | sqlglot_lib | 
					
						
							|  |  |  |     | classification_lib | 
					
						
							| 
									
										
										
										
											2023-02-02 09:24:28 -05:00
										 |  |  |     | { | 
					
						
							| 
									
										
										
										
											2023-05-25 12:37:46 -03:00
										 |  |  |         "google-cloud-datacatalog-lineage==0.2.2", | 
					
						
							| 
									
										
										
										
											2024-10-30 03:13:15 -07:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-04-30 19:39:35 -07:00
										 |  |  |     "bigquery-slim": bigquery_common, | 
					
						
							| 
									
										
										
										
											2024-08-26 11:21:00 +05:30
										 |  |  |     "bigquery-queries": sql_common | bigquery_common | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2022-11-11 15:04:36 -05:00
										 |  |  |     "clickhouse": sql_common | clickhouse_common, | 
					
						
							|  |  |  |     "clickhouse-usage": sql_common | usage_common | clickhouse_common, | 
					
						
							| 
									
										
										
										
											2024-04-10 04:36:51 +03:00
										 |  |  |     "cockroachdb": sql_common | postgres_common | {"sqlalchemy-cockroachdb<2.0.0"}, | 
					
						
							| 
									
										
										
										
											2022-02-24 20:02:38 -05:00
										 |  |  |     "datahub-lineage-file": set(), | 
					
						
							| 
									
										
										
										
											2021-09-01 15:10:12 -07:00
										 |  |  |     "datahub-business-glossary": set(), | 
					
						
							| 
									
										
										
										
											2022-06-28 04:46:33 +05:30
										 |  |  |     "delta-lake": {*data_lake_profiling, *delta_lake}, | 
					
						
							| 
									
										
										
										
											2024-07-25 09:31:19 -07:00
										 |  |  |     "dbt": {"requests"} | dbt_common | aws_common, | 
					
						
							|  |  |  |     "dbt-cloud": {"requests"} | dbt_common, | 
					
						
							| 
									
										
										
										
											2024-11-05 22:06:35 +05:30
										 |  |  |     "dremio": {"requests"} | sql_common, | 
					
						
							| 
									
										
										
										
											2021-06-09 15:07:04 -07:00
										 |  |  |     "druid": sql_common | {"pydruid>=0.6.2"}, | 
					
						
							| 
									
										
										
										
											2024-04-02 07:28:43 -07:00
										 |  |  |     "dynamodb": aws_common | classification_lib, | 
					
						
							| 
									
										
										
										
											2022-02-19 11:44:32 -08:00
										 |  |  |     # Starting with 7.14.0 python client is checking if it is connected to elasticsearch client. If its not it throws | 
					
						
							|  |  |  |     # UnsupportedProductError | 
					
						
							|  |  |  |     # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/release-notes.html#rn-7-14-0 | 
					
						
							|  |  |  |     # https://github.com/elastic/elasticsearch-py/issues/1639#issuecomment-883587433 | 
					
						
							| 
									
										
										
										
											2025-01-30 00:56:35 -05:00
										 |  |  |     "elasticsearch": {"elasticsearch==7.13.4", *cachetools_lib}, | 
					
						
							| 
									
										
										
										
											2024-11-15 20:41:21 +05:30
										 |  |  |     "cassandra": { | 
					
						
							|  |  |  |         "cassandra-driver>=3.28.0", | 
					
						
							|  |  |  |         # We were seeing an error like this `numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject` | 
					
						
							|  |  |  |         # with numpy 2.0. This likely indicates a mismatch between scikit-learn and numpy versions. | 
					
						
							|  |  |  |         # https://stackoverflow.com/questions/40845304/runtimewarning-numpy-dtype-size-changed-may-indicate-binary-incompatibility | 
					
						
							|  |  |  |         "numpy<2", | 
					
						
							| 
									
										
										
										
											2025-01-30 00:56:35 -05:00
										 |  |  |         *cachetools_lib, | 
					
						
							| 
									
										
										
										
											2024-11-15 20:41:21 +05:30
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2023-03-15 03:57:20 -04:00
										 |  |  |     "feast": { | 
					
						
							| 
									
										
										
										
											2024-02-02 17:47:26 -05:00
										 |  |  |         "feast>=0.34.0,<1", | 
					
						
							| 
									
										
										
										
											2023-03-15 03:57:20 -04:00
										 |  |  |         "flask-openid>=1.3.0", | 
					
						
							| 
									
										
										
										
											2024-07-08 18:52:11 +05:30
										 |  |  |         "dask[dataframe]<2024.7.0", | 
					
						
							| 
									
										
										
										
											2024-07-19 15:30:43 +02:00
										 |  |  |         # We were seeing an error like this `numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject` | 
					
						
							|  |  |  |         # with numpy 2.0. This likely indicates a mismatch between scikit-learn and numpy versions. | 
					
						
							|  |  |  |         # https://stackoverflow.com/questions/40845304/runtimewarning-numpy-dtype-size-changed-may-indicate-binary-incompatibility | 
					
						
							|  |  |  |         "numpy<2", | 
					
						
							| 
									
										
										
										
											2023-03-15 03:57:20 -04:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-07-16 02:42:18 +05:30
										 |  |  |     "grafana": {"requests"}, | 
					
						
							| 
									
										
										
										
											2025-01-30 00:56:35 -05:00
										 |  |  |     "glue": aws_common | cachetools_lib, | 
					
						
							| 
									
										
										
										
											2022-05-26 12:42:50 +02:00
										 |  |  |     # hdbcli is supported officially by SAP, sqlalchemy-hana is built on top but not officially supported | 
					
						
							| 
									
										
										
										
											2022-05-26 15:29:21 -07:00
										 |  |  |     "hana": sql_common | 
					
						
							|  |  |  |     | { | 
					
						
							| 
									
										
										
										
											2022-05-30 02:02:33 -07:00
										 |  |  |         "sqlalchemy-hana>=0.5.0; platform_machine != 'aarch64' and platform_machine != 'arm64'", | 
					
						
							|  |  |  |         "hdbcli>=2.11.20; platform_machine != 'aarch64' and platform_machine != 'arm64'", | 
					
						
							| 
									
										
										
										
											2022-05-26 15:29:21 -07:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-05-03 22:11:50 -07:00
										 |  |  |     "hive": sql_common | 
					
						
							| 
									
										
										
										
											2023-08-09 10:04:36 -07:00
										 |  |  |     | pyhive_common | 
					
						
							| 
									
										
										
										
											2021-05-03 22:11:50 -07:00
										 |  |  |     | { | 
					
						
							| 
									
										
										
										
											2022-08-30 02:14:04 +00:00
										 |  |  |         "databricks-dbapi", | 
					
						
							| 
									
										
										
										
											2025-05-02 13:04:53 -07:00
										 |  |  |         *great_expectations_lib, | 
					
						
							| 
									
										
										
										
											2021-05-03 22:11:50 -07:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2024-04-16 23:35:16 +02:00
										 |  |  |     # keep in sync with presto-on-hive until presto-on-hive will be removed | 
					
						
							|  |  |  |     "hive-metastore": sql_common | 
					
						
							|  |  |  |     | pyhive_common | 
					
						
							|  |  |  |     | {"psycopg2-binary", "pymysql>=1.0.2"}, | 
					
						
							| 
									
										
										
										
											2022-05-26 08:05:57 -07:00
										 |  |  |     "iceberg": iceberg_common, | 
					
						
							| 
									
										
										
										
											2025-01-30 04:08:15 -06:00
										 |  |  |     "iceberg-catalog": aws_common, | 
					
						
							| 
									
										
										
										
											2023-02-19 08:43:13 -08:00
										 |  |  |     "json-schema": set(), | 
					
						
							| 
									
										
										
										
											2023-08-15 17:49:20 -04:00
										 |  |  |     "kafka": kafka_common | kafka_protobuf, | 
					
						
							| 
									
										
										
										
											2021-11-08 16:23:25 -08:00
										 |  |  |     "kafka-connect": sql_common | {"requests", "JPype1"}, | 
					
						
							| 
									
										
										
										
											2021-06-09 15:07:04 -07:00
										 |  |  |     "ldap": {"python-ldap>=2.4"}, | 
					
						
							| 
									
										
										
										
											2021-09-16 23:09:45 -07:00
										 |  |  |     "looker": looker_common, | 
					
						
							| 
									
										
										
										
											2024-07-10 01:21:08 +05:30
										 |  |  |     "lookml": looker_common, | 
					
						
							| 
									
										
										
										
											2024-01-26 14:03:16 -08:00
										 |  |  |     "metabase": {"requests"} | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2024-04-16 15:04:51 -07:00
										 |  |  |     "mlflow": { | 
					
						
							| 
									
										
										
										
											2025-04-16 08:27:31 +09:00
										 |  |  |         "mlflow-skinny>=2.3.0,<2.21.0", | 
					
						
							|  |  |  |         # Pinned to avoid the breaking change introduced in MLflow 2.21.0 where search_registered_models injects an implicit filter | 
					
						
							|  |  |  |         # https://github.com/mlflow/mlflow/pull/14795 | 
					
						
							|  |  |  |         # Upper bound can be removed once the upstream issue is resolved, | 
					
						
							|  |  |  |         # or we have a reliable and backward-compatible way to handle prompt filtering. | 
					
						
							| 
									
										
										
										
											2024-04-16 15:04:51 -07:00
										 |  |  |         # It's technically wrong for packages to depend on setuptools. However, it seems mlflow does it anyways. | 
					
						
							|  |  |  |         "setuptools", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-07-09 02:25:08 -04:00
										 |  |  |     "datahub-debug": {"dnspython==2.7.0", "requests"}, | 
					
						
							| 
									
										
										
										
											2024-12-10 22:06:01 +05:30
										 |  |  |     "mode": {"requests", "python-liquid", "tenacity>=8.0.1"} | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2022-08-23 02:42:09 -04:00
										 |  |  |     "mongodb": {"pymongo[srv]>=3.11", "packaging"}, | 
					
						
							| 
									
										
										
										
											2024-04-24 13:00:45 -05:00
										 |  |  |     "mssql": sql_common | mssql_common, | 
					
						
							|  |  |  |     "mssql-odbc": sql_common | mssql_common | {"pyodbc"}, | 
					
						
							| 
									
										
										
										
											2025-05-14 09:51:11 -07:00
										 |  |  |     "mysql": sql_common | mysql, | 
					
						
							| 
									
										
										
										
											2021-10-13 11:57:47 +05:30
										 |  |  |     # mariadb should have same dependency as mysql | 
					
						
							| 
									
										
										
										
											2025-01-06 18:29:51 -05:00
										 |  |  |     "mariadb": sql_common | mysql, | 
					
						
							| 
									
										
										
										
											2023-08-16 07:02:57 +02:00
										 |  |  |     "okta": {"okta~=1.7.0", "nest-asyncio"}, | 
					
						
							| 
									
										
										
										
											2024-10-23 22:39:57 +01:00
										 |  |  |     "oracle": sql_common | {"oracledb"}, | 
					
						
							| 
									
										
										
										
											2024-04-10 04:36:51 +03:00
										 |  |  |     "postgres": sql_common | postgres_common, | 
					
						
							| 
									
										
										
										
											2023-08-09 10:04:36 -07:00
										 |  |  |     "presto": sql_common | pyhive_common | trino, | 
					
						
							| 
									
										
										
										
											2024-04-16 23:35:16 +02:00
										 |  |  |     # presto-on-hive is an alias for hive-metastore and needs to be kept in sync | 
					
						
							| 
									
										
										
										
											2022-04-13 17:59:31 +05:30
										 |  |  |     "presto-on-hive": sql_common | 
					
						
							| 
									
										
										
										
											2023-08-09 10:04:36 -07:00
										 |  |  |     | pyhive_common | 
					
						
							|  |  |  |     | {"psycopg2-binary", "pymysql>=1.0.2"}, | 
					
						
							| 
									
										
										
										
											2022-04-29 12:27:02 +02:00
										 |  |  |     "pulsar": {"requests"}, | 
					
						
							| 
									
										
										
										
											2024-12-10 22:06:01 +05:30
										 |  |  |     "redash": {"redash-toolbelt", "sql-metadata"} | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2023-12-15 15:07:56 -05:00
										 |  |  |     "redshift": sql_common | 
					
						
							|  |  |  |     | redshift_common | 
					
						
							|  |  |  |     | usage_common | 
					
						
							|  |  |  |     | sqlglot_lib | 
					
						
							| 
									
										
										
										
											2024-03-14 11:15:28 +05:30
										 |  |  |     | classification_lib | 
					
						
							|  |  |  |     | {"db-dtypes"}  # Pandas extension data types | 
					
						
							| 
									
										
										
										
											2024-10-22 11:49:57 -07:00
										 |  |  |     | cachetools_lib, | 
					
						
							| 
									
										
										
										
											2025-01-31 05:05:57 -08:00
										 |  |  |     "s3": {*s3_base, *data_lake_profiling}, | 
					
						
							| 
									
										
										
										
											2025-07-09 21:22:22 +01:00
										 |  |  |     "gcs": {*s3_base, *data_lake_profiling, "smart-open[gcs]>=5.2.1"}, | 
					
						
							| 
									
										
										
										
											2025-01-31 05:05:57 -08:00
										 |  |  |     "abs": {*abs_base, *data_lake_profiling}, | 
					
						
							| 
									
										
										
										
											2021-06-29 19:43:31 -07:00
										 |  |  |     "sagemaker": aws_common, | 
					
						
							| 
									
										
										
										
											2025-01-30 00:56:35 -05:00
										 |  |  |     "salesforce": {"simple-salesforce", *cachetools_lib}, | 
					
						
							| 
									
										
										
										
											2025-04-30 19:39:35 -07:00
										 |  |  |     "snowflake": snowflake_common | sql_common | usage_common | sqlglot_lib, | 
					
						
							|  |  |  |     "snowflake-slim": snowflake_common, | 
					
						
							|  |  |  |     "snowflake-summary": snowflake_common | sql_common | usage_common | sqlglot_lib, | 
					
						
							|  |  |  |     "snowflake-queries": snowflake_common | sql_common | usage_common | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2021-08-11 18:49:16 -07:00
										 |  |  |     "sqlalchemy": sql_common, | 
					
						
							| 
									
										
										
										
											2023-08-24 10:35:46 -04:00
										 |  |  |     "sql-queries": usage_common | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2024-02-08 14:05:26 +05:30
										 |  |  |     "slack": slack, | 
					
						
							| 
									
										
										
										
											2024-10-16 04:53:01 -07:00
										 |  |  |     "superset": superset_common, | 
					
						
							|  |  |  |     "preset": superset_common, | 
					
						
							| 
									
										
										
										
											2024-12-10 22:06:01 +05:30
										 |  |  |     "tableau": {"tableauserverclient>=0.24.0"} | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2023-10-13 22:59:18 +02:00
										 |  |  |     "teradata": sql_common | 
					
						
							|  |  |  |     | usage_common | 
					
						
							|  |  |  |     | sqlglot_lib | 
					
						
							| 
									
										
										
										
											2024-10-30 10:38:33 -07:00
										 |  |  |     | { | 
					
						
							|  |  |  |         # On 2024-10-30, teradatasqlalchemy 20.0.0.2 was released. This version seemed to cause issues | 
					
						
							|  |  |  |         # in our CI, so we're pinning the version for now. | 
					
						
							|  |  |  |         "teradatasqlalchemy>=17.20.0.0,<=20.0.0.2", | 
					
						
							|  |  |  |     }, | 
					
						
							| 
									
										
										
										
											2022-03-31 03:50:26 +02:00
										 |  |  |     "trino": sql_common | trino, | 
					
						
							| 
									
										
										
										
											2022-04-01 22:26:52 +05:30
										 |  |  |     "starburst-trino-usage": sql_common | usage_common | trino, | 
					
						
							| 
									
										
										
										
											2023-05-25 03:39:01 +05:30
										 |  |  |     "nifi": {"requests", "packaging", "requests-gssapi"}, | 
					
						
							| 
									
										
										
										
											2024-10-30 11:40:45 -07:00
										 |  |  |     "powerbi": ( | 
					
						
							| 
									
										
										
										
											2025-01-24 11:35:39 -08:00
										 |  |  |         microsoft_common | 
					
						
							|  |  |  |         | {"lark[regex]==1.1.4", "sqlparse", "more-itertools"} | 
					
						
							|  |  |  |         | sqlglot_lib | 
					
						
							|  |  |  |         | threading_timeout_common | 
					
						
							| 
									
										
										
										
											2024-10-30 11:40:45 -07:00
										 |  |  |     ), | 
					
						
							| 
									
										
										
										
											2022-11-02 01:05:42 +02:00
										 |  |  |     "powerbi-report-server": powerbi_report_server, | 
					
						
							| 
									
										
										
										
											2024-06-15 05:34:55 +05:30
										 |  |  |     "vertica": sql_common | {"vertica-sqlalchemy-dialect[vertica-python]==0.0.8.2"}, | 
					
						
							| 
									
										
										
										
											2024-12-10 22:06:01 +05:30
										 |  |  |     "unity-catalog": databricks | sql_common, | 
					
						
							| 
									
										
										
										
											2023-12-14 21:05:06 +05:30
										 |  |  |     # databricks is alias for unity-catalog and needs to be kept in sync | 
					
						
							| 
									
										
										
										
											2024-12-10 22:06:01 +05:30
										 |  |  |     "databricks": databricks | sql_common, | 
					
						
							| 
									
										
										
										
											2025-05-02 19:54:01 -07:00
										 |  |  |     "fivetran": snowflake_common | bigquery_common | sqlalchemy_lib | sqlglot_lib, | 
					
						
							| 
									
										
										
										
											2024-02-27 00:57:37 +05:30
										 |  |  |     "qlik-sense": sqlglot_lib | {"requests", "websocket-client"}, | 
					
						
							| 
									
										
										
										
											2024-04-16 15:04:51 -07:00
										 |  |  |     "sigma": sqlglot_lib | {"requests"}, | 
					
						
							| 
									
										
										
										
											2024-08-26 20:29:15 +02:00
										 |  |  |     "sac": sac, | 
					
						
							| 
									
										
										
										
											2024-12-02 04:23:28 -05:00
										 |  |  |     "neo4j": {"pandas", "neo4j"}, | 
					
						
							| 
									
										
										
										
											2025-03-13 11:02:15 -07:00
										 |  |  |     "vertexai": {"google-cloud-aiplatform>=1.80.0"}, | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-20 17:24:43 -08:00
										 |  |  | # This is mainly used to exclude plugins from the Docker image. | 
					
						
							| 
									
										
										
										
											2021-06-07 14:00:35 -07:00
										 |  |  | all_exclude_plugins: Set[str] = { | 
					
						
							| 
									
										
										
										
											2024-02-26 15:02:47 -08:00
										 |  |  |     # The Airflow extra is only retained for compatibility, but new users should | 
					
						
							|  |  |  |     # be using the datahub-airflow-plugin package instead. | 
					
						
							|  |  |  |     "airflow", | 
					
						
							| 
									
										
										
										
											2024-08-21 21:43:36 +05:30
										 |  |  |     # The great-expectations extra is only retained for compatibility, but new users should | 
					
						
							|  |  |  |     # be using the datahub-gx-plugin package instead. | 
					
						
							|  |  |  |     "great-expectations", | 
					
						
							| 
									
										
										
										
											2021-06-07 14:00:35 -07:00
										 |  |  |     # SQL Server ODBC requires additional drivers, and so we don't want to keep | 
					
						
							|  |  |  |     # it included in the default "all" installation. | 
					
						
							|  |  |  |     "mssql-odbc", | 
					
						
							| 
									
										
										
										
											2023-01-20 17:24:43 -08:00
										 |  |  |     # duckdb doesn't have a prebuilt wheel for Linux arm7l or aarch64, so we | 
					
						
							|  |  |  |     # simply exclude it. | 
					
						
							|  |  |  |     "datahub-lite", | 
					
						
							| 
									
										
										
										
											2024-02-26 15:02:47 -08:00
										 |  |  |     # Feast tends to have overly restrictive dependencies and hence doesn't | 
					
						
							|  |  |  |     # play nice with the "all" installation. | 
					
						
							|  |  |  |     "feast", | 
					
						
							| 
									
										
										
										
											2021-06-07 14:00:35 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-06-08 16:10:16 -07:00
										 |  |  | mypy_stubs = { | 
					
						
							|  |  |  |     "types-dataclasses", | 
					
						
							|  |  |  |     "types-six", | 
					
						
							|  |  |  |     "types-python-dateutil", | 
					
						
							| 
									
										
										
										
											2023-09-25 16:24:19 -04:00
										 |  |  |     # We need to avoid 2.31.0.5 and 2.31.0.4 due to | 
					
						
							|  |  |  |     # https://github.com/python/typeshed/issues/10764. Once that | 
					
						
							|  |  |  |     # issue is resolved, we can remove the upper bound and change it | 
					
						
							|  |  |  |     # to a != constraint. | 
					
						
							|  |  |  |     # We have a PR up to fix the underlying issue: https://github.com/python/typeshed/pull/10776. | 
					
						
							|  |  |  |     "types-requests>=2.28.11.6,<=2.31.0.3", | 
					
						
							| 
									
										
										
										
											2021-06-08 16:10:16 -07:00
										 |  |  |     "types-toml", | 
					
						
							|  |  |  |     "types-PyMySQL", | 
					
						
							|  |  |  |     "types-PyYAML", | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |     "types-cachetools", | 
					
						
							| 
									
										
										
										
											2021-06-08 16:10:16 -07:00
										 |  |  |     # versions 0.1.13 and 0.1.14 seem to have issues | 
					
						
							|  |  |  |     "types-click==0.1.12", | 
					
						
							| 
									
										
										
										
											2023-07-31 19:48:05 -07:00
										 |  |  |     # The boto3-stubs package seems to have regularly breaking minor releases, | 
					
						
							|  |  |  |     # we pin to a specific version to avoid this. | 
					
						
							| 
									
										
										
										
											2025-06-11 12:26:27 +02:00
										 |  |  |     "boto3-stubs[s3,glue,sagemaker,sts,dynamodb, lakeformation]==1.28.15", | 
					
						
							| 
									
										
										
										
											2023-08-29 13:37:27 -04:00
										 |  |  |     "mypy-boto3-sagemaker==1.28.15",  # For some reason, above pin only restricts `mypy-boto3-sagemaker<1.29.0,>=1.28.0` | 
					
						
							| 
									
										
										
										
											2021-07-29 20:04:40 -07:00
										 |  |  |     "types-tabulate", | 
					
						
							| 
									
										
										
										
											2022-03-14 21:20:29 +05:30
										 |  |  |     # avrogen package requires this | 
					
						
							|  |  |  |     "types-pytz", | 
					
						
							| 
									
										
										
										
											2022-06-06 17:49:49 +05:30
										 |  |  |     "types-pyOpenSSL", | 
					
						
							| 
									
										
										
										
											2022-11-21 17:08:05 -05:00
										 |  |  |     "types-click-spinner>=0.1.13.1", | 
					
						
							| 
									
										
										
										
											2022-10-13 19:48:05 +00:00
										 |  |  |     "types-ujson>=5.2.0", | 
					
						
							| 
									
										
										
										
											2022-11-01 21:12:34 -07:00
										 |  |  |     "types-Deprecated", | 
					
						
							| 
									
										
										
										
											2022-12-02 13:53:28 -05:00
										 |  |  |     "types-protobuf>=4.21.0.1", | 
					
						
							| 
									
										
										
										
											2023-09-13 00:00:24 +05:30
										 |  |  |     "sqlalchemy2-stubs", | 
					
						
							| 
									
										
										
										
											2021-06-08 16:10:16 -07:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-11 10:39:47 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-21 21:43:36 +05:30
										 |  |  | test_api_requirements = { | 
					
						
							|  |  |  |     "pytest>=6.2.2", | 
					
						
							| 
									
										
										
										
											2024-11-13 23:48:30 -08:00
										 |  |  |     "pytest-timeout", | 
					
						
							| 
									
										
										
										
											2024-08-28 19:53:42 +05:30
										 |  |  |     # Missing numpy requirement in 8.0.0 | 
					
						
							|  |  |  |     "deepdiff!=8.0.0", | 
					
						
							| 
									
										
										
										
											2025-07-09 02:25:08 -04:00
										 |  |  |     "orderly-set!=5.4.0",  # 5.4.0 uses invalid types on older Python versions | 
					
						
							| 
									
										
										
										
											2024-08-21 21:43:36 +05:30
										 |  |  |     "PyYAML", | 
					
						
							|  |  |  |     "pytest-docker>=1.1.0", | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2023-07-11 10:39:47 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 00:09:41 -07:00
										 |  |  | debug_requirements = { | 
					
						
							|  |  |  |     "memray", | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2023-10-12 18:43:14 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-20 13:33:54 -08:00
										 |  |  | lint_requirements = { | 
					
						
							| 
									
										
										
										
											2022-12-19 13:35:49 -05:00
										 |  |  |     # This is pinned only to avoid spurious errors in CI. | 
					
						
							|  |  |  |     # We should make an effort to keep it up to date. | 
					
						
							| 
									
										
										
										
											2025-05-02 15:02:00 +05:30
										 |  |  |     "ruff==0.11.7", | 
					
						
							|  |  |  |     "mypy==1.14.1", | 
					
						
							| 
									
										
										
										
											2024-11-20 13:33:54 -08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | base_dev_requirements = { | 
					
						
							|  |  |  |     *base_requirements, | 
					
						
							|  |  |  |     *framework_common, | 
					
						
							|  |  |  |     *mypy_stubs, | 
					
						
							|  |  |  |     *s3_base, | 
					
						
							|  |  |  |     *lint_requirements, | 
					
						
							| 
									
										
										
										
											2023-07-11 10:39:47 -04:00
										 |  |  |     *test_api_requirements, | 
					
						
							| 
									
										
										
										
											2024-11-20 13:33:54 -08:00
										 |  |  |     "coverage>=5.1", | 
					
						
							|  |  |  |     "faker>=18.4.0", | 
					
						
							| 
									
										
										
										
											2022-02-10 20:02:23 -08:00
										 |  |  |     "pytest-asyncio>=0.16.0", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     "pytest-cov>=2.8.1", | 
					
						
							| 
									
										
										
										
											2024-02-09 18:20:05 -05:00
										 |  |  |     "pytest-random-order~=1.1.0", | 
					
						
							| 
									
										
										
										
											2021-04-21 11:34:24 -07:00
										 |  |  |     "requests-mock", | 
					
						
							| 
									
										
										
										
											2021-04-04 19:00:27 +01:00
										 |  |  |     "freezegun", | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |     "jsonpickle", | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     "build", | 
					
						
							|  |  |  |     "twine", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     *list( | 
					
						
							|  |  |  |         dependency | 
					
						
							|  |  |  |         for plugin in [ | 
					
						
							| 
									
										
										
										
											2024-07-19 15:30:43 +02:00
										 |  |  |             "abs", | 
					
						
							| 
									
										
										
										
											2023-10-03 23:17:49 -04:00
										 |  |  |             "athena", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |             "bigquery", | 
					
						
							| 
									
										
										
										
											2022-02-21 17:36:08 +02:00
										 |  |  |             "clickhouse", | 
					
						
							|  |  |  |             "clickhouse-usage", | 
					
						
							| 
									
										
										
										
											2024-04-10 04:36:51 +03:00
										 |  |  |             "cockroachdb", | 
					
						
							| 
									
										
										
										
											2022-08-10 22:00:31 +00:00
										 |  |  |             "delta-lake", | 
					
						
							| 
									
										
										
										
											2024-11-05 22:06:35 +05:30
										 |  |  |             "dremio", | 
					
						
							| 
									
										
										
										
											2022-04-08 20:48:48 +05:30
										 |  |  |             "druid", | 
					
						
							| 
									
										
										
										
											2022-01-14 13:10:12 -08:00
										 |  |  |             "elasticsearch", | 
					
						
							| 
									
										
										
										
											2024-01-29 10:50:47 -08:00
										 |  |  |             "feast", | 
					
						
							|  |  |  |             "iceberg", | 
					
						
							| 
									
										
										
										
											2025-01-30 04:08:15 -06:00
										 |  |  |             "iceberg-catalog", | 
					
						
							| 
									
										
										
										
											2024-01-29 10:50:47 -08:00
										 |  |  |             "mlflow", | 
					
						
							| 
									
										
										
										
											2023-02-19 08:43:13 -08:00
										 |  |  |             "json-schema", | 
					
						
							| 
									
										
										
										
											2022-04-08 20:48:48 +05:30
										 |  |  |             "ldap", | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |             "looker", | 
					
						
							| 
									
										
										
										
											2022-08-10 22:00:31 +00:00
										 |  |  |             "lookml", | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |             "glue", | 
					
						
							| 
									
										
										
										
											2021-10-13 11:57:47 +05:30
										 |  |  |             "mariadb", | 
					
						
							| 
									
										
										
										
											2021-08-11 18:49:16 -07:00
										 |  |  |             "okta", | 
					
						
							| 
									
										
										
										
											2021-06-11 17:27:34 -07:00
										 |  |  |             "oracle", | 
					
						
							| 
									
										
										
										
											2021-07-20 19:31:42 +05:30
										 |  |  |             "postgres", | 
					
						
							| 
									
										
										
										
											2021-06-29 19:43:31 -07:00
										 |  |  |             "sagemaker", | 
					
						
							| 
									
										
										
										
											2022-05-04 17:07:01 -07:00
										 |  |  |             "kafka", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |             "datahub-rest", | 
					
						
							| 
									
										
										
										
											2023-01-18 19:18:56 -08:00
										 |  |  |             "datahub-lite", | 
					
						
							| 
									
										
										
										
											2022-11-18 12:02:48 +01:00
										 |  |  |             "presto", | 
					
						
							| 
									
										
										
										
											2021-08-19 02:03:03 +07:00
										 |  |  |             "redash", | 
					
						
							| 
									
										
										
										
											2021-09-23 16:39:33 -04:00
										 |  |  |             "redshift", | 
					
						
							| 
									
										
										
										
											2022-03-29 15:22:57 +05:30
										 |  |  |             "s3", | 
					
						
							| 
									
										
										
										
											2022-11-23 11:13:30 +05:30
										 |  |  |             "snowflake", | 
					
						
							| 
									
										
										
										
											2024-02-08 14:05:26 +05:30
										 |  |  |             "slack", | 
					
						
							| 
									
										
										
										
											2022-02-08 14:26:44 -08:00
										 |  |  |             "tableau", | 
					
						
							| 
									
										
										
										
											2023-10-13 00:14:45 +02:00
										 |  |  |             "teradata", | 
					
						
							| 
									
										
										
										
											2022-01-31 22:52:55 +05:30
										 |  |  |             "trino", | 
					
						
							| 
									
										
										
										
											2022-02-02 22:52:50 -08:00
										 |  |  |             "hive", | 
					
						
							| 
									
										
										
										
											2022-01-31 22:52:55 +05:30
										 |  |  |             "starburst-trino-usage", | 
					
						
							| 
									
										
										
										
											2022-04-08 20:48:48 +05:30
										 |  |  |             "powerbi", | 
					
						
							| 
									
										
										
										
											2022-11-02 01:05:42 +02:00
										 |  |  |             "powerbi-report-server", | 
					
						
							| 
									
										
										
										
											2022-10-30 05:44:41 +01:00
										 |  |  |             "salesforce", | 
					
						
							| 
									
										
										
										
											2023-05-25 03:39:01 +05:30
										 |  |  |             "unity-catalog", | 
					
						
							| 
									
										
										
										
											2023-08-01 19:34:35 +05:30
										 |  |  |             "nifi", | 
					
						
							| 
									
										
										
										
											2023-08-31 02:38:42 +05:30
										 |  |  |             "vertica", | 
					
						
							|  |  |  |             "mode", | 
					
						
							| 
									
										
										
										
											2023-11-08 12:32:41 +05:30
										 |  |  |             "fivetran", | 
					
						
							| 
									
										
										
										
											2023-09-23 05:42:48 +05:30
										 |  |  |             "kafka-connect", | 
					
						
							| 
									
										
										
										
											2024-02-27 00:57:37 +05:30
										 |  |  |             "qlik-sense", | 
					
						
							| 
									
										
										
										
											2024-04-16 08:48:31 +05:30
										 |  |  |             "sigma", | 
					
						
							| 
									
										
										
										
											2024-08-26 20:29:15 +02:00
										 |  |  |             "sac", | 
					
						
							| 
									
										
										
										
											2024-11-15 20:41:21 +05:30
										 |  |  |             "cassandra", | 
					
						
							| 
									
										
										
										
											2024-12-02 04:23:28 -05:00
										 |  |  |             "neo4j", | 
					
						
							| 
									
										
										
										
											2025-03-13 11:02:15 -07:00
										 |  |  |             "vertexai", | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |         ] | 
					
						
							| 
									
										
										
										
											2022-11-03 14:23:19 -04:00
										 |  |  |         if plugin | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |         for dependency in plugins[plugin] | 
					
						
							|  |  |  |     ), | 
					
						
							| 
									
										
										
										
											2025-05-14 09:51:11 -07:00
										 |  |  |     *pydantic_no_v2, | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-26 16:44:36 -07:00
										 |  |  | dev_requirements = { | 
					
						
							|  |  |  |     *base_dev_requirements, | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-07-14 20:02:48 -07:00
										 |  |  | full_test_dev_requirements = { | 
					
						
							|  |  |  |     *list( | 
					
						
							|  |  |  |         dependency | 
					
						
							|  |  |  |         for plugin in [ | 
					
						
							| 
									
										
										
										
											2022-08-10 22:00:31 +00:00
										 |  |  |             "athena", | 
					
						
							| 
									
										
										
										
											2022-07-13 19:17:38 +02:00
										 |  |  |             "circuit-breaker", | 
					
						
							| 
									
										
										
										
											2022-02-21 17:36:08 +02:00
										 |  |  |             "clickhouse", | 
					
						
							| 
									
										
										
										
											2022-08-10 22:00:31 +00:00
										 |  |  |             "delta-lake", | 
					
						
							| 
									
										
										
										
											2021-07-14 20:29:23 -07:00
										 |  |  |             "druid", | 
					
						
							| 
									
										
										
										
											2024-01-29 10:50:47 -08:00
										 |  |  |             "feast", | 
					
						
							| 
									
										
										
										
											2022-05-27 01:50:15 -07:00
										 |  |  |             "hana", | 
					
						
							| 
									
										
										
										
											2021-07-14 20:02:48 -07:00
										 |  |  |             "hive", | 
					
						
							| 
									
										
										
										
											2024-01-29 10:50:47 -08:00
										 |  |  |             "iceberg", | 
					
						
							| 
									
										
										
										
											2025-01-30 04:08:15 -06:00
										 |  |  |             "iceberg-catalog", | 
					
						
							| 
									
										
										
										
											2022-07-16 17:38:33 -07:00
										 |  |  |             "kafka-connect", | 
					
						
							| 
									
										
										
										
											2021-07-14 20:02:48 -07:00
										 |  |  |             "ldap", | 
					
						
							|  |  |  |             "mongodb", | 
					
						
							| 
									
										
										
										
											2024-02-08 14:05:26 +05:30
										 |  |  |             "slack", | 
					
						
							| 
									
										
										
										
											2024-01-29 10:50:47 -08:00
										 |  |  |             "mssql", | 
					
						
							| 
									
										
										
										
											2021-07-14 20:02:48 -07:00
										 |  |  |             "mysql", | 
					
						
							| 
									
										
										
										
											2021-10-13 11:57:47 +05:30
										 |  |  |             "mariadb", | 
					
						
							| 
									
										
										
										
											2021-08-19 02:03:03 +07:00
										 |  |  |             "redash", | 
					
						
							| 
									
										
										
										
											2023-08-01 19:34:35 +05:30
										 |  |  |             "vertica", | 
					
						
							| 
									
										
										
										
											2025-03-17 10:29:02 -07:00
										 |  |  |             "vertexai", | 
					
						
							| 
									
										
										
										
											2021-07-14 20:02:48 -07:00
										 |  |  |         ] | 
					
						
							| 
									
										
										
										
											2023-08-31 13:01:05 -04:00
										 |  |  |         if plugin | 
					
						
							| 
									
										
										
										
											2021-07-14 20:02:48 -07:00
										 |  |  |         for dependency in plugins[plugin] | 
					
						
							|  |  |  |     ), | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  | entry_points = { | 
					
						
							| 
									
										
										
										
											2021-05-17 11:50:38 -07:00
										 |  |  |     "console_scripts": ["datahub = datahub.entrypoints:main"], | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |     "datahub.ingestion.source.plugins": [ | 
					
						
							| 
									
										
										
										
											2024-07-19 15:30:43 +02:00
										 |  |  |         "abs = datahub.ingestion.source.abs.source:ABSSource", | 
					
						
							| 
									
										
										
										
											2022-06-29 16:25:48 +02:00
										 |  |  |         "csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource", | 
					
						
							| 
									
										
										
										
											2021-06-24 17:11:00 -07:00
										 |  |  |         "file = datahub.ingestion.source.file:GenericFileSource", | 
					
						
							| 
									
										
										
										
											2023-08-15 17:49:20 -04:00
										 |  |  |         "datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource", | 
					
						
							| 
									
										
										
										
											2021-07-26 13:06:52 -07:00
										 |  |  |         "sqlalchemy = datahub.ingestion.source.sql.sql_generic:SQLAlchemyGenericSource", | 
					
						
							|  |  |  |         "athena = datahub.ingestion.source.sql.athena:AthenaSource", | 
					
						
							| 
									
										
										
										
											2021-09-02 12:24:10 -04:00
										 |  |  |         "azure-ad = datahub.ingestion.source.identity.azure_ad:AzureADSource", | 
					
						
							| 
									
										
										
										
											2022-10-26 20:15:44 +02:00
										 |  |  |         "bigquery = datahub.ingestion.source.bigquery_v2.bigquery:BigqueryV2Source", | 
					
						
							| 
									
										
										
										
											2024-08-26 11:21:00 +05:30
										 |  |  |         "bigquery-queries = datahub.ingestion.source.bigquery_v2.bigquery_queries:BigQueryQueriesSource", | 
					
						
							| 
									
										
										
										
											2022-02-21 17:36:08 +02:00
										 |  |  |         "clickhouse = datahub.ingestion.source.sql.clickhouse:ClickHouseSource", | 
					
						
							|  |  |  |         "clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsageSource", | 
					
						
							| 
									
										
										
										
											2024-04-10 04:36:51 +03:00
										 |  |  |         "cockroachdb = datahub.ingestion.source.sql.cockroachdb:CockroachDBSource", | 
					
						
							| 
									
										
										
										
											2022-06-28 04:46:33 +05:30
										 |  |  |         "delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource", | 
					
						
							| 
									
										
										
										
											2022-03-29 15:22:57 +05:30
										 |  |  |         "s3 = datahub.ingestion.source.s3:S3Source", | 
					
						
							| 
									
										
										
										
											2022-11-21 14:14:33 -05:00
										 |  |  |         "dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource", | 
					
						
							|  |  |  |         "dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource", | 
					
						
							| 
									
										
										
										
											2024-11-05 22:06:35 +05:30
										 |  |  |         "dremio = datahub.ingestion.source.dremio.dremio_source:DremioSource", | 
					
						
							| 
									
										
										
										
											2021-07-26 13:06:52 -07:00
										 |  |  |         "druid = datahub.ingestion.source.sql.druid:DruidSource", | 
					
						
							| 
									
										
										
										
											2023-09-15 13:26:17 -07:00
										 |  |  |         "dynamodb = datahub.ingestion.source.dynamodb.dynamodb:DynamoDBSource", | 
					
						
							| 
									
										
										
										
											2022-01-14 13:10:12 -08:00
										 |  |  |         "elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource", | 
					
						
							| 
									
										
										
										
											2022-04-26 18:35:02 -03:00
										 |  |  |         "feast = datahub.ingestion.source.feast:FeastRepositorySource", | 
					
						
							| 
									
										
										
										
											2024-07-16 02:42:18 +05:30
										 |  |  |         "grafana = datahub.ingestion.source.grafana.grafana_source:GrafanaSource", | 
					
						
							| 
									
										
										
										
											2021-07-26 13:06:52 -07:00
										 |  |  |         "glue = datahub.ingestion.source.aws.glue:GlueSource", | 
					
						
							|  |  |  |         "sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource", | 
					
						
							| 
									
										
										
										
											2022-05-26 12:42:50 +02:00
										 |  |  |         "hana = datahub.ingestion.source.sql.hana:HanaSource", | 
					
						
							| 
									
										
										
										
											2021-07-26 13:06:52 -07:00
										 |  |  |         "hive = datahub.ingestion.source.sql.hive:HiveSource", | 
					
						
							| 
									
										
										
										
											2024-04-16 23:35:16 +02:00
										 |  |  |         "hive-metastore = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource", | 
					
						
							| 
									
										
										
										
											2023-02-19 08:43:13 -08:00
										 |  |  |         "json-schema = datahub.ingestion.source.schema.json_schema:JsonSchemaSource", | 
					
						
							| 
									
										
										
										
											2024-11-22 13:08:23 +05:30
										 |  |  |         "kafka = datahub.ingestion.source.kafka.kafka:KafkaSource", | 
					
						
							| 
									
										
										
										
											2024-12-19 12:39:47 +05:30
										 |  |  |         "kafka-connect = datahub.ingestion.source.kafka_connect.kafka_connect:KafkaConnectSource", | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |         "ldap = datahub.ingestion.source.ldap:LDAPSource", | 
					
						
							| 
									
										
										
										
											2022-08-30 12:51:31 +05:30
										 |  |  |         "looker = datahub.ingestion.source.looker.looker_source:LookerDashboardSource", | 
					
						
							|  |  |  |         "lookml = datahub.ingestion.source.looker.lookml_source:LookMLSource", | 
					
						
							| 
									
										
										
										
											2024-03-21 15:21:17 +05:30
										 |  |  |         "datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource", | 
					
						
							| 
									
										
										
										
											2025-06-12 18:41:22 +05:30
										 |  |  |         "datahub-debug = datahub.ingestion.source.debug.datahub_debug:DataHubDebugSource", | 
					
						
							| 
									
										
										
										
											2025-01-29 21:37:17 +05:30
										 |  |  |         "datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource", | 
					
						
							| 
									
										
										
										
											2025-06-27 15:07:28 +05:30
										 |  |  |         "datahub-mock-data = datahub.ingestion.source.mock_data.datahub_mock_data:DataHubMockDataSource", | 
					
						
							| 
									
										
										
										
											2022-02-24 20:02:38 -05:00
										 |  |  |         "datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource", | 
					
						
							| 
									
										
										
										
											2021-09-01 15:10:12 -07:00
										 |  |  |         "datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource", | 
					
						
							| 
									
										
										
										
											2023-09-26 20:51:30 +03:00
										 |  |  |         "mlflow = datahub.ingestion.source.mlflow:MLflowSource", | 
					
						
							| 
									
										
										
										
											2021-12-09 16:10:08 -08:00
										 |  |  |         "mode = datahub.ingestion.source.mode:ModeSource", | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |         "mongodb = datahub.ingestion.source.mongodb:MongoDBSource", | 
					
						
							| 
									
										
										
										
											2021-07-26 13:06:52 -07:00
										 |  |  |         "mssql = datahub.ingestion.source.sql.mssql:SQLServerSource", | 
					
						
							|  |  |  |         "mysql = datahub.ingestion.source.sql.mysql:MySQLSource", | 
					
						
							| 
									
										
										
										
											2021-10-13 11:57:47 +05:30
										 |  |  |         "mariadb = datahub.ingestion.source.sql.mariadb.MariaDBSource", | 
					
						
							| 
									
										
										
										
											2021-08-11 18:49:16 -07:00
										 |  |  |         "okta = datahub.ingestion.source.identity.okta:OktaSource", | 
					
						
							| 
									
										
										
										
											2021-07-26 13:06:52 -07:00
										 |  |  |         "oracle = datahub.ingestion.source.sql.oracle:OracleSource", | 
					
						
							|  |  |  |         "postgres = datahub.ingestion.source.sql.postgres:PostgresSource", | 
					
						
							| 
									
										
										
										
											2021-08-19 02:03:03 +07:00
										 |  |  |         "redash = datahub.ingestion.source.redash:RedashSource", | 
					
						
							| 
									
										
										
										
											2023-04-12 19:15:43 +02:00
										 |  |  |         "redshift = datahub.ingestion.source.redshift.redshift:RedshiftSource", | 
					
						
							| 
									
										
										
										
											2024-02-08 14:05:26 +05:30
										 |  |  |         "slack = datahub.ingestion.source.slack.slack:SlackSource", | 
					
						
							| 
									
										
										
										
											2022-09-15 22:23:54 +05:30
										 |  |  |         "snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source", | 
					
						
							| 
									
										
										
										
											2024-06-12 12:04:22 -05:00
										 |  |  |         "snowflake-summary = datahub.ingestion.source.snowflake.snowflake_summary:SnowflakeSummarySource", | 
					
						
							| 
									
										
										
										
											2024-07-12 15:08:51 -07:00
										 |  |  |         "snowflake-queries = datahub.ingestion.source.snowflake.snowflake_queries:SnowflakeQueriesSource", | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |         "superset = datahub.ingestion.source.superset:SupersetSource", | 
					
						
							| 
									
										
										
										
											2024-10-09 23:27:31 -04:00
										 |  |  |         "preset = datahub.ingestion.source.preset:PresetSource", | 
					
						
							| 
									
										
										
										
											2024-09-09 23:15:06 +05:30
										 |  |  |         "tableau = datahub.ingestion.source.tableau.tableau:TableauSource", | 
					
						
							| 
									
										
										
										
											2021-11-10 06:22:15 +01:00
										 |  |  |         "openapi = datahub.ingestion.source.openapi:OpenApiSource", | 
					
						
							| 
									
										
										
										
											2021-12-14 02:02:47 -05:00
										 |  |  |         "metabase = datahub.ingestion.source.metabase:MetabaseSource", | 
					
						
							| 
									
										
										
										
											2023-10-13 00:14:45 +02:00
										 |  |  |         "teradata = datahub.ingestion.source.sql.teradata:TeradataSource", | 
					
						
							| 
									
										
										
										
											2021-10-07 00:27:06 +05:30
										 |  |  |         "trino = datahub.ingestion.source.sql.trino:TrinoSource", | 
					
						
							| 
									
										
										
										
											2021-11-18 18:56:24 +01:00
										 |  |  |         "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource", | 
					
						
							| 
									
										
										
										
											2021-12-09 04:26:31 +05:30
										 |  |  |         "nifi = datahub.ingestion.source.nifi:NifiSource", | 
					
						
							| 
									
										
										
										
											2024-11-27 23:02:24 +05:30
										 |  |  |         "powerbi = datahub.ingestion.source.powerbi.powerbi:PowerBiDashboardSource", | 
					
						
							| 
									
										
										
										
											2022-11-02 01:05:42 +02:00
										 |  |  |         "powerbi-report-server = datahub.ingestion.source.powerbi_report_server:PowerBiReportServerDashboardSource", | 
					
						
							| 
									
										
										
										
											2022-05-26 08:05:57 -07:00
										 |  |  |         "iceberg = datahub.ingestion.source.iceberg.iceberg:IcebergSource", | 
					
						
							| 
									
										
										
										
											2022-05-26 19:26:28 +09:00
										 |  |  |         "vertica = datahub.ingestion.source.sql.vertica:VerticaSource", | 
					
						
							| 
									
										
										
										
											2022-11-18 12:02:48 +01:00
										 |  |  |         "presto = datahub.ingestion.source.sql.presto:PrestoSource", | 
					
						
							| 
									
										
										
										
											2024-04-16 23:35:16 +02:00
										 |  |  |         # This is only here for backward compatibility. Use the `hive-metastore` source instead. | 
					
						
							|  |  |  |         "presto-on-hive = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource", | 
					
						
							| 
									
										
										
										
											2022-04-29 12:27:02 +02:00
										 |  |  |         "pulsar = datahub.ingestion.source.pulsar:PulsarSource", | 
					
						
							| 
									
										
										
										
											2022-07-06 22:31:16 +05:30
										 |  |  |         "salesforce = datahub.ingestion.source.salesforce:SalesforceSource", | 
					
						
							| 
									
										
										
										
											2022-12-06 16:10:21 -05:00
										 |  |  |         "demo-data = datahub.ingestion.source.demo_data.DemoDataSource", | 
					
						
							| 
									
										
										
										
											2022-10-30 05:44:41 +01:00
										 |  |  |         "unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource", | 
					
						
							| 
									
										
										
										
											2023-04-27 22:33:41 +05:30
										 |  |  |         "gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource", | 
					
						
							| 
									
										
										
										
											2023-08-24 10:35:46 -04:00
										 |  |  |         "sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource", | 
					
						
							| 
									
										
										
										
											2023-11-08 12:32:41 +05:30
										 |  |  |         "fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource", | 
					
						
							| 
									
										
										
										
											2024-02-27 00:57:37 +05:30
										 |  |  |         "qlik-sense = datahub.ingestion.source.qlik_sense.qlik_sense:QlikSenseSource", | 
					
						
							| 
									
										
										
										
											2024-04-16 08:48:31 +05:30
										 |  |  |         "sigma = datahub.ingestion.source.sigma.sigma:SigmaSource", | 
					
						
							| 
									
										
										
										
											2024-08-26 20:29:15 +02:00
										 |  |  |         "sac = datahub.ingestion.source.sac.sac:SACSource", | 
					
						
							| 
									
										
										
										
											2024-11-15 20:41:21 +05:30
										 |  |  |         "cassandra = datahub.ingestion.source.cassandra.cassandra:CassandraSource", | 
					
						
							| 
									
										
										
										
											2024-12-02 04:23:28 -05:00
										 |  |  |         "neo4j = datahub.ingestion.source.neo4j.neo4j_source:Neo4jSource", | 
					
						
							| 
									
										
										
										
											2025-03-21 14:06:17 -07:00
										 |  |  |         "vertexai = datahub.ingestion.source.vertexai.vertexai:VertexAISource", | 
					
						
							| 
									
										
										
										
											2025-03-25 19:47:11 +01:00
										 |  |  |         "hex = datahub.ingestion.source.hex.hex:HexSource", | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |     ], | 
					
						
							| 
									
										
										
										
											2022-12-08 05:08:08 +01:00
										 |  |  |     "datahub.ingestion.transformer.plugins": [ | 
					
						
							| 
									
										
										
										
											2024-03-22 15:23:03 +05:30
										 |  |  |         "pattern_cleanup_ownership = datahub.ingestion.transformer.pattern_cleanup_ownership:PatternCleanUpOwnership", | 
					
						
							| 
									
										
										
										
											2022-12-08 05:08:08 +01:00
										 |  |  |         "simple_remove_dataset_ownership = datahub.ingestion.transformer.remove_dataset_ownership:SimpleRemoveDatasetOwnership", | 
					
						
							|  |  |  |         "mark_dataset_status = datahub.ingestion.transformer.mark_dataset_status:MarkDatasetStatus", | 
					
						
							|  |  |  |         "set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer", | 
					
						
							|  |  |  |         "add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:AddDatasetOwnership", | 
					
						
							|  |  |  |         "simple_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:SimpleAddDatasetOwnership", | 
					
						
							|  |  |  |         "pattern_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:PatternAddDatasetOwnership", | 
					
						
							|  |  |  |         "add_dataset_domain = datahub.ingestion.transformer.dataset_domain:AddDatasetDomain", | 
					
						
							|  |  |  |         "simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain", | 
					
						
							|  |  |  |         "pattern_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:PatternAddDatasetDomain", | 
					
						
							|  |  |  |         "add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:AddDatasetTags", | 
					
						
							|  |  |  |         "simple_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:SimpleAddDatasetTags", | 
					
						
							|  |  |  |         "pattern_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:PatternAddDatasetTags", | 
					
						
							| 
									
										
										
										
											2023-07-20 08:25:30 +05:30
										 |  |  |         "extract_dataset_tags = datahub.ingestion.transformer.extract_dataset_tags:ExtractDatasetTags", | 
					
						
							| 
									
										
										
										
											2022-12-08 05:08:08 +01:00
										 |  |  |         "add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:AddDatasetTerms", | 
					
						
							|  |  |  |         "simple_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:SimpleAddDatasetTerms", | 
					
						
							|  |  |  |         "pattern_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:PatternAddDatasetTerms", | 
					
						
							|  |  |  |         "add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:AddDatasetProperties", | 
					
						
							|  |  |  |         "simple_add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:SimpleAddDatasetProperties", | 
					
						
							|  |  |  |         "pattern_add_dataset_schema_terms = datahub.ingestion.transformer.add_dataset_schema_terms:PatternAddDatasetSchemaTerms", | 
					
						
							|  |  |  |         "pattern_add_dataset_schema_tags = datahub.ingestion.transformer.add_dataset_schema_tags:PatternAddDatasetSchemaTags", | 
					
						
							| 
									
										
										
										
											2023-12-09 05:37:00 +05:30
										 |  |  |         "extract_ownership_from_tags = datahub.ingestion.transformer.extract_ownership_from_tags:ExtractOwnersFromTagsTransformer", | 
					
						
							| 
									
										
										
										
											2024-01-06 03:33:22 +05:30
										 |  |  |         "add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:AddDatasetDataProduct", | 
					
						
							|  |  |  |         "simple_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:SimpleAddDatasetDataProduct", | 
					
						
							|  |  |  |         "pattern_add_dataset_dataproduct = datahub.ingestion.transformer.add_dataset_dataproduct:PatternAddDatasetDataProduct", | 
					
						
							| 
									
										
										
										
											2024-07-30 15:17:04 +05:30
										 |  |  |         "replace_external_url = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlDataset", | 
					
						
							|  |  |  |         "replace_external_url_container = datahub.ingestion.transformer.replace_external_url:ReplaceExternalUrlContainer", | 
					
						
							| 
									
										
										
										
											2024-05-03 09:54:48 +02:00
										 |  |  |         "pattern_cleanup_dataset_usage_user = datahub.ingestion.transformer.pattern_cleanup_dataset_usage_user:PatternCleanupDatasetUsageUser", | 
					
						
							| 
									
										
										
										
											2024-05-15 14:13:03 +05:30
										 |  |  |         "domain_mapping_based_on_tags = datahub.ingestion.transformer.dataset_domain_based_on_tags:DatasetTagDomainMapper", | 
					
						
							| 
									
										
										
										
											2024-07-02 15:30:05 +05:30
										 |  |  |         "tags_to_term = datahub.ingestion.transformer.tags_to_terms:TagsToTermMapper", | 
					
						
							| 
									
										
										
										
											2022-12-08 05:08:08 +01:00
										 |  |  |     ], | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |     "datahub.ingestion.sink.plugins": [ | 
					
						
							|  |  |  |         "file = datahub.ingestion.sink.file:FileSink", | 
					
						
							|  |  |  |         "console = datahub.ingestion.sink.console:ConsoleSink", | 
					
						
							| 
									
										
										
										
											2023-02-10 22:12:02 +01:00
										 |  |  |         "blackhole = datahub.ingestion.sink.blackhole:BlackHoleSink", | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |         "datahub-kafka = datahub.ingestion.sink.datahub_kafka:DatahubKafkaSink", | 
					
						
							|  |  |  |         "datahub-rest = datahub.ingestion.sink.datahub_rest:DatahubRestSink", | 
					
						
							| 
									
										
										
										
											2023-01-18 19:18:56 -08:00
										 |  |  |         "datahub-lite = datahub.ingestion.sink.datahub_lite:DataHubLiteSink", | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |     ], | 
					
						
							| 
									
										
										
										
											2022-02-02 13:19:15 -08:00
										 |  |  |     "datahub.ingestion.checkpointing_provider.plugins": [ | 
					
						
							|  |  |  |         "datahub = datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider:DatahubIngestionCheckpointingProvider", | 
					
						
							| 
									
										
										
										
											2023-11-11 04:06:00 +05:30
										 |  |  |         "file = datahub.ingestion.source.state_provider.file_ingestion_checkpointing_provider:FileIngestionCheckpointingProvider", | 
					
						
							| 
									
										
										
										
											2022-02-02 13:19:15 -08:00
										 |  |  |     ], | 
					
						
							|  |  |  |     "datahub.ingestion.reporting_provider.plugins": [ | 
					
						
							| 
									
										
										
										
											2022-08-19 09:08:17 -07:00
										 |  |  |         "datahub = datahub.ingestion.reporting.datahub_ingestion_run_summary_provider:DatahubIngestionRunSummaryProvider", | 
					
						
							|  |  |  |         "file = datahub.ingestion.reporting.file_reporter:FileReporter", | 
					
						
							| 
									
										
										
										
											2021-12-16 20:06:33 -08:00
										 |  |  |     ], | 
					
						
							| 
									
										
										
										
											2023-09-22 16:43:58 -07:00
										 |  |  |     "datahub.custom_packages": [], | 
					
						
							| 
									
										
										
										
											2024-07-01 19:47:07 +02:00
										 |  |  |     "datahub.fs.plugins": [ | 
					
						
							|  |  |  |         "s3 = datahub.ingestion.fs.s3_fs:S3FileSystem", | 
					
						
							|  |  |  |         "file = datahub.ingestion.fs.local_fs:LocalFileSystem", | 
					
						
							|  |  |  |         "http = datahub.ingestion.fs.http_fs:HttpFileSystem", | 
					
						
							|  |  |  |     ], | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  | setuptools.setup( | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     # Package metadata. | 
					
						
							|  |  |  |     name=package_metadata["__package_name__"], | 
					
						
							| 
									
										
										
										
											2024-02-26 15:02:47 -08:00
										 |  |  |     version=_version, | 
					
						
							| 
									
										
										
										
											2025-04-28 23:34:33 +09:00
										 |  |  |     url="https://docs.datahub.com/", | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     project_urls={ | 
					
						
							| 
									
										
										
										
											2025-04-28 23:34:33 +09:00
										 |  |  |         "Documentation": "https://docs.datahub.com/docs/", | 
					
						
							| 
									
										
										
										
											2022-03-18 22:12:19 +01:00
										 |  |  |         "Source": "https://github.com/datahub-project/datahub", | 
					
						
							|  |  |  |         "Changelog": "https://github.com/datahub-project/datahub/releases", | 
					
						
							| 
									
										
										
										
											2023-10-24 00:09:41 -07:00
										 |  |  |         "Releases": "https://github.com/acryldata/datahub/releases", | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2025-07-10 12:26:21 -04:00
										 |  |  |     license="Apache-2.0", | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  |     description="A CLI to work with DataHub metadata", | 
					
						
							| 
									
										
										
										
											2023-10-04 06:53:15 -04:00
										 |  |  |     long_description="""\
 | 
					
						
							|  |  |  | The `acryl-datahub` package contains a CLI and SDK for interacting with DataHub, | 
					
						
							|  |  |  | as well as an integration framework for pulling/pushing metadata from external systems. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-04-28 23:34:33 +09:00
										 |  |  | See the [DataHub docs](https://docs.datahub.com/docs/metadata-ingestion). | 
					
						
							| 
									
										
										
										
											2023-10-04 06:53:15 -04:00
										 |  |  | """,
 | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  |     long_description_content_type="text/markdown", | 
					
						
							|  |  |  |     classifiers=[ | 
					
						
							|  |  |  |         "Development Status :: 5 - Production/Stable", | 
					
						
							|  |  |  |         "Programming Language :: Python", | 
					
						
							|  |  |  |         "Programming Language :: Python :: 3", | 
					
						
							|  |  |  |         "Programming Language :: Python :: 3 :: Only", | 
					
						
							|  |  |  |         "Intended Audience :: Developers", | 
					
						
							|  |  |  |         "Intended Audience :: Information Technology", | 
					
						
							|  |  |  |         "Intended Audience :: System Administrators", | 
					
						
							|  |  |  |         "Operating System :: Unix", | 
					
						
							|  |  |  |         "Operating System :: POSIX :: Linux", | 
					
						
							|  |  |  |         "Environment :: Console", | 
					
						
							|  |  |  |         "Environment :: MacOS X", | 
					
						
							|  |  |  |         "Topic :: Software Development", | 
					
						
							|  |  |  |     ], | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     # Package info. | 
					
						
							|  |  |  |     zip_safe=False, | 
					
						
							| 
									
										
										
										
											2025-07-09 02:25:08 -04:00
										 |  |  |     python_requires=">=3.9", | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  |     package_dir={"": "src"}, | 
					
						
							| 
									
										
										
										
											2021-03-02 11:48:26 -08:00
										 |  |  |     packages=setuptools.find_namespace_packages(where="./src"), | 
					
						
							|  |  |  |     package_data={ | 
					
						
							|  |  |  |         "datahub": ["py.typed"], | 
					
						
							|  |  |  |         "datahub.metadata": ["schema.avsc"], | 
					
						
							| 
									
										
										
										
											2021-06-17 10:04:28 -07:00
										 |  |  |         "datahub.metadata.schemas": ["*.avsc"], | 
					
						
							| 
									
										
										
										
											2023-01-03 21:38:11 +05:30
										 |  |  |         "datahub.ingestion.source.powerbi": ["powerbi-lexical-grammar.rule"], | 
					
						
							| 
									
										
										
										
											2025-07-09 19:55:51 +05:30
										 |  |  |         "datahub.ingestion.autogenerated": ["*.json"], | 
					
						
							| 
									
										
										
										
											2021-03-02 11:48:26 -08:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-05-13 21:42:53 +03:00
										 |  |  |     entry_points=entry_points, | 
					
						
							| 
									
										
										
										
											2021-04-05 19:11:28 -07:00
										 |  |  |     # Dependencies. | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     install_requires=list(base_requirements | framework_common), | 
					
						
							|  |  |  |     extras_require={ | 
					
						
							|  |  |  |         "base": list(framework_common), | 
					
						
							|  |  |  |         **{ | 
					
						
							| 
									
										
										
										
											2023-12-18 18:26:33 -05:00
										 |  |  |             plugin: list( | 
					
						
							|  |  |  |                 framework_common | 
					
						
							|  |  |  |                 | ( | 
					
						
							| 
									
										
										
										
											2025-05-14 09:51:11 -07:00
										 |  |  |                     # While pydantic v2 support is experimental, require that all plugins | 
					
						
							|  |  |  |                     # continue to use v1. This will ensure that no ingestion recipes break. | 
					
						
							|  |  |  |                     pydantic_no_v2 | 
					
						
							| 
									
										
										
										
											2023-12-18 18:26:33 -05:00
										 |  |  |                     if plugin | 
					
						
							|  |  |  |                     not in { | 
					
						
							|  |  |  |                         "airflow", | 
					
						
							|  |  |  |                         "datahub-rest", | 
					
						
							|  |  |  |                         "datahub-kafka", | 
					
						
							|  |  |  |                         "sync-file-emitter", | 
					
						
							|  |  |  |                         "sql-parser", | 
					
						
							| 
									
										
										
										
											2025-05-14 09:51:11 -07:00
										 |  |  |                         # Some sources have been manually tested for compatibility with pydantic v2. | 
					
						
							| 
									
										
										
										
											2024-06-04 12:45:29 -04:00
										 |  |  |                         "iceberg", | 
					
						
							| 
									
										
										
										
											2024-12-27 13:46:49 -05:00
										 |  |  |                         "feast", | 
					
						
							| 
									
										
										
										
											2025-04-30 19:39:35 -07:00
										 |  |  |                         "bigquery-slim", | 
					
						
							|  |  |  |                         "snowflake-slim", | 
					
						
							| 
									
										
										
										
											2025-05-14 09:51:11 -07:00
										 |  |  |                         "mysql",  # tested in smoke-test | 
					
						
							| 
									
										
										
										
											2023-12-18 18:26:33 -05:00
										 |  |  |                     } | 
					
						
							|  |  |  |                     else set() | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |                 | dependencies | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |             for (plugin, dependencies) in plugins.items() | 
					
						
							|  |  |  |         }, | 
					
						
							| 
									
										
										
										
											2021-06-07 14:00:35 -07:00
										 |  |  |         "all": list( | 
					
						
							|  |  |  |             framework_common.union( | 
					
						
							|  |  |  |                 *[ | 
					
						
							|  |  |  |                     requirements | 
					
						
							|  |  |  |                     for plugin, requirements in plugins.items() | 
					
						
							|  |  |  |                     if plugin not in all_exclude_plugins | 
					
						
							|  |  |  |                 ] | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ), | 
					
						
							| 
									
										
										
										
											2023-09-22 16:43:58 -07:00
										 |  |  |         "cloud": ["acryl-datahub-cloud"], | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |         "dev": list(dev_requirements), | 
					
						
							| 
									
										
										
										
											2024-11-20 13:33:54 -08:00
										 |  |  |         "lint": list(lint_requirements), | 
					
						
							| 
									
										
										
										
											2023-07-11 10:39:47 -04:00
										 |  |  |         "testing-utils": list(test_api_requirements),  # To import `datahub.testing` | 
					
						
							| 
									
										
										
										
											2021-07-14 20:02:48 -07:00
										 |  |  |         "integration-tests": list(full_test_dev_requirements), | 
					
						
							| 
									
										
										
										
											2023-10-12 18:43:14 +01:00
										 |  |  |         "debug": list(debug_requirements), | 
					
						
							| 
									
										
										
										
											2021-03-11 16:41:05 -05:00
										 |  |  |     }, | 
					
						
							| 
									
										
										
										
											2021-01-31 22:40:30 -08:00
										 |  |  | ) |