2022-11-03 14:23:19 -04:00
|
|
|
import sys
|
2021-04-05 19:11:28 -07:00
|
|
|
from typing import Dict, Set
|
2021-02-11 23:14:20 -08:00
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
import setuptools
|
|
|
|
|
2021-04-05 19:11:28 -07:00
|
|
|
# Namespace populated by executing src/datahub/__init__.py; the "airflow"
# extra further down reads ``__version__`` from it.
package_metadata: dict = {}
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default locale encoding.
with open("./src/datahub/__init__.py", encoding="utf-8") as fp:
    # NOTE: exec() runs the file's code. That is only acceptable because the
    # file belongs to this source tree and is not external input.
    exec(fp.read(), package_metadata)
|
2021-01-31 22:40:30 -08:00
|
|
|
|
|
|
|
|
2021-03-11 16:41:05 -05:00
|
|
|
base_requirements = {
    # Ideally typing_extensions would be >=3.10.0.2, but an Airflow 2.1
    # dependency conflict prevents us from restricting it that far.
    "typing_extensions>=3.7.4.3",
    "mypy_extensions>=0.4.3",
    # Actual dependencies.
    "typing-inspect",
    # pydantic 1.10.3 is incompatible with typing-extensions 4.1.1:
    #   https://github.com/pydantic/pydantic/issues/4885
    # pydantic 2 makes major, backwards-incompatible changes:
    #   https://github.com/pydantic/pydantic/issues/4887
    "pydantic>=1.5.1,!=1.10.3,<2",
    "mixpanel>=4.9.0",
    "sentry-sdk",
}
|
|
|
|
|
|
|
|
framework_common = {
    "click>=7.1.2",
    "click-default-group",
    "PyYAML",
    "toml>=0.10.0",
    # Only needed below Python 3.10; from 3.10 on, importlib_metadata is
    # included in the standard library.
    "importlib_metadata>=4.0.0; python_version < '3.10'",
    "docker",
    "expandvars>=0.6.5",
    "avro-gen3==0.7.11",
    # Alternative (disabled) source for avro-gen3:
    # "avro-gen3 @ git+https://github.com/acryldata/avro_gen@master#egg=avro-gen3",
    "avro>=1.11.3,<1.12",
    "python-dateutil>=2.8.0",
    "tabulate",
    "progressbar2",
    "termcolor>=1.0.0",
    "psutil>=5.8.0",
    "Deprecated",
    "humanfriendly",
    "packaging",
    "aiohttp<4",
    "cached_property",
    "ijson",
    "click-spinner",
    "requests_file",
    "jsonref",
    # jsonschema drops python 3.7 support in v4.18.0
    "jsonschema<=4.17.3; python_version < '3.8'",
    "jsonschema; python_version >= '3.8'",
    "ruamel.yaml",
}
|
|
|
|
|
2023-02-02 09:24:28 -05:00
|
|
|
# Requirements of the "datahub-rest" plugin.
rest_common = {
    "requests",
    "requests_file",
}
|
2022-11-11 15:04:36 -05:00
|
|
|
|
2021-03-11 16:41:05 -05:00
|
|
|
kafka_common = {
    # The confluent_kafka package provides a number of pre-built wheels for
    # various platforms and architectures. However, it does not provide wheels
    # for arm64 (including M1 Macs) or aarch64 (Docker's linux/arm64). This has
    # remained an open issue on the confluent_kafka project for a year:
    #   - https://github.com/confluentinc/confluent-kafka-python/issues/1182
    #   - https://github.com/confluentinc/confluent-kafka-python/pull/1161
    #
    # When a wheel is not available, we must build from source instead.
    # Building from source requires librdkafka to be installed.
    # Most platforms have an easy way to install librdkafka:
    #   - MacOS: `brew install librdkafka` gives latest, which is 1.9.x or newer.
    #   - Debian: `apt install librdkafka` gives 1.6.0 (https://packages.debian.org/bullseye/librdkafka-dev).
    #   - Ubuntu: `apt install librdkafka` gives 1.8.0 (https://launchpad.net/ubuntu/+source/librdkafka).
    #
    # Moreover, confluent_kafka 1.9.0 introduced a hard compatibility break, and
    # requires librdkafka >=1.9.0. As such, installing confluent_kafka 1.9.x on
    # most arm64 Linux machines will fail, since it will build from source but then
    # fail because librdkafka is too old. Hence, we have added an extra requirement
    # that requires confluent_kafka<1.9.0 on non-MacOS arm64/aarch64 machines, which
    # should ideally allow the builds to succeed in default conditions. We still
    # want to allow confluent_kafka >= 1.9.0 for M1 Macs, which is why we can't
    # broadly restrict confluent_kafka to <1.9.0.
    #
    # Note that this is somewhat of a hack, since we don't actually require the
    # older version of confluent_kafka on those machines. Additionally, we will
    # need to monitor the Debian/Ubuntu PPAs and modify this rule if they start
    # to support librdkafka >= 1.9.0.
    "confluent_kafka>=1.5.0",
    'confluent_kafka<1.9.0; platform_system != "Darwin" and (platform_machine == "aarch64" or platform_machine == "arm64")',
    # We currently require both Avro libraries. The codegen uses avro-python3 (above)
    # schema parsers at runtime for generating and reading JSON into Python objects.
    # At the same time, we use Kafka's AvroSerializer, which internally relies on
    # fastavro for serialization. We do not use confluent_kafka[avro], since it
    # is incompatible with its own dep on avro-python3.
    "fastavro>=1.2.0",
}
|
|
|
|
|
2022-08-10 22:00:31 +00:00
|
|
|
kafka_protobuf = {
    "networkx>=2.6.2",
    # Required to generate protobuf python modules from the schema downloaded
    # from the schema registry.
    # NOTE: potential conflict with feast also depending on grpcio
    "grpcio>=1.44.0,<2",
    "grpcio-tools>=1.44.0,<2",
}
|
2022-05-04 17:07:01 -07:00
|
|
|
|
2023-10-26 21:44:32 +05:30
|
|
|
usage_common = {"sqlparse"}

sqlglot_lib = {
    # Using an Acryl fork of sqlglot.
    # https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:hsheth?expand=1
    "acryl-sqlglot==19.0.2.dev10",
}

# Union of the SQL-specific pins below with usage_common and sqlglot_lib.
sql_common = {
    *usage_common,
    *sqlglot_lib,
    # Required for all SQL sources.
    # This is a temporary lower bound that we're open to loosening/tightening
    # as requirements show up.
    "sqlalchemy>=1.4.39, <2",
    # Required for SQL profiling.
    "great-expectations>=0.15.12, <=0.15.50",
    # scipy version restricted to reduce backtracking, used by great-expectations,
    "scipy>=1.7.2",
    # GE added handling for higher version of jinja2
    # https://github.com/great-expectations/great_expectations/pull/5382/files
    # datahub does not depend on traitlets directly but great expectations does.
    # https://github.com/ipython/traitlets/issues/741
    "traitlets<5.2.2",
    "greenlet",
}
|
|
|
|
|
2023-04-19 02:55:42 +05:30
|
|
|
sqllineage_lib = {
    "sqllineage==1.3.8",
    # sqlparse is not a direct dependency of ours; it comes in through
    # sqllineage. Leaving it unpinned has caused issues before, so it's best
    # to pin it.
    # Related: https://github.com/reata/sqllineage/issues/361 and https://github.com/reata/sqllineage/pull/360
    "sqlparse==0.4.4",
}
|
2023-01-26 12:25:02 -05:00
|
|
|
|
2021-06-29 19:43:31 -07:00
|
|
|
aws_common = {
    # AWS Python SDK
    "boto3",
    # Excludes the botocore release that has a version incompatibility with
    # urllib3 (botocore is used by boto3).
    # See https://github.com/boto/botocore/pull/2563.
    "botocore!=1.23.0",
}
|
|
|
|
|
2022-06-29 16:25:48 +02:00
|
|
|
# Shared by redshift_common and s3_base.
path_spec_common = {"parse>=1.19.0", "wcmatch"}
|
|
|
|
|
2021-09-16 23:09:45 -07:00
|
|
|
looker_common = {
    # Looker Python SDK
    "looker-sdk==23.0.0",
    # This version of lkml contains a fix for parsing lists in
    # LookML files with spaces between an item and the following comma.
    # See https://github.com/joshtemple/lkml/issues/73.
    "lkml>=1.3.0b5",
    "sql-metadata==2.2.2",
    "GitPython>2",
} | sqllineage_lib
|
|
|
|
|
2021-10-25 17:50:07 -07:00
|
|
|
bigquery_common = {
    # Google cloud logging library
    "google-cloud-logging<=3.5.0",
    "google-cloud-bigquery",
    "more-itertools>=8.12.0",
}
|
|
|
|
|
2022-11-11 15:04:36 -05:00
|
|
|
clickhouse_common = {
    # Version 0.2.0 adds support for SQLAlchemy 1.4.x.
    # Disallow 0.2.5 because of https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/272.
    # Note that there's also a known issue around nested map types:
    # https://github.com/xzkostyan/clickhouse-sqlalchemy/issues/269.
    "clickhouse-sqlalchemy>=0.2.0,<0.2.5",
}
|
|
|
|
|
2022-06-29 16:25:48 +02:00
|
|
|
redshift_common = (
    {
        # Version 0.8.3 adds support for SQLAlchemy 1.4.x.
        "sqlalchemy-redshift>=0.8.3",
        "GeoAlchemy2",
        "redshift-connector",
    }
    | sqllineage_lib
    | path_spec_common
)
|
|
|
|
|
2022-01-30 13:47:53 -06:00
|
|
|
# Snowflake plugin utilizes sql common
snowflake_common = sql_common | {
    # https://github.com/snowflakedb/snowflake-sqlalchemy/issues/350
    "snowflake-sqlalchemy>=1.4.3",
    # See https://github.com/snowflakedb/snowflake-connector-python/pull/1348 for why 2.8.2 is blocked
    "snowflake-connector-python!=2.8.2",
    "pandas",
    "cryptography",
    "msal",
    "acryl-datahub-classify==0.0.8",
    # spacy version restricted to reduce backtracking, used by acryl-datahub-classify,
    "spacy==3.4.3",
}
|
|
|
|
|
2022-03-31 03:50:26 +02:00
|
|
|
# Shared by the "presto", "trino" and "starburst-trino-usage" plugins.
trino = {"trino[sqlalchemy]>=0.308"}
|
|
|
|
|
2023-08-09 10:04:36 -07:00
|
|
|
pyhive_common = {
    # Acryl Data maintains a fork of PyHive
    # - 0.6.11 adds support for table comments and column comments,
    #   and also releases HTTP and HTTPS transport schemes
    # - 0.6.12 adds support for Spark Thrift Server
    # - 0.6.13 adds a small fix for Databricks
    # - 0.6.14 uses pure-sasl instead of sasl so it builds on Python 3.11
    # - 0.6.15 adds support for thrift > 0.14 (cherry-picked from https://github.com/apache/thrift/pull/2491)
    # - 0.6.16 fixes a regression in 0.6.15 (https://github.com/acryldata/PyHive/pull/9)
    "acryl-pyhive[hive-pure-sasl]==0.6.16",
    # As per https://github.com/datahub-project/datahub/issues/8405
    # and https://github.com/dropbox/PyHive/issues/417, version 0.14.0
    # of thrift broke PyHive's hive+http transport.
    # Fixed by https://github.com/apache/thrift/pull/2491 in version 0.17.0
    # which is unfortunately not on PyPi.
    # Instead, we put the fix in our PyHive fork, so no thrift pin is needed.
}
|
|
|
|
|
2023-07-13 17:33:19 +05:30
|
|
|
# Used by the "powerbi" plugin.
microsoft_common = {
    "msal==1.22.0",
}
|
2022-03-01 07:07:22 +05:30
|
|
|
|
2022-08-16 05:30:40 +00:00
|
|
|
iceberg_common = {
    # Iceberg Python SDK
    "pyiceberg",
    "pyarrow>=9.0.0, <13.0.0",
}
|
|
|
|
|
|
|
|
s3_base = (
    aws_common
    | path_spec_common
    | {
        "more-itertools>=8.12.0",
        "parse>=1.19.0",
        "pyarrow>=6.0.1",
        "tableschema>=1.20.2",
        # ujson 5.2.0 has the JSONDecodeError exception type, which we need
        # for error handling.
        "ujson>=5.2.0",
        "smart-open[s3]>=5.2.1",
        "moto[s3]",
    }
)
|
|
|
|
|
|
|
|
# Added to the "delta-lake", "s3" and "gcs" plugins.
data_lake_profiling = {"pydeequ~=1.1.0", "pyspark~=3.3.0"}
|
|
|
|
|
2022-06-28 04:46:33 +05:30
|
|
|
# Everything in s3_base plus the deltalake package (0.6.4 is excluded).
delta_lake = s3_base | {"deltalake>=0.6.3, != 0.6.4"}
|
|
|
|
|
2022-11-02 01:05:42 +02:00
|
|
|
# Requirements of the "powerbi-report-server" plugin.
powerbi_report_server = {
    "requests",
    "requests_ntlm",
}
|
|
|
|
|
2023-06-23 08:31:05 -04:00
|
|
|
databricks = {
    # 0.1.11 appears to have authentication issues with azure databricks
    "databricks-sdk>=0.9.0",
    "pyspark~=3.3.0",
    "requests",
    "databricks-sql-connector",
}
|
2022-06-29 16:25:48 +02:00
|
|
|
|
2023-08-15 17:49:20 -04:00
|
|
|
# sql_common plus the pymysql driver; used by the "mysql" and "datahub" plugins.
mysql = {*sql_common, "pymysql>=1.0.2"}
|
|
|
|
|
2021-03-11 16:41:05 -05:00
|
|
|
# Note: for all of these, framework_common will be added.
|
|
|
|
# Maps each plugin (extra) name to the set of requirement strings it needs.
plugins: Dict[str, Set[str]] = {
    # Sink plugins.
    "datahub-kafka": kafka_common,
    "datahub-rest": rest_common,
    "sync-file-emitter": {"filelock"},
    "datahub-lite": {"duckdb", "fastapi", "uvicorn"},
    # Integrations.
    "airflow": {
        # Pinned to the version read from src/datahub/__init__.py.
        f"acryl-datahub-airflow-plugin == {package_metadata['__version__']}",
    },
    "circuit-breaker": {"gql>=3.3.0", "gql[requests]>=3.3.0"},
    "datahub": {*mysql, *kafka_common},
    "great-expectations": {*sql_common, *sqllineage_lib},
    # Misc plugins.
    "sql-parser": sqlglot_lib,
    # Source plugins
    # sqlalchemy-bigquery is included here since it provides an implementation of
    # a SQLalchemy-conform STRUCT type definition
    "athena": {
        *sql_common,
        "PyAthena[SQLAlchemy]>=2.6.0,<3.0.0",
        "sqlalchemy-bigquery>=1.4.1",
    },
    "azure-ad": set(),
    "bigquery": {
        *sql_common,
        *bigquery_common,
        # TODO: I doubt we need all three sql parsing libraries.
        *sqllineage_lib,
        *sqlglot_lib,
        "sqlalchemy-bigquery>=1.4.1",
        "google-cloud-datacatalog-lineage==0.2.2",
    },
    "clickhouse": {*sql_common, *clickhouse_common},
    "clickhouse-usage": {*sql_common, *usage_common, *clickhouse_common},
    "datahub-lineage-file": set(),
    "datahub-business-glossary": set(),
    "delta-lake": data_lake_profiling | delta_lake,
    "dbt": {"requests", *sqlglot_lib, *aws_common},
    "dbt-cloud": {"requests", *sqlglot_lib},
    "druid": {*sql_common, "pydruid>=0.6.2"},
    "dynamodb": aws_common,
    # Starting with 7.14.0, the python client checks whether it is connected to
    # an elasticsearch client. If it's not, it throws UnsupportedProductError.
    # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/release-notes.html#rn-7-14-0
    # https://github.com/elastic/elasticsearch-py/issues/1639#issuecomment-883587433
    "elasticsearch": {"elasticsearch==7.13.4"},
    "feast": {
        "feast~=0.31.1",
        "flask-openid>=1.3.0",
        # typeguard 3.x, released on 2023-03-14, seems to cause issues with Feast.
        "typeguard<3",
    },
    "glue": aws_common,
    # hdbcli is supported officially by SAP, sqlalchemy-hana is built on top but not officially supported
    "hana": {
        *sql_common,
        "sqlalchemy-hana>=0.5.0; platform_machine != 'aarch64' and platform_machine != 'arm64'",
        "hdbcli>=2.11.20; platform_machine != 'aarch64' and platform_machine != 'arm64'",
    },
    "hive": {
        *sql_common,
        *pyhive_common,
        "databricks-dbapi",
        # Due to https://github.com/great-expectations/great_expectations/issues/6146,
        # we cannot allow 0.15.{23-26}. This was fixed in 0.15.27 by
        # https://github.com/great-expectations/great_expectations/pull/6149.
        "great-expectations != 0.15.23, != 0.15.24, != 0.15.25, != 0.15.26",
    },
    "iceberg": iceberg_common,
    "json-schema": set(),
    "kafka": {*kafka_common, *kafka_protobuf},
    "kafka-connect": {*sql_common, "requests", "JPype1"},
    "ldap": {"python-ldap>=2.4"},
    "looker": looker_common,
    "lookml": looker_common,
    "metabase": {"requests", *sqllineage_lib},
    "mlflow": {"mlflow-skinny>=2.3.0"},
    "mode": {"requests", "tenacity>=8.0.1", *sqllineage_lib},
    "mongodb": {"pymongo[srv]>=3.11", "packaging"},
    "mssql": {*sql_common, "sqlalchemy-pytds>=0.3", "pyOpenSSL"},
    "mssql-odbc": {*sql_common, "pyodbc"},
    "mysql": mysql,
    # mariadb should have same dependency as mysql
    "mariadb": {*sql_common, "pymysql>=1.0.2"},
    "okta": {"okta~=1.7.0", "nest-asyncio"},
    "oracle": {*sql_common, "cx_Oracle"},
    "postgres": {*sql_common, "psycopg2-binary", "GeoAlchemy2"},
    "presto": {*sql_common, *pyhive_common, *trino},
    "presto-on-hive": {
        *sql_common,
        *pyhive_common,
        "psycopg2-binary",
        "pymysql>=1.0.2",
    },
    "pulsar": {"requests"},
    "redash": {"redash-toolbelt", "sql-metadata", *sqllineage_lib},
    "redshift": {*sql_common, *redshift_common, *usage_common, *sqlglot_lib},
    "s3": s3_base | data_lake_profiling,
    "gcs": s3_base | data_lake_profiling,
    "sagemaker": aws_common,
    "salesforce": {"simple-salesforce"},
    "snowflake": {*snowflake_common, *usage_common, *sqlglot_lib},
    "sqlalchemy": sql_common,
    "sql-queries": {*usage_common, *sqlglot_lib},
    "superset": {"requests", "sqlalchemy", "great_expectations", "greenlet"},
    # FIXME: I don't think tableau uses sqllineage anymore so we should be able
    # to remove that dependency.
    "tableau": {"tableauserverclient>=0.17.0", *sqllineage_lib, *sqlglot_lib},
    "teradata": {
        *sql_common,
        *usage_common,
        *sqlglot_lib,
        "teradatasqlalchemy>=17.20.0.0",
    },
    "trino": {*sql_common, *trino},
    "starburst-trino-usage": {*sql_common, *usage_common, *trino},
    "nifi": {"requests", "packaging", "requests-gssapi"},
    "powerbi": {*microsoft_common, "lark[regex]==1.1.4", "sqlparse", *sqlglot_lib},
    "powerbi-report-server": powerbi_report_server,
    "vertica": {*sql_common, "vertica-sqlalchemy-dialect[vertica-python]==0.0.8.1"},
    "unity-catalog": {*databricks, *sqllineage_lib},
    "fivetran": snowflake_common,
}
|
|
|
|
|
2023-01-20 17:24:43 -08:00
|
|
|
# This is mainly used to exclude plugins from the Docker image.
|
2021-06-07 14:00:35 -07:00
|
|
|
all_exclude_plugins: Set[str] = {
    # SQL Server ODBC requires additional drivers, and so we don't want to keep
    # it included in the default "all" installation.
    "mssql-odbc",
    # duckdb doesn't have a prebuilt wheel for Linux armv7l or aarch64, so we
    # simply exclude it.
    "datahub-lite",
}
|
|
|
|
|
2021-06-08 16:10:16 -07:00
|
|
|
# Type-stub packages; unpacked into base_dev_requirements.
mypy_stubs = {
    "types-dataclasses",
    "types-pkg_resources",
    "types-six",
    "types-python-dateutil",
    # We need to avoid 2.31.0.5 and 2.31.0.4 due to
    # https://github.com/python/typeshed/issues/10764. Once that
    # issue is resolved, we can remove the upper bound and change it
    # to a != constraint.
    # We have a PR up to fix the underlying issue: https://github.com/python/typeshed/pull/10776.
    "types-requests>=2.28.11.6,<=2.31.0.3",
    "types-toml",
    "types-PyMySQL",
    "types-PyYAML",
    "types-freezegun",
    "types-cachetools",
    # versions 0.1.13 and 0.1.14 seem to have issues
    "types-click==0.1.12",
    # The boto3-stubs package seems to have regularly breaking minor releases,
    # we pin to a specific version to avoid this.
    "boto3-stubs[s3,glue,sagemaker,sts]==1.28.15",
    # For some reason, above pin only restricts `mypy-boto3-sagemaker<1.29.0,>=1.28.0`
    "mypy-boto3-sagemaker==1.28.15",
    "types-tabulate",
    # avrogen package requires this
    "types-pytz",
    "types-pyOpenSSL",
    "types-click-spinner>=0.1.13.1",
    "types-ujson>=5.2.0",
    "types-termcolor>=1.0.0",
    "types-Deprecated",
    "types-protobuf>=4.21.0.1",
    "sqlalchemy2-stubs",
}
|
|
|
|
|
2023-07-11 10:39:47 -04:00
|
|
|
|
|
|
|
# Named separately because base_dev_requirements also lists them directly.
pytest_dep = "pytest>=6.2.2"
deepdiff_dep = "deepdiff"
test_api_requirements = {
    pytest_dep,
    deepdiff_dep,
    "PyYAML",
}
|
|
|
|
|
2023-10-24 00:09:41 -07:00
|
|
|
# Single-entry requirement set: only "memray" is listed here.
debug_requirements = {"memray"}
|
2023-10-12 18:43:14 +01:00
|
|
|
|
2021-04-26 16:44:36 -07:00
|
|
|
# Union of the base/framework/stub/s3 sets, the dev tool pins below, and the
# requirements of every plugin named in the final list.
base_dev_requirements = (
    base_requirements
    | framework_common
    | mypy_stubs
    | s3_base
    | test_api_requirements
    | {
        # This is pinned only to avoid spurious errors in CI.
        # We should make an effort to keep it up to date.
        "black==22.12.0",
        "coverage>=5.1",
        "faker>=18.4.0",
        # DEPRECATION: Once we drop Python 3.7, we can pin to 6.x.
        "flake8>=3.8.3",
        "flake8-tidy-imports>=4.3.0",
        "flake8-bugbear==23.3.12",
        "isort>=5.7.0",
        "mypy==1.0.0",
        # pydantic 1.8.2 is incompatible with mypy 0.910.
        # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
        "pydantic>=1.10.0",
        pytest_dep,
        "pytest-asyncio>=0.16.0",
        "pytest-cov>=2.8.1",
        "pytest-docker>=1.0.1",
        deepdiff_dep,
        "requests-mock",
        "freezegun",
        "jsonpickle",
        "build",
        "twine",
    }
    | set().union(
        *(
            plugins[name]
            for name in [
                "athena",
                "bigquery",
                "clickhouse",
                "clickhouse-usage",
                "delta-lake",
                "druid",
                "elasticsearch",
                # These three are only pulled in on Python 3.8+.
                *(
                    ["feast", "iceberg", "mlflow"]
                    if sys.version_info >= (3, 8)
                    else []
                ),
                "json-schema",
                "ldap",
                "looker",
                "lookml",
                "glue",
                "mariadb",
                "okta",
                "oracle",
                "postgres",
                "sagemaker",
                "kafka",
                "datahub-rest",
                "datahub-lite",
                "great-expectations",
                "presto",
                "redash",
                "redshift",
                "s3",
                "snowflake",
                "tableau",
                "teradata",
                "trino",
                "hive",
                "starburst-trino-usage",
                "powerbi",
                "powerbi-report-server",
                "salesforce",
                "unity-catalog",
                "nifi",
                "vertica",
                "mode",
                "fivetran",
                "kafka-connect",
            ]
        )
    )
)
|
|
|
|
|
2021-04-26 16:44:36 -07:00
|
|
|
# A separate set object with the same contents as base_dev_requirements.
dev_requirements = set(base_dev_requirements)
|
|
|
|
|
2021-07-14 20:02:48 -07:00
|
|
|
# Union of the requirement sets of the plugins listed below.
full_test_dev_requirements = set().union(
    *(
        plugins[name]
        for name in [
            "athena",
            "circuit-breaker",
            "clickhouse",
            "delta-lake",
            "druid",
            # None entries (Python < 3.8) are dropped by the filter below.
            "feast" if sys.version_info >= (3, 8) else None,
            "hana",
            "hive",
            "iceberg" if sys.version_info >= (3, 8) else None,
            "kafka-connect",
            "ldap",
            "mongodb",
            "mssql",
            "mysql",
            "mariadb",
            "redash",
            "vertica",
        ]
        if name
    )
)
|
2021-03-11 16:41:05 -05:00
|
|
|
|
2021-05-13 21:42:53 +03:00
|
|
|
# Entry-point table handed to `setuptools.setup(entry_points=...)`.
#
# Keys are entry-point group names; values are lists of specs written as
# `<plugin name> = <importable module>:<attribute>`. The colon separates the
# module path from the object inside it -- a spec written with a dot before
# the class name would be read as a (non-existent) module path by the standard
# entry-point loader, so every spec here uses the colon form.
entry_points = {
    "console_scripts": ["datahub = datahub.entrypoints:main"],
    # Ingestion sources, keyed by the `type` name used to select them.
    "datahub.ingestion.source.plugins": [
        "csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource",
        "file = datahub.ingestion.source.file:GenericFileSource",
        "datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource",
        "sqlalchemy = datahub.ingestion.source.sql.sql_generic:SQLAlchemyGenericSource",
        "athena = datahub.ingestion.source.sql.athena:AthenaSource",
        "azure-ad = datahub.ingestion.source.identity.azure_ad:AzureADSource",
        "bigquery = datahub.ingestion.source.bigquery_v2.bigquery:BigqueryV2Source",
        "clickhouse = datahub.ingestion.source.sql.clickhouse:ClickHouseSource",
        "clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsageSource",
        "delta-lake = datahub.ingestion.source.delta_lake:DeltaLakeSource",
        "s3 = datahub.ingestion.source.s3:S3Source",
        "dbt = datahub.ingestion.source.dbt.dbt_core:DBTCoreSource",
        "dbt-cloud = datahub.ingestion.source.dbt.dbt_cloud:DBTCloudSource",
        "druid = datahub.ingestion.source.sql.druid:DruidSource",
        "dynamodb = datahub.ingestion.source.dynamodb.dynamodb:DynamoDBSource",
        "elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource",
        "feast = datahub.ingestion.source.feast:FeastRepositorySource",
        "glue = datahub.ingestion.source.aws.glue:GlueSource",
        "sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource",
        "hana = datahub.ingestion.source.sql.hana:HanaSource",
        "hive = datahub.ingestion.source.sql.hive:HiveSource",
        "json-schema = datahub.ingestion.source.schema.json_schema:JsonSchemaSource",
        "kafka = datahub.ingestion.source.kafka:KafkaSource",
        "kafka-connect = datahub.ingestion.source.kafka_connect:KafkaConnectSource",
        "ldap = datahub.ingestion.source.ldap:LDAPSource",
        "looker = datahub.ingestion.source.looker.looker_source:LookerDashboardSource",
        "lookml = datahub.ingestion.source.looker.lookml_source:LookMLSource",
        "datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource",
        "datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource",
        "mlflow = datahub.ingestion.source.mlflow:MLflowSource",
        "mode = datahub.ingestion.source.mode:ModeSource",
        "mongodb = datahub.ingestion.source.mongodb:MongoDBSource",
        "mssql = datahub.ingestion.source.sql.mssql:SQLServerSource",
        "mysql = datahub.ingestion.source.sql.mysql:MySQLSource",
        # Colon (not dot) before the class name, matching every sibling spec.
        "mariadb = datahub.ingestion.source.sql.mariadb:MariaDBSource",
        "okta = datahub.ingestion.source.identity.okta:OktaSource",
        "oracle = datahub.ingestion.source.sql.oracle:OracleSource",
        "postgres = datahub.ingestion.source.sql.postgres:PostgresSource",
        "redash = datahub.ingestion.source.redash:RedashSource",
        "redshift = datahub.ingestion.source.redshift.redshift:RedshiftSource",
        "snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source",
        "superset = datahub.ingestion.source.superset:SupersetSource",
        "tableau = datahub.ingestion.source.tableau:TableauSource",
        "openapi = datahub.ingestion.source.openapi:OpenApiSource",
        "metabase = datahub.ingestion.source.metabase:MetabaseSource",
        "teradata = datahub.ingestion.source.sql.teradata:TeradataSource",
        "trino = datahub.ingestion.source.sql.trino:TrinoSource",
        "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource",
        "nifi = datahub.ingestion.source.nifi:NifiSource",
        "powerbi = datahub.ingestion.source.powerbi:PowerBiDashboardSource",
        "powerbi-report-server = datahub.ingestion.source.powerbi_report_server:PowerBiReportServerDashboardSource",
        "iceberg = datahub.ingestion.source.iceberg.iceberg:IcebergSource",
        "vertica = datahub.ingestion.source.sql.vertica:VerticaSource",
        "presto = datahub.ingestion.source.sql.presto:PrestoSource",
        "presto-on-hive = datahub.ingestion.source.sql.presto_on_hive:PrestoOnHiveSource",
        "pulsar = datahub.ingestion.source.pulsar:PulsarSource",
        "salesforce = datahub.ingestion.source.salesforce:SalesforceSource",
        # Colon (not dot) before the class name, matching every sibling spec.
        "demo-data = datahub.ingestion.source.demo_data:DemoDataSource",
        "unity-catalog = datahub.ingestion.source.unity.source:UnityCatalogSource",
        "gcs = datahub.ingestion.source.gcs.gcs_source:GCSSource",
        "sql-queries = datahub.ingestion.source.sql_queries:SqlQueriesSource",
        "fivetran = datahub.ingestion.source.fivetran.fivetran:FivetranSource",
    ],
    # Transformers applied to metadata between source and sink.
    "datahub.ingestion.transformer.plugins": [
        "simple_remove_dataset_ownership = datahub.ingestion.transformer.remove_dataset_ownership:SimpleRemoveDatasetOwnership",
        "mark_dataset_status = datahub.ingestion.transformer.mark_dataset_status:MarkDatasetStatus",
        "set_dataset_browse_path = datahub.ingestion.transformer.add_dataset_browse_path:AddDatasetBrowsePathTransformer",
        "add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:AddDatasetOwnership",
        "simple_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:SimpleAddDatasetOwnership",
        "pattern_add_dataset_ownership = datahub.ingestion.transformer.add_dataset_ownership:PatternAddDatasetOwnership",
        "add_dataset_domain = datahub.ingestion.transformer.dataset_domain:AddDatasetDomain",
        "simple_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:SimpleAddDatasetDomain",
        "pattern_add_dataset_domain = datahub.ingestion.transformer.dataset_domain:PatternAddDatasetDomain",
        "add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:AddDatasetTags",
        "simple_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:SimpleAddDatasetTags",
        "pattern_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:PatternAddDatasetTags",
        "extract_dataset_tags = datahub.ingestion.transformer.extract_dataset_tags:ExtractDatasetTags",
        "add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:AddDatasetTerms",
        "simple_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:SimpleAddDatasetTerms",
        "pattern_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:PatternAddDatasetTerms",
        "add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:AddDatasetProperties",
        "simple_add_dataset_properties = datahub.ingestion.transformer.add_dataset_properties:SimpleAddDatasetProperties",
        "pattern_add_dataset_schema_terms = datahub.ingestion.transformer.add_dataset_schema_terms:PatternAddDatasetSchemaTerms",
        "pattern_add_dataset_schema_tags = datahub.ingestion.transformer.add_dataset_schema_tags:PatternAddDatasetSchemaTags",
        "extract_owners_from_tags = datahub.ingestion.transformer.extract_ownership_from_tags:ExtractOwnersFromTagsTransformer",
    ],
    # Sinks that receive the emitted metadata.
    "datahub.ingestion.sink.plugins": [
        "file = datahub.ingestion.sink.file:FileSink",
        "console = datahub.ingestion.sink.console:ConsoleSink",
        "blackhole = datahub.ingestion.sink.blackhole:BlackHoleSink",
        "datahub-kafka = datahub.ingestion.sink.datahub_kafka:DatahubKafkaSink",
        "datahub-rest = datahub.ingestion.sink.datahub_rest:DatahubRestSink",
        "datahub-lite = datahub.ingestion.sink.datahub_lite:DataHubLiteSink",
    ],
    "datahub.ingestion.checkpointing_provider.plugins": [
        "datahub = datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider:DatahubIngestionCheckpointingProvider",
        "file = datahub.ingestion.source.state_provider.file_ingestion_checkpointing_provider:FileIngestionCheckpointingProvider",
    ],
    "datahub.ingestion.reporting_provider.plugins": [
        "datahub = datahub.ingestion.reporting.datahub_ingestion_run_summary_provider:DatahubIngestionRunSummaryProvider",
        "file = datahub.ingestion.reporting.file_reporter:FileReporter",
    ],
    # Declared but intentionally empty in this package.
    "datahub.custom_packages": [],
}
|
|
|
|
|
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
# Hand the metadata, package layout and dependency sets assembled earlier in
# this file to setuptools.
setuptools.setup(
    # ---- Package metadata ----
    # Name and version are read from the `package_metadata` mapping populated
    # near the top of this file.
    name=package_metadata["__package_name__"],
    version=package_metadata["__version__"],
    url="https://datahubproject.io/",
    project_urls={
        "Documentation": "https://datahubproject.io/docs/",
        "Source": "https://github.com/datahub-project/datahub",
        "Changelog": "https://github.com/datahub-project/datahub/releases",
        "Releases": "https://github.com/acryldata/datahub/releases",
    },
    license="Apache License 2.0",
    description="A CLI to work with DataHub metadata",
    # Markdown body (see long_description_content_type below), spelled out
    # one line per literal so the paragraph break is explicit.
    long_description=(
        "The `acryl-datahub` package contains a CLI and SDK for interacting with DataHub,\n"
        "as well as an integration framework for pulling/pushing metadata from external systems.\n"
        "\n"
        "See the [DataHub docs](https://datahubproject.io/docs/metadata-ingestion).\n"
    ),
    long_description_content_type="text/markdown",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Intended Audience :: System Administrators",
        "License :: OSI Approved",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: Unix",
        "Operating System :: POSIX :: Linux",
        "Environment :: Console",
        "Environment :: MacOS X",
        "Topic :: Software Development",
    ],
    # ---- Package layout ----
    zip_safe=False,
    python_requires=">=3.7",
    package_dir={"": "src"},
    packages=setuptools.find_namespace_packages(where="./src"),
    # Non-Python files shipped inside the listed packages.
    package_data={
        "datahub": ["py.typed"],
        "datahub.metadata": ["schema.avsc"],
        "datahub.metadata.schemas": ["*.avsc"],
        "datahub.ingestion.source.powerbi": ["powerbi-lexical-grammar.rule"],
    },
    entry_points=entry_points,
    # ---- Dependencies ----
    install_requires=list(base_requirements | framework_common),
    extras_require={
        "base": list(framework_common),
        # One extra per plugin: the shared framework requirements plus the
        # plugin's own requirement set.
        **{
            plugin_name: list(framework_common | plugin_deps)
            for plugin_name, plugin_deps in plugins.items()
        },
        # "all" merges every plugin's requirements except those listed in
        # `all_exclude_plugins`.
        "all": list(
            framework_common.union(
                *(
                    plugin_deps
                    for plugin_name, plugin_deps in plugins.items()
                    if plugin_name not in all_exclude_plugins
                )
            )
        ),
        "cloud": ["acryl-datahub-cloud"],
        "dev": list(dev_requirements),
        "testing-utils": list(test_api_requirements),  # To import `datahub.testing`
        "integration-tests": list(full_test_dev_requirements),
        "debug": list(debug_requirements),
    },
)
|