2021-01-31 22:40:30 -08:00
|
|
|
import os
|
2021-05-13 21:42:53 +03:00
|
|
|
import sys
|
2021-04-05 19:11:28 -07:00
|
|
|
from typing import Dict, Set
|
2021-02-11 23:14:20 -08:00
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
import setuptools
|
|
|
|
|
2021-05-13 21:42:53 +03:00
|
|
|
# True when the interpreter running this script is Python 3.7 or newer.
is_py37_or_newer = (sys.version_info.major, sys.version_info.minor) >= (3, 7)
|
|
|
|
|
|
|
|
|
2021-04-05 19:11:28 -07:00
|
|
|
# Execute the package's __init__.py in a scratch namespace so that the names
# it defines at module level (read further down as "__package_name__" and
# "__version__") are available without importing the package itself.
package_metadata: dict = {}
# Python source files are UTF-8 by default, so decode explicitly instead of
# relying on the platform's locale encoding.
with open("./src/datahub/__init__.py", encoding="utf-8") as fp:
    exec(fp.read(), package_metadata)
|
2021-01-31 22:40:30 -08:00
|
|
|
|
|
|
|
|
|
|
|
def get_long_description() -> str:
    """Return the text of the README.md located next to this file.

    The file is decoded as UTF-8 explicitly so that the result does not
    depend on the platform's default locale encoding.

    Raises:
        OSError: if README.md cannot be opened.
    """
    root = os.path.dirname(__file__)
    with open(os.path.join(root, "README.md"), encoding="utf-8") as f:
        return f.read()
|
|
|
|
|
|
|
|
|
2021-03-11 16:41:05 -05:00
|
|
|
# Requirement specifiers that are always installed.
base_requirements = {
    # Actual dependencies.
    "mixpanel>=4.9.0",
    "pydantic>=1.5.1",
    "typing-inspect",
    # Compatibility packages (backports and typing helpers).
    "dataclasses>=0.6; python_version < '3.7'",
    "mypy_extensions>=0.4.3",
    "typing_extensions>=3.10.0.2,<4",
}
|
|
|
|
|
|
|
|
# Requirement specifiers shared by the core framework and every plugin extra.
framework_common = {
    "avro-gen3==0.7.2",
    "avro>=1.10.2,<1.11",
    "click-default-group",
    "click>=6.0.0",
    "docker",
    "entrypoints",
    "expandvars>=0.6.5",
    # A breaking change in Markupsafe broke Jinja and some other libraries.
    # It is pinned to a known-good version even though it is not used
    # explicitly here.
    # https://github.com/aws/aws-sam-cli/issues/3661
    "markupsafe==2.0.1",
    "progressbar2",
    "psutil>=5.8.0",
    "python-dateutil>=2.8.0",
    "PyYAML",
    "stackprinter",
    "tabulate",
    "termcolor>=1.0.0",
    "toml>=0.10.0",
    "types-termcolor>=1.0.0",
}
|
|
|
|
|
|
|
|
# Requirements shared by the Kafka source and the Kafka sink.
#
# Both Avro libraries are currently required: the codegen uses the
# avro-python3 (listed above) schema parsers at runtime for generating and
# reading JSON into Python objects, while Kafka's AvroSerializer relies on
# fastavro internally for serialization. confluent_kafka[avro] is not used
# because it is incompatible with its own dependency on avro-python3.
kafka_common = {
    "fastavro>=1.2.0",
    "confluent_kafka>=1.5.0",
}
|
|
|
|
|
|
|
|
# Requirements shared by the SQL-based sources.
sql_common = {
    # Needed by every SQL source.
    "sqlalchemy==1.3.24",
    # Needed for SQL profiling.
    "greenlet",
    "great-expectations>=0.13.40",
}
|
|
|
|
|
2021-06-29 19:43:31 -07:00
|
|
|
# Requirements shared by the AWS-backed sources.
aws_common = {
    # Works around a version incompatibility between botocore (used by boto3)
    # and urllib3. See https://github.com/boto/botocore/pull/2563.
    "botocore!=1.23.0",
    # The AWS SDK for Python.
    "boto3",
}
|
|
|
|
|
2021-09-16 23:09:45 -07:00
|
|
|
# Requirements shared by the Looker-related plugins: the Looker Python SDK.
looker_common = {"looker-sdk==21.6.0"}
|
|
|
|
|
2021-10-25 17:50:07 -07:00
|
|
|
# Requirements shared by the BigQuery plugins.
bigquery_common = {
    "more-itertools>=8.12.0",
    # Google Cloud logging library.
    "google-cloud-logging",
}
|
|
|
|
|
2022-01-30 13:47:53 -06:00
|
|
|
# Requirements shared by the Snowflake plugins: everything in sql_common plus
# the packages needed by all Snowflake sources.
snowflake_common = sql_common | {
    "cryptography",
    "snowflake-sqlalchemy<=1.2.4",
}
|
|
|
|
|
2022-02-25 02:26:06 -05:00
|
|
|
# Base requirements for the data-lake source: everything in aws_common plus
# the parsing and file-format packages listed here.
data_lake_base = aws_common | {
    "parse>=1.19.0",
    "pyarrow>=6.0.1",
    "smart-open[s3]>=5.2.1",
    "tableschema>=1.20.2",
    "types-ujson>=4.2.1",
    "ujson>=4.3.0",
}
|
|
|
|
|
|
|
|
# Additional requirements for the data-lake source (combined with
# data_lake_base in the "data-lake" plugin).
data_lake_profiling = {"pyspark==3.0.3", "pydeequ==1.0.1"}
|
|
|
|
|
2021-03-11 16:41:05 -05:00
|
|
|
# Maps each plugin (extra) name to the requirement specifiers it needs.
# Note: framework_common is added to every one of these when the extras are
# assembled.
plugins: Dict[str, Set[str]] = {
    # Sink plugins.
    "datahub-kafka": kafka_common,
    "datahub-rest": {"requests"},
    # Integrations.
    "airflow": {"apache-airflow >= 1.10.2"},
    # Source plugins.
    # PyAthena is pinned to an exact version because a private PyAthena method
    # is used.
    "athena": {*sql_common, "PyAthena[SQLAlchemy]==2.4.1"},
    "azure-ad": set(),
    "bigquery": {*sql_common, *bigquery_common, "pybigquery >= 0.6.0"},
    "bigquery-usage": {*bigquery_common, "cachetools"},
    "clickhouse": {*sql_common, "clickhouse-sqlalchemy==0.1.8"},
    "clickhouse-usage": {*sql_common, "clickhouse-sqlalchemy==0.1.8"},
    "datahub-lineage-file": set(),
    "datahub-business-glossary": set(),
    "data-lake": data_lake_base | data_lake_profiling,
    "dbt": {"requests"},
    "druid": {*sql_common, "pydruid>=0.6.2"},
    # Starting with 7.14.0 the Python client checks whether it is connected to
    # an Elasticsearch server and raises UnsupportedProductError if it is not.
    # https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/release-notes.html#rn-7-14-0
    # https://github.com/elastic/elasticsearch-py/issues/1639#issuecomment-883587433
    "elasticsearch": {"elasticsearch==7.13.4"},
    "feast": {"docker"},
    "glue": aws_common,
    # Acryl Data maintains a fork of PyHive, which adds support for table
    # comments and column comments, and also releases HTTP and HTTPS
    # transport schemes.
    "hive": {*sql_common, "acryl-pyhive[hive]>=0.6.11"},
    "kafka": kafka_common,
    "kafka-connect": {*sql_common, "requests", "JPype1"},
    "ldap": {"python-ldap>=2.4"},
    "looker": looker_common,
    # lkml>=1.1.2 is required to support the sql_preamble expression in LookML.
    "lookml": {
        *looker_common,
        "lkml>=1.1.2",
        "sql-metadata==2.2.2",
        "sqllineage==1.3.3",
    },
    "metabase": {"requests", "sqllineage==1.3.3"},
    "mode": {"requests", "sqllineage==1.3.3", "tenacity>=8.0.1"},
    "mongodb": {"pymongo>=3.11", "packaging"},
    "mssql": {*sql_common, "sqlalchemy-pytds>=0.3"},
    "mssql-odbc": {*sql_common, "pyodbc"},
    "mysql": {*sql_common, "pymysql>=1.0.2"},
    # mariadb should have the same dependencies as mysql.
    "mariadb": {*sql_common, "pymysql>=1.0.2"},
    "okta": {"okta~=1.7.0"},
    "oracle": {*sql_common, "cx_Oracle"},
    "postgres": {*sql_common, "psycopg2-binary", "GeoAlchemy2"},
    "redash": {"redash-toolbelt", "sql-metadata", "sqllineage==1.3.3"},
    "redshift": {
        *sql_common,
        "sqlalchemy-redshift",
        "psycopg2-binary",
        "GeoAlchemy2",
        "sqllineage==1.3.3",
    },
    "redshift-usage": {
        *sql_common,
        "sqlalchemy-redshift",
        "psycopg2-binary",
        "GeoAlchemy2",
        "sqllineage==1.3.3",
    },
    "sagemaker": aws_common,
    "snowflake": snowflake_common,
    "snowflake-usage": {*snowflake_common, "more-itertools>=8.12.0"},
    "sqlalchemy": sql_common,
    "superset": {"requests", "sqlalchemy", "great_expectations", "greenlet"},
    "tableau": {"tableauserverclient>=0.17.0"},
    "trino": {*sql_common, "trino"},
    "starburst-trino-usage": {*sql_common, "trino"},
    "nifi": {"requests", "packaging"},
}
|
|
|
|
|
2021-06-07 14:00:35 -07:00
|
|
|
# Plugins left out of the default "all" installation.
# "mssql-odbc": SQL Server ODBC requires additional drivers, so it is not
# bundled into "all".
all_exclude_plugins: Set[str] = {"mssql-odbc"}
|
|
|
|
|
2021-06-08 16:10:16 -07:00
|
|
|
# Type-stub packages (included in the development requirements).
mypy_stubs = {
    "boto3-stubs[s3,glue,sagemaker]",
    "sqlalchemy-stubs",
    "types-cachetools",
    # Versions 0.1.13 and 0.1.14 seem to have issues.
    "types-click==0.1.12",
    "types-dataclasses",
    "types-freezegun",
    "types-pkg_resources",
    "types-PyMySQL",
    "types-python-dateutil",
    "types-PyYAML",
    "types-requests",
    "types-six",
    "types-tabulate",
    "types-toml",
}
|
|
|
|
|
2021-04-26 16:44:36 -07:00
|
|
|
# Development requirements common to every dev environment: the runtime
# requirement sets, type stubs, tooling, and the requirements of a fixed list
# of plugins.
base_dev_requirements = {
    *base_requirements,
    *framework_common,
    *mypy_stubs,
    *data_lake_base,
    # Formatting, linting, type checking and test tooling.
    "black>=21.12b0",
    "coverage>=5.1",
    "flake8>=3.8.3",
    "flake8-tidy-imports>=4.3.0",
    "isort>=5.7.0",
    "mypy>=0.920",
    # pydantic 1.8.2 is incompatible with mypy 0.910.
    # See https://github.com/samuelcolvin/pydantic/pull/3175#issuecomment-995382910.
    "pydantic>=1.9.0",
    "pytest>=6.2.2",
    "pytest-asyncio>=0.16.0",
    "pytest-cov>=2.8.1",
    "pytest-docker>=0.10.3",
    "tox",
    "deepdiff",
    "requests-mock",
    "freezegun",
    "jsonpickle",
    "build",
    "twine",
    "packaging",
    # Requirements of the plugins named below. airflow is not listed here;
    # it is added below.
    *(
        requirement
        for plugin_name in (
            "bigquery",
            "bigquery-usage",
            "clickhouse",
            "clickhouse-usage",
            "elasticsearch",
            "looker",
            "glue",
            "mariadb",
            "okta",
            "oracle",
            "postgres",
            "sagemaker",
            "datahub-kafka",
            "datahub-rest",
            "redash",
            "redshift",
            "redshift-usage",
            "data-lake",
            "tableau",
            "trino",
            "hive",
            "starburst-trino-usage",
        )
        for requirement in plugins[plugin_name]
    ),
}
|
|
|
|
|
2021-05-13 21:42:53 +03:00
|
|
|
if is_py37_or_newer:
    # The lookml plugin only works on Python 3.7 or newer, so its requirements
    # are only added to the dev set on those interpreters.
    base_dev_requirements = base_dev_requirements | set(plugins["lookml"])
|
|
|
|
|
2021-04-26 16:44:36 -07:00
|
|
|
# Default development requirements: the shared dev set plus Airflow 2.
dev_requirements = base_dev_requirements | {
    # The snowflake extra is used in the example DAGs.
    "apache-airflow[snowflake]>=2.0.2",
    # Keeps the constraint consistent with the extras.
    "snowflake-sqlalchemy<=1.2.4",
}
|
2022-02-17 03:33:28 -05:00
|
|
|
# Airflow 1.x specific development requirements.
dev_requirements_airflow_1_base = {
    "apache-airflow-backport-providers-snowflake",
    "apache-airflow==1.10.15",
    # The two pins below keep the constraints consistent with the extras.
    "snowflake-sqlalchemy<=1.2.4",
    "WTForms==2.3.3",
}
|
2022-02-17 03:33:28 -05:00
|
|
|
# Development requirements for Airflow 1.x: the shared dev set combined with
# the Airflow 1.x specific requirements.
dev_requirements_airflow_1 = base_dev_requirements | dev_requirements_airflow_1_base
|
2021-04-26 16:44:36 -07:00
|
|
|
|
2021-07-14 20:02:48 -07:00
|
|
|
# Requirements of the plugins named below, exposed as the "integration-tests"
# extra.
full_test_dev_requirements = {
    requirement
    for plugin_name in (
        # Athena is only included on Python 3.7 or newer.
        (["athena"] if is_py37_or_newer else [])
        + [
            "clickhouse",
            "druid",
            "feast",
            "hive",
            "ldap",
            "mongodb",
            "mssql",
            "mysql",
            "mariadb",
            "snowflake",
            "redash",
            "kafka-connect",
        ]
    )
    for requirement in plugins[plugin_name]
}
|
2021-03-11 16:41:05 -05:00
|
|
|
|
2021-05-13 21:42:53 +03:00
|
|
|
# Entry-point groups advertised by the package. Each entry has the form
# "name = importable.module:object"; the colon separates the module to import
# from the attribute to fetch from it.
entry_points = {
    "console_scripts": ["datahub = datahub.entrypoints:main"],
    "datahub.ingestion.source.plugins": [
        "file = datahub.ingestion.source.file:GenericFileSource",
        "sqlalchemy = datahub.ingestion.source.sql.sql_generic:SQLAlchemyGenericSource",
        "athena = datahub.ingestion.source.sql.athena:AthenaSource",
        "azure-ad = datahub.ingestion.source.identity.azure_ad:AzureADSource",
        "bigquery = datahub.ingestion.source.sql.bigquery:BigQuerySource",
        "bigquery-usage = datahub.ingestion.source.usage.bigquery_usage:BigQueryUsageSource",
        "clickhouse = datahub.ingestion.source.sql.clickhouse:ClickHouseSource",
        "clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsageSource",
        "data-lake = datahub.ingestion.source.data_lake:DataLakeSource",
        "dbt = datahub.ingestion.source.dbt:DBTSource",
        "druid = datahub.ingestion.source.sql.druid:DruidSource",
        "elasticsearch = datahub.ingestion.source.elastic_search:ElasticsearchSource",
        "feast = datahub.ingestion.source.feast:FeastSource",
        "glue = datahub.ingestion.source.aws.glue:GlueSource",
        "sagemaker = datahub.ingestion.source.aws.sagemaker:SagemakerSource",
        "hive = datahub.ingestion.source.sql.hive:HiveSource",
        "kafka = datahub.ingestion.source.kafka:KafkaSource",
        "kafka-connect = datahub.ingestion.source.kafka_connect:KafkaConnectSource",
        "ldap = datahub.ingestion.source.ldap:LDAPSource",
        "looker = datahub.ingestion.source.looker:LookerDashboardSource",
        "lookml = datahub.ingestion.source.lookml:LookMLSource",
        "datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource",
        "datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource",
        "mode = datahub.ingestion.source.mode:ModeSource",
        "mongodb = datahub.ingestion.source.mongodb:MongoDBSource",
        "mssql = datahub.ingestion.source.sql.mssql:SQLServerSource",
        "mysql = datahub.ingestion.source.sql.mysql:MySQLSource",
        # Uses "module:Class" like every other entry; the previous
        # "module.Class" spelling made the whole dotted path a module name.
        "mariadb = datahub.ingestion.source.sql.mariadb:MariaDBSource",
        "okta = datahub.ingestion.source.identity.okta:OktaSource",
        "oracle = datahub.ingestion.source.sql.oracle:OracleSource",
        "postgres = datahub.ingestion.source.sql.postgres:PostgresSource",
        "redash = datahub.ingestion.source.redash:RedashSource",
        "redshift = datahub.ingestion.source.sql.redshift:RedshiftSource",
        "redshift-usage = datahub.ingestion.source.usage.redshift_usage:RedshiftUsageSource",
        "snowflake = datahub.ingestion.source.sql.snowflake:SnowflakeSource",
        "snowflake-usage = datahub.ingestion.source.usage.snowflake_usage:SnowflakeUsageSource",
        "superset = datahub.ingestion.source.superset:SupersetSource",
        "tableau = datahub.ingestion.source.tableau:TableauSource",
        "openapi = datahub.ingestion.source.openapi:OpenApiSource",
        "metabase = datahub.ingestion.source.metabase:MetabaseSource",
        "trino = datahub.ingestion.source.sql.trino:TrinoSource",
        "starburst-trino-usage = datahub.ingestion.source.usage.starburst_trino_usage:TrinoUsageSource",
        "nifi = datahub.ingestion.source.nifi:NifiSource",
    ],
    "datahub.ingestion.sink.plugins": [
        "file = datahub.ingestion.sink.file:FileSink",
        "console = datahub.ingestion.sink.console:ConsoleSink",
        "datahub-kafka = datahub.ingestion.sink.datahub_kafka:DatahubKafkaSink",
        "datahub-rest = datahub.ingestion.sink.datahub_rest:DatahubRestSink",
    ],
    "datahub.ingestion.checkpointing_provider.plugins": [
        "datahub = datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider:DatahubIngestionCheckpointingProvider",
    ],
    "datahub.ingestion.reporting_provider.plugins": [
        "datahub = datahub.ingestion.reporting.datahub_ingestion_reporting_provider:DatahubIngestionReportingProvider",
    ],
    "apache_airflow_provider": ["provider_info=datahub_provider:get_provider_info"],
}
|
|
|
|
|
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
# Register the package: metadata first, then packaging layout, then the
# dependency sets assembled above.
setuptools.setup(
    # Package metadata.
    name=package_metadata["__package_name__"],
    version=package_metadata["__version__"],
    description="A CLI to work with DataHub metadata",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    license="Apache License 2.0",
    url="https://datahubproject.io/",
    project_urls={
        "Documentation": "https://datahubproject.io/docs/",
        "Source": "https://github.com/linkedin/datahub",
        "Changelog": "https://github.com/linkedin/datahub/releases",
    },
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Intended Audience :: System Administrators",
        "License :: OSI Approved",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: Unix",
        "Operating System :: POSIX :: Linux",
        "Environment :: Console",
        "Environment :: MacOS X",
        "Topic :: Software Development",
    ],
    # Package info.
    zip_safe=False,
    # Python is restricted to <=3.9.9 due to
    # https://github.com/looker-open-source/sdk-codegen/issues/944
    python_requires=">=3.6, <=3.9.9",
    package_dir={"": "src"},
    packages=setuptools.find_namespace_packages(where="./src"),
    package_data={
        "datahub": ["py.typed"],
        "datahub.metadata": ["schema.avsc"],
        "datahub.metadata.schemas": ["*.avsc"],
        "datahub.ingestion.source.feast_image": ["Dockerfile", "requirements.txt"],
    },
    entry_points=entry_points,
    # Dependencies.
    install_requires=list(base_requirements.union(framework_common)),
    extras_require={
        "base": list(framework_common),
        # One extra per plugin, each including framework_common.
        **{
            extra_name: list(framework_common.union(extra_requirements))
            for extra_name, extra_requirements in plugins.items()
        },
        # Every plugin's requirements except those in all_exclude_plugins.
        "all": list(
            framework_common.union(
                *(
                    extra_requirements
                    for extra_name, extra_requirements in plugins.items()
                    if extra_name not in all_exclude_plugins
                )
            )
        ),
        "dev": list(dev_requirements),
        "dev-airflow1-base": list(dev_requirements_airflow_1_base),
        "dev-airflow1": list(dev_requirements_airflow_1),
        "integration-tests": list(full_test_dev_requirements),
    },
)
|