2021-01-31 22:40:30 -08:00
|
|
|
import os
|
2021-04-05 19:11:28 -07:00
|
|
|
from typing import Dict, Set
|
2021-02-11 23:14:20 -08:00
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
import setuptools
|
|
|
|
|
2021-04-05 19:11:28 -07:00
|
|
|
# Collect the package's dunder metadata (e.g. __package_name__, __version__)
# by executing its __init__.py into a scratch namespace instead of importing
# the package.
package_metadata: dict = {}
# Python source files are UTF-8 by default, so decode explicitly rather than
# relying on the build machine's locale encoding.
with open("./src/datahub/__init__.py", encoding="utf-8") as fp:
    exec(fp.read(), package_metadata)
|
2021-01-31 22:40:30 -08:00
|
|
|
|
|
|
|
|
|
|
|
def get_long_description():
    """Return the text of the README.md located next to this file.

    Returns:
        str: The full README contents.

    Raises:
        OSError: If README.md cannot be opened.
        UnicodeDecodeError: If README.md is not valid UTF-8.
    """
    root = os.path.dirname(__file__)
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the machine running the build.
    with open(os.path.join(root, "README.md"), encoding="utf-8") as f:
        return f.read()
|
|
|
|
|
|
|
|
|
2021-03-11 16:41:05 -05:00
|
|
|
base_requirements = {
    # Actual dependencies.
    "pydantic>=1.5.1",
    "typing-inspect",
    # Compatibility.
    "mypy_extensions>=0.4.3",
    "typing_extensions>=3.7.4; python_version < '3.8'",
    "dataclasses>=0.6; python_version < '3.7'",
}
|
|
|
|
|
|
|
|
# Requirement specifiers shared by the framework itself (kept alphabetical).
framework_common = {
    "PyYAML",
    "avro-gen3==0.4.1",
    "avro-python3>=1.8.2",
    "click>=6.0.0",
    "docker",
    "expandvars>=0.6.5",
    "python-dateutil",
    "toml>=0.10.0",
}
|
|
|
|
|
|
|
|
kafka_common = {
    # Both Avro libraries are currently required:
    #   * avro-python3 (listed above) supplies the schema parsers that the
    #     codegen uses at runtime for generating and reading JSON into Python
    #     objects.
    #   * fastavro is what Kafka's AvroSerializer relies on internally for
    #     serialization.
    # confluent_kafka[avro] is deliberately not used, since it is incompatible
    # with its own dep on avro-python3.
    "fastavro>=1.2.0",
    "confluent_kafka>=1.5.0",
}
|
|
|
|
|
|
|
|
# Required for all SQL sources.
sql_common = {"sqlalchemy>=1.3.24"}
|
|
|
|
|
|
|
|
# Maps each plugin (extra) name to the requirement specifiers it needs.
# Note: for all of these, framework_common will be added.
plugins: Dict[str, Set[str]] = {
    # Sink plugins.
    "datahub-kafka": kafka_common,
    "datahub-rest": {"requests"},
    # Integrations.
    "airflow": {"apache-airflow >= 1.10.2"},
    # Source plugins that need no SQL driver.
    "kafka": kafka_common,
    "ldap": {"python-ldap>=2.4"},
    "mongodb": {"pymongo>=3.11"},
    "glue": {"boto3"},
    # Source plugins built on the common SQL requirements.
    "sqlalchemy": sql_common,
    "athena": {*sql_common, "PyAthena[SQLAlchemy]"},
    "bigquery": {*sql_common, "pybigquery >= 0.6.0"},
    "druid": {*sql_common, "pydruid>=0.6.2"},
    "hive": {*sql_common, "pyhive[hive]"},
    "mssql": {*sql_common, "sqlalchemy-pytds>=0.3"},
    "mysql": {*sql_common, "pymysql>=1.0.2"},
    "oracle": {*sql_common, "cx_Oracle"},
    "postgres": {*sql_common, "psycopg2-binary", "GeoAlchemy2"},
    "snowflake": {*sql_common, "snowflake-sqlalchemy"},
}
|
|
|
|
|
|
|
|
# Everything needed for local development: the runtime requirements, the
# lint/test/release tooling, and the dependencies of the plugins exercised by
# the tests.
dev_requirements = {
    *base_requirements,
    *framework_common,
    # Formatting, linting and type checking.
    "black>=19.10b0",
    "flake8>=3.8.3",
    "isort>=5.7.0",
    "mypy>=0.782",
    "sqlalchemy-stubs",
    # Testing.
    "coverage>=5.1",
    "pytest>=6.2.2",
    "pytest-cov>=2.8.1",
    "pytest-docker",
    "deepdiff",
    "freezegun",
    # Packaging and release.
    "build",
    "twine",
    # Also add the plugins which are used for tests.
    "apache-airflow==1.10.15",
    "apache-airflow-backport-providers-snowflake",  # Used in the example DAGs.
    # "apache-airflow>=2.0.2",
    # "apache-airflow-providers-snowflake",
    *(
        requirement
        for plugin_name in (
            "bigquery",
            "mysql",
            "mssql",
            "mongodb",
            "ldap",
            "glue",
            "datahub-kafka",
            "datahub-rest",
            "airflow",
        )
        for requirement in plugins[plugin_name]
    ),
}
|
|
|
|
|
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
# Register the package with setuptools. Requirement collections are kept as
# sets elsewhere in this file; they are passed through sorted() here because
# set iteration order for strings varies between interpreter runs, which would
# otherwise make the generated metadata differ from build to build.
setuptools.setup(
    # Package metadata.
    name=package_metadata["__package_name__"],
    version=package_metadata["__version__"],
    url="https://datahubproject.io/",
    project_urls={
        "Documentation": "https://datahubproject.io/docs/",
        "Source": "https://github.com/linkedin/datahub",
        "Changelog": "https://github.com/linkedin/datahub/releases",
    },
    author="DataHub Committers",
    license="Apache License 2.0",
    description="A CLI to work with DataHub metadata",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Intended Audience :: System Administrators",
        "License :: OSI Approved",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: Unix",
        "Operating System :: POSIX :: Linux",
        "Environment :: Console",
        "Environment :: MacOS X",
        "Topic :: Software Development",
    ],
    # Package info.
    zip_safe=False,
    python_requires=">=3.6",
    package_dir={"": "src"},
    packages=setuptools.find_namespace_packages(where="./src"),
    include_package_data=True,
    package_data={
        "datahub": ["py.typed"],
        "datahub.metadata": ["schema.avsc"],
    },
    entry_points={
        "console_scripts": ["datahub = datahub.entrypoints:datahub"],
        "datahub.ingestion.source.plugins": [
            "file = datahub.ingestion.source.mce_file:MetadataFileSource",
            "sqlalchemy = datahub.ingestion.source.sql_generic:SQLAlchemyGenericSource",
            "athena = datahub.ingestion.source.athena:AthenaSource",
            "bigquery = datahub.ingestion.source.bigquery:BigQuerySource",
            "dbt = datahub.ingestion.source.dbt:DBTSource",
            "druid = datahub.ingestion.source.druid:DruidSource",
            "glue = datahub.ingestion.source.glue:GlueSource",
            "hive = datahub.ingestion.source.hive:HiveSource",
            "kafka = datahub.ingestion.source.kafka:KafkaSource",
            "ldap = datahub.ingestion.source.ldap:LDAPSource",
            "mongodb = datahub.ingestion.source.mongodb:MongoDBSource",
            "mssql = datahub.ingestion.source.mssql:SQLServerSource",
            "mysql = datahub.ingestion.source.mysql:MySQLSource",
            "oracle = datahub.ingestion.source.oracle:OracleSource",
            "postgres = datahub.ingestion.source.postgres:PostgresSource",
            "snowflake = datahub.ingestion.source.snowflake:SnowflakeSource",
        ],
        "datahub.ingestion.sink.plugins": [
            "file = datahub.ingestion.sink.file:FileSink",
            "console = datahub.ingestion.sink.console:ConsoleSink",
            "datahub-kafka = datahub.ingestion.sink.datahub_kafka:DatahubKafkaSink",
            "datahub-rest = datahub.ingestion.sink.datahub_rest:DatahubRestSink",
        ],
        "apache_airflow_provider": [
            "provider_info=datahub.integrations.airflow.get_provider_info:get_provider_info"
        ],
        "airflow.plugins": [
            "datahub = datahub.integrations.airflow.get_provider_info:DatahubAirflowPlugin"
        ],
    },
    # Dependencies.
    install_requires=sorted(base_requirements | framework_common),
    extras_require={
        "base": sorted(framework_common),
        # One extra per plugin; each also pulls in the framework requirements.
        **{
            plugin: sorted(framework_common | dependencies)
            for (plugin, dependencies) in plugins.items()
        },
        "all": sorted(framework_common.union(*plugins.values())),
        "dev": sorted(dev_requirements),
    },
)
|