2021-03-11 16:41:05 -05:00
|
|
|
from typing import Dict, Set
|
2021-01-31 22:40:30 -08:00
|
|
|
import os
|
2021-02-11 23:14:20 -08:00
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
import setuptools
|
|
|
|
|
|
|
|
|
|
|
|
def get_version() -> str:
    """Return the package version recorded on the first line of CHANGELOG.

    The CHANGELOG file is looked up in the directory containing this file.
    Leading and trailing whitespace (including the newline) is stripped.

    Raises:
        OSError: if the CHANGELOG file cannot be opened.
    """
    root = os.path.dirname(__file__)
    changelog = os.path.join(root, "CHANGELOG")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale of the machine running the build.
    with open(changelog, encoding="utf-8") as f:
        return f.readline().strip()
|
|
|
|
|
|
|
|
|
|
|
|
def get_long_description() -> str:
    """Build the long description from README.md followed by the CHANGELOG.

    Both files are read from the directory containing this file. The
    CHANGELOG contents are appended under a "Changelog" heading.

    Raises:
        OSError: if either file cannot be opened.
    """
    root = os.path.dirname(__file__)

    # Decode explicitly as UTF-8: without it, non-ASCII characters in either
    # file make the build fail (or produce mojibake) on machines whose
    # default locale encoding is not UTF-8.
    with open(os.path.join(root, "README.md"), encoding="utf-8") as f:
        description = f.read()

    description += "\n\nChangelog\n=========\n\n"

    with open(os.path.join(root, "CHANGELOG"), encoding="utf-8") as f:
        description += f.read()

    return description
|
|
|
|
|
|
|
|
|
2021-03-11 16:41:05 -05:00
|
|
|
# Requirements that are always installed, whichever plugins are selected.
base_requirements: Set[str] = {
    # Compatibility shims.
    "mypy_extensions>=0.4.3",
    "typing_extensions>=3.7.4; python_version < '3.8'",
    "dataclasses>=0.6; python_version < '3.7'",
    # Actual dependencies.
    "pydantic>=1.5.1",
    "typing-inspect",
}
|
|
|
|
|
|
|
|
# Requirements of the ingestion framework itself; merged into every plugin
# extra and into install_requires further down in this file.
framework_common: Set[str] = {
    "avro-gen3==0.3.8",
    "avro-python3>=1.8.2",
    "click>=7.1.1",
    "docker>=4.4",
    "expandvars>=0.6.5",
    "pyyaml>=5.4.1",
    "toml>=0.10.0",
}
|
|
|
|
|
|
|
|
kafka_common: Set[str] = {
    # Both Avro libraries are currently required:
    #   * avro-python3 (listed in framework_common) supplies the schema
    #     parsers the codegen uses at runtime for generating and reading
    #     JSON into Python objects.
    #   * fastavro is what Kafka's AvroSerializer relies on internally for
    #     serialization.
    # confluent_kafka[avro] is deliberately not used, since it is
    # incompatible with its own dependency on avro-python3.
    "fastavro>=1.3.0",
    "confluent_kafka>=1.5.0",
}
|
|
|
|
|
|
|
|
# Shared by every SQL-based source plugin.
sql_common: Set[str] = {"sqlalchemy>=1.3.23"}
|
|
|
|
|
|
|
|
# Maps each plugin name to its own extra requirements.
# Note: framework_common is added to every entry when the extras are built.
plugins: Dict[str, Set[str]] = {
    # Source plugins
    "kafka": kafka_common,
    "athena": {*sql_common, "PyAthena[SQLAlchemy]"},
    "bigquery": {
        *sql_common,
        # Pinned to a specific pybigquery commit because it is needed to get
        # table descriptions correctly; switch back to a normal pybigquery
        # reference once a new version is released to PyPI.
        # Details are in this PR by hsheth2:
        # https://github.com/tswast/pybigquery/pull/82.
        "pybigquery @ git+https://github.com/tswast/pybigquery@3250fa796b28225cb1c89d7afea3c2e2a2bf2305#egg=pybigquery",
    },
    "hive": {*sql_common, "pyhive[hive]"},
    "mssql": {*sql_common, "sqlalchemy-pytds>=0.3"},
    "mysql": {*sql_common, "pymysql>=1.0.2"},
    "postgres": {*sql_common, "psycopg2-binary", "GeoAlchemy2"},
    "snowflake": {*sql_common, "snowflake-sqlalchemy"},
    "ldap": {"python-ldap>=2.4"},
    "druid": {*sql_common, "pydruid>=0.6.2"},
    "mongodb": {"pymongo>=3.11"},
    "glue": {"boto3"},
    # Sink plugins.
    "datahub-kafka": kafka_common,
    "datahub-rest": {"requests>=2.25.1"},
}
|
|
|
|
|
|
|
|
# Everything needed for local development: the base and framework
# requirements, the development tooling, and the plugins used by the tests.
dev_requirements: Set[str] = (
    base_requirements
    | framework_common
    | {
        "black>=19.10b0",
        "coverage>=5.1",
        "flake8>=3.8.3",
        "isort>=5.7.0",
        "mypy>=0.782",
        "pytest>=6.2.2",
        "pytest-cov>=2.8.1",
        "pytest-docker",
        "sqlalchemy-stubs",
        "deepdiff",
        "freezegun",
        "botocore",
    }
    # Also add the plugins which are used for tests.
    | set().union(
        *(
            plugins[plugin_name]
            for plugin_name in (
                "bigquery",
                "mysql",
                "mssql",
                "mongodb",
                "ldap",
                "datahub-kafka",
                "datahub-rest",
                "glue",
            )
        )
    )
)
|
|
|
|
|
|
|
|
|
2021-01-31 22:40:30 -08:00
|
|
|
# Package definition. The requirement sets are converted with sorted()
# rather than list(): iteration order of a set of strings changes between
# interpreter runs (string hash randomisation), and sorting keeps the
# generated metadata identical from one build to the next.
setuptools.setup(
    name="datahub",
    version=get_version(),
    url="https://github.com/linkedin/datahub",
    author="DataHub Committers",
    license="Apache License 2.0",
    description="A CLI to work with DataHub metadata",
    long_description=get_long_description(),
    long_description_content_type="text/markdown",
    classifiers=[
        "Development Status :: 5 - Production/Stable",
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Intended Audience :: Developers",
        "Intended Audience :: Information Technology",
        "Intended Audience :: System Administrators",
        "License :: OSI Approved",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: Unix",
        "Operating System :: POSIX :: Linux",
        "Environment :: Console",
        "Environment :: MacOS X",
        "Topic :: Software Development",
    ],
    python_requires=">=3.6",
    package_dir={"": "src"},
    packages=setuptools.find_namespace_packages(where="./src"),
    include_package_data=True,
    package_data={
        "datahub": ["py.typed"],
        "datahub.metadata": ["schema.avsc"],
    },
    entry_points={
        "console_scripts": ["datahub = datahub.entrypoints:datahub"],
        "datahub.ingestion.source.plugins": [
            "file = datahub.ingestion.source.mce_file:MetadataFileSource",
            "athena = datahub.ingestion.source.athena:AthenaSource",
            "bigquery = datahub.ingestion.source.bigquery:BigQuerySource",
            "dbt = datahub.ingestion.source.dbt:DBTSource",
            "druid = datahub.ingestion.source.druid:DruidSource",
            "glue = datahub.ingestion.source.glue:GlueSource",
            "hive = datahub.ingestion.source.hive:HiveSource",
            "kafka = datahub.ingestion.source.kafka:KafkaSource",
            "ldap = datahub.ingestion.source.ldap:LDAPSource",
            "mongodb = datahub.ingestion.source.mongodb:MongoDBSource",
            "mssql = datahub.ingestion.source.mssql:SQLServerSource",
            "mysql = datahub.ingestion.source.mysql:MySQLSource",
            "postgres = datahub.ingestion.source.postgres:PostgresSource",
            "snowflake = datahub.ingestion.source.snowflake:SnowflakeSource",
        ],
        "datahub.ingestion.sink.plugins": [
            "file = datahub.ingestion.sink.file:FileSink",
            "console = datahub.ingestion.sink.console:ConsoleSink",
            "datahub-kafka = datahub.ingestion.sink.datahub_kafka:DatahubKafkaSink",
            "datahub-rest = datahub.ingestion.sink.datahub_rest:DatahubRestSink",
        ],
    },
    install_requires=sorted(base_requirements | framework_common),
    extras_require={
        "base": sorted(framework_common),
        # One extra per plugin: its own requirements plus framework_common.
        **{
            plugin: sorted(framework_common | dependencies)
            for plugin, dependencies in plugins.items()
        },
        # "all" is the union of every plugin's requirements.
        "all": sorted(framework_common.union(*plugins.values())),
        "dev": sorted(dev_requirements),
    },
)
|