mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-04 04:29:13 +00:00 
			
		
		
		
	* Change how status is handled after running workflow * Reset changes in config files * Add auxiliary Summary class * Improve failures handling * Pylint error * Pylint error * Show result in table * Add test * Fix setup.py * Add comments
		
			
				
	
	
		
			291 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			291 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
#  Copyright 2021 Collate
 | 
						|
#  Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
#  you may not use this file except in compliance with the License.
 | 
						|
#  You may obtain a copy of the License at
 | 
						|
#  http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
#  Unless required by applicable law or agreed to in writing, software
 | 
						|
#  distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
#  See the License for the specific language governing permissions and
 | 
						|
#  limitations under the License.
 | 
						|
 | 
						|
"""
 | 
						|
Python Dependencies
 | 
						|
"""
 | 
						|
 | 
						|
import os
 | 
						|
from typing import Dict, Set
 | 
						|
 | 
						|
from setuptools import find_namespace_packages, setup
 | 
						|
 | 
						|
 | 
						|
def get_long_description():
 | 
						|
    root = os.path.dirname(__file__)
 | 
						|
    with open(os.path.join(root, "README.md"), encoding="UTF-8") as file:
 | 
						|
        description = file.read()
 | 
						|
    return description
 | 
						|
 | 
						|
 | 
						|
# Add here versions required for multiple plugins
 | 
						|
VERSIONS = {
 | 
						|
    "airflow": "apache-airflow==2.3.3",
 | 
						|
    "avro": "avro~=1.11",
 | 
						|
    "boto3": "boto3>=1.20,<2.0",  # No need to add botocore separately. It's a dep from boto3
 | 
						|
    "geoalchemy2": "GeoAlchemy2~=0.12",
 | 
						|
    "google-cloud-storage": "google-cloud-storage==1.43.0",
 | 
						|
    "great-expectations": "great-expectations~=0.15.0",
 | 
						|
    "grpc-tools": "grpcio-tools>=1.47.2",
 | 
						|
    "msal": "msal~=1.2",
 | 
						|
    "neo4j": "neo4j~=5.3.0",
 | 
						|
    "pandas": "pandas==1.3.5",
 | 
						|
    "pyarrow": "pyarrow~=8.0",
 | 
						|
    "pydomo": "pydomo~=0.3",
 | 
						|
    "pymysql": "pymysql>=1.0.2",
 | 
						|
    "pyodbc": "pyodbc>=4.0.35,<5",
 | 
						|
    "scikit-learn": "scikit-learn~=1.0",  # Python 3.7 only goes up to 1.0.2
 | 
						|
}
 | 
						|
 | 
						|
COMMONS = {
 | 
						|
    "datalake": {VERSIONS["boto3"], VERSIONS["pandas"], VERSIONS["pyarrow"]},
 | 
						|
    "hive": {
 | 
						|
        "presto-types-parser>=0.0.2",
 | 
						|
        "pyhive~=0.6",
 | 
						|
    },
 | 
						|
    "kafka": {
 | 
						|
        VERSIONS["avro"],
 | 
						|
        "confluent_kafka==1.8.2",
 | 
						|
        "fastavro>=1.2.0",
 | 
						|
        # Due to https://github.com/grpc/grpc/issues/30843#issuecomment-1303816925
 | 
						|
        # use >= v1.47.2 https://github.com/grpc/grpc/blob/v1.47.2/tools/distrib/python/grpcio_tools/grpc_version.py#L17
 | 
						|
        VERSIONS[
 | 
						|
            "grpc-tools"
 | 
						|
        ],  # grpcio-tools already depends on grpcio. No need to add separately
 | 
						|
        "protobuf",
 | 
						|
    },
 | 
						|
}
 | 
						|
 | 
						|
# required library for pii tagging
 | 
						|
pii_requirements = {
 | 
						|
    "spacy==3.5.0",
 | 
						|
    (
 | 
						|
        "en-core-web-md @ https://github.com/explosion/spacy-models/releases/download/"
 | 
						|
        "en_core_web_md-3.5.0/en_core_web_md-3.5.0-py3-none-any.whl"
 | 
						|
    ),
 | 
						|
    VERSIONS["pandas"],
 | 
						|
    "presidio-analyzer==2.2.32",
 | 
						|
}
 | 
						|
 | 
						|
base_requirements = {
 | 
						|
    "antlr4-python3-runtime==4.9.2",
 | 
						|
    VERSIONS["avro"],  # Used in sample data
 | 
						|
    VERSIONS["boto3"],  # Required in base for the secrets manager
 | 
						|
    "cached-property==1.5.2",
 | 
						|
    "chardet==4.0.0",
 | 
						|
    "croniter~=1.3.0",
 | 
						|
    "cryptography",
 | 
						|
    "commonregex",
 | 
						|
    "email-validator>=1.0.3",
 | 
						|
    "google>=3.0.0",
 | 
						|
    "google-auth>=1.33.0",
 | 
						|
    VERSIONS["grpc-tools"],  # Used in sample data
 | 
						|
    "idna<3,>=2.5",
 | 
						|
    "importlib-metadata~=4.12.0",  # From airflow constraints
 | 
						|
    "Jinja2>=2.11.3",
 | 
						|
    "jsonschema",
 | 
						|
    "mypy_extensions>=0.4.3",
 | 
						|
    "pydantic~=1.10",
 | 
						|
    VERSIONS["pymysql"],
 | 
						|
    "python-dateutil>=2.8.1",
 | 
						|
    "python-jose~=3.3",
 | 
						|
    "PyYAML",
 | 
						|
    "requests>=2.23",
 | 
						|
    "requests-aws4auth~=1.1",  # Only depends on requests as external package. Leaving as base.
 | 
						|
    "setuptools~=65.6.3",
 | 
						|
    "sqlalchemy>=1.4.0,<2",
 | 
						|
    "openmetadata-sqllineage==1.0.2",
 | 
						|
    "tabulate==0.9.0",
 | 
						|
    "typing-compat~=0.1.0",  # compatibility requirements for 3.7
 | 
						|
    "typing-inspect",
 | 
						|
    "wheel~=0.38.4",
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
plugins: Dict[str, Set[str]] = {
 | 
						|
    "airflow": {VERSIONS["airflow"]},  # Same as ingestion container. For development.
 | 
						|
    "amundsen": {VERSIONS["neo4j"]},
 | 
						|
    "athena": {"PyAthena[SQLAlchemy]"},
 | 
						|
    "atlas": {},
 | 
						|
    "azuresql": {VERSIONS["pyodbc"]},
 | 
						|
    "azure-sso": {VERSIONS["msal"]},
 | 
						|
    "backup": {VERSIONS["boto3"], "azure-identity", "azure-storage-blob"},
 | 
						|
    "bigquery": {
 | 
						|
        "cachetools",
 | 
						|
        "google-cloud-datacatalog==3.6.2",
 | 
						|
        "google-cloud-logging",
 | 
						|
        VERSIONS["pyarrow"],
 | 
						|
        "sqlalchemy-bigquery>=1.2.2",
 | 
						|
    },
 | 
						|
    "clickhouse": {"clickhouse-driver~=0.2", "clickhouse-sqlalchemy~=0.2"},
 | 
						|
    "dagster": {
 | 
						|
        VERSIONS["pymysql"],
 | 
						|
        "psycopg2-binary",
 | 
						|
        VERSIONS["geoalchemy2"],
 | 
						|
        "dagster_graphql~=1.1",
 | 
						|
    },
 | 
						|
    "dbt": {
 | 
						|
        "google-cloud",
 | 
						|
        VERSIONS["boto3"],
 | 
						|
        VERSIONS["google-cloud-storage"],
 | 
						|
        "dbt-artifacts-parser",
 | 
						|
    },
 | 
						|
    "db2": {"ibm-db-sa~=0.3"},
 | 
						|
    "databricks": {"sqlalchemy-databricks~=0.1"},
 | 
						|
    "datalake-azure": {
 | 
						|
        "azure-storage-blob~=12.14",
 | 
						|
        "azure-identity~=1.12",
 | 
						|
        "adlfs>=2022.2.0",  # Python 3.7 does only support up to 2022.2.0
 | 
						|
        *COMMONS["datalake"],
 | 
						|
    },
 | 
						|
    "datalake-gcs": {
 | 
						|
        VERSIONS["google-cloud-storage"],
 | 
						|
        "gcsfs==2022.11.0",
 | 
						|
        *COMMONS["datalake"],
 | 
						|
    },
 | 
						|
    "datalake-s3": {
 | 
						|
        # requires aiobotocore
 | 
						|
        # https://github.com/fsspec/s3fs/blob/9bf99f763edaf7026318e150c4bd3a8d18bb3a00/requirements.txt#L1
 | 
						|
        # however, the latest version of `s3fs` conflicts its `aiobotocore` dep with `boto3`'s dep on `botocore`.
 | 
						|
        # Leaving this marked to the automatic resolution to speed up installation.
 | 
						|
        "s3fs==0.4.2",
 | 
						|
        *COMMONS["datalake"],
 | 
						|
    },
 | 
						|
    "deltalake": {"delta-spark~=2.2"},
 | 
						|
    "docker": {"python_on_whales==0.55.0"},
 | 
						|
    "domo": {VERSIONS["pydomo"]},
 | 
						|
    "druid": {"pydruid>=0.6.5"},
 | 
						|
    "dynamodb": {VERSIONS["boto3"]},
 | 
						|
    "elasticsearch": {
 | 
						|
        "elasticsearch==7.13.1"
 | 
						|
    },  # also requires requests-aws4auth which is in base
 | 
						|
    "glue": {VERSIONS["boto3"]},
 | 
						|
    "great-expectations": {VERSIONS["great-expectations"]},
 | 
						|
    "hive": {
 | 
						|
        *COMMONS["hive"],
 | 
						|
        "thrift>=0.13,<1",
 | 
						|
        "sasl~=0.3",
 | 
						|
        "thrift-sasl~=0.4",
 | 
						|
        "impyla~=0.18.0",
 | 
						|
    },
 | 
						|
    "kafka": {*COMMONS["kafka"]},
 | 
						|
    "kinesis": {VERSIONS["boto3"]},
 | 
						|
    "ldap-users": {"ldap3==2.9.1"},
 | 
						|
    "looker": {"looker-sdk>=22.20.0"},
 | 
						|
    "mlflow": {"mlflow-skinny~=1.30"},
 | 
						|
    "mssql": {"sqlalchemy-pytds~=0.3"},
 | 
						|
    "mssql-odbc": {VERSIONS["pyodbc"]},
 | 
						|
    "mysql": {VERSIONS["pymysql"]},
 | 
						|
    "nifi": {},  # uses requests
 | 
						|
    "okta": {"okta~=2.3"},
 | 
						|
    "oracle": {"cx_Oracle>=8.3.0,<9", "oracledb~=1.2"},
 | 
						|
    "pinotdb": {"pinotdb~=0.3"},
 | 
						|
    "postgres": {VERSIONS["pymysql"], "psycopg2-binary", VERSIONS["geoalchemy2"]},
 | 
						|
    "powerbi": {VERSIONS["msal"]},
 | 
						|
    "presto": {*COMMONS["hive"]},
 | 
						|
    "pymssql": {"pymssql==2.2.5"},
 | 
						|
    "quicksight": {VERSIONS["boto3"]},
 | 
						|
    "redash": {"packaging==21.3"},
 | 
						|
    "redpanda": {*COMMONS["kafka"]},
 | 
						|
    "redshift": {
 | 
						|
        "sqlalchemy-redshift~=0.8",
 | 
						|
        "psycopg2-binary",
 | 
						|
        VERSIONS["geoalchemy2"],
 | 
						|
    },
 | 
						|
    "sagemaker": {VERSIONS["boto3"]},
 | 
						|
    "salesforce": {"simple_salesforce==1.11.4"},
 | 
						|
    "singlestore": {VERSIONS["pymysql"]},
 | 
						|
    "sklearn": {VERSIONS["scikit-learn"]},
 | 
						|
    "snowflake": {"snowflake-sqlalchemy~=1.4"},
 | 
						|
    "superset": {},  # uses requests
 | 
						|
    "tableau": {"tableau-api-lib~=0.1"},
 | 
						|
    "trino": {"trino[sqlalchemy]"},
 | 
						|
    "vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"},
 | 
						|
    "pii-processor": pii_requirements,
 | 
						|
}
 | 
						|
 | 
						|
dev = {
 | 
						|
    "black==22.3.0",
 | 
						|
    "datamodel-code-generator==0.15.0",
 | 
						|
    "docker",
 | 
						|
    "isort",
 | 
						|
    "pre-commit",
 | 
						|
    "pycln",
 | 
						|
    "pylint",
 | 
						|
    "twine",
 | 
						|
}
 | 
						|
 | 
						|
test = {
 | 
						|
    # Install Airflow as it's not part of `all` plugin
 | 
						|
    VERSIONS["airflow"],
 | 
						|
    "coverage",
 | 
						|
    # Install GE because it's not in the `all` plugin
 | 
						|
    VERSIONS["great-expectations"],
 | 
						|
    "moto==4.0.8",
 | 
						|
    "pytest==7.0.0",
 | 
						|
    "pytest-cov",
 | 
						|
    "pytest-order",
 | 
						|
    # install dbt dependency
 | 
						|
    "dbt-artifacts-parser",
 | 
						|
}
 | 
						|
 | 
						|
build_options = {"includes": ["_cffi_backend"]}
 | 
						|
setup(
 | 
						|
    name="openmetadata-ingestion",
 | 
						|
    version="1.0.0.0.dev0",
 | 
						|
    url="https://open-metadata.org/",
 | 
						|
    author="OpenMetadata Committers",
 | 
						|
    license="Apache License 2.0",
 | 
						|
    description="Ingestion Framework for OpenMetadata",
 | 
						|
    long_description=get_long_description(),
 | 
						|
    long_description_content_type="text/markdown",
 | 
						|
    python_requires=">=3.7",
 | 
						|
    options={"build_exe": build_options},
 | 
						|
    package_dir={"": "src"},
 | 
						|
    package_data={"metadata.examples": ["workflows/*.yaml"]},
 | 
						|
    zip_safe=False,
 | 
						|
    dependency_links=[],
 | 
						|
    project_urls={
 | 
						|
        "Documentation": "https://docs.open-metadata.org/",
 | 
						|
        "Source": "https://github.com/open-metadata/OpenMetadata",
 | 
						|
    },
 | 
						|
    packages=find_namespace_packages(where="./src", exclude=["tests*"]),
 | 
						|
    namespace_package=["metadata"],
 | 
						|
    entry_points={
 | 
						|
        "console_scripts": ["metadata = metadata.cmd:metadata"],
 | 
						|
        "apache_airflow_provider": [
 | 
						|
            "provider_info = airflow_provider_openmetadata:get_provider_config"
 | 
						|
        ],
 | 
						|
    },
 | 
						|
    install_requires=list(base_requirements),
 | 
						|
    extras_require={
 | 
						|
        "base": list(base_requirements),
 | 
						|
        "dev": list(dev),
 | 
						|
        "test": list(test),
 | 
						|
        "data-insight": list(plugins["elasticsearch"]),
 | 
						|
        **{plugin: list(dependencies) for (plugin, dependencies) in plugins.items()},
 | 
						|
        "all": list(
 | 
						|
            base_requirements.union(
 | 
						|
                *[
 | 
						|
                    requirements
 | 
						|
                    for plugin, requirements in plugins.items()
 | 
						|
                    if plugin
 | 
						|
                    not in {
 | 
						|
                        "airflow",
 | 
						|
                        "db2",
 | 
						|
                        "great-expectations",
 | 
						|
                    }
 | 
						|
                ]
 | 
						|
            )
 | 
						|
        ),
 | 
						|
    },
 | 
						|
)
 |