Fix #7768 - Update and organize versions (#9664)

This commit is contained in:
Pere Miquel Brull 2023-01-11 07:05:12 +01:00 committed by GitHub
parent 03c40d9566
commit bf753a4dee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 209 additions and 197 deletions

View File

@ -22,182 +22,201 @@ def get_long_description():
return description
# Add here versions required for multiple plugins
# Single source of truth for pins that are shared by several plugins/extras;
# plugin sets below reference these entries instead of repeating the pin.
VERSIONS = {
    "airflow": "apache-airflow==2.3.3",
    "avro-python3": "avro-python3~=1.10",
    "boto3": "boto3~=1.26", # No need to add botocore separately. It's a dep from boto3
    "geoalchemy2": "GeoAlchemy2~=0.12",
    "google-cloud-storage": "google-cloud-storage==1.43.0",
    "great-expectations": "great-expectations~=0.15.0",
    "grpc-tools": "grpcio-tools==1.47.2",
    "msal": "msal~=1.2",
    "neo4j": "neo4j~=4.4.0",
    "pandas": "pandas==1.3.5",
    "pyarrow": "pyarrow~=8.0",
    "pydomo": "pydomo~=0.3",
    "pymysql": "pymysql>=1.0.2",
    "pyodbc": "pyodbc>=4.0.35,<5",
    "scikit-learn": "scikit-learn~=1.0", # Python 3.7 only goes up to 1.0.2
}
# Requirement groups shared by more than one plugin (unpacked with `*COMMONS[...]`
# in the plugin sets below).
COMMONS = {
    "datalake": {VERSIONS["boto3"], VERSIONS["pandas"], VERSIONS["pyarrow"]},
    "hive": {
        "presto-types-parser>=0.0.2",
        "pyhive~=0.6",
    },
    "kafka": {
        "avro~=1.11",
        VERSIONS["avro-python3"],
        "confluent_kafka==1.8.2",
        "fastavro>=1.2.0",
        # Due to https://github.com/grpc/grpc/issues/30843#issuecomment-1303816925
        # we use v1.47.2 https://github.com/grpc/grpc/blob/v1.47.2/tools/distrib/python/grpcio_tools/grpc_version.py#L17
        VERSIONS[
            "grpc-tools"
        ],  # grpcio-tools already depends on grpcio. No need to add separately
        "protobuf",
    },
}
# Requirements every installation needs, sorted alphabetically.
# NOTE(review): the original span interleaved the pre- and post-commit copies of
# this set (e.g. both `boto3~=1.19.12` and VERSIONS["boto3"], `pydantic~=1.9.0`
# and `pydantic~=1.10`, duplicated `antlr4`/`croniter`/`cryptography` lines),
# yielding conflicting pins. This keeps a single, consistent copy.
base_requirements = {
    "antlr4-python3-runtime==4.9.2",
    VERSIONS["avro-python3"],  # Used in sample data
    VERSIONS["boto3"],  # Required in base for the secrets manager
    "cached-property==1.5.2",
    "chardet==4.0.0",
    "commonregex",
    "croniter~=1.3.0",
    "cryptography",
    "email-validator>=1.0.3",
    "google>=3.0.0",
    "google-auth>=1.33.0",
    # https://github.com/grpc/grpc/issues/30843#issuecomment-1303816925
    "grpcio<1.48.1",
    VERSIONS["grpc-tools"],  # Used in sample data
    "idna<3,>=2.5",
    "importlib-metadata~=4.12.0",  # From airflow constraints
    "Jinja2>=2.11.3",
    "jsonschema",
    "mypy_extensions>=0.4.3",
    VERSIONS["pandas"],  # to be removed from base
    "pydantic~=1.10",
    VERSIONS["pymysql"],
    "python-dateutil>=2.8.1",
    "python-jose~=3.3",
    "PyYAML",
    "requests>=2.23",
    "requests-aws4auth~=1.1",  # Only depends on requests as external package. Leaving as base.
    "setuptools~=65.6.3",
    "sqlalchemy>=1.4.0",
    "sqllineage==1.3.7",
    "typing-compat~=0.1.0",  # compatibility requirements for 3.7
    "typing-inspect",
    "wheel~=0.38.4",
}
# Pins shared by the datalake-* extras in this (pre-refactor) layout.
datalake_common = {"pandas==1.3.5", "pyarrow==6.0.1"}
# Optional per-connector requirement sets, exposed as pip extras.
# NOTE(review): the original span interleaved the pre- and post-commit copies of
# this dict — duplicate keys everywhere and an unterminated "datalake-s3" set
# that made the literal syntactically invalid. This keeps one consistent copy,
# sorted alphabetically and referencing VERSIONS / COMMONS where shared.
plugins: Dict[str, Set[str]] = {
    "airflow": {
        VERSIONS["airflow"]
    },  # Same as ingestion container. For development.
    "airflow-container-1.10.15": {"markupsafe==2.0.1 ", "requests==2.23.0"},
    "amundsen": {VERSIONS["neo4j"]},
    "athena": {"PyAthena[SQLAlchemy]"},
    "atlas": {},
    "azuresql": {VERSIONS["pyodbc"]},
    "azure-sso": {VERSIONS["msal"]},
    "backup": {VERSIONS["boto3"], "azure-identity", "azure-storage-blob"},
    "bigquery": {
        "cachetools",
        "google-cloud-datacatalog==3.6.2",
        "google-cloud-logging",
        VERSIONS["pyarrow"],
        "sqlalchemy-bigquery>=1.2.2",
    },
    "clickhouse": {"clickhouse-driver~=0.2", "clickhouse-sqlalchemy~=0.2"},
    "dagster": {
        VERSIONS["pymysql"],
        "psycopg2-binary",
        VERSIONS["geoalchemy2"],
        "dagster_graphql~=1.1",
    },
    "dbt": {"google-cloud", VERSIONS["boto3"], VERSIONS["google-cloud-storage"]},
    "db2": {"ibm-db-sa~=0.3"},
    "databricks": {"sqlalchemy-databricks~=0.1"},
    "datalake-azure": {
        "azure-storage-blob~=12.14",
        "azure-identity~=1.12",
        "adlfs>=2022.2.0",  # Python 3.7 does only support up to 2022.2.0
        *COMMONS["datalake"],
    },
    "datalake-gcs": {
        VERSIONS["google-cloud-storage"],
        "gcsfs==2022.11.0",
        *COMMONS["datalake"],
    },
    "datalake-s3": {
        # requires aiobotocore
        # https://github.com/fsspec/s3fs/blob/9bf99f763edaf7026318e150c4bd3a8d18bb3a00/requirements.txt#L1
        # however, the latest version of `s3fs` conflicts its `aiobotocore` dep with `boto3`'s dep on `botocore`.
        # Leaving this marked to the automatic resolution to speed up installation.
        "s3fs==0.4.2",
        *COMMONS["datalake"],
    },
    "deltalake": {"delta-spark~=2.2"},
    "docker": {"python_on_whales==0.55.0"},
    "domo": {VERSIONS["pydomo"]},
    "druid": {"pydruid>=0.6.5"},
    "dynamodb": {VERSIONS["boto3"]},
    "elasticsearch": {
        "elasticsearch>=7.17,<8"
    },  # also requires requests-aws4auth which is in base
    "glue": {VERSIONS["boto3"]},
    "great-expectations": {VERSIONS["great-expectations"]},
    "hive": {
        *COMMONS["hive"],
        "thrift>=0.13,<1",
        "sasl~=0.3",
        "thrift-sasl~=0.4",
    },
    "kafka": {*COMMONS["kafka"]},
    "kinesis": {VERSIONS["boto3"]},
    "ldap-users": {"ldap3==2.9.1"},
    "looker": {"looker-sdk>=22.20.0"},
    "mlflow": {"mlflow-skinny~=1.30"},
    "mssql": {"sqlalchemy-pytds~=0.3"},
    "mssql-odbc": {VERSIONS["pyodbc"]},
    "mysql": {VERSIONS["pymysql"]},
    "nifi": {},  # uses requests
    "okta": {"okta~=2.3"},
    "oracle": {"cx_Oracle>=8.3.0,<9", "oracledb~=1.2"},
    "pinotdb": {"pinotdb~=0.3"},
    "postgres": {VERSIONS["pymysql"], "psycopg2-binary", VERSIONS["geoalchemy2"]},
    "powerbi": {VERSIONS["msal"]},
    "presto": {*COMMONS["hive"]},
    "pymssql": {"pymssql==2.2.5"},
    "redash": {"redash-toolbelt~=0.1"},
    "redpanda": {*COMMONS["kafka"]},
    "redshift": {
        "sqlalchemy-redshift~=0.8",
        "psycopg2-binary",
        VERSIONS["geoalchemy2"],
    },
    "sagemaker": {VERSIONS["boto3"]},
    "salesforce": {"simple_salesforce==1.11.4"},
    "singlestore": {VERSIONS["pymysql"]},
    "sklearn": {VERSIONS["scikit-learn"]},
    "snowflake": {"snowflake-sqlalchemy~=1.4"},
    "superset": {},  # uses requests
    "tableau": {"tableau-api-lib~=0.1"},
    "trino": {"trino[sqlalchemy]"},
    "vertica": {"sqlalchemy-vertica[vertica-python]>=0.0.5"},
    "webhook-server": {},
}
# Tooling needed for local development (formatters, linters, release tools).
# NOTE(review): the original span interleaved the pre- and post-commit copies
# (duplicate `datamodel-code-generator` line, conflicting `pycln==1.3.2` vs
# unpinned `pycln`). This keeps a single, alphabetized copy.
dev = {
    "black==22.3.0",
    "datamodel-code-generator==0.15.0",
    "docker",
    "google-cloud-storage==1.43.0",
    "isort",
    "pre-commit",
    "pycln",
    "pylint",
    "twine",
}
# Requirements for the test suite; shared pins come from VERSIONS so the tests
# exercise exactly what the plugins install.
# NOTE(review): the original span interleaved the pre- and post-commit copies
# (e.g. both `pandas==1.3.5` and VERSIONS["pandas"], `coverage` and `moto`
# listed twice). This keeps a single, consistent copy.
test = {
    VERSIONS["airflow"],  # Airflow tests
    "coverage",
    VERSIONS["google-cloud-storage"],
    VERSIONS["great-expectations"],
    "moto==4.0.8",  # mock boto3 functions
    VERSIONS["neo4j"],  # amundsen
    VERSIONS["pandas"],
    VERSIONS["pydomo"],  # Domo test
    "pytest==7.0.0",
    "pytest-cov",
    "pytest-order",
    VERSIONS["scikit-learn"],  # sklearn integration
}
build_options = {"includes": ["_cffi_backend"]}
@ -243,7 +262,6 @@ setup(
if plugin
not in {
"airflow",
"airflow-container-1.10.15",
"db2",
"great-expectations",
}

View File

@ -13,11 +13,43 @@ Module for getting versions of OpenMetadata and python
"""
import os
import re
import sys
import pkg_resources
try:
from importlib.metadata import version
except ImportError:
from importlib_metadata import version
version = pkg_resources.require("openmetadata-ingestion")[0].version
class VersionParsingException(Exception):
    """
    Raised when we cannot extract a major.minor.patch version
    from a raw version string (see get_version_from_string).
    """
def get_version_from_string(raw_version: str) -> str:
    """
    Given a raw version string, such as `0.10.1.dev0` or
    `0.11.0-SNAPSHOT`, we should extract the major.minor.patch

    :param raw_version: raw string with version info
    :return: Clean version string
    :raises VersionParsingException: if no major.minor.patch prefix is found
    """
    try:
        # Dots are escaped: the previous pattern `\d+.\d+.\d+` let `.`
        # match any character, so strings like `1x2y3` were accepted.
        return re.match(r"\d+\.\d+\.\d+", raw_version).group(0)
    except AttributeError as err:
        # re.match returns None on no match, so .group raises AttributeError
        raise VersionParsingException(
            f"Can't extract version from {raw_version}: {err}"
        ) from err
def get_client_version() -> str:
    """
    Look up the installed `openmetadata-ingestion` distribution and
    return its version cleaned down to major.minor.patch.

    :return: client version
    """
    return get_version_from_string(version("openmetadata-ingestion"))
def get_metadata_version() -> str:
@ -28,7 +60,7 @@ def get_metadata_version() -> str:
metadata_pkg_dir = os.path.join(os.path.dirname(__file__), "..", "..")
metadata_pkg_dir = os.path.abspath(metadata_pkg_dir)
return f"metadata {version} from {metadata_pkg_dir} (python {get_major_minor_version()})"
return f"metadata {get_client_version()} from {metadata_pkg_dir} (python {get_major_minor_version()})"
def get_major_minor_version() -> str:

View File

@ -13,25 +13,13 @@ Mixin class containing Server and client specific methods
To be used by OpenMetadata class
"""
import re
try:
from importlib.metadata import version
except ImportError:
from importlib_metadata import version
from metadata.__version__ import get_client_version, get_version_from_string
from metadata.ingestion.ometa.client import REST
from metadata.utils.logger import ometa_logger
logger = ometa_logger()
# NOTE(review): an identically named exception exists in metadata.__version__ —
# confirm which copy is canonical; this one looks superseded.
class VersionParsingException(Exception):
    """
    Used when we cannot parse version information from a string
    """
class VersionMismatchException(Exception):
"""
Used when server and client versions do not match
@ -53,21 +41,6 @@ class OMetaServerMixin:
client: REST
@staticmethod
def get_version_from_string(raw_version: str) -> str:
"""
Given a raw version string, such as `0.10.1.dev0` or
`0.11.0-SNAPSHOT`, we should extract the major.minor.patch
:param raw_version: raw string with version info
:return: Clean version string
"""
try:
return re.match(r"\d+.\d+.\d+", raw_version).group(0)
except AttributeError as err:
raise VersionParsingException(
f"Can't extract version from {raw_version}: {err}"
)
def get_server_version(self) -> str:
"""
Run endpoint /version to check server version
@ -77,15 +50,7 @@ class OMetaServerMixin:
raw_version = self.client.get("/version")["version"]
except KeyError:
raise VersionNotFoundException("Cannot Find Version at api/v1/version")
return self.get_version_from_string(raw_version)
def get_client_version(self) -> str:
"""
Get openmetadata-ingestion module version
:return: client version
"""
raw_version = version("openmetadata-ingestion")
return self.get_version_from_string(raw_version)
return get_version_from_string(raw_version)
def validate_versions(self) -> None:
"""
@ -95,7 +60,7 @@ class OMetaServerMixin:
logger.debug("Validating client and server versions")
server_version = self.get_server_version()
client_version = self.get_client_version()
client_version = get_client_version()
if server_version != client_version:
raise VersionMismatchException(

View File

@ -15,9 +15,6 @@ Hosts the singledispatch to get the schema parsers
from typing import List, Optional
from metadata.generated.schema.type.schema import FieldModel, SchemaType
from metadata.parsers.avro_parser import parse_avro_schema
from metadata.parsers.json_schema_parser import parse_json_schema
from metadata.parsers.protobuf_parser import ProtobufParser, ProtobufParserConfig
from metadata.utils.dispatch import enum_register
schema_parser_config_registry = enum_register()
@ -29,10 +26,14 @@ class InvalidSchemaTypeException(Exception):
"""
# Load parsers only on demand
# pylint: disable=import-outside-toplevel
@schema_parser_config_registry.add(SchemaType.Avro.value.lower())
def load_avro_parser(
    topic_name: str, schema_text: str  # pylint: disable=unused-argument
) -> Optional[List[FieldModel]]:
    """
    Parse `schema_text` as an Avro schema and return its fields.
    `topic_name` is unused but kept for the registry's common signature.
    """
    # Imported lazily so the avro dependency is only loaded when needed
    from metadata.parsers.avro_parser import parse_avro_schema

    return parse_avro_schema(schema_text)
@ -40,6 +41,8 @@ def load_avro_parser(
def load_protobuf_parser(
topic_name: str, schema_text: str
) -> Optional[List[FieldModel]]:
from metadata.parsers.protobuf_parser import ProtobufParser, ProtobufParserConfig
protobuf_parser = ProtobufParser(
config=ProtobufParserConfig(schema_name=topic_name, schema_text=schema_text)
)
@ -50,6 +53,8 @@ def load_protobuf_parser(
def load_json_schema_parser(
topic_name: str, schema_text: str # pylint: disable=unused-argument
) -> Optional[List[FieldModel]]:
from metadata.parsers.json_schema_parser import parse_json_schema
return parse_json_schema(schema_text)

View File

@ -14,28 +14,20 @@ Validate Server Mixin version methods
from unittest import TestCase
from metadata.ingestion.ometa.mixins.server_mixin import OMetaServerMixin
from metadata.__version__ import get_version_from_string
# NOTE(review): the original span interleaved the pre- and post-commit copies of
# this test class (two adjacent class headers, mixin-based and function-based
# assertions mixed), which is not valid Python. This keeps the function-based copy.
class OMetaVersionTest(TestCase):
    """
    Check version methods
    """

    def test_get_version_from_string(self):
        """
        We should be able to parse regular version responses
        """
        self.assertEqual("0.11.0", get_version_from_string("0.11.0.dev0"))
        self.assertEqual("0.11.0", get_version_from_string("0.11.0"))
        self.assertEqual("1111.11.111", get_version_from_string("1111.11.111"))
        self.assertEqual("1111.11.111", get_version_from_string("1111.11.111-SNAPSHOT"))
        self.assertEqual("0.11.1", get_version_from_string("0.11.1.0.0.1.patch"))