From abddc018770e45bbefb252d0f1b708bfeeae1fc0 Mon Sep 17 00:00:00 2001
From: Harshal Sheth
Date: Wed, 7 Sep 2022 09:14:33 -0700
Subject: [PATCH] fix(ingest): fix doc generation import ordering issue with
 postgres (#5846)

Relying on the correct import directly, rather than going through
SQLAlchemy's import wrapper (in their dialect.py), allows us to bypass this
strange error in doc generation.
---
 metadata-ingestion/scripts/docgen.py           | 14 +++-----
 .../ingestion/source/sql/sql_common.py         | 34 +++++++++++--------
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/metadata-ingestion/scripts/docgen.py b/metadata-ingestion/scripts/docgen.py
index 4c19845053..53e3733d76 100644
--- a/metadata-ingestion/scripts/docgen.py
+++ b/metadata-ingestion/scripts/docgen.py
@@ -18,8 +18,7 @@ from datahub.ingestion.api.decorators import (
     SourceCapability,
     SupportStatus,
 )
-from datahub.ingestion.api.registry import PluginRegistry
-from datahub.ingestion.api.source import Source
+from datahub.ingestion.source.source_registry import source_registry

 logger = logging.getLogger(__name__)

@@ -500,11 +499,7 @@ def generate(
             file_contents,
         )

-    source_registry = PluginRegistry[Source]()
-    source_registry.register_from_entrypoint("datahub.ingestion.source.plugins")
-
-    # This source is always enabled
-    for plugin_name in sorted(source_registry._mapping.keys()):
+    for plugin_name in sorted(source_registry.mapping.keys()):
         if source and source != plugin_name:
             continue

@@ -526,8 +521,9 @@ def generate(
                 get_additional_deps_for_extra(extra_plugin) if extra_plugin else []
             )
         except Exception as e:
-            print(f"Failed to process {plugin_name} due to exception")
-            print(repr(e))
+            logger.warning(
+                f"Failed to process {plugin_name} due to exception {e}", exc_info=e
+            )
             metrics["plugins"]["failed"] = metrics["plugins"].get("failed", 0) + 1

         if source_type and hasattr(source_type, "get_config_class"):
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
index e456a66cd3..cfc5d815fc 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py
@@ -22,8 +22,9 @@ from typing import (
 from urllib.parse import quote_plus

 import pydantic
+import sqlalchemy.dialects.postgresql.base
 from pydantic.fields import Field
-from sqlalchemy import create_engine, dialects, inspect
+from sqlalchemy import create_engine, inspect
 from sqlalchemy.engine.reflection import Inspector
 from sqlalchemy.exc import ProgrammingError
 from sqlalchemy.sql import sqltypes as types
@@ -350,20 +351,23 @@ _field_type_mapping: Dict[Type[types.TypeEngine], Type] = {
     types.DATETIME: TimeTypeClass,
     types.TIMESTAMP: TimeTypeClass,
     types.JSON: RecordTypeClass,
-    dialects.postgresql.base.BYTEA: BytesTypeClass,
-    dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
-    dialects.postgresql.base.INET: StringTypeClass,
-    dialects.postgresql.base.MACADDR: StringTypeClass,
-    dialects.postgresql.base.MONEY: NumberTypeClass,
-    dialects.postgresql.base.OID: StringTypeClass,
-    dialects.postgresql.base.REGCLASS: BytesTypeClass,
-    dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
-    dialects.postgresql.base.TIME: TimeTypeClass,
-    dialects.postgresql.base.INTERVAL: TimeTypeClass,
-    dialects.postgresql.base.BIT: BytesTypeClass,
-    dialects.postgresql.base.UUID: StringTypeClass,
-    dialects.postgresql.base.TSVECTOR: BytesTypeClass,
-    dialects.postgresql.base.ENUM: EnumTypeClass,
+    # Because the postgresql dialect is used internally by many other dialects,
+    # we add some postgres types here. This is ok to do because the postgresql
+    # dialect is built-in to sqlalchemy.
+    sqlalchemy.dialects.postgresql.base.BYTEA: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.DOUBLE_PRECISION: NumberTypeClass,
+    sqlalchemy.dialects.postgresql.base.INET: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.MACADDR: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.MONEY: NumberTypeClass,
+    sqlalchemy.dialects.postgresql.base.OID: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.REGCLASS: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.TIMESTAMP: TimeTypeClass,
+    sqlalchemy.dialects.postgresql.base.TIME: TimeTypeClass,
+    sqlalchemy.dialects.postgresql.base.INTERVAL: TimeTypeClass,
+    sqlalchemy.dialects.postgresql.base.BIT: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.UUID: StringTypeClass,
+    sqlalchemy.dialects.postgresql.base.TSVECTOR: BytesTypeClass,
+    sqlalchemy.dialects.postgresql.base.ENUM: EnumTypeClass,
     # When SQLAlchemy is unable to map a type into its internal hierarchy, it
     # assigns the NullType by default. We want to carry this warning through.
     types.NullType: NullTypeClass,
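
Note (not part of the patch): the sketch below is a minimal, hypothetical
repro of the import-ordering problem the commit message describes. It assumes
SQLAlchemy 1.3/1.4-style lazy dialect loading and that nothing has imported
the postgresql dialect yet; whether the first access succeeds depends entirely
on what other modules happened to be imported earlier, which is exactly why
the explicit module import in sql_common.py is more robust.

    # Hypothetical standalone script, not from the DataHub repo. Assumes
    # SQLAlchemy is installed and no earlier import loaded the postgresql dialect.
    from sqlalchemy import dialects

    try:
        # sqlalchemy.dialects does not eagerly import its subpackages, so this
        # attribute access fails unless something else already imported
        # sqlalchemy.dialects.postgresql as a side effect.
        print(dialects.postgresql.base.BYTEA)
    except AttributeError as e:
        print(f"attribute-style access failed: {e}")

    # Importing the module path directly always loads it, regardless of what
    # was imported first -- the approach the patched sql_common.py takes.
    import sqlalchemy.dialects.postgresql.base

    print(sqlalchemy.dialects.postgresql.base.BYTEA)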