Mirror of https://github.com/open-metadata/OpenMetadata.git (synced 2025-11-14 01:40:08 +00:00)

Merge pull request #96 from open-metadata/ingestion_refactor

Ingestion refactor

This commit is contained in commit 3857f72c7b
@@ -9,7 +9,7 @@
       "username": "sa",
       "password": "test!Password",
       "include_pattern": {
-        "include": ["catalog_test.*"]
+        "excludes": ["catalog_test.*"]
       }
     }
   },
@@ -7,13 +7,7 @@
       "host_port": "localhost:5432",
       "database": "pagila",
       "service_name": "local_postgres",
-      "service_type": "Postgres",
-      "include_pattern": {
-        "filter": [
-          "pg_catalog.*[a-zA-Z0-9]*",
-          "information_schema.*[a-zA-Z0-9]*"
-        ]
-      }
+      "service_type": "Postgres"
     }
   },
   "processor": {
@@ -1,37 +0,0 @@
-{
-  "source": {
-    "type": "redshift-sql",
-    "config": {
-      "host_port": "cluster.name.region.redshift.amazonaws.com:5439",
-      "username": "username",
-      "password": "strong_password",
-      "database": "dev",
-      "service_name": "aws_redshift",
-      "service_type": "Redshift"
-    }
-  },
-  "processor": {
-    "type": "pii-tags",
-    "config": {
-    }
-  },
-  "sink": {
-    "type": "metadata-rest-tables",
-    "config": {
-    }
-  },
-  "metadata_server": {
-    "type": "metadata-server",
-    "config": {
-      "api_endpoint": "http://localhost:8585/api",
-      "auth_provider_type": "no-auth"
-    }
-  },
-  "cron": {
-    "minute": "*/5",
-    "hour": null,
-    "day": null,
-    "month": null,
-    "day_of_week": null
-  }
-}
@@ -10,7 +10,7 @@
       "service_name": "snowflake",
       "service_type": "Snowflake",
       "include_pattern": {
-        "include": [
+        "includes": [
           "(\\w)*.tpcds_sf100tcl.catalog_page",
           "(\\w)*.tpcds_sf100tcl.time_dim",
           "(\\w)*.tpcds_sf10tcl.catalog_page"
ingestion/requirements.txt (new file, 14 lines)
@@ -0,0 +1,14 @@
+click~=7.1.2
+pydantic~=1.7.4
+expandvars~=0.6.5
+requests~=2.25.1
+python-dateutil~=2.8.1
+SQLAlchemy~=1.4.5
+pandas~=1.2.4
+Faker~=8.1.1
+elasticsearch~=7.12.0
+spacy~=3.0.5
+commonregex~=1.5.4
+setuptools~=57.0.0
+PyHive~=0.6.4
+ldap3~=2.9.1
@@ -101,19 +101,17 @@ build_options = {"includes": ["_cffi_backend"]}

 setup(
     name="metadata",
-    version=get_version(),
-    url="https://github.com/streamlinedata/metadata",
+    version="0.2.0",
+    url="https://github.com/open-metadata/OpenMetadata",
     author="Metadata Committers",
     license="Apache License 2.0",
     description="Ingestion Framework for OpenMetadata",
     long_description="Ingestion Framework for OpenMetadata",
     long_description_content_type="text/markdown",
     python_requires=">=3.8",
     options={"build_exe": build_options},
     package_dir={"": "src"},
     packages=find_namespace_packages(where='src', exclude=['tests*']),
-    dependency_links=['git+git://github.com/djacobs/PyAPNs.git#egg=apns',
-                      'git+https://github.com/StreamlineData/sdscheduler.git#egg=simplescheduler'],
     entry_points={
         "console_scripts": ["metadata = metadata.cmd:metadata"],
         "metadata.ingestion.source.plugins": [
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import click
-
-
-@click.group()
-def check() -> None:
-    pass
@@ -21,7 +21,6 @@ import sys
 import click
 from pydantic import ValidationError

-from metadata.check.check_cli import check
 from metadata.config.config_loader import load_config_file
 from metadata.ingestion.workflow.workflow import Workflow

@@ -35,11 +34,14 @@ BASE_LOGGING_FORMAT = (
 )
 logging.basicConfig(format=BASE_LOGGING_FORMAT)

+@click.group()
+def check() -> None:
+    pass
+

 @click.group()
 @click.option("--debug/--no-debug", default=False)
 def metadata(debug: bool) -> None:
-    if debug or os.getenv("METADATA_DEBUG", False):
+    if os.getenv("METADATA_DEBUG", False):
         logging.getLogger().setLevel(logging.INFO)
         logging.getLogger("metadata").setLevel(logging.DEBUG)
     else:
@@ -52,12 +54,11 @@ def metadata(debug: bool) -> None:
     "-c",
     "--config",
     type=click.Path(exists=True, dir_okay=False),
-    help="Config file in .toml or .yaml format",
+    help="Workflow config",
     required=True,
 )
 def ingest(config: str) -> None:
     """Main command for ingesting metadata into Metadata"""
-
     config_file = pathlib.Path(config)
     workflow_config = load_config_file(config_file)

@@ -71,6 +72,7 @@ def ingest(config: str) -> None:

     workflow.execute()
     ret = workflow.print_status()
+    workflow.stop()
     sys.exit(ret)

@@ -42,48 +42,18 @@ class ConfigModel(BaseModel):

 class DynamicTypedConfig(ConfigModel):
     type: str
-    # This config type is declared Optional[Any] here. The eventual parser for the
-    # specified type is responsible for further validation.
     config: Optional[Any]


-class MetaError(Exception):
-    """A base class for all meta exceptions"""
-
-
-class WorkflowExecutionError(MetaError):
+class WorkflowExecutionError(Exception):
     """An error occurred when executing the workflow"""


-class OperationalError(WorkflowExecutionError):
-    """An error occurred because of client-provided metadata"""
-
-    message: str
-    info: dict
-
-    def __init__(self, message: str, info: dict = None):
-        self.message = message
-        if info:
-            self.info = info
-        else:
-            self.info = {}
-
-
-class ConfigurationError(MetaError):
-    """A configuration error has happened"""
-
-
-class ConfigurationMechanism(ABC):
-    @abstractmethod
-    def load_config(self, config_fp: IO) -> dict:
-        pass
-
-
 class IncludeFilterPattern(ConfigModel):
     """A class to store allow deny regexes"""

-    include: List[str] = [".*"]
-    filter: List[str] = []
+    includes: List[str] = [".*"]
+    excludes: List[str] = []
     alphabet: str = "[A-Za-z0-9 _.-]"

     @property
@@ -96,11 +66,11 @@ class IncludeFilterPattern(ConfigModel):

     def included(self, string: str) -> bool:
         try:
-            for filter in self.filter:
-                if re.match(filter, string):
+            for exclude in self.excludes:
+                if re.match(exclude, string):
                     return False

-            for include in self.include:
+            for include in self.includes:
                 if re.match(include, string):
                     return True
             return False
@@ -108,17 +78,11 @@ class IncludeFilterPattern(ConfigModel):
             raise Exception("Regex Error: {}".format(err))

     def is_fully_specified_include_list(self) -> bool:
-        """
-        If the allow patterns are literals and not full regexes, then it is considered
-        fully specified. This is useful if you want to convert a 'list + filter'
-        pattern into a 'search for the ones that are allowed' pattern, which can be
-        much more efficient in some cases.
-        """
-        for include_pattern in self.include:
+        for include_pattern in self.includes:
             if not self.alphabet_pattern.match(include_pattern):
                 return False
         return True

     def get_allowed_list(self):
         assert self.is_fully_specified_include_list()
-        return [a for a in self.include if self.included(a)]
+        return [a for a in self.includes if self.included(a)]
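To make the renamed fields concrete, here is a minimal standalone sketch of the include/exclude matching that IncludeFilterPattern.included() implements after this refactor. It is an illustrative re-implementation with hypothetical table names, not the library code itself (the real class lives in metadata/config/common.py):

    import re

    def included(name, includes=(".*",), excludes=()):
        # Excludes win over includes, mirroring IncludeFilterPattern.included()
        for pattern in excludes:
            if re.match(pattern, name):
                return False
        return any(re.match(pattern, name) for pattern in includes)

    # e.g. drop system schemas while keeping everything else
    print(included("pg_catalog.pg_class", excludes=["pg_catalog.*"]))  # False
    print(included("public.orders"))                                   # True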
@@ -28,7 +28,7 @@ class ProcessorStatus(Status):
     warnings: List[Any] = field(default_factory=list)
     failures: List[Any] = field(default_factory=list)

-    def records_processed(self, record: Record):
+    def processed(self, record: Record):
         self.records += 1

     def warning(self, info: Any) -> None:
@@ -28,7 +28,7 @@ class SourceStatus(Status):
     warnings: Dict[str, List[str]] = field(default_factory=dict)
     failures: Dict[str, List[str]] = field(default_factory=dict)

-    def records_produced(self, record: Record) -> None:
+    def scanned(self, record: Record) -> None:
         self.records += 1

     def warning(self, key: str, reason: str) -> None:
@@ -44,6 +44,7 @@ class MetadataUsageBulkSink(BulkSink):
         self.client = REST(self.metadata_config)
         self.status = BulkSinkStatus()
         self.tables_dict = {}
+        self.table_join_dict = {}
         self.__map_tables()

     def __map_tables(self):
@@ -74,7 +75,8 @@ class MetadataUsageBulkSink(BulkSink):
             try:
                 self.client.publish_usage_for_a_table(table_entity, table_usage_request)
             except APIError as err:
-                logger.error("Failed to update usage and query join {}".format(err))
+                self.status.failures.append(table_usage_request)
+                logger.error("Failed to update usage for {} {}".format(table_usage.table, err))

             table_join_request = self.__get_table_joins(table_usage)
             logger.debug("table join request {}".format(table_join_request))
@@ -82,7 +84,8 @@ class MetadataUsageBulkSink(BulkSink):
                 if table_join_request is not None and len(table_join_request.columnJoins) > 0:
                     self.client.publish_frequently_joined_with(table_entity, table_join_request)
             except APIError as err:
-                logger.error("Failed to update usage and query join {}".format(err))
+                self.status.failures.append(table_join_request)
+                logger.error("Failed to update query join for {}, {}".format(table_usage.table, err))

         else:
             logger.warning("Table does not exist, skipping usage publish {}, {}".format(table_usage.table,
@@ -90,21 +93,32 @@ class MetadataUsageBulkSink(BulkSink):

     def __get_table_joins(self, table_usage):
         table_joins: TableJoins = TableJoins(columnJoins=[], startDate=table_usage.date)
+        column_joins_dict = {}
+        joined_with = {}
         for column_join in table_usage.joins:
             if column_join.table_column is None or len(column_join.joined_with) == 0:
                 continue
-            logger.debug("main column join {}".format(column_join.table_column))
+            if column_join.table_column.column in column_joins_dict.keys():
+                joined_with = column_joins_dict[column_join.table_column.column]
+            else:
+                column_joins_dict[column_join.table_column.column] = {}
+
             main_column_fqdn = self.__get_column_fqdn(column_join.table_column)
-            logger.debug("main column fqdn join {}".format(main_column_fqdn))
-            joined_with = []
             for column in column_join.joined_with:
-                logger.debug("joined column {}".format(column))
                 joined_column_fqdn = self.__get_column_fqdn(column)
-                logger.debug("joined column fqdn {}".format(joined_column_fqdn))
-                if joined_column_fqdn is not None:
-                    joined_with.append(ColumnJoinedWith(fullyQualifiedName=joined_column_fqdn, joinCount=1))
-            table_joins.columnJoins.append(ColumnJoins(columnName=column_join.table_column.column,
-                                                       joinedWith=joined_with))
+                if joined_column_fqdn in joined_with.keys():
+                    column_joined_with = joined_with[joined_column_fqdn]
+                    column_joined_with.joinCount += 1
+                    joined_with[joined_column_fqdn] = column_joined_with
+                else:
+                    joined_with[joined_column_fqdn] = ColumnJoinedWith(fullyQualifiedName=joined_column_fqdn,
+                                                                       joinCount=1)
+            column_joins_dict[column_join.table_column.column] = joined_with
+
+        for key, value in column_joins_dict.items():
+            table_joins.columnJoins.append(ColumnJoins(columnName=key,
+                                                       joinedWith=list(value.values())))
         return table_joins

     def __get_column_fqdn(self, table_column: TableColumn):
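As a plain-Python illustration of the aggregation the reworked __get_table_joins now performs (hypothetical column names, plain dicts standing in for the ColumnJoins/ColumnJoinedWith models):

    from collections import defaultdict

    # (main column, joined column FQDN) pairs as they might arrive from usage parsing
    observed_joins = [
        ("order_id", "shop.orders.order_id"),
        ("order_id", "shop.orders.order_id"),
        ("order_id", "shop.order_items.order_id"),
    ]

    # Keep one entry per joined column and count repeats, instead of
    # appending a fresh joinCount=1 record for every occurrence.
    column_joins = defaultdict(dict)
    for main_column, joined_fqdn in observed_joins:
        joined_with = column_joins[main_column]
        joined_with[joined_fqdn] = joined_with.get(joined_fqdn, 0) + 1

    print(dict(column_joins))
    # {'order_id': {'shop.orders.order_id': 2, 'shop.order_items.order_id': 1}}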
@@ -31,7 +31,7 @@ from metadata.ingestion.models.table_queries import TableUsageRequest, ColumnJoi
 from metadata.ingestion.ometa.auth_provider import MetadataServerConfig, AuthenticationProvider, \
     GoogleAuthenticationProvider, NoOpAuthenticationProvider, OktaAuthenticationProvider
 from metadata.ingestion.ometa.credentials import URL, get_api_version
-from metadata.generated.schema.entity.data.table import TableEntity
+from metadata.generated.schema.entity.data.table import TableEntity, TableJoins
 from metadata.generated.schema.entity.data.database import DatabaseEntity

 logger = logging.getLogger(__name__)
@@ -296,11 +296,11 @@ class REST(object):
     def publish_usage_for_a_table(self, table: TableEntity, table_usage_request: TableUsageRequest) -> None:
         """publish usage details for a table"""
         resp = self.post('/usage/table/{}'.format(table.id.__root__), data=table_usage_request.json())
-        # self.post('/usage/compute.percentile/table/{}'.format(table.id.__root__), table_usage_request.date)
+        logger.debug("published table usage {}".format(resp))

-    def publish_frequently_joined_with(self, table: TableEntity, table_join_request: ColumnJoinsList) -> None:
+    def publish_frequently_joined_with(self, table: TableEntity, table_join_request: TableJoins) -> None:
         """publish frequently joined with for a table"""
-        print(table_join_request.json())
+        logger.debug(table_join_request.json())
         logger.info("table join request {}".format(table_join_request.json()))
         resp = self.put('/tables/{}/joins'.format(table.id.__root__), data=table_join_request.json())
         logger.debug("published frequently joined with {}".format(resp))
@@ -52,7 +52,7 @@ class QueryParserProcessor(Processor):
         try:
             start_date = datetime.datetime.strptime(record.analysis_date, '%Y-%m-%d %H:%M:%S').date()
             parser = Parser(record.sql)
-            columns_dict = {} if parser.columns_dict == None else parser.columns_dict
+            columns_dict = {} if parser.columns_dict is None else parser.columns_dict
             query_parser_data = QueryParserData(tables=parser.tables,
                                                 tables_aliases=parser.tables_aliases,
                                                 columns=columns_dict,
@@ -60,8 +60,8 @@ class QueryParserProcessor(Processor):
                                                 sql=record.sql,
                                                 date=start_date.strftime('%Y-%m-%d'))
         except Exception as err:
-            logger.error(record.sql)
-            logger.error(err)
+            logger.debug(record.sql)
+            logger.debug(err)
             query_parser_data = None
             pass

@@ -16,11 +16,11 @@
 from typing import Optional
 from urllib.parse import quote_plus

-from .sql_source import SQLAlchemyConfig, SQLAlchemySource
+from .sql_source import SQLConnectionConfig, SQLSource
 from ..ometa.auth_provider import MetadataServerConfig


-class AthenaConfig(SQLAlchemyConfig):
+class AthenaConfig(SQLConnectionConfig):
     scheme: str = "awsathena+rest"
     username: Optional[str] = None
     password: Optional[str] = None
@@ -29,7 +29,7 @@ class AthenaConfig(SQLAlchemyConfig):
     s3_staging_dir: str
     work_group: str

-    def get_sql_alchemy_url(self):
+    def get_connection_url(self):
         url = f"{self.scheme}://"
         if self.username:
             url += f"{quote_plus(self.username)}"
@@ -46,9 +46,9 @@ class AthenaConfig(SQLAlchemyConfig):
         return url


-class AthenaSource(SQLAlchemySource):
+class AthenaSource(SQLSource):
     def __init__(self, config, metadata_config, ctx):
-        super().__init__(config, metadata_config, ctx, "athena")
+        super().__init__(config, metadata_config, ctx)

     @classmethod
     def create(cls, config_dict, metadata_config_dict, ctx):
@@ -17,26 +17,32 @@ from typing import Optional, Tuple

 # This import verifies that the dependencies are available.

-from .sql_source import BasicSQLAlchemyConfig, SQLAlchemySource
+from .sql_source import SQLConnectionConfig, SQLSource
 from ..ometa.auth_provider import MetadataServerConfig


-class BigQueryConfig(BasicSQLAlchemyConfig):
+class BigQueryConfig(SQLConnectionConfig, SQLSource):
     scheme = "bigquery"
     project_id: Optional[str] = None

-    def get_sql_alchemy_url(self):
+    def get_connection_url(self):
         if self.project_id:
             return f"{self.scheme}://{self.project_id}"
         return f"{self.scheme}://"

-    def get_identifier(self, schema: str, table: str) -> str:
-        if self.project_id:
-            return f"{self.project_id}.{schema}.{table}"
-        return f"{schema}.{table}"
+
+class BigQuerySource(SQLSource):
+    def __init__(self, config, metadata_config, ctx):
+        super().__init__(config, metadata_config, ctx)
+
+    @classmethod
+    def create(cls, config_dict, metadata_config_dict, ctx):
+        config = BigQueryConfig.parse_obj(config_dict)
+        metadata_config = MetadataServerConfig.parse_obj(metadata_config_dict)
+        return cls(config, metadata_config, ctx)

     def standardize_schema_table_names(
         self, schema: str, table: str
     ) -> Tuple[str, str]:
         segments = table.split(".")
         if len(segments) != 2:
@@ -44,14 +50,3 @@ class BigQueryConfig(BasicSQLAlchemyConfig):
         if segments[0] != schema:
             raise ValueError(f"schema {schema} does not match table {table}")
         return segments[0], segments[1]
-
-
-class BigQuerySource(SQLAlchemySource):
-    def __init__(self, config, metadata_config, ctx):
-        super().__init__(config, metadata_config, ctx, "bigquery")
-
-    @classmethod
-    def create(cls, config_dict, metadata_config_dict, ctx):
-        config = BigQueryConfig.parse_obj(config_dict)
-        metadata_config = MetadataServerConfig.parse_obj(metadata_config_dict)
-        return cls(config, metadata_config, ctx)
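A small standalone sketch of what standardize_schema_table_names does for BigQuery-style dataset.table names. This is a simplified illustration under the assumption that a malformed name is rejected with an error; the real method is the one shown in the hunks above:

    from typing import Tuple

    def standardize_schema_table_names(schema: str, table: str) -> Tuple[str, str]:
        # BigQuery reports tables as "<dataset>.<table>"; split and sanity-check
        segments = table.split(".")
        if len(segments) != 2:
            raise ValueError(f"expected 'dataset.table', got {table}")
        if segments[0] != schema:
            raise ValueError(f"schema {schema} does not match table {table}")
        return segments[0], segments[1]

    print(standardize_schema_table_names("tpcds", "tpcds.catalog_page"))  # ('tpcds', 'catalog_page')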
@@ -17,8 +17,8 @@ from pyhive import hive  # noqa: F401
 from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveTimestamp

 from .sql_source import (
-    BasicSQLAlchemyConfig,
-    SQLAlchemySource,
+    SQLConnectionConfig,
+    SQLSource,
     register_custom_type,
 )
 from ..ometa.auth_provider import MetadataServerConfig
@@ -28,13 +28,16 @@ register_custom_type(HiveTimestamp, "TIME")
 register_custom_type(HiveDecimal, "NUMBER")


-class HiveConfig(BasicSQLAlchemyConfig):
+class HiveConfig(SQLConnectionConfig):
     scheme = "hive"

+    def get_connection_url(self):
+        return super().get_connection_url()
+

-class HiveSource(SQLAlchemySource):
+class HiveSource(SQLSource):
     def __init__(self, config, metadata_config, ctx):
-        super().__init__(config, metadata_config, ctx, "hive")
+        super().__init__(config, metadata_config, ctx)

     @classmethod
     def create(cls, config_dict, metadata_config_dict, ctx):
@@ -16,24 +16,21 @@
 # This import verifies that the dependencies are available.
 import sqlalchemy_pytds  # noqa: F401

-from .sql_source import BasicSQLAlchemyConfig, SQLAlchemySource
+from .sql_source import SQLConnectionConfig, SQLSource
 from ..ometa.auth_provider import MetadataServerConfig


-class SQLServerConfig(BasicSQLAlchemyConfig):
+class SQLServerConfig(SQLConnectionConfig):
     host_port = "localhost:1433"
     scheme = "mssql+pytds"

-    def get_identifier(self, schema: str, table: str) -> str:
-        regular = f"{schema}.{table}"
-        if self.database:
-            return f"{self.database}.{regular}"
-        return regular
+    def get_connection_url(self):
+        return super().get_connection_url()


-class SQLServerSource(SQLAlchemySource):
+class SQLServerSource(SQLSource):
     def __init__(self, config, metadata_config, ctx):
-        super().__init__(config, metadata_config, ctx, "mssql")
+        super().__init__(config, metadata_config, ctx)

     @classmethod
     def create(cls, config_dict, metadata_config_dict, ctx):
@@ -13,19 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import pymysql  # noqa: F401
-
-from .sql_source import BasicSQLAlchemyConfig, SQLAlchemySource
+from .sql_source import SQLSource, SQLConnectionConfig
 from ..ometa.auth_provider import MetadataServerConfig


-class MySQLConfig(BasicSQLAlchemyConfig):
-    # defaults
+class MySQLConfig(SQLConnectionConfig):
     host_port = "localhost:3306"
     scheme = "mysql+pymysql"

+    def get_connection_url(self):
+        return super().get_connection_url()

-class MySQLSource(SQLAlchemySource):
+
+class MySQLSource(SQLSource):
     def __init__(self, config, metadata_config, ctx):
         super().__init__(config, metadata_config, ctx)

@@ -16,18 +16,18 @@
 # This import verifies that the dependencies are available.
 import cx_Oracle  # noqa: F401

-from .sql_source import BasicSQLAlchemyConfig, SQLAlchemySource
+from .sql_source import SQLSource, SQLConnectionConfig
 from ..ometa.auth_provider import MetadataServerConfig


-class OracleConfig(BasicSQLAlchemyConfig):
+class OracleConfig(SQLConnectionConfig):
     # defaults
     scheme = "oracle+cx_oracle"


-class OracleSource(SQLAlchemySource):
+class OracleSource(SQLSource):
     def __init__(self, config, metadata_config, ctx):
-        super().__init__(config, metadata_config, ctx, "oracle")
+        super().__init__(config, metadata_config, ctx)

     @classmethod
     def create(cls, config_dict, metadata_config_dict, ctx):
@@ -24,8 +24,8 @@ from metadata.ingestion.models.ometa_table_db import OMetaDatabaseAndTable
 import pymysql  # noqa: F401

 from metadata.generated.schema.entity.data.table import TableEntity, Column
-from metadata.ingestion.source.sql_source_common import SQLAlchemyHelper, SQLSourceStatus
-from .sql_source import BasicSQLAlchemyConfig
+from metadata.ingestion.source.sql_alchemy_helper import SQLAlchemyHelper, SQLSourceStatus
+from .sql_source import SQLConnectionConfig
 from metadata.ingestion.api.source import Source, SourceStatus
 from metadata.ingestion.models.table_metadata import DatabaseMetadata
 from itertools import groupby
@@ -38,27 +38,18 @@ from ...utils.helpers import get_service_or_create
 TableKey = namedtuple('TableKey', ['schema', 'table_name'])


-class PostgresSourceConfig(BasicSQLAlchemyConfig):
+class PostgresSourceConfig(SQLConnectionConfig):
     # defaults
     scheme = "postgresql+psycopg2"
     service_name = "postgres"
     service_type = "POSTGRES"

-    def get_sql_alchemy_url(self):
-        url = f"{self.scheme}://"
-        if self.username:
-            url += f"{self.username}"
-        if self.password:
-            url += f":{self.password}"
-        url += "@"
-        url += f"{self.host_port}"
-        if self.database:
-            url += f"/{self.database}"
-        return url
-
     def get_service_type(self) -> DatabaseServiceType:
         return DatabaseServiceType[self.service_type]

+    def get_connection_url(self):
+        return super().get_connection_url()
+

 def get_table_key(row: Dict[str, Any]) -> Union[TableKey, None]:
     """
@@ -73,7 +64,6 @@ def get_table_key(row: Dict[str, Any]) -> Union[TableKey, None]:


 class PostgresSource(Source):
-    # SELECT statement from mysql information_schema to extract table and column metadata
     SQL_STATEMENT = """
     SELECT
     c.table_catalog as cluster, c.table_schema as schema, c.table_name as name, pgtd.description as description
@@ -106,7 +96,7 @@ class PostgresSource(Source):
         self.status = SQLSourceStatus()
         self.service = get_service_or_create(config, metadata_config)
         self.include_pattern = IncludeFilterPattern
-        self.pattern = config.include_pattern
+        self.pattern = config

     @classmethod
     def create(cls, config_dict, metadata_config_dict, ctx):
@@ -131,7 +121,6 @@ class PostgresSource(Source):
         Using itertools.groupby and raw level iterator, it groups to table and yields TableMetadata
         :return:
         """
-        counter = 0
         for key, group in groupby(self._get_raw_extract_iter(), get_table_key):
             columns = []
             for row in group:
@@ -139,7 +128,7 @@ class PostgresSource(Source):
                 col_type = ''
                 if row['col_type'].upper() == 'CHARACTER VARYING':
                     col_type = 'VARCHAR'
-                elif row['col_type'].upper() == 'CHARACTER':
+                elif row['col_type'].upper() == 'CHARACTER' or row['col_type'].upper() == 'NAME':
                     col_type = 'CHAR'
                 elif row['col_type'].upper() == 'INTEGER':
                     col_type = 'INT'
@@ -149,28 +138,29 @@ class PostgresSource(Source):
                     col_type = 'DOUBLE'
                 elif row['col_type'].upper() == 'OID':
                     col_type = 'NUMBER'
-                elif row['col_type'].upper() == 'NAME':
-                    col_type = 'CHAR'
+                elif row['col_type'].upper() == 'ARRAY':
+                    col_type = 'ARRAY'
+                elif row['col_type'].upper() == 'BOOLEAN':
+                    col_type = 'BOOLEAN'
                 else:
-                    col_type = row['col_type'].upper()
-                if not self.include_pattern.included(self.pattern, last_row[1]):
-                    self.status.report_dropped(last_row['name'])
+                    col_type = None
+                if not self.pattern.include_pattern.included(f'{last_row[1]}.{last_row[2]}'):
+                    self.status.filtered(f'{last_row[1]}.{last_row[2]}', "pattern not allowed", last_row[2])
                     continue
-                columns.append(Column(name=row['col_name'], description=row['col_description'],
-                                      columnDataType=col_type, ordinalPosition=int(row['col_sort_order'])))
+                if col_type is not None:
+                    columns.append(Column(name=row['col_name'], description=row['col_description'],
+                                          columnDataType=col_type, ordinalPosition=int(row['col_sort_order'])))
             table_metadata = TableEntity(name=last_row['name'],
                                          description=last_row['description'],
                                          columns=columns)

-            self.status.report_table_scanned(table_metadata.name)
+            self.status.scanned(table_metadata.name.__root__)

             dm = DatabaseEntity(id=uuid.uuid4(),
                                 name=row['schema'],
                                 description=row['description'] if row['description'] is not None else ' ',
                                 service=EntityReference(id=self.service.id, type=self.SERVICE_TYPE))
             table_and_db = OMetaDatabaseAndTable(table=table_metadata, database=dm)
-            self.status.records_produced(dm)
             yield table_and_db

     def close(self):
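For quick reference, a compact standalone restatement of the Postgres column-type handling this hunk converges on. It is illustrative only and limited to the mappings visible in the diff; after the change an unrecognised type maps to None and the column is skipped rather than passed through verbatim:

    def map_postgres_col_type(col_type: str):
        mapping = {
            "CHARACTER VARYING": "VARCHAR",
            "CHARACTER": "CHAR",
            "NAME": "CHAR",
            "INTEGER": "INT",
            "OID": "NUMBER",
            "ARRAY": "ARRAY",
            "BOOLEAN": "BOOLEAN",
        }
        # None means "unsupported": the source now drops the column instead of guessing
        return mapping.get(col_type.upper())

    print(map_postgres_col_type("character varying"))  # VARCHAR
    print(map_postgres_col_type("jsonb"))              # None -> column skipped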
@@ -17,13 +17,13 @@ import logging
 from typing import Optional

 from metadata.ingestion.ometa.auth_provider import MetadataServerConfig
-from metadata.ingestion.source.sql_source import SQLAlchemySource, BasicSQLAlchemyConfig
+from metadata.ingestion.source.sql_source import SQLSource, SQLConnectionConfig
 from metadata.ingestion.api.source import SourceStatus

 logger = logging.getLogger(__name__)


-class RedshiftConfig(BasicSQLAlchemyConfig):
+class RedshiftConfig(SQLConnectionConfig):
     scheme = "postgresql+psycopg2"
     where_clause: Optional[str] = None
     duration: int = 1
@@ -34,8 +34,11 @@ class RedshiftConfig(BasicSQLAlchemyConfig):
             return f"{self.database}.{regular}"
         return regular

+    def get_connection_url(self):
+        return super().get_connection_url()
+

-class RedshiftSource(SQLAlchemySource):
+class RedshiftSource(SQLSource):

     def __init__(self, config, metadata_config, ctx):
         super().__init__(config, metadata_config, ctx)
@@ -1,200 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This import verifies that the dependencies are available.
-import logging
-import uuid
-
-import pymysql  # noqa: F401
-from pydantic import ValidationError
-
-from metadata.generated.schema.entity.data.table import Column, TableEntity
-from metadata.generated.schema.entity.data.database import DatabaseEntity
-from metadata.generated.schema.type.entityReference import EntityReference
-from metadata.ingestion.models.ometa_table_db import OMetaDatabaseAndTable
-from metadata.ingestion.ometa.auth_provider import MetadataServerConfig
-from metadata.ingestion.source.sql_source_common import BasicSQLQueryConfig, SQLAlchemyHelper, SQLSourceStatus
-from metadata.ingestion.api.source import Source, SourceStatus
-from itertools import groupby
-from typing import Iterator, Union, Dict, Any, Iterable
-from collections import namedtuple
-
-from metadata.utils.helpers import get_service_or_create
-
-TableKey = namedtuple('TableKey', ['schema', 'table_name'])
-
-
-class RedshiftConfig(BasicSQLQueryConfig):
-    scheme = "redshift"
-    where_clause: str = None
-    cluster_source: str = "CURRENT_DATABASE()"
-    api_endpoint: str = None
-    service_type: str = "REDSHIFT"
-    service_name: str = "aws_redshift"
-
-
-def get_table_key(row: Dict[str, Any]) -> Union[TableKey, None]:
-    """
-    Table key consists of schema and table name
-    :param row:
-    :return:
-    """
-    if row:
-        return TableKey(schema=row['schema'], table_name=row['name'])
-
-    return None
-
-
-logger = logging.getLogger(__name__)
-
-
-class RedshiftSQLSource(Source):
-    # SELECT statement from mysql information_schema to extract table and column metadata
-    SQL_STATEMENT = """
-    SELECT
-      *
-    FROM (
-        SELECT
-          {cluster_source} as cluster,
-          c.table_schema as schema,
-          c.table_name as name,
-          pgtd.description as description,
-          c.column_name as col_name,
-          c.data_type as col_type,
-          pgcd.description as col_description,
-          ordinal_position as col_sort_order
-        FROM INFORMATION_SCHEMA.COLUMNS c
-        INNER JOIN
-          pg_catalog.pg_statio_all_tables as st on c.table_schema=st.schemaname and c.table_name=st.relname
-        LEFT JOIN
-          pg_catalog.pg_description pgcd on pgcd.objoid=st.relid and pgcd.objsubid=c.ordinal_position
-        LEFT JOIN
-          pg_catalog.pg_description pgtd on pgtd.objoid=st.relid and pgtd.objsubid=0
-
-        UNION
-
-        SELECT
-          {cluster_source} as cluster,
-          view_schema as schema,
-          view_name as name,
-          NULL as description,
-          column_name as col_name,
-          data_type as col_type,
-          NULL as col_description,
-          ordinal_position as col_sort_order
-        FROM
-          PG_GET_LATE_BINDING_VIEW_COLS()
-          COLS(view_schema NAME, view_name NAME, column_name NAME, data_type VARCHAR, ordinal_position INT)
-    )
-
-    {where_clause_suffix}
-    ORDER by cluster, schema, name, col_sort_order ;
-    """
-
-    # CONFIG KEYS
-    WHERE_CLAUSE_SUFFIX_KEY = 'where_clause'
-    CLUSTER_SOURCE = 'cluster_source'
-    CLUSTER_KEY = 'cluster_key'
-    USE_CATALOG_AS_CLUSTER_NAME = 'use_catalog_as_cluster_name'
-    DATABASE_KEY = 'database_key'
-    SERVICE_TYPE = 'REDSHIFT'
-    DEFAULT_CLUSTER_SOURCE = 'CURRENT_DATABASE()'
-
-    def __init__(self, config, metadata_config, ctx):
-        super().__init__(ctx)
-        self.sql_stmt = RedshiftSQLSource.SQL_STATEMENT.format(
-            where_clause_suffix=config.where_clause,
-            cluster_source=config.cluster_source,
-            database=config.database
-        )
-        self.alchemy_helper = SQLAlchemyHelper(config, metadata_config, ctx, "Redshift", self.sql_stmt)
-        self.config = config
-        self.metadata_config = metadata_config
-        self._extract_iter: Union[None, Iterator] = None
-        self._database = 'redshift'
-        self.report = SQLSourceStatus()
-        self.service = get_service_or_create(config, metadata_config)
-
-    @classmethod
-    def create(cls, config_dict, metadata_config_dict, ctx):
-        config = RedshiftConfig.parse_obj(config_dict)
-        metadata_config = MetadataServerConfig.parse_obj(metadata_config_dict)
-        return cls(config, metadata_config, ctx)
-
-    def prepare(self):
-        pass
-
-    def _get_raw_extract_iter(self) -> Iterable[Dict[str, Any]]:
-        """
-        Provides iterator of result row from SQLAlchemy helper
-        :return:
-        """
-        rows = self.alchemy_helper.execute_query()
-        for row in rows:
-            yield row
-
-    def next_record(self) -> Iterable[OMetaDatabaseAndTable]:
-        """
-        Using itertools.groupby and raw level iterator, it groups to table and yields TableMetadata
-        :return:
-        """
-        for key, group in groupby(self._get_raw_extract_iter(), get_table_key):
-            try:
-                columns = []
-                for row in group:
-                    last_row = row
-                    col_type = ''
-                    if row['col_type'].upper() == 'CHARACTER VARYING':
-                        col_type = 'VARCHAR'
-                    elif row['col_type'].upper() == 'CHARACTER':
-                        col_type = 'CHAR'
-                    elif row['col_type'].upper() == 'INTEGER':
-                        col_type = 'INT'
-                    elif row['col_type'].upper() == 'TIMESTAMP WITHOUT TIME ZONE':
-                        col_type = 'TIMESTAMP'
-                    elif row['col_type'].upper() == 'DOUBLE PRECISION':
-                        col_type = 'DOUBLE'
-                    elif row['col_type'].upper() == 'OID':
-                        col_type = 'NUMBER'
-                    elif row['col_type'].upper() == 'NAME':
-                        col_type = 'CHAR'
-                    else:
-                        col_type = row['col_type'].upper()
-                    columns.append(Column(name=row['col_name'], description=row['col_description'],
-                                          columnDataType=col_type,
-                                          ordinalPosition=int(row['col_sort_order'])))
-                db = DatabaseEntity(id=uuid.uuid4(),
-                                    name=last_row['schema'],
-                                    description=last_row['description'] if last_row['description'] is not None else ' ',
-                                    service=EntityReference(id=self.service.id, type=self.config.service_type))
-                table = TableEntity(name=last_row['name'],
-                                    columns=columns)
-                table_and_db = OMetaDatabaseAndTable(table=table, database=db)
-                self.report.report_table_scanned(table.name)
-                self.report.records_produced(table.name)
-                yield table_and_db
-            except ValidationError as err:
-                logger.info("Dropped Table {} due to {}".format(row['name'], err))
-                self.report.report_dropped(row['name'])
-                continue
-
-    def get_report(self):
-        return self.report
-
-    def close(self):
-        self.alchemy_helper.close()
-
-    def get_status(self) -> SourceStatus:
-        return self.report
@@ -17,7 +17,7 @@
 import logging
 from metadata.ingestion.models.table_queries import TableQuery
 from metadata.ingestion.ometa.auth_provider import MetadataServerConfig
-from metadata.ingestion.source.sql_source_common import SQLAlchemyHelper, SQLSourceStatus
+from metadata.ingestion.source.sql_alchemy_helper import SQLAlchemyHelper, SQLSourceStatus
 from metadata.ingestion.api.source import Source, SourceStatus
 from typing import Iterator, Union, Dict, Any, Iterable
 from metadata.utils.helpers import get_start_and_end
@@ -99,8 +99,8 @@ class RedshiftUsageSource(Source):
         """
         for row in self._get_raw_extract_iter():
             tq = TableQuery(row['query'], row['label'], row['userid'], row['xid'], row['pid'], str(row['starttime']),
-                            str(row['endtime']), str(row['analysis_date']), row['duration'], row['database'], row['aborted'], row['sql'])
-            self.status.records_produced(tq)
+                            str(row['endtime']), str(row['analysis_date']), row['duration'], row['database'],
+                            row['aborted'], row['sql'])
             yield tq

     def close(self):
@@ -293,8 +293,7 @@ class SampleTableSource(Source):
         for table in self.tables['tables']:
             table_metadata = TableEntity(**table)
             table_and_db = OMetaDatabaseAndTable(table=table_metadata, database=db)
-            self.status.report_table_scanned(table_metadata.name.__root__)
-            self.status.records_produced(table_metadata.name.__root__)
+            self.status.scanned(table_metadata.name.__root__)
             yield table_and_db

     def close(self):
@@ -15,12 +15,11 @@

 from typing import Optional

-import snowflake.sqlalchemy
 from snowflake.sqlalchemy import custom_types

 from .sql_source import (
-    BasicSQLAlchemyConfig,
-    SQLAlchemySource,
+    SQLConnectionConfig,
+    SQLSource,
     register_custom_type,
 )
 from ..ometa.auth_provider import MetadataServerConfig
@@ -30,7 +29,7 @@ register_custom_type(custom_types.TIMESTAMP_LTZ, "TIME")
 register_custom_type(custom_types.TIMESTAMP_NTZ, "TIME")


-class SnowflakeConfig(BasicSQLAlchemyConfig):
+class SnowflakeConfig(SQLConnectionConfig):
     scheme = "snowflake"
     account: str
     database: str  # database is required
@@ -38,8 +37,8 @@ class SnowflakeConfig(BasicSQLAlchemyConfig):
     role: Optional[str]
     duration: Optional[int]

-    def get_sql_alchemy_url(self):
-        connect_string = super().get_sql_alchemy_url()
+    def get_connection_url(self):
+        connect_string = super().get_connection_url()
         options = {
             "account": self.account,
             "warehouse": self.warehouse,
@@ -50,14 +49,10 @@ class SnowflakeConfig(BasicSQLAlchemyConfig):
         connect_string = f"{connect_string}?{params}"
         return connect_string

-    def get_identifier(self, schema: str, table: str) -> str:
-        regular = super().get_identifier(schema, table)
-        return f"{self.database}.{regular}"
-

-class SnowflakeSource(SQLAlchemySource):
+class SnowflakeSource(SQLSource):
     def __init__(self, config, metadata_config, ctx):
-        super().__init__(config, metadata_config, ctx, "snowflake")
+        super().__init__(config, metadata_config, ctx)

     @classmethod
     def create(cls, config_dict, metadata_config_dict, ctx):
@@ -16,7 +16,7 @@
 # This import verifies that the dependencies are available.
 from metadata.ingestion.models.table_queries import TableQuery
 from metadata.ingestion.ometa.auth_provider import MetadataServerConfig
-from metadata.ingestion.source.sql_source_common import SQLAlchemyHelper, SQLSourceStatus
+from metadata.ingestion.source.sql_alchemy_helper import SQLAlchemyHelper, SQLSourceStatus
 from metadata.ingestion.api.source import Source, SourceStatus
 from typing import Iterator, Union, Dict, Any, Iterable

@@ -83,7 +83,7 @@ class SnowflakeUsageSource(Source):
         for row in self._get_raw_extract_iter():
             tq = TableQuery(row['query'], row['label'], 0, 0, 0, str(row['starttime']),
                             str(row['endtime']), str(row['starttime'])[0:19], 2, row['database'], 0, row['sql'])
-            self.report.records_produced(tq)
+            self.report.scanned(tq)
             yield tq

     def get_report(self):
@@ -13,71 +13,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from abc import abstractmethod
-from metadata.config.common import ConfigModel
-from typing import Any, Iterable, List, Optional, Tuple
-from dataclasses import dataclass, field
+from typing import Any, Iterable
 from metadata.ingestion.api.common import WorkflowContext
-from metadata.ingestion.api.source import SourceStatus
 from sqlalchemy import create_engine
+from .sql_source import SQLConnectionConfig, SQLSourceStatus
 from metadata.ingestion.ometa.auth_provider import MetadataServerConfig
 
 
-@dataclass
-class SQLSourceStatus(SourceStatus):
-    tables_scanned = 0
-    filtered: List[str] = field(default_factory=list)
-
-    def report_table_scanned(self, table_name: str) -> None:
-        self.tables_scanned += 1
-
-    def report_dropped(self, table_name: str) -> None:
-        self.filtered.append(table_name)
-
-
-class SQLAlchemyConfig(ConfigModel):
-    options: dict = {}
-
-    @abstractmethod
-    def get_sql_alchemy_url(self):
-        pass
-
-    def get_identifier(self, schema: str, table: str) -> str:
-        return f"{schema}.{table}"
-
-    def standardize_schema_table_names(
-        self, schema: str, table: str
-    ) -> Tuple[str, str]:
-        # Some SQLAlchemy dialects need a standardization step to clean the schema
-        # and table names. See BigQuery for an example of when this is useful.
-        return schema, table
-
-
-class BasicSQLQueryConfig(SQLAlchemyConfig):
-    username: Optional[str] = None
-    password: Optional[str] = None
-    host_port: str
-    database: Optional[str] = None
-    scheme: str
-
-    def get_sql_alchemy_url(self):
-        url = f"{self.scheme}://"
-        if self.username:
-            url += f"{self.username}"
-        if self.password:
-            url += f":{self.password}"
-        url += "@"
-        url += f"{self.host_port}"
-        if self.database:
-            url += f"/{self.database}"
-        return url
-
-
 class SQLAlchemyHelper:
     """A helper class for all SQL Sources that use SQLAlchemy to extend"""
 
-    def __init__(self, config: SQLAlchemyConfig, metadata_config: MetadataServerConfig,
+    def __init__(self, config: SQLConnectionConfig, metadata_config: MetadataServerConfig,
                  ctx: WorkflowContext, platform: str, query: str):
         self.config = config
         self.platform = platform
@@ -89,7 +36,7 @@ class SQLAlchemyHelper:
         """
         Create a SQLAlchemy connection to Database
         """
-        engine = create_engine(self.config.get_sql_alchemy_url())
+        engine = create_engine(self.config.get_connection_url())
         conn = engine.connect()
         return conn
 
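Stripped of the class around it, the connection step the helper now performs amounts to the following. Only get_connection_url() and the SQLAlchemy calls come from the change; the function wrapper, the text() call, and the variable names are illustrative.

from sqlalchemy import create_engine, text


def run_extract_query(config, query: str):
    # `config` is any concrete SQLConnectionConfig; the helper builds its engine
    # from the URL the config assembles and executes the extraction query on it.
    engine = create_engine(config.get_connection_url())
    conn = engine.connect()
    try:
        return list(conn.execute(text(query)))
    finally:
        conn.close()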
@@ -44,40 +44,20 @@ logger: logging.Logger = logging.getLogger(__name__)
 
 @dataclass
 class SQLSourceStatus(SourceStatus):
-    tables_scanned: List[str] = field(default_factory=list)
-    filtered: List[str] = field(default_factory=list)
+    success: List[str] = field(default_factory=list)
+    failures: List[str] = field(default_factory=list)
+    warnings: List[str] = field(default_factory=list)
 
-    def report_table_scanned(self, table_name: str) -> None:
-        self.tables_scanned.append(table_name)
+    def scanned(self, table_name: str) -> None:
+        self.success.append(table_name)
         logger.info('Table Scanned: {}'.format(table_name))
 
-    def report_dropped(self, table_name: str, err: str, dataset_name: str = None, col_type: str = None) -> None:
-        self.filtered.append(table_name)
-        logger.error("Dropped Table {} due to {}".format(dataset_name, err))
-        logger.error("column type {}".format(col_type))
+    def filtered(self, table_name: str, err: str, dataset_name: str = None, col_type: str = None) -> None:
+        self.warnings.append(table_name)
+        logger.warning("Dropped Table {} due to {}".format(dataset_name, err))
 
 
-class SQLAlchemyConfig(ConfigModel):
-    env: str = "PROD"
-    options: dict = {}
-    include_pattern: IncludeFilterPattern
-
-    @abstractmethod
-    def get_sql_alchemy_url(self):
-        pass
-
-    def get_identifier(self, schema: str, table: str) -> str:
-        return f"{schema}.{table}"
-
-    def standardize_schema_table_names(
-        self, schema: str, table: str
-    ) -> Tuple[str, str]:
-        # Some SQLAlchemy dialects need a standardization step to clean the schema
-        # and table names. See BigQuery for an example of when this is useful.
-        return schema, table
-
-
-class BasicSQLAlchemyConfig(SQLAlchemyConfig):
+class SQLConnectionConfig(ConfigModel):
     username: Optional[str] = None
     password: Optional[str] = None
     host_port: str
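A quick illustration of the renamed status API (the table names are made up): scanned() files a success, while filtered() files a warning and logs why the table was dropped instead of treating it as an error.

from metadata.ingestion.source.sql_source import SQLSourceStatus

status = SQLSourceStatus()
status.scanned("local_postgres.public.actor")
status.filtered("local_postgres.pg_catalog.pg_class",
                "Schema pattern not allowed",
                dataset_name="pg_catalog.pg_class")
print(status.success)   # ['local_postgres.public.actor']
print(status.warnings)  # ['local_postgres.pg_catalog.pg_class']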
@@ -85,8 +65,11 @@ class BasicSQLAlchemyConfig(SQLAlchemyConfig):
     scheme: str
     service_name: str
     service_type: str
+    options: dict = {}
+    include_pattern: IncludeFilterPattern = IncludeFilterPattern.allow_all()
 
-    def get_sql_alchemy_url(self):
+    @abstractmethod
+    def get_connection_url(self):
         url = f"{self.scheme}://"
         if self.username:
             url += f"{self.username}"
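Spelled out with concrete values, the URL assembly above behaves as follows; this is a worked example with made-up credentials, following the same optional database suffix the removed BasicSQLQueryConfig used, and the actual scheme string for any given connector lives in its own config class.

scheme = "postgresql"
username, password = "openmetadata_user", "secret"
host_port, database = "localhost:5432", "pagila"

url = f"{scheme}://"
if username:
    url += f"{username}"
if password:
    url += f":{password}"
url += "@"
url += f"{host_port}"
if database:
    url += f"/{database}"

assert url == "postgresql://openmetadata_user:secret@localhost:5432/pagila"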
@@ -101,8 +84,11 @@ class BasicSQLAlchemyConfig(SQLAlchemyConfig):
     def get_service_type(self) -> DatabaseServiceType:
         return DatabaseServiceType[self.service_type]
 
+    def get_service_name(self) -> str:
+        return self.service_name
 
-_field_type_mapping: Dict[Type[types.TypeEngine], str] = {
+_column_type_mapping: Dict[Type[types.TypeEngine], str] = {
     types.Integer: "INT",
     types.Numeric: "INT",
     types.Boolean: "BOOLEAN",
@@ -123,7 +109,7 @@ _field_type_mapping: Dict[Type[types.TypeEngine], str] = {
     types.JSON: "JSON"
 }
 
-_known_unknown_field_types: Set[Type[types.TypeEngine]] = {
+_known_unknown_column_types: Set[Type[types.TypeEngine]] = {
     types.Interval,
     types.CLOB,
 }
@@ -133,25 +119,25 @@ def register_custom_type(
     tp: Type[types.TypeEngine], output: str = None
 ) -> None:
     if output:
-        _field_type_mapping[tp] = output
+        _column_type_mapping[tp] = output
     else:
-        _known_unknown_field_types.add(tp)
+        _known_unknown_column_types.add(tp)
 
 
-def get_column_type(sql_report: SQLSourceStatus, dataset_name: str, column_type: Any) -> str:
+def get_column_type(status: SQLSourceStatus, dataset_name: str, column_type: Any) -> str:
     type_class: Optional[str] = None
-    for sql_type in _field_type_mapping.keys():
+    for sql_type in _column_type_mapping.keys():
         if isinstance(column_type, sql_type):
-            type_class = _field_type_mapping[sql_type]
+            type_class = _column_type_mapping[sql_type]
             break
     if type_class is None:
-        for sql_type in _known_unknown_field_types:
+        for sql_type in _known_unknown_column_types:
             if isinstance(column_type, sql_type):
                 type_class = "NULL"
                 break
 
     if type_class is None:
-        sql_report.warning(
+        status.warning(
             dataset_name, f"unable to map type {column_type!r} to metadata schema"
         )
         type_class = "NULL"
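Connectors can still extend the renamed mappings through register_custom_type(); a sketch follows, where the two SQLAlchemy types and the "BYTES" label are illustrative choices rather than mappings this change adds.

from sqlalchemy import types

from metadata.ingestion.source.sql_source import register_custom_type

# Map a dialect-specific type to a metadata column type...
register_custom_type(types.LargeBinary, "BYTES")
# ...or mark it as known-but-unmappable so get_column_type() returns "NULL"
# for it without logging a warning.
register_custom_type(types.PickleType)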
@@ -159,10 +145,10 @@ def get_column_type(sql_report: SQLSourceStatus, dataset_name: str, column_type:
     return type_class
 
 
-class SQLAlchemySource(Source):
+class SQLSource(Source):
 
-    def __init__(self, config: SQLAlchemyConfig, metadata_config: MetadataServerConfig,
-                 ctx: WorkflowContext, connector: str = None):
+    def __init__(self, config: SQLConnectionConfig, metadata_config: MetadataServerConfig,
+                 ctx: WorkflowContext):
         super().__init__(ctx)
         self.config = config
         self.metadata_config = metadata_config
@@ -176,20 +162,25 @@ class SQLAlchemySource(Source):
     def create(cls, config_dict: dict, metadata_config_dict: dict, ctx: WorkflowContext):
         pass
 
+    def standardize_schema_table_names(
+        self, schema: str, table: str
+    ) -> Tuple[str, str]:
+        return schema, table
+
     def next_record(self) -> Iterable[OMetaDatabaseAndTable]:
         sql_config = self.config
-        url = sql_config.get_sql_alchemy_url()
+        url = sql_config.get_connection_url()
         logger.debug(f"sql_alchemy_url={url}")
         engine = create_engine(url, **sql_config.options)
         inspector = inspect(engine)
         for schema in inspector.get_schema_names():
             if not sql_config.include_pattern.included(schema):
-                self.status.report_dropped(schema, "Schema pattern not allowed")
+                self.status.filtered(schema, "Schema pattern not allowed")
                 continue
             logger.debug("total tables {}".format(inspector.get_table_names(schema)))
             for table in inspector.get_table_names(schema):
                 try:
-                    schema, table = sql_config.standardize_schema_table_names(schema, table)
+                    schema, table = self.standardize_schema_table_names(schema, table)
                     pk_constraints = inspector.get_pk_constraint(table, schema)
                     pk_columns = pk_constraints['column_constraints'] if len(
                         pk_constraints) > 0 and "column_constraints" in pk_constraints.keys() else {}
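Because the standardization hook now lives on the source rather than the config, a dialect source overrides it directly. A hypothetical example in the spirit of the BigQuery note the removed comment carried; the class name and the splitting rule are assumptions.

from typing import Tuple

from metadata.ingestion.source.sql_source import SQLSource


class MyBigQueryLikeSource(SQLSource):
    """Hypothetical dialect source whose inspector reports tables as 'dataset.table'."""

    def standardize_schema_table_names(self, schema: str, table: str) -> Tuple[str, str]:
        # If the reflected table name already embeds its schema, split it apart so the
        # rest of next_record() sees clean (schema, table) pairs.
        if "." in table:
            schema, table = table.split(".", 1)
        return schema, table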
@@ -203,11 +194,11 @@ class SQLAlchemySource(Source):
                         if 'column_names' in constraint.keys():
                             unique_columns = constraint['column_names']
 
-                    dataset_name = sql_config.get_identifier(schema, table)
-                    self.status.report_table_scanned('{}.{}'.format(self.config.service_name, dataset_name))
+                    dataset_name = f"{schema}.{table}"
+                    self.status.scanned('{}.{}'.format(self.config.get_service_name(), dataset_name))
                     if not sql_config.include_pattern.included(dataset_name):
-                        self.status.report_dropped('{}.{}'.format(self.config.service_name, dataset_name),
+                        self.status.filtered('{}.{}'.format(self.config.get_service_name(), dataset_name),
                                                    "Table pattern not allowed")
                         continue
 
                     columns = inspector.get_columns(table, schema)
@@ -216,15 +207,9 @@ class SQLAlchemySource(Source):
                         table_info: dict = inspector.get_table_comment(table, schema)
                     except NotImplementedError:
                         description: Optional[str] = None
-                        properties: Dict[str, str] = {}
                     else:
                         description = table_info["text"]
 
-                    # The "properties" field is a non-standard addition to SQLAlchemy's interface.
-                    properties = table_info.get("properties", {})
-                    # TODO: capture inspector.get_pk_constraint
-                    # TODO: capture inspector.get_sorted_table_and_fkc_names
-
                     table_columns = []
                     row_order = 1
                     for column in columns:
@@ -255,12 +240,11 @@ class SQLAlchemySource(Source):
                                   columns=table_columns)
 
                     table_and_db = OMetaDatabaseAndTable(table=table, database=db)
-                    self.status.records_produced(table.name)
                     yield table_and_db
                 except ValidationError as err:
                     logger.error(err)
-                    self.status.report_dropped('{}.{}'.format(self.config.service_name, dataset_name),
+                    self.status.filtered('{}.{}'.format(self.config.service_name, dataset_name),
                                                "Validation error")
                     continue
 
     def close(self):
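Downstream, each record yielded by next_record() carries the table together with its database, so a consumer only needs to iterate. A toy illustration; constructing `source` (a concrete SQLSource subclass wired with its config, metadata config, and workflow context) is elided here.

# Toy consumer: each record is an OMetaDatabaseAndTable pairing a Table with
# its parent Database, so the sink can create or update both together.
for record in source.next_record():
    print(record.table.name)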
@@ -42,7 +42,6 @@ def get_table_column_join(table, table_aliases, joins):
     except ValueError as err:
         logger.error("Error in parsing sql query joins {}".format(err))
         pass
 
     return TableColumnJoin(table_column=table_column, joined_with=joined_with)
 
-
@@ -118,6 +118,8 @@ class Workflow:
             if hasattr(self, 'sink'):
                 self.sink.write_record(processed_record)
         self.report['sink'] = self.sink.get_status().as_obj()
 
+    def stop(self):
         if hasattr(self, 'processor'):
             self.processor.close()
         if hasattr(self, 'stage'):
@@ -41,7 +41,7 @@ def get_service_or_create(config, metadata_config) -> DatabaseServiceEntity:
     if service is not None:
         return service
     else:
-        service = {'jdbc': {'connectionUrl': config.get_sql_alchemy_url(), 'driverClass': 'jdbc'},
+        service = {'jdbc': {'connectionUrl': config.get_connection_url(), 'driverClass': 'jdbc'},
                    'name': config.service_name, 'description': '', 'serviceType': config.get_service_type()}
         created_service = client.create_database_service(CreateDatabaseServiceEntityRequest(**service))
         return created_service
@@ -24,4 +24,4 @@ services:
     volumes:
       - ./setup:/setup
     ports:
-      - 51433:1433
+      - 1433:1433