Fix linting (#1958)

Pere Miquel Brull 2021-12-29 17:33:40 +01:00 committed by GitHub
parent b11e503117
commit 1e334af89c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 175 additions and 115 deletions


@@ -1,6 +1,10 @@
 [BASIC]
 # W1203: logging-fstring-interpolation - f-string brings better readability and unifies style
-disable=W1203
+# W1202: logging-format-interpolation - lazy formatting in logging functions
+disable=W1203,W1202
+docstring-min-length=20
+max-args=7
+max-attributes=12
 
 [MASTER]
 fail-under=6.0
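
For context, a minimal sketch (logger and table name are illustrative, not from the source) of the three logging styles these two checks distinguish; with W1203 and W1202 disabled, the first two interpolation styles pass the linter alongside the lazy %-style that pylint recommends by default:

import logging

logger = logging.getLogger(__name__)
table = "dim_orders"

# W1203 (disabled above): f-string interpolation inside a logging call
logger.info(f"Table Scanned: {table}")

# W1202 (disabled above): str.format() interpolation inside a logging call
logger.info("Table Scanned: {}".format(table))

# Lazy %-style formatting, pylint's default recommendation
logger.info("Table Scanned: %s", table)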


@@ -8,6 +8,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+Generic source to build SQL connectors.
+"""
 import json
 import logging
 import re
@@ -16,7 +19,7 @@ import uuid
 from abc import abstractmethod
 from dataclasses import dataclass, field
 from datetime import datetime
-from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Type
+from typing import Dict, Iterable, List, Optional, Tuple
 from urllib.parse import quote_plus
 
 from pydantic import SecretStr
@@ -54,20 +57,22 @@ logger: logging.Logger = logging.getLogger(__name__)
 @dataclass
 class SQLSourceStatus(SourceStatus):
+    """
+    Reports the source status after ingestion
+    """
+
     success: List[str] = field(default_factory=list)
     failures: List[str] = field(default_factory=list)
     warnings: List[str] = field(default_factory=list)
     filtered: List[str] = field(default_factory=list)
 
-    def scanned(self, table_name: str) -> None:
-        self.success.append(table_name)
-        logger.info("Table Scanned: {}".format(table_name))
+    def scanned(self, record: str) -> None:
+        self.success.append(record)
+        logger.info(f"Table Scanned: {record}")
 
-    def filter(
-        self, table_name: str, err: str, dataset_name: str = None, col_type: str = None
-    ) -> None:
-        self.filtered.append(table_name)
-        logger.warning("Dropped Table {} due to {}".format(table_name, err))
+    def filter(self, record: str, err: str) -> None:
+        self.filtered.append(record)
+        logger.warning(f"Dropped Table {record} due to {err}")
 
 
 def build_sql_source_connection_url(
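
A minimal usage sketch of the slimmed-down status API after this change (the service and table names are made up for illustration):

status = SQLSourceStatus()
status.scanned("mysql_service.dim_orders")
status.filter("mysql_service.tmp_orders", "Table pattern not allowed")

print(status.success)   # ['mysql_service.dim_orders']
print(status.filtered)  # ['mysql_service.tmp_orders']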
@@ -76,8 +81,11 @@ def build_sql_source_connection_url(
     username: Optional[str] = None,
     password: Optional[SecretStr] = None,
     database: Optional[str] = None,
-    options: dict = {},
+    options: Optional[dict] = None,
 ) -> str:
+    """
+    Helper function to prepare the db URL
+    """
 
     url = f"{scheme}://"
     if username is not None:
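
Replacing options: dict = {} with Optional[dict] = None addresses pylint's dangerous-default-value check (W0102): a mutable default is created once and then shared across calls. A standalone sketch of the pitfall, with illustrative function names that are not part of the source:

def add_option_shared(key, value, options: dict = {}):
    # The same dict instance backs every call that relies on the default
    options[key] = value
    return options


def add_option_fresh(key, value, options: dict = None):
    # A new dict is created on each call when no argument is passed
    options = options if options is not None else {}
    options[key] = value
    return options


print(add_option_shared("a", 1))  # {'a': 1}
print(add_option_shared("b", 2))  # {'a': 1, 'b': 2}  <- state leaked between calls
print(add_option_fresh("a", 1))   # {'a': 1}
print(add_option_fresh("b", 2))   # {'b': 2}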
@@ -100,6 +108,12 @@ def build_sql_source_connection_url(
 
 class SQLConnectionConfig(ConfigModel):
+    """
+    Config class containing all supported
+    configurations for an SQL source, including
+    data profiling and DBT generated information.
+    """
+
     username: Optional[str] = None
     password: Optional[SecretStr] = None
     host_port: str
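
The password field stays typed as pydantic's SecretStr, which masks the value when the model is printed or logged. A small sketch of that behaviour with a toy model (not the actual config class):

from typing import Optional

from pydantic import BaseModel, SecretStr


class DemoConfig(BaseModel):
    username: Optional[str] = None
    password: Optional[SecretStr] = None


cfg = DemoConfig(username="ingestion_bot", password="s3cret")
print(cfg)                              # password=SecretStr('**********')
print(cfg.password.get_secret_value())  # 's3cret', only when explicitly requested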
@@ -144,7 +158,8 @@ def _get_table_description(schema: str, table: str, inspector: Inspector) -> str
     description = None
     try:
         table_info: dict = inspector.get_table_comment(table, schema)
-    except Exception as err:
+    # Catch any exception without breaking the ingestion
+    except Exception as err:  # pylint: disable=broad-except
         logger.error(f"Table Description Error : {err}")
     else:
         description = table_info["text"]
@@ -152,6 +167,12 @@ def _get_table_description(schema: str, table: str, inspector: Inspector) -> str
 
 class SQLSource(Source[OMetaDatabaseAndTable]):
+    """
+    Source Connector implementation to extract
+    Database & Table information and convert it
+    to OpenMetadata Entities
+    """
+
     def __init__(
         self,
         config: SQLConnectionConfig,
@@ -174,26 +195,38 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
         self.data_profiler = None
         self.data_models = {}
         if self.config.dbt_catalog_file is not None:
-            self.dbt_catalog = json.load(open(self.config.dbt_catalog_file, "r"))
+            with open(self.config.dbt_catalog_file, "r", encoding="utf-8") as catalog:
+                self.dbt_catalog = json.load(catalog)
         if self.config.dbt_manifest_file is not None:
-            self.dbt_manifest = json.load(open(self.config.dbt_manifest_file, "r"))
+            with open(self.config.dbt_manifest_file, "r", encoding="utf-8") as manifest:
+                self.dbt_manifest = json.load(manifest)
 
-    def _instantiate_profiler(self):
+    def _instantiate_profiler(self) -> bool:
+        """
+        If the profiler is configured, load it and run.
+        Return True if the profiling ran correctly
+        """
         try:
             if self.config.data_profiler_enabled:
                 if self.data_profiler is None:
+                    # pylint: disable=import-outside-toplevel
                     from metadata.profiler.dataprofiler import DataProfiler
 
+                    # pylint: enable=import-outside-toplevel
                     self.data_profiler = DataProfiler(
                         status=self.status, connection_str=self.connection_string
                     )
                 return True
-            return False
-        except Exception:
+        # Catch any errors during profiling init and continue ingestion
+        except Exception as exc:  # pylint: disable=broad-except
             logger.error(
+                f"Error loading profiler {exc}"
                 "DataProfiler configuration is enabled. Please make sure you ran "
                 "pip install 'openmetadata-ingestion[data-profiler]'"
             )
+        return False
 
     def prepare(self):
         self._parse_data_model()
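
The dbt file handling now uses context managers with an explicit encoding, which addresses pylint's consider-using-with (R1732) and unspecified-encoding (W1514) checks. A minimal standalone sketch with a hypothetical file path:

import json

# Before: the handle returned by open() is never closed explicitly and no encoding is given
# dbt_catalog = json.load(open("catalog.json", "r"))

# After: the with-block closes the file even on errors, and the encoding is explicit
with open("catalog.json", "r", encoding="utf-8") as catalog:
    dbt_catalog = json.load(catalog)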
@@ -204,15 +237,15 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
     ):
         pass
 
-    def type_of_column_name(self, sa_type, table_name: str, column_name: str):
-        return sa_type
-
-    def standardize_schema_table_names(
-        self, schema: str, table: str
-    ) -> Tuple[str, str]:
+    @staticmethod
+    def standardize_schema_table_names(schema: str, table: str) -> Tuple[str, str]:
         return schema, table
 
-    def fetch_sample_data(self, schema: str, table: str):
+    def fetch_sample_data(self, schema: str, table: str) -> Optional[TableData]:
+        """
+        Get some sample data from the source to be added
+        to the Table Entities
+        """
         try:
             query = self.config.query.format(schema, table)
             logger.info(query)
@@ -221,14 +254,14 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
             for col in results.keys():
                 cols.append(col.replace(".", "_DOT_"))
             rows = []
-            for r in results:
-                row = list(r)
+            for res in results:
+                row = list(res)
                 rows.append(row)
             return TableData(columns=cols, rows=rows)
-        except Exception as err:
-            logger.error(
-                "Failed to generate sample data for {} - {}".format(table, err)
-            )
+        # Catch any errors and continue the ingestion
+        except Exception as err:  # pylint: disable=broad-except
+            logger.error(f"Failed to generate sample data for {table} - {err}")
+            return None
 
     def next_record(self) -> Iterable[OMetaDatabaseAndTable]:
         inspector = inspect(self.engine)
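
fetch_sample_data now returns Optional[TableData] and ends every path with an explicit return, in line with pylint's inconsistent-return-statements check (R1710). A generic sketch of the pattern (the lookup functions are illustrative only):

from typing import Optional


def lookup_implicit(data: dict, key: str):
    if key in data:
        return data[key]
    # Falling off the end silently returns None -> R1710 flags the mixed style


def lookup_explicit(data: dict, key: str) -> Optional[str]:
    if key in data:
        return data[key]
    return None  # every path returns explicitly, and the hint documents it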
@@ -236,7 +269,7 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
             if not self.sql_config.schema_filter_pattern.included(schema):
                 self.status.filter(schema, "Schema pattern not allowed")
                 continue
-            logger.debug("total tables {}".format(inspector.get_table_names(schema)))
+            logger.debug(f"Total tables {inspector.get_table_names(schema)}")
             if self.config.include_tables:
                 yield from self.fetch_tables(inspector, schema)
             if self.config.include_views:
@@ -245,6 +278,10 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
     def fetch_tables(
         self, inspector: Inspector, schema: str
     ) -> Iterable[OMetaDatabaseAndTable]:
+        """
+        Scrape an SQL schema and prepare Database and Table
+        OpenMetadata Entities
+        """
         for table_name in inspector.get_table_names(schema):
             try:
                 schema, table_name = self.standardize_schema_table_names(
@@ -252,13 +289,11 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                 )
                 if not self.sql_config.table_filter_pattern.included(table_name):
                     self.status.filter(
-                        "{}.{}".format(self.config.get_service_name(), table_name),
+                        f"{self.config.get_service_name()}.{table_name}",
                         "Table pattern not allowed",
                     )
                     continue
-                self.status.scanned(
-                    "{}.{}".format(self.config.get_service_name(), table_name)
-                )
+                self.status.scanned(f"{self.config.get_service_name()}.{table_name}")
 
                 description = _get_table_description(schema, table_name, inspector)
                 fqn = f"{self.config.service_name}.{self.config.database}.{schema}.{table_name}"
@@ -275,7 +310,8 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                 if self.sql_config.generate_sample_data:
                     table_data = self.fetch_sample_data(schema, table_name)
                     table_entity.sampleData = table_data
 
-            except Exception as err:
+            # Catch any errors during the ingestion and continue
+            except Exception as err:  # pylint: disable=broad-except
                 logger.error(repr(err))
                 logger.error(err)
@@ -291,16 +327,19 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                     table=table_entity, database=self._get_database(schema)
                 )
                 yield table_and_db
-            except Exception as err:
+            # Catch any errors during the ingestion and continue
+            except Exception as err:  # pylint: disable=broad-except
                 logger.error(err)
-                self.status.warnings.append(
-                    "{}.{}".format(self.config.service_name, table_name)
-                )
+                self.status.warnings.append(f"{self.config.service_name}.{table_name}")
                 continue
 
     def fetch_views(
         self, inspector: Inspector, schema: str
     ) -> Iterable[OMetaDatabaseAndTable]:
+        """
+        Get all views in the SQL schema and prepare
+        Database & Table OpenMetadata Entities
+        """
         for view_name in inspector.get_view_names(schema):
             try:
                 if self.config.scheme == "bigquery":
@@ -309,12 +348,11 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                     )
                 if not self.sql_config.table_filter_pattern.included(view_name):
                     self.status.filter(
-                        "{}.{}".format(self.config.get_service_name(), view_name),
+                        f"{self.config.get_service_name()}.{view_name}",
                         "View pattern not allowed",
                     )
                     continue
-
                 try:
                     if self.config.scheme == "bigquery":
                         view_definition = inspector.get_view_definition(
                             f"{self.config.project_id}.{schema}.{view_name}"
@@ -329,17 +367,15 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                 except NotImplementedError:
                     view_definition = ""
 
-                description = _get_table_description(schema, view_name, inspector)
-                table_columns = self._get_columns(schema, view_name, inspector)
-                view_name = view_name.replace(".", "_DOT_")
-                fqn = f"{self.config.service_name}.{self.config.database}.{schema}.{view_name}"
                 table = Table(
                     id=uuid.uuid4(),
-                    name=view_name,
+                    name=view_name.replace(".", "_DOT_"),
                     tableType="View",
-                    description=description if description is not None else " ",
-                    fullyQualifiedName=fqn,
-                    columns=table_columns,
+                    description=_get_table_description(schema, view_name, inspector)
+                    or "",
+                    # This will be generated in the backend!! #1673
+                    fullyQualifiedName=view_name,
+                    columns=self._get_columns(schema, view_name, inspector),
                     viewDefinition=view_definition,
                 )
                 if self.sql_config.generate_sample_data:
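
The description handling collapses the conditional expression into `description or ""`. The two forms are not strictly identical: `or` also substitutes when the left side is an empty string, not only when it is None, which is acceptable here because the fallback is an empty description anyway. A quick sketch:

description = None

# Old style: only substitute when the value is exactly None
value = description if description is not None else " "

# New style: substitute whenever the value is falsy (None or "")
value = description or ""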
@@ -350,31 +386,34 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                     table=table, database=self._get_database(schema)
                 )
                 yield table_and_db
-            except Exception as err:
+            # Catch any errors and continue the ingestion
+            except Exception as err:  # pylint: disable=broad-except
                 logger.error(err)
-                self.status.warnings.append(
-                    "{}.{}".format(self.config.service_name, view_name)
-                )
+                self.status.warnings.append(f"{self.config.service_name}.{view_name}")
                 continue
 
     def _parse_data_model(self):
+        """
+        Get all the DBT information and feed it to the Table Entity
+        """
         if self.config.dbt_manifest_file and self.config.dbt_catalog_file:
             logger.info("Parsing Data Models")
-            manifest_nodes = self.dbt_manifest["nodes"]
-            manifest_sources = self.dbt_manifest["sources"]
-            manifest_entities = {**manifest_nodes, **manifest_sources}
-            catalog_nodes = self.dbt_catalog["nodes"]
-            catalog_sources = self.dbt_catalog["sources"]
-            catalog_entities = {**catalog_nodes, **catalog_sources}
+            manifest_entities = {
+                **self.dbt_manifest["nodes"],
+                **self.dbt_manifest["sources"],
+            }
+            catalog_entities = {
+                **self.dbt_catalog["nodes"],
+                **self.dbt_catalog["sources"],
+            }
 
             for key, mnode in manifest_entities.items():
                 name = mnode["alias"] if "alias" in mnode.keys() else mnode["name"]
                 cnode = catalog_entities.get(key)
-                if cnode is not None:
-                    columns = self._parse_data_model_columns(name, mnode, cnode)
-                else:
-                    columns = []
+                columns = (
+                    self._parse_data_model_columns(name, mnode, cnode) if cnode else []
+                )
 
                 if mnode["resource_type"] == "test":
                     continue
                 upstream_nodes = self._parse_data_model_upstream(mnode)
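
The manifest and catalog lookups are now built with dictionary unpacking instead of intermediate variables; the ** operator merges both mappings, with keys from the later dict overriding earlier ones. A tiny sketch with made-up dbt node keys:

nodes = {"model.jaffle_shop.orders": {"schema": "dbt"}}
sources = {"source.jaffle_shop.raw_orders": {"schema": "raw"}}

manifest_entities = {
    **nodes,
    **sources,
}
print(list(manifest_entities))
# ['model.jaffle_shop.orders', 'source.jaffle_shop.raw_orders']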
@@ -382,14 +421,12 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                     mnode["alias"] if "alias" in mnode.keys() else mnode["name"]
                 )
                 model_name = model_name.replace(".", "_DOT_")
-                description = mnode.get("description", "")
                 schema = mnode["schema"]
-                path = f"{mnode['root_path']}/{mnode['original_file_path']}"
                 raw_sql = mnode.get("raw_sql", "")
                 model = DataModel(
                     modelType=ModelType.DBT,
-                    description=description,
-                    path=path,
+                    description=mnode.get("description", ""),
+                    path=f"{mnode['root_path']}/{mnode['original_file_path']}",
                     rawSql=raw_sql,
                     sql=mnode.get("compiled_sql", raw_sql),
                     columns=columns,
@@ -403,12 +440,14 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
         if "depends_on" in mnode and "nodes" in mnode["depends_on"]:
             for node in mnode["depends_on"]["nodes"]:
                 try:
-                    node_type, database, table = node.split(".", 2)
+                    _, database, table = node.split(".", 2)
                     table = table.replace(".", "_DOT_")
                     table_fqn = f"{self.config.service_name}.{database}.{table}"
                     upstream_nodes.append(table_fqn)
-                except Exception:
-                    logger.error(f"Failed to parse the node {node} to capture lineage")
+                except Exception as err:  # pylint: disable=broad-except
+                    logger.error(
+                        f"Failed to parse the node {node} to capture lineage {err}"
+                    )
                     continue
         return upstream_nodes
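
In the lineage parsing, the unused node_type is replaced with the conventional `_` placeholder and the caught error is now included in the log message. A short sketch of how the maxsplit argument keeps dots inside the table part intact (the node id is made up):

node = "model.jaffle_shop.stg.orders"

# maxsplit=2 splits on the first two dots only; the remainder stays in `table`
_, database, table = node.split(".", 2)
print(database)  # jaffle_shop
print(table)     # stg.orders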
@@ -443,8 +482,8 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                     ordinalPosition=ccolumn["index"],
                 )
                 columns.append(col)
-            except Exception as e:
-                logger.error(f"Failed to parse column type due to {e}")
+            except Exception as err:  # pylint: disable=broad-except
+                logger.error(f"Failed to parse column type due to {err}")
 
         return columns
@@ -454,33 +493,58 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
             service=EntityReference(id=self.service.id, type=self.config.service_type),
         )
 
-    def parse_raw_data_type(self, raw_data_type):
-        return raw_data_type
+    @staticmethod
+    def _get_column_constraints(
+        column, pk_columns, unique_columns
+    ) -> Optional[Constraint]:
+        """
+        Prepare column constraints for the Table Entity
+        """
+        constraint = None
+        if column["nullable"]:
+            constraint = Constraint.NULL
+        elif not column["nullable"]:
+            constraint = Constraint.NOT_NULL
+        if column["name"] in pk_columns:
+            constraint = Constraint.PRIMARY_KEY
+        elif column["name"] in unique_columns:
+            constraint = Constraint.UNIQUE
+        return constraint
 
     def _get_columns(
         self, schema: str, table: str, inspector: Inspector
-    ) -> List[Column]:
+    ) -> Optional[List[Column]]:
+        """
+        Get columns types and constraints information
+        """
+        # Get inspector information:
         pk_constraints = inspector.get_pk_constraint(table, schema)
+        try:
+            unique_constraints = inspector.get_unique_constraints(table, schema)
+        except NotImplementedError:
+            logger.warning("Cannot obtain constraints - NotImplementedError")
+            unique_constraints = []
+
         pk_columns = (
             pk_constraints["column_constraints"]
             if len(pk_constraints) > 0 and "column_constraints" in pk_constraints.keys()
             else {}
         )
-        unique_constraints = []
-        try:
-            unique_constraints = inspector.get_unique_constraints(table, schema)
-        except NotImplementedError:
-            pass
-        unique_columns = []
-        for constraint in unique_constraints:
-            if "column_names" in constraint.keys():
-                unique_columns = constraint["column_names"]
+
+        unique_columns = [
+            constraint["column_names"]
+            for constraint in unique_constraints
+            if "column_names" in constraint.keys()
+        ]
+
         dataset_name = f"{schema}.{table}"
-        columns = inspector.get_columns(table, schema)
         table_columns = []
-        row_order = 1
+
         try:
-            for column in columns:
+            for row_order, column in enumerate(inspector.get_columns(table, schema)):
                 if "." in column["name"]:
                     logger.info(f"Found '.' in {column['name']}")
                     column["name"] = column["name"].replace(".", "_DOT_")
@@ -489,9 +553,6 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                 col_data_length = None
                 arr_data_type = None
                 if "raw_data_type" in column and column["raw_data_type"] is not None:
-                    column["raw_data_type"] = self.parse_raw_data_type(
-                        column["raw_data_type"]
-                    )
                     (
                         col_type,
                         data_type_display,
@@ -514,26 +575,16 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                         r"(?:\w*)(?:[(]*)(\w*)(?:.*)", str(column["type"])
                     ).groups()
                     data_type_display = column["type"]
-                col_constraint = None
-                if column["nullable"]:
-                    col_constraint = Constraint.NULL
-                elif not column["nullable"]:
-                    col_constraint = Constraint.NOT_NULL
-                if column["name"] in pk_columns:
-                    col_constraint = Constraint.PRIMARY_KEY
-                elif column["name"] in unique_columns:
-                    col_constraint = Constraint.UNIQUE
-                if col_type.upper() in ["CHAR", "VARCHAR", "BINARY", "VARBINARY"]:
+
+                col_constraint = self._get_column_constraints(
+                    column, pk_columns, unique_columns
+                )
+
+                if col_type.upper() in {"CHAR", "VARCHAR", "BINARY", "VARBINARY"}:
                     col_data_length = column["type"].length
                     if col_data_length is None:
                         col_data_length = 1
                 try:
-                    if col_type == "NULL":
-                        col_type = self.type_of_column_name(
-                            col_type,
-                            column_name=column["name"],
-                            table_name=dataset_name,
-                        )
                     if col_type == "NULL":
                         col_type = "VARCHAR"
                         data_type_display = "varchar"
@@ -544,27 +595,32 @@ class SQLSource(Source[OMetaDatabaseAndTable]):
                         name=column["name"],
                         description=column.get("comment", None),
                         dataType=col_type,
-                        dataTypeDisplay="{}({})".format(col_type, col_data_length)
+                        dataTypeDisplay=f"{col_type}({col_data_length})"
                         if data_type_display is None
                         else f"{data_type_display}",
                         dataLength=col_data_length,
                         constraint=col_constraint,
-                        ordinalPosition=row_order,
-                        children=children if children is not None else None,
+                        ordinalPosition=row_order + 1,  # enumerate starts at 0
+                        children=children,
                         arrayDataType=arr_data_type,
                     )
-                except Exception as err:
+                except Exception as err:  # pylint: disable=broad-except
                     logger.error(traceback.format_exc())
                     logger.error(traceback.print_exc())
                     logger.error(f"{err} : {column}")
                     continue
                 table_columns.append(om_column)
-                row_order = row_order + 1
             return table_columns
-        except Exception as err:
-            logger.error("{}: {} {}".format(repr(err), table, err))
+        except Exception as err:  # pylint: disable=broad-except
+            logger.error(f"{repr(err)}: {table} {err}")
+            return None
 
     def run_data_profiler(self, table: str, schema: str) -> TableProfile:
+        """
+        Run the profiler for a table in a schema.
+        Prepare specific namings for different sources, e.g. bigquery
+        """
         dataset_name = f"{schema}.{table}"
         self.status.scanned(f"profile of {dataset_name}")
         logger.info(