Vertica comments, dbs, profiler and docs (#9845)

* Vertica comments, dbs, profiler and docs

* Revert metabase changes

* Format

* Fix median
This commit is contained in:
Pere Miquel Brull 2023-01-20 20:36:03 +01:00 committed by GitHub
parent e278b21905
commit 16a1b2c8be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 174 additions and 6 deletions

View File

@ -12,7 +12,9 @@
Vertica source implementation. Vertica source implementation.
""" """
import re import re
import traceback
from textwrap import dedent from textwrap import dedent
from typing import Iterable
from sqlalchemy import sql, util from sqlalchemy import sql, util
from sqlalchemy.engine import reflection from sqlalchemy.engine import reflection
@ -20,6 +22,7 @@ from sqlalchemy.sql import sqltypes
from sqlalchemy.sql.sqltypes import String from sqlalchemy.sql.sqltypes import String
from sqlalchemy_vertica.base import VerticaDialect from sqlalchemy_vertica.base import VerticaDialect
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.services.connections.database.verticaConnection import ( from metadata.generated.schema.entity.services.connections.database.verticaConnection import (
VerticaConnection, VerticaConnection,
) )
@ -34,8 +37,19 @@ from metadata.ingestion.source.database.common_db_source import CommonDbSourceSe
from metadata.ingestion.source.database.vertica.queries import ( from metadata.ingestion.source.database.vertica.queries import (
VERTICA_GET_COLUMNS, VERTICA_GET_COLUMNS,
VERTICA_GET_PRIMARY_KEYS, VERTICA_GET_PRIMARY_KEYS,
VERTICA_LIST_DATABASES,
VERTICA_TABLE_COMMENTS,
VERTICA_VIEW_DEFINITION, VERTICA_VIEW_DEFINITION,
) )
from metadata.utils import fqn
from metadata.utils.filters import filter_by_database
from metadata.utils.logger import ingestion_logger
from metadata.utils.sqlalchemy_utils import (
get_all_table_comments,
get_table_comment_wrapper,
)
logger = ingestion_logger()
class UUID(String): class UUID(String):
@ -219,9 +233,24 @@ def get_view_definition(
return None return None
@reflection.cache
def get_table_comment(
    self, connection, table_name, schema=None, **kw  # pylint: disable=unused-argument
):
    """
    Return the comment of a single table.

    The lookup is delegated to `get_table_comment_wrapper`, which receives
    the Vertica specific `VERTICA_TABLE_COMMENTS` query together with the
    requested table name and schema. The extra keyword arguments in `kw`
    are accepted but not read in this function body.
    """
    lookup_args = {
        "table_name": table_name,
        "schema": schema,
        "query": VERTICA_TABLE_COMMENTS,
    }
    return get_table_comment_wrapper(self, connection, **lookup_args)
VerticaDialect.get_columns = get_columns VerticaDialect.get_columns = get_columns
VerticaDialect._get_column_info = _get_column_info # pylint: disable=protected-access VerticaDialect._get_column_info = _get_column_info # pylint: disable=protected-access
VerticaDialect.get_view_definition = get_view_definition VerticaDialect.get_view_definition = get_view_definition
VerticaDialect.get_all_table_comments = get_all_table_comments
VerticaDialect.get_table_comment = get_table_comment
class VerticaSource(CommonDbSourceService): class VerticaSource(CommonDbSourceService):
@ -239,3 +268,38 @@ class VerticaSource(CommonDbSourceService):
f"Expected VerticaConnection, but got {connection}" f"Expected VerticaConnection, but got {connection}"
) )
return cls(config, metadata_config) return cls(config, metadata_config)
def get_database_names(self) -> Iterable[str]:
    """
    Yield the names of the databases to ingest.

    When a database is set in the service connection, the inspector is
    pointed at it and only that name is yielded.

    Otherwise every row returned by `VERTICA_LIST_DATABASES` is evaluated:
    the database is skipped (and reported through `self.status.filter`)
    when `filter_by_database` matches it, using either its FQN or its plain
    name depending on `useFqnForFiltering`. For the remaining databases the
    inspector is switched before the name is yielded.

    A database for which the inspector cannot be set is logged and skipped,
    so the remaining databases are still processed.
    """
    configured_db = self.config.serviceConnection.__root__.config.database
    if configured_db:
        self.set_inspector(database_name=configured_db)
        yield configured_db
        return

    for row in self.connection.execute(VERTICA_LIST_DATABASES):
        # The query selects a single column: the database name.
        new_database = row[0]
        database_fqn = fqn.build(
            self.metadata,
            entity_type=Database,
            service_name=self.context.database_service.name.__root__,
            database_name=new_database,
        )

        filter_key = (
            database_fqn if self.source_config.useFqnForFiltering else new_database
        )
        if filter_by_database(self.source_config.databaseFilterPattern, filter_key):
            self.status.filter(database_fqn, "Database Filtered Out")
            continue

        # Only the inspector switch is guarded: keeping the `yield` outside
        # of the `try` avoids reporting errors raised by the consumer of this
        # generator as connection errors.
        try:
            self.set_inspector(database_name=new_database)
        except Exception as exc:
            logger.debug(traceback.format_exc())
            logger.error(
                f"Error trying to connect to database {new_database}: {exc}"
            )
        else:
            yield new_database

View File

@ -14,15 +14,37 @@ SQL Queries used during ingestion
import textwrap import textwrap
# Column comments in Vertica can only happen on Projections
# https://forum.vertica.com/discussion/238945/vertica-try-to-create-comment
# And Vertica projections follow this naming:
# https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/AdministratorsGuide/Projections/WorkingWithProjections.htm
# So to fetch column comments we need to concat the table_name + projection infix + column name.
# Example: querying `v_catalog.comments` we find an object_name for a column in the table vendor_dimension as
# `vendor_dimension_super.vendor_name`. Note how this is the `_super` projection.
# Then, our join looks for the match in `vendor_dimension_%.vendor_name`.
# Note: This might not be suitable for all column scenarios, but so far we have not found a better way to join
# v_catalog.comments with v_catalog.columns.
VERTICA_GET_COLUMNS = textwrap.dedent( VERTICA_GET_COLUMNS = textwrap.dedent(
""" """
SELECT column_name, data_type, column_default, is_nullable, comment SELECT
FROM v_catalog.columns col left join v_catalog.comments com on col.table_id=com.object_id column_name,
and com.object_type='COLUMN' and col.column_name=com.child_object data_type,
column_default,
is_nullable,
comment
FROM v_catalog.columns col
LEFT JOIN v_catalog.comments com
ON com.object_type = 'COLUMN'
AND com.object_name LIKE CONCAT(CONCAT(col.table_name, '_%.'), col.column_name)
WHERE lower(table_name) = '{table}' WHERE lower(table_name) = '{table}'
AND {schema_condition} AND {schema_condition}
UNION ALL UNION ALL
SELECT column_name, data_type, '' as column_default, true as is_nullable, '' as comment SELECT
column_name,
data_type,
'' AS column_default,
true AS is_nullable,
'' AS comment
FROM v_catalog.view_columns FROM v_catalog.view_columns
WHERE lower(table_name) = '{table}' WHERE lower(table_name) = '{table}'
AND {schema_condition} AND {schema_condition}
@ -47,3 +69,16 @@ VERTICA_VIEW_DEFINITION = textwrap.dedent(
AND {schema_condition} AND {schema_condition}
""" """
) )
# Selects the `database_name` column from `v_catalog.databases`.
VERTICA_LIST_DATABASES = "SELECT database_name from v_catalog.databases"
# Selects the rows of `v_catalog.comments` whose object_type is 'TABLE',
# exposing them under the aliases `schema`, `table_name` and `table_comment`.
VERTICA_TABLE_COMMENTS = textwrap.dedent(
"""
SELECT
object_schema as schema,
object_name as table_name,
comment as table_comment
FROM v_catalog.comments
WHERE object_type = 'TABLE';
"""
)

View File

@ -59,7 +59,7 @@ def _(elements, compiler, **kwargs):
def _(elements, compiler, **kwargs): def _(elements, compiler, **kwargs):
"""Median computation for MSSQL""" """Median computation for MSSQL"""
col = elements.clauses.clauses[0].name col = elements.clauses.clauses[0].name
return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()" % col return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()" % col
@compiles(MedianFn, Dialects.Hive) @compiles(MedianFn, Dialects.Hive)
@ -70,7 +70,7 @@ def _(elements, compiler, **kwargs):
@compiles(MedianFn, Dialects.MySQL) @compiles(MedianFn, Dialects.MySQL)
def _(elemenst, compiler, **kwargs): # pylint: disable=unused-argument def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
"""Median computation for MySQL currently not supported """Median computation for MySQL currently not supported
Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340 Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340
""" """
@ -93,3 +93,11 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
""".format( """.format(
col=col, table=table.value col=col, table=table.value
) )
@compiles(MedianFn, Dialects.Vertica)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Median computation for Vertica

    Expects two clauses: the column and the table (read through `.value`).
    The generated SQL applies MEDIAN with an empty OVER() window inside a
    subquery on the table and keeps a single row with LIMIT 1.
    """
    column, source_table = elements.clauses
    table_name = source_table.value
    return f"(SELECT MEDIAN({column}) OVER() FROM {table_name} LIMIT 1)"

View File

@ -58,6 +58,7 @@ def _(element, compiler, **kw):
@compiles(ModuloFn, Dialects.Trino) @compiles(ModuloFn, Dialects.Trino)
@compiles(ModuloFn, Dialects.IbmDbSa) @compiles(ModuloFn, Dialects.IbmDbSa)
@compiles(ModuloFn, Dialects.Db2) @compiles(ModuloFn, Dialects.Db2)
@compiles(ModuloFn, Dialects.Vertica)
def _(element, compiler, **kw): def _(element, compiler, **kw):
"""Modulo function for specific dialect""" """Modulo function for specific dialect"""
value, base = validate_and_compile(element, compiler, **kw) value, base = validate_and_compile(element, compiler, **kw)

View File

@ -101,3 +101,12 @@ def _(*_, **__):
from the already sampled results when executing row::MOD(0, 100) < profile_sample. from the already sampled results when executing row::MOD(0, 100) < profile_sample.
""" """
return "0" return "0"
@compiles(RandomNumFn, Dialects.Vertica)
def _(*_, **__):
    """
    Random number expression for Vertica.

    The float produced by RANDOM() is scaled by 100 and cast to INTEGER,
    since an integer is needed to perform the modulo.
    """
    random_integer_expression = "(RANDOM() * 100)::INTEGER"
    return random_integer_expression

View File

@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
custom Airflow plugins to handle the workflow deployment. custom Airflow plugins to handle the workflow deployment.
### Permissions
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
```sql
CREATE USER openmetadata IDENTIFIED BY 'password';
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
```
### Python Requirements ### Python Requirements
To run the Vertica ingestion, you will need to install: To run the Vertica ingestion, you will need to install:

View File

@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
custom Airflow plugins to handle the workflow deployment. custom Airflow plugins to handle the workflow deployment.
### Permissions
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
```sql
CREATE USER openmetadata IDENTIFIED BY 'password';
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
```
### Python Requirements ### Python Requirements
To run the Vertica ingestion, you will need to install: To run the Vertica ingestion, you will need to install:

View File

@ -43,6 +43,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
custom Airflow plugins to handle the workflow deployment. custom Airflow plugins to handle the workflow deployment.
### Permissions
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
```sql
CREATE USER openmetadata IDENTIFIED BY 'password';
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
```
## Metadata Ingestion ## Metadata Ingestion
### 1. Visit the Services Page ### 1. Visit the Services Page

View File

@ -0,0 +1,16 @@
---
title: Vertica Connector Troubleshooting
slug: /connectors/database/vertica/troubleshooting
---
# Troubleshooting
Learn how to resolve the most common problems people encounter in the Vertica connector.
## Profiler: New session rejected
If you see the error `New session rejected due to limit, already XYZ sessions active` while running the profiler,
it means that the number of threads configured in the profiler workflow exceeds the connection limit of your
Vertica instance.
Note that by default the profiler runs with 5 threads. In case you see this error, you might need to reduce this number.

View File

@ -344,6 +344,8 @@ site_menu:
url: /connectors/database/vertica/airflow url: /connectors/database/vertica/airflow
- category: Connectors / Database / Vertica / CLI - category: Connectors / Database / Vertica / CLI
url: /connectors/database/vertica/cli url: /connectors/database/vertica/cli
- category: Connectors / Database / Vertica / Troubleshooting
url: /connectors/database/vertica/troubleshooting
- category: Connectors / Dashboard - category: Connectors / Dashboard
url: /connectors/dashboard url: /connectors/dashboard
- category: Connectors / Dashboard / Looker - category: Connectors / Dashboard / Looker