Vertica comments, dbs, profiler and docs (#9845)

* Vertica comments, dbs, profiler and docs

* Revert metabase changes

* Format

* Fix median
This commit is contained in:
Pere Miquel Brull 2023-01-20 20:36:03 +01:00 committed by GitHub
parent e278b21905
commit 16a1b2c8be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 174 additions and 6 deletions

View File

@ -12,7 +12,9 @@
Vertica source implementation.
"""
import re
import traceback
from textwrap import dedent
from typing import Iterable
from sqlalchemy import sql, util
from sqlalchemy.engine import reflection
@ -20,6 +22,7 @@ from sqlalchemy.sql import sqltypes
from sqlalchemy.sql.sqltypes import String
from sqlalchemy_vertica.base import VerticaDialect
from metadata.generated.schema.entity.data.database import Database
from metadata.generated.schema.entity.services.connections.database.verticaConnection import (
VerticaConnection,
)
@ -34,8 +37,19 @@ from metadata.ingestion.source.database.common_db_source import CommonDbSourceSe
from metadata.ingestion.source.database.vertica.queries import (
VERTICA_GET_COLUMNS,
VERTICA_GET_PRIMARY_KEYS,
VERTICA_LIST_DATABASES,
VERTICA_TABLE_COMMENTS,
VERTICA_VIEW_DEFINITION,
)
from metadata.utils import fqn
from metadata.utils.filters import filter_by_database
from metadata.utils.logger import ingestion_logger
from metadata.utils.sqlalchemy_utils import (
get_all_table_comments,
get_table_comment_wrapper,
)
logger = ingestion_logger()
class UUID(String):
@ -219,9 +233,24 @@ def get_view_definition(
return None
@reflection.cache
def get_table_comment(
    self, connection, table_name, schema=None, **kw  # pylint: disable=unused-argument
):
    """
    Table comment lookup for the Vertica dialect.

    Delegates to `get_table_comment_wrapper`, handing it the dialect, the
    connection, the requested table / schema and the Vertica-specific
    `VERTICA_TABLE_COMMENTS` query. Whatever the wrapper returns is
    returned unchanged. Extra reflection keyword arguments are accepted
    but not forwarded.
    """
    wrapper_kwargs = {
        "table_name": table_name,
        "schema": schema,
        "query": VERTICA_TABLE_COMMENTS,
    }
    return get_table_comment_wrapper(self, connection, **wrapper_kwargs)
# Attach the reflection helpers to the Vertica dialect class.
# `get_all_table_comments` comes from `metadata.utils.sqlalchemy_utils`;
# `get_table_comment` is the Vertica-specific function that runs
# `VERTICA_TABLE_COMMENTS` through `get_table_comment_wrapper`.
VerticaDialect.get_columns = get_columns
VerticaDialect._get_column_info = _get_column_info # pylint: disable=protected-access
VerticaDialect.get_view_definition = get_view_definition
VerticaDialect.get_all_table_comments = get_all_table_comments
VerticaDialect.get_table_comment = get_table_comment
class VerticaSource(CommonDbSourceService):
@ -239,3 +268,38 @@ class VerticaSource(CommonDbSourceService):
f"Expected VerticaConnection, but got {connection}"
)
return cls(config, metadata_config)
def get_database_names(self) -> Iterable[str]:
    """
    Yield the names of the databases to ingest.

    If a database is set in the service connection, prepare the inspector
    for it and yield only that name. Otherwise run `VERTICA_LIST_DATABASES`
    and, for every returned database that is not excluded by
    `databaseFilterPattern`, prepare the inspector and yield its name.
    Databases that raise while being prepared or processed are logged and
    skipped.
    """
    configured_db = self.config.serviceConnection.__root__.config.database
    if configured_db:
        # Single-database mode: nothing to list or filter
        self.set_inspector(database_name=configured_db)
        yield configured_db
        return

    for record in self.connection.execute(VERTICA_LIST_DATABASES):
        # The database name is the first (and only selected) column
        new_database = list(record)[0]
        database_fqn = fqn.build(
            self.metadata,
            entity_type=Database,
            service_name=self.context.database_service.name.__root__,
            database_name=new_database,
        )

        filter_pattern = self.source_config.databaseFilterPattern
        if self.source_config.useFqnForFiltering:
            name_to_filter = database_fqn
        else:
            name_to_filter = new_database

        if filter_by_database(filter_pattern, name_to_filter):
            self.status.filter(database_fqn, "Database Filtered Out")
            continue

        try:
            self.set_inspector(database_name=new_database)
            yield new_database
        except Exception as exc:
            logger.debug(traceback.format_exc())
            logger.error(
                f"Error trying to connect to database {new_database}: {exc}"
            )

View File

@ -14,15 +14,37 @@ SQL Queries used during ingestion
import textwrap
# Column comments in Vertica can only happen on Projections
# https://forum.vertica.com/discussion/238945/vertica-try-to-create-comment
# And Vertica projections follow this naming:
# https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/AdministratorsGuide/Projections/WorkingWithProjections.htm
# So to fetch column comments we need to concat the table_name + projection infix + column name.
# Example: querying `v_catalog.comments` we find an object_name for a column in the table vendor_dimension as
# `vendor_dimension_super.vendor_name`. Note how this is the `_super` projection.
# Then, our join looks for the match in `vendor_dimension_%.vendor_name`.
# Note: This might not cover every column scenario, but so far we have not found a better way to join
# v_catalog.comments with v_catalog.columns.
VERTICA_GET_COLUMNS = textwrap.dedent(
"""
SELECT column_name, data_type, column_default, is_nullable, comment
FROM v_catalog.columns col left join v_catalog.comments com on col.table_id=com.object_id
and com.object_type='COLUMN' and col.column_name=com.child_object
SELECT
column_name,
data_type,
column_default,
is_nullable,
comment
FROM v_catalog.columns col
LEFT JOIN v_catalog.comments com
ON com.object_type = 'COLUMN'
AND com.object_name LIKE CONCAT(CONCAT(col.table_name, '_%.'), col.column_name)
WHERE lower(table_name) = '{table}'
AND {schema_condition}
UNION ALL
SELECT column_name, data_type, '' as column_default, true as is_nullable, '' as comment
SELECT
column_name,
data_type,
'' AS column_default,
true AS is_nullable,
'' AS comment
FROM v_catalog.view_columns
WHERE lower(table_name) = '{table}'
AND {schema_condition}
@ -47,3 +69,16 @@ VERTICA_VIEW_DEFINITION = textwrap.dedent(
AND {schema_condition}
"""
)
# Reads `database_name` from `v_catalog.databases`. Executed by the Vertica
# source's `get_database_names` when no database is set in the connection.
VERTICA_LIST_DATABASES = "SELECT database_name from v_catalog.databases"
# Reads schema, table name and comment text from `v_catalog.comments`,
# restricted to rows whose `object_type` is 'TABLE'. Passed as the `query`
# argument of `get_table_comment_wrapper` by the dialect's `get_table_comment`.
VERTICA_TABLE_COMMENTS = textwrap.dedent(
"""
SELECT
object_schema as schema,
object_name as table_name,
comment as table_comment
FROM v_catalog.comments
WHERE object_type = 'TABLE';
"""
)

View File

@ -59,7 +59,7 @@ def _(elements, compiler, **kwargs):
def _(elements, compiler, **kwargs):
"""Median computation for MSSQL"""
col = elements.clauses.clauses[0].name
return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()" % col
return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()" % col
@compiles(MedianFn, Dialects.Hive)
@ -70,7 +70,7 @@ def _(elements, compiler, **kwargs):
@compiles(MedianFn, Dialects.MySQL)
def _(elemenst, compiler, **kwargs): # pylint: disable=unused-argument
def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
"""Median computation for MySQL currently not supported
Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340
"""
@ -93,3 +93,11 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
""".format(
col=col, table=table.value
)
@compiles(MedianFn, Dialects.Vertica)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Median computation for Vertica

    Expects exactly two clauses: the column and the table. The median is
    compiled as a scalar subquery over the table, built from the column
    and the table clause's `value`.
    """
    column, source_table = elements.clauses
    return f"(SELECT MEDIAN({column}) OVER() FROM {source_table.value} LIMIT 1)"

View File

@ -58,6 +58,7 @@ def _(element, compiler, **kw):
@compiles(ModuloFn, Dialects.Trino)
@compiles(ModuloFn, Dialects.IbmDbSa)
@compiles(ModuloFn, Dialects.Db2)
@compiles(ModuloFn, Dialects.Vertica)
def _(element, compiler, **kw):
"""Modulo function for specific dialect"""
value, base = validate_and_compile(element, compiler, **kw)

View File

@ -101,3 +101,12 @@ def _(*_, **__):
from the already sampled results when executing row::MOD(0, 100) < profile_sample.
"""
return "0"
@compiles(RandomNumFn, Dialects.Vertica)
def _(*_, **__):
    """
    Random number expression for Vertica.

    Vertica's RANDOM() yields a float in the [0, 1) range, so it is scaled
    by 100 and cast to INTEGER to be usable as the operand of the modulo
    used for sampling.
    """
    scaled_random_expr = "(RANDOM() * 100)::INTEGER"
    return scaled_random_expr

View File

@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
custom Airflow plugins to handle the workflow deployment.
### Permissions
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
```sql
CREATE USER openmetadata IDENTIFIED BY 'password';
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
```
### Python Requirements
To run the Vertica ingestion, you will need to install:

View File

@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
custom Airflow plugins to handle the workflow deployment.
### Permissions
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
```sql
CREATE USER openmetadata IDENTIFIED BY 'password';
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
```
### Python Requirements
To run the Vertica ingestion, you will need to install:

View File

@ -43,6 +43,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
custom Airflow plugins to handle the workflow deployment.
### Permissions
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
```sql
CREATE USER openmetadata IDENTIFIED BY 'password';
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
```
## Metadata Ingestion
### 1. Visit the Services Page

View File

@ -0,0 +1,16 @@
---
title: Vertica Connector Troubleshooting
slug: /connectors/database/vertica/troubleshooting
---
# Troubleshooting
Learn how to resolve the most common problems people encounter in the Vertica connector.
## Profiler: New session rejected
If you see the error `New session rejected due to limit, already XYZ sessions active` when running the profiler,
it means that the number of threads configured in the profiler workflow exceeds the connection limit of your
Vertica instance.
Note that by default the profiler runs with 5 threads. In case you see this error, you might need to reduce this number.

View File

@ -344,6 +344,8 @@ site_menu:
url: /connectors/database/vertica/airflow
- category: Connectors / Database / Vertica / CLI
url: /connectors/database/vertica/cli
- category: Connectors / Database / Vertica / Troubleshooting
url: /connectors/database/vertica/troubleshooting
- category: Connectors / Dashboard
url: /connectors/dashboard
- category: Connectors / Dashboard / Looker