mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-08 16:38:04 +00:00
Vertica comments, dbs, profiler and docs (#9845)
* Vertica comments, dbs, profiler and docs * Revert metabase changes * Format * Fix median
This commit is contained in:
parent
e278b21905
commit
16a1b2c8be
@ -12,7 +12,9 @@
|
||||
Vertica source implementation.
|
||||
"""
|
||||
import re
|
||||
import traceback
|
||||
from textwrap import dedent
|
||||
from typing import Iterable
|
||||
|
||||
from sqlalchemy import sql, util
|
||||
from sqlalchemy.engine import reflection
|
||||
@ -20,6 +22,7 @@ from sqlalchemy.sql import sqltypes
|
||||
from sqlalchemy.sql.sqltypes import String
|
||||
from sqlalchemy_vertica.base import VerticaDialect
|
||||
|
||||
from metadata.generated.schema.entity.data.database import Database
|
||||
from metadata.generated.schema.entity.services.connections.database.verticaConnection import (
|
||||
VerticaConnection,
|
||||
)
|
||||
@ -34,8 +37,19 @@ from metadata.ingestion.source.database.common_db_source import CommonDbSourceSe
|
||||
from metadata.ingestion.source.database.vertica.queries import (
|
||||
VERTICA_GET_COLUMNS,
|
||||
VERTICA_GET_PRIMARY_KEYS,
|
||||
VERTICA_LIST_DATABASES,
|
||||
VERTICA_TABLE_COMMENTS,
|
||||
VERTICA_VIEW_DEFINITION,
|
||||
)
|
||||
from metadata.utils import fqn
|
||||
from metadata.utils.filters import filter_by_database
|
||||
from metadata.utils.logger import ingestion_logger
|
||||
from metadata.utils.sqlalchemy_utils import (
|
||||
get_all_table_comments,
|
||||
get_table_comment_wrapper,
|
||||
)
|
||||
|
||||
logger = ingestion_logger()
|
||||
|
||||
|
||||
class UUID(String):
|
||||
@ -219,9 +233,24 @@ def get_view_definition(
|
||||
return None
|
||||
|
||||
|
||||
@reflection.cache
def get_table_comment(
    self, connection, table_name, schema=None, **kw  # pylint: disable=unused-argument
):
    """
    Fetch the comment of a single table for the Vertica dialect.

    Delegates to `get_table_comment_wrapper`, handing it the
    `VERTICA_TABLE_COMMENTS` query together with the table and schema names.
    """
    wrapper_kwargs = {
        "table_name": table_name,
        "schema": schema,
        "query": VERTICA_TABLE_COMMENTS,
    }
    return get_table_comment_wrapper(self, connection, **wrapper_kwargs)
|
||||
|
||||
|
||||
# Override the VerticaDialect reflection methods with the custom implementations
VerticaDialect.get_table_comment = get_table_comment
VerticaDialect.get_all_table_comments = get_all_table_comments
VerticaDialect.get_view_definition = get_view_definition
VerticaDialect._get_column_info = _get_column_info  # pylint: disable=protected-access
VerticaDialect.get_columns = get_columns
|
||||
|
||||
|
||||
class VerticaSource(CommonDbSourceService):
|
||||
@ -239,3 +268,38 @@ class VerticaSource(CommonDbSourceService):
|
||||
f"Expected VerticaConnection, but got {connection}"
|
||||
)
|
||||
return cls(config, metadata_config)
|
||||
|
||||
def get_database_names(self) -> Iterable[str]:
    """
    Yield the names of the databases to ingest.

    When a database is set in the service connection, only that one is
    yielded. Otherwise, the databases returned by VERTICA_LIST_DATABASES
    are listed, skipping those rejected by the database filter pattern.
    The inspector is pointed at each database before its name is yielded.
    """
    default_db = self.config.serviceConnection.__root__.config.database
    if default_db:
        self.set_inspector(database_name=default_db)
        yield default_db
        return

    for record in self.connection.execute(VERTICA_LIST_DATABASES):
        db_name = list(record)[0]
        db_fqn = fqn.build(
            self.metadata,
            entity_type=Database,
            service_name=self.context.database_service.name.__root__,
            database_name=db_name,
        )

        # The filter pattern is matched against the FQN or the raw name,
        # depending on the source configuration
        filter_target = db_fqn if self.source_config.useFqnForFiltering else db_name
        if filter_by_database(self.source_config.databaseFilterPattern, filter_target):
            self.status.filter(db_fqn, "Database Filtered Out")
            continue

        try:
            self.set_inspector(database_name=db_name)
            yield db_name
        except Exception as exc:
            logger.debug(traceback.format_exc())
            logger.error(f"Error trying to connect to database {db_name}: {exc}")
|
||||
|
@ -14,15 +14,37 @@ SQL Queries used during ingestion
|
||||
|
||||
import textwrap
|
||||
|
||||
# Column comments in Vertica can only be set on projections
|
||||
# https://forum.vertica.com/discussion/238945/vertica-try-to-create-comment
|
||||
# And Vertica projections follow this naming:
|
||||
# https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/AdministratorsGuide/Projections/WorkingWithProjections.htm
|
||||
# So to fetch column comments we need to concat the table_name + projection infix + column name.
|
||||
# Example: querying `v_catalog.comments` we find an object_name for a column in the table vendor_dimension as
|
||||
# `vendor_dimension_super.vendor_name`. Note how this is the `_super` projection.
|
||||
# Then, our join looks for the match in `vendor_dimension_%.vendor_name`.
|
||||
# Note: This might not suit all column scenarios, but so far we have not found a better way to join
|
||||
# v_catalog.comments with v_catalog.columns.
|
||||
VERTICA_GET_COLUMNS = textwrap.dedent(
|
||||
"""
|
||||
SELECT column_name, data_type, column_default, is_nullable, comment
|
||||
FROM v_catalog.columns col left join v_catalog.comments com on col.table_id=com.object_id
|
||||
and com.object_type='COLUMN' and col.column_name=com.child_object
|
||||
SELECT
|
||||
column_name,
|
||||
data_type,
|
||||
column_default,
|
||||
is_nullable,
|
||||
comment
|
||||
FROM v_catalog.columns col
|
||||
LEFT JOIN v_catalog.comments com
|
||||
ON com.object_type = 'COLUMN'
|
||||
AND com.object_name LIKE CONCAT(CONCAT(col.table_name, '_%.'), col.column_name)
|
||||
WHERE lower(table_name) = '{table}'
|
||||
AND {schema_condition}
|
||||
UNION ALL
|
||||
SELECT column_name, data_type, '' as column_default, true as is_nullable, '' as comment
|
||||
SELECT
|
||||
column_name,
|
||||
data_type,
|
||||
'' AS column_default,
|
||||
true AS is_nullable,
|
||||
'' AS comment
|
||||
FROM v_catalog.view_columns
|
||||
WHERE lower(table_name) = '{table}'
|
||||
AND {schema_condition}
|
||||
@ -47,3 +69,16 @@ VERTICA_VIEW_DEFINITION = textwrap.dedent(
|
||||
AND {schema_condition}
|
||||
"""
|
||||
)
|
||||
|
||||
VERTICA_LIST_DATABASES = "SELECT database_name from v_catalog.databases"
|
||||
|
||||
VERTICA_TABLE_COMMENTS = textwrap.dedent(
|
||||
"""
|
||||
SELECT
|
||||
object_schema as schema,
|
||||
object_name as table_name,
|
||||
comment as table_comment
|
||||
FROM v_catalog.comments
|
||||
WHERE object_type = 'TABLE';
|
||||
"""
|
||||
)
|
||||
|
@ -59,7 +59,7 @@ def _(elements, compiler, **kwargs):
|
||||
def _(elements, compiler, **kwargs):
|
||||
"""Median computation for MSSQL"""
|
||||
col = elements.clauses.clauses[0].name
|
||||
return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()" % col
|
||||
return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()" % col
|
||||
|
||||
|
||||
@compiles(MedianFn, Dialects.Hive)
|
||||
@ -70,7 +70,7 @@ def _(elements, compiler, **kwargs):
|
||||
|
||||
|
||||
@compiles(MedianFn, Dialects.MySQL)
|
||||
def _(elemenst, compiler, **kwargs): # pylint: disable=unused-argument
|
||||
def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
|
||||
"""Median computation for MySQL currently not supported
|
||||
Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340
|
||||
"""
|
||||
@ -93,3 +93,11 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
|
||||
""".format(
|
||||
col=col, table=table.value
|
||||
)
|
||||
|
||||
|
||||
@compiles(MedianFn, Dialects.Vertica)
def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument
    """Median computation for Vertica

    Expects two clauses, the column and the table, and builds a scalar
    subquery that applies MEDIAN with an empty OVER() window.
    """
    column, table = list(elements.clauses)
    return f"(SELECT MEDIAN({column}) OVER() FROM {table.value} LIMIT 1)"
|
||||
|
@ -58,6 +58,7 @@ def _(element, compiler, **kw):
|
||||
@compiles(ModuloFn, Dialects.Trino)
|
||||
@compiles(ModuloFn, Dialects.IbmDbSa)
|
||||
@compiles(ModuloFn, Dialects.Db2)
|
||||
@compiles(ModuloFn, Dialects.Vertica)
|
||||
def _(element, compiler, **kw):
|
||||
"""Modulo function for specific dialect"""
|
||||
value, base = validate_and_compile(element, compiler, **kw)
|
||||
|
@ -101,3 +101,12 @@ def _(*_, **__):
|
||||
from the already sampled results when executing row::MOD(0, 100) < profile_sample.
|
||||
"""
|
||||
return "0"
|
||||
|
||||
|
||||
@compiles(RandomNumFn, Dialects.Vertica)
def _(*_, **__):
    """
    Random number expression for Vertica.

    RANDOM() in Vertica produces a float below 1, so the value is scaled by
    100 and cast to INTEGER before it is used as the operand of the modulo.
    """
    vertica_random_expr = "(RANDOM() * 100)::INTEGER"
    return vertica_random_expr
|
||||
|
@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
|
||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||
custom Airflow plugins to handle the workflow deployment.
|
||||
|
||||
### Permissions
|
||||
|
||||
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
|
||||
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
|
||||
|
||||
```sql
|
||||
CREATE USER openmetadata IDENTIFIED BY 'password';
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
|
||||
```
|
||||
|
||||
### Python Requirements
|
||||
|
||||
To run the Vertica ingestion, you will need to install:
|
||||
|
@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
|
||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||
custom Airflow plugins to handle the workflow deployment.
|
||||
|
||||
### Permissions
|
||||
|
||||
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
|
||||
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
|
||||
|
||||
```sql
|
||||
CREATE USER openmetadata IDENTIFIED BY 'password';
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
|
||||
```
|
||||
|
||||
### Python Requirements
|
||||
|
||||
To run the Vertica ingestion, you will need to install:
|
||||
|
@ -43,6 +43,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
|
||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||
custom Airflow plugins to handle the workflow deployment.
|
||||
|
||||
### Permissions
|
||||
|
||||
To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
|
||||
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
|
||||
|
||||
```sql
|
||||
CREATE USER openmetadata IDENTIFIED BY 'password';
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
|
||||
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
|
||||
```
|
||||
|
||||
## Metadata Ingestion
|
||||
|
||||
### 1. Visit the Services Page
|
||||
|
@ -0,0 +1,16 @@
|
||||
---
|
||||
title: Vertica Connector Troubleshooting
|
||||
slug: /connectors/database/vertica/troubleshooting
|
||||
---
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
Learn how to resolve the most common problems people encounter in the Vertica connector.
|
||||
|
||||
## Profiler: New session rejected
|
||||
|
||||
If you see the following error when running the profiler: `New session rejected due to limit, already XYZ sessions active`,
|
||||
it means that the number of threads configured in the profiler workflow exceeds the connection limits of your
|
||||
Vertica instance.
|
||||
|
||||
Note that by default the profiler runs with 5 threads. In case you see this error, you might need to reduce this number.
|
@ -344,6 +344,8 @@ site_menu:
|
||||
url: /connectors/database/vertica/airflow
|
||||
- category: Connectors / Database / Vertica / CLI
|
||||
url: /connectors/database/vertica/cli
|
||||
- category: Connectors / Database / Vertica / Troubleshooting
|
||||
url: /connectors/database/vertica/troubleshooting
|
||||
- category: Connectors / Dashboard
|
||||
url: /connectors/dashboard
|
||||
- category: Connectors / Dashboard / Looker
|
||||
|
Loading…
x
Reference in New Issue
Block a user