mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-10 01:21:53 +00:00
Vertica comments, dbs, profiler and docs (#9845)
* Vertica comments, dbs, profiler and docs * Revert metabase changes * Format * Fix median
This commit is contained in:
parent
e278b21905
commit
16a1b2c8be
@ -12,7 +12,9 @@
|
|||||||
Vertica source implementation.
|
Vertica source implementation.
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
import traceback
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
from typing import Iterable
|
||||||
|
|
||||||
from sqlalchemy import sql, util
|
from sqlalchemy import sql, util
|
||||||
from sqlalchemy.engine import reflection
|
from sqlalchemy.engine import reflection
|
||||||
@ -20,6 +22,7 @@ from sqlalchemy.sql import sqltypes
|
|||||||
from sqlalchemy.sql.sqltypes import String
|
from sqlalchemy.sql.sqltypes import String
|
||||||
from sqlalchemy_vertica.base import VerticaDialect
|
from sqlalchemy_vertica.base import VerticaDialect
|
||||||
|
|
||||||
|
from metadata.generated.schema.entity.data.database import Database
|
||||||
from metadata.generated.schema.entity.services.connections.database.verticaConnection import (
|
from metadata.generated.schema.entity.services.connections.database.verticaConnection import (
|
||||||
VerticaConnection,
|
VerticaConnection,
|
||||||
)
|
)
|
||||||
@ -34,8 +37,19 @@ from metadata.ingestion.source.database.common_db_source import CommonDbSourceSe
|
|||||||
from metadata.ingestion.source.database.vertica.queries import (
|
from metadata.ingestion.source.database.vertica.queries import (
|
||||||
VERTICA_GET_COLUMNS,
|
VERTICA_GET_COLUMNS,
|
||||||
VERTICA_GET_PRIMARY_KEYS,
|
VERTICA_GET_PRIMARY_KEYS,
|
||||||
|
VERTICA_LIST_DATABASES,
|
||||||
|
VERTICA_TABLE_COMMENTS,
|
||||||
VERTICA_VIEW_DEFINITION,
|
VERTICA_VIEW_DEFINITION,
|
||||||
)
|
)
|
||||||
|
from metadata.utils import fqn
|
||||||
|
from metadata.utils.filters import filter_by_database
|
||||||
|
from metadata.utils.logger import ingestion_logger
|
||||||
|
from metadata.utils.sqlalchemy_utils import (
|
||||||
|
get_all_table_comments,
|
||||||
|
get_table_comment_wrapper,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = ingestion_logger()
|
||||||
|
|
||||||
|
|
||||||
class UUID(String):
|
class UUID(String):
|
||||||
@ -219,9 +233,24 @@ def get_view_definition(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
@reflection.cache
|
||||||
|
def get_table_comment(
|
||||||
|
self, connection, table_name, schema=None, **kw # pylint: disable=unused-argument
|
||||||
|
):
|
||||||
|
return get_table_comment_wrapper(
|
||||||
|
self,
|
||||||
|
connection,
|
||||||
|
table_name=table_name,
|
||||||
|
schema=schema,
|
||||||
|
query=VERTICA_TABLE_COMMENTS,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
VerticaDialect.get_columns = get_columns
|
VerticaDialect.get_columns = get_columns
|
||||||
VerticaDialect._get_column_info = _get_column_info # pylint: disable=protected-access
|
VerticaDialect._get_column_info = _get_column_info # pylint: disable=protected-access
|
||||||
VerticaDialect.get_view_definition = get_view_definition
|
VerticaDialect.get_view_definition = get_view_definition
|
||||||
|
VerticaDialect.get_all_table_comments = get_all_table_comments
|
||||||
|
VerticaDialect.get_table_comment = get_table_comment
|
||||||
|
|
||||||
|
|
||||||
class VerticaSource(CommonDbSourceService):
|
class VerticaSource(CommonDbSourceService):
|
||||||
@ -239,3 +268,38 @@ class VerticaSource(CommonDbSourceService):
|
|||||||
f"Expected VerticaConnection, but got {connection}"
|
f"Expected VerticaConnection, but got {connection}"
|
||||||
)
|
)
|
||||||
return cls(config, metadata_config)
|
return cls(config, metadata_config)
|
||||||
|
|
||||||
|
def get_database_names(self) -> Iterable[str]:
|
||||||
|
configured_db = self.config.serviceConnection.__root__.config.database
|
||||||
|
if configured_db:
|
||||||
|
self.set_inspector(database_name=configured_db)
|
||||||
|
yield configured_db
|
||||||
|
else:
|
||||||
|
results = self.connection.execute(VERTICA_LIST_DATABASES)
|
||||||
|
for res in results:
|
||||||
|
row = list(res)
|
||||||
|
new_database = row[0]
|
||||||
|
database_fqn = fqn.build(
|
||||||
|
self.metadata,
|
||||||
|
entity_type=Database,
|
||||||
|
service_name=self.context.database_service.name.__root__,
|
||||||
|
database_name=new_database,
|
||||||
|
)
|
||||||
|
|
||||||
|
if filter_by_database(
|
||||||
|
self.source_config.databaseFilterPattern,
|
||||||
|
database_fqn
|
||||||
|
if self.source_config.useFqnForFiltering
|
||||||
|
else new_database,
|
||||||
|
):
|
||||||
|
self.status.filter(database_fqn, "Database Filtered Out")
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.set_inspector(database_name=new_database)
|
||||||
|
yield new_database
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug(traceback.format_exc())
|
||||||
|
logger.error(
|
||||||
|
f"Error trying to connect to database {new_database}: {exc}"
|
||||||
|
)
|
||||||
|
@ -14,15 +14,37 @@ SQL Queries used during ingestion
|
|||||||
|
|
||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
|
# Column comments in Vertica can only be set on Projections
|
||||||
|
# https://forum.vertica.com/discussion/238945/vertica-try-to-create-comment
|
||||||
|
# And Vertica projections follow this naming:
|
||||||
|
# https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/AdministratorsGuide/Projections/WorkingWithProjections.htm
|
||||||
|
# So to fetch column comments we need to concat the table_name + projection infix + column name.
|
||||||
|
# Example: querying `v_catalog.comments` we find an object_name for a column in the table vendor_dimension as
|
||||||
|
# `vendor_dimension_super.vendor_name`. Note how this is the `_super` projection.
|
||||||
|
# Then, our join looks for the match in `vendor_dimension_%.vendor_name`.
|
||||||
|
# Note: This might not suit all column scenarios, but so far we have not found a better way to join
|
||||||
|
# v_catalog.comments with v_catalog.columns.
|
||||||
VERTICA_GET_COLUMNS = textwrap.dedent(
|
VERTICA_GET_COLUMNS = textwrap.dedent(
|
||||||
"""
|
"""
|
||||||
SELECT column_name, data_type, column_default, is_nullable, comment
|
SELECT
|
||||||
FROM v_catalog.columns col left join v_catalog.comments com on col.table_id=com.object_id
|
column_name,
|
||||||
and com.object_type='COLUMN' and col.column_name=com.child_object
|
data_type,
|
||||||
|
column_default,
|
||||||
|
is_nullable,
|
||||||
|
comment
|
||||||
|
FROM v_catalog.columns col
|
||||||
|
LEFT JOIN v_catalog.comments com
|
||||||
|
ON com.object_type = 'COLUMN'
|
||||||
|
AND com.object_name LIKE CONCAT(CONCAT(col.table_name, '_%.'), col.column_name)
|
||||||
WHERE lower(table_name) = '{table}'
|
WHERE lower(table_name) = '{table}'
|
||||||
AND {schema_condition}
|
AND {schema_condition}
|
||||||
UNION ALL
|
UNION ALL
|
||||||
SELECT column_name, data_type, '' as column_default, true as is_nullable, '' as comment
|
SELECT
|
||||||
|
column_name,
|
||||||
|
data_type,
|
||||||
|
'' AS column_default,
|
||||||
|
true AS is_nullable,
|
||||||
|
'' AS comment
|
||||||
FROM v_catalog.view_columns
|
FROM v_catalog.view_columns
|
||||||
WHERE lower(table_name) = '{table}'
|
WHERE lower(table_name) = '{table}'
|
||||||
AND {schema_condition}
|
AND {schema_condition}
|
||||||
@ -47,3 +69,16 @@ VERTICA_VIEW_DEFINITION = textwrap.dedent(
|
|||||||
AND {schema_condition}
|
AND {schema_condition}
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
VERTICA_LIST_DATABASES = "SELECT database_name from v_catalog.databases"
|
||||||
|
|
||||||
|
VERTICA_TABLE_COMMENTS = textwrap.dedent(
|
||||||
|
"""
|
||||||
|
SELECT
|
||||||
|
object_schema as schema,
|
||||||
|
object_name as table_name,
|
||||||
|
comment as table_comment
|
||||||
|
FROM v_catalog.comments
|
||||||
|
WHERE object_type = 'TABLE';
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
@ -70,7 +70,7 @@ def _(elements, compiler, **kwargs):
|
|||||||
|
|
||||||
|
|
||||||
@compiles(MedianFn, Dialects.MySQL)
|
@compiles(MedianFn, Dialects.MySQL)
|
||||||
def _(elemenst, compiler, **kwargs): # pylint: disable=unused-argument
|
def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
|
||||||
"""Median computation for MySQL currently not supported
|
"""Median computation for MySQL currently not supported
|
||||||
Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340
|
Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340
|
||||||
"""
|
"""
|
||||||
@ -93,3 +93,11 @@ def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
|
|||||||
""".format(
|
""".format(
|
||||||
col=col, table=table.value
|
col=col, table=table.value
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@compiles(MedianFn, Dialects.Vertica)
|
||||||
|
def _(elements, compiler, **kwargs): # pylint: disable=unused-argument
|
||||||
|
col, table = list(elements.clauses)
|
||||||
|
return "(SELECT MEDIAN({col}) OVER() FROM {table} LIMIT 1)".format(
|
||||||
|
col=col, table=table.value
|
||||||
|
)
|
||||||
|
@ -58,6 +58,7 @@ def _(element, compiler, **kw):
|
|||||||
@compiles(ModuloFn, Dialects.Trino)
|
@compiles(ModuloFn, Dialects.Trino)
|
||||||
@compiles(ModuloFn, Dialects.IbmDbSa)
|
@compiles(ModuloFn, Dialects.IbmDbSa)
|
||||||
@compiles(ModuloFn, Dialects.Db2)
|
@compiles(ModuloFn, Dialects.Db2)
|
||||||
|
@compiles(ModuloFn, Dialects.Vertica)
|
||||||
def _(element, compiler, **kw):
|
def _(element, compiler, **kw):
|
||||||
"""Modulo function for specific dialect"""
|
"""Modulo function for specific dialect"""
|
||||||
value, base = validate_and_compile(element, compiler, **kw)
|
value, base = validate_and_compile(element, compiler, **kw)
|
||||||
|
@ -101,3 +101,12 @@ def _(*_, **__):
|
|||||||
from the already sampled results when executing row::MOD(0, 100) < profile_sample.
|
from the already sampled results when executing row::MOD(0, 100) < profile_sample.
|
||||||
"""
|
"""
|
||||||
return "0"
|
return "0"
|
||||||
|
|
||||||
|
|
||||||
|
@compiles(RandomNumFn, Dialects.Vertica)
|
||||||
|
def _(*_, **__):
|
||||||
|
"""
|
||||||
|
Vertica RANDOM() returns a number 0 <= n < 1 as a float.
|
||||||
|
We need to cast it to integer to perform the modulo
|
||||||
|
"""
|
||||||
|
return "(RANDOM() * 100)::INTEGER"
|
||||||
|
@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
|
|||||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||||
custom Airflow plugins to handle the workflow deployment.
|
custom Airflow plugins to handle the workflow deployment.
|
||||||
|
|
||||||
|
### Permissions
|
||||||
|
|
||||||
|
To run the ingestion you need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
|
||||||
|
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE USER openmetadata IDENTIFIED BY 'password';
|
||||||
|
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
|
||||||
|
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
|
||||||
|
```
|
||||||
|
|
||||||
### Python Requirements
|
### Python Requirements
|
||||||
|
|
||||||
To run the Vertica ingestion, you will need to install:
|
To run the Vertica ingestion, you will need to install:
|
||||||
|
@ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
|
|||||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||||
custom Airflow plugins to handle the workflow deployment.
|
custom Airflow plugins to handle the workflow deployment.
|
||||||
|
|
||||||
|
### Permissions
|
||||||
|
|
||||||
|
To run the ingestion you need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
|
||||||
|
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE USER openmetadata IDENTIFIED BY 'password';
|
||||||
|
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
|
||||||
|
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
|
||||||
|
```
|
||||||
|
|
||||||
### Python Requirements
|
### Python Requirements
|
||||||
|
|
||||||
To run the Vertica ingestion, you will need to install:
|
To run the Vertica ingestion, you will need to install:
|
||||||
|
@ -43,6 +43,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides.
|
|||||||
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with
|
||||||
custom Airflow plugins to handle the workflow deployment.
|
custom Airflow plugins to handle the workflow deployment.
|
||||||
|
|
||||||
|
### Permissions
|
||||||
|
|
||||||
|
To run the ingestion you need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the
|
||||||
|
`V_CATALOG` schema. You can grant those as follows for the schemas in your database:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
CREATE USER openmetadata IDENTIFIED BY 'password';
|
||||||
|
GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata;
|
||||||
|
GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata;
|
||||||
|
```
|
||||||
|
|
||||||
## Metadata Ingestion
|
## Metadata Ingestion
|
||||||
|
|
||||||
### 1. Visit the Services Page
|
### 1. Visit the Services Page
|
||||||
|
@ -0,0 +1,16 @@
|
|||||||
|
---
|
||||||
|
title: Vertica Connector Troubleshooting
|
||||||
|
slug: /connectors/database/vertica/troubleshooting
|
||||||
|
---
|
||||||
|
|
||||||
|
# Troubleshooting
|
||||||
|
|
||||||
|
Learn how to resolve the most common problems people encounter in the Vertica connector.
|
||||||
|
|
||||||
|
## Profiler: New session rejected
|
||||||
|
|
||||||
|
If you see the following error when running the profiler: `New session rejected due to limit, already XYZ sessions active`,
|
||||||
|
it means that the number of threads configured in the profiler workflow is exceeding the connection limits of your
|
||||||
|
Vertica instance.
|
||||||
|
|
||||||
|
Note that by default the profiler runs with 5 threads. In case you see this error, you might need to reduce this number.
|
@ -344,6 +344,8 @@ site_menu:
|
|||||||
url: /connectors/database/vertica/airflow
|
url: /connectors/database/vertica/airflow
|
||||||
- category: Connectors / Database / Vertica / CLI
|
- category: Connectors / Database / Vertica / CLI
|
||||||
url: /connectors/database/vertica/cli
|
url: /connectors/database/vertica/cli
|
||||||
|
- category: Connectors / Database / Vertica / Troubleshooting
|
||||||
|
url: /connectors/database/vertica/troubleshooting
|
||||||
- category: Connectors / Dashboard
|
- category: Connectors / Dashboard
|
||||||
url: /connectors/dashboard
|
url: /connectors/dashboard
|
||||||
- category: Connectors / Dashboard / Looker
|
- category: Connectors / Dashboard / Looker
|
||||||
|
Loading…
x
Reference in New Issue
Block a user