mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-10-31 18:48:35 +00:00 
			
		
		
		
	Vertica comments, dbs, profiler and docs (#9845)
* Vertica comments, dbs, profiler and docs * Revert metabase changes * Format * Fix median
This commit is contained in:
		
							parent
							
								
									e278b21905
								
							
						
					
					
						commit
						16a1b2c8be
					
				| @ -12,7 +12,9 @@ | |||||||
| Vertica source implementation. | Vertica source implementation. | ||||||
| """ | """ | ||||||
| import re | import re | ||||||
|  | import traceback | ||||||
| from textwrap import dedent | from textwrap import dedent | ||||||
|  | from typing import Iterable | ||||||
| 
 | 
 | ||||||
| from sqlalchemy import sql, util | from sqlalchemy import sql, util | ||||||
| from sqlalchemy.engine import reflection | from sqlalchemy.engine import reflection | ||||||
| @ -20,6 +22,7 @@ from sqlalchemy.sql import sqltypes | |||||||
| from sqlalchemy.sql.sqltypes import String | from sqlalchemy.sql.sqltypes import String | ||||||
| from sqlalchemy_vertica.base import VerticaDialect | from sqlalchemy_vertica.base import VerticaDialect | ||||||
| 
 | 
 | ||||||
|  | from metadata.generated.schema.entity.data.database import Database | ||||||
| from metadata.generated.schema.entity.services.connections.database.verticaConnection import ( | from metadata.generated.schema.entity.services.connections.database.verticaConnection import ( | ||||||
|     VerticaConnection, |     VerticaConnection, | ||||||
| ) | ) | ||||||
| @ -34,8 +37,19 @@ from metadata.ingestion.source.database.common_db_source import CommonDbSourceSe | |||||||
| from metadata.ingestion.source.database.vertica.queries import ( | from metadata.ingestion.source.database.vertica.queries import ( | ||||||
|     VERTICA_GET_COLUMNS, |     VERTICA_GET_COLUMNS, | ||||||
|     VERTICA_GET_PRIMARY_KEYS, |     VERTICA_GET_PRIMARY_KEYS, | ||||||
|  |     VERTICA_LIST_DATABASES, | ||||||
|  |     VERTICA_TABLE_COMMENTS, | ||||||
|     VERTICA_VIEW_DEFINITION, |     VERTICA_VIEW_DEFINITION, | ||||||
| ) | ) | ||||||
|  | from metadata.utils import fqn | ||||||
|  | from metadata.utils.filters import filter_by_database | ||||||
|  | from metadata.utils.logger import ingestion_logger | ||||||
|  | from metadata.utils.sqlalchemy_utils import ( | ||||||
|  |     get_all_table_comments, | ||||||
|  |     get_table_comment_wrapper, | ||||||
|  | ) | ||||||
|  | 
 | ||||||
|  | logger = ingestion_logger() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class UUID(String): | class UUID(String): | ||||||
| @ -219,9 +233,24 @@ def get_view_definition( | |||||||
|     return None |     return None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | @reflection.cache | ||||||
|  | def get_table_comment( | ||||||
|  |     self, connection, table_name, schema=None, **kw  # pylint: disable=unused-argument | ||||||
|  | ): | ||||||
|  |     return get_table_comment_wrapper( | ||||||
|  |         self, | ||||||
|  |         connection, | ||||||
|  |         table_name=table_name, | ||||||
|  |         schema=schema, | ||||||
|  |         query=VERTICA_TABLE_COMMENTS, | ||||||
|  |     ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| VerticaDialect.get_columns = get_columns | VerticaDialect.get_columns = get_columns | ||||||
| VerticaDialect._get_column_info = _get_column_info  # pylint: disable=protected-access | VerticaDialect._get_column_info = _get_column_info  # pylint: disable=protected-access | ||||||
| VerticaDialect.get_view_definition = get_view_definition | VerticaDialect.get_view_definition = get_view_definition | ||||||
|  | VerticaDialect.get_all_table_comments = get_all_table_comments | ||||||
|  | VerticaDialect.get_table_comment = get_table_comment | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class VerticaSource(CommonDbSourceService): | class VerticaSource(CommonDbSourceService): | ||||||
| @ -239,3 +268,38 @@ class VerticaSource(CommonDbSourceService): | |||||||
|                 f"Expected VerticaConnection, but got {connection}" |                 f"Expected VerticaConnection, but got {connection}" | ||||||
|             ) |             ) | ||||||
|         return cls(config, metadata_config) |         return cls(config, metadata_config) | ||||||
|  | 
 | ||||||
|  |     def get_database_names(self) -> Iterable[str]: | ||||||
|  |         configured_db = self.config.serviceConnection.__root__.config.database | ||||||
|  |         if configured_db: | ||||||
|  |             self.set_inspector(database_name=configured_db) | ||||||
|  |             yield configured_db | ||||||
|  |         else: | ||||||
|  |             results = self.connection.execute(VERTICA_LIST_DATABASES) | ||||||
|  |             for res in results: | ||||||
|  |                 row = list(res) | ||||||
|  |                 new_database = row[0] | ||||||
|  |                 database_fqn = fqn.build( | ||||||
|  |                     self.metadata, | ||||||
|  |                     entity_type=Database, | ||||||
|  |                     service_name=self.context.database_service.name.__root__, | ||||||
|  |                     database_name=new_database, | ||||||
|  |                 ) | ||||||
|  | 
 | ||||||
|  |                 if filter_by_database( | ||||||
|  |                     self.source_config.databaseFilterPattern, | ||||||
|  |                     database_fqn | ||||||
|  |                     if self.source_config.useFqnForFiltering | ||||||
|  |                     else new_database, | ||||||
|  |                 ): | ||||||
|  |                     self.status.filter(database_fqn, "Database Filtered Out") | ||||||
|  |                     continue | ||||||
|  | 
 | ||||||
|  |                 try: | ||||||
|  |                     self.set_inspector(database_name=new_database) | ||||||
|  |                     yield new_database | ||||||
|  |                 except Exception as exc: | ||||||
|  |                     logger.debug(traceback.format_exc()) | ||||||
|  |                     logger.error( | ||||||
|  |                         f"Error trying to connect to database {new_database}: {exc}" | ||||||
|  |                     ) | ||||||
|  | |||||||
| @ -14,15 +14,37 @@ SQL Queries used during ingestion | |||||||
| 
 | 
 | ||||||
| import textwrap | import textwrap | ||||||
| 
 | 
 | ||||||
|  | # Column comments in Vertica can only happen on Projections | ||||||
|  | #   https://forum.vertica.com/discussion/238945/vertica-try-to-create-comment | ||||||
|  | # And Vertica projections follow this naming: | ||||||
|  | #   https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/AdministratorsGuide/Projections/WorkingWithProjections.htm | ||||||
|  | # So to fetch column comments we need to concat the table_name + projection infix + column name. | ||||||
|  | # Example: querying `v_catalog.comments` we find an object_name for a column in the table vendor_dimension as | ||||||
|  | # `vendor_dimension_super.vendor_name`. Note how this is the `_super` projection. | ||||||
|  | # Then, our join looks for the match in `vendor_dimension_%.vendor_name`. | ||||||
|  | # Note: This might not suit all column scenarios, but we have not yet found a better way to join | ||||||
|  | # v_catalog.comments with v_catalog.columns. | ||||||
| VERTICA_GET_COLUMNS = textwrap.dedent( | VERTICA_GET_COLUMNS = textwrap.dedent( | ||||||
|     """ |     """ | ||||||
|         SELECT column_name, data_type, column_default, is_nullable, comment |         SELECT | ||||||
|         FROM v_catalog.columns col left join v_catalog.comments com on col.table_id=com.object_id |           column_name, | ||||||
|         and com.object_type='COLUMN' and col.column_name=com.child_object |           data_type, | ||||||
|  |           column_default, | ||||||
|  |           is_nullable, | ||||||
|  |           comment | ||||||
|  |         FROM v_catalog.columns col | ||||||
|  |         LEFT JOIN v_catalog.comments com | ||||||
|  |           ON com.object_type = 'COLUMN' | ||||||
|  |          AND com.object_name LIKE CONCAT(CONCAT(col.table_name, '_%.'), col.column_name) | ||||||
|         WHERE lower(table_name) = '{table}' |         WHERE lower(table_name) = '{table}' | ||||||
|         AND {schema_condition} |         AND {schema_condition} | ||||||
|         UNION ALL |         UNION ALL | ||||||
|         SELECT column_name, data_type, '' as column_default, true as is_nullable, ''  as comment |         SELECT | ||||||
|  |           column_name, | ||||||
|  |           data_type, | ||||||
|  |           '' AS column_default, | ||||||
|  |           true AS is_nullable, | ||||||
|  |           ''  AS comment | ||||||
|         FROM v_catalog.view_columns |         FROM v_catalog.view_columns | ||||||
|         WHERE lower(table_name) = '{table}' |         WHERE lower(table_name) = '{table}' | ||||||
|         AND {schema_condition} |         AND {schema_condition} | ||||||
| @ -47,3 +69,16 @@ VERTICA_VIEW_DEFINITION = textwrap.dedent( | |||||||
|       AND {schema_condition} |       AND {schema_condition} | ||||||
|     """ |     """ | ||||||
| ) | ) | ||||||
|  | 
 | ||||||
|  | VERTICA_LIST_DATABASES = "SELECT database_name from v_catalog.databases" | ||||||
|  | 
 | ||||||
|  | VERTICA_TABLE_COMMENTS = textwrap.dedent( | ||||||
|  |     """ | ||||||
|  |     SELECT | ||||||
|  |       object_schema as schema, | ||||||
|  |       object_name as table_name, | ||||||
|  |       comment as table_comment | ||||||
|  |     FROM v_catalog.comments | ||||||
|  |     WHERE object_type = 'TABLE'; | ||||||
|  |     """ | ||||||
|  | ) | ||||||
|  | |||||||
| @ -59,7 +59,7 @@ def _(elements, compiler, **kwargs): | |||||||
| def _(elements, compiler, **kwargs): | def _(elements, compiler, **kwargs): | ||||||
|     """Median computation for MSSQL""" |     """Median computation for MSSQL""" | ||||||
|     col = elements.clauses.clauses[0].name |     col = elements.clauses.clauses[0].name | ||||||
|     return "percentile_cont(0.5)  WITHIN GROUP (ORDER BY %s ASC) OVER()" % col |     return "percentile_cont(0.5) WITHIN GROUP (ORDER BY %s ASC) OVER()" % col | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @compiles(MedianFn, Dialects.Hive) | @compiles(MedianFn, Dialects.Hive) | ||||||
| @ -70,7 +70,7 @@ def _(elements, compiler, **kwargs): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @compiles(MedianFn, Dialects.MySQL) | @compiles(MedianFn, Dialects.MySQL) | ||||||
| def _(elemenst, compiler, **kwargs):  # pylint: disable=unused-argument | def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument | ||||||
|     """Median computation for MySQL currently not supported |     """Median computation for MySQL currently not supported | ||||||
|     Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340 |     Needs to be tackled in https://github.com/open-metadata/OpenMetadata/issues/6340 | ||||||
|     """ |     """ | ||||||
| @ -93,3 +93,11 @@ def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument | |||||||
|     """.format( |     """.format( | ||||||
|         col=col, table=table.value |         col=col, table=table.value | ||||||
|     ) |     ) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @compiles(MedianFn, Dialects.Vertica) | ||||||
|  | def _(elements, compiler, **kwargs):  # pylint: disable=unused-argument | ||||||
|  |     col, table = list(elements.clauses) | ||||||
|  |     return "(SELECT MEDIAN({col}) OVER() FROM {table} LIMIT 1)".format( | ||||||
|  |         col=col, table=table.value | ||||||
|  |     ) | ||||||
|  | |||||||
| @ -58,6 +58,7 @@ def _(element, compiler, **kw): | |||||||
| @compiles(ModuloFn, Dialects.Trino) | @compiles(ModuloFn, Dialects.Trino) | ||||||
| @compiles(ModuloFn, Dialects.IbmDbSa) | @compiles(ModuloFn, Dialects.IbmDbSa) | ||||||
| @compiles(ModuloFn, Dialects.Db2) | @compiles(ModuloFn, Dialects.Db2) | ||||||
|  | @compiles(ModuloFn, Dialects.Vertica) | ||||||
| def _(element, compiler, **kw): | def _(element, compiler, **kw): | ||||||
|     """Modulo function for specific dialect""" |     """Modulo function for specific dialect""" | ||||||
|     value, base = validate_and_compile(element, compiler, **kw) |     value, base = validate_and_compile(element, compiler, **kw) | ||||||
|  | |||||||
| @ -101,3 +101,12 @@ def _(*_, **__): | |||||||
|     from the already sampled results when executing row::MOD(0, 100) < profile_sample. |     from the already sampled results when executing row::MOD(0, 100) < profile_sample. | ||||||
|     """ |     """ | ||||||
|     return "0" |     return "0" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | @compiles(RandomNumFn, Dialects.Vertica) | ||||||
|  | def _(*_, **__): | ||||||
|  |     """ | ||||||
|  |     Vertica RANDOM() returns a number 0 <= n < 1 as a float. | ||||||
|  |     We need to cast it to integer to perform the modulo | ||||||
|  |     """ | ||||||
|  |     return "(RANDOM() * 100)::INTEGER" | ||||||
|  | |||||||
| @ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides. | |||||||
| To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with | To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with | ||||||
| custom Airflow plugins to handle the workflow deployment. | custom Airflow plugins to handle the workflow deployment. | ||||||
| 
 | 
 | ||||||
|  | ### Permissions | ||||||
|  | 
 | ||||||
|  | To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the | ||||||
|  | `V_CATALOG` schema. You can grant those as follows for the schemas in your database: | ||||||
|  | 
 | ||||||
|  | ```sql | ||||||
|  | CREATE USER openmetadata IDENTIFIED BY 'password'; | ||||||
|  | GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata; | ||||||
|  | GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata; | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
| ### Python Requirements | ### Python Requirements | ||||||
| 
 | 
 | ||||||
| To run the Vertica ingestion, you will need to install: | To run the Vertica ingestion, you will need to install: | ||||||
|  | |||||||
| @ -22,6 +22,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides. | |||||||
| To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with | To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with | ||||||
| custom Airflow plugins to handle the workflow deployment. | custom Airflow plugins to handle the workflow deployment. | ||||||
| 
 | 
 | ||||||
|  | ### Permissions | ||||||
|  | 
 | ||||||
|  | To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the | ||||||
|  | `V_CATALOG` schema. You can grant those as follows for the schemas in your database: | ||||||
|  | 
 | ||||||
|  | ```sql | ||||||
|  | CREATE USER openmetadata IDENTIFIED BY 'password'; | ||||||
|  | GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata; | ||||||
|  | GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata; | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
| ### Python Requirements | ### Python Requirements | ||||||
| 
 | 
 | ||||||
| To run the Vertica ingestion, you will need to install: | To run the Vertica ingestion, you will need to install: | ||||||
|  | |||||||
| @ -43,6 +43,17 @@ To deploy OpenMetadata, check the <a href="/deployment">Deployment</a> guides. | |||||||
| To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with | To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with | ||||||
| custom Airflow plugins to handle the workflow deployment. | custom Airflow plugins to handle the workflow deployment. | ||||||
| 
 | 
 | ||||||
|  | ### Permissions | ||||||
|  | 
 | ||||||
|  | To run the ingestion we need a user with `SELECT` grants on the schemas that you'd like to ingest, as well as on the | ||||||
|  | `V_CATALOG` schema. You can grant those as follows for the schemas in your database: | ||||||
|  | 
 | ||||||
|  | ```sql | ||||||
|  | CREATE USER openmetadata IDENTIFIED BY 'password'; | ||||||
|  | GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO openmetadata; | ||||||
|  | GRANT SELECT ON ALL TABLES IN SCHEMA V_CATALOG TO openmetadata; | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
| ## Metadata Ingestion | ## Metadata Ingestion | ||||||
| 
 | 
 | ||||||
| ### 1. Visit the Services Page | ### 1. Visit the Services Page | ||||||
|  | |||||||
| @ -0,0 +1,16 @@ | |||||||
|  | --- | ||||||
|  | title: Vertica Connector Troubleshooting | ||||||
|  | slug: /connectors/database/vertica/troubleshooting | ||||||
|  | --- | ||||||
|  | 
 | ||||||
|  | # Troubleshooting | ||||||
|  | 
 | ||||||
|  | Learn how to resolve the most common problems people encounter in the Vertica connector. | ||||||
|  | 
 | ||||||
|  | ## Profiler: New session rejected | ||||||
|  | 
 | ||||||
|  | If you see the error `New session rejected due to limit, already XYZ sessions active` when running the profiler, | ||||||
|  | it means that the number of threads configured in the profiler workflow is exceeding the connection limits of your | ||||||
|  | Vertica instance. | ||||||
|  | 
 | ||||||
|  | Note that by default the profiler runs with 5 threads. In case you see this error, you might need to reduce this number. | ||||||
| @ -344,6 +344,8 @@ site_menu: | |||||||
|     url: /connectors/database/vertica/airflow |     url: /connectors/database/vertica/airflow | ||||||
|   - category: Connectors / Database / Vertica / CLI |   - category: Connectors / Database / Vertica / CLI | ||||||
|     url: /connectors/database/vertica/cli |     url: /connectors/database/vertica/cli | ||||||
|  |   - category: Connectors / Database / Vertica / Troubleshooting | ||||||
|  |     url: /connectors/database/vertica/troubleshooting | ||||||
|   - category: Connectors / Dashboard |   - category: Connectors / Dashboard | ||||||
|     url: /connectors/dashboard |     url: /connectors/dashboard | ||||||
|   - category: Connectors / Dashboard / Looker |   - category: Connectors / Dashboard / Looker | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Pere Miquel Brull
						Pere Miquel Brull