diff --git a/ingestion/src/metadata/examples/workflows/presto.yaml b/ingestion/src/metadata/examples/workflows/presto.yaml index 6864dc41793..6d2c7d2f523 100644 --- a/ingestion/src/metadata/examples/workflows/presto.yaml +++ b/ingestion/src/metadata/examples/workflows/presto.yaml @@ -5,10 +5,10 @@ source: config: type: Presto hostPort: localhost:8080 - catalog: tpcds + catalog: catalog_name username: admin password: password - databaseSchema: tpcds + databaseSchema: schema_name sourceConfig: config: generateSampleData: false diff --git a/ingestion/src/metadata/examples/workflows/trino.yaml b/ingestion/src/metadata/examples/workflows/trino.yaml index c937ec3c505..be043dca60d 100644 --- a/ingestion/src/metadata/examples/workflows/trino.yaml +++ b/ingestion/src/metadata/examples/workflows/trino.yaml @@ -6,8 +6,8 @@ source: type: Trino hostPort: localhost:8080 username: user - catalog: tpcds - databaseSchema: tiny + catalog: catalog_name + databaseSchema: schema_name connectionOptions: {} connectionArguments: {} sourceConfig: diff --git a/ingestion/src/metadata/ingestion/source/database/presto/metadata.py b/ingestion/src/metadata/ingestion/source/database/presto/metadata.py index ba917826ed4..333936e94cb 100644 --- a/ingestion/src/metadata/ingestion/source/database/presto/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/presto/metadata.py @@ -132,27 +132,28 @@ class PrestoSource(CommonDbSourceService): else: results = self.connection.execute("SHOW CATALOGS") for res in results: - new_catalog = res[0] - database_fqn = fqn.build( - self.metadata, - entity_type=Database, - service_name=self.context.database_service.name.__root__, - database_name=new_catalog, - ) - if filter_by_database( - self.source_config.databaseFilterPattern, - database_fqn - if self.source_config.useFqnForFiltering - else new_catalog, - ): - self.status.filter(database_fqn, "Database Filtered Out") - continue - - try: - self.set_inspector(database_name=new_catalog) - yield 
new_catalog - except Exception as exc: - logger.debug(traceback.format_exc()) - logger.warning( - f"Error trying to connect to database {new_catalog}: {exc}" + if res: + new_catalog = res[0] + database_fqn = fqn.build( + self.metadata, + entity_type=Database, + service_name=self.context.database_service.name.__root__, + database_name=new_catalog, ) + if filter_by_database( + self.source_config.databaseFilterPattern, + database_fqn + if self.source_config.useFqnForFiltering + else new_catalog, + ): + self.status.filter(database_fqn, "Database Filtered Out") + continue + + try: + self.set_inspector(database_name=new_catalog) + yield new_catalog + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.warning( + f"Error trying to connect to database {new_catalog}: {exc}" + ) diff --git a/ingestion/src/metadata/ingestion/source/database/trino/metadata.py b/ingestion/src/metadata/ingestion/source/database/trino/metadata.py index d698a3b1334..4408ec5078c 100644 --- a/ingestion/src/metadata/ingestion/source/database/trino/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/trino/metadata.py @@ -181,27 +181,28 @@ class TrinoSource(CommonDbSourceService): else: results = self.connection.execute("SHOW CATALOGS") for res in results: - new_catalog = res[0] - database_fqn = fqn.build( - self.metadata, - entity_type=Database, - service_name=self.context.database_service.name.__root__, - database_name=new_catalog, - ) - if filter_by_database( - self.source_config.databaseFilterPattern, - database_fqn - if self.source_config.useFqnForFiltering - else new_catalog, - ): - self.status.filter(database_fqn, "Database Filtered Out") - continue - - try: - self.set_inspector(database_name=new_catalog) - yield new_catalog - except Exception as exc: - logger.debug(traceback.format_exc()) - logger.warning( - f"Error trying to connect to database {new_catalog}: {exc}" + if res: + new_catalog = res[0] + database_fqn = fqn.build( + self.metadata, + 
entity_type=Database, + service_name=self.context.database_service.name.__root__, + database_name=new_catalog, ) + if filter_by_database( + self.source_config.databaseFilterPattern, + database_fqn + if self.source_config.useFqnForFiltering + else new_catalog, + ): + self.status.filter(database_fqn, "Database Filtered Out") + continue + + try: + self.set_inspector(database_name=new_catalog) + yield new_catalog + except Exception as exc: + logger.debug(traceback.format_exc()) + logger.warning( + f"Error trying to connect to database {new_catalog}: {exc}" + ) diff --git a/openmetadata-docs/content/connectors/database/presto/airflow.md b/openmetadata-docs/content/connectors/database/presto/airflow.md index ec81be2a706..1cee3bf6470 100644 --- a/openmetadata-docs/content/connectors/database/presto/airflow.md +++ b/openmetadata-docs/content/connectors/database/presto/airflow.md @@ -142,6 +142,7 @@ workflowConfig: - **password**: Password to connect to Presto. - **hostPort**: Enter the fully qualified hostname and port number for your Presto deployment in the Host and Port field. - **catalog**: Presto offers a catalog feature where all the databases are stored. (Providing the Catalog is not mandatory from 0.12.2 or greater versions) +- **databaseSchema**: DatabaseSchema of the data source. This is an optional parameter; set it if you would like to restrict metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all databaseSchemas. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Presto during the connection. These details must be added as Key-Value pairs. - **Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to Presto during the connection. These details must be added as Key-Value pairs. 
- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"` diff --git a/openmetadata-docs/content/connectors/database/presto/cli.md b/openmetadata-docs/content/connectors/database/presto/cli.md index 25f3d302211..08994fb9927 100644 --- a/openmetadata-docs/content/connectors/database/presto/cli.md +++ b/openmetadata-docs/content/connectors/database/presto/cli.md @@ -142,6 +142,7 @@ workflowConfig: - **password**: Password to connect to Presto. - **hostPort**: Enter the fully qualified hostname and port number for your Presto deployment in the Host and Port field. - **catalog**: Presto offers a catalog feature where all the databases are stored. (Providing the Catalog is not mandatory from 0.12.2 or greater versions) +- **databaseSchema**: DatabaseSchema of the data source. This is an optional parameter; set it if you would like to restrict metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all databaseSchemas. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Presto during the connection. These details must be added as Key-Value pairs. - **Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to Presto during the connection. These details must be added as Key-Value pairs. 
- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"` diff --git a/openmetadata-docs/content/connectors/database/presto/index.md b/openmetadata-docs/content/connectors/database/presto/index.md index 9baa0ce2f92..0bed7034d01 100644 --- a/openmetadata-docs/content/connectors/database/presto/index.md +++ b/openmetadata-docs/content/connectors/database/presto/index.md @@ -136,6 +136,7 @@ the changes. - **Password**: Password to connect to Presto. - **Host and Port**: Enter the fully qualified hostname and port number for your Presto deployment in the Host and Port field. - **Catalog**: Presto offers a catalog feature where all the databases are stored. (Providing the Catalog is not mandatory from 0.12.2 or greater versions) +- **DatabaseSchema**: DatabaseSchema of the data source. This is an optional parameter; set it if you would like to restrict metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all databaseSchemas. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Presto during the connection. These details must be added as Key-Value pairs. - **Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to Presto during the connection. These details must be added as Key-Value pairs. 
- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"` diff --git a/openmetadata-docs/content/connectors/database/trino/airflow.md b/openmetadata-docs/content/connectors/database/trino/airflow.md index 21701489451..baa4d006a90 100644 --- a/openmetadata-docs/content/connectors/database/trino/airflow.md +++ b/openmetadata-docs/content/connectors/database/trino/airflow.md @@ -30,6 +30,17 @@ To run the Trino ingestion, you will need to install: pip3 install "openmetadata-ingestion[trino]" ``` + + +To ingest metadata from the Trino source, the user must have select privileges for the following tables. +- `information_schema.schemata` +- `information_schema.columns` +- `information_schema.tables` +- `information_schema.views` +- `system.metadata.table_comments` + + + ## Metadata Ingestion All connectors are defined as JSON Schemas. @@ -145,6 +156,7 @@ workflowConfig: - **password**: Password to connect to Trino. - **hostPort**: Enter the fully qualified hostname and port number for your Trino deployment in the Host and Port field. - **catalog**: Trino offers a catalog feature where all the databases are stored. (Providing the Catalog is not mandatory from 0.12.2 or greater versions) +- **databaseSchema**: DatabaseSchema of the data source. This is an optional parameter; set it if you would like to restrict metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all databaseSchemas. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Trino during the connection. These details must be added as Key-Value pairs. - **Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to Trino during the connection. These details must be added as Key-Value pairs. 
- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"` diff --git a/openmetadata-docs/content/connectors/database/trino/cli.md b/openmetadata-docs/content/connectors/database/trino/cli.md index c08902e9974..8517a8b235f 100644 --- a/openmetadata-docs/content/connectors/database/trino/cli.md +++ b/openmetadata-docs/content/connectors/database/trino/cli.md @@ -30,6 +30,17 @@ To run the Trino ingestion, you will need to install: pip3 install "openmetadata-ingestion[trino]" ``` + + +To ingest metadata from the Trino source, the user must have select privileges for the following tables. +- `information_schema.schemata` +- `information_schema.columns` +- `information_schema.tables` +- `information_schema.views` +- `system.metadata.table_comments` + + + ## Metadata Ingestion All connectors are defined as JSON Schemas. @@ -145,6 +156,7 @@ workflowConfig: - **password**: Password to connect to Trino. - **hostPort**: Enter the fully qualified hostname and port number for your Trino deployment in the Host and Port field. - **catalog**: Trino offers a catalog feature where all the databases are stored. (Providing the Catalog is not mandatory from 0.12.2 or greater versions) +- **databaseSchema**: DatabaseSchema of the data source. This is an optional parameter; set it if you would like to restrict metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all databaseSchemas. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Trino during the connection. These details must be added as Key-Value pairs. - **Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to Trino during the connection. These details must be added as Key-Value pairs. 
- In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"` diff --git a/openmetadata-docs/content/connectors/database/trino/index.md b/openmetadata-docs/content/connectors/database/trino/index.md index 9a5abc3808e..b78eb4bdce3 100644 --- a/openmetadata-docs/content/connectors/database/trino/index.md +++ b/openmetadata-docs/content/connectors/database/trino/index.md @@ -43,6 +43,17 @@ To deploy OpenMetadata, check the Deployment guides. To run the Ingestion via the UI you'll need to use the OpenMetadata Ingestion Container, which comes shipped with custom Airflow plugins to handle the workflow deployment. + + +To ingest metadata from the Trino source, the user must have select privileges for the following tables. +- `information_schema.schemata` +- `information_schema.columns` +- `information_schema.tables` +- `information_schema.views` +- `system.metadata.table_comments` + + + ## Metadata Ingestion ### 1. Visit the Services Page @@ -136,6 +147,7 @@ the changes. - **Password**: Password to connect to Trino. - **Host and Port**: Enter the fully qualified hostname and port number for your Trino deployment in the Host and Port field. - **Catalog**: Trino offers a catalog feature where all the databases are stored. (Providing the Catalog is not mandatory from 0.12.2 or greater versions) +- **DatabaseSchema**: DatabaseSchema of the data source. This is an optional parameter; set it if you would like to restrict metadata reading to a single databaseSchema. When left blank, OpenMetadata Ingestion attempts to scan all databaseSchemas. - **Connection Options (Optional)**: Enter the details for any additional connection options that can be sent to Trino during the connection. These details must be added as Key-Value pairs. 
- **Connection Arguments (Optional)**: Enter the details for any additional connection arguments such as security or protocol configs that can be sent to Trino during the connection. These details must be added as Key-Value pairs. - In case you are using Single-Sign-On (SSO) for authentication, add the `authenticator` details in the Connection Arguments as a Key-Value pair as follows: `"authenticator" : "sso_login_url"`