From f6a87ee02a59886037d1bb37099eab3592bc8c20 Mon Sep 17 00:00:00 2001 From: Pere Miquel Brull Date: Mon, 9 Oct 2023 20:47:19 +0200 Subject: [PATCH] Fix #12082 - Bump PyAthena version (#13464) --- .../airflow/dags/airflow_metadata_to_es.py | 27 +++++++------------ ingestion/setup.py | 2 +- .../source/database/athena/metadata.py | 2 +- ingestion/src/metadata/pii/processor.py | 10 ++++++- 4 files changed, 20 insertions(+), 21 deletions(-) diff --git a/ingestion/examples/airflow/dags/airflow_metadata_to_es.py b/ingestion/examples/airflow/dags/airflow_metadata_to_es.py index 5cdd0a35279..a949e659ccd 100644 --- a/ingestion/examples/airflow/dags/airflow_metadata_to_es.py +++ b/ingestion/examples/airflow/dags/airflow_metadata_to_es.py @@ -37,32 +37,23 @@ config = """ { "source": { "type": "metadata_elasticsearch", - "serviceName": "openMetadata", + "serviceName": "Openmetadata", "serviceConnection": { "config":{ - "type":"MetadataES", - "includeTables": "true", - "includeUsers": "true", - "includeTopics": "true", - "includeDashboards": "true", - "limitRecords": 10 - } + "type":"MetadataES" + } }, - "sourceConfig":{"config":{}} + "sourceConfig":{"config":{ + "type": "MetadataToElasticSearch" + }} }, "sink": { - "type": "elasticsearch", - "config": { - "index_tables": "true", - "index_topics": "true", - "index_dashboards": "true", - "es_host": "elasticsearch", - "es_port": 9200 - } + "type": "metadata-rest", + "config": {} }, "workflowConfig": { "openMetadataServerConfig": { - "hostPort": "http://openmetadata-server:8585/api", + "hostPort": "http://localhost:8585/api", "authProvider": "openmetadata", "securityConfig":{ "jwtToken": "eyJraWQiOiJHYjM4OWEtOWY3Ni1nZGpzLWE5MmotMDI0MmJrOTQzNTYiLCJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZG1pbiIsImlzQm90IjpmYWxzZSwiaXNzIjoib3Blbi1tZXRhZGF0YS5vcmciLCJpYXQiOjE2NjM5Mzg0NjIsImVtYWlsIjoiYWRtaW5Ab3Blbm1ldGFkYXRhLm9yZyJ9.tS8um_5DKu7HgzGBzS1VTA5uUjKWOCU0B_j08WXBiEC0mr0zNREkqVfwFDD-d24HlNEbrqioLsBuFRiwIWKc1m_ZlVQbG7P36RUxhuv2vbSp80FKyNM-Tj93FDzq91jsyNmsQhyNv_fNr3TXfzzSPjHt8Go0FMMP66weoKMgW2PbXlhVKwEuXUHyakLLzewm9UMeQaEiRzhiTMU3UkLXcKbYEJJvfNFcLwSl9W8JCO_l0Yj3ud-qt_nQYEZwqW6u5nfdQllN133iikV4fM5QZsMCnm8Rq1mvLR0y9bmJiD7fwM1tmJ791TUWqmKaTnP49U493VanKpUAfzIiOiIbhg" diff --git a/ingestion/setup.py b/ingestion/setup.py index b958aa202b8..e88d9637761 100644 --- a/ingestion/setup.py +++ b/ingestion/setup.py @@ -133,7 +133,7 @@ base_requirements = { plugins: Dict[str, Set[str]] = { "airflow": {VERSIONS["airflow"]}, # Same as ingestion container. For development. "amundsen": {VERSIONS["neo4j"]}, - "athena": {"pyathena==2.25.2"}, + "athena": {"pyathena==3.0.8"}, "atlas": {}, "azuresql": {VERSIONS["pyodbc"]}, "azure-sso": {VERSIONS["msal"]}, diff --git a/ingestion/src/metadata/ingestion/source/database/athena/metadata.py b/ingestion/src/metadata/ingestion/source/database/athena/metadata.py index d663063f169..86361862efd 100644 --- a/ingestion/src/metadata/ingestion/source/database/athena/metadata.py +++ b/ingestion/src/metadata/ingestion/source/database/athena/metadata.py @@ -13,7 +13,7 @@ from typing import Iterable -from pyathena.sqlalchemy_athena import AthenaDialect +from pyathena.sqlalchemy.base import AthenaDialect from sqlalchemy import types from sqlalchemy.engine import reflection diff --git a/ingestion/src/metadata/pii/processor.py b/ingestion/src/metadata/pii/processor.py index 1f6d0075487..0632d01255a 100644 --- a/ingestion/src/metadata/pii/processor.py +++ b/ingestion/src/metadata/pii/processor.py @@ -62,9 +62,17 @@ class PIIProcessor(Processor): DatabaseServiceProfilerPipeline, self.config.source.sourceConfig.config ) # Used to satisfy type checked - self.ner_scanner = NERScanner() + self._ner_scanner = None self.confidence_threshold = self.source_config.confidence + @property + def ner_scanner(self) -> NERScanner: + """Load the NER Scanner only if called""" + if self._ner_scanner is None: + self._ner_scanner = NERScanner() + + return self._ner_scanner + @classmethod def create(cls, config_dict: dict, metadata: OpenMetadata) -> "Step": config = parse_workflow_config_gracefully(config_dict)