mirror of
				https://github.com/open-metadata/OpenMetadata.git
				synced 2025-11-04 04:29:13 +00:00 
			
		
		
		
	ES reindex Airflow workflow (#8723)
* Added support for 'Metadata Service' connectors * Fixed failing unit tests * Removed delete button for 'Metadata Service' connector of type OpenMetadataServer * Code optimization for SelectServiceType component * Fixed errors arrived due to backend changes related to OpenMetadata Services * init reindex * Added additional step for adding Metadata to ES configs while creating ingestion pipeline for Metadata Service * Add ES reindex * Add ES reindex * Format * Format * Rename service * Pick service name from IngestionPipeline * fix ui conflicts Co-authored-by: Aniket Katkar <aniketkatkar97@gmail.com> Co-authored-by: Chirag Madlani <12962843+chirag-madlani@users.noreply.github.com>
This commit is contained in:
		
							parent
							
								
									b04485a5bc
								
							
						
					
					
						commit
						f33003485e
					
				@ -1,7 +1,7 @@
 | 
			
		||||
{
 | 
			
		||||
  "source": {
 | 
			
		||||
    "type": "metadata_elasticsearch",
 | 
			
		||||
    "serviceName": "openMetadata",
 | 
			
		||||
    "serviceName": "Openmetadata",
 | 
			
		||||
    "serviceConnection": {
 | 
			
		||||
      "config":{
 | 
			
		||||
        "type":"MetadataES"
 | 
			
		||||
 | 
			
		||||
@ -51,6 +51,7 @@ from metadata.generated.schema.entity.services.connections.metadata.openMetadata
 | 
			
		||||
from metadata.generated.schema.entity.services.dashboardService import DashboardService
 | 
			
		||||
from metadata.generated.schema.entity.services.databaseService import DatabaseService
 | 
			
		||||
from metadata.generated.schema.entity.services.messagingService import MessagingService
 | 
			
		||||
from metadata.generated.schema.entity.services.metadataService import MetadataService
 | 
			
		||||
from metadata.generated.schema.entity.services.mlmodelService import MlModelService
 | 
			
		||||
from metadata.generated.schema.entity.services.pipelineService import PipelineService
 | 
			
		||||
from metadata.generated.schema.entity.services.storageService import StorageService
 | 
			
		||||
@ -375,6 +376,14 @@ class OpenMetadata(
 | 
			
		||||
        ):
 | 
			
		||||
            return "/services/mlmodelServices"
 | 
			
		||||
 | 
			
		||||
        if issubclass(
 | 
			
		||||
            entity,
 | 
			
		||||
            get_args(
 | 
			
		||||
                Union[MetadataService, self.get_create_entity_type(MetadataService)]
 | 
			
		||||
            ),
 | 
			
		||||
        ):
 | 
			
		||||
            return "/services/metadataServices"
 | 
			
		||||
 | 
			
		||||
        if issubclass(
 | 
			
		||||
            entity,
 | 
			
		||||
            get_args(
 | 
			
		||||
 | 
			
		||||
@ -9,7 +9,7 @@
 | 
			
		||||
#  See the License for the specific language governing permissions and
 | 
			
		||||
#  limitations under the License.
 | 
			
		||||
"""
 | 
			
		||||
testSuite DAG function builder
 | 
			
		||||
Data Insights DAG function builder
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from airflow import DAG
 | 
			
		||||
@ -39,7 +39,8 @@ def build_data_insight_workflow_config(
 | 
			
		||||
 | 
			
		||||
    workflow_config = OpenMetadataWorkflowConfig(
 | 
			
		||||
        source=build_source(ingestion_pipeline),
 | 
			
		||||
        sink=ingestion_pipeline.sink,
 | 
			
		||||
        # ingestion_pipeline.service.serviceConnection.elasticsearch
 | 
			
		||||
        sink=ingestion_pipeline.openMetadataServerConnection.elasticsSearch,
 | 
			
		||||
        processor=Processor(
 | 
			
		||||
            type="data-insight-processor",
 | 
			
		||||
            config={},
 | 
			
		||||
@ -55,7 +56,7 @@ def build_data_insight_workflow_config(
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def build_data_insight_dag(ingestion_pipeline: IngestionPipeline) -> DAG:
 | 
			
		||||
    """Build a simple testSuite DAG"""
 | 
			
		||||
    """Build a simple Data Insight DAG"""
 | 
			
		||||
    workflow_config = build_data_insight_workflow_config(ingestion_pipeline)
 | 
			
		||||
    dag = build_dag(
 | 
			
		||||
        task_name="data_insight_task",
 | 
			
		||||
 | 
			
		||||
@ -0,0 +1,113 @@
 | 
			
		||||
#  Copyright 2021 Collate
 | 
			
		||||
#  Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
#  you may not use this file except in compliance with the License.
 | 
			
		||||
#  You may obtain a copy of the License at
 | 
			
		||||
#  http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
#  Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
#  distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
#  See the License for the specific language governing permissions and
 | 
			
		||||
#  limitations under the License.
 | 
			
		||||
"""
 | 
			
		||||
ElasticSearch reindex DAG function builder
 | 
			
		||||
"""
 | 
			
		||||
from airflow import DAG
 | 
			
		||||
from openmetadata_managed_apis.workflows.ingestion.common import (
 | 
			
		||||
    ClientInitializationError,
 | 
			
		||||
    build_dag,
 | 
			
		||||
    build_source,
 | 
			
		||||
    metadata_ingestion_workflow,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
from metadata.generated.schema.entity.services.connections.metadata.metadataESConnection import (
 | 
			
		||||
    MetadataESConnection,
 | 
			
		||||
)
 | 
			
		||||
from metadata.generated.schema.entity.services.ingestionPipelines.ingestionPipeline import (
 | 
			
		||||
    IngestionPipeline,
 | 
			
		||||
)
 | 
			
		||||
from metadata.generated.schema.entity.services.metadataService import (
 | 
			
		||||
    MetadataConnection,
 | 
			
		||||
    MetadataService,
 | 
			
		||||
)
 | 
			
		||||
from metadata.generated.schema.metadataIngestion.workflow import (
 | 
			
		||||
    LogLevels,
 | 
			
		||||
    OpenMetadataWorkflowConfig,
 | 
			
		||||
    Sink,
 | 
			
		||||
)
 | 
			
		||||
from metadata.generated.schema.metadataIngestion.workflow import (
 | 
			
		||||
    Source as WorkflowSource,
 | 
			
		||||
)
 | 
			
		||||
from metadata.generated.schema.metadataIngestion.workflow import (
 | 
			
		||||
    SourceConfig,
 | 
			
		||||
    WorkflowConfig,
 | 
			
		||||
)
 | 
			
		||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def build_es_reindex_workflow_config(
 | 
			
		||||
    ingestion_pipeline: IngestionPipeline,
 | 
			
		||||
) -> OpenMetadataWorkflowConfig:
 | 
			
		||||
    """
 | 
			
		||||
    Given an airflow_pipeline, prepare the workflow config JSON
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        metadata = OpenMetadata(config=ingestion_pipeline.openMetadataServerConnection)
 | 
			
		||||
    except Exception as exc:
 | 
			
		||||
        raise ClientInitializationError(f"Failed to initialize the client: {exc}")
 | 
			
		||||
 | 
			
		||||
    openmetadata_service: MetadataService = metadata.get_by_name(
 | 
			
		||||
        entity=MetadataService, fqn=ingestion_pipeline.service.fullyQualifiedName
 | 
			
		||||
    )
 | 
			
		||||
    if not openmetadata_service:
 | 
			
		||||
        raise ValueError(
 | 
			
		||||
            "Could not retrieve the OpenMetadata service! This should not happen."
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    om_service_elasticsearch_dict = {
 | 
			
		||||
        key: value
 | 
			
		||||
        for key, value in openmetadata_service.connection.config.elasticsSearch.config.dict().items()
 | 
			
		||||
        if value
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    ingestion_pipeline_elasticsearch_source_config = {
 | 
			
		||||
        key: value
 | 
			
		||||
        for key, value in ingestion_pipeline.sourceConfig.config.dict().items()
 | 
			
		||||
        if value and key != "type"
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    workflow_config = OpenMetadataWorkflowConfig(
 | 
			
		||||
        source=WorkflowSource(
 | 
			
		||||
            type="metadata_elasticsearch",
 | 
			
		||||
            serviceName=ingestion_pipeline.service.fullyQualifiedName,
 | 
			
		||||
            serviceConnection=MetadataConnection(config=MetadataESConnection()),
 | 
			
		||||
            sourceConfig=SourceConfig(),
 | 
			
		||||
        ),
 | 
			
		||||
        sink=Sink(
 | 
			
		||||
            type="elasticsearch",
 | 
			
		||||
            config={
 | 
			
		||||
                **om_service_elasticsearch_dict,
 | 
			
		||||
                **ingestion_pipeline_elasticsearch_source_config,
 | 
			
		||||
            },
 | 
			
		||||
        ),
 | 
			
		||||
        workflowConfig=WorkflowConfig(
 | 
			
		||||
            loggerLevel=ingestion_pipeline.loggerLevel or LogLevels.INFO,
 | 
			
		||||
            openMetadataServerConfig=ingestion_pipeline.openMetadataServerConnection,
 | 
			
		||||
        ),
 | 
			
		||||
        ingestionPipelineFQN=ingestion_pipeline.fullyQualifiedName.__root__,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return workflow_config
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def build_es_reindex_dag(ingestion_pipeline: IngestionPipeline) -> DAG:
 | 
			
		||||
    """Build a simple Data Insight DAG"""
 | 
			
		||||
    workflow_config = build_es_reindex_workflow_config(ingestion_pipeline)
 | 
			
		||||
    dag = build_dag(
 | 
			
		||||
        task_name="elasticsearch_reindex_task",
 | 
			
		||||
        ingestion_pipeline=ingestion_pipeline,
 | 
			
		||||
        workflow_config=workflow_config,
 | 
			
		||||
        workflow_fn=metadata_ingestion_workflow,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    return dag
 | 
			
		||||
@ -16,6 +16,9 @@ Add a function for each type from PipelineType
 | 
			
		||||
from openmetadata_managed_apis.workflows.ingestion.data_insight import (
 | 
			
		||||
    build_data_insight_dag,
 | 
			
		||||
)
 | 
			
		||||
from openmetadata_managed_apis.workflows.ingestion.es_reindex import (
 | 
			
		||||
    build_es_reindex_dag,
 | 
			
		||||
)
 | 
			
		||||
from openmetadata_managed_apis.workflows.ingestion.lineage import build_lineage_dag
 | 
			
		||||
from openmetadata_managed_apis.workflows.ingestion.metadata import build_metadata_dag
 | 
			
		||||
from openmetadata_managed_apis.workflows.ingestion.profiler import build_profiler_dag
 | 
			
		||||
@ -36,4 +39,5 @@ build_registry.add(PipelineType.usage.value)(build_usage_dag)
 | 
			
		||||
build_registry.add(PipelineType.lineage.value)(build_lineage_dag)
 | 
			
		||||
build_registry.add(PipelineType.profiler.value)(build_profiler_dag)
 | 
			
		||||
build_registry.add(PipelineType.TestSuite.value)(build_test_suite_dag)
 | 
			
		||||
build_registry.add(PipelineType.dataInsight)(build_data_insight_dag)
 | 
			
		||||
build_registry.add(PipelineType.dataInsight.value)(build_data_insight_dag)
 | 
			
		||||
build_registry.add(PipelineType.elasticSearchReindex.value)(build_es_reindex_dag)
 | 
			
		||||
 | 
			
		||||
@ -203,7 +203,7 @@
 | 
			
		||||
      "additionalProperties": false,
 | 
			
		||||
      "required": ["type"]
 | 
			
		||||
    },
 | 
			
		||||
    "supportMetadataToElasticSearchExtraction": {
 | 
			
		||||
    "supportsDataInsightExtraction": {
 | 
			
		||||
      "$ref": "../connectionBasicType.json#/definitions/supportsDataInsightExtraction"
 | 
			
		||||
    },
 | 
			
		||||
    "supportsElasticSearchReindexingExtraction": {
 | 
			
		||||
 | 
			
		||||
@ -25,9 +25,6 @@
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
          "$ref": "../mlmodelService.json#/definitions/mlModelConnection"
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
          "$ref": "../metadataService.json#/definitions/metadataConnection"
 | 
			
		||||
        }
 | 
			
		||||
      ]
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -18,12 +18,14 @@
 | 
			
		||||
      "default": "MetadataToElasticSearch"
 | 
			
		||||
    },
 | 
			
		||||
    "useSSL": {
 | 
			
		||||
      "description": "Indicates whether to use SSL",
 | 
			
		||||
      "title": "Use SSL",
 | 
			
		||||
      "description": "Indicates whether to use SSL when connecting to ElasticSearch. By default, we will ignore SSL settings.",
 | 
			
		||||
      "type": "boolean",
 | 
			
		||||
      "default": false
 | 
			
		||||
    },
 | 
			
		||||
    "verifyCerts": {
 | 
			
		||||
      "description": "Indicates whether to verify certificates",
 | 
			
		||||
      "title": "Validate Certificates",
 | 
			
		||||
      "description": "Indicates whether to verify certificates when using SSL connection to ElasticSearch. Ignored by default. Is set to true, make sure to send the certificates in the property `CA Certificates`.",
 | 
			
		||||
      "type": "boolean",
 | 
			
		||||
      "default": false
 | 
			
		||||
    },
 | 
			
		||||
@ -33,17 +35,22 @@
 | 
			
		||||
      "default": 30
 | 
			
		||||
    },
 | 
			
		||||
    "caCerts": {
 | 
			
		||||
      "description": "Certificate path to be added in configuration",
 | 
			
		||||
      "type": "string"
 | 
			
		||||
      "title": "CA Certificates",
 | 
			
		||||
      "description": "Certificate path to be added in configuration. The path should be local in the Ingestion Container.",
 | 
			
		||||
      "type": "string",
 | 
			
		||||
      "default": null
 | 
			
		||||
    },
 | 
			
		||||
    "useAwsCredentials": {
 | 
			
		||||
      "description": "Indicates whether to use aws credentials",
 | 
			
		||||
      "title": "Use AWS Credentials",
 | 
			
		||||
      "description": "Indicates whether to use aws credentials when connecting to OpenSearch in AWS.",
 | 
			
		||||
      "type": "boolean",
 | 
			
		||||
      "default": false
 | 
			
		||||
    },
 | 
			
		||||
    "regionName": {
 | 
			
		||||
      "description": "Region name in case of useAwsCredentials",
 | 
			
		||||
      "type": "string"
 | 
			
		||||
      "title": "AWS Region Name",
 | 
			
		||||
      "description": "Region name. Required when using AWS Credentials.",
 | 
			
		||||
      "type": "string",
 | 
			
		||||
      "default": null
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  "required": ["type"],
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user