mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-12-26 15:10:05 +00:00
Fix ElasticSearch Test Connection & Deploy (#13061)
This commit is contained in:
parent
a41326ea1e
commit
4e633877b3
@ -4,9 +4,12 @@
|
||||
"serviceConnection": {
|
||||
"config": {
|
||||
"type": "ElasticSearch",
|
||||
"hostPort": "localhost:9200"
|
||||
"hostPort": "http://localhost:9200"
|
||||
}
|
||||
},
|
||||
"sourceConfig": {
|
||||
"config": {
|
||||
"type": "SearchMetadata"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -174,7 +174,8 @@ plugins: Dict[str, Set[str]] = {
|
||||
"druid": {"pydruid>=0.6.5"},
|
||||
"dynamodb": {VERSIONS["boto3"]},
|
||||
"elasticsearch": {
|
||||
"elasticsearch==7.13.1"
|
||||
"elasticsearch==7.13.1",
|
||||
"elasticsearch8~=8.9.0",
|
||||
}, # also requires requests-aws4auth which is in base
|
||||
"glue": {VERSIONS["boto3"]},
|
||||
"great-expectations": {VERSIONS["great-expectations"]},
|
||||
|
||||
@ -4,7 +4,7 @@ source:
|
||||
serviceConnection:
|
||||
config:
|
||||
type: ElasticSearch
|
||||
hostPort: localhost:9200
|
||||
hostPort: http://localhost:9200
|
||||
sourceConfig:
|
||||
config:
|
||||
type: SearchMetadata
|
||||
|
||||
@ -14,7 +14,7 @@ Source connection handler
|
||||
"""
|
||||
from typing import Optional
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch8 import Elasticsearch
|
||||
|
||||
from metadata.generated.schema.entity.automations.workflow import (
|
||||
Workflow as AutomationWorkflow,
|
||||
@ -35,26 +35,34 @@ def get_connection(connection: ElasticsearchConnection) -> Elasticsearch:
|
||||
"""
|
||||
basic_auth = None
|
||||
api_key = None
|
||||
if isinstance(connection.authType, BasicAuthentication):
|
||||
if (
|
||||
isinstance(connection.authType, BasicAuthentication)
|
||||
and connection.authType.username
|
||||
):
|
||||
basic_auth = (
|
||||
connection.authType.username,
|
||||
connection.authType.password.get_secret_value(),
|
||||
connection.authType.password.get_secret_value()
|
||||
if connection.authType.password
|
||||
else None,
|
||||
)
|
||||
|
||||
if isinstance(connection.authType, ApiAuthentication):
|
||||
api_key = (
|
||||
connection.authType.apiKeyId,
|
||||
connection.authType.apiKey.get_secret_value(),
|
||||
)
|
||||
if connection.authType.apiKeyId and connection.authType.apiKey:
|
||||
api_key = (
|
||||
connection.authType.apiKeyId,
|
||||
connection.authType.apiKey.get_secret_value(),
|
||||
)
|
||||
elif connection.authType.apiKey:
|
||||
api_key = connection.authType.apiKey.get_secret_value()
|
||||
|
||||
if not connection.connectionArguments:
|
||||
connection.connectionArguments = init_empty_connection_arguments()
|
||||
|
||||
return Elasticsearch(
|
||||
[connection.hostPort],
|
||||
basic_auth=basic_auth,
|
||||
connection.hostPort,
|
||||
http_auth=basic_auth,
|
||||
api_key=api_key,
|
||||
scheme=connection.scheme.value,
|
||||
ca_certs=connection.caCert,
|
||||
**connection.connectionArguments.__root__
|
||||
)
|
||||
|
||||
|
||||
@ -13,7 +13,7 @@ Elasticsearch source to extract metadata
|
||||
"""
|
||||
from typing import Any, Iterable, Optional
|
||||
|
||||
from elasticsearch import Elasticsearch
|
||||
from elasticsearch8 import Elasticsearch
|
||||
|
||||
from metadata.generated.schema.api.data.createSearchIndex import (
|
||||
CreateSearchIndexRequest,
|
||||
@ -67,7 +67,7 @@ class ElasticsearchSource(SearchServiceSource):
|
||||
"""
|
||||
index_list = self.client.indices.get_alias() or {}
|
||||
for index in index_list.keys():
|
||||
yield self.client.indices.get(index)
|
||||
yield self.client.indices.get(index=str(index))
|
||||
|
||||
def get_search_index_name(self, search_index_details: dict) -> Optional[str]:
|
||||
"""
|
||||
|
||||
@ -17,6 +17,9 @@ from typing import Any, Iterable, List, Optional, Set
|
||||
from metadata.generated.schema.api.data.createSearchIndex import (
|
||||
CreateSearchIndexRequest,
|
||||
)
|
||||
from metadata.generated.schema.api.services.createSearchService import (
|
||||
CreateSearchServiceRequest,
|
||||
)
|
||||
from metadata.generated.schema.entity.data.searchIndex import (
|
||||
SearchIndex,
|
||||
SearchIndexSampleData,
|
||||
@ -165,9 +168,13 @@ class SearchServiceSource(TopologyRunnerMixin, Source, ABC):
|
||||
continue
|
||||
yield index_details
|
||||
|
||||
def yield_create_request_search_service(self, config: WorkflowSource):
|
||||
yield self.metadata.get_create_service_from_source(
|
||||
entity=SearchService, config=config
|
||||
def yield_create_request_search_service(
|
||||
self, config: WorkflowSource
|
||||
) -> Iterable[Either[CreateSearchServiceRequest]]:
|
||||
yield Either(
|
||||
right=self.metadata.get_create_service_from_source(
|
||||
entity=SearchService, config=config
|
||||
)
|
||||
)
|
||||
|
||||
def get_services(self) -> Iterable[WorkflowSource]:
|
||||
|
||||
@ -41,7 +41,7 @@ mock_es_config = {
|
||||
"username": "username",
|
||||
"password": "password",
|
||||
},
|
||||
"hostPort": "localhost:9200",
|
||||
"hostPort": "http://localhost:9200",
|
||||
}
|
||||
},
|
||||
"sourceConfig": {"config": {"type": "SearchMetadata"}},
|
||||
|
||||
@ -29,6 +29,7 @@ from metadata.generated.schema.entity.services.messagingService import Messaging
|
||||
from metadata.generated.schema.entity.services.metadataService import MetadataService
|
||||
from metadata.generated.schema.entity.services.mlmodelService import MlModelService
|
||||
from metadata.generated.schema.entity.services.pipelineService import PipelineService
|
||||
from metadata.generated.schema.entity.services.searchService import SearchService
|
||||
from metadata.generated.schema.entity.services.storageService import StorageService
|
||||
from metadata.ingestion.models.encoders import show_secrets_encoder
|
||||
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
||||
@ -66,6 +67,17 @@ from metadata.workflow.metadata import MetadataWorkflow
|
||||
|
||||
logger = workflow_logger()
|
||||
|
||||
ENTITY_CLASS_MAP = {
|
||||
"databaseService": DatabaseService,
|
||||
"pipelineService": PipelineService,
|
||||
"dashboardService": DashboardService,
|
||||
"messagingService": MessagingService,
|
||||
"mlmodelService": MlModelService,
|
||||
"metadataService": MetadataService,
|
||||
"storageService": StorageService,
|
||||
"searchService": SearchService,
|
||||
}
|
||||
|
||||
|
||||
class InvalidServiceException(Exception):
|
||||
"""
|
||||
@ -119,7 +131,7 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
|
||||
|
||||
service_type = ingestion_pipeline.service.type
|
||||
|
||||
entity_class = None
|
||||
entity_class = ENTITY_CLASS_MAP.get(service_type)
|
||||
try:
|
||||
if service_type == "testSuite":
|
||||
return WorkflowSource(
|
||||
@ -129,57 +141,15 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
|
||||
serviceConnection=None, # retrieved from the test suite workflow using the `sourceConfig.config.entityFullyQualifiedName`
|
||||
)
|
||||
|
||||
if service_type == "databaseService":
|
||||
entity_class = DatabaseService
|
||||
service: DatabaseService = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
elif service_type == "pipelineService":
|
||||
entity_class = PipelineService
|
||||
service: PipelineService = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
elif service_type == "dashboardService":
|
||||
entity_class = DashboardService
|
||||
service: DashboardService = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
elif service_type == "messagingService":
|
||||
entity_class = MessagingService
|
||||
service: MessagingService = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
elif service_type == "mlmodelService":
|
||||
entity_class = MlModelService
|
||||
service: MlModelService = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
elif service_type == "metadataService":
|
||||
entity_class = MetadataService
|
||||
service: MetadataService = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
elif service_type == "storageService":
|
||||
entity_class = StorageService
|
||||
service: StorageService = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
else:
|
||||
if entity_class is None:
|
||||
raise InvalidServiceException(f"Invalid Service Type: {service_type}")
|
||||
|
||||
service = metadata.get_by_name(
|
||||
entity=entity_class,
|
||||
fqn=ingestion_pipeline.service.name,
|
||||
nullable=False,
|
||||
)
|
||||
|
||||
except ValidationError as original_error:
|
||||
try:
|
||||
resp = metadata.client.get(
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
{% step srNumber=7 %}
|
||||
|
||||
{% stepDescription title="7. Configure Metadata Ingestion" %}
|
||||
|
||||
In this step we will configure the metadata ingestion pipeline.
|
||||
Please follow the instructions below.
|
||||
|
||||
{% /stepDescription %}
|
||||
|
||||
{% stepVisualInfo %}
|
||||
|
||||
{% image
|
||||
src="/images/v1.2.0/connectors/configure-metadata-ingestion-search.png"
|
||||
alt="Configure Metadata Ingestion"
|
||||
caption="Configure Metadata Ingestion Page" /%}
|
||||
|
||||
{% /stepVisualInfo %}
|
||||
|
||||
{% /step %}
|
||||
|
||||
{% extraContent parentTagName="stepsContainer" %}
|
||||
|
||||
#### Metadata Ingestion Options
|
||||
|
||||
- **Name**: This field refers to the name of ingestion pipeline, you can customize the name or use the generated name.
|
||||
- **Search Index Filter Pattern (Optional)**: Use search index filter patterns to control whether or not to include search indexes as part of metadata ingestion.
|
||||
- **Include**: Explicitly include search indexes by adding a list of comma-separated regular expressions to the Include field. OpenMetadata will include all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be excluded.
|
||||
- **Exclude**: Explicitly exclude search indexes by adding a list of comma-separated regular expressions to the Exclude field. OpenMetadata will exclude all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be included.
|
||||
- **Include Sample Data (toggle)**: Set the Include Sample Data toggle to control whether to ingest sample data as part of metadata ingestion.
|
||||
- **Sample Size**: If include sample data is enabled, 10 records will be ingested by default. Using this field you can customize the size of sample data.
|
||||
- **Enable Debug Log (toggle)**: Set the Enable Debug Log toggle to set the default log level to debug.
|
||||
|
||||
|
||||
{% /extraContent %}
|
||||
@ -103,3 +103,7 @@ the following docs to run the Ingestion Framework in any orchestrator externally
|
||||
|
||||
- [Amundsen](/connectors/metadata/amundsen)
|
||||
- [Atlas](/connectors/metadata/atlas)
|
||||
|
||||
## Search Services
|
||||
|
||||
- [ElasticSearch](/connectors/search/elasticsearch)
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
---
|
||||
title: ElasticSearch
|
||||
slug: /connectors/search/elasticsearch
|
||||
---
|
||||
|
||||
# ElasticSearch
|
||||
|
||||
| Feature | Status |
|
||||
|------------|------------------------------|
|
||||
| Search Indexes | {% icon iconName="check" /%} |
|
||||
| Sample Data | {% icon iconName="check" /%} |
|
||||
| Supported Versions | ElasticSearch 7.0 and above |
|
||||
| Stage | BETA |
|
||||
|
||||
In this section, we provide guides and references to use the ElasticSearch connector.
|
||||
|
||||
Configure and schedule ElasticSearch metadata workflow from the OpenMetadata UI:
|
||||
|
||||
- [Requirements](#requirements)
|
||||
- [Metadata Ingestion](#metadata-ingestion)
|
||||
|
||||
{% partial file="/v1.2.0/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/search/elasticsearch/yaml"} /%}
|
||||
|
||||
## Requirements
|
||||
|
||||
We extract ElasticSearch's metadata by using its [API](https://www.elastic.co/guide/en/elasticsearch/reference/current/rest-apis.html). To run this ingestion, you just need a user with permissions to the ElasticSearch instance.
|
||||
|
||||
|
||||
## Metadata Ingestion
|
||||
|
||||
{% partial
|
||||
file="/v1.2.0/connectors/metadata-ingestion-ui.md"
|
||||
variables={
|
||||
connector: "ElasticSearch",
|
||||
selectServicePath: "/images/v1.2.0/connectors/elasticsearch/select-service.png",
|
||||
addNewServicePath: "/images/v1.2.0/connectors/elasticsearch/add-new-service.png",
|
||||
serviceConnectionPath: "/images/v1.2.0/connectors/elasticsearch/service-connection.png",
|
||||
}
|
||||
/%}
|
||||
|
||||
{% stepsContainer %}
|
||||
{% extraContent parentTagName="stepsContainer" %}
|
||||
|
||||
#### Connection Details
|
||||
|
||||
- **Host and Port**: This parameter specifies the host and port of the ElasticSearch instance. This should be specified as a URI string in the format `http://hostname:port` or `https://hostname:port`. For example, you might set it to `https://localhost:9200`.
|
||||
- **Authentication Types**:
|
||||
1. Basic Authentication
|
||||
- Username: Username to connect to ElasticSearch required when Basic Authentication is enabled on ElasticSearch.
|
||||
- Password: Password of the user account to connect with ElasticSearch.
|
||||
2. API Key Authentication
|
||||
- API Key: API Key to connect to ElasticSearch required when API Key Authentication is enabled on ElasticSearch.
|
||||
- API Key Id: If an API Key ID is associated with the API Key, enter it here; otherwise this field can be left blank.
|
||||
- **Client Certificate Path**: In case SSL is enabled on your ElasticSearch instance and a CA certificate is required for authentication, specify the path of the certificate in this field. NOTE: In case of a docker deployment, this certificate must be accessible to the OpenMetadata Ingestion docker container. You can do this by copying the certificate to the docker container or by storing it in the volume associated with the OpenMetadata Ingestion container.
|
||||
- **Connection Timeout in Seconds**: Connection timeout configuration for communicating with ElasticSearch APIs.
|
||||
|
||||
{% /extraContent %}
|
||||
|
||||
{% partial file="/v1.2.0/connectors/test-connection.md" /%}
|
||||
|
||||
{% partial file="/v1.2.0/connectors/search/configure-ingestion.md" /%}
|
||||
|
||||
{% partial file="/v1.2.0/connectors/ingestion-schedule-and-deploy.md" /%}
|
||||
|
||||
{% /stepsContainer %}
|
||||
|
||||
{% partial file="/v1.2.0/connectors/troubleshooting.md" /%}
|
||||
@ -0,0 +1,183 @@
|
||||
---
|
||||
title: Run the ElasticSearch Connector Externally
|
||||
slug: /connectors/search/elasticsearch/yaml
|
||||
---
|
||||
|
||||
# Run the ElasticSearch Connector Externally
|
||||
|
||||
In this section, we provide guides and references to use the ElasticSearch connector.
|
||||
|
||||
Configure and schedule ElasticSearch metadata workflows externally:
|
||||
|
||||
- [Requirements](#requirements)
|
||||
- [Metadata Ingestion](#metadata-ingestion)
|
||||
|
||||
{% partial file="/v1.2.0/connectors/external-ingestion-deployment.md" /%}
|
||||
|
||||
## Requirements
|
||||
|
||||
{%inlineCallout icon="description" bold="OpenMetadata 0.12 or later" href="/deployment"%}
|
||||
To deploy OpenMetadata, check the Deployment guides.
|
||||
{% /inlineCallout %}
|
||||
|
||||
|
||||
|
||||
### Python Requirements
|
||||
|
||||
To run the ElasticSearch ingestion, you will need to install:
|
||||
|
||||
```bash
|
||||
pip3 install "openmetadata-ingestion[elasticsearch]"
|
||||
```
|
||||
|
||||
## Metadata Ingestion
|
||||
|
||||
All connectors are defined as JSON Schemas.
|
||||
[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/search/elasticSearchConnection.json)
|
||||
you can find the structure to create a connection to ElasticSearch.
|
||||
|
||||
In order to create and run a Metadata Ingestion workflow, we will follow
|
||||
the steps to create a YAML configuration able to connect to the source,
|
||||
process the Entities if needed, and reach the OpenMetadata server.
|
||||
|
||||
The workflow is modeled around the following
|
||||
[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json)
|
||||
|
||||
### 1. Define the YAML Config
|
||||
|
||||
This is a sample config for ElasticSearch:
|
||||
|
||||
{% codePreview %}
|
||||
|
||||
{% codeInfoContainer %}
|
||||
|
||||
#### Source Configuration - Service Connection
|
||||
|
||||
{% codeInfo srNumber=1 %}
|
||||
|
||||
**hostPort**: This parameter specifies the host and port of the ElasticSearch instance. This should be specified as a URI string in the format `http://hostname:port` or `https://hostname:port`. For example, you might set it to `https://localhost:9200`.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=2 %}
|
||||
**Basic Authentication**
|
||||
|
||||
**username**: Username to connect to ElasticSearch required when Basic Authentication is enabled on ElasticSearch.
|
||||
**password**: Password of the user account to connect with ElasticSearch.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=3 %}
|
||||
|
||||
**API Key Authentication**
|
||||
|
||||
**apiKey**: API Key to connect to ElasticSearch required when API Key Authentication is enabled on ElasticSearch.
|
||||
**apiKeyId**: If an API Key ID is associated with the API Key, enter it here; otherwise this field can be left blank or skipped.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% codeInfo srNumber=4 %}
|
||||
**caCert**: In case SSL is enabled on your ElasticSearch instance and a CA certificate is required for authentication, specify the path of the certificate in this field. NOTE: In case of a docker deployment, this certificate must be accessible to the OpenMetadata Ingestion docker container. You can do this by copying the certificate to the docker container or by storing it in the volume associated with the OpenMetadata Ingestion container.
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
{% codeInfo srNumber=5 %}
|
||||
**connectionTimeoutSecs**: Connection timeout configuration for communicating with ElasticSearch APIs.
|
||||
{% /codeInfo %}
|
||||
|
||||
|
||||
|
||||
#### Source Configuration - Source Config
|
||||
|
||||
{% codeInfo srNumber=6 %}
|
||||
|
||||
The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/searchServiceMetadataPipeline.json):
|
||||
|
||||
**includeSampleData**: Set the Ingest Sample Data toggle to control whether to ingest sample data as part of metadata ingestion.
|
||||
|
||||
**sampleSize**: If include sample data is enabled, 10 records will be ingested by default. Using this field you can customize the size of sample data.
|
||||
|
||||
**markDeletedSearchIndexes**: Optional configuration to soft delete `search indexes` in OpenMetadata if the source `search indexes` are deleted. After deleting, all the associated entities like lineage, etc., with that `search index` will be deleted.
|
||||
|
||||
**searchIndexFilterPattern**: Note that the `searchIndexFilterPattern` supports regex to include or exclude search indexes during the metadata ingestion process.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
#### Sink Configuration
|
||||
|
||||
{% codeInfo srNumber=7%}
|
||||
|
||||
To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.
|
||||
|
||||
{% /codeInfo %}
|
||||
|
||||
{% partial file="/v1.2.0/connectors/workflow-config.md" /%}
|
||||
|
||||
{% /codeInfoContainer %}
|
||||
|
||||
{% codeBlock fileName="filename.yaml" %}
|
||||
|
||||
```yaml
|
||||
source:
|
||||
type: elasticsearch
|
||||
serviceName: elasticsearch_source
|
||||
serviceConnection:
|
||||
config:
|
||||
type: ElasticSearch
|
||||
```
|
||||
```yaml {% srNumber=1 %}
|
||||
hostPort: http://localhost:9200
|
||||
```
|
||||
```yaml {% srNumber=2 %}
|
||||
authType:
|
||||
username: elastic
|
||||
password: my_own_password
|
||||
```
|
||||
```yaml {% srNumber=3 %}
|
||||
# apiKeyId: <api key id>
|
||||
# apiKey: <api key>
|
||||
```
|
||||
```yaml {% srNumber=4 %}
|
||||
caCert: /path/to/http_ca.crt
|
||||
```
|
||||
```yaml {% srNumber=5 %}
|
||||
connectionTimeoutSecs: 30
|
||||
```
|
||||
```yaml {% srNumber=6 %}
|
||||
sourceConfig:
|
||||
config:
|
||||
type: SearchMetadata
|
||||
# markDeletedSearchIndexes: True
|
||||
# includeSampleData: True
|
||||
# sampleSize: 10
|
||||
# searchIndexFilterPattern:
|
||||
# includes:
|
||||
# - index1
|
||||
# - index2
|
||||
# excludes:
|
||||
# - index4
|
||||
# - index3
|
||||
```
|
||||
```yaml {% srNumber=7 %}
|
||||
sink:
|
||||
type: metadata-rest
|
||||
config: {}
|
||||
```
|
||||
|
||||
{% partial file="/v1.2.0/connectors/workflow-config-yaml.md" /%}
|
||||
|
||||
{% /codeBlock %}
|
||||
|
||||
{% /codePreview %}
|
||||
|
||||
### 2. Run with the CLI
|
||||
|
||||
First, we will need to save the YAML file. Afterward, and with all requirements installed, we can run:
|
||||
|
||||
```bash
|
||||
metadata ingest -c <path-to-yaml>
|
||||
```
|
||||
|
||||
Note that from connector to connector, this recipe will always be the same. By updating the YAML configuration,
|
||||
you will be able to extract metadata from different sources.
|
||||
@ -0,0 +1,13 @@
|
||||
---
|
||||
title: Search Services
|
||||
slug: /connectors/search
|
||||
---
|
||||
|
||||
# Search Services
|
||||
|
||||
This is the supported list of connectors for Search Services:
|
||||
|
||||
- [ElasticSearch](/connectors/search/elasticsearch)
|
||||
|
||||
If you have a request for a new connector, don't hesitate to reach out in [Slack](https://slack.open-metadata.org/) or
|
||||
open a [feature request](https://github.com/open-metadata/OpenMetadata/issues/new/choose) in our GitHub repo.
|
||||
@ -487,6 +487,13 @@ site_menu:
|
||||
- category: Connectors / Storage / S3 / Run Externally
|
||||
url: /connectors/storage/s3/yaml
|
||||
|
||||
- category: Connectors / Search
|
||||
url: /connectors/search
|
||||
- category: Connectors / Search / ElasticSearch
|
||||
url: /connectors/search/elasticsearch
|
||||
- category: Connectors / Search / ElasticSearch / Run Externally
|
||||
url: /connectors/search/elasticsearch/yaml
|
||||
|
||||
- category: Connectors / Metadata
|
||||
url: /connectors/metadata
|
||||
- category: Connectors / Metadata / Amundsen
|
||||
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 136 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 122 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 110 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 354 KiB |
@ -179,6 +179,7 @@ public final class Entity {
|
||||
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.ML_MODEL, MLMODEL_SERVICE);
|
||||
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.METADATA, METADATA_SERVICE);
|
||||
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.STORAGE, STORAGE_SERVICE);
|
||||
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.SEARCH, SEARCH_SERVICE);
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
@ -12,6 +12,8 @@ import static org.openmetadata.service.util.TestUtils.ADMIN_AUTH_HEADERS;
|
||||
import static org.openmetadata.service.util.TestUtils.assertResponse;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
import javax.ws.rs.client.WebTarget;
|
||||
@ -91,13 +93,14 @@ public class SearchServiceResourceTest extends EntityResourceTest<SearchService,
|
||||
}
|
||||
|
||||
@Test
|
||||
void put_updateService_as_admin_2xx(TestInfo test) throws IOException {
|
||||
void put_updateService_as_admin_2xx(TestInfo test) throws IOException, URISyntaxException {
|
||||
SearchConnection connection1 =
|
||||
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort("http://localhost:9300"));
|
||||
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort(new URI("http://localhost:9300")));
|
||||
SearchService service =
|
||||
createAndCheckEntity(createRequest(test).withDescription(null).withConnection(connection1), ADMIN_AUTH_HEADERS);
|
||||
|
||||
ElasticSearchConnection credentials2 = new ElasticSearchConnection().withHostPort("https://localhost:9400");
|
||||
ElasticSearchConnection credentials2 =
|
||||
new ElasticSearchConnection().withHostPort(new URI("https://localhost:9400"));
|
||||
SearchConnection connection2 = new SearchConnection().withConfig(credentials2);
|
||||
|
||||
// Update SearchService description and connection
|
||||
@ -136,11 +139,16 @@ public class SearchServiceResourceTest extends EntityResourceTest<SearchService,
|
||||
|
||||
@Override
|
||||
public CreateSearchService createRequest(String name) {
|
||||
return new CreateSearchService()
|
||||
.withName(name)
|
||||
.withServiceType(CreateSearchService.SearchServiceType.ElasticSearch)
|
||||
.withConnection(
|
||||
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort("http://localhost:9200")));
|
||||
try {
|
||||
return new CreateSearchService()
|
||||
.withName(name)
|
||||
.withServiceType(CreateSearchService.SearchServiceType.ElasticSearch)
|
||||
.withConnection(
|
||||
new SearchConnection()
|
||||
.withConfig(new ElasticSearchConnection().withHostPort(new URI("http://localhost:9200"))));
|
||||
} catch (URISyntaxException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -226,8 +226,13 @@ public final class TestUtils {
|
||||
}
|
||||
|
||||
static {
|
||||
ELASTIC_SEARCH_CONNECTION =
|
||||
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort("http://localhost:9200"));
|
||||
try {
|
||||
ELASTIC_SEARCH_CONNECTION =
|
||||
new SearchConnection()
|
||||
.withConfig(new ElasticSearchConnection().withHostPort(new URI("http://localhost:9200")));
|
||||
} catch (URISyntaxException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
OPEN_SEARCH_CONNECTION =
|
||||
new SearchConnection().withConfig(new OpenSearchConnection().withHostPort("http://localhost:9200"));
|
||||
}
|
||||
|
||||
@ -29,6 +29,9 @@
|
||||
},
|
||||
{
|
||||
"$ref": "../services/storageService.json#/definitions/storageConnection"
|
||||
},
|
||||
{
|
||||
"$ref": "../services/searchService.json#/definitions/searchConnection"
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
@ -12,41 +12,39 @@
|
||||
"enum": ["ElasticSearch"],
|
||||
"default": "ElasticSearch"
|
||||
},
|
||||
"connectionScheme": {
|
||||
"description": "ElasticSearch Connection Scheme",
|
||||
"type": "string",
|
||||
"enum": ["http", "https"],
|
||||
"default": "http"
|
||||
},
|
||||
"basicAuthentication": {
|
||||
"title": "Basic Authentication",
|
||||
"properties": {
|
||||
"username": {
|
||||
"title": "Username",
|
||||
"description": "Elastic Search Username for Login",
|
||||
"type": "string"
|
||||
},
|
||||
"password": {
|
||||
"title": "Password",
|
||||
"description": "Elastic Search Password for Login",
|
||||
"type": "string",
|
||||
"format": "password"
|
||||
}
|
||||
},
|
||||
"required": ["username","password"],
|
||||
"type": "object"
|
||||
},
|
||||
"apiAuthentication": {
|
||||
"title": "API Key Authentication",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"apiKeyId": {
|
||||
"description": "Elastic Search API Key ID for API Authentication",
|
||||
"type": "string"
|
||||
},
|
||||
"apiKey": {
|
||||
"title": "API Key",
|
||||
"description": "Elastic Search API Key for API Authentication",
|
||||
"type": "string",
|
||||
"format": "password"
|
||||
}
|
||||
},
|
||||
"required": ["apiKeyId","apiKey"]
|
||||
},
|
||||
"apiKeyId": {
|
||||
"title": "API Key ID",
|
||||
"description": "Elastic Search API Key ID for API Authentication",
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"properties": {
|
||||
@ -59,13 +57,8 @@
|
||||
"hostPort": {
|
||||
"title": "Host and Port",
|
||||
"description": "Host and port of the ElasticSearch service.",
|
||||
"type": "string"
|
||||
},
|
||||
"scheme": {
|
||||
"description": "Http/Https connection scheme",
|
||||
"type": "string",
|
||||
"$ref": "#/definitions/connectionScheme",
|
||||
"default": "http"
|
||||
"format": "uri"
|
||||
},
|
||||
"authType": {
|
||||
"title": "Auth Configuration Type",
|
||||
@ -79,7 +72,13 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
"caCert": {
|
||||
"title": "Client Certificate Path",
|
||||
"description": "Path to CA Cert File",
|
||||
"type": "string"
|
||||
},
|
||||
"connectionTimeoutSecs": {
|
||||
"title": "Connection Timeout in Seconds",
|
||||
"description": "Connection Timeout in Seconds",
|
||||
"type": "integer",
|
||||
"default": 30
|
||||
@ -93,8 +92,5 @@
|
||||
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"hostPort"
|
||||
]
|
||||
"additionalProperties": false
|
||||
}
|
||||
@ -19,21 +19,25 @@
|
||||
"default": "SearchMetadata"
|
||||
},
|
||||
"searchIndexFilterPattern": {
|
||||
"title": "Search Index Filter Pattern",
|
||||
"description": "Regex to only fetch search indexes that matches the pattern.",
|
||||
"$ref": "../type/filterPattern.json#/definitions/filterPattern"
|
||||
},
|
||||
"markDeletedSearchIndexes": {
|
||||
"title": "Mark Deleted Search Indexes",
|
||||
"description": "Optional configuration to soft delete search indexes in OpenMetadata if the source search indexes are deleted. Also, if the search index is deleted, all the associated entities like lineage, etc., with that search index will be deleted",
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"includeSampleData": {
|
||||
"title": "Include Sample Data",
|
||||
"description": "Optional configuration to turn off fetching sample data for search index.",
|
||||
"type": "boolean",
|
||||
"default": true
|
||||
},
|
||||
"sampleSize": {
|
||||
"description": "No. of rows of sample data we want to ingest.",
|
||||
"title": "Sample Size",
|
||||
"description": "No. of records of sample data we want to ingest.",
|
||||
"default": 10,
|
||||
"type": "integer"
|
||||
}
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
# ElasticSearch
|
||||
|
||||
In this section, we provide guides and references to use the ElasticSearch connector. You can view the full documentation for ElasticSearch [here](https://docs.open-metadata.org/connectors/search/elasticsearch).
|
||||
|
||||
## Requirements
|
||||
|
||||
We extract ElasticSearch's metadata by using its [API](https://www.elastic.co/guide/en/elasticsearch/reference/current/rest-apis.html). To run this ingestion, you just need a user with permissions to the ElasticSearch instance.
|
||||
|
||||
You can find further information on the ElasticSearch connector in the [docs](https://docs.open-metadata.org/connectors/search/elasticsearch).
|
||||
|
||||
## Connection Details
|
||||
|
||||
$$section
|
||||
### Host and Port $(id="hostPort")
|
||||
|
||||
This parameter specifies the host and port of the ElasticSearch instance. This should be specified as a URI string in the format `http://hostname:port` or `https://hostname:port`. For example, you might set the hostPort parameter to `https://localhost:9200`.
|
||||
|
||||
If you are running the OpenMetadata ingestion in a docker container and your services are hosted on `localhost`, then use `http://host.docker.internal:9200` as the value.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Username $(id="username")
|
||||
Username to connect to ElasticSearch required when Basic Authentication is enabled on ElasticSearch.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Password $(id="password")
|
||||
Password of the user account to connect with ElasticSearch.
|
||||
$$
|
||||
@ -0,0 +1,48 @@
|
||||
# Metadata
|
||||
|
||||
Search Service Metadata Pipeline Configuration.
|
||||
|
||||
## Configuration
|
||||
|
||||
$$section
|
||||
### Search Index Filter Pattern $(id="searchIndexFilterPattern")
|
||||
|
||||
Search index filter patterns to control whether to include search index as part of metadata ingestion.
|
||||
|
||||
**Include**: Explicitly include search index by adding a list of comma-separated regular expressions to the `Include` field. OpenMetadata will include all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be excluded.
|
||||
|
||||
For example, to include only those search indexes whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`.
|
||||
|
||||
**Exclude**: Explicitly exclude search index by adding a list of comma-separated regular expressions to the `Exclude` field. OpenMetadata will exclude all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be included.
|
||||
|
||||
For example, to exclude all search indexes with the name containing the word `demo`, add the regex pattern in the exclude field as `.*demo.*`.
|
||||
|
||||
Check out [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#database-filter-pattern) document for further examples on filter patterns.
|
||||
$$
|
||||
|
||||
|
||||
$$section
|
||||
### Enable Debug Logs $(id="enableDebugLog")
|
||||
|
||||
Set the `Enable Debug Log` toggle to set the logging level of the process to debug. You can check these logs in the Ingestion tab of the service and dig deeper into any errors you might find.
|
||||
$$
|
||||
|
||||
|
||||
$$section
|
||||
### Mark Deleted Search Indexes $(id="markDeletedSearchIndexes")
|
||||
|
||||
Optional configuration to soft delete `search indexes` in OpenMetadata if the source `search indexes` are deleted. After deleting, all the associated entities like lineage, etc., with that `search index` will be deleted.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Include Sample Data $(id="includeSampleData")
|
||||
|
||||
Set the Include Sample Data toggle to control whether to ingest sample data as part of metadata ingestion.
|
||||
$$
|
||||
|
||||
$$section
|
||||
### Sample Size $(id="sampleSize")
|
||||
|
||||
If include sample data is enabled, 10 records will be ingested by default. Using this field you can customize the size of sample data.
|
||||
$$
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user