Fix ElasticSearch Test Connection & Deploy (#13061)

This commit is contained in:
Mayur Singal 2023-09-08 12:40:48 +05:30 committed by GitHub
parent a41326ea1e
commit 4e633877b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 496 additions and 105 deletions

View File

@ -4,9 +4,12 @@
"serviceConnection": {
"config": {
"type": "ElasticSearch",
"hostPort": "localhost:9200"
"hostPort": "http://localhost:9200"
}
},
"sourceConfig": {
"config": {
"type": "SearchMetadata"
}
}
}

View File

@ -174,7 +174,8 @@ plugins: Dict[str, Set[str]] = {
"druid": {"pydruid>=0.6.5"},
"dynamodb": {VERSIONS["boto3"]},
"elasticsearch": {
"elasticsearch==7.13.1"
"elasticsearch==7.13.1",
"elasticsearch8~=8.9.0",
}, # also requires requests-aws4auth which is in base
"glue": {VERSIONS["boto3"]},
"great-expectations": {VERSIONS["great-expectations"]},

View File

@ -4,7 +4,7 @@ source:
serviceConnection:
config:
type: ElasticSearch
hostPort: localhost:9200
hostPort: http://localhost:9200
sourceConfig:
config:
type: SearchMetadata

View File

@ -14,7 +14,7 @@ Source connection handler
"""
from typing import Optional
from elasticsearch import Elasticsearch
from elasticsearch8 import Elasticsearch
from metadata.generated.schema.entity.automations.workflow import (
Workflow as AutomationWorkflow,
@ -35,26 +35,34 @@ def get_connection(connection: ElasticsearchConnection) -> Elasticsearch:
"""
basic_auth = None
api_key = None
if isinstance(connection.authType, BasicAuthentication):
if (
isinstance(connection.authType, BasicAuthentication)
and connection.authType.username
):
basic_auth = (
connection.authType.username,
connection.authType.password.get_secret_value(),
connection.authType.password.get_secret_value()
if connection.authType.password
else None,
)
if isinstance(connection.authType, ApiAuthentication):
api_key = (
connection.authType.apiKeyId,
connection.authType.apiKey.get_secret_value(),
)
if connection.authType.apiKeyId and connection.authType.apiKey:
api_key = (
connection.authType.apiKeyId,
connection.authType.apiKey.get_secret_value(),
)
elif connection.authType.apiKey:
api_key = connection.authType.apiKey.get_secret_value()
if not connection.connectionArguments:
connection.connectionArguments = init_empty_connection_arguments()
return Elasticsearch(
[connection.hostPort],
basic_auth=basic_auth,
connection.hostPort,
http_auth=basic_auth,
api_key=api_key,
scheme=connection.scheme.value,
ca_certs=connection.caCert,
**connection.connectionArguments.__root__
)

View File

@ -13,7 +13,7 @@ Elasticsearch source to extract metadata
"""
from typing import Any, Iterable, Optional
from elasticsearch import Elasticsearch
from elasticsearch8 import Elasticsearch
from metadata.generated.schema.api.data.createSearchIndex import (
CreateSearchIndexRequest,
@ -67,7 +67,7 @@ class ElasticsearchSource(SearchServiceSource):
"""
index_list = self.client.indices.get_alias() or {}
for index in index_list.keys():
yield self.client.indices.get(index)
yield self.client.indices.get(index=str(index))
def get_search_index_name(self, search_index_details: dict) -> Optional[str]:
"""

View File

@ -17,6 +17,9 @@ from typing import Any, Iterable, List, Optional, Set
from metadata.generated.schema.api.data.createSearchIndex import (
CreateSearchIndexRequest,
)
from metadata.generated.schema.api.services.createSearchService import (
CreateSearchServiceRequest,
)
from metadata.generated.schema.entity.data.searchIndex import (
SearchIndex,
SearchIndexSampleData,
@ -165,9 +168,13 @@ class SearchServiceSource(TopologyRunnerMixin, Source, ABC):
continue
yield index_details
def yield_create_request_search_service(self, config: WorkflowSource):
yield self.metadata.get_create_service_from_source(
entity=SearchService, config=config
def yield_create_request_search_service(
self, config: WorkflowSource
) -> Iterable[Either[CreateSearchServiceRequest]]:
yield Either(
right=self.metadata.get_create_service_from_source(
entity=SearchService, config=config
)
)
def get_services(self) -> Iterable[WorkflowSource]:

View File

@ -41,7 +41,7 @@ mock_es_config = {
"username": "username",
"password": "password",
},
"hostPort": "localhost:9200",
"hostPort": "http://localhost:9200",
}
},
"sourceConfig": {"config": {"type": "SearchMetadata"}},

View File

@ -29,6 +29,7 @@ from metadata.generated.schema.entity.services.messagingService import Messaging
from metadata.generated.schema.entity.services.metadataService import MetadataService
from metadata.generated.schema.entity.services.mlmodelService import MlModelService
from metadata.generated.schema.entity.services.pipelineService import PipelineService
from metadata.generated.schema.entity.services.searchService import SearchService
from metadata.generated.schema.entity.services.storageService import StorageService
from metadata.ingestion.models.encoders import show_secrets_encoder
from metadata.ingestion.ometa.ometa_api import OpenMetadata
@ -66,6 +67,17 @@ from metadata.workflow.metadata import MetadataWorkflow
logger = workflow_logger()
ENTITY_CLASS_MAP = {
"databaseService": DatabaseService,
"pipelineService": PipelineService,
"dashboardService": DashboardService,
"messagingService": MessagingService,
"mlmodelService": MlModelService,
"metadataService": MetadataService,
"storageService": StorageService,
"searchService": SearchService,
}
class InvalidServiceException(Exception):
"""
@ -119,7 +131,7 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
service_type = ingestion_pipeline.service.type
entity_class = None
entity_class = ENTITY_CLASS_MAP.get(service_type)
try:
if service_type == "testSuite":
return WorkflowSource(
@ -129,57 +141,15 @@ def build_source(ingestion_pipeline: IngestionPipeline) -> WorkflowSource:
serviceConnection=None, # retrieved from the test suite workflow using the `sourceConfig.config.entityFullyQualifiedName`
)
if service_type == "databaseService":
entity_class = DatabaseService
service: DatabaseService = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
elif service_type == "pipelineService":
entity_class = PipelineService
service: PipelineService = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
elif service_type == "dashboardService":
entity_class = DashboardService
service: DashboardService = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
elif service_type == "messagingService":
entity_class = MessagingService
service: MessagingService = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
elif service_type == "mlmodelService":
entity_class = MlModelService
service: MlModelService = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
elif service_type == "metadataService":
entity_class = MetadataService
service: MetadataService = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
elif service_type == "storageService":
entity_class = StorageService
service: StorageService = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
else:
if entity_class is None:
raise InvalidServiceException(f"Invalid Service Type: {service_type}")
service = metadata.get_by_name(
entity=entity_class,
fqn=ingestion_pipeline.service.name,
nullable=False,
)
except ValidationError as original_error:
try:
resp = metadata.client.get(

View File

@ -0,0 +1,34 @@
{% step srNumber=7 %}
{% stepDescription title="7. Configure Metadata Ingestion" %}
In this step we will configure the metadata ingestion pipeline,
Please follow the instructions below
{% /stepDescription %}
{% stepVisualInfo %}
{% image
src="/images/v1.2.0/connectors/configure-metadata-ingestion-search.png"
alt="Configure Metadata Ingestion"
caption="Configure Metadata Ingestion Page" /%}
{% /stepVisualInfo %}
{% /step %}
{% extraContent parentTagName="stepsContainer" %}
#### Metadata Ingestion Options
- **Name**: This field refers to the name of ingestion pipeline, you can customize the name or use the generated name.
- **Search Index Filter Pattern (Optional)**: Use search index filter patterns to control whether or not to include search indexes as part of metadata ingestion.
    - **Include**: Explicitly include search indexes by adding a list of comma-separated regular expressions to the Include field. OpenMetadata will include all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be excluded.
    - **Exclude**: Explicitly exclude search indexes by adding a list of comma-separated regular expressions to the Exclude field. OpenMetadata will exclude all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be included.
- **Include Sample Data (toggle)**: Set the Ingest Sample Data toggle to control whether to ingest sample data as part of metadata ingestion.
- **Sample Size**: If include sample data is enabled, 10 records will be ingested by default. Using this field you can customize the size of sample data.
- **Enable Debug Log (toggle)**: Set the Enable Debug Log toggle to set the default log level to debug.
{% /extraContent %}

View File

@ -103,3 +103,7 @@ the following docs to run the Ingestion Framework in any orchestrator externally
- [Amundsen](/connectors/metadata/amundsen)
- [Atlas](/connectors/metadata/atlas)
## Search Services
- [ElasticSearch](/connectors/search/elasticsearch)

View File

@ -0,0 +1,67 @@
---
title: ElasticSearch
slug: /connectors/search/elasticsearch
---
# ElasticSearch
| Feature | Status |
|------------|------------------------------|
| Search Indexes | {% icon iconName="check" /%} |
| Sample Data | {% icon iconName="check" /%} |
| Supported Versions | ElasticSearch 7.0 and above |
| Stage | BETA |
In this section, we provide guides and references to use the ElasticSearch connector.
Configure and schedule ElasticSearch metadata workflow from the OpenMetadata UI:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
{% partial file="/v1.2.0/connectors/ingestion-modes-tiles.md" variables={yamlPath: "/connectors/search/elasticsearch/yaml"} /%}
## Requirements
We extract ElasticSearch's metadata by using its [API](https://www.elastic.co/guide/en/elasticsearch/reference/current/rest-apis.html). To run this ingestion, you just need a user with permissions to the ElasticSearch instance.
## Metadata Ingestion
{% partial
file="/v1.2.0/connectors/metadata-ingestion-ui.md"
variables={
connector: "ElasticSearch",
selectServicePath: "/images/v1.2.0/connectors/elasticsearch/select-service.png",
addNewServicePath: "/images/v1.2.0/connectors/elasticsearch/add-new-service.png",
serviceConnectionPath: "/images/v1.2.0/connectors/elasticsearch/service-connection.png",
}
/%}
{% stepsContainer %}
{% extraContent parentTagName="stepsContainer" %}
#### Connection Details
- **Host and Port**: This parameter specifies the host and port of the ElasticSearch instance. This should be specified as a URI string in the format `http://hostname:port` or `https://hostname:port`. For example, you might set it to `https://localhost:9200`.
- **Authentication Types**:
1. Basic Authentication
- Username: Username to connect to ElasticSearch required when Basic Authentication is enabled on ElasticSearch.
- Password: Password of the user account to connect with ElasticSearch.
2. API Key Authentication
- API Key: API Key to connect to ElasticSearch required when API Key Authentication is enabled on ElasticSearch.
  - API Key ID: In case of API Key Authentication, enter the API Key ID associated with the API Key, if any; otherwise this field can be left blank.
- **Client Certificate Path**: In case SSL is enabled on your ElasticSearch instance and a CA certificate is required for authentication, specify the path of the certificate in this field. NOTE: In case of a docker deployment you need to make this certificate accessible to the OpenMetadata Ingestion docker container; you can do this by copying the certificate into the docker container or storing it in a volume associated with the OpenMetadata Ingestion container.
- **Connection Timeout in Seconds**: Connection timeout configuration for communicating with ElasticSearch APIs.
{% /extraContent %}
{% partial file="/v1.2.0/connectors/test-connection.md" /%}
{% partial file="/v1.2.0/connectors/search/configure-ingestion.md" /%}
{% partial file="/v1.2.0/connectors/ingestion-schedule-and-deploy.md" /%}
{% /stepsContainer %}
{% partial file="/v1.2.0/connectors/troubleshooting.md" /%}

View File

@ -0,0 +1,183 @@
---
title: Run the ElasticSearch Connector Externally
slug: /connectors/search/elasticsearch/yaml
---
# Run the ElasticSearch Connector Externally
In this section, we provide guides and references to use the ElasticSearch connector.
Configure and schedule ElasticSearch metadata workflows externally:
- [Requirements](#requirements)
- [Metadata Ingestion](#metadata-ingestion)
{% partial file="/v1.2.0/connectors/external-ingestion-deployment.md" /%}
## Requirements
{%inlineCallout icon="description" bold="OpenMetadata 0.12 or later" href="/deployment"%}
To deploy OpenMetadata, check the Deployment guides.
{% /inlineCallout %}
### Python Requirements
To run the ElasticSearch ingestion, you will need to install:
```bash
pip3 install "openmetadata-ingestion[elasticsearch]"
```
## Metadata Ingestion
All connectors are defined as JSON Schemas.
[Here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/entity/services/connections/search/elasticSearchConnection.json)
you can find the structure to create a connection to ElasticSearch.
In order to create and run a Metadata Ingestion workflow, we will follow
the steps to create a YAML configuration able to connect to the source,
process the Entities if needed, and reach the OpenMetadata server.
The workflow is modeled around the following
[JSON Schema](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/workflow.json)
### 1. Define the YAML Config
This is a sample config for ElasticSearch:
{% codePreview %}
{% codeInfoContainer %}
#### Source Configuration - Service Connection
{% codeInfo srNumber=1 %}
**hostPort**: This parameter specifies the host and port of the ElasticSearch instance. This should be specified as a URI string in the format `http://hostname:port` or `https://hostname:port`. For example, you might set it to `https://localhost:9200`.
{% /codeInfo %}
{% codeInfo srNumber=2 %}
**Basic Authentication**
**username**: Username to connect to ElasticSearch required when Basic Authentication is enabled on ElasticSearch.
**password**: Password of the user account to connect with ElasticSearch.
{% /codeInfo %}
{% codeInfo srNumber=3 %}
**API Key Authentication**
**apiKey**: API Key to connect to ElasticSearch required when API Key Authentication is enabled on ElasticSearch.
**apiKeyId**: In case of API Key Authentication, enter the API Key ID associated with the API Key, if any; otherwise this field can be left blank or skipped.
{% /codeInfo %}
{% codeInfo srNumber=4 %}
**caCert**: In case SSL is enabled on your ElasticSearch instance and a CA certificate is required for authentication, specify the path of the certificate in this field. NOTE: In case of a docker deployment you need to make this certificate accessible to the OpenMetadata Ingestion docker container; you can do this by copying the certificate into the docker container or storing it in a volume associated with the OpenMetadata Ingestion container.
{% /codeInfo %}
{% codeInfo srNumber=5 %}
**connectionTimeoutSecs**: Connection timeout configuration for communicating with ElasticSearch APIs.
{% /codeInfo %}
#### Source Configuration - Source Config
{% codeInfo srNumber=6 %}
The `sourceConfig` is defined [here](https://github.com/open-metadata/OpenMetadata/blob/main/openmetadata-spec/src/main/resources/json/schema/metadataIngestion/searchServiceMetadataPipeline.json):
**includeSampleData**: Set the Ingest Sample Data toggle to control whether to ingest sample data as part of metadata ingestion.
**sampleSize**: If include sample data is enabled, 10 records will be ingested by default. Using this field you can customize the size of sample data.
**markDeletedSearchIndexes**: Optional configuration to soft delete `search indexes` in OpenMetadata if the source `search indexes` are deleted. After deleting, all the associated entities like lineage, etc., with that `search index` will be deleted.
**searchIndexFilterPattern**: Note that the `searchIndexFilterPattern` support regex to include or exclude search indexes during metadata ingestion process.
{% /codeInfo %}
#### Sink Configuration
{% codeInfo srNumber=7%}
To send the metadata to OpenMetadata, it needs to be specified as `type: metadata-rest`.
{% /codeInfo %}
{% partial file="/v1.2.0/connectors/workflow-config.md" /%}
{% /codeInfoContainer %}
{% codeBlock fileName="filename.yaml" %}
```yaml
source:
type: elasticsearch
serviceName: elasticsearch_source
serviceConnection:
config:
type: ElasticSearch
```
```yaml {% srNumber=1 %}
hostPort: http://localhost:9200
```
```yaml {% srNumber=2 %}
authType:
username: elastic
password: my_own_password
```
```yaml {% srNumber=3 %}
# apiKeyId: <api key id>
# apiKey: <api key>
```
```yaml {% srNumber=4 %}
caCert: /path/to/http_ca.crt
```
```yaml {% srNumber=5 %}
connectionTimeoutSecs: 30
```
```yaml {% srNumber=6 %}
sourceConfig:
config:
type: SearchMetadata
# markDeletedSearchIndexes: True
# includeSampleData: True
# sampleSize: 10
# searchIndexFilterPattern:
# includes:
# - index1
# - index2
# excludes:
# - index4
# - index3
```
```yaml {% srNumber=7 %}
sink:
type: metadata-rest
config: {}
```
{% partial file="/v1.2.0/connectors/workflow-config-yaml.md" /%}
{% /codeBlock %}
{% /codePreview %}
### 2. Run with the CLI
First, we will need to save the YAML file. Afterward, and with all requirements installed, we can run:
```bash
metadata ingest -c <path-to-yaml>
```
Note that from connector to connector, this recipe will always be the same. By updating the YAML configuration,
you will be able to extract metadata from different sources.

View File

@ -0,0 +1,13 @@
---
title: Search Services
slug: /connectors/search
---
# Search Services
This is the supported list of connectors for Search Services:
- [ElasticSearch](/connectors/search/elasticsearch)
If you have a request for a new connector, don't hesitate to reach out in [Slack](https://slack.open-metadata.org/) or
open a [feature request](https://github.com/open-metadata/OpenMetadata/issues/new/choose) in our GitHub repo.

View File

@ -487,6 +487,13 @@ site_menu:
- category: Connectors / Storage / S3 / Run Externally
url: /connectors/storage/s3/yaml
- category: Connectors / Search
url: /connectors/search
- category: Connectors / Search / ElasticSearch
url: /connectors/search/elasticsearch
- category: Connectors / Search / ElasticSearch / Run Externally
url: /connectors/search/elasticsearch/yaml
- category: Connectors / Metadata
url: /connectors/metadata
- category: Connectors / Metadata / Amundsen

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 122 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 110 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 354 KiB

View File

@ -179,6 +179,7 @@ public final class Entity {
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.ML_MODEL, MLMODEL_SERVICE);
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.METADATA, METADATA_SERVICE);
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.STORAGE, STORAGE_SERVICE);
SERVICE_TYPE_ENTITY_MAP.put(ServiceType.SEARCH, SEARCH_SERVICE);
}
//

View File

@ -12,6 +12,8 @@ import static org.openmetadata.service.util.TestUtils.ADMIN_AUTH_HEADERS;
import static org.openmetadata.service.util.TestUtils.assertResponse;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.UUID;
import javax.ws.rs.client.WebTarget;
@ -91,13 +93,14 @@ public class SearchServiceResourceTest extends EntityResourceTest<SearchService,
}
@Test
void put_updateService_as_admin_2xx(TestInfo test) throws IOException {
void put_updateService_as_admin_2xx(TestInfo test) throws IOException, URISyntaxException {
SearchConnection connection1 =
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort("http://localhost:9300"));
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort(new URI("http://localhost:9300")));
SearchService service =
createAndCheckEntity(createRequest(test).withDescription(null).withConnection(connection1), ADMIN_AUTH_HEADERS);
ElasticSearchConnection credentials2 = new ElasticSearchConnection().withHostPort("https://localhost:9400");
ElasticSearchConnection credentials2 =
new ElasticSearchConnection().withHostPort(new URI("https://localhost:9400"));
SearchConnection connection2 = new SearchConnection().withConfig(credentials2);
// Update SearchService description and connection
@ -136,11 +139,16 @@ public class SearchServiceResourceTest extends EntityResourceTest<SearchService,
@Override
public CreateSearchService createRequest(String name) {
return new CreateSearchService()
.withName(name)
.withServiceType(CreateSearchService.SearchServiceType.ElasticSearch)
.withConnection(
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort("http://localhost:9200")));
try {
return new CreateSearchService()
.withName(name)
.withServiceType(CreateSearchService.SearchServiceType.ElasticSearch)
.withConnection(
new SearchConnection()
.withConfig(new ElasticSearchConnection().withHostPort(new URI("http://localhost:9200"))));
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
}
@Override

View File

@ -226,8 +226,13 @@ public final class TestUtils {
}
static {
ELASTIC_SEARCH_CONNECTION =
new SearchConnection().withConfig(new ElasticSearchConnection().withHostPort("http://localhost:9200"));
try {
ELASTIC_SEARCH_CONNECTION =
new SearchConnection()
.withConfig(new ElasticSearchConnection().withHostPort(new URI("http://localhost:9200")));
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
OPEN_SEARCH_CONNECTION =
new SearchConnection().withConfig(new OpenSearchConnection().withHostPort("http://localhost:9200"));
}

View File

@ -29,6 +29,9 @@
},
{
"$ref": "../services/storageService.json#/definitions/storageConnection"
},
{
"$ref": "../services/searchService.json#/definitions/searchConnection"
}
]
},

View File

@ -12,41 +12,39 @@
"enum": ["ElasticSearch"],
"default": "ElasticSearch"
},
"connectionScheme": {
"description": "ElasticSearch Connection Scheme",
"type": "string",
"enum": ["http", "https"],
"default": "http"
},
"basicAuthentication": {
"title": "Basic Authentication",
"properties": {
"username": {
"title": "Username",
"description": "Elastic Search Username for Login",
"type": "string"
},
"password": {
"title": "Password",
"description": "Elastic Search Password for Login",
"type": "string",
"format": "password"
}
},
"required": ["username","password"],
"type": "object"
},
"apiAuthentication": {
"title": "API Key Authentication",
"type": "object",
"properties": {
"apiKeyId": {
"description": "Elastic Search API Key ID for API Authentication",
"type": "string"
},
"apiKey": {
"title": "API Key",
"description": "Elastic Search API Key for API Authentication",
"type": "string",
"format": "password"
}
},
"required": ["apiKeyId","apiKey"]
},
"apiKeyId": {
"title": "API Key ID",
"description": "Elastic Search API Key ID for API Authentication",
"type": "string"
}
}
}
},
"properties": {
@ -59,13 +57,8 @@
"hostPort": {
"title": "Host and Port",
"description": "Host and port of the ElasticSearch service.",
"type": "string"
},
"scheme": {
"description": "Http/Https connection scheme",
"type": "string",
"$ref": "#/definitions/connectionScheme",
"default": "http"
"format": "uri"
},
"authType": {
"title": "Auth Configuration Type",
@ -79,7 +72,13 @@
}
]
},
"caCert": {
"title": "Client Certificate Path",
"description": "Path to CA Cert File",
"type": "string"
},
"connectionTimeoutSecs": {
"title": "Connection Timeout in Seconds",
"description": "Connection Timeout in Seconds",
"type": "integer",
"default": 30
@ -93,8 +92,5 @@
"$ref": "../connectionBasicType.json#/definitions/supportsMetadataExtraction"
}
},
"additionalProperties": false,
"required": [
"hostPort"
]
"additionalProperties": false
}

View File

@ -19,21 +19,25 @@
"default": "SearchMetadata"
},
"searchIndexFilterPattern": {
"title": "Search Index Filter Pattern",
"description": "Regex to only fetch search indexes that matches the pattern.",
"$ref": "../type/filterPattern.json#/definitions/filterPattern"
},
"markDeletedSearchIndexes": {
"title": "Mark Deleted Search Indexes",
"description": "Optional configuration to soft delete search indexes in OpenMetadata if the source search indexes are deleted. Also, if the search index is deleted, all the associated entities like lineage, etc., with that search index will be deleted",
"type": "boolean",
"default": true
},
"includeSampleData": {
"title": "Include Sample Data",
"description": "Optional configuration to turn off fetching sample data for search index.",
"type": "boolean",
"default": true
},
"sampleSize": {
"description": "No. of rows of sample data we want to ingest.",
"title": "Sample Size",
"description": "No. of records of sample data we want to ingest.",
"default": 10,
"type": "integer"
}

View File

@ -0,0 +1,29 @@
# ElasticSearch
In this section, we provide guides and references to use the ElasticSearch connector. You can view the full documentation for ElasticSearch [here](https://docs.open-metadata.org/connectors/search/elasticsearch).
## Requirements
We extract ElasticSearch's metadata by using its [API](https://www.elastic.co/guide/en/elasticsearch/reference/current/rest-apis.html). To run this ingestion, you just need a user with permissions to the ElasticSearch instance.
You can find further information on the ElasticSearch connector in the [docs](https://docs.open-metadata.org/connectors/search/elasticsearch).
## Connection Details
$$section
### Host and Port $(id="hostPort")
This parameter specifies the host and port of the ElasticSearch instance. This should be specified as a URI string in the format `http://hostname:port` or `https://hostname:port`. For example, you might set the hostPort parameter to `http://localhost:9200`.
If you are running the OpenMetadata ingestion in a docker and your services are hosted on the `localhost`, then use `http://host.docker.internal:9200` as the value.
$$
$$section
### Username $(id="username")
Username to connect to ElasticSearch required when Basic Authentication is enabled on ElasticSearch.
$$
$$section
### Password $(id="password")
Password of the user account to connect with ElasticSearch.
$$

View File

@ -0,0 +1,48 @@
# Metadata
Search Service Metadata Pipeline Configuration.
## Configuration
$$section
### Search Index Filter Pattern $(id="searchIndexFilterPattern")
Search index filter patterns to control whether to include search index as part of metadata ingestion.
**Include**: Explicitly include search index by adding a list of comma-separated regular expressions to the `Include` field. OpenMetadata will include all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be excluded.
For example, to include only those search indexes whose name starts with the word `demo`, add the regex pattern in the include field as `^demo.*`.
**Exclude**: Explicitly exclude search index by adding a list of comma-separated regular expressions to the `Exclude` field. OpenMetadata will exclude all search indexes with names matching one or more of the supplied regular expressions. All other search indexes will be included.
For example, to exclude all search indexes with the name containing the word `demo`, add the regex pattern in the exclude field as `.*demo.*`.
Check out [this](https://docs.open-metadata.org/connectors/ingestion/workflows/metadata/filter-patterns/database#database-filter-pattern) document for further examples on filter patterns.
$$
$$section
### Enable Debug Logs $(id="enableDebugLog")
Set the `Enable Debug Log` toggle to set the logging level of the process to debug. You can check these logs in the Ingestion tab of the service and dig deeper into any errors you might find.
$$
$$section
### Mark Deleted Search Indexes $(id="markDeletedSearchIndexes")
Optional configuration to soft delete `search indexes` in OpenMetadata if the source `search indexes` are deleted. After deleting, all the associated entities like lineage, etc., with that `search index` will be deleted.
$$
$$section
### Include Sample Data $(id="includeSampleData")
Set the Ingest Sample Data toggle to control whether to ingest sample data as part of metadata ingestion.
$$
$$section
### Sample Size $(id="sampleSize")
If include sample data is enabled, 10 records will be ingested by default. Using this field you can customize the size of sample data.
$$