datahub/metadata-ingestion/tests/unit/test_elasticsearch_source.py
Harshal Sheth 25d9d6656c
feat(ingest): fix validators (#10115)
Co-authored-by: Tamas Nemeth <treff7es@gmail.com>
2024-03-27 15:20:55 -07:00

2515 lines
64 KiB
Python

import json
import logging
import re
from typing import Any, Dict, List, Tuple
import pydantic
import pytest
from datahub.ingestion.source.elastic_search import (
CollapseUrns,
ElasticsearchSourceConfig,
ElasticToSchemaFieldConverter,
collapse_urn,
)
from datahub.metadata.com.linkedin.pegasus2avro.schema import SchemaField
logger = logging.getLogger(__name__)
def test_elasticsearch_throws_error_wrong_operation_config():
with pytest.raises(pydantic.ValidationError):
ElasticsearchSourceConfig.parse_obj(
{
"profiling": {
"enabled": True,
"operation_config": {
"lower_freq_profile_enabled": True,
},
}
}
)
def assert_field_paths_are_unique(fields: List[SchemaField]) -> None:
fields_paths = [f.fieldPath for f in fields if re.match(".*[^]]$", f.fieldPath)]
if fields_paths:
assert len(fields_paths) == len(set(fields_paths))
def assret_field_paths_match(
fields: List[SchemaField], expected_field_paths: List[str]
) -> None:
logger.debug('FieldPaths=\n"' + '",\n"'.join(f.fieldPath for f in fields) + '"')
assert len(fields) == len(expected_field_paths)
for f, efp in zip(fields, expected_field_paths):
assert f.fieldPath == efp
assert_field_paths_are_unique(fields)
# NOTE: Currently this is the list of all elastic indices that datahub uses for reasonable coverage.
# Simplify these later to just have enough coverage.
schema_test_cases: Dict[str, Tuple[str, List[str]]] = {
".ds-datahub_usage_event-000001": (
"""{
"@timestamp": {
"type": "date"
},
"actorUrn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"browserId": {
"type": "keyword"
},
"corp_user_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"corp_user_username": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dataset_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"dataset_platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"date": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"entityType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"entityUrn": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"hash": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"height": {
"type": "long"
},
"index": {
"type": "long"
},
"moduleId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"path": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"prevPathname": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"query": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"renderId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"renderType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"scenarioType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"search": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"timestamp": {
"type": "date"
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"total": {
"type": "long"
},
"type": {
"type": "keyword"
},
"url": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"userAgent": {
"type": "keyword"
},
"width": {
"type": "long"
}
}""",
[
"[version=2.0].[type=date].@timestamp",
"[version=2.0].[type=text].actorUrn",
"[version=2.0].[type=keyword].browserId",
"[version=2.0].[type=text].corp_user_name",
"[version=2.0].[type=text].corp_user_username",
"[version=2.0].[type=text].dataset_name",
"[version=2.0].[type=text].dataset_platform",
"[version=2.0].[type=text].date",
"[version=2.0].[type=text].entityType",
"[version=2.0].[type=text].entityUrn",
"[version=2.0].[type=text].hash",
"[version=2.0].[type=long].height",
"[version=2.0].[type=long].index",
"[version=2.0].[type=text].moduleId",
"[version=2.0].[type=text].path",
"[version=2.0].[type=text].prevPathname",
"[version=2.0].[type=text].query",
"[version=2.0].[type=text].renderId",
"[version=2.0].[type=text].renderType",
"[version=2.0].[type=text].scenarioType",
"[version=2.0].[type=text].search",
"[version=2.0].[type=date].timestamp",
"[version=2.0].[type=text].title",
"[version=2.0].[type=long].total",
"[version=2.0].[type=keyword].type",
"[version=2.0].[type=text].url",
"[version=2.0].[type=keyword].userAgent",
"[version=2.0].[type=long].width",
],
),
"chartindex_v2": (
"""{
"access": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword"
}
},
"normalizer": "keyword_normalizer"
},
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"editedDescription": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"glossaryTerms": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"queryType": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword"
}
},
"normalizer": "keyword_normalizer"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"title": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"tool": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"type": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword"
}
},
"normalizer": "keyword_normalizer"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=keyword].access",
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=keyword].editedDescription",
"[version=2.0].[type=text].glossaryTerms",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=keyword].queryType",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].title",
"[version=2.0].[type=keyword].tool",
"[version=2.0].[type=keyword].type",
"[version=2.0].[type=keyword].urn",
],
),
"corpgroupindex_v2": (
"""{
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"displayName": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=keyword].displayName",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"corpuserindex_v2": (
"""{
"active": {
"type": "boolean"
},
"email": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"fullName": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"hasTags": {
"type": "boolean"
},
"ldap": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"managerLdap": {
"type": "text",
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"skills": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"status": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"teams": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"title": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=boolean].active",
"[version=2.0].[type=keyword].email",
"[version=2.0].[type=keyword].fullName",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].ldap",
"[version=2.0].[type=text].managerLdap",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=keyword].skills",
"[version=2.0].[type=keyword].status",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].teams",
"[version=2.0].[type=keyword].title",
"[version=2.0].[type=keyword].urn",
],
),
"dashboardindex_v2": (
"""{
"access": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword"
}
},
"normalizer": "keyword_normalizer"
},
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"editedDescription": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"glossaryTerms": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"hasDescription": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"title": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"tool": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=keyword].access",
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=keyword].editedDescription",
"[version=2.0].[type=text].glossaryTerms",
"[version=2.0].[type=boolean].hasDescription",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].title",
"[version=2.0].[type=keyword].tool",
"[version=2.0].[type=keyword].urn",
],
),
"dataflowindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"cluster": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"editedDescription": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"flowId": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"glossaryTerms": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"hasDescription": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"orchestrator": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"project": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].cluster",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=keyword].editedDescription",
"[version=2.0].[type=keyword].flowId",
"[version=2.0].[type=text].glossaryTerms",
"[version=2.0].[type=boolean].hasDescription",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=keyword].orchestrator",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=keyword].project",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"datahubpolicyindex_v2": (
"""{
"urn": {
"type": "keyword"
}
}""",
["[version=2.0].[type=keyword].urn"],
),
"datahubretentionindex_v2": (
"""{
"urn": {
"type": "keyword"
}
}""",
["[version=2.0].[type=keyword].urn"],
),
"datajobindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"dataFlow": {
"type": "text",
"fields": {
"ngram": {
"type": "text",
"analyzer": "partial_urn_component"
}
},
"analyzer": "urn_component"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"editedDescription": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"glossaryTerms": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"hasDescription": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"inputs": {
"type": "text",
"analyzer": "urn_component"
},
"jobId": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"numInputDatasets": {
"type": "long"
},
"numOutputDatasets": {
"type": "long"
},
"outputs": {
"type": "text",
"analyzer": "urn_component"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=text].dataFlow",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=keyword].editedDescription",
"[version=2.0].[type=text].glossaryTerms",
"[version=2.0].[type=boolean].hasDescription",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=text].inputs",
"[version=2.0].[type=keyword].jobId",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=long].numInputDatasets",
"[version=2.0].[type=long].numOutputDatasets",
"[version=2.0].[type=text].outputs",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"dataplatformindex_v2": (
"""{
"urn": {
"type": "keyword"
}
}""",
["[version=2.0].[type=keyword].urn"],
),
"dataprocessindex_v2": (
"""{
"hasOwners": {
"type": "boolean"
},
"inputs": {
"type": "text",
"analyzer": "urn_component"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"numInputDatasets": {
"type": "long"
},
"numOutputDatasets": {
"type": "long"
},
"orchestrator": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"origin": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"outputs": {
"type": "text",
"analyzer": "urn_component"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=text].inputs",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=long].numInputDatasets",
"[version=2.0].[type=long].numOutputDatasets",
"[version=2.0].[type=keyword].orchestrator",
"[version=2.0].[type=keyword].origin",
"[version=2.0].[type=text].outputs",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=keyword].urn",
],
),
"dataset_datasetprofileaspect_v1": (
"""{
"@timestamp": {
"type": "date"
},
"event": {
"type": "object",
"enabled": false
},
"eventGranularity": {
"type": "keyword"
},
"isExploded": {
"type": "boolean"
},
"messageId": {
"type": "keyword"
},
"systemMetadata": {
"type": "object",
"enabled": false
},
"timestampMillis": {
"type": "date"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=date].@timestamp",
"[version=2.0].[type=object].event",
"[version=2.0].[type=keyword].eventGranularity",
"[version=2.0].[type=boolean].isExploded",
"[version=2.0].[type=keyword].messageId",
"[version=2.0].[type=object].systemMetadata",
"[version=2.0].[type=date].timestampMillis",
"[version=2.0].[type=keyword].urn",
],
),
"dataset_datasetusagestatisticsaspect_v1": (
"""{
"@timestamp": {
"type": "date"
},
"event": {
"type": "object",
"enabled": false
},
"eventGranularity": {
"type": "keyword"
},
"fieldCounts": {
"properties": {
"count": {
"type": "integer"
},
"fieldPath": {
"type": "keyword"
}
}
},
"isExploded": {
"type": "boolean"
},
"messageId": {
"type": "keyword"
},
"systemMetadata": {
"type": "object",
"enabled": false
},
"timestampMillis": {
"type": "date"
},
"topSqlQueries": {
"type": "keyword"
},
"totalSqlQueries": {
"type": "integer"
},
"uniqueUserCount": {
"type": "integer"
},
"urn": {
"type": "keyword"
},
"userCounts": {
"properties": {
"count": {
"type": "integer"
},
"user": {
"type": "keyword"
},
"userEmail": {
"type": "keyword"
}
}
}
}""",
[
"[version=2.0].[type=date].@timestamp",
"[version=2.0].[type=object].event",
"[version=2.0].[type=keyword].eventGranularity",
"[version=2.0].[type=properties].fieldCounts",
"[version=2.0].[type=properties].fieldCounts.[type=integer].count",
"[version=2.0].[type=properties].fieldCounts.[type=keyword].fieldPath",
"[version=2.0].[type=boolean].isExploded",
"[version=2.0].[type=keyword].messageId",
"[version=2.0].[type=object].systemMetadata",
"[version=2.0].[type=date].timestampMillis",
"[version=2.0].[type=keyword].topSqlQueries",
"[version=2.0].[type=integer].totalSqlQueries",
"[version=2.0].[type=integer].uniqueUserCount",
"[version=2.0].[type=keyword].urn",
"[version=2.0].[type=properties].userCounts",
"[version=2.0].[type=properties].userCounts.[type=integer].count",
"[version=2.0].[type=properties].userCounts.[type=keyword].user",
"[version=2.0].[type=properties].userCounts.[type=keyword].userEmail",
],
),
"datasetindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"deprecated": {
"type": "boolean"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"editedDescription": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"editedFieldDescriptions": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"editedFieldGlossaryTerms": {
"type": "text",
"analyzer": "urn_component"
},
"editedFieldTags": {
"type": "text",
"analyzer": "urn_component"
},
"fieldDescriptions": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"fieldGlossaryTerms": {
"type": "text",
"analyzer": "urn_component"
},
"fieldPaths": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"fieldTags": {
"type": "text",
"analyzer": "urn_component"
},
"glossaryTerms": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"hasDescription": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"materialized": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"origin": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"keyword": {
"type": "keyword"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"typeNames": {
"type": "keyword",
"fields": {
"keyword": {
"type": "keyword"
}
},
"normalizer": "keyword_normalizer"
},
"upstreams": {
"type": "text",
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=boolean].deprecated",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=keyword].editedDescription",
"[version=2.0].[type=keyword].editedFieldDescriptions",
"[version=2.0].[type=text].editedFieldGlossaryTerms",
"[version=2.0].[type=text].editedFieldTags",
"[version=2.0].[type=keyword].fieldDescriptions",
"[version=2.0].[type=text].fieldGlossaryTerms",
"[version=2.0].[type=keyword].fieldPaths",
"[version=2.0].[type=text].fieldTags",
"[version=2.0].[type=text].glossaryTerms",
"[version=2.0].[type=boolean].hasDescription",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=boolean].materialized",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=keyword].origin",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].typeNames",
"[version=2.0].[type=text].upstreams",
"[version=2.0].[type=keyword].urn",
],
),
"glossarynodeindex_v2": (
"""{
"definition": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"hasOwners": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=keyword].definition",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=keyword].urn",
],
),
"glossarytermindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"definition": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"hasOwners": {
"type": "boolean"
},
"hasRelatedTerms": {
"type": "text",
"analyzer": "urn_component"
},
"isRelatedTerms": {
"type": "text",
"analyzer": "urn_component"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"sourceRef": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"termSource": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].definition",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=text].hasRelatedTerms",
"[version=2.0].[type=text].isRelatedTerms",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=keyword].sourceRef",
"[version=2.0].[type=keyword].termSource",
"[version=2.0].[type=keyword].urn",
],
),
"ilm-history-2-000001": (
"""{
"@timestamp": {
"type": "date",
"format": "epoch_millis"
},
"error_details": {
"type": "text"
},
"index": {
"type": "keyword"
},
"index_age": {
"type": "long"
},
"policy": {
"type": "keyword"
},
"state": {
"dynamic": "true",
"properties": {
"action": {
"type": "keyword"
},
"action_time": {
"type": "date",
"format": "epoch_millis"
},
"creation_date": {
"type": "date",
"format": "epoch_millis"
},
"failed_step": {
"type": "keyword"
},
"is_auto-retryable_error": {
"type": "keyword"
},
"phase": {
"type": "keyword"
},
"phase_definition": {
"type": "text"
},
"phase_time": {
"type": "date",
"format": "epoch_millis"
},
"step": {
"type": "keyword"
},
"step_info": {
"type": "text"
},
"step_time": {
"type": "date",
"format": "epoch_millis"
}
}
},
"success": {
"type": "boolean"
}
}""",
[
"[version=2.0].[type=date].@timestamp",
"[version=2.0].[type=text].error_details",
"[version=2.0].[type=keyword].index",
"[version=2.0].[type=long].index_age",
"[version=2.0].[type=keyword].policy",
"[version=2.0].[type=properties].state",
"[version=2.0].[type=properties].state.[type=keyword].action",
"[version=2.0].[type=properties].state.[type=date].action_time",
"[version=2.0].[type=properties].state.[type=date].creation_date",
"[version=2.0].[type=properties].state.[type=keyword].failed_step",
"[version=2.0].[type=properties].state.[type=keyword].is_auto-retryable_error",
"[version=2.0].[type=properties].state.[type=keyword].phase",
"[version=2.0].[type=properties].state.[type=text].phase_definition",
"[version=2.0].[type=properties].state.[type=date].phase_time",
"[version=2.0].[type=properties].state.[type=keyword].step",
"[version=2.0].[type=properties].state.[type=text].step_info",
"[version=2.0].[type=properties].state.[type=date].step_time",
"[version=2.0].[type=boolean].success",
],
),
"mlfeatureindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"deprecated": {
"type": "boolean"
},
"featureNamespace": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=boolean].deprecated",
"[version=2.0].[type=keyword].featureNamespace",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"mlfeaturetableindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"deprecated": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=boolean].deprecated",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"mlmodeldeploymentindex_v2": (
"""{
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"deprecated": {
"type": "boolean"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"hasDescription": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"origin": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"keyword": {
"type": "keyword"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=boolean].deprecated",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=boolean].hasDescription",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=keyword].origin",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"mlmodelgroupindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"deprecated": {
"type": "boolean"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"hasDescription": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"origin": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=boolean].deprecated",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=boolean].hasDescription",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=keyword].origin",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"mlmodelindex_v2": (
"""{
"browsePaths": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "slash_pattern"
}
},
"analyzer": "browse_path_hierarchy",
"fielddata": true
},
"customProperties": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"deprecated": {
"type": "boolean"
},
"description": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
}
},
"normalizer": "keyword_normalizer"
},
"hasDescription": {
"type": "boolean"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"origin": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"keyword": {
"type": "keyword"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"type": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=text].browsePaths",
"[version=2.0].[type=keyword].customProperties",
"[version=2.0].[type=boolean].deprecated",
"[version=2.0].[type=keyword].description",
"[version=2.0].[type=boolean].hasDescription",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=keyword].origin",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].type",
"[version=2.0].[type=keyword].urn",
],
),
"mlprimarykeyindex_v2": (
"""{
"deprecated": {
"type": "boolean"
},
"featureNamespace": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"hasOwners": {
"type": "boolean"
},
"hasTags": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"platform": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"tags": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
},
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=boolean].deprecated",
"[version=2.0].[type=keyword].featureNamespace",
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=boolean].hasTags",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=text].platform",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=text].tags",
"[version=2.0].[type=keyword].urn",
],
),
"schemafieldindex_v2": (
"""{
"fieldPath": {
"type": "keyword",
"normalizer": "keyword_normalizer"
},
"parent": {
"type": "text",
"analyzer": "urn_component"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=keyword].fieldPath",
"[version=2.0].[type=text].parent",
"[version=2.0].[type=keyword].urn",
],
),
"system_metadata_service_v1": (
"""{
"aspect": {
"type": "keyword"
},
"lastUpdated": {
"type": "long"
},
"registryName": {
"type": "keyword"
},
"registryVersion": {
"type": "keyword"
},
"runId": {
"type": "keyword"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=keyword].aspect",
"[version=2.0].[type=long].lastUpdated",
"[version=2.0].[type=keyword].registryName",
"[version=2.0].[type=keyword].registryVersion",
"[version=2.0].[type=keyword].runId",
"[version=2.0].[type=keyword].urn",
],
),
"tagindex_v2": (
"""{
"hasOwners": {
"type": "boolean"
},
"name": {
"type": "keyword",
"fields": {
"delimited": {
"type": "text",
"analyzer": "word_delimited"
},
"ngram": {
"type": "text",
"analyzer": "partial"
}
},
"normalizer": "keyword_normalizer"
},
"owners": {
"type": "text",
"analyzer": "urn_component"
},
"removed": {
"type": "boolean"
},
"urn": {
"type": "keyword"
}
}""",
[
"[version=2.0].[type=boolean].hasOwners",
"[version=2.0].[type=keyword].name",
"[version=2.0].[type=text].owners",
"[version=2.0].[type=boolean].removed",
"[version=2.0].[type=keyword].urn",
],
),
}
@pytest.mark.parametrize(
"schema, expected_field_paths",
schema_test_cases.values(),
ids=schema_test_cases.keys(),
)
def test_elastic_search_schema_conversion(
schema: str, expected_field_paths: List[str]
) -> None:
schema_dict: Dict[str, Any] = json.loads(schema)
mappings: Dict[str, Any] = {"properties": schema_dict}
actual_fields = list(ElasticToSchemaFieldConverter.get_schema_fields(mappings))
assret_field_paths_match(actual_fields, expected_field_paths)
def test_no_properties_in_mappings_schema() -> None:
fields = list(ElasticToSchemaFieldConverter.get_schema_fields({}))
assert fields == []
def test_host_port_parsing() -> None:
"""ensure we handle different styles of host_port specifications correctly"""
examples = [
"http://localhost:9200",
"https://localhost:9200",
"localhost:9300",
"localhost",
"http://localhost:3400",
"192.168.0.1",
"192.168.0.1:9200",
"http://192.168.2.1",
"https://192.168.0.1:9300",
"https://192.168.0.1/",
]
bad_examples = ["localhost:abcd", "htttp://localhost:1234", "localhost:9200//"]
for example in examples:
config_dict = {"host": example}
config = ElasticsearchSourceConfig.parse_obj(config_dict)
assert config.host == example
for bad_example in bad_examples:
config_dict = {"host": bad_example}
with pytest.raises(pydantic.ValidationError):
ElasticsearchSourceConfig.parse_obj(config_dict)
def test_collapse_urns() -> None:
assert (
collapse_urn(
urn="urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event-000059,PROD)",
collapse_urns=CollapseUrns(
urns_suffix_regex=[
"-\\d+$",
]
),
)
== "urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event,PROD)"
)
assert (
collapse_urn(
urn="urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event-2023.01.11,PROD)",
collapse_urns=CollapseUrns(
urns_suffix_regex=[
"-\\d{4}\\.\\d{2}\\.\\d{2}",
]
),
)
== "urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event,PROD)"
)