diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java index ca25a85fcd4..b702756fc53 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/jdbi3/TestSuiteRepository.java @@ -9,8 +9,10 @@ import static org.openmetadata.service.Entity.TEST_CASE; import static org.openmetadata.service.Entity.TEST_SUITE; import static org.openmetadata.service.util.FullyQualifiedName.quoteName; +import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.UUID; import javax.json.JsonArray; @@ -20,6 +22,7 @@ import javax.ws.rs.core.SecurityContext; import lombok.extern.slf4j.Slf4j; import org.jdbi.v3.sqlobject.transaction.Transaction; import org.openmetadata.schema.entity.data.Table; +import org.openmetadata.schema.tests.DataQualityReport; import org.openmetadata.schema.tests.ResultSummary; import org.openmetadata.schema.tests.TestSuite; import org.openmetadata.schema.tests.type.ColumnTestSummaryDefinition; @@ -31,6 +34,7 @@ import org.openmetadata.service.Entity; import org.openmetadata.service.resources.dqtests.TestSuiteResource; import org.openmetadata.service.resources.feeds.MessageParser; import org.openmetadata.service.search.SearchClient; +import org.openmetadata.service.search.SearchIndexUtils; import org.openmetadata.service.util.EntityUtil; import org.openmetadata.service.util.FullyQualifiedName; import org.openmetadata.service.util.JsonUtils; @@ -226,6 +230,12 @@ public class TestSuiteRepository extends EntityRepository { .orElse(testSummary); } + public DataQualityReport getDataQualityReport(String q, String aggQuery, String index) + throws IOException { + Map aggregationString = SearchIndexUtils.buildAggregationString(aggQuery); + return searchRepository.genericAggregation(q, index, aggregationString); + } + public TestSummary getTestSummary(UUID testSuiteId) { JsonObject aggregationJson = JsonUtils.readJson(EXECUTION_SUMMARY_AGGS).asJsonObject(); try { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java index f930efe4c0d..a5f41fec538 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/dqtests/TestCaseResource.java @@ -91,7 +91,7 @@ public class TestCaseResource extends EntityResource:aggType=:field=`. You can sperate aggregation + query with a comma `,` to perform nested aggregations. + For example, `bucketName=table:aggType=terms:field=databaseName,bucketName=:aggType=:field=` + """, + responses = { + @ApiResponse( + responseCode = "200", + description = "Data Quality Report Results", + content = + @Content( + mediaType = "application/json", + schema = @Schema(implementation = DataQualityReport.class))) + }) + public DataQualityReport getDataQualityReport( + @Context UriInfo uriInfo, + @Context SecurityContext securityContext, + @Parameter( + description = "Search query to filter the aggregation results", + schema = @Schema(type = "String")) + @QueryParam("q") + String query, + @Parameter( + description = "Aggregation query to perform aggregation on the search results", + schema = @Schema(type = "String")) + @QueryParam("aggregationQuery") + String aggregationQuery, + @Parameter( + description = "Index to perform the aggregation against", + schema = @Schema(type = "String")) + @QueryParam("index") + String index) + throws IOException { + ResourceContext resourceContext = getResourceContext(); + OperationContext operationContext = + new OperationContext(entityType, MetadataOperation.VIEW_TESTS); + authorizer.authorize(securityContext, operationContext, resourceContext); + if (nullOrEmpty(aggregationQuery) || nullOrEmpty(index)) { + throw new IllegalArgumentException("aggregationQuery and index are required parameters"); + } + return repository.getDataQualityReport(query, aggregationQuery, index); + } + @POST @Operation( operationId = "createLogicalTestSuite", diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java index c6dd5c3e9b1..f83924c1945 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchClient.java @@ -18,6 +18,7 @@ import lombok.Getter; import org.apache.commons.lang3.tuple.Pair; import org.openmetadata.schema.dataInsight.DataInsightChartResult; import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration; +import org.openmetadata.schema.tests.DataQualityReport; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.service.exception.CustomExceptionMessage; import org.openmetadata.service.search.models.IndexMapping; @@ -122,6 +123,9 @@ public interface SearchClient { JsonObject aggregate(String query, String index, JsonObject aggregationJson) throws IOException; + DataQualityReport genericAggregation( + String query, String index, Map aggregationMetadata) throws IOException; + Response suggest(SearchRequest request) throws IOException; void createEntity(String indexName, String docId, String doc); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java index 0e937a6298c..7730730a5e3 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchIndexUtils.java @@ -1,11 +1,19 @@ package org.openmetadata.service.search; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; +import javax.json.JsonArray; +import javax.json.JsonObject; +import org.openmetadata.schema.tests.DataQualityReport; +import org.openmetadata.schema.tests.Datum; +import org.openmetadata.schema.tests.type.DataQualityReportMetadata; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.TagLabel; @@ -55,6 +63,223 @@ public final class SearchIndexUtils { currentMap.remove(lastKey); } + private static void handleLeafTermsAggregation( + JsonObject aggregationResults, List reportData, Map nodeData) { + Optional docCount = Optional.ofNullable(aggregationResults.get("doc_count").toString()); + docCount.ifPresentOrElse( + s -> nodeData.put("document_count", s), () -> nodeData.put("document_count", null)); + + Datum datum = new Datum(); + for (Map.Entry entry : nodeData.entrySet()) { + datum.withAdditionalProperty(entry.getKey(), entry.getValue()); + } + + reportData.add(datum); + } + + private static void handleLeafMetricsAggregation( + JsonObject aggregationResults, + List reportData, + Map nodeData, + String metric) { + Optional val = Optional.ofNullable(aggregationResults.getString("value_as_string")); + val.ifPresentOrElse(s -> nodeData.put(metric, s), () -> nodeData.put(metric, null)); + + Datum datum = new Datum(); + for (Map.Entry entry : nodeData.entrySet()) { + datum.withAdditionalProperty(entry.getKey(), entry.getValue()); + } + + reportData.add(datum); + } + + /* + * Get the metadata for the aggregation results. We'll use the metadata to build the report and + * to traverse the aggregation tree. 3 types of metadata are returned: + * 1. dimensions: the list of dimensions + * 2. metrics: the list of metrics + * 3. keys: the list of keys to traverse the aggregation tree + * + * @param aggregationMapList the list of aggregations + * @return the metadata + */ + private static DataQualityReportMetadata getAggregationMetadata( + List>> aggregationMapList) { + DataQualityReportMetadata metadata = new DataQualityReportMetadata(); + List dimensions = new ArrayList<>(); + List metrics = new ArrayList<>(); + List keys = new ArrayList<>(); + + for (List> aggregationsMap : aggregationMapList) { + for (int j = 0; j < aggregationsMap.size(); j++) { + Map aggregationMap = aggregationsMap.get(j); + String aggType = aggregationMap.get("aggType"); + String field = aggregationMap.get("field"); + + boolean isLeaf = j == aggregationsMap.size() - 1; + if (isLeaf) { + // leaf aggregation + if (!aggType.contains("term")) { + metrics.add(field); + } else { + dimensions.add(field); + metrics.add("document_count"); + } + } else { + dimensions.add(field); + } + String formattedAggType = aggType.contains("term") ? "s%s".formatted(aggType) : aggType; + keys.add("%s#%s".formatted(formattedAggType, aggregationMap.get("bucketName"))); + } + } + + metadata.withKeys(keys).withDimensions(dimensions).withMetrics(metrics); + + return metadata; + } + + /* + * Traverse the aggregation results and build the report data. Note that the method supports + * n levels of nested aggregations, but does not support sibling aggregations. + * + * @Param aggregationResults the aggregation results + * @Param reportData the report data + * @Param nodeData the node data + * @Param keys the keys to traverse the aggregation tree + * @Param metric the metric to add to the report data + * @Param dimensions the dimensions to add to the report data + * @return the report data + */ + private static void traverseAggregationResults( + JsonObject aggregationResults, + List reportData, + Map nodeData, + List keys, + String metric, + List dimensions) { + + if (keys.isEmpty()) { + // We are in the leaf of the term aggregation. We'll add the count of documents as the metric + handleLeafTermsAggregation(aggregationResults, reportData, nodeData); + return; + } + + String currentKey = + keys.get(0); // The current key represent the node in the aggregation tree (i.e. the current + // bucket) + Optional aggregation = + Optional.ofNullable(SearchClient.getAggregationObject(aggregationResults, currentKey)); + + aggregation.ifPresent( + agg -> { + Optional buckets = + Optional.ofNullable(SearchClient.getAggregationBuckets(agg)); + if (buckets.isEmpty()) { + // If the current node in the aggregation tree does not have further bucket + // it means we are in the leaf of the metric aggregation. We'll add the metric + handleLeafMetricsAggregation(agg, reportData, nodeData, metric); + } else { + buckets + .get() + .forEach( + bucket -> { + JsonObject bucketObject = (JsonObject) bucket; + Optional bucketKey = Optional.of(bucketObject.getString("key")); + + bucketKey.ifPresentOrElse( + s -> nodeData.put(dimensions.get(0), s), + () -> nodeData.put(dimensions.get(0), null)); + + // Traverse the next level of the aggregation tree. + // Dimensions and keys represent the same level in the tree. + // They are used for different purpose (i.e. dimensions are used to + // generate the report while the keys are used to traverse the aggregation + // tree) + traverseAggregationResults( + bucketObject, + reportData, + nodeData, + keys.subList(1, keys.size()), + metric, + dimensions.subList(1, dimensions.size())); + }); + } + }); + } + + public static DataQualityReport parseAggregationResults( + Optional aggregationResults, List>> aggregationMapList) { + DataQualityReportMetadata metadata = getAggregationMetadata(aggregationMapList); + List reportData = new ArrayList<>(); + + aggregationResults.ifPresent( + jsonObject -> + traverseAggregationResults( + jsonObject, + reportData, + new HashMap<>(), + metadata.getKeys(), + metadata.getMetrics().get(0), + metadata.getDimensions())); + + DataQualityReport report = new DataQualityReport(); + return report.withMetadata(metadata).withData(reportData); + } + + /* + * Build the aggregation string for the given aggregation + * + * @param aggregation the aggregation to build the string for. + * The aggregation string is in the form + * `bucketName:aggType:key=value,bucketName:aggType:key=value;bucketName:aggType:key=value` + * where `,` represents a nested aggregation and `;` represents a sibling aggregation + * NOTE: As of 07/25/2024 sibling aggregation parsing and processing has not been added + * @return the aggregation string + */ + public static Map buildAggregationString(String aggregation) { + Map metadata = new HashMap<>(); + + StringBuilder aggregationString = new StringBuilder(); + String[] siblings = aggregation.split(";"); + List>> aggregationsMapList = new ArrayList<>(); + + for (String sibling : siblings) { + List> aggregationsMap = new ArrayList<>(); + String[] nested = sibling.split(","); + for (int i = 0; i < nested.length; i++) { + Map aggregationMap = new HashMap<>(); + String[] parts = nested[i].split(":"); + + for (int j = 0; j < parts.length; j++) { + String part = parts[j]; + String[] kvPairs = part.split("="); + if (kvPairs[0].equals("field")) { + aggregationString + .append("\"") + .append(kvPairs[0]) + .append("\":\"") + .append(kvPairs[1]) + .append("\""); + aggregationString.append("}"); + } else { + aggregationString.append("\"").append(kvPairs[1]).append("\":{"); + } + aggregationMap.put(kvPairs[0], kvPairs[1]); + } + if (i < nested.length - 1) { + aggregationString.append(",\"aggs\":{"); + } + aggregationsMap.add(aggregationMap); + } + // nested aggregations will add the "aggs" key if nested.length > 1, hence *2 + aggregationString.append("}".repeat(((nested.length - 1) * 2) + 1)); + aggregationsMapList.add(aggregationsMap); + } + metadata.put("aggregationStr", aggregationString.toString()); + metadata.put("aggregationMapList", aggregationsMapList); + return metadata; + } + public static List parseTags(List tags) { if (tags == null) { return Collections.emptyList(); diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java index e57e58831ea..dbe76a846d6 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRepository.java @@ -56,6 +56,7 @@ import org.openmetadata.schema.EntityTimeSeriesInterface; import org.openmetadata.schema.analytics.ReportData; import org.openmetadata.schema.dataInsight.DataInsightChartResult; import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration; +import org.openmetadata.schema.tests.DataQualityReport; import org.openmetadata.schema.tests.TestSuite; import org.openmetadata.schema.type.ChangeDescription; import org.openmetadata.schema.type.EntityReference; @@ -765,6 +766,11 @@ public class SearchRepository { return searchClient.aggregate(query, index, aggregationJson); } + public DataQualityReport genericAggregation( + String query, String index, Map aggregationMetadata) throws IOException { + return searchClient.genericAggregation(query, index, aggregationMetadata); + } + public Response suggest(SearchRequest request) throws IOException { return searchClient.suggest(request); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java index 33b2d4a408a..81ed8bdb309 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java @@ -89,6 +89,7 @@ import es.org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramIn import es.org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; import es.org.elasticsearch.search.aggregations.bucket.terms.Terms; import es.org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import es.org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder; import es.org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; import es.org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder; import es.org.elasticsearch.search.builder.SearchSourceBuilder; @@ -117,6 +118,7 @@ import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.TimeUnit; @@ -138,6 +140,7 @@ import org.openmetadata.schema.DataInsightInterface; import org.openmetadata.schema.dataInsight.DataInsightChartResult; import org.openmetadata.schema.entity.data.EntityHierarchy__1; import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration; +import org.openmetadata.schema.tests.DataQualityReport; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.Include; import org.openmetadata.sdk.exception.SearchException; @@ -146,6 +149,7 @@ import org.openmetadata.service.Entity; import org.openmetadata.service.dataInsight.DataInsightAggregatorInterface; import org.openmetadata.service.jdbi3.DataInsightChartRepository; import org.openmetadata.service.search.SearchClient; +import org.openmetadata.service.search.SearchIndexUtils; import org.openmetadata.service.search.SearchRequest; import org.openmetadata.service.search.SearchSortFilter; import org.openmetadata.service.search.UpdateSearchEventsConstant; @@ -955,6 +959,12 @@ public class ElasticSearchClient implements SearchClient { AggregationBuilders.terms(key).field(termAggregation.getString("field")); aggregationBuilders.add(termsAggregationBuilder); break; + case "avg": + JsonObject avgAggregation = aggregation.getJsonObject(aggregationType); + AvgAggregationBuilder avgAggregationBuilder = + AggregationBuilders.avg(key).field(avgAggregation.getString("field")); + aggregationBuilders.add(avgAggregationBuilder); + break; case "nested": JsonObject nestedAggregation = aggregation.getJsonObject("nested"); AggregationBuilder nestedAggregationBuilder = @@ -992,6 +1002,45 @@ public class ElasticSearchClient implements SearchClient { return aggregationBuilders; } + @Override + public DataQualityReport genericAggregation( + String query, String index, Map aggregationMetadata) throws IOException { + String aggregationStr = (String) aggregationMetadata.get("aggregationStr"); + JsonObject aggregationObj = JsonUtils.readJson("{%s}".formatted(aggregationStr)).asJsonObject(); + List aggregationBuilder = buildAggregation(aggregationObj); + + // Create search request + es.org.elasticsearch.action.search.SearchRequest searchRequest = + new es.org.elasticsearch.action.search.SearchRequest( + Entity.getSearchRepository().getIndexOrAliasName(index)); + + // Create search source builder + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + if (query != null) { + XContentParser queryParser = + XContentType.JSON + .xContent() + .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, query); + QueryBuilder parsedQuery = SearchSourceBuilder.fromXContent(queryParser).query(); + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().must(parsedQuery); + searchSourceBuilder.query(boolQueryBuilder); + } + searchSourceBuilder.size(0).timeout(new TimeValue(30, TimeUnit.SECONDS)); + + for (AggregationBuilder aggregation : aggregationBuilder) { + searchSourceBuilder.aggregation(aggregation); + } + + searchRequest.source(searchSourceBuilder); + String response = client.search(searchRequest, RequestOptions.DEFAULT).toString(); + JsonObject jsonResponse = JsonUtils.readJson(response).asJsonObject(); + Optional aggregationResults = + Optional.ofNullable(jsonResponse.getJsonObject("aggregations")); + return SearchIndexUtils.parseAggregationResults( + aggregationResults, + (List>>) aggregationMetadata.get("aggregationMapList")); + } + @Override public JsonObject aggregate(String query, String index, JsonObject aggregationJson) throws IOException { diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java index 73caff3955d..90ad6d72585 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/indexes/TestCaseIndex.java @@ -54,6 +54,7 @@ public record TestCaseIndex(TestCase testCase) implements SearchIndex { doc.put("testPlatforms", testDefinition.getTestPlatforms()); doc.put("dataQualityDimension", testDefinition.getDataQualityDimension()); doc.put("followers", SearchIndexUtils.parseFollowers(testCase.getFollowers())); + doc.put("testCaseType", testDefinition.getEntityType()); setParentRelationships(doc, testCase); return doc; } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java index a85f8f03bfe..a41d9f2f62b 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java @@ -43,6 +43,7 @@ import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.TimeUnit; @@ -64,6 +65,7 @@ import org.openmetadata.schema.DataInsightInterface; import org.openmetadata.schema.dataInsight.DataInsightChartResult; import org.openmetadata.schema.entity.data.EntityHierarchy__1; import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration; +import org.openmetadata.schema.tests.DataQualityReport; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.Include; import org.openmetadata.sdk.exception.SearchException; @@ -72,6 +74,7 @@ import org.openmetadata.service.Entity; import org.openmetadata.service.dataInsight.DataInsightAggregatorInterface; import org.openmetadata.service.jdbi3.DataInsightChartRepository; import org.openmetadata.service.search.SearchClient; +import org.openmetadata.service.search.SearchIndexUtils; import org.openmetadata.service.search.SearchRequest; import org.openmetadata.service.search.SearchSortFilter; import org.openmetadata.service.search.indexes.ContainerIndex; @@ -1006,6 +1009,45 @@ public class OpenSearchClient implements SearchClient { return aggregationBuilders; } + @Override + public DataQualityReport genericAggregation( + String query, String index, Map aggregationMetadata) throws IOException { + String aggregationStr = (String) aggregationMetadata.get("aggregationStr"); + JsonObject aggregationObj = JsonUtils.readJson("{%s}".formatted(aggregationStr)).asJsonObject(); + List aggregationBuilder = buildAggregation(aggregationObj); + + // Create search request + os.org.opensearch.action.search.SearchRequest searchRequest = + new os.org.opensearch.action.search.SearchRequest( + Entity.getSearchRepository().getIndexOrAliasName(index)); + + // Create search source builder + SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); + if (query != null) { + XContentParser queryParser = + XContentType.JSON + .xContent() + .createParser(X_CONTENT_REGISTRY, LoggingDeprecationHandler.INSTANCE, query); + QueryBuilder parsedQuery = SearchSourceBuilder.fromXContent(queryParser).query(); + BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery().must(parsedQuery); + searchSourceBuilder.query(boolQueryBuilder); + } + searchSourceBuilder.size(0).timeout(new TimeValue(30, TimeUnit.SECONDS)); + + for (AggregationBuilder aggregation : aggregationBuilder) { + searchSourceBuilder.aggregation(aggregation); + } + + searchRequest.source(searchSourceBuilder); + String response = client.search(searchRequest, RequestOptions.DEFAULT).toString(); + JsonObject jsonResponse = JsonUtils.readJson(response).asJsonObject(); + Optional aggregationResults = + Optional.ofNullable(jsonResponse.getJsonObject("aggregations")); + return SearchIndexUtils.parseAggregationResults( + aggregationResults, + (List>>) aggregationMetadata.get("aggregationMapList")); + } + @Override public JsonObject aggregate(String query, String index, JsonObject aggregationJson) throws IOException { diff --git a/openmetadata-service/src/main/resources/elasticsearch/en/test_case_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/en/test_case_index_mapping.json index a67086f5e7b..b6017cc1161 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/en/test_case_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/en/test_case_index_mapping.json @@ -152,20 +152,14 @@ "description": { "type": "text" }, - "entityType": { + "type": { "type": "keyword" - }, - "testPlatforms": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } } } }, + "testCaseType": { + "type": "keyword" + }, "entityType": { "type": "keyword" }, diff --git a/openmetadata-service/src/main/resources/elasticsearch/jp/test_case_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/jp/test_case_index_mapping.json index 8ee7acfc215..88de845a924 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/jp/test_case_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/jp/test_case_index_mapping.json @@ -175,20 +175,14 @@ "type": "text", "analyzer": "om_analyzer_jp" }, - "entityType": { + "type": { "type": "keyword" - }, - "testPlatforms": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } } } }, + "testCaseType": { + "type": "keyword" + }, "entityType": { "type": "keyword" }, diff --git a/openmetadata-service/src/main/resources/elasticsearch/zh/test_case_index_mapping.json b/openmetadata-service/src/main/resources/elasticsearch/zh/test_case_index_mapping.json index 097d105b569..b805b9e7832 100644 --- a/openmetadata-service/src/main/resources/elasticsearch/zh/test_case_index_mapping.json +++ b/openmetadata-service/src/main/resources/elasticsearch/zh/test_case_index_mapping.json @@ -198,20 +198,14 @@ } } }, - "entityType": { + "type": { "type": "keyword" - }, - "testPlatforms": { - "type": "text", - "fields": { - "keyword": { - "type": "keyword", - "ignore_above": 256 - } - } } } }, + "testCaseType": { + "type": "keyword" + }, "entityType": { "type": "keyword" }, diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/searchindex/SearchIndexResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/searchindex/SearchIndexResourceTest.java index 6df4ed92a21..4cf9fc80410 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/searchindex/SearchIndexResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/searchindex/SearchIndexResourceTest.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.openmetadata.common.utils.CommonUtil.listOf; import static org.openmetadata.service.Entity.FIELD_OWNER; import static org.openmetadata.service.Entity.TAG; +import static org.openmetadata.service.Entity.getSearchRepository; import static org.openmetadata.service.util.EntityUtil.fieldAdded; import static org.openmetadata.service.util.EntityUtil.fieldUpdated; import static org.openmetadata.service.util.TestUtils.ADMIN_AUTH_HEADERS; @@ -49,6 +50,7 @@ import org.openmetadata.schema.api.data.CreateSearchIndex; import org.openmetadata.schema.api.services.CreateSearchService; import org.openmetadata.schema.entity.data.SearchIndex; import org.openmetadata.schema.entity.services.SearchService; +import org.openmetadata.schema.tests.DataQualityReport; import org.openmetadata.schema.type.ChangeDescription; import org.openmetadata.schema.type.EntityReference; import org.openmetadata.schema.type.SearchIndexDataType; @@ -59,6 +61,8 @@ import org.openmetadata.service.Entity; import org.openmetadata.service.exception.CatalogExceptionMessage; import org.openmetadata.service.resources.EntityResourceTest; import org.openmetadata.service.resources.services.SearchServiceResourceTest; +import org.openmetadata.service.search.SearchIndexUtils; +import org.openmetadata.service.search.SearchRepository; import org.openmetadata.service.util.JsonUtils; import org.openmetadata.service.util.ResultList; import org.openmetadata.service.util.TestUtils; @@ -408,6 +412,107 @@ public class SearchIndexResourceTest extends EntityResourceTest actualAggregationstring = + SearchIndexUtils.buildAggregationString(aggregationString); + assertEquals(expectedAggregationString, actualAggregationstring.get("aggregationStr")); + + // Nested Aggregation (1 level) + aggregationString = + "bucketName=entityLinks:aggType=terms:field=entityLinks.nonNormalized,bucketName=status_counts:aggType=terms:field=testCaseResults.testCaseStatus"; + expectedAggregationString = + "\"entityLinks\":{\"terms\":{\"field\":\"entityLinks.nonNormalized\"},\"aggs\":{\"status_counts\":{\"terms\":{\"field\":\"testCaseResults.testCaseStatus\"}}}}"; + actualAggregationstring = SearchIndexUtils.buildAggregationString(aggregationString); + assertEquals(expectedAggregationString, actualAggregationstring.get("aggregationStr")); + + // Nested Aggregation (2 levels) + aggregationString = + "bucketName=entityLinks:aggType=terms:field=entityLinks.nonNormalized,bucketName=statusCount:aggType=terms:field=testCaseResults.testCaseStatus,bucketName=owner:aggType=terms:field=testSuite.owner"; + expectedAggregationString = + "\"entityLinks\":{\"terms\":{\"field\":\"entityLinks.nonNormalized\"},\"aggs\":{\"statusCount\":{\"terms\":{\"field\":\"testCaseResults.testCaseStatus\"},\"aggs\":{\"owner\":{\"terms\":{\"field\":\"testSuite.owner\"}}}}}}"; + actualAggregationstring = SearchIndexUtils.buildAggregationString(aggregationString); + assertEquals(expectedAggregationString, actualAggregationstring.get("aggregationStr")); + + // Metric Aggregation + aggregationString = + "bucketName=entityLinks:aggType=terms:field=entityLinks.nonNormalized,bucketName=minPrice:aggType=min:field=price.adjusted"; + actualAggregationstring = SearchIndexUtils.buildAggregationString(aggregationString); + expectedAggregationString = + "\"entityLinks\":{\"terms\":{\"field\":\"entityLinks.nonNormalized\"},\"aggs\":{\"minPrice\":{\"min\":{\"field\":\"price.adjusted\"}}}}"; + assertEquals(expectedAggregationString, actualAggregationstring.get("aggregationStr")); + } + + @Test + void testNewAggregation(TestInfo testInfo) throws IOException { + DataQualityReport dataQualityReport = new DataQualityReport(); + SearchRepository searchRepository = getSearchRepository(); + String query = + "{\"query\":{\"bool\":{\"should\":[{\"wildcard\":{\"fullyQualifiedName\":{\"value\":\"*tableForExecutableTestSuite\"}}},{\"wildcard\":{\"fullyQualifiedName\":{\"value\":\"*tableForExecutableTestSuiteTwo\"}}}]}}}"; + + String aggregationQuery = + "bucketName=fqn:aggType=terms:field=fullyQualifiedName,bucketName=avgTime:aggType=avg:field=updatedAt"; + Map aggregationString = + SearchIndexUtils.buildAggregationString(aggregationQuery); + dataQualityReport = searchRepository.genericAggregation(query, "table", aggregationString); + dataQualityReport + .getData() + .forEach( + (datum) -> { + Map m = datum.getAdditionalProperties(); + assertTrue(m.keySet().containsAll(List.of("fullyQualifiedName", "updatedAt"))); + }); + + aggregationQuery = + "bucketName=fqn:aggType=terms:field=fullyQualifiedName,bucketName=owner:aggType=terms:field=owner.name"; + aggregationString = SearchIndexUtils.buildAggregationString(aggregationQuery); + dataQualityReport = searchRepository.genericAggregation(query, "table", aggregationString); + dataQualityReport + .getData() + .forEach( + (datum) -> { + Map m = datum.getAdditionalProperties(); + assertTrue(m.keySet().containsAll(List.of("fullyQualifiedName", "owner.name"))); + }); + + aggregationQuery = + "bucketName=fqn:aggType=terms:field=fullyQualifiedName,bucketName=owner:aggType=terms:field=owner.name,bucketName=avgTime:aggType=avg:field=updatedAt"; + aggregationString = SearchIndexUtils.buildAggregationString(aggregationQuery); + dataQualityReport = searchRepository.genericAggregation(query, "table", aggregationString); + dataQualityReport + .getData() + .forEach( + (datum) -> { + Map m = datum.getAdditionalProperties(); + assertTrue( + m.keySet().containsAll(List.of("fullyQualifiedName", "owner.name", "updatedAt"))); + }); + + aggregationQuery = "bucketName=avgTime:aggType=avg:field=updatedAt"; + aggregationString = SearchIndexUtils.buildAggregationString(aggregationQuery); + dataQualityReport = searchRepository.genericAggregation(query, "table", aggregationString); + dataQualityReport + .getData() + .forEach( + (datum) -> { + Map m = datum.getAdditionalProperties(); + assertTrue(m.keySet().containsAll(List.of("updatedAt"))); + }); + + aggregationQuery = "bucketName=fqn:aggType=terms:field=fullyQualifiedName"; + aggregationString = SearchIndexUtils.buildAggregationString(aggregationQuery); + dataQualityReport = searchRepository.genericAggregation(query, "table", aggregationString); + dataQualityReport + .getData() + .forEach( + (datum) -> { + Map m = datum.getAdditionalProperties(); + assertTrue(m.keySet().containsAll(List.of("fullyQualifiedName"))); + }); + } + @Override public SearchIndex validateGetWithDifferentFields(SearchIndex searchIndex, boolean byName) throws HttpResponseException { diff --git a/openmetadata-spec/src/main/resources/json/schema/tests/dataQualityReport.json b/openmetadata-spec/src/main/resources/json/schema/tests/dataQualityReport.json new file mode 100644 index 00000000000..5f468a9f625 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/tests/dataQualityReport.json @@ -0,0 +1,54 @@ +{ + "$id": "https://open-metadata.org/schema/tests/dataQualityReport.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "DataQualityReport", + "description": "Data Quality report and aggregation model.", + "type": "object", + "javaType": "org.openmetadata.schema.tests.DataQualityReport", + "definitions": { + "dataQualityReportMetadata": { + "description": "Schema to capture data quality reports and aggregation data.", + "javaType": "org.openmetadata.schema.tests.type.DataQualityReportMetadata", + "type": "object", + "properties": { + "keys": { + "description": "Keys to identify the data quality report.", + "type": "array", + "items": { + "type": "string" + } + }, + "dimensions": { + "description": "Dimensions to capture the data quality report.", + "type": "array", + "items": { + "type": "string" + } + }, + "metrics": { + "description": "Metrics to capture the data quality report.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "properties": { + "metadata": { + "description": "Metadata for the data quality report.", + "$ref": "#/definitions/dataQualityReportMetadata" + }, + "data": { + "description": "Data for the data quality report.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": {"type": "string"} + } + } + }, + "required": ["metadata", "data"], + "additionalProperties": false +} \ No newline at end of file