From 45bae608bdcd67980964a0cc0e94822296e7a596 Mon Sep 17 00:00:00 2001 From: sonika-shah <58761340+sonika-shah@users.noreply.github.com> Date: Thu, 18 Apr 2024 14:22:18 +0530 Subject: [PATCH] Fixes #10724 : Search with hierarchy api for glossary terms (#15865) * Fixes #10724 : New search with hierarchy api for glossary terms * Added query param in existing search api - to search with hierarchy * Created new json to manage hierarchy and other updates --- .../resources/search/SearchResource.java | 9 +- .../service/search/SearchRequest.java | 13 +++ .../elasticsearch/ElasticSearchClient.java | 107 ++++++++++++++++-- .../search/opensearch/OpenSearchClient.java | 104 +++++++++++++++-- .../service/resources/EntityResourceTest.java | 10 ++ .../glossary/GlossaryTermResourceTest.java | 47 ++++++++ .../json/schema/type/entityHierarchy.json | 75 ++++++++++++ 7 files changed, 346 insertions(+), 19 deletions(-) create mode 100644 openmetadata-spec/src/main/resources/json/schema/type/entityHierarchy.json diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java b/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java index 4d4a28cc78d..35ca8fceff7 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/resources/search/SearchResource.java @@ -150,7 +150,13 @@ public class SearchResource { description = "Get only selected fields of the document body for each hit. Empty value will return all fields") @QueryParam("include_source_fields") - List includeSourceFields) + List includeSourceFields, + @Parameter( + description = + "Fetch search results in hierarchical order of children elements. By default hierarchy is not fetched.") + @DefaultValue("false") + @QueryParam("getHierarchy") + boolean getHierarchy) throws IOException { if (nullOrEmpty(query)) { @@ -169,6 +175,7 @@ public class SearchResource { .deleted(deleted) .sortOrder(sortOrder) .includeSourceFields(includeSourceFields) + .getHierarchy(getHierarchy) .build(); return searchRepository.search(request); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRequest.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRequest.java index 5d232a61b83..4f3a60a71b8 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRequest.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/SearchRequest.java @@ -17,6 +17,8 @@ public class SearchRequest { private final String sortOrder; private final List includeSourceFields; + private final boolean getHierarchy; + public SearchRequest(ElasticSearchRequestBuilder builder) { this.query = builder.query; this.from = builder.from; @@ -31,6 +33,7 @@ public class SearchRequest { this.sortOrder = builder.sortOrder; this.includeSourceFields = builder.includeSourceFields; this.fieldName = builder.fieldName; + this.getHierarchy = builder.getHierarchy; } // Getters for the attributes @@ -87,6 +90,10 @@ public class SearchRequest { return includeSourceFields; } + public boolean getHierarchy() { + return getHierarchy; + } + // Builder class for ElasticSearchRequest public static class ElasticSearchRequestBuilder { @@ -103,6 +110,7 @@ public class SearchRequest { private boolean deleted; private String sortOrder; private List includeSourceFields; + private boolean getHierarchy; public ElasticSearchRequestBuilder(String query, int size, String index) { this.query = query; @@ -160,6 +168,11 @@ public class SearchRequest { return this; } + public ElasticSearchRequestBuilder getHierarchy(boolean getHierarchy) { + this.getHierarchy = getHierarchy; + return this; + } + public SearchRequest build() { return new SearchRequest(this); } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java index a28ea70b6f9..162402cef79 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchClient.java @@ -21,6 +21,7 @@ import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.SCHEMA_FIELD_NAMES; import static org.openmetadata.service.search.EntityBuilderConstant.UNIFIED; import static org.openmetadata.service.search.UpdateSearchEventsConstant.SENDING_REQUEST_TO_ELASTIC_SEARCH; +import static org.openmetadata.service.util.FullyQualifiedName.getParentFQN; import com.fasterxml.jackson.databind.JsonNode; import es.org.elasticsearch.ElasticsearchStatusException; @@ -105,10 +106,12 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; import javax.json.JsonObject; @@ -125,6 +128,7 @@ import org.apache.http.impl.client.BasicCredentialsProvider; import org.openmetadata.common.utils.CommonUtil; import org.openmetadata.schema.DataInsightInterface; import org.openmetadata.schema.dataInsight.DataInsightChartResult; +import org.openmetadata.schema.entity.data.EntityHierarchy__1; import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration; import org.openmetadata.sdk.exception.SearchException; import org.openmetadata.sdk.exception.SearchIndexNotFoundException; @@ -365,7 +369,7 @@ public class ElasticSearchClient implements SearchClient { .must(QueryBuilders.termQuery("deleted", request.deleted()))); } - if (!nullOrEmpty(request.getSortFieldParam())) { + if (!nullOrEmpty(request.getSortFieldParam()) && !request.getHierarchy()) { searchSourceBuilder.sort( request.getSortFieldParam(), SortOrder.fromString(request.getSortOrder())); } @@ -375,6 +379,12 @@ public class ElasticSearchClient implements SearchClient { QueryBuilders.boolQuery() .must(searchSourceBuilder.query()) .must(QueryBuilders.matchQuery("status", "Approved"))); + + if (request.getHierarchy()) { + searchSourceBuilder.sort( + SortBuilders.fieldSort("fullyQualifiedName") + .order(SortOrder.ASC)); // to get correct hierarchy of terms + } } /* for performance reasons ElasticSearch doesn't provide accurate hits @@ -396,14 +406,21 @@ public class ElasticSearchClient implements SearchClient { searchSourceBuilder.timeout(new TimeValue(30, TimeUnit.SECONDS)); try { - String response = - client - .search( - new es.org.elasticsearch.action.search.SearchRequest(request.getIndex()) - .source(searchSourceBuilder), - RequestOptions.DEFAULT) - .toString(); - return Response.status(OK).entity(response).build(); + + SearchResponse searchResponse = + client.search( + new es.org.elasticsearch.action.search.SearchRequest(request.getIndex()) + .source(searchSourceBuilder), + RequestOptions.DEFAULT); + + if (!request.getHierarchy()) { + return Response.status(OK).entity(searchResponse.toString()).build(); + } else { + // Build the nested hierarchy from elastic search response + List response = buildSearchHierarchy(request, searchResponse); + return Response.status(OK).entity(response).build(); + } + } catch (ElasticsearchStatusException e) { if (e.status() == RestStatus.NOT_FOUND) { throw new SearchIndexNotFoundException( @@ -414,6 +431,78 @@ public class ElasticSearchClient implements SearchClient { } } + public List buildSearchHierarchy(SearchRequest request, SearchResponse searchResponse) { + List response = new ArrayList<>(); + if (request.getIndex().equalsIgnoreCase("glossary_term_search_index")) { + response = buildGlossaryTermSearchHierarchy(searchResponse); + } + return response; + } + + public List buildGlossaryTermSearchHierarchy(SearchResponse searchResponse) { + Map termMap = + new LinkedHashMap<>(); // termMap represent glossary terms + Map rootTerms = + new LinkedHashMap<>(); // rootTerms represent glossaries + + for (var hit : searchResponse.getHits().getHits()) { + Map hitSourceMap = new HashMap<>(JsonUtils.getMap(hit.getSourceAsMap())); + + EntityHierarchy__1 term = extractHierarchyTermFromMap(hitSourceMap); + Map glossaryInfo = (Map) hitSourceMap.get("glossary"); + + if (glossaryInfo != null) { + EntityHierarchy__1 parentTerm = extractHierarchyTermFromMap(glossaryInfo); + rootTerms.putIfAbsent(parentTerm.getFullyQualifiedName(), parentTerm); + } else { + Map parentInfo = (Map) hitSourceMap.get("parent"); + EntityHierarchy__1 parentTerm = extractHierarchyTermFromMap(parentInfo); + termMap.putIfAbsent(parentTerm.getFullyQualifiedName(), parentTerm); + } + + termMap.putIfAbsent(term.getFullyQualifiedName(), term); + } + + termMap.putAll(rootTerms); + + termMap + .values() + .forEach( + term -> { + String parentFQN = getParentFQN(term.getFullyQualifiedName()); + String termFQN = term.getFullyQualifiedName(); + + if (parentFQN != null && termMap.containsKey(parentFQN)) { + EntityHierarchy__1 parentTerm = termMap.get(parentFQN); + List children = parentTerm.getChildren(); + children.add(term); + parentTerm.setChildren(children); + } else { + if (rootTerms.containsKey(termFQN)) { + EntityHierarchy__1 rootTerm = rootTerms.get(termFQN); + rootTerm.setChildren(term.getChildren()); + } + } + }); + + return new ArrayList<>(rootTerms.values()); + } + + private EntityHierarchy__1 extractHierarchyTermFromMap(Map termInfo) { + EntityHierarchy__1 term = new EntityHierarchy__1(); + if (termInfo != null) { + term.setId(UUID.fromString(termInfo.get("id").toString())); + term.setName(termInfo.get("name").toString()); + term.setDisplayName( + termInfo.get("displayName") != null + ? termInfo.get("displayName").toString() + : termInfo.get("name").toString()); + term.setFullyQualifiedName(termInfo.get("fullyQualifiedName").toString()); + term.setChildren(new ArrayList<>()); + } + return term; + } + @Override public SearchResultListMapper listWithOffset( String filter, diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java index 9d8c9c469ac..ed2a9a8941d 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchClient.java @@ -21,6 +21,7 @@ import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.SCHEMA_FIELD_NAMES; import static org.openmetadata.service.search.EntityBuilderConstant.UNIFIED; import static org.openmetadata.service.search.UpdateSearchEventsConstant.SENDING_REQUEST_TO_ELASTIC_SEARCH; +import static org.openmetadata.service.util.FullyQualifiedName.getParentFQN; import com.fasterxml.jackson.databind.JsonNode; import es.org.elasticsearch.index.IndexNotFoundException; @@ -31,10 +32,12 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.UUID; import java.util.concurrent.TimeUnit; import java.util.stream.Stream; import javax.json.JsonObject; @@ -51,6 +54,7 @@ import org.apache.http.impl.client.BasicCredentialsProvider; import org.openmetadata.common.utils.CommonUtil; import org.openmetadata.schema.DataInsightInterface; import org.openmetadata.schema.dataInsight.DataInsightChartResult; +import org.openmetadata.schema.entity.data.EntityHierarchy__1; import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration; import org.openmetadata.sdk.exception.SearchException; import org.openmetadata.sdk.exception.SearchIndexNotFoundException; @@ -362,16 +366,22 @@ public class OpenSearchClient implements SearchClient { .must(QueryBuilders.termQuery("deleted", request.deleted()))); } - if (!nullOrEmpty(request.getSortFieldParam())) { + if (!nullOrEmpty(request.getSortFieldParam()) && !request.getHierarchy()) { searchSourceBuilder.sort( request.getSortFieldParam(), SortOrder.fromString(request.getSortOrder())); } if (request.getIndex().equalsIgnoreCase("glossary_term_search_index")) { + searchSourceBuilder.query( QueryBuilders.boolQuery() .must(searchSourceBuilder.query()) .must(QueryBuilders.matchQuery("status", "Approved"))); + if (request.getHierarchy()) { + searchSourceBuilder.sort( + SortBuilders.fieldSort("fullyQualifiedName") + .order(SortOrder.ASC)); // to get correct hierarchy of terms + } } /* for performance reasons OpenSearch doesn't provide accurate hits @@ -393,20 +403,96 @@ public class OpenSearchClient implements SearchClient { searchSourceBuilder.timeout(new TimeValue(30, TimeUnit.SECONDS)); try { - String response = - client - .search( - new os.org.opensearch.action.search.SearchRequest(request.getIndex()) - .source(searchSourceBuilder), - RequestOptions.DEFAULT) - .toString(); - return Response.status(OK).entity(response).build(); + SearchResponse searchResponse = + client.search( + new os.org.opensearch.action.search.SearchRequest(request.getIndex()) + .source(searchSourceBuilder), + RequestOptions.DEFAULT); + if (!request.getHierarchy()) { + return Response.status(OK).entity(searchResponse.toString()).build(); + } else { + // Build the nested hierarchy from elastic search response + List response = buildSearchHierarchy(request, searchResponse); + return Response.status(OK).entity(response).build(); + } } catch (IndexNotFoundException e) { throw new SearchIndexNotFoundException( String.format("Failed to to find index %s", request.getIndex())); } } + public List buildSearchHierarchy(SearchRequest request, SearchResponse searchResponse) { + List response = new ArrayList<>(); + if (request.getIndex().equalsIgnoreCase("glossary_term_search_index")) { + response = buildGlossaryTermSearchHierarchy(searchResponse); + } + return response; + } + + public List buildGlossaryTermSearchHierarchy(SearchResponse searchResponse) { + Map termMap = + new LinkedHashMap<>(); // termMap represent glossary terms + Map rootTerms = + new LinkedHashMap<>(); // rootTerms represent glossaries + + for (var hit : searchResponse.getHits().getHits()) { + Map hitSourceMap = new HashMap<>(JsonUtils.getMap(hit.getSourceAsMap())); + + EntityHierarchy__1 term = extractHierarchyTermFromMap(hitSourceMap); + Map glossaryInfo = (Map) hitSourceMap.get("glossary"); + + if (glossaryInfo != null) { + EntityHierarchy__1 parentTerm = extractHierarchyTermFromMap(glossaryInfo); + rootTerms.putIfAbsent(parentTerm.getFullyQualifiedName(), parentTerm); + } else { + Map parentInfo = (Map) hitSourceMap.get("parent"); + EntityHierarchy__1 parentTerm = extractHierarchyTermFromMap(parentInfo); + termMap.putIfAbsent(parentTerm.getFullyQualifiedName(), parentTerm); + } + + termMap.putIfAbsent(term.getFullyQualifiedName(), term); + } + + termMap.putAll(rootTerms); + + termMap + .values() + .forEach( + term -> { + String parentFQN = getParentFQN(term.getFullyQualifiedName()); + String termFQN = term.getFullyQualifiedName(); + + if (parentFQN != null && termMap.containsKey(parentFQN)) { + EntityHierarchy__1 parentTerm = termMap.get(parentFQN); + List children = parentTerm.getChildren(); + children.add(term); + parentTerm.setChildren(children); + } else { + if (rootTerms.containsKey(termFQN)) { + EntityHierarchy__1 rootTerm = rootTerms.get(termFQN); + rootTerm.setChildren(term.getChildren()); + } + } + }); + + return new ArrayList<>(rootTerms.values()); + } + + private EntityHierarchy__1 extractHierarchyTermFromMap(Map termInfo) { + EntityHierarchy__1 term = new EntityHierarchy__1(); + if (termInfo != null) { + term.setId(UUID.fromString(termInfo.get("id").toString())); + term.setName(termInfo.get("name").toString()); + term.setDisplayName( + termInfo.get("displayName") != null + ? termInfo.get("displayName").toString() + : termInfo.get("name").toString()); + term.setFullyQualifiedName(termInfo.get("fullyQualifiedName").toString()); + term.setChildren(new ArrayList<>()); + } + return term; + } + @Override public SearchResultListMapper listWithOffset( String filter, diff --git a/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java b/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java index e9260be7526..07a7f0aa2d4 100644 --- a/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java +++ b/openmetadata-service/src/test/java/org/openmetadata/service/resources/EntityResourceTest.java @@ -2341,6 +2341,16 @@ public abstract class EntityResourceTest glossaries = JsonUtils.readObjects(response, EntityHierarchy__1.class); + boolean isChild = + glossaries.stream() + .filter(glossary -> "g1".equals(glossary.getName())) // Find glossary with name "g1" + .findFirst() + .map( + g1Glossary -> + g1Glossary.getChildren().stream() // Work with this glossary's children + .filter( + glossary -> + "parentGlossaryTerm" + .equals(glossary.getName())) // Find the specific parent term + .flatMap( + glossary -> + glossary + .getChildren() + .stream()) // Flatten the stream of children terms + .anyMatch( + term -> + "childGlossaryTerm" + .equals( + term.getName()))) // Check if the specific child term exists + .orElse(false); // Return false if no glossary named "g1" was found + + assertTrue(isChild, "childGlossaryTerm should be a child of parentGlossaryTerm"); + } + public GlossaryTerm createTerm(Glossary glossary, GlossaryTerm parent, String termName) throws IOException { return createTerm(glossary, parent, termName, glossary.getReviewers()); diff --git a/openmetadata-spec/src/main/resources/json/schema/type/entityHierarchy.json b/openmetadata-spec/src/main/resources/json/schema/type/entityHierarchy.json new file mode 100644 index 00000000000..73fbe9afce7 --- /dev/null +++ b/openmetadata-spec/src/main/resources/json/schema/type/entityHierarchy.json @@ -0,0 +1,75 @@ +{ + "$id": "https://open-metadata.org/schema/entity/data/entityHierarchy.json", + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "EntityHierarchy", + "description": "This schema defines the entity hierarchy structure.", + "$comment": "@om-entity-type", + "type": "object", + "javaType": "org.openmetadata.schema.entity.data.EntityHierarchy", + "definitions": { + "EntityHierarchy": { + "type": "object", + "properties": { + "id": { + "description": "Unique identifier of an entity hierarchy instance.", + "$ref": "../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Preferred name for the entity hierarchy.", + "$ref": "../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display name that identifies this hierarchy.", + "type": "string" + }, + "description": { + "description": "Description of the entity hierarchy.", + "$ref": "../type/basic.json#/definitions/markdown" + }, + "fullyQualifiedName": { + "description": "A unique name that identifies an entity within the hierarchy. It captures name hierarchy in the form of `rootEntity.childEntity`.", + "$ref": "../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "children": { + "description": "Other entities that are children of this entity.", + "type": "array", + "items": { + "$ref": "#/definitions/EntityHierarchy" + } + } + }, + "required": ["id", "name", "description"] + } + }, + "properties": { + "id": { + "description": "Unique identifier of an entity hierarchy instance.", + "$ref": "../type/basic.json#/definitions/uuid" + }, + "name": { + "description": "Preferred name for the entity hierarchy.", + "$ref": "../type/basic.json#/definitions/entityName" + }, + "displayName": { + "description": "Display name that identifies this hierarchy.", + "type": "string" + }, + "description": { + "description": "Description of the entity hierarchy.", + "$ref": "../type/basic.json#/definitions/markdown" + }, + "fullyQualifiedName": { + "description": "A unique name that identifies an entity within the hierarchy. It captures name hierarchy in the form of `rootEntity.childEntity`.", + "$ref": "../type/basic.json#/definitions/fullyQualifiedEntityName" + }, + "children": { + "description": "Other entities that are children of this entity.", + "type": "array", + "items": { + "$ref": "#/definitions/EntityHierarchy" + } + } + }, + "required": ["id", "name", "description"], + "additionalProperties": false +}