Fixes #10724 : Search with hierarchy api for glossary terms (#15865)

* Fixes #10724 : New search with hierarchy api for glossary terms

* Added query param in existing search api - to search with hierarchy

* Created new json to manage hierarchy and other updates
This commit is contained in:
sonika-shah 2024-04-18 14:22:18 +05:30 committed by GitHub
parent d81978c388
commit 45bae608bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 346 additions and 19 deletions

View File

@ -150,7 +150,13 @@ public class SearchResource {
description =
"Get only selected fields of the document body for each hit. Empty value will return all fields")
@QueryParam("include_source_fields")
List<String> includeSourceFields)
List<String> includeSourceFields,
@Parameter(
description =
"Fetch search results in hierarchical order of children elements. By default hierarchy is not fetched.")
@DefaultValue("false")
@QueryParam("getHierarchy")
boolean getHierarchy)
throws IOException {
if (nullOrEmpty(query)) {
@ -169,6 +175,7 @@ public class SearchResource {
.deleted(deleted)
.sortOrder(sortOrder)
.includeSourceFields(includeSourceFields)
.getHierarchy(getHierarchy)
.build();
return searchRepository.search(request);
}

View File

@ -17,6 +17,8 @@ public class SearchRequest {
private final String sortOrder;
private final List<String> includeSourceFields;
private final boolean getHierarchy;
public SearchRequest(ElasticSearchRequestBuilder builder) {
this.query = builder.query;
this.from = builder.from;
@ -31,6 +33,7 @@ public class SearchRequest {
this.sortOrder = builder.sortOrder;
this.includeSourceFields = builder.includeSourceFields;
this.fieldName = builder.fieldName;
this.getHierarchy = builder.getHierarchy;
}
// Getters for the attributes
@ -87,6 +90,10 @@ public class SearchRequest {
return includeSourceFields;
}
/**
 * Returns the hierarchy flag that was copied from the builder when this request was created.
 *
 * @return {@code true} when the caller asked for results as a nested hierarchy, {@code false}
 *     otherwise
 */
public boolean getHierarchy() {
  return this.getHierarchy;
}
// Builder class for ElasticSearchRequest
public static class ElasticSearchRequestBuilder {
@ -103,6 +110,7 @@ public class SearchRequest {
private boolean deleted;
private String sortOrder;
private List<String> includeSourceFields;
private boolean getHierarchy;
public ElasticSearchRequestBuilder(String query, int size, String index) {
this.query = query;
@ -160,6 +168,11 @@ public class SearchRequest {
return this;
}
/**
 * Records whether the request being built should ask for hierarchical results.
 *
 * @param getHierarchy value to store in this builder's {@code getHierarchy} field
 * @return this builder, so that calls can be chained
 */
public ElasticSearchRequestBuilder getHierarchy(boolean getHierarchy) {
this.getHierarchy = getHierarchy;
return this;
}
/**
 * Creates a {@link SearchRequest} by passing this builder to the request constructor.
 *
 * @return a new request initialised from this builder
 */
public SearchRequest build() {
return new SearchRequest(this);
}

View File

@ -21,6 +21,7 @@ import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG;
import static org.openmetadata.service.search.EntityBuilderConstant.SCHEMA_FIELD_NAMES;
import static org.openmetadata.service.search.EntityBuilderConstant.UNIFIED;
import static org.openmetadata.service.search.UpdateSearchEventsConstant.SENDING_REQUEST_TO_ELASTIC_SEARCH;
import static org.openmetadata.service.util.FullyQualifiedName.getParentFQN;
import com.fasterxml.jackson.databind.JsonNode;
import es.org.elasticsearch.ElasticsearchStatusException;
@ -105,10 +106,12 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import javax.json.JsonObject;
@ -125,6 +128,7 @@ import org.apache.http.impl.client.BasicCredentialsProvider;
import org.openmetadata.common.utils.CommonUtil;
import org.openmetadata.schema.DataInsightInterface;
import org.openmetadata.schema.dataInsight.DataInsightChartResult;
import org.openmetadata.schema.entity.data.EntityHierarchy__1;
import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration;
import org.openmetadata.sdk.exception.SearchException;
import org.openmetadata.sdk.exception.SearchIndexNotFoundException;
@ -365,7 +369,7 @@ public class ElasticSearchClient implements SearchClient {
.must(QueryBuilders.termQuery("deleted", request.deleted())));
}
if (!nullOrEmpty(request.getSortFieldParam())) {
if (!nullOrEmpty(request.getSortFieldParam()) && !request.getHierarchy()) {
searchSourceBuilder.sort(
request.getSortFieldParam(), SortOrder.fromString(request.getSortOrder()));
}
@ -375,6 +379,12 @@ public class ElasticSearchClient implements SearchClient {
QueryBuilders.boolQuery()
.must(searchSourceBuilder.query())
.must(QueryBuilders.matchQuery("status", "Approved")));
if (request.getHierarchy()) {
searchSourceBuilder.sort(
SortBuilders.fieldSort("fullyQualifiedName")
.order(SortOrder.ASC)); // to get correct hierarchy of terms
}
}
/* for performance reasons ElasticSearch doesn't provide accurate hits
@ -396,14 +406,21 @@ public class ElasticSearchClient implements SearchClient {
searchSourceBuilder.timeout(new TimeValue(30, TimeUnit.SECONDS));
try {
String response =
client
.search(
new es.org.elasticsearch.action.search.SearchRequest(request.getIndex())
.source(searchSourceBuilder),
RequestOptions.DEFAULT)
.toString();
return Response.status(OK).entity(response).build();
SearchResponse searchResponse =
client.search(
new es.org.elasticsearch.action.search.SearchRequest(request.getIndex())
.source(searchSourceBuilder),
RequestOptions.DEFAULT);
if (!request.getHierarchy()) {
return Response.status(OK).entity(searchResponse.toString()).build();
} else {
// Build the nested hierarchy from elastic search response
List<?> response = buildSearchHierarchy(request, searchResponse);
return Response.status(OK).entity(response).build();
}
} catch (ElasticsearchStatusException e) {
if (e.status() == RestStatus.NOT_FOUND) {
throw new SearchIndexNotFoundException(
@ -414,6 +431,78 @@ public class ElasticSearchClient implements SearchClient {
}
}
/**
 * Converts a search response into a hierarchical result for the index named in the request.
 *
 * <p>Only the {@code glossary_term_search_index} index (compared case-insensitively) produces a
 * hierarchy; every other index yields an empty list.
 *
 * @param request the search request, used only for its index name
 * @param searchResponse the raw search response to convert
 * @return the hierarchy for glossary terms, or an empty list for any other index
 */
public List<?> buildSearchHierarchy(SearchRequest request, SearchResponse searchResponse) {
  boolean glossaryTermIndex =
      request.getIndex().equalsIgnoreCase("glossary_term_search_index");
  if (!glossaryTermIndex) {
    return new ArrayList<>();
  }
  return buildGlossaryTermSearchHierarchy(searchResponse);
}
/**
 * Assembles glossary-term search hits into a tree rooted at their glossaries.
 *
 * <p>Each hit's source is turned into a node. When the source carries a {@code glossary} entry
 * that glossary is registered as a root; otherwise the hit's {@code parent} entry, if present,
 * is registered as a regular node. Afterwards every node is appended to the children of the node
 * whose fully qualified name equals {@code getParentFQN} of its own name. A node that is not a
 * glossary and whose parent is not among the collected nodes does not appear in the result.
 *
 * @param searchResponse search response whose hits are glossary term documents
 * @return the glossary nodes in the order they were first seen, each with its nested children
 */
public List<EntityHierarchy__1> buildGlossaryTermSearchHierarchy(SearchResponse searchResponse) {
  // Every node (terms first, glossaries added afterwards) keyed by fully qualified name.
  Map<String, EntityHierarchy__1> termMap = new LinkedHashMap<>();
  // Glossary nodes only; these are the roots handed back to the caller.
  Map<String, EntityHierarchy__1> rootTerms = new LinkedHashMap<>();

  for (var hit : searchResponse.getHits().getHits()) {
    Map<String, Object> hitSourceMap = new HashMap<>(JsonUtils.getMap(hit.getSourceAsMap()));
    EntityHierarchy__1 term = extractHierarchyTermFromMap(hitSourceMap);

    // NOTE(review): assumes nested objects in the hit source deserialize as maps - confirm.
    @SuppressWarnings("unchecked")
    Map<String, Object> glossaryInfo = (Map<String, Object>) hitSourceMap.get("glossary");
    if (glossaryInfo != null) {
      EntityHierarchy__1 glossary = extractHierarchyTermFromMap(glossaryInfo);
      rootTerms.putIfAbsent(glossary.getFullyQualifiedName(), glossary);
    } else {
      @SuppressWarnings("unchecked")
      Map<String, Object> parentInfo = (Map<String, Object>) hitSourceMap.get("parent");
      // With no parent data the extracted node has a null fully qualified name; registering it
      // would add a null key and later hand that null name to getParentFQN, so skip it.
      if (parentInfo != null) {
        EntityHierarchy__1 parentTerm = extractHierarchyTermFromMap(parentInfo);
        termMap.putIfAbsent(parentTerm.getFullyQualifiedName(), parentTerm);
      }
    }
    termMap.putIfAbsent(term.getFullyQualifiedName(), term);
  }

  // Make glossaries addressable as parents of their top-level terms.
  termMap.putAll(rootTerms);

  for (EntityHierarchy__1 term : termMap.values()) {
    String parentFQN = getParentFQN(term.getFullyQualifiedName());
    if (parentFQN != null && termMap.containsKey(parentFQN)) {
      EntityHierarchy__1 parentTerm = termMap.get(parentFQN);
      List<EntityHierarchy__1> children = parentTerm.getChildren();
      children.add(term);
      parentTerm.setChildren(children);
    }
    // Nodes without a known parent need no work: glossaries are already the same instances
    // held in rootTerms, and any other node is intentionally not part of the returned tree.
  }
  return new ArrayList<>(rootTerms.values());
}
/**
 * Builds a hierarchy node from a map of entity attributes.
 *
 * <p>Reads the {@code id}, {@code name}, {@code displayName} and {@code fullyQualifiedName}
 * entries. The display name falls back to the name when no {@code displayName} entry exists, and
 * the node starts with an empty children list.
 *
 * @param termInfo attribute map of a glossary or glossary term; may be {@code null}
 * @return a populated node, or a node with nothing set when {@code termInfo} is {@code null}
 */
private EntityHierarchy__1 extractHierarchyTermFromMap(Map<String, Object> termInfo) {
  EntityHierarchy__1 node = new EntityHierarchy__1();
  if (termInfo == null) {
    return node;
  }

  node.setId(UUID.fromString(termInfo.get("id").toString()));

  String name = termInfo.get("name").toString();
  node.setName(name);

  Object displayName = termInfo.get("displayName");
  if (displayName != null) {
    node.setDisplayName(displayName.toString());
  } else {
    node.setDisplayName(name);
  }

  node.setFullyQualifiedName(termInfo.get("fullyQualifiedName").toString());
  node.setChildren(new ArrayList<>());
  return node;
}
@Override
public SearchResultListMapper listWithOffset(
String filter,

View File

@ -21,6 +21,7 @@ import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG;
import static org.openmetadata.service.search.EntityBuilderConstant.SCHEMA_FIELD_NAMES;
import static org.openmetadata.service.search.EntityBuilderConstant.UNIFIED;
import static org.openmetadata.service.search.UpdateSearchEventsConstant.SENDING_REQUEST_TO_ELASTIC_SEARCH;
import static org.openmetadata.service.util.FullyQualifiedName.getParentFQN;
import com.fasterxml.jackson.databind.JsonNode;
import es.org.elasticsearch.index.IndexNotFoundException;
@ -31,10 +32,12 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import javax.json.JsonObject;
@ -51,6 +54,7 @@ import org.apache.http.impl.client.BasicCredentialsProvider;
import org.openmetadata.common.utils.CommonUtil;
import org.openmetadata.schema.DataInsightInterface;
import org.openmetadata.schema.dataInsight.DataInsightChartResult;
import org.openmetadata.schema.entity.data.EntityHierarchy__1;
import org.openmetadata.schema.service.configuration.elasticsearch.ElasticSearchConfiguration;
import org.openmetadata.sdk.exception.SearchException;
import org.openmetadata.sdk.exception.SearchIndexNotFoundException;
@ -362,16 +366,22 @@ public class OpenSearchClient implements SearchClient {
.must(QueryBuilders.termQuery("deleted", request.deleted())));
}
if (!nullOrEmpty(request.getSortFieldParam())) {
if (!nullOrEmpty(request.getSortFieldParam()) && !request.getHierarchy()) {
searchSourceBuilder.sort(
request.getSortFieldParam(), SortOrder.fromString(request.getSortOrder()));
}
if (request.getIndex().equalsIgnoreCase("glossary_term_search_index")) {
searchSourceBuilder.query(
QueryBuilders.boolQuery()
.must(searchSourceBuilder.query())
.must(QueryBuilders.matchQuery("status", "Approved")));
if (request.getHierarchy()) {
searchSourceBuilder.sort(
SortBuilders.fieldSort("fullyQualifiedName")
.order(SortOrder.ASC)); // to get correct hierarchy of terms
}
}
/* for performance reasons OpenSearch doesn't provide accurate hits
@ -393,20 +403,96 @@ public class OpenSearchClient implements SearchClient {
searchSourceBuilder.timeout(new TimeValue(30, TimeUnit.SECONDS));
try {
String response =
client
.search(
new os.org.opensearch.action.search.SearchRequest(request.getIndex())
.source(searchSourceBuilder),
RequestOptions.DEFAULT)
.toString();
return Response.status(OK).entity(response).build();
SearchResponse searchResponse =
client.search(
new os.org.opensearch.action.search.SearchRequest(request.getIndex())
.source(searchSourceBuilder),
RequestOptions.DEFAULT);
if (!request.getHierarchy()) {
return Response.status(OK).entity(searchResponse.toString()).build();
} else {
// Build the nested hierarchy from the OpenSearch response
List<?> response = buildSearchHierarchy(request, searchResponse);
return Response.status(OK).entity(response).build();
}
} catch (IndexNotFoundException e) {
throw new SearchIndexNotFoundException(
String.format("Failed to to find index %s", request.getIndex()));
}
}
/**
 * Converts a search response into a hierarchical result for the index named in the request.
 *
 * <p>Only the {@code glossary_term_search_index} index (compared case-insensitively) produces a
 * hierarchy; every other index yields an empty list.
 *
 * @param request the search request, used only for its index name
 * @param searchResponse the raw search response to convert
 * @return the hierarchy for glossary terms, or an empty list for any other index
 */
public List<?> buildSearchHierarchy(SearchRequest request, SearchResponse searchResponse) {
  boolean glossaryTermIndex =
      request.getIndex().equalsIgnoreCase("glossary_term_search_index");
  if (!glossaryTermIndex) {
    return new ArrayList<>();
  }
  return buildGlossaryTermSearchHierarchy(searchResponse);
}
/**
 * Assembles glossary-term search hits into a tree rooted at their glossaries.
 *
 * <p>Each hit's source is turned into a node. When the source carries a {@code glossary} entry
 * that glossary is registered as a root; otherwise the hit's {@code parent} entry, if present,
 * is registered as a regular node. Afterwards every node is appended to the children of the node
 * whose fully qualified name equals {@code getParentFQN} of its own name. A node that is not a
 * glossary and whose parent is not among the collected nodes does not appear in the result.
 *
 * @param searchResponse search response whose hits are glossary term documents
 * @return the glossary nodes in the order they were first seen, each with its nested children
 */
public List<EntityHierarchy__1> buildGlossaryTermSearchHierarchy(SearchResponse searchResponse) {
  // Every node (terms first, glossaries added afterwards) keyed by fully qualified name.
  Map<String, EntityHierarchy__1> termMap = new LinkedHashMap<>();
  // Glossary nodes only; these are the roots handed back to the caller.
  Map<String, EntityHierarchy__1> rootTerms = new LinkedHashMap<>();

  for (var hit : searchResponse.getHits().getHits()) {
    Map<String, Object> hitSourceMap = new HashMap<>(JsonUtils.getMap(hit.getSourceAsMap()));
    EntityHierarchy__1 term = extractHierarchyTermFromMap(hitSourceMap);

    // NOTE(review): assumes nested objects in the hit source deserialize as maps - confirm.
    @SuppressWarnings("unchecked")
    Map<String, Object> glossaryInfo = (Map<String, Object>) hitSourceMap.get("glossary");
    if (glossaryInfo != null) {
      EntityHierarchy__1 glossary = extractHierarchyTermFromMap(glossaryInfo);
      rootTerms.putIfAbsent(glossary.getFullyQualifiedName(), glossary);
    } else {
      @SuppressWarnings("unchecked")
      Map<String, Object> parentInfo = (Map<String, Object>) hitSourceMap.get("parent");
      // With no parent data the extracted node has a null fully qualified name; registering it
      // would add a null key and later hand that null name to getParentFQN, so skip it.
      if (parentInfo != null) {
        EntityHierarchy__1 parentTerm = extractHierarchyTermFromMap(parentInfo);
        termMap.putIfAbsent(parentTerm.getFullyQualifiedName(), parentTerm);
      }
    }
    termMap.putIfAbsent(term.getFullyQualifiedName(), term);
  }

  // Make glossaries addressable as parents of their top-level terms.
  termMap.putAll(rootTerms);

  for (EntityHierarchy__1 term : termMap.values()) {
    String parentFQN = getParentFQN(term.getFullyQualifiedName());
    if (parentFQN != null && termMap.containsKey(parentFQN)) {
      EntityHierarchy__1 parentTerm = termMap.get(parentFQN);
      List<EntityHierarchy__1> children = parentTerm.getChildren();
      children.add(term);
      parentTerm.setChildren(children);
    }
    // Nodes without a known parent need no work: glossaries are already the same instances
    // held in rootTerms, and any other node is intentionally not part of the returned tree.
  }
  return new ArrayList<>(rootTerms.values());
}
/**
 * Builds a hierarchy node from a map of entity attributes.
 *
 * <p>Reads the {@code id}, {@code name}, {@code displayName} and {@code fullyQualifiedName}
 * entries. The display name falls back to the name when no {@code displayName} entry exists, and
 * the node starts with an empty children list.
 *
 * @param termInfo attribute map of a glossary or glossary term; may be {@code null}
 * @return a populated node, or a node with nothing set when {@code termInfo} is {@code null}
 */
private EntityHierarchy__1 extractHierarchyTermFromMap(Map<String, Object> termInfo) {
  EntityHierarchy__1 node = new EntityHierarchy__1();
  if (termInfo == null) {
    return node;
  }

  node.setId(UUID.fromString(termInfo.get("id").toString()));

  String name = termInfo.get("name").toString();
  node.setName(name);

  Object displayName = termInfo.get("displayName");
  if (displayName != null) {
    node.setDisplayName(displayName.toString());
  } else {
    node.setDisplayName(name);
  }

  node.setFullyQualifiedName(termInfo.get("fullyQualifiedName").toString());
  node.setChildren(new ArrayList<>());
  return node;
}
@Override
public SearchResultListMapper listWithOffset(
String filter,

View File

@ -2341,6 +2341,16 @@ public abstract class EntityResourceTest<T extends EntityInterface, K extends Cr
return response;
}
/**
 * Calls the search query endpoint with {@code getHierarchy=true} for the given index and returns
 * the response body as a string.
 *
 * <p>The request uses an empty query string, {@code from=0}, {@code size=100} and
 * {@code deleted=false}, and is sent with the admin auth headers.
 *
 * @param indexName name of the search index to query
 * @return the response body
 * @throws HttpResponseException if the underlying GET call reports a failure
 */
public static String getResponseFormSearchWithHierarchy(String indexName)
    throws HttpResponseException {
  String searchPath =
      String.format(
          "search/query?q=&index=%s&from=0&deleted=false&size=100&getHierarchy=true", indexName);
  WebTarget searchTarget = getResource(searchPath);
  return TestUtils.get(searchTarget, String.class, ADMIN_AUTH_HEADERS);
}
@Test
@Execution(ExecutionMode.CONCURRENT)
void test_cleanupConversations(TestInfo test) throws HttpResponseException {

View File

@ -67,6 +67,7 @@ import org.openmetadata.schema.api.data.CreateGlossaryTerm;
import org.openmetadata.schema.api.data.CreateTable;
import org.openmetadata.schema.api.data.TermReference;
import org.openmetadata.schema.api.feed.ResolveTask;
import org.openmetadata.schema.entity.data.EntityHierarchy__1;
import org.openmetadata.schema.entity.data.Glossary;
import org.openmetadata.schema.entity.data.GlossaryTerm;
import org.openmetadata.schema.entity.data.GlossaryTerm.Status;
@ -663,6 +664,52 @@ public class GlossaryTermResourceTest extends EntityResourceTest<GlossaryTerm, C
String.format("user instance for %s not found", reviewerReference.getId()));
}
/**
 * Verifies that a hierarchy search over the glossary term index nests a child term under its
 * parent term, which in turn sits under the glossary named {@code g1}.
 */
@Test
public void test_buildGlossaryTermNestedHierarchy(TestInfo test) throws HttpResponseException {
  CreateGlossaryTerm create =
      createRequest("parentGlossaryTerm", "", "", null)
          .withReviewers(null)
          .withSynonyms(null)
          .withStyle(null);
  GlossaryTerm parentGlossaryTerm = createEntity(create, ADMIN_AUTH_HEADERS);

  // Create glossary childGlossaryTerm under parentGlossaryTerm in glossary g1
  // NOTE(review): the parent request also clears style; confirm whether this one should too.
  create =
      createRequest("childGlossaryTerm", "", "", null)
          .withReviewers(null)
          .withSynonyms(null)
          .withParent(parentGlossaryTerm.getFullyQualifiedName());
  // Only the side effect of creating the child matters; the returned entity is not needed.
  createEntity(create, ADMIN_AUTH_HEADERS);

  String response = getResponseFormSearchWithHierarchy("glossary_term_search_index");
  List<EntityHierarchy__1> glossaries = JsonUtils.readObjects(response, EntityHierarchy__1.class);

  // Walk g1 -> parentGlossaryTerm -> childGlossaryTerm; false if g1 is not in the response.
  boolean isChild =
      glossaries.stream()
          .filter(glossary -> "g1".equals(glossary.getName()))
          .findFirst()
          .map(
              g1Glossary ->
                  g1Glossary.getChildren().stream()
                      .filter(term -> "parentGlossaryTerm".equals(term.getName()))
                      .flatMap(term -> term.getChildren().stream())
                      .anyMatch(term -> "childGlossaryTerm".equals(term.getName())))
          .orElse(false);

  assertTrue(isChild, "childGlossaryTerm should be a child of parentGlossaryTerm");
}
public GlossaryTerm createTerm(Glossary glossary, GlossaryTerm parent, String termName)
throws IOException {
return createTerm(glossary, parent, termName, glossary.getReviewers());

View File

@ -0,0 +1,75 @@
{
"$id": "https://open-metadata.org/schema/entity/data/entityHierarchy.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "EntityHierarchy",
"description": "This schema defines the entity hierarchy structure.",
"$comment": "@om-entity-type",
"type": "object",
"javaType": "org.openmetadata.schema.entity.data.EntityHierarchy",
"definitions": {
"EntityHierarchy": {
"type": "object",
"properties": {
"id": {
"description": "Unique identifier of an entity hierarchy instance.",
"$ref": "../type/basic.json#/definitions/uuid"
},
"name": {
"description": "Preferred name for the entity hierarchy.",
"$ref": "../type/basic.json#/definitions/entityName"
},
"displayName": {
"description": "Display name that identifies this hierarchy.",
"type": "string"
},
"description": {
"description": "Description of the entity hierarchy.",
"$ref": "../type/basic.json#/definitions/markdown"
},
"fullyQualifiedName": {
"description": "A unique name that identifies an entity within the hierarchy. It captures name hierarchy in the form of `rootEntity.childEntity`.",
"$ref": "../type/basic.json#/definitions/fullyQualifiedEntityName"
},
"children": {
"description": "Other entities that are children of this entity.",
"type": "array",
"items": {
"$ref": "#/definitions/EntityHierarchy"
}
}
},
"required": ["id", "name", "description"]
}
},
"properties": {
"id": {
"description": "Unique identifier of an entity hierarchy instance.",
"$ref": "../type/basic.json#/definitions/uuid"
},
"name": {
"description": "Preferred name for the entity hierarchy.",
"$ref": "../type/basic.json#/definitions/entityName"
},
"displayName": {
"description": "Display name that identifies this hierarchy.",
"type": "string"
},
"description": {
"description": "Description of the entity hierarchy.",
"$ref": "../type/basic.json#/definitions/markdown"
},
"fullyQualifiedName": {
"description": "A unique name that identifies an entity within the hierarchy. It captures name hierarchy in the form of `rootEntity.childEntity`.",
"$ref": "../type/basic.json#/definitions/fullyQualifiedEntityName"
},
"children": {
"description": "Other entities that are children of this entity.",
"type": "array",
"items": {
"$ref": "#/definitions/EntityHierarchy"
}
}
},
"required": ["id", "name", "description"],
"additionalProperties": false
}