From cc18ba15546b0abf0c1e3d4d487ffef1b3654d95 Mon Sep 17 00:00:00 2001 From: sonika-shah <58761340+sonika-shah@users.noreply.github.com> Date: Tue, 16 Sep 2025 15:16:06 +0530 Subject: [PATCH] feat: Add configurable filters aggregation for tag source segregation in search (#23363) * feat: Add configurable filters aggregation for tag source segregation in search * Update generated TypeScript types * use script based aggregation * use script based aggregation * Update generated TypeScript types * update aggregations with script * add migrations to fetch updated searchSettings --------- Co-authored-by: github-actions[bot] Co-authored-by: Karan Hotchandani <33024356+karanh37@users.noreply.github.com> --- .../native/1.10.0/mysql/schemaChanges.sql | 5 ++- .../native/1.10.0/postgres/schemaChanges.sql | 5 ++- .../ElasticSearchSourceBuilderFactory.java | 38 +++++++++++++++---- .../OpenSearchSourceBuilderFactory.java | 38 +++++++++++++++---- .../json/data/settings/searchSettings.json | 15 ++++++++ .../en/domain_index_mapping.json | 8 +++- .../jp/domain_index_mapping.json | 8 +++- .../ru/domain_index_mapping.json | 8 +++- .../zh/domain_index_mapping.json | 8 +++- .../schema/configuration/searchSettings.json | 7 +++- .../ui/playwright/utils/searchSettingUtils.ts | 2 + .../api/search/previewSearchRequest.ts | 6 ++- .../generated/configuration/searchSettings.ts | 6 ++- .../ui/src/generated/settings/settings.ts | 6 ++- 14 files changed, 134 insertions(+), 26 deletions(-) diff --git a/bootstrap/sql/migrations/native/1.10.0/mysql/schemaChanges.sql b/bootstrap/sql/migrations/native/1.10.0/mysql/schemaChanges.sql index 4caaee09bfa..1c84fc23e3a 100644 --- a/bootstrap/sql/migrations/native/1.10.0/mysql/schemaChanges.sql +++ b/bootstrap/sql/migrations/native/1.10.0/mysql/schemaChanges.sql @@ -23,4 +23,7 @@ ALTER TABLE metric_entity ADD COLUMN customUnitOfMeasurement VARCHAR(256) GENERATED ALWAYS AS (json_unquote(json_extract(json, '$.customUnitOfMeasurement'))) VIRTUAL; -- Add index on the virtual column -CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement); \ No newline at end of file +CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement); + +-- Fetch updated searchSettings +DELETE FROM openmetadata_settings WHERE configType = 'searchSettings'; \ No newline at end of file diff --git a/bootstrap/sql/migrations/native/1.10.0/postgres/schemaChanges.sql b/bootstrap/sql/migrations/native/1.10.0/postgres/schemaChanges.sql index c032990c51d..aa12bc1794d 100644 --- a/bootstrap/sql/migrations/native/1.10.0/postgres/schemaChanges.sql +++ b/bootstrap/sql/migrations/native/1.10.0/postgres/schemaChanges.sql @@ -25,4 +25,7 @@ ALTER TABLE metric_entity ADD COLUMN customUnitOfMeasurement VARCHAR(256) GENERATED ALWAYS AS ((json->>'customUnitOfMeasurement')::VARCHAR(256)) STORED; -- Add index on the column -CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement); \ No newline at end of file +CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement); + +-- Fetch updated searchSettings +DELETE FROM openmetadata_settings WHERE configType = 'searchSettings'; \ No newline at end of file diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchSourceBuilderFactory.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchSourceBuilderFactory.java index 1057ab12490..5f285f89f68 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchSourceBuilderFactory.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/elasticsearch/ElasticSearchSourceBuilderFactory.java @@ -2,6 +2,7 @@ package org.openmetadata.service.search.elasticsearch; import static es.org.elasticsearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS; import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; +import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty; import static org.openmetadata.service.search.EntityBuilderConstant.MAX_ANALYZED_OFFSET; import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG; @@ -20,6 +21,7 @@ import es.org.elasticsearch.index.query.functionscore.FieldValueFactorFunctionBu import es.org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import es.org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; import es.org.elasticsearch.search.aggregations.AggregationBuilders; +import es.org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import es.org.elasticsearch.search.builder.SearchSourceBuilder; import es.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import java.util.ArrayList; @@ -120,11 +122,21 @@ public class ElasticSearchSourceBuilderFactory .getGlobalSettings() .getAggregations() .forEach( - agg -> - searchSourceBuilder.aggregation( - AggregationBuilders.terms(agg.getName()) - .field(agg.getField()) - .size(searchSettings.getGlobalSettings().getMaxAggregateSize()))); + agg -> { + TermsAggregationBuilder termsAgg = + AggregationBuilders.terms(agg.getName()) + .size(searchSettings.getGlobalSettings().getMaxAggregateSize()); + + if (!nullOrEmpty(agg.getField())) { + termsAgg.field(agg.getField()); + } + + if (!nullOrEmpty(agg.getScript())) { + termsAgg.script(new es.org.elasticsearch.script.Script(agg.getScript())); + } + + searchSourceBuilder.aggregation(termsAgg); + }); return searchSourceBuilder; } @@ -646,10 +658,20 @@ public class ElasticSearchSourceBuilderFactory for (var entry : aggregations.entrySet()) { Aggregation agg = entry.getValue(); - searchSourceBuilder.aggregation( + + TermsAggregationBuilder termsAgg = AggregationBuilders.terms(agg.getName()) - .field(agg.getField()) - .size(searchSettings.getGlobalSettings().getMaxAggregateSize())); + .size(searchSettings.getGlobalSettings().getMaxAggregateSize()); + + if (!nullOrEmpty(agg.getField())) { + termsAgg.field(agg.getField()); + } + + if (!nullOrEmpty(agg.getScript())) { + termsAgg.script(new es.org.elasticsearch.script.Script(agg.getScript())); + } + + searchSourceBuilder.aggregation(termsAgg); } } diff --git a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchSourceBuilderFactory.java b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchSourceBuilderFactory.java index 934bdf61226..4a16f65dc1c 100644 --- a/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchSourceBuilderFactory.java +++ b/openmetadata-service/src/main/java/org/openmetadata/service/search/opensearch/OpenSearchSourceBuilderFactory.java @@ -1,6 +1,7 @@ package org.openmetadata.service.search.opensearch; import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; +import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty; import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG; import static os.org.opensearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS; @@ -39,6 +40,7 @@ import os.org.opensearch.index.query.functionscore.FieldValueFactorFunctionBuild import os.org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import os.org.opensearch.index.query.functionscore.ScoreFunctionBuilders; import os.org.opensearch.search.aggregations.AggregationBuilders; +import os.org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import os.org.opensearch.search.builder.SearchSourceBuilder; import os.org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; @@ -125,11 +127,21 @@ public class OpenSearchSourceBuilderFactory .getGlobalSettings() .getAggregations() .forEach( - agg -> - searchSourceBuilder.aggregation( - AggregationBuilders.terms(agg.getName()) - .field(agg.getField()) - .size(searchSettings.getGlobalSettings().getMaxAggregateSize()))); + agg -> { + TermsAggregationBuilder termsAgg = + AggregationBuilders.terms(agg.getName()) + .size(searchSettings.getGlobalSettings().getMaxAggregateSize()); + + if (!nullOrEmpty(agg.getField())) { + termsAgg.field(agg.getField()); + } + + if (!nullOrEmpty(agg.getScript())) { + termsAgg.script(new os.org.opensearch.script.Script(agg.getScript())); + } + + searchSourceBuilder.aggregation(termsAgg); + }); return searchSourceBuilder; } @@ -647,10 +659,20 @@ public class OpenSearchSourceBuilderFactory for (var entry : aggregations.entrySet()) { Aggregation agg = entry.getValue(); - searchSourceBuilder.aggregation( + + TermsAggregationBuilder termsAgg = AggregationBuilders.terms(agg.getName()) - .field(agg.getField()) - .size(searchSettings.getGlobalSettings().getMaxAggregateSize())); + .size(searchSettings.getGlobalSettings().getMaxAggregateSize()); + + if (!nullOrEmpty(agg.getField())) { + termsAgg.field(agg.getField()); + } + + if (!nullOrEmpty(agg.getScript())) { + termsAgg.script(new os.org.opensearch.script.Script(agg.getScript())); + } + + searchSourceBuilder.aggregation(termsAgg); } } diff --git a/openmetadata-service/src/main/resources/json/data/settings/searchSettings.json b/openmetadata-service/src/main/resources/json/data/settings/searchSettings.json index fd24bc6c352..05ac64bcccd 100644 --- a/openmetadata-service/src/main/resources/json/data/settings/searchSettings.json +++ b/openmetadata-service/src/main/resources/json/data/settings/searchSettings.json @@ -40,6 +40,16 @@ "type": "terms", "field": "tags.tagFQN" }, + { + "name": "classificationTags", + "type": "terms", + "script": "def classificationTags = []; if (params._source.tags != null) { for (tag in params._source.tags) { if (tag.source == 'Classification') { classificationTags.add(tag.tagFQN); } } } return classificationTags;" + }, + { + "name": "glossaryTags", + "type": "terms", + "script": "def glossaryTags = []; if (params._source.tags != null) { for (tag in params._source.tags) { if (tag.source == 'Glossary') { glossaryTags.add(tag.tagFQN); } } } return glossaryTags;" + }, { "name": "certification.tagLabel.tagFQN", "type": "terms", @@ -1369,6 +1379,11 @@ "name": "fqnParts_agg", "type": "terms", "field": "fqnParts" + }, + { + "name": "domainType.keyword", + "type": "terms", + "field": "domainType.keyword" } ], "scoreMode": "sum", diff --git a/openmetadata-spec/src/main/resources/elasticsearch/en/domain_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/en/domain_index_mapping.json index fb151bef262..89a8b866311 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/en/domain_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/en/domain_index_mapping.json @@ -85,7 +85,13 @@ } }, "domainType": { - "type": "text" + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "name": { "type": "text", diff --git a/openmetadata-spec/src/main/resources/elasticsearch/jp/domain_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/jp/domain_index_mapping.json index c4bea63172c..7b21f52e26d 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/jp/domain_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/jp/domain_index_mapping.json @@ -73,7 +73,13 @@ } }, "domainType": { - "type": "text" + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "name": { "type": "text", diff --git a/openmetadata-spec/src/main/resources/elasticsearch/ru/domain_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/ru/domain_index_mapping.json index 94a5a037bd3..66e19b89e73 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/ru/domain_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/ru/domain_index_mapping.json @@ -104,7 +104,13 @@ } }, "domainType": { - "type": "text" + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "name": { "type": "text", diff --git a/openmetadata-spec/src/main/resources/elasticsearch/zh/domain_index_mapping.json b/openmetadata-spec/src/main/resources/elasticsearch/zh/domain_index_mapping.json index 0729e85d19b..e96beb0ca0e 100644 --- a/openmetadata-spec/src/main/resources/elasticsearch/zh/domain_index_mapping.json +++ b/openmetadata-spec/src/main/resources/elasticsearch/zh/domain_index_mapping.json @@ -70,7 +70,13 @@ } }, "domainType": { - "type": "text" + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } }, "name": { "type": "text", diff --git a/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json b/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json index fbc83c65ffe..87fe868f53e 100644 --- a/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json +++ b/openmetadata-spec/src/main/resources/json/schema/configuration/searchSettings.json @@ -380,9 +380,14 @@ "field": { "type": "string", "description": "The field on which this aggregation is performed." + }, + "script": { + "type": "string", + "description": "Optional script to apply on the terms aggregation.", + "default": "" } }, - "required": ["name", "type", "field"], + "required": ["name", "type"], "additionalProperties": false }, "allowedSearchFields": { diff --git a/openmetadata-ui/src/main/resources/ui/playwright/utils/searchSettingUtils.ts b/openmetadata-ui/src/main/resources/ui/playwright/utils/searchSettingUtils.ts index 5e93ee57fcb..a6ad1b1a12d 100644 --- a/openmetadata-ui/src/main/resources/ui/playwright/utils/searchSettingUtils.ts +++ b/openmetadata-ui/src/main/resources/ui/playwright/utils/searchSettingUtils.ts @@ -49,11 +49,13 @@ export const mockEntitySearchConfig = { name: 'database.displayName.keyword', type: 'terms', field: 'database.displayName.keyword', + script: '', }, { name: 'databaseSchema.displayName.keyword', type: 'terms', field: 'databaseSchema.displayName.keyword', + script: '', }, ], termBoosts: [], diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/api/search/previewSearchRequest.ts b/openmetadata-ui/src/main/resources/ui/src/generated/api/search/previewSearchRequest.ts index 7952d7cc6d9..95bf7b4453e 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/api/search/previewSearchRequest.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/api/search/previewSearchRequest.ts @@ -155,11 +155,15 @@ export interface Aggregation { /** * The field on which this aggregation is performed. */ - field: string; + field?: string; /** * A descriptive name for the aggregation. */ name: string; + /** + * Optional script to apply on the terms aggregation. + */ + script?: string; /** * The type of aggregation to perform. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts index c66e48558df..f3efc299253 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/configuration/searchSettings.ts @@ -123,11 +123,15 @@ export interface Aggregation { /** * The field on which this aggregation is performed. */ - field: string; + field?: string; /** * A descriptive name for the aggregation. */ name: string; + /** + * Optional script to apply on the terms aggregation. + */ + script?: string; /** * The type of aggregation to perform. */ diff --git a/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts b/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts index 02b1a13cfc0..b1ebe933d03 100644 --- a/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts +++ b/openmetadata-ui/src/main/resources/ui/src/generated/settings/settings.ts @@ -531,11 +531,15 @@ export interface Aggregation { /** * The field on which this aggregation is performed. */ - field: string; + field?: string; /** * A descriptive name for the aggregation. */ name: string; + /** + * Optional script to apply on the terms aggregation. + */ + script?: string; /** * The type of aggregation to perform. */