feat: Add configurable filters aggregation for tag source segregation in search (#23363)

* feat: Add configurable filters aggregation for tag source segregation in search

* Update generated TypeScript types

* use script based aggregation

* use script based aggregation

* Update generated TypeScript types

* update aggregations with script

* add migrations to fetch updated searchSettings

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Karan Hotchandani <33024356+karanh37@users.noreply.github.com>
This commit is contained in:
sonika-shah 2025-09-16 15:16:06 +05:30 committed by GitHub
parent 7672f85592
commit cc18ba1554
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 134 additions and 26 deletions

View File

@ -23,4 +23,7 @@ ALTER TABLE metric_entity
ADD COLUMN customUnitOfMeasurement VARCHAR(256)
GENERATED ALWAYS AS (json_unquote(json_extract(json, '$.customUnitOfMeasurement'))) VIRTUAL;
-- Add index on the virtual column
CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement);
CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement);
-- Delete the stored searchSettings so that the updated searchSettings are fetched
DELETE FROM openmetadata_settings WHERE configType = 'searchSettings';

View File

@ -25,4 +25,7 @@ ALTER TABLE metric_entity
ADD COLUMN customUnitOfMeasurement VARCHAR(256)
GENERATED ALWAYS AS ((json->>'customUnitOfMeasurement')::VARCHAR(256)) STORED;
-- Add index on the column
CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement);
CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement);
-- Delete the stored searchSettings so that the updated searchSettings are fetched
DELETE FROM openmetadata_settings WHERE configType = 'searchSettings';

View File

@ -2,6 +2,7 @@ package org.openmetadata.service.search.elasticsearch;
import static es.org.elasticsearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.service.search.EntityBuilderConstant.MAX_ANALYZED_OFFSET;
import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG;
import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG;
@ -20,6 +21,7 @@ import es.org.elasticsearch.index.query.functionscore.FieldValueFactorFunctionBu
import es.org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import es.org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import es.org.elasticsearch.search.aggregations.AggregationBuilders;
import es.org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import es.org.elasticsearch.search.builder.SearchSourceBuilder;
import es.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import java.util.ArrayList;
@ -120,11 +122,21 @@ public class ElasticSearchSourceBuilderFactory
.getGlobalSettings()
.getAggregations()
.forEach(
agg ->
searchSourceBuilder.aggregation(
AggregationBuilders.terms(agg.getName())
.field(agg.getField())
.size(searchSettings.getGlobalSettings().getMaxAggregateSize())));
agg -> {
TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName())
.size(searchSettings.getGlobalSettings().getMaxAggregateSize());
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new es.org.elasticsearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
});
return searchSourceBuilder;
}
@ -646,10 +658,20 @@ public class ElasticSearchSourceBuilderFactory
for (var entry : aggregations.entrySet()) {
Aggregation agg = entry.getValue();
searchSourceBuilder.aggregation(
TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName())
.field(agg.getField())
.size(searchSettings.getGlobalSettings().getMaxAggregateSize()));
.size(searchSettings.getGlobalSettings().getMaxAggregateSize());
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new es.org.elasticsearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
}
}

View File

@ -1,6 +1,7 @@
package org.openmetadata.service.search.opensearch;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG;
import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG;
import static os.org.opensearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS;
@ -39,6 +40,7 @@ import os.org.opensearch.index.query.functionscore.FieldValueFactorFunctionBuild
import os.org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder;
import os.org.opensearch.index.query.functionscore.ScoreFunctionBuilders;
import os.org.opensearch.search.aggregations.AggregationBuilders;
import os.org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import os.org.opensearch.search.builder.SearchSourceBuilder;
import os.org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;
@ -125,11 +127,21 @@ public class OpenSearchSourceBuilderFactory
.getGlobalSettings()
.getAggregations()
.forEach(
agg ->
searchSourceBuilder.aggregation(
AggregationBuilders.terms(agg.getName())
.field(agg.getField())
.size(searchSettings.getGlobalSettings().getMaxAggregateSize())));
agg -> {
TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName())
.size(searchSettings.getGlobalSettings().getMaxAggregateSize());
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new os.org.opensearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
});
return searchSourceBuilder;
}
@ -647,10 +659,20 @@ public class OpenSearchSourceBuilderFactory
for (var entry : aggregations.entrySet()) {
Aggregation agg = entry.getValue();
searchSourceBuilder.aggregation(
TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName())
.field(agg.getField())
.size(searchSettings.getGlobalSettings().getMaxAggregateSize()));
.size(searchSettings.getGlobalSettings().getMaxAggregateSize());
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new os.org.opensearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
}
}

View File

@ -40,6 +40,16 @@
"type": "terms",
"field": "tags.tagFQN"
},
{
"name": "classificationTags",
"type": "terms",
"script": "def classificationTags = []; if (params._source.tags != null) { for (tag in params._source.tags) { if (tag.source == 'Classification') { classificationTags.add(tag.tagFQN); } } } return classificationTags;"
},
{
"name": "glossaryTags",
"type": "terms",
"script": "def glossaryTags = []; if (params._source.tags != null) { for (tag in params._source.tags) { if (tag.source == 'Glossary') { glossaryTags.add(tag.tagFQN); } } } return glossaryTags;"
},
{
"name": "certification.tagLabel.tagFQN",
"type": "terms",
@ -1369,6 +1379,11 @@
"name": "fqnParts_agg",
"type": "terms",
"field": "fqnParts"
},
{
"name": "domainType.keyword",
"type": "terms",
"field": "domainType.keyword"
}
],
"scoreMode": "sum",

View File

@ -85,7 +85,13 @@
}
},
"domainType": {
"type": "text"
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",

View File

@ -73,7 +73,13 @@
}
},
"domainType": {
"type": "text"
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",

View File

@ -104,7 +104,13 @@
}
},
"domainType": {
"type": "text"
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",

View File

@ -70,7 +70,13 @@
}
},
"domainType": {
"type": "text"
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"name": {
"type": "text",

View File

@ -380,9 +380,14 @@
"field": {
"type": "string",
"description": "The field on which this aggregation is performed."
},
"script": {
"type": "string",
"description": "Optional script to apply on the terms aggregation.",
"default": ""
}
},
"required": ["name", "type", "field"],
"required": ["name", "type"],
"additionalProperties": false
},
"allowedSearchFields": {

View File

@ -49,11 +49,13 @@ export const mockEntitySearchConfig = {
name: 'database.displayName.keyword',
type: 'terms',
field: 'database.displayName.keyword',
script: '',
},
{
name: 'databaseSchema.displayName.keyword',
type: 'terms',
field: 'databaseSchema.displayName.keyword',
script: '',
},
],
termBoosts: [],

View File

@ -155,11 +155,15 @@ export interface Aggregation {
/**
* The field on which this aggregation is performed.
*/
field: string;
field?: string;
/**
* A descriptive name for the aggregation.
*/
name: string;
/**
* Optional script to apply on the terms aggregation.
*/
script?: string;
/**
* The type of aggregation to perform.
*/

View File

@ -123,11 +123,15 @@ export interface Aggregation {
/**
* The field on which this aggregation is performed.
*/
field: string;
field?: string;
/**
* A descriptive name for the aggregation.
*/
name: string;
/**
* Optional script to apply on the terms aggregation.
*/
script?: string;
/**
* The type of aggregation to perform.
*/

View File

@ -531,11 +531,15 @@ export interface Aggregation {
/**
* The field on which this aggregation is performed.
*/
field: string;
field?: string;
/**
* A descriptive name for the aggregation.
*/
name: string;
/**
* Optional script to apply on the terms aggregation.
*/
script?: string;
/**
* The type of aggregation to perform.
*/