feat: Add configurable filters aggregation for tag source segregation in search (#23363)

* feat: Add configurable filters aggregation for tag source segregation in search

* Update generated TypeScript types

* use script based aggregation

* use script based aggregation

* Update generated TypeScript types

* update aggregations with script

* add migrations to fetch updated searchSettings

---------

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Karan Hotchandani <33024356+karanh37@users.noreply.github.com>
This commit is contained in:
sonika-shah 2025-09-16 15:16:06 +05:30 committed by GitHub
parent 7672f85592
commit cc18ba1554
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 134 additions and 26 deletions

View File

@ -24,3 +24,6 @@ ADD COLUMN customUnitOfMeasurement VARCHAR(256)
GENERATED ALWAYS AS (json_unquote(json_extract(json, '$.customUnitOfMeasurement'))) VIRTUAL; GENERATED ALWAYS AS (json_unquote(json_extract(json, '$.customUnitOfMeasurement'))) VIRTUAL;
-- Add index on the virtual column -- Add index on the virtual column
CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement); CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement);
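-- Delete the stored searchSettings row so that the updated searchSettings get fetched.
-- NOTE: this removes whatever searchSettings configuration is currently persisted.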
DELETE FROM openmetadata_settings WHERE configType = 'searchSettings';

View File

@ -26,3 +26,6 @@ ADD COLUMN customUnitOfMeasurement VARCHAR(256)
GENERATED ALWAYS AS ((json->>'customUnitOfMeasurement')::VARCHAR(256)) STORED; GENERATED ALWAYS AS ((json->>'customUnitOfMeasurement')::VARCHAR(256)) STORED;
-- Add index on the column -- Add index on the column
CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement); CREATE INDEX idx_metric_custom_unit ON metric_entity(customUnitOfMeasurement);
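-- Delete the stored searchSettings row so that the updated searchSettings get fetched.
-- NOTE: this removes whatever searchSettings configuration is currently persisted.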
DELETE FROM openmetadata_settings WHERE configType = 'searchSettings';

View File

@ -2,6 +2,7 @@ package org.openmetadata.service.search.elasticsearch;
import static es.org.elasticsearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS; import static es.org.elasticsearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.service.search.EntityBuilderConstant.MAX_ANALYZED_OFFSET; import static org.openmetadata.service.search.EntityBuilderConstant.MAX_ANALYZED_OFFSET;
import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG;
import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG;
@ -20,6 +21,7 @@ import es.org.elasticsearch.index.query.functionscore.FieldValueFactorFunctionBu
import es.org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import es.org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import es.org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; import es.org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import es.org.elasticsearch.search.aggregations.AggregationBuilders; import es.org.elasticsearch.search.aggregations.AggregationBuilders;
import es.org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import es.org.elasticsearch.search.builder.SearchSourceBuilder; import es.org.elasticsearch.search.builder.SearchSourceBuilder;
import es.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import es.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import java.util.ArrayList; import java.util.ArrayList;
@ -120,11 +122,21 @@ public class ElasticSearchSourceBuilderFactory
.getGlobalSettings() .getGlobalSettings()
.getAggregations() .getAggregations()
.forEach( .forEach(
agg -> agg -> {
searchSourceBuilder.aggregation( TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName()) AggregationBuilders.terms(agg.getName())
.field(agg.getField()) .size(searchSettings.getGlobalSettings().getMaxAggregateSize());
.size(searchSettings.getGlobalSettings().getMaxAggregateSize())));
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new es.org.elasticsearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
});
return searchSourceBuilder; return searchSourceBuilder;
} }
@ -646,10 +658,20 @@ public class ElasticSearchSourceBuilderFactory
for (var entry : aggregations.entrySet()) { for (var entry : aggregations.entrySet()) {
Aggregation agg = entry.getValue(); Aggregation agg = entry.getValue();
searchSourceBuilder.aggregation(
TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName()) AggregationBuilders.terms(agg.getName())
.field(agg.getField()) .size(searchSettings.getGlobalSettings().getMaxAggregateSize());
.size(searchSettings.getGlobalSettings().getMaxAggregateSize()));
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new es.org.elasticsearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
} }
} }

View File

@ -1,6 +1,7 @@
package org.openmetadata.service.search.opensearch; package org.openmetadata.service.search.opensearch;
import static org.openmetadata.common.utils.CommonUtil.listOrEmpty; import static org.openmetadata.common.utils.CommonUtil.listOrEmpty;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.POST_TAG;
import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG; import static org.openmetadata.service.search.EntityBuilderConstant.PRE_TAG;
import static os.org.opensearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS; import static os.org.opensearch.index.query.MultiMatchQueryBuilder.Type.MOST_FIELDS;
@ -39,6 +40,7 @@ import os.org.opensearch.index.query.functionscore.FieldValueFactorFunctionBuild
import os.org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; import os.org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder;
import os.org.opensearch.index.query.functionscore.ScoreFunctionBuilders; import os.org.opensearch.index.query.functionscore.ScoreFunctionBuilders;
import os.org.opensearch.search.aggregations.AggregationBuilders; import os.org.opensearch.search.aggregations.AggregationBuilders;
import os.org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import os.org.opensearch.search.builder.SearchSourceBuilder; import os.org.opensearch.search.builder.SearchSourceBuilder;
import os.org.opensearch.search.fetch.subphase.highlight.HighlightBuilder; import os.org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;
@ -125,11 +127,21 @@ public class OpenSearchSourceBuilderFactory
.getGlobalSettings() .getGlobalSettings()
.getAggregations() .getAggregations()
.forEach( .forEach(
agg -> agg -> {
searchSourceBuilder.aggregation( TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName()) AggregationBuilders.terms(agg.getName())
.field(agg.getField()) .size(searchSettings.getGlobalSettings().getMaxAggregateSize());
.size(searchSettings.getGlobalSettings().getMaxAggregateSize())));
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new os.org.opensearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
});
return searchSourceBuilder; return searchSourceBuilder;
} }
@ -647,10 +659,20 @@ public class OpenSearchSourceBuilderFactory
for (var entry : aggregations.entrySet()) { for (var entry : aggregations.entrySet()) {
Aggregation agg = entry.getValue(); Aggregation agg = entry.getValue();
searchSourceBuilder.aggregation(
TermsAggregationBuilder termsAgg =
AggregationBuilders.terms(agg.getName()) AggregationBuilders.terms(agg.getName())
.field(agg.getField()) .size(searchSettings.getGlobalSettings().getMaxAggregateSize());
.size(searchSettings.getGlobalSettings().getMaxAggregateSize()));
if (!nullOrEmpty(agg.getField())) {
termsAgg.field(agg.getField());
}
if (!nullOrEmpty(agg.getScript())) {
termsAgg.script(new os.org.opensearch.script.Script(agg.getScript()));
}
searchSourceBuilder.aggregation(termsAgg);
} }
} }

View File

@ -40,6 +40,16 @@
"type": "terms", "type": "terms",
"field": "tags.tagFQN" "field": "tags.tagFQN"
}, },
{
"name": "classificationTags",
"type": "terms",
"script": "def classificationTags = []; if (params._source.tags != null) { for (tag in params._source.tags) { if (tag.source == 'Classification') { classificationTags.add(tag.tagFQN); } } } return classificationTags;"
},
{
"name": "glossaryTags",
"type": "terms",
"script": "def glossaryTags = []; if (params._source.tags != null) { for (tag in params._source.tags) { if (tag.source == 'Glossary') { glossaryTags.add(tag.tagFQN); } } } return glossaryTags;"
},
{ {
"name": "certification.tagLabel.tagFQN", "name": "certification.tagLabel.tagFQN",
"type": "terms", "type": "terms",
@ -1369,6 +1379,11 @@
"name": "fqnParts_agg", "name": "fqnParts_agg",
"type": "terms", "type": "terms",
"field": "fqnParts" "field": "fqnParts"
},
{
"name": "domainType.keyword",
"type": "terms",
"field": "domainType.keyword"
} }
], ],
"scoreMode": "sum", "scoreMode": "sum",

View File

@ -85,7 +85,13 @@
} }
}, },
"domainType": { "domainType": {
"type": "text" "type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, },
"name": { "name": {
"type": "text", "type": "text",

View File

@ -73,7 +73,13 @@
} }
}, },
"domainType": { "domainType": {
"type": "text" "type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, },
"name": { "name": {
"type": "text", "type": "text",

View File

@ -104,7 +104,13 @@
} }
}, },
"domainType": { "domainType": {
"type": "text" "type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, },
"name": { "name": {
"type": "text", "type": "text",

View File

@ -70,7 +70,13 @@
} }
}, },
"domainType": { "domainType": {
"type": "text" "type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, },
"name": { "name": {
"type": "text", "type": "text",

View File

@ -380,9 +380,14 @@
"field": { "field": {
"type": "string", "type": "string",
"description": "The field on which this aggregation is performed." "description": "The field on which this aggregation is performed."
},
"script": {
"type": "string",
"description": "Optional script to apply on the terms aggregation.",
"default": ""
} }
}, },
"required": ["name", "type", "field"], "required": ["name", "type"],
"additionalProperties": false "additionalProperties": false
}, },
"allowedSearchFields": { "allowedSearchFields": {

View File

@ -49,11 +49,13 @@ export const mockEntitySearchConfig = {
name: 'database.displayName.keyword', name: 'database.displayName.keyword',
type: 'terms', type: 'terms',
field: 'database.displayName.keyword', field: 'database.displayName.keyword',
script: '',
}, },
{ {
name: 'databaseSchema.displayName.keyword', name: 'databaseSchema.displayName.keyword',
type: 'terms', type: 'terms',
field: 'databaseSchema.displayName.keyword', field: 'databaseSchema.displayName.keyword',
script: '',
}, },
], ],
termBoosts: [], termBoosts: [],

View File

@ -155,11 +155,15 @@ export interface Aggregation {
/** /**
* The field on which this aggregation is performed. * The field on which this aggregation is performed.
*/ */
field: string; field?: string;
/** /**
* A descriptive name for the aggregation. * A descriptive name for the aggregation.
*/ */
name: string; name: string;
/**
* Optional script to apply on the terms aggregation.
*/
script?: string;
/** /**
* The type of aggregation to perform. * The type of aggregation to perform.
*/ */

View File

@ -123,11 +123,15 @@ export interface Aggregation {
/** /**
* The field on which this aggregation is performed. * The field on which this aggregation is performed.
*/ */
field: string; field?: string;
/** /**
* A descriptive name for the aggregation. * A descriptive name for the aggregation.
*/ */
name: string; name: string;
/**
* Optional script to apply on the terms aggregation.
*/
script?: string;
/** /**
* The type of aggregation to perform. * The type of aggregation to perform.
*/ */

View File

@ -531,11 +531,15 @@ export interface Aggregation {
/** /**
* The field on which this aggregation is performed. * The field on which this aggregation is performed.
*/ */
field: string; field?: string;
/** /**
* A descriptive name for the aggregation. * A descriptive name for the aggregation.
*/ */
name: string; name: string;
/**
* Optional script to apply on the terms aggregation.
*/
script?: string;
/** /**
* The type of aggregation to perform. * The type of aggregation to perform.
*/ */