Add IAM config for natural language search (#20742)

* Add IAM config for natural language search

* Add IAM config for natural language search

* Add IAM config for natural language search

---------

Co-authored-by: Mohit Yadav <105265192+mohityadav766@users.noreply.github.com>
This commit is contained in:
Sriharsha Chintalapani 2025-04-16 11:03:47 -07:00 committed by GitHub
parent bb36a13735
commit 03abcb60f7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 149 additions and 11 deletions

View File

@ -309,6 +309,7 @@ elasticsearch:
modelId: ${AWS_BEDROCK_MODEL_ID:-""} modelId: ${AWS_BEDROCK_MODEL_ID:-""}
accessKey: ${AWS_BEDROCK_ACCESS_KEY:-""} accessKey: ${AWS_BEDROCK_ACCESS_KEY:-""}
secretKey: ${AWS_BEDROCK_SECRET_KEY:-""} secretKey: ${AWS_BEDROCK_SECRET_KEY:-""}
useIamRole: ${AWS_BEDROCK_USE_IAM:-"false"}
eventMonitoringConfiguration: eventMonitoringConfiguration:

View File

@ -881,9 +881,10 @@ public class ElasticSearchClient implements SearchClient {
String transformedQuery = nlqService.transformNaturalLanguageQuery(request, null); String transformedQuery = nlqService.transformNaturalLanguageQuery(request, null);
XContentParser parser = createXContentParser(transformedQuery); XContentParser parser = createXContentParser(transformedQuery);
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser);
searchSourceBuilder.from(request.getFrom());
searchSourceBuilder.size(request.getSize());
ElasticSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory(); ElasticSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory();
sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex()); sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex());
LOG.debug("Transformed NLQ query: {}", transformedQuery); LOG.debug("Transformed NLQ query: {}", transformedQuery);
es.org.elasticsearch.action.search.SearchRequest searchRequest = es.org.elasticsearch.action.search.SearchRequest searchRequest =
new es.org.elasticsearch.action.search.SearchRequest(request.getIndex()); new es.org.elasticsearch.action.search.SearchRequest(request.getIndex());

View File

@ -537,6 +537,8 @@ public class OpenSearchClient implements SearchClient {
LOG.debug("Transformed NLQ query: {}", transformedQuery); LOG.debug("Transformed NLQ query: {}", transformedQuery);
XContentParser parser = createXContentParser(transformedQuery); XContentParser parser = createXContentParser(transformedQuery);
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser); SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser);
searchSourceBuilder.from(request.getFrom());
searchSourceBuilder.size(request.getSize());
OpenSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory(); OpenSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory();
sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex()); sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex());
os.org.opensearch.action.search.SearchRequest searchRequest = os.org.opensearch.action.search.SearchRequest searchRequest =
@ -544,15 +546,15 @@ public class OpenSearchClient implements SearchClient {
searchRequest.source(searchSourceBuilder); searchRequest.source(searchSourceBuilder);
os.org.opensearch.action.search.SearchResponse response = os.org.opensearch.action.search.SearchResponse response =
client.search(searchRequest, os.org.opensearch.client.RequestOptions.DEFAULT); client.search(searchRequest, os.org.opensearch.client.RequestOptions.DEFAULT);
if (response.getHits().getTotalHits().value > 0) { if (response.getHits() != null
&& response.getHits().getTotalHits() != null
&& response.getHits().getTotalHits().value > 0) {
nlqService.cacheQuery(request.getQuery(), transformedQuery); nlqService.cacheQuery(request.getQuery(), transformedQuery);
} }
return Response.status(Response.Status.OK).entity(response.toString()).build(); return Response.status(Response.Status.OK).entity(response.toString()).build();
} }
} catch (Exception e) { } catch (Exception e) {
LOG.error("Error transforming or executing NLQ query: {}", e.getMessage(), e); LOG.error("Error transforming or executing NLQ query: {}", e.getMessage(), e);
// Try using the built-in OpenSearch NLQ feature as a first fallback
return fallbackToBasicSearch(request, subjectContext); return fallbackToBasicSearch(request, subjectContext);
} }
} else { } else {

View File

@ -1297,6 +1297,14 @@
{ {
"name": "fqnParts", "name": "fqnParts",
"description": "Search on individual parts of the fully qualified name, allowing more flexible matching on hierarchical components." "description": "Search on individual parts of the fully qualified name, allowing more flexible matching on hierarchical components."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with database."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with database."
} }
] ]
}, },
@ -1334,6 +1342,14 @@
{ {
"name": "fqnParts", "name": "fqnParts",
"description": "Search on individual parts of the fully qualified name for more flexible hierarchical matching." "description": "Search on individual parts of the fully qualified name for more flexible hierarchical matching."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with database schema."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with database schema."
} }
] ]
}, },
@ -1395,6 +1411,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified tables by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified tables by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with table."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with table."
} }
] ]
}, },
@ -1444,6 +1468,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified storedProcedures by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified storedProcedures by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with stored procedure."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with stored procedure."
} }
] ]
}, },
@ -1489,6 +1521,14 @@
{ {
"name": "queryText.ngram", "name": "queryText.ngram",
"description": "Partial matching on query text to find queries containing specific SQL fragments." "description": "Partial matching on query text to find queries containing specific SQL fragments."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with query."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with query."
} }
] ]
}, },
@ -1574,6 +1614,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified topics by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified topics by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with topic."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with topic."
} }
] ]
}, },
@ -1651,6 +1699,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified dashboards by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified dashboards by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with dashboard."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with dashboard."
} }
] ]
}, },
@ -1720,6 +1776,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified dashboard data models by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified dashboard data models by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with dashboard data model."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with dashboard data model."
} }
] ]
}, },
@ -1777,6 +1841,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified pipelines by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified pipelines by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with pipeline."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with pipeline."
} }
] ]
}, },
@ -1834,6 +1906,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified mlModels by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified mlModels by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with mlmodel."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with mlmodel."
} }
] ]
}, },
@ -1873,11 +1953,11 @@
"description": "Search on parts of the hierarchical name for flexible matching." "description": "Search on parts of the hierarchical name for flexible matching."
}, },
{ {
"name": "mlFeatures.name", "name": "searchIndex.responseSchema.name",
"description": "Search on the names of fields indexed in the search index." "description": "Search on the names of fields indexed in the search index."
}, },
{ {
"name": "mlFeatures.description", "name": "searchIndex.responseSchema.description",
"description": "Search on field descriptions to find search indexes with fields serving specific purposes." "description": "Search on field descriptions to find search indexes with fields serving specific purposes."
}, },
{ {
@ -1891,6 +1971,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified searchIndexes by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified searchIndexes by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with search index."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with search index."
} }
] ]
}, },
@ -1960,6 +2048,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified containers by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified containers by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with container."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with container."
} }
] ]
}, },
@ -2025,6 +2121,14 @@
{ {
"name": "certification.tagLabel.tagFQN.text", "name": "certification.tagLabel.tagFQN.text",
"description": "Search within parts of certification tag names. Useful for finding certified apiEndpoint by partial certification name." "description": "Search within parts of certification tag names. Useful for finding certified apiEndpoint by partial certification name."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with apiEndpoint."
},
{
"name": "dataProducts.displayName.keyword",
"description": "Exact match on dataProducts associated with apiEndpoint."
} }
] ]
}, },
@ -2201,6 +2305,14 @@
{ {
"name": "glossary.displayName", "name": "glossary.displayName",
"description": "Search on the display name of the parent glossary." "description": "Search on the display name of the parent glossary."
},
{
"name": "domain.name.keyword",
"description": "Exact match on domain name associated with glossary term."
},
{
"name": "domain.displayName.keyword",
"description": "Exact match on domain associated with glossary term."
} }
] ]
}, },
@ -2270,29 +2382,42 @@
} }
], ],
"nlqConfiguration": { "nlqConfiguration": {
"promptTemplate": "You are an assistant that translates natural language queries into OpenSearch queries.\n\n{{INSTRUCTIONS}}\n\nUSER QUERY: {{QUERY}}\n\nGenerate a valid OpenSearch JSON query object. Return ONLY the JSON query with no explanations.", "promptTemplate": "You are an expert OpenSearch query generator specializing in the OpenMetadata schema. Your primary task is to translate natural language queries into precise OpenSearch JSON queries based *strictly* on the provided FIELD SUMMARY and INTERPRETATION GUIDELINES. Pay close attention to field descriptions to select the correct fields and query types.\n\n{{INSTRUCTIONS}}\n\nUSER QUERY: {{QUERY}}\n\nGenerate a valid OpenSearch JSON query object. Return ONLY the JSON query with no explanations.",
"globalInstructions": [ "globalInstructions": [
{
"section": "QUERY GENERATION PROCESS",
"content": "1. Analyze the USER QUERY to identify key entities, concepts (like domain, owner, tags, data product), and filter values.\n2. Consult the provided FIELD SUMMARY for the detected entity type.\n3. Select the MOST APPROPRIATE field(s) from the summary based on their descriptions.\n4. Determine the correct query type (`match` for flexible/text search, `term` for exact/keyword search) based on the field description and the guidelines below.\n5. Construct the OpenSearch query JSON using ONLY the selected fields and types.",
"order": 1
},
{ {
"section": "COMMON MISTAKES TO AVOID", "section": "COMMON MISTAKES TO AVOID",
"content": "1. Missing entityType filter when type is mentioned\n2. Using nested queries for non-nested fields\n3. Incorrect field names\n\nThe following examples show INCORRECT patterns followed by the CORRECT version:", "content": "1. Missing entityType filter when type is mentioned\n2. Using nested queries for non-nested fields\n3. Incorrect field names\n\nThe following examples show INCORRECT patterns followed by the CORRECT version:",
"order": 5 "order": 5
}, },
{ {
"section": "INCORRECT VS CORRECT PATTERNS", "section": "INCORRECT VS CORRECT PATTERNS",
"content": "❌ INCORRECT: Tables query missing entityType filter\n{\"bool\": {\"must\": [{\"term\": {\"owners.name\": \"accounting\"}}]}}\n\n✅ CORRECT: Tables query with entityType filter\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"accounting\"}}]}}\n\n❌ INCORRECT: Using nested query for columns\n{\"bool\": {\"must\": [{\"nested\": {\"path\": \"columns\", \"query\": {\"match\": {\"columns.name\": \"SKU\"}}}}]}}\n\n✅ CORRECT: Direct match query for columns\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"SKU\"}}]}}\n\n❌ INCORRECT: Using owner.name (singular)\n{\"term\": {\"owner.name\": \"marketing\"}}\n\n✅ CORRECT: Using owners.name (plural)\n{\"term\": {\"owners.name\": \"marketing\"}}\n\n❌ INCORRECT: Using tags for tier\n{\"term\": {\"tags.tagFQN\": \"Tier1\"}}\n\n✅ CORRECT: Using tier.tagFQN with proper format\n{\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}", "content": "❌ INCORRECT: Tables query missing entityType filter\n{\"bool\": {\"must\": [{\"term\": {\"owners.name\": \"accounting\"}}]}}\n\n✅ CORRECT: Tables query with entityType filter\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"accounting\"}}]}}\n\n❌ INCORRECT: Using nested query for columns\n{\"bool\": {\"must\": [{\"nested\": {\"path\": \"columns\", \"query\": {\"match\": {\"columns.name\": \"SKU\"}}}}]}}\n\n✅ CORRECT: Direct match query for columns\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"SKU\"}}]}}\n\n❌ INCORRECT: Using owner.name (singular)\n{\"term\": {\"owner.name\": \"marketing\"}}\n\n✅ CORRECT: Using owners.name (plural)\n{\"term\": {\"owners.name\": \"marketing\"}}\n\n❌ INCORRECT: Using tags for tier\n{\"term\": {\"tags.tagFQN\": \"Tier1\"}}\n\n✅ CORRECT: Using tier.tagFQN with proper format\n{\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}",
"order": 6 "order": 6
}, },
{
"section": "QUERY TYPE SELECTION (CRITICAL: Filtering vs. Searching)",
"content": "- **FOR FILTERING by specific values** (e.g., domain name, owner name, tag FQN, tier, specific ID): ALWAYS prefer a `term` query on the corresponding `.keyword` field (e.g., `domain.displayName.keyword`, `owners.displayName.keyword`, `tags.tagFQN`, `tier.tagFQN`). Check the FIELD SUMMARY for available `.keyword` variants.\n- **FOR SEARCHING within text** (e.g., descriptions, general names where partial matches are okay): Use a `match` query on the base text field (e.g., `description`, `name`).\n- Assume `.keyword` fields with normalizers handle case-insensitivity for `term` queries.",
"order": 10
},
{ {
"section": "CRITICAL FIELD CORRECTIONS", "section": "CRITICAL FIELD CORRECTIONS",
"content": "1. Use 'owners.name' (plural) NOT 'owner.name' (singular)\n2. Use 'tags.tagFQN' NOT just 'tags' for regular tags\n3. Use 'tier.tagFQN' NOT 'tags.tagFQN' for tier-related queries\n4. Top-level fields like 'owners', 'tags', 'tier' do NOT need nested queries\n5. Only use nested queries for 'columns', 'charts', 'tasks', etc.", "content": "1. Use 'owners.name' (plural) NOT 'owner.name' (singular)\n2. Use 'tags.tagFQN' NOT just 'tags' for regular tags\n3. Use 'tier.tagFQN' NOT 'tags.tagFQN' for tier-related queries\n4. Top-level fields like 'owners', 'tags', 'tier' do NOT need nested queries\n5. Only use nested queries for 'columns', 'charts', 'tasks', etc.",
"order": 10 "order": 11
},
{
"section": "AMBIGUITY RESOLUTION",
"content": "- If the user query mentions 'domain', verify if the FIELD SUMMARY lists a specific 'domain' field (like `domain.displayName.keyword`). If yes, use it. If not, or if context suggests otherwise, reconsider. DO NOT default to `service.name` for business domains.\n- If the user query mentions 'data product', verify if the FIELD SUMMARY lists a specific 'data product' field (like `dataProducts.displayName.keyword`). If yes, use it.\n- For general terms, use the field description in the FIELD SUMMARY to determine the best fit (e.g., prefer `displayName` for user-friendly name searches over raw `name`).",
"order": 14
}, },
{ {
"section": "ENTITY TYPE FILTERING", "section": "ENTITY TYPE FILTERING",
"content": "When a user specifies a particular entity type (e.g., 'tables', 'dashboards', 'pipelines'):\n1. ALWAYS add a term filter for 'entityType' with the SINGULAR form\n2. For example: If query mentions 'tables', add {\"term\": {\"entityType\": \"table\"}} (not 'tables')\n3. For example: If query mentions 'dashboards', add {\"term\": {\"entityType\": \"dashboard\"}} (not 'dashboards')\n4. This MUST be combined with any other filters (like tags, owners, etc.) using a 'must' clause", "content": "When a user specifies a particular entity type (e.g., 'tables', 'dashboards', 'pipelines'):\n1. ALWAYS add a term filter for 'entityType' with the SINGULAR form\n2. For example: If query mentions 'tables', add {\"term\": {\"entityType\": \"table\"}} (not 'tables')\n3. For example: If query mentions 'dashboards', add {\"term\": {\"entityType\": \"dashboard\"}} (not 'dashboards')\n4. This MUST be combined with any other filters (like tags, owners, etc.) using a 'must' clause",
"order": 12 "order": 14
}, },
{ {
"section": "TIER TAG FORMATTING", "section": "TIER TAG FORMATTING",

View File

@ -128,6 +128,11 @@
"secretKey": { "secretKey": {
"description": "AWS secret key for Bedrock service authentication", "description": "AWS secret key for Bedrock service authentication",
"type": "string" "type": "string"
},
"useIamRole": {
"description": "Set to true to use IAM role based authentication instead of access/secret keys.",
"type": "boolean",
"default": false
} }
}, },
"additionalProperties": false "additionalProperties": false

View File

@ -119,6 +119,10 @@ export interface Bedrock {
* AWS secret key for Bedrock service authentication * AWS secret key for Bedrock service authentication
*/ */
secretKey?: string; secretKey?: string;
/**
* Set to true to use IAM role based authentication instead of access/secret keys.
*/
useIamRole?: boolean;
} }
/** /**