Added better Searching via Claude tool (#23343)

(cherry picked from commit 455d9cd68cb96f6f26a80eae7e5ef85ccf26a37e)
This commit is contained in:
Mohit Yadav 2025-09-12 18:55:24 +05:30 committed by mohitdeuex
parent eab715d279
commit c6f17ee84a
14 changed files with 274 additions and 326 deletions

View File

@ -130,7 +130,6 @@ public class McpServer implements McpServerProvider {
private McpStatelessServerFeatures.SyncPromptSpecification getPrompt(McpSchema.Prompt prompt) { private McpStatelessServerFeatures.SyncPromptSpecification getPrompt(McpSchema.Prompt prompt) {
return new McpStatelessServerFeatures.SyncPromptSpecification( return new McpStatelessServerFeatures.SyncPromptSpecification(
prompt, prompt,
(exchange, arguments) -> (exchange, arguments) -> promptsContext.callPrompt(jwtFilter, prompt.name(), arguments));
promptsContext.callPrompt(jwtFilter, prompt.name(), arguments).getResult());
} }
} }

View File

@ -16,7 +16,7 @@ public class DefaultPromptsContext {
return getPrompts(promptFilePath); return getPrompts(promptFilePath);
} }
public WrappedGetPromptResult callPrompt( public McpSchema.GetPromptResult callPrompt(
JwtFilter jwtFilter, String promptName, McpSchema.GetPromptRequest promptRequest) { JwtFilter jwtFilter, String promptName, McpSchema.GetPromptRequest promptRequest) {
Map<String, Object> params = promptRequest.arguments(); Map<String, Object> params = promptRequest.arguments();
CatalogSecurityContext securityContext = CatalogSecurityContext securityContext =
@ -25,7 +25,7 @@ public class DefaultPromptsContext {
"Catalog Principal: {} is trying to call the prompt: {}", "Catalog Principal: {} is trying to call the prompt: {}",
securityContext.getUserPrincipal().getName(), securityContext.getUserPrincipal().getName(),
promptName); promptName);
WrappedGetPromptResult result; McpSchema.GetPromptResult result;
try { try {
switch (promptName) { switch (promptName) {
case "create-greeting": case "create-greeting":
@ -35,17 +35,14 @@ public class DefaultPromptsContext {
result = new SearchPrompt().callPrompt(promptRequest); result = new SearchPrompt().callPrompt(promptRequest);
break; break;
default: default:
return new WrappedGetPromptResult( return new McpSchema.GetPromptResult("error", new ArrayList<>());
new McpSchema.GetPromptResult("error", new ArrayList<>()), true);
} }
return result; return result;
} catch (Exception ex) { } catch (Exception ex) {
LOG.error("Error executing tool: {}", ex.getMessage()); LOG.error("Error executing tool: {}", ex.getMessage());
return new WrappedGetPromptResult( return new McpSchema.GetPromptResult(
new McpSchema.GetPromptResult( String.format("Error executing tool: %s", ex.getMessage()), new ArrayList<>());
String.format("Error executing tool: %s", ex.getMessage()), new ArrayList<>()),
false);
} }
} }
} }

View File

@ -6,15 +6,13 @@ import java.util.List;
public class GreetingsPrompt implements McpPrompt { public class GreetingsPrompt implements McpPrompt {
@Override @Override
public WrappedGetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) { public McpSchema.GetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) {
return new WrappedGetPromptResult( return new McpSchema.GetPromptResult(
new McpSchema.GetPromptResult( null,
null, List.of(
List.of( new McpSchema.PromptMessage(
new McpSchema.PromptMessage( McpSchema.Role.ASSISTANT,
McpSchema.Role.ASSISTANT, new McpSchema.TextContent(
new McpSchema.TextContent( "Please generate a greeting in ${style} style to ${name}."))));
"Please generate a greeting in ${style} style to ${name}.")))),
false);
} }
} }

View File

@ -3,5 +3,5 @@ package org.openmetadata.mcp.prompts;
import io.modelcontextprotocol.spec.McpSchema; import io.modelcontextprotocol.spec.McpSchema;
public interface McpPrompt { public interface McpPrompt {
WrappedGetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest); McpSchema.GetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest);
} }

View File

@ -6,7 +6,7 @@ import java.util.Map;
public class SearchPrompt implements McpPrompt { public class SearchPrompt implements McpPrompt {
@Override @Override
public WrappedGetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) { public McpSchema.GetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) {
Map<String, Object> params = promptRequest.arguments(); Map<String, Object> params = promptRequest.arguments();
String query = (String) params.get("query"); String query = (String) params.get("query");
int limit = 10; int limit = 10;
@ -19,16 +19,14 @@ public class SearchPrompt implements McpPrompt {
} }
} }
String entityType = (String) params.get("entity_type"); String entityType = (String) params.get("entity_type");
return new WrappedGetPromptResult( return new McpSchema.GetPromptResult(
new McpSchema.GetPromptResult( "Message can be used to get search results",
"Message can be used to get search results", List.of(
List.of( new McpSchema.PromptMessage(
new McpSchema.PromptMessage( McpSchema.Role.ASSISTANT,
McpSchema.Role.ASSISTANT, new McpSchema.TextContent(
new McpSchema.TextContent( String.format(
String.format( "Search for `%s` in OpenMetadata where entity type is `%s` and with a limit of `%s` . Summarise the information for all the results properly and also make sure to provide clickable links using the href field from the results.",
"Search for `%s` in OpenMetadata where entity type is `%s` and with a limit of `%s` . Summarise the information for all the results properly and also make sure to provide clickable links using the href field from the results.", query, entityType, limit)))));
query, entityType, limit))))),
false);
} }
} }

View File

@ -1,17 +0,0 @@
package org.openmetadata.mcp.prompts;
import io.modelcontextprotocol.spec.McpSchema;
import lombok.Getter;
import lombok.Setter;
@Getter
@Setter
public class WrappedGetPromptResult {
private McpSchema.GetPromptResult result;
private boolean isError;
public WrappedGetPromptResult(McpSchema.GetPromptResult result, boolean isError) {
this.result = result;
this.isError = isError;
}
}

View File

@ -1,9 +1,14 @@
package org.openmetadata.mcp.tools; package org.openmetadata.mcp.tools;
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
import static org.openmetadata.service.search.SearchUtil.mapEntityTypesToIndexNames; import static org.openmetadata.service.search.SearchUtil.mapEntityTypesToIndexNames;
import static org.openmetadata.service.security.DefaultAuthorizer.getSubjectContext; import static org.openmetadata.service.security.DefaultAuthorizer.getSubjectContext;
import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import es.org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import es.org.elasticsearch.xcontent.XContentParser;
import es.org.elasticsearch.xcontent.XContentType;
import jakarta.ws.rs.core.Response; import jakarta.ws.rs.core.Response;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@ -17,6 +22,7 @@ import org.openmetadata.schema.search.SearchRequest;
import org.openmetadata.schema.utils.JsonUtils; import org.openmetadata.schema.utils.JsonUtils;
import org.openmetadata.service.Entity; import org.openmetadata.service.Entity;
import org.openmetadata.service.limits.Limits; import org.openmetadata.service.limits.Limits;
import org.openmetadata.service.search.elasticsearch.EsUtils;
import org.openmetadata.service.security.Authorizer; import org.openmetadata.service.security.Authorizer;
import org.openmetadata.service.security.auth.CatalogSecurityContext; import org.openmetadata.service.security.auth.CatalogSecurityContext;
import org.openmetadata.service.security.policyevaluator.SubjectContext; import org.openmetadata.service.security.policyevaluator.SubjectContext;
@ -86,17 +92,30 @@ public class SearchMetadataTool implements McpTool {
throws IOException { throws IOException {
LOG.info("Executing searchMetadata with params: {}", params); LOG.info("Executing searchMetadata with params: {}", params);
String query = params.containsKey("query") ? (String) params.get("query") : "*"; String query = params.containsKey("query") ? (String) params.get("query") : "*";
int limit = 10; String entityType = params.containsKey("entityType") ? (String) params.get("entityType") : null;
if (params.containsKey("limit")) { String index = entityType == null ? "dataAsset" : mapEntityTypesToIndexNames(entityType);
Object limitObj = params.get("limit");
int size = 10;
if (params.containsKey("size")) {
Object limitObj = params.get("size");
if (limitObj instanceof Number number) { if (limitObj instanceof Number number) {
limit = number.intValue(); size = number.intValue();
} else if (limitObj instanceof String string) { } else if (limitObj instanceof String string) {
limit = Integer.parseInt(string); size = Integer.parseInt(string);
} }
} }
limit = Math.min(limit, 50); int from = 0;
if (params.containsKey("from")) {
Object limitObj = params.get("from");
if (limitObj instanceof Number number) {
from = number.intValue();
} else if (limitObj instanceof String string) {
from = Integer.parseInt(string);
}
}
size = Math.min(size, 50);
boolean includeDeleted = false; boolean includeDeleted = false;
if (params.containsKey("include_deleted")) { if (params.containsKey("include_deleted")) {
@ -120,28 +139,63 @@ public class SearchMetadataTool implements McpTool {
} }
} }
String entityType = String queryFilter = null;
params.containsKey("entity_type") ? (String) params.get("entity_type") : null; if (params.containsKey("queryFilter")) {
String index = mapEntityTypesToIndexNames(entityType); queryFilter = (String) params.get("queryFilter");
JsonNode queryNode = JsonUtils.getObjectMapper().readTree(queryFilter);
if (!queryNode.has("query")) {
ObjectNode queryWrapper = JsonUtils.getObjectMapper().createObjectNode();
queryWrapper.set("query", queryNode);
queryFilter = JsonUtils.pojoToJson(queryWrapper);
} else {
queryFilter = JsonUtils.pojoToJson(queryNode);
}
LOG.debug("Applied query filter to query: {}", queryFilter);
} else {
}
LOG.info( LOG.info(
"Search query: {}, index: {}, limit: {}, includeDeleted: {}", "Search query: {}, index: {}, limit: {}, includeDeleted: {}",
query, queryFilter,
index, index,
limit, size,
includeDeleted); includeDeleted);
SearchRequest searchRequest = SearchRequest searchRequest;
new SearchRequest() if (!nullOrEmpty(queryFilter)) {
.withQuery(query) // When queryFilter is provided, use it directly as it's already a transformed OpenSearch
.withIndex(Entity.getSearchRepository().getIndexOrAliasName(index)) // query
.withSize(limit) searchRequest =
.withFrom(0) new SearchRequest()
.withFetchSource(true) .withIndex(Entity.getSearchRepository().getIndexOrAliasName(index))
.withDeleted(includeDeleted); .withQueryFilter(queryFilter)
.withSize(size)
.withFrom(from)
.withFetchSource(true)
.withDeleted(includeDeleted);
} else {
// Fallback to basic query when no queryFilter is provided
searchRequest =
new SearchRequest()
.withQuery(query)
.withIndex(Entity.getSearchRepository().getIndexOrAliasName(index))
.withSize(size)
.withFrom(from)
.withFetchSource(true)
.withDeleted(includeDeleted);
}
SubjectContext subjectContext = getSubjectContext(securityContext); SubjectContext subjectContext = getSubjectContext(securityContext);
Response response = Entity.getSearchRepository().search(searchRequest, subjectContext); Response response;
if (!nullOrEmpty(queryFilter)) {
// Use direct query method when queryFilter is provided since it's already a transformed query
response = Entity.getSearchRepository().searchWithDirectQuery(searchRequest, subjectContext);
} else {
// Use regular search for basic queries
response = Entity.getSearchRepository().search(searchRequest, subjectContext);
}
Map<String, Object> searchResponse; Map<String, Object> searchResponse;
if (response.getEntity() instanceof String responseStr) { if (response.getEntity() instanceof String responseStr) {
@ -153,7 +207,7 @@ public class SearchMetadataTool implements McpTool {
searchResponse = JsonUtils.convertValue(response.getEntity(), Map.class); searchResponse = JsonUtils.convertValue(response.getEntity(), Map.class);
} }
return buildEnhancedSearchResponse(searchResponse, query, limit, requestedFields); return buildEnhancedSearchResponse(searchResponse, query, size, requestedFields);
} }
@Override @Override
@ -181,31 +235,29 @@ public class SearchMetadataTool implements McpTool {
} }
List<Object> hits = safeGetList(topHits.get("hits")); List<Object> hits = safeGetList(topHits.get("hits"));
if (hits == null || hits.isEmpty()) {
return createEmptyResponse();
}
List<Map<String, Object>> cleanedResults = new ArrayList<>(); List<Map<String, Object>> cleanedResults = new ArrayList<>();
int totalResults = 0; int totalResults = 0;
if (hits != null && !hits.isEmpty()) {
if (topHits.get("total") instanceof Map) { if (topHits.get("total") instanceof Map) {
Map<String, Object> totalObj = safeGetMap(topHits.get("total")); Map<String, Object> totalObj = safeGetMap(topHits.get("total"));
if (totalObj != null && totalObj.get("value") instanceof Number) { if (totalObj != null && totalObj.get("value") instanceof Number) {
totalResults = ((Number) totalObj.get("value")).intValue(); totalResults = ((Number) totalObj.get("value")).intValue();
}
} else if (topHits.get("total") instanceof Number) {
totalResults = ((Number) topHits.get("total")).intValue();
} }
} else if (topHits.get("total") instanceof Number) {
totalResults = ((Number) topHits.get("total")).intValue();
}
for (Object hitObj : hits) { for (Object hitObj : hits) {
Map<String, Object> hit = safeGetMap(hitObj); Map<String, Object> hit = safeGetMap(hitObj);
if (hit == null) continue; if (hit == null) continue;
Map<String, Object> source = safeGetMap(hit.get("_source")); Map<String, Object> source = safeGetMap(hit.get("_source"));
if (source == null) continue; if (source == null) continue;
Map<String, Object> cleanedSource = cleanSearchResult(source, requestedFields); Map<String, Object> cleanedSource = cleanSearchResult(source, requestedFields);
cleanedResults.add(cleanedSource); cleanedResults.add(cleanedSource);
}
} }
Map<String, Object> result = new HashMap<>(); Map<String, Object> result = new HashMap<>();
@ -214,11 +266,16 @@ public class SearchMetadataTool implements McpTool {
result.put("returnedCount", cleanedResults.size()); result.put("returnedCount", cleanedResults.size());
result.put("query", query); result.put("query", query);
// Add aggregations if present in search response
if (searchResponse.containsKey("aggregations")) {
result.put("aggregations", searchResponse.get("aggregations"));
}
if (totalResults > requestedLimit) { if (totalResults > requestedLimit) {
result.put( result.put(
"message", "message",
String.format( String.format(
"Found %d total results, showing first %d. Use pagination or refine your search for more specific results.", "Found %d total results, showing first %d. Use pagination or refine your search for more specific results, you can call these 3 times by yourself with pagination , and then only if the user ask for more paginate.",
totalResults, cleanedResults.size())); totalResults, cleanedResults.size()));
result.put("hasMore", true); result.put("hasMore", true);
} }
@ -239,11 +296,18 @@ public class SearchMetadataTool implements McpTool {
// Add any specifically requested additional fields // Add any specifically requested additional fields
for (String field : requestedFields) { for (String field : requestedFields) {
if (source.containsKey(field) && !ESSENTIAL_FIELDS_ONLY.contains(field)) { if (source.containsKey(field)) {
result.put(field, source.get(field)); result.put(field, source.get(field));
} }
} }
// Cleanup Description in case of huge description
if (result.containsKey("description")) {
String description = (String) result.get("description");
if (description.length() > 3000) {
result.put("description", description.substring(0, 300) + "...");
}
}
return result; return result;
} }
@ -271,4 +335,15 @@ public class SearchMetadataTool implements McpTool {
private static List<Object> safeGetList(Object obj) { private static List<Object> safeGetList(Object obj) {
return (obj instanceof List) ? (List<Object>) obj : null; return (obj instanceof List) ? (List<Object>) obj : null;
} }
private XContentParser createXContentParser(String query) throws IOException {
try {
return XContentType.JSON
.xContent()
.createParser(EsUtils.esXContentRegistry, LoggingDeprecationHandler.INSTANCE, query);
} catch (IOException e) {
LOG.error("Failed to create XContentParser", e);
throw e;
}
}
} }

View File

@ -2,30 +2,76 @@
"tools": [ "tools": [
{ {
"name": "search_metadata", "name": "search_metadata",
"description": "Find your data and business terms in OpenMetadata. For example if the user asks to 'find tables that contain customers information', then 'customers' should be the query, and the entity_type should be 'table'. Here make sure to use 'Href' is available in result to create a hyperlink to the entity in OpenMetadata.", "description": "Find your data and business terms in OpenMetadata. This tool automatically generates optimized OpenSearch queries based on natural language input and searches the OpenMetadata catalog for tables, dashboards, topics, pipelines, and other data assets.",
"parameters": { "parameters": {
"description": "The search query to find metadata in the OpenMetadata catalog, entity type could be table, topic etc. Limit can be used to paginate on the data.", "description": "Search for metadata in the OpenMetadata catalog using natural language queries. The tool automatically converts your search intent into optimized OpenSearch queries with proper filtering.",
"type": "object", "type": "object",
"properties": { "properties": {
"query": { "query": {
"type": "string", "type": "string",
"description": "Keywords to use for searching." "description": "Optional , Only specify if queryFilter is not passed .Natural language search query that will be converted to OpenSearch syntax in queryFilter. Examples: 'tables owned by marketing', 'dashboards with customer data', 'find all tier1 tables', 'tables with column SKU', 'count BigQuery services', 'tables tagged with PII.Sensitive'."
}, },
"entity_type": { "entityType": {
"type": "string", "type": "string",
"description": "Optional entity type to filter results. The OpenMetadata entities are categorized as follows: Service Entities include databaseService, messagingService, apiService, dashboardService, pipelineService, storageService, mlmodelService, metadataService, and searchService; Data Asset Entities include apiCollection, apiEndpoint, table, storedProcedure, database, databaseSchema, dashboard, dashboardDataModel, pipeline, chart, topic, searchIndex, mlmodel, and container; User Entities include user and team; Domain entities include domain and dataProduct; and Governance entities include metric, glossary, and glossaryTerm." "description": "Optional entity type to filter results, only specify if queryFilter is not passed. If not specified, will be auto-detected from the query. Available types: Service Entities (databaseService, messagingService, apiService, dashboardService, pipelineService, storageService, mlmodelService, metadataService, searchService), Data Asset Entities (apiCollection, apiEndpoint, table, storedProcedure, database, databaseSchema, dashboard, dashboardDataModel, pipeline, chart, topic, searchIndex, mlmodel, container), User Entities (user, team), Domain entities (domain, dataProduct), Governance entities (metric, glossary, glossaryTerm)."
}, },
"limit": { "queryFilter": {
"type": "string",
"description": "REQUIRED: OpenSearch JSON query filter that you must generate based on the natural language query. Use the comprehensive guidelines below to create the proper OpenSearch query.\n\n=== QUERY GENERATION INSTRUCTIONS ===\n\nYou are an expert OpenSearch query generator for OpenMetadata. Your task is to translate the natural language query into a precise OpenSearch JSON query.\n\n## QUERY TYPE DETECTION\nFirst determine if the user wants:\n1. **SEARCH QUERY** - Finding specific entities (e.g., \"find tables owned by marketing\")\n2. **AGGREGATION QUERY** - Counting, grouping, statistics (e.g., \"how many BigQuery services\", \"count assets by team\")\n3. **BOTH** - Search and aggregation combined\n\n## FIELD MAPPING FOR TABLES\n- displayName.keyword: Exact match on table display name\n- name: Text analysis on table name with tokenization\n- name.ngram: Partial matching using n-grams\n- description: Full-text search on descriptions\n- fullyQualifiedName: Complete hierarchical name search\n- columns.name.keyword: Exact match on column names\n- columns.description: Full-text search on column descriptions\n- tags.tagFQN.text: Search within tag name parts\n- tier.tagFQN.text: Search within tier classification parts\n- domain.displayName.keyword: Exact match on domain\n- dataProducts.displayName.keyword: Exact match on data products\n- owners.name: Owner name (use 'owners' plural, not 'owner')\n- service.name: Service name filtering\n- extension.*: Custom properties (e.g., extension.businessOwner.name)\n\n## CRITICAL QUERY RULES\n1. **Entity Type Filtering**: Always add {\"term\": {\"entityType\": \"table\"}} when searching tables (use SINGULAR form)\n2. **Field Selection**: Use 'term' for exact matches (.keyword fields), 'match' for text search\n3. **Owners Field**: Use 'owners.name' (plural) NOT 'owner.name'\n4. **Tier Format**: Use \"Tier.Tier1\", \"Tier.Tier2\", \"Tier.Tier3\" format\n5. **Tags**: Use 'tags.tagFQN' for tag filtering\n6. **Columns**: Use direct match queries, NOT nested (columns are NOT nested)\n7. **Custom Properties**: Use 'extension.propertyName' pattern\n\n## EXAMPLE QUERY PATTERNS\n\n**Tables owned by marketing:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"marketing\"}}]}}\n```\n\n**Tables with customer data:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"customer\"}}]}}\n```\n\n**Tier1 tables:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}]}}\n```\n\n**Tables with PII tag:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tags.tagFQN\": \"PII.Sensitive\"}}]}}\n```\n\n**Count query - How many BigQuery services:**\n```json\n{\"size\": 0, \"query\": {\"bool\": {\"must\": [{\"term\": {\"entityType\": \"databaseService\"}}, {\"term\": {\"serviceType\": \"BigQuery\"}}]}}, \"aggs\": {\"total_count\": {\"value_count\": {\"field\": \"_id\"}}}}\n```\n\n**Tables with business owner infrastructure:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"extension.businessOwner.name\": \"infrastructure\"}}]}}\n```\n\n## AGGREGATION FIELDS\nFor counting/grouping use these aggregation fields:\n- serviceType, service.displayName.keyword, entityType\n- tier.tagFQN, owners.displayName.keyword, domain.displayName.keyword\n- tags.tagFQN, database.name.keyword, databaseSchema.name.keyword\n\n## IMPORTANT MISTAKES TO AVOID\n❌ Missing entityType filter when type mentioned\n❌ Using 'owner.name' instead of 'owners.name'\n❌ Using nested queries for columns (columns are NOT nested)\n❌ Wrong tier format (use \"Tier.Tier1\" not \"tier1\")\n❌ Using 'tags.tagFQN' for tier (use 'tier.tagFQN')\n\n**Generate the complete OpenSearch JSON query and return ONLY the JSON object.**"
},
"from": {
"type": "integer", "type": "integer",
"description": "Maximum number of results to return. Default is 10. Try to keep this number low unless the user asks for more." "description": "In case the request more than 'size' results, this is the offset from the first result you want to fetch. Default is 0.",
"default": 10
},
"size": {
"type": "integer",
"description": "Number of results to return. Default is 10, we can iteratively page through results by increasing 'from'. Maximum allowed is 50.",
"default": 10
}, },
"fields": { "fields": {
"type": "string", "type": "string",
"description": "Comma-separated list of additional fields to include in the response. By default, returns essential fields: name, displayName, fullyQualifiedName, description, entityType, service, database, databaseSchema, serviceType, href, tags, owners, tier, tableType, columnNames. Available additional fields by entity type:\n\nTable entities: columns, schemaDefinition, queries, upstreamLineage, entityRelationship\nTopic entities: messageSchema, partitions, replicationFactor\nDashboard entities: charts, dataModels, project\nPipeline entities: tasks, pipelineUrl, scheduleInterval\nAll entities: createdAt, updatedAt, changeDescription, extension, domain, dataProducts, lifeCycle, sourceHash\n\nExample: 'columns,queries' to include column details and sample queries for tables." "description": "Comma-separated additional fields to include. Default returns: name, displayName, fullyQualifiedName, description, entityType, service, database, databaseSchema, serviceType, href, tags, owners, tier, tableType, columnNames.\n\nAdditional fields by entity type:\n- Table entities: columns, schemaDefinition, queries, upstreamLineage, entityRelationship\n- Topic entities: messageSchema, partitions, replicationFactor \n- Dashboard entities: charts, dataModels, project\n- Pipeline entities: tasks, pipelineUrl, scheduleInterval\n- All entities: createdAt, updatedAt, changeDescription, extension, domain, dataProducts, lifeCycle, sourceHash\n\nExample: 'columns,queries' for table column details and sample queries."
} }
}, },
"required": [ "required": ["queryFilter"],
"query" "examples": [
{
"description": "Find tables owned by marketing team",
"query": "tables owned by marketing",
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"marketing\"}}]}}",
"entityType": "table"
},
{
"description": "Search for customer data across all tables",
"query": "tables with customer data",
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"customer\"}}]}}"
},
{
"description": "Find all Tier 1 classified tables",
"query": "tables with tier1 classification",
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}]}}"
},
{
"description": "Count BigQuery services",
"query": "how many BigQuery services are there",
"queryFilter": "{\"size\": 0, \"query\": {\"bool\": {\"must\": [{\"term\": {\"entityType\": \"databaseService\"}}, {\"term\": {\"serviceType\": \"BigQuery\"}}]}}, \"aggs\": {\"total_count\": {\"value_count\": {\"field\": \"_id\"}}}}"
},
{
"description": "Find tables with specific column",
"query": "tables with column SKU",
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"SKU\"}}]}}"
},
{
"description": "Search by tags",
"query": "tables tagged with PII.Sensitive",
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tags.tagFQN\": \"PII.Sensitive\"}}]}}"
},
{
"description": "Custom property search",
"query": "tables where business owner is infrastructure",
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"extension.businessOwner.name\": \"infrastructure\"}}]}}"
}
] ]
} }
}, },

View File

@ -306,6 +306,9 @@ public interface SearchClient<T> {
Response searchWithNLQ(SearchRequest request, SubjectContext subjectContext) throws IOException; Response searchWithNLQ(SearchRequest request, SubjectContext subjectContext) throws IOException;
Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
throws IOException;
Response getDocByID(String indexName, String entityId) throws IOException; Response getDocByID(String indexName, String entityId) throws IOException;
default ExecutorService getAsyncExecutor() { default ExecutorService getAsyncExecutor() {

View File

@ -1197,6 +1197,11 @@ public class SearchRepository {
return searchClient.searchWithNLQ(request, subjectContext); return searchClient.searchWithNLQ(request, subjectContext);
} }
public Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
throws IOException {
return searchClient.searchWithDirectQuery(request, subjectContext);
}
public Response getDocument(String indexName, UUID entityId) throws IOException { public Response getDocument(String indexName, UUID entityId) throws IOException {
return searchClient.getDocByID(indexName, entityId.toString()); return searchClient.getDocByID(indexName, entityId.toString());
} }

View File

@ -905,6 +905,40 @@ public class ElasticSearchClient implements SearchClient<RestHighLevelClient> {
} }
} }
@Override
public Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
throws IOException {
LOG.info("Executing direct OpenSearch query: {}", request.getQueryFilter());
try {
XContentParser parser = createXContentParser(request.getQueryFilter());
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser);
searchSourceBuilder.from(request.getFrom());
searchSourceBuilder.size(request.getSize());
// Apply RBAC constraints
buildSearchRBACQuery(subjectContext, searchSourceBuilder);
// Add aggregations if needed
ElasticSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory();
sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex());
es.org.elasticsearch.action.search.SearchRequest esRequest =
new es.org.elasticsearch.action.search.SearchRequest(request.getIndex());
esRequest.source(searchSourceBuilder);
es.org.elasticsearch.action.search.SearchResponse response =
client.search(esRequest, RequestOptions.DEFAULT);
LOG.debug("Direct query search completed successfully");
return Response.status(Response.Status.OK).entity(response.toString()).build();
} catch (Exception e) {
LOG.error("Error executing direct query search: {}", e.getMessage(), e);
return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
.entity(String.format("Failed to execute direct query search: %s", e.getMessage()))
.build();
}
}
private Response fallbackToBasicSearch(SearchRequest request, SubjectContext subjectContext) { private Response fallbackToBasicSearch(SearchRequest request, SubjectContext subjectContext) {
try { try {
LOG.debug("Falling back to basic query_string search for NLQ: {}", request.getQuery()); LOG.debug("Falling back to basic query_string search for NLQ: {}", request.getQuery());

View File

@ -571,6 +571,41 @@ public class OpenSearchClient implements SearchClient<RestHighLevelClient> {
} }
} }
@Override
public Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
throws IOException {
LOG.info("Executing direct OpenSearch query: {}", request.getQueryFilter());
try {
XContentParser parser = createXContentParser(request.getQueryFilter());
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser);
searchSourceBuilder.from(request.getFrom());
searchSourceBuilder.size(request.getSize());
// Apply RBAC constraints
buildSearchRBACQuery(subjectContext, searchSourceBuilder);
// Add aggregations if needed
OpenSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory();
sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex());
os.org.opensearch.action.search.SearchRequest osRequest =
new os.org.opensearch.action.search.SearchRequest(request.getIndex());
osRequest.source(searchSourceBuilder);
// Use DFS Query Then Fetch for consistent scoring across shards
osRequest.searchType(SearchType.DFS_QUERY_THEN_FETCH);
SearchResponse response = client.search(osRequest, OPENSEARCH_REQUEST_OPTIONS);
LOG.debug("Direct query search completed successfully");
return Response.status(Response.Status.OK).entity(response.toString()).build();
} catch (Exception e) {
LOG.error("Error executing direct query search: {}", e.getMessage(), e);
return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
.entity(String.format("Failed to execute direct query search: %s", e.getMessage()))
.build();
}
}
private Response fallbackToBasicSearch(SearchRequest request, SubjectContext subjectContext) { private Response fallbackToBasicSearch(SearchRequest request, SubjectContext subjectContext) {
try { try {
LOG.debug("Falling back to basic query_string search for NLQ: {}", request.getQuery()); LOG.debug("Falling back to basic query_string search for NLQ: {}", request.getQuery());

View File

@ -1,38 +0,0 @@
{
"tools": [
{
"name": "create-greeting",
"description": "Generate a customized greeting message",
"arguments": [
{
"name": "name",
"description": "Name of the person to greet",
"required": true
},
{
"name": "style",
"description": "The style of greeting, such a formal, excited, or casual. If not specified casual will be used"
}
]
},
{
"name": "search_metadata",
"description": "Creates a prompt for Searching metadata in OpenMetadata.",
"arguments": [
{
"name": "query",
"description": "Keywords to use for searching.",
"required": true
},
{
"name": "entity_type",
"description": "Entity Type to Filter Report."
},
{
"name": "limit",
"description": "Maximum number of results to return. Default is 10. Try to keep this number low unless the user asks for more."
}
]
}
]
}

View File

@ -1,187 +0,0 @@
{
"tools": [
{
"name": "search_metadata",
"description": "Find your data and business terms in OpenMetadata. For example if the user asks to 'find tables that contain customers information', then 'customers' should be the query, and the entity_type should be 'table'. Here make sure to use 'Href' is available in result to create a hyperlink to the entity in OpenMetadata.",
"parameters": {
"description": "The search query to find metadata in the OpenMetadata catalog, entity type could be table, topic etc. Limit can be used to paginate on the data.",
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "Keywords to use for searching."
},
"entity_type": {
"type": "string",
"description": "Optional entity type to filter results. The OpenMetadata entities are categorized as follows: Service Entities include databaseService, messagingService, apiService, dashboardService, pipelineService, storageService, mlmodelService, metadataService, and searchService; Data Asset Entities include apiCollection, apiEndpoint, table, storedProcedure, database, databaseSchema, dashboard, dashboardDataModel, pipeline, chart, topic, searchIndex, mlmodel, and container; User Entities include user and team; Domain entities include domain and dataProduct; and Governance entities include metric, glossary, and glossaryTerm."
},
"limit": {
"type": "integer",
"description": "Maximum number of results to return. Default is 10. Try to keep this number low unless the user asks for more."
}
},
"required": [
"query"
]
}
},
{
"name": "get_entity_details",
"description": "Get detailed information about a specific entity",
"parameters": {
"description": "Fqn is the fully qualified name of the entity. Entity type could be table, topic etc.",
"type": "object",
"properties": {
"entity_type": {
"type": "string",
"description": "Type of entity"
},
"fqn": {
"type": "string",
"description": "Fully qualified name of the entity"
}
},
"required": [
"entity_type",
"fqn"
]
}
},
{
"name": "create_glossary_term",
"description": "Creates a new Glossary Term. Note that a glossary term must be part of a Glossary, so the glossary must be specified in the parameters. If you can't find the right glossary to use, respond back to the user to create a new Glossary first. Note that you can help the user to create the Glossary as well. If you don't find any Glossary that could be related, please list to the user the available Glossaries so users can choose if they want to create or reuse something. Also, note that glossary terms can be hierarchical: for example, a glossary term 'Accounts' can have a child term 'Credit Account', 'Savings Account', etc. So if you find any terms that can be related, it might make sense to create a new term as a child of an existing term.",
"parameters": {
"type": "object",
"properties": {
"glossary": {
"type": "string",
"description": "Glossary in which the term belongs. This should be its fully qualified name."
},
"parentTerm": {
"type": "string",
"description": "Optional parent term for the new term. This should be its fully qualified name defined as <glossary>.<term>. If the Glossary Term has other parents, the Fully Qualified Name will be <glossary>.<parent>...<term>. If not provided, the term will be created at the root level of the glossary."
},
"name": {
"type": "string",
"description": "Glossary Term name."
},
"description": {
"type": "string",
"description": "Glossary Term description."
},
"owners": {
"type": "array",
"description": "Glossary Term owner. This could be an OpenMetadata User or Team. If you don't know who the owner is, you can leave this empty, but let the user know that they can add owners later.",
"items": {
"type": "string"
}
}
},
"required": [
"glossary",
"name",
"description"
]
}
},
{
"name": "create_glossary",
"description": "Creates a new Glossary. A Glossary is a collection of terms that are used to define the business vocabulary of an organization. Typically, similar terms are grouped together in a Glossary. For example, a Glossary names 'Marketing' could contain terms like 'Campaign', 'Lead', 'Opportunity', etc.",
"parameters": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Glossary Term name."
},
"description": {
"type": "string",
"description": "Glossary Term description."
},
"owners": {
"type": "array",
"description": "Glossary Term owner. This could be an OpenMetadata User or Team. If you don't know who the owner is, you can leave this empty, but let the user know that they can add owners later.",
"items": {
"type": "string"
}
},
"reviewers": {
"type": "array",
"description": "Glossary Term owner. This could be an OpenMetadata User or Team. If you don't know who the owner is, you can leave this empty, but let the user know that they can add owners later.",
"items": {
"type": "string"
}
},
"mutuallyExclusive": {
"type": "boolean",
"description": "Glossary terms that are direct children in this glossary are mutually exclusive. When mutually exclusive is `true` only one term can be used to tag an entity. When mutually exclusive is `false`, multiple terms from this group can be used to tag an entity. This is an important setting. If you are not sure, ask the user to clarify. If the user doesn't know, set it to `false`.",
"default": false
}
},
"required": [
"name",
"description",
"mutuallyExclusive"
]
}
},
{
"name": "patch_entity",
"description": "Patches an Entity based on a JSONPatch. Beforehand the Entity should be validated by finding it and creating a proper patch.",
"parameters": {
"type": "object",
"properties": {
"entityType": {
"type": "string",
"description": "Entity Type to patch."
},
"entityFqn": {
"type": "string",
"description": "Fully Qualified Name of the Entity to be patched."
},
"patch": {
"type": "string",
"description": "JSONPatch as String format."
}
},
"required": [
"entityType",
"entityFqn",
"patch"
]
}
},
{
"name": "get_entity_lineage",
"description": "Get detailed information about lineage (upstream/downstream dependencies) of a specific entity. Use this for root cause (upstream entities) or impact (downstream entities) analysis and explaining dependencies between entities.",
"parameters": {
"description": "Fqn is the fully qualified name of the entity. Entity type could be table, topic etc.",
"type": "object",
"properties": {
"entity_type": {
"type": "string",
"description": "Type of entity"
},
"fqn": {
"type": "string",
"description": "Fully qualified name of the entity"
},
"upstream_depth": {
"type": "integer",
"description": "Depth for reaching upstream entities. Default is 5."
},
"downstream_depth": {
"type": "integer",
"description": "Depth for reaching downstream entities. Default is 5."
}
},
"required": [
"entity_type",
"fqn",
"upstream_depth",
"downstream_depth"
]
}
}
]
}