mirror of
https://github.com/open-metadata/OpenMetadata.git
synced 2025-09-28 18:35:07 +00:00
Added better Searching via Claude tool (#23343)
(cherry picked from commit 455d9cd68cb96f6f26a80eae7e5ef85ccf26a37e)
This commit is contained in:
parent
eab715d279
commit
c6f17ee84a
@ -130,7 +130,6 @@ public class McpServer implements McpServerProvider {
|
||||
private McpStatelessServerFeatures.SyncPromptSpecification getPrompt(McpSchema.Prompt prompt) {
|
||||
return new McpStatelessServerFeatures.SyncPromptSpecification(
|
||||
prompt,
|
||||
(exchange, arguments) ->
|
||||
promptsContext.callPrompt(jwtFilter, prompt.name(), arguments).getResult());
|
||||
(exchange, arguments) -> promptsContext.callPrompt(jwtFilter, prompt.name(), arguments));
|
||||
}
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ public class DefaultPromptsContext {
|
||||
return getPrompts(promptFilePath);
|
||||
}
|
||||
|
||||
public WrappedGetPromptResult callPrompt(
|
||||
public McpSchema.GetPromptResult callPrompt(
|
||||
JwtFilter jwtFilter, String promptName, McpSchema.GetPromptRequest promptRequest) {
|
||||
Map<String, Object> params = promptRequest.arguments();
|
||||
CatalogSecurityContext securityContext =
|
||||
@ -25,7 +25,7 @@ public class DefaultPromptsContext {
|
||||
"Catalog Principal: {} is trying to call the prompt: {}",
|
||||
securityContext.getUserPrincipal().getName(),
|
||||
promptName);
|
||||
WrappedGetPromptResult result;
|
||||
McpSchema.GetPromptResult result;
|
||||
try {
|
||||
switch (promptName) {
|
||||
case "create-greeting":
|
||||
@ -35,17 +35,14 @@ public class DefaultPromptsContext {
|
||||
result = new SearchPrompt().callPrompt(promptRequest);
|
||||
break;
|
||||
default:
|
||||
return new WrappedGetPromptResult(
|
||||
new McpSchema.GetPromptResult("error", new ArrayList<>()), true);
|
||||
return new McpSchema.GetPromptResult("error", new ArrayList<>());
|
||||
}
|
||||
|
||||
return result;
|
||||
} catch (Exception ex) {
|
||||
LOG.error("Error executing tool: {}", ex.getMessage());
|
||||
return new WrappedGetPromptResult(
|
||||
new McpSchema.GetPromptResult(
|
||||
String.format("Error executing tool: %s", ex.getMessage()), new ArrayList<>()),
|
||||
false);
|
||||
return new McpSchema.GetPromptResult(
|
||||
String.format("Error executing tool: %s", ex.getMessage()), new ArrayList<>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6,15 +6,13 @@ import java.util.List;
|
||||
public class GreetingsPrompt implements McpPrompt {
|
||||
|
||||
@Override
|
||||
public WrappedGetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) {
|
||||
return new WrappedGetPromptResult(
|
||||
new McpSchema.GetPromptResult(
|
||||
public McpSchema.GetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) {
|
||||
return new McpSchema.GetPromptResult(
|
||||
null,
|
||||
List.of(
|
||||
new McpSchema.PromptMessage(
|
||||
McpSchema.Role.ASSISTANT,
|
||||
new McpSchema.TextContent(
|
||||
"Please generate a greeting in ${style} style to ${name}.")))),
|
||||
false);
|
||||
"Please generate a greeting in ${style} style to ${name}."))));
|
||||
}
|
||||
}
|
||||
|
@ -3,5 +3,5 @@ package org.openmetadata.mcp.prompts;
|
||||
import io.modelcontextprotocol.spec.McpSchema;
|
||||
|
||||
public interface McpPrompt {
|
||||
WrappedGetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest);
|
||||
McpSchema.GetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest);
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ import java.util.Map;
|
||||
|
||||
public class SearchPrompt implements McpPrompt {
|
||||
@Override
|
||||
public WrappedGetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) {
|
||||
public McpSchema.GetPromptResult callPrompt(McpSchema.GetPromptRequest promptRequest) {
|
||||
Map<String, Object> params = promptRequest.arguments();
|
||||
String query = (String) params.get("query");
|
||||
int limit = 10;
|
||||
@ -19,8 +19,7 @@ public class SearchPrompt implements McpPrompt {
|
||||
}
|
||||
}
|
||||
String entityType = (String) params.get("entity_type");
|
||||
return new WrappedGetPromptResult(
|
||||
new McpSchema.GetPromptResult(
|
||||
return new McpSchema.GetPromptResult(
|
||||
"Message can be used to get search results",
|
||||
List.of(
|
||||
new McpSchema.PromptMessage(
|
||||
@ -28,7 +27,6 @@ public class SearchPrompt implements McpPrompt {
|
||||
new McpSchema.TextContent(
|
||||
String.format(
|
||||
"Search for `%s` in OpenMetadata where entity type is `%s` and with a limit of `%s` . Summarise the information for all the results properly and also make sure to provide clickable links using the href field from the results.",
|
||||
query, entityType, limit))))),
|
||||
false);
|
||||
query, entityType, limit)))));
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +0,0 @@
|
||||
package org.openmetadata.mcp.prompts;
|
||||
|
||||
import io.modelcontextprotocol.spec.McpSchema;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public class WrappedGetPromptResult {
|
||||
private McpSchema.GetPromptResult result;
|
||||
private boolean isError;
|
||||
|
||||
public WrappedGetPromptResult(McpSchema.GetPromptResult result, boolean isError) {
|
||||
this.result = result;
|
||||
this.isError = isError;
|
||||
}
|
||||
}
|
@ -1,9 +1,14 @@
|
||||
package org.openmetadata.mcp.tools;
|
||||
|
||||
import static org.openmetadata.common.utils.CommonUtil.nullOrEmpty;
|
||||
import static org.openmetadata.service.search.SearchUtil.mapEntityTypesToIndexNames;
|
||||
import static org.openmetadata.service.security.DefaultAuthorizer.getSubjectContext;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import es.org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
|
||||
import es.org.elasticsearch.xcontent.XContentParser;
|
||||
import es.org.elasticsearch.xcontent.XContentType;
|
||||
import jakarta.ws.rs.core.Response;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@ -17,6 +22,7 @@ import org.openmetadata.schema.search.SearchRequest;
|
||||
import org.openmetadata.schema.utils.JsonUtils;
|
||||
import org.openmetadata.service.Entity;
|
||||
import org.openmetadata.service.limits.Limits;
|
||||
import org.openmetadata.service.search.elasticsearch.EsUtils;
|
||||
import org.openmetadata.service.security.Authorizer;
|
||||
import org.openmetadata.service.security.auth.CatalogSecurityContext;
|
||||
import org.openmetadata.service.security.policyevaluator.SubjectContext;
|
||||
@ -86,17 +92,30 @@ public class SearchMetadataTool implements McpTool {
|
||||
throws IOException {
|
||||
LOG.info("Executing searchMetadata with params: {}", params);
|
||||
String query = params.containsKey("query") ? (String) params.get("query") : "*";
|
||||
int limit = 10;
|
||||
if (params.containsKey("limit")) {
|
||||
Object limitObj = params.get("limit");
|
||||
String entityType = params.containsKey("entityType") ? (String) params.get("entityType") : null;
|
||||
String index = entityType == null ? "dataAsset" : mapEntityTypesToIndexNames(entityType);
|
||||
|
||||
int size = 10;
|
||||
if (params.containsKey("size")) {
|
||||
Object limitObj = params.get("size");
|
||||
if (limitObj instanceof Number number) {
|
||||
limit = number.intValue();
|
||||
size = number.intValue();
|
||||
} else if (limitObj instanceof String string) {
|
||||
limit = Integer.parseInt(string);
|
||||
size = Integer.parseInt(string);
|
||||
}
|
||||
}
|
||||
|
||||
limit = Math.min(limit, 50);
|
||||
int from = 0;
|
||||
if (params.containsKey("from")) {
|
||||
Object limitObj = params.get("from");
|
||||
if (limitObj instanceof Number number) {
|
||||
from = number.intValue();
|
||||
} else if (limitObj instanceof String string) {
|
||||
from = Integer.parseInt(string);
|
||||
}
|
||||
}
|
||||
|
||||
size = Math.min(size, 50);
|
||||
|
||||
boolean includeDeleted = false;
|
||||
if (params.containsKey("include_deleted")) {
|
||||
@ -120,28 +139,63 @@ public class SearchMetadataTool implements McpTool {
|
||||
}
|
||||
}
|
||||
|
||||
String entityType =
|
||||
params.containsKey("entity_type") ? (String) params.get("entity_type") : null;
|
||||
String index = mapEntityTypesToIndexNames(entityType);
|
||||
String queryFilter = null;
|
||||
if (params.containsKey("queryFilter")) {
|
||||
queryFilter = (String) params.get("queryFilter");
|
||||
JsonNode queryNode = JsonUtils.getObjectMapper().readTree(queryFilter);
|
||||
|
||||
if (!queryNode.has("query")) {
|
||||
ObjectNode queryWrapper = JsonUtils.getObjectMapper().createObjectNode();
|
||||
queryWrapper.set("query", queryNode);
|
||||
queryFilter = JsonUtils.pojoToJson(queryWrapper);
|
||||
} else {
|
||||
queryFilter = JsonUtils.pojoToJson(queryNode);
|
||||
}
|
||||
LOG.debug("Applied query filter to query: {}", queryFilter);
|
||||
} else {
|
||||
|
||||
}
|
||||
|
||||
LOG.info(
|
||||
"Search query: {}, index: {}, limit: {}, includeDeleted: {}",
|
||||
query,
|
||||
queryFilter,
|
||||
index,
|
||||
limit,
|
||||
size,
|
||||
includeDeleted);
|
||||
|
||||
SearchRequest searchRequest =
|
||||
SearchRequest searchRequest;
|
||||
if (!nullOrEmpty(queryFilter)) {
|
||||
// When queryFilter is provided, use it directly as it's already a transformed OpenSearch
|
||||
// query
|
||||
searchRequest =
|
||||
new SearchRequest()
|
||||
.withIndex(Entity.getSearchRepository().getIndexOrAliasName(index))
|
||||
.withQueryFilter(queryFilter)
|
||||
.withSize(size)
|
||||
.withFrom(from)
|
||||
.withFetchSource(true)
|
||||
.withDeleted(includeDeleted);
|
||||
} else {
|
||||
// Fallback to basic query when no queryFilter is provided
|
||||
searchRequest =
|
||||
new SearchRequest()
|
||||
.withQuery(query)
|
||||
.withIndex(Entity.getSearchRepository().getIndexOrAliasName(index))
|
||||
.withSize(limit)
|
||||
.withFrom(0)
|
||||
.withSize(size)
|
||||
.withFrom(from)
|
||||
.withFetchSource(true)
|
||||
.withDeleted(includeDeleted);
|
||||
}
|
||||
|
||||
SubjectContext subjectContext = getSubjectContext(securityContext);
|
||||
Response response = Entity.getSearchRepository().search(searchRequest, subjectContext);
|
||||
Response response;
|
||||
if (!nullOrEmpty(queryFilter)) {
|
||||
// Use direct query method when queryFilter is provided since it's already a transformed query
|
||||
response = Entity.getSearchRepository().searchWithDirectQuery(searchRequest, subjectContext);
|
||||
} else {
|
||||
// Use regular search for basic queries
|
||||
response = Entity.getSearchRepository().search(searchRequest, subjectContext);
|
||||
}
|
||||
|
||||
Map<String, Object> searchResponse;
|
||||
if (response.getEntity() instanceof String responseStr) {
|
||||
@ -153,7 +207,7 @@ public class SearchMetadataTool implements McpTool {
|
||||
searchResponse = JsonUtils.convertValue(response.getEntity(), Map.class);
|
||||
}
|
||||
|
||||
return buildEnhancedSearchResponse(searchResponse, query, limit, requestedFields);
|
||||
return buildEnhancedSearchResponse(searchResponse, query, size, requestedFields);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -181,12 +235,9 @@ public class SearchMetadataTool implements McpTool {
|
||||
}
|
||||
|
||||
List<Object> hits = safeGetList(topHits.get("hits"));
|
||||
if (hits == null || hits.isEmpty()) {
|
||||
return createEmptyResponse();
|
||||
}
|
||||
|
||||
List<Map<String, Object>> cleanedResults = new ArrayList<>();
|
||||
int totalResults = 0;
|
||||
if (hits != null && !hits.isEmpty()) {
|
||||
|
||||
if (topHits.get("total") instanceof Map) {
|
||||
Map<String, Object> totalObj = safeGetMap(topHits.get("total"));
|
||||
@ -207,6 +258,7 @@ public class SearchMetadataTool implements McpTool {
|
||||
Map<String, Object> cleanedSource = cleanSearchResult(source, requestedFields);
|
||||
cleanedResults.add(cleanedSource);
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, Object> result = new HashMap<>();
|
||||
result.put("results", cleanedResults);
|
||||
@ -214,11 +266,16 @@ public class SearchMetadataTool implements McpTool {
|
||||
result.put("returnedCount", cleanedResults.size());
|
||||
result.put("query", query);
|
||||
|
||||
// Add aggregations if present in search response
|
||||
if (searchResponse.containsKey("aggregations")) {
|
||||
result.put("aggregations", searchResponse.get("aggregations"));
|
||||
}
|
||||
|
||||
if (totalResults > requestedLimit) {
|
||||
result.put(
|
||||
"message",
|
||||
String.format(
|
||||
"Found %d total results, showing first %d. Use pagination or refine your search for more specific results.",
|
||||
"Found %d total results, showing first %d. Use pagination or refine your search for more specific results, you can call these 3 times by yourself with pagination , and then only if the user ask for more paginate.",
|
||||
totalResults, cleanedResults.size()));
|
||||
result.put("hasMore", true);
|
||||
}
|
||||
@ -239,11 +296,18 @@ public class SearchMetadataTool implements McpTool {
|
||||
|
||||
// Add any specifically requested additional fields
|
||||
for (String field : requestedFields) {
|
||||
if (source.containsKey(field) && !ESSENTIAL_FIELDS_ONLY.contains(field)) {
|
||||
if (source.containsKey(field)) {
|
||||
result.put(field, source.get(field));
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup Description in case of huge description
|
||||
if (result.containsKey("description")) {
|
||||
String description = (String) result.get("description");
|
||||
if (description.length() > 3000) {
|
||||
result.put("description", description.substring(0, 300) + "...");
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -271,4 +335,15 @@ public class SearchMetadataTool implements McpTool {
|
||||
private static List<Object> safeGetList(Object obj) {
|
||||
return (obj instanceof List) ? (List<Object>) obj : null;
|
||||
}
|
||||
|
||||
private XContentParser createXContentParser(String query) throws IOException {
|
||||
try {
|
||||
return XContentType.JSON
|
||||
.xContent()
|
||||
.createParser(EsUtils.esXContentRegistry, LoggingDeprecationHandler.INSTANCE, query);
|
||||
} catch (IOException e) {
|
||||
LOG.error("Failed to create XContentParser", e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,30 +2,76 @@
|
||||
"tools": [
|
||||
{
|
||||
"name": "search_metadata",
|
||||
"description": "Find your data and business terms in OpenMetadata. For example if the user asks to 'find tables that contain customers information', then 'customers' should be the query, and the entity_type should be 'table'. Here make sure to use 'Href' is available in result to create a hyperlink to the entity in OpenMetadata.",
|
||||
"description": "Find your data and business terms in OpenMetadata. This tool automatically generates optimized OpenSearch queries based on natural language input and searches the OpenMetadata catalog for tables, dashboards, topics, pipelines, and other data assets.",
|
||||
"parameters": {
|
||||
"description": "The search query to find metadata in the OpenMetadata catalog, entity type could be table, topic etc. Limit can be used to paginate on the data.",
|
||||
"description": "Search for metadata in the OpenMetadata catalog using natural language queries. The tool automatically converts your search intent into optimized OpenSearch queries with proper filtering.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Keywords to use for searching."
|
||||
"description": "Optional , Only specify if queryFilter is not passed .Natural language search query that will be converted to OpenSearch syntax in queryFilter. Examples: 'tables owned by marketing', 'dashboards with customer data', 'find all tier1 tables', 'tables with column SKU', 'count BigQuery services', 'tables tagged with PII.Sensitive'."
|
||||
},
|
||||
"entity_type": {
|
||||
"entityType": {
|
||||
"type": "string",
|
||||
"description": "Optional entity type to filter results. The OpenMetadata entities are categorized as follows: Service Entities include databaseService, messagingService, apiService, dashboardService, pipelineService, storageService, mlmodelService, metadataService, and searchService; Data Asset Entities include apiCollection, apiEndpoint, table, storedProcedure, database, databaseSchema, dashboard, dashboardDataModel, pipeline, chart, topic, searchIndex, mlmodel, and container; User Entities include user and team; Domain entities include domain and dataProduct; and Governance entities include metric, glossary, and glossaryTerm."
|
||||
"description": "Optional entity type to filter results, only specify if queryFilter is not passed. If not specified, will be auto-detected from the query. Available types: Service Entities (databaseService, messagingService, apiService, dashboardService, pipelineService, storageService, mlmodelService, metadataService, searchService), Data Asset Entities (apiCollection, apiEndpoint, table, storedProcedure, database, databaseSchema, dashboard, dashboardDataModel, pipeline, chart, topic, searchIndex, mlmodel, container), User Entities (user, team), Domain entities (domain, dataProduct), Governance entities (metric, glossary, glossaryTerm)."
|
||||
},
|
||||
"limit": {
|
||||
"queryFilter": {
|
||||
"type": "string",
|
||||
"description": "REQUIRED: OpenSearch JSON query filter that you must generate based on the natural language query. Use the comprehensive guidelines below to create the proper OpenSearch query.\n\n=== QUERY GENERATION INSTRUCTIONS ===\n\nYou are an expert OpenSearch query generator for OpenMetadata. Your task is to translate the natural language query into a precise OpenSearch JSON query.\n\n## QUERY TYPE DETECTION\nFirst determine if the user wants:\n1. **SEARCH QUERY** - Finding specific entities (e.g., \"find tables owned by marketing\")\n2. **AGGREGATION QUERY** - Counting, grouping, statistics (e.g., \"how many BigQuery services\", \"count assets by team\")\n3. **BOTH** - Search and aggregation combined\n\n## FIELD MAPPING FOR TABLES\n- displayName.keyword: Exact match on table display name\n- name: Text analysis on table name with tokenization\n- name.ngram: Partial matching using n-grams\n- description: Full-text search on descriptions\n- fullyQualifiedName: Complete hierarchical name search\n- columns.name.keyword: Exact match on column names\n- columns.description: Full-text search on column descriptions\n- tags.tagFQN.text: Search within tag name parts\n- tier.tagFQN.text: Search within tier classification parts\n- domain.displayName.keyword: Exact match on domain\n- dataProducts.displayName.keyword: Exact match on data products\n- owners.name: Owner name (use 'owners' plural, not 'owner')\n- service.name: Service name filtering\n- extension.*: Custom properties (e.g., extension.businessOwner.name)\n\n## CRITICAL QUERY RULES\n1. **Entity Type Filtering**: Always add {\"term\": {\"entityType\": \"table\"}} when searching tables (use SINGULAR form)\n2. **Field Selection**: Use 'term' for exact matches (.keyword fields), 'match' for text search\n3. **Owners Field**: Use 'owners.name' (plural) NOT 'owner.name'\n4. **Tier Format**: Use \"Tier.Tier1\", \"Tier.Tier2\", \"Tier.Tier3\" format\n5. **Tags**: Use 'tags.tagFQN' for tag filtering\n6. **Columns**: Use direct match queries, NOT nested (columns are NOT nested)\n7. **Custom Properties**: Use 'extension.propertyName' pattern\n\n## EXAMPLE QUERY PATTERNS\n\n**Tables owned by marketing:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"marketing\"}}]}}\n```\n\n**Tables with customer data:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"customer\"}}]}}\n```\n\n**Tier1 tables:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}]}}\n```\n\n**Tables with PII tag:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tags.tagFQN\": \"PII.Sensitive\"}}]}}\n```\n\n**Count query - How many BigQuery services:**\n```json\n{\"size\": 0, \"query\": {\"bool\": {\"must\": [{\"term\": {\"entityType\": \"databaseService\"}}, {\"term\": {\"serviceType\": \"BigQuery\"}}]}}, \"aggs\": {\"total_count\": {\"value_count\": {\"field\": \"_id\"}}}}\n```\n\n**Tables with business owner infrastructure:**\n```json\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"extension.businessOwner.name\": \"infrastructure\"}}]}}\n```\n\n## AGGREGATION FIELDS\nFor counting/grouping use these aggregation fields:\n- serviceType, service.displayName.keyword, entityType\n- tier.tagFQN, owners.displayName.keyword, domain.displayName.keyword\n- tags.tagFQN, database.name.keyword, databaseSchema.name.keyword\n\n## IMPORTANT MISTAKES TO AVOID\n❌ Missing entityType filter when type mentioned\n❌ Using 'owner.name' instead of 'owners.name'\n❌ Using nested queries for columns (columns are NOT nested)\n❌ Wrong tier format (use \"Tier.Tier1\" not \"tier1\")\n❌ Using 'tags.tagFQN' for tier (use 'tier.tagFQN')\n\n**Generate the complete OpenSearch JSON query and return ONLY the JSON object.**"
|
||||
},
|
||||
"from": {
|
||||
"type": "integer",
|
||||
"description": "Maximum number of results to return. Default is 10. Try to keep this number low unless the user asks for more."
|
||||
"description": "In case the request more than 'size' results, this is the offset from the first result you want to fetch. Default is 0.",
|
||||
"default": 10
|
||||
},
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Number of results to return. Default is 10, we can iteratively page through results by increasing 'from'. Maximum allowed is 50.",
|
||||
"default": 10
|
||||
},
|
||||
"fields": {
|
||||
"type": "string",
|
||||
"description": "Comma-separated list of additional fields to include in the response. By default, returns essential fields: name, displayName, fullyQualifiedName, description, entityType, service, database, databaseSchema, serviceType, href, tags, owners, tier, tableType, columnNames. Available additional fields by entity type:\n\nTable entities: columns, schemaDefinition, queries, upstreamLineage, entityRelationship\nTopic entities: messageSchema, partitions, replicationFactor\nDashboard entities: charts, dataModels, project\nPipeline entities: tasks, pipelineUrl, scheduleInterval\nAll entities: createdAt, updatedAt, changeDescription, extension, domain, dataProducts, lifeCycle, sourceHash\n\nExample: 'columns,queries' to include column details and sample queries for tables."
|
||||
"description": "Comma-separated additional fields to include. Default returns: name, displayName, fullyQualifiedName, description, entityType, service, database, databaseSchema, serviceType, href, tags, owners, tier, tableType, columnNames.\n\nAdditional fields by entity type:\n- Table entities: columns, schemaDefinition, queries, upstreamLineage, entityRelationship\n- Topic entities: messageSchema, partitions, replicationFactor \n- Dashboard entities: charts, dataModels, project\n- Pipeline entities: tasks, pipelineUrl, scheduleInterval\n- All entities: createdAt, updatedAt, changeDescription, extension, domain, dataProducts, lifeCycle, sourceHash\n\nExample: 'columns,queries' for table column details and sample queries."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"query"
|
||||
"required": ["queryFilter"],
|
||||
"examples": [
|
||||
{
|
||||
"description": "Find tables owned by marketing team",
|
||||
"query": "tables owned by marketing",
|
||||
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"marketing\"}}]}}",
|
||||
"entityType": "table"
|
||||
},
|
||||
{
|
||||
"description": "Search for customer data across all tables",
|
||||
"query": "tables with customer data",
|
||||
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"customer\"}}]}}"
|
||||
},
|
||||
{
|
||||
"description": "Find all Tier 1 classified tables",
|
||||
"query": "tables with tier1 classification",
|
||||
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}]}}"
|
||||
},
|
||||
{
|
||||
"description": "Count BigQuery services",
|
||||
"query": "how many BigQuery services are there",
|
||||
"queryFilter": "{\"size\": 0, \"query\": {\"bool\": {\"must\": [{\"term\": {\"entityType\": \"databaseService\"}}, {\"term\": {\"serviceType\": \"BigQuery\"}}]}}, \"aggs\": {\"total_count\": {\"value_count\": {\"field\": \"_id\"}}}}"
|
||||
},
|
||||
{
|
||||
"description": "Find tables with specific column",
|
||||
"query": "tables with column SKU",
|
||||
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"SKU\"}}]}}"
|
||||
},
|
||||
{
|
||||
"description": "Search by tags",
|
||||
"query": "tables tagged with PII.Sensitive",
|
||||
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"tags.tagFQN\": \"PII.Sensitive\"}}]}}"
|
||||
},
|
||||
{
|
||||
"description": "Custom property search",
|
||||
"query": "tables where business owner is infrastructure",
|
||||
"queryFilter": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"extension.businessOwner.name\": \"infrastructure\"}}]}}"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
@ -306,6 +306,9 @@ public interface SearchClient<T> {
|
||||
|
||||
Response searchWithNLQ(SearchRequest request, SubjectContext subjectContext) throws IOException;
|
||||
|
||||
Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
|
||||
throws IOException;
|
||||
|
||||
Response getDocByID(String indexName, String entityId) throws IOException;
|
||||
|
||||
default ExecutorService getAsyncExecutor() {
|
||||
|
@ -1197,6 +1197,11 @@ public class SearchRepository {
|
||||
return searchClient.searchWithNLQ(request, subjectContext);
|
||||
}
|
||||
|
||||
public Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
|
||||
throws IOException {
|
||||
return searchClient.searchWithDirectQuery(request, subjectContext);
|
||||
}
|
||||
|
||||
public Response getDocument(String indexName, UUID entityId) throws IOException {
|
||||
return searchClient.getDocByID(indexName, entityId.toString());
|
||||
}
|
||||
|
@ -905,6 +905,40 @@ public class ElasticSearchClient implements SearchClient<RestHighLevelClient> {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
|
||||
throws IOException {
|
||||
LOG.info("Executing direct OpenSearch query: {}", request.getQueryFilter());
|
||||
try {
|
||||
XContentParser parser = createXContentParser(request.getQueryFilter());
|
||||
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser);
|
||||
searchSourceBuilder.from(request.getFrom());
|
||||
searchSourceBuilder.size(request.getSize());
|
||||
|
||||
// Apply RBAC constraints
|
||||
buildSearchRBACQuery(subjectContext, searchSourceBuilder);
|
||||
|
||||
// Add aggregations if needed
|
||||
ElasticSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory();
|
||||
sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex());
|
||||
|
||||
es.org.elasticsearch.action.search.SearchRequest esRequest =
|
||||
new es.org.elasticsearch.action.search.SearchRequest(request.getIndex());
|
||||
esRequest.source(searchSourceBuilder);
|
||||
|
||||
es.org.elasticsearch.action.search.SearchResponse response =
|
||||
client.search(esRequest, RequestOptions.DEFAULT);
|
||||
|
||||
LOG.debug("Direct query search completed successfully");
|
||||
return Response.status(Response.Status.OK).entity(response.toString()).build();
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error executing direct query search: {}", e.getMessage(), e);
|
||||
return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
|
||||
.entity(String.format("Failed to execute direct query search: %s", e.getMessage()))
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
||||
private Response fallbackToBasicSearch(SearchRequest request, SubjectContext subjectContext) {
|
||||
try {
|
||||
LOG.debug("Falling back to basic query_string search for NLQ: {}", request.getQuery());
|
||||
|
@ -571,6 +571,41 @@ public class OpenSearchClient implements SearchClient<RestHighLevelClient> {
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Response searchWithDirectQuery(SearchRequest request, SubjectContext subjectContext)
|
||||
throws IOException {
|
||||
LOG.info("Executing direct OpenSearch query: {}", request.getQueryFilter());
|
||||
try {
|
||||
XContentParser parser = createXContentParser(request.getQueryFilter());
|
||||
SearchSourceBuilder searchSourceBuilder = SearchSourceBuilder.fromXContent(parser);
|
||||
searchSourceBuilder.from(request.getFrom());
|
||||
searchSourceBuilder.size(request.getSize());
|
||||
|
||||
// Apply RBAC constraints
|
||||
buildSearchRBACQuery(subjectContext, searchSourceBuilder);
|
||||
|
||||
// Add aggregations if needed
|
||||
OpenSearchSourceBuilderFactory sourceBuilderFactory = getSearchBuilderFactory();
|
||||
sourceBuilderFactory.addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex());
|
||||
|
||||
os.org.opensearch.action.search.SearchRequest osRequest =
|
||||
new os.org.opensearch.action.search.SearchRequest(request.getIndex());
|
||||
osRequest.source(searchSourceBuilder);
|
||||
// Use DFS Query Then Fetch for consistent scoring across shards
|
||||
osRequest.searchType(SearchType.DFS_QUERY_THEN_FETCH);
|
||||
|
||||
SearchResponse response = client.search(osRequest, OPENSEARCH_REQUEST_OPTIONS);
|
||||
|
||||
LOG.debug("Direct query search completed successfully");
|
||||
return Response.status(Response.Status.OK).entity(response.toString()).build();
|
||||
} catch (Exception e) {
|
||||
LOG.error("Error executing direct query search: {}", e.getMessage(), e);
|
||||
return Response.status(Response.Status.INTERNAL_SERVER_ERROR)
|
||||
.entity(String.format("Failed to execute direct query search: %s", e.getMessage()))
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
||||
private Response fallbackToBasicSearch(SearchRequest request, SubjectContext subjectContext) {
|
||||
try {
|
||||
LOG.debug("Falling back to basic query_string search for NLQ: {}", request.getQuery());
|
||||
|
@ -1,38 +0,0 @@
|
||||
{
|
||||
"tools": [
|
||||
{
|
||||
"name": "create-greeting",
|
||||
"description": "Generate a customized greeting message",
|
||||
"arguments": [
|
||||
{
|
||||
"name": "name",
|
||||
"description": "Name of the person to greet",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"name": "style",
|
||||
"description": "The style of greeting, such a formal, excited, or casual. If not specified casual will be used"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "search_metadata",
|
||||
"description": "Creates a prompt for Searching metadata in OpenMetadata.",
|
||||
"arguments": [
|
||||
{
|
||||
"name": "query",
|
||||
"description": "Keywords to use for searching.",
|
||||
"required": true
|
||||
},
|
||||
{
|
||||
"name": "entity_type",
|
||||
"description": "Entity Type to Filter Report."
|
||||
},
|
||||
{
|
||||
"name": "limit",
|
||||
"description": "Maximum number of results to return. Default is 10. Try to keep this number low unless the user asks for more."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
@ -1,187 +0,0 @@
|
||||
{
|
||||
"tools": [
|
||||
{
|
||||
"name": "search_metadata",
|
||||
"description": "Find your data and business terms in OpenMetadata. For example if the user asks to 'find tables that contain customers information', then 'customers' should be the query, and the entity_type should be 'table'. Here make sure to use 'Href' is available in result to create a hyperlink to the entity in OpenMetadata.",
|
||||
"parameters": {
|
||||
"description": "The search query to find metadata in the OpenMetadata catalog, entity type could be table, topic etc. Limit can be used to paginate on the data.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Keywords to use for searching."
|
||||
},
|
||||
"entity_type": {
|
||||
"type": "string",
|
||||
"description": "Optional entity type to filter results. The OpenMetadata entities are categorized as follows: Service Entities include databaseService, messagingService, apiService, dashboardService, pipelineService, storageService, mlmodelService, metadataService, and searchService; Data Asset Entities include apiCollection, apiEndpoint, table, storedProcedure, database, databaseSchema, dashboard, dashboardDataModel, pipeline, chart, topic, searchIndex, mlmodel, and container; User Entities include user and team; Domain entities include domain and dataProduct; and Governance entities include metric, glossary, and glossaryTerm."
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "Maximum number of results to return. Default is 10. Try to keep this number low unless the user asks for more."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"query"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "get_entity_details",
|
||||
"description": "Get detailed information about a specific entity",
|
||||
"parameters": {
|
||||
"description": "Fqn is the fully qualified name of the entity. Entity type could be table, topic etc.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"entity_type": {
|
||||
"type": "string",
|
||||
"description": "Type of entity"
|
||||
},
|
||||
"fqn": {
|
||||
"type": "string",
|
||||
"description": "Fully qualified name of the entity"
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"entity_type",
|
||||
"fqn"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "create_glossary_term",
|
||||
"description": "Creates a new Glossary Term. Note that a glossary term must be part of a Glossary, so the glossary must be specified in the parameters. If you can't find the right glossary to use, respond back to the user to create a new Glossary first. Note that you can help the user to create the Glossary as well. If you don't find any Glossary that could be related, please list to the user the available Glossaries so users can choose if they want to create or reuse something. Also, note that glossary terms can be hierarchical: for example, a glossary term 'Accounts' can have a child term 'Credit Account', 'Savings Account', etc. So if you find any terms that can be related, it might make sense to create a new term as a child of an existing term.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"glossary": {
|
||||
"type": "string",
|
||||
"description": "Glossary in which the term belongs. This should be its fully qualified name."
|
||||
},
|
||||
"parentTerm": {
|
||||
"type": "string",
|
||||
"description": "Optional parent term for the new term. This should be its fully qualified name defined as <glossary>.<term>. If the Glossary Term has other parents, the Fully Qualified Name will be <glossary>.<parent>...<term>. If not provided, the term will be created at the root level of the glossary."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Glossary Term name."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Glossary Term description."
|
||||
},
|
||||
"owners": {
|
||||
"type": "array",
|
||||
"description": "Glossary Term owner. This could be an OpenMetadata User or Team. If you don't know who the owner is, you can leave this empty, but let the user know that they can add owners later.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"glossary",
|
||||
"name",
|
||||
"description"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "create_glossary",
|
||||
"description": "Creates a new Glossary. A Glossary is a collection of terms that are used to define the business vocabulary of an organization. Typically, similar terms are grouped together in a Glossary. For example, a Glossary names 'Marketing' could contain terms like 'Campaign', 'Lead', 'Opportunity', etc.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "Glossary Term name."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Glossary Term description."
|
||||
},
|
||||
"owners": {
|
||||
"type": "array",
|
||||
"description": "Glossary Term owner. This could be an OpenMetadata User or Team. If you don't know who the owner is, you can leave this empty, but let the user know that they can add owners later.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"reviewers": {
|
||||
"type": "array",
|
||||
"description": "Glossary Term owner. This could be an OpenMetadata User or Team. If you don't know who the owner is, you can leave this empty, but let the user know that they can add owners later.",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"mutuallyExclusive": {
|
||||
"type": "boolean",
|
||||
"description": "Glossary terms that are direct children in this glossary are mutually exclusive. When mutually exclusive is `true` only one term can be used to tag an entity. When mutually exclusive is `false`, multiple terms from this group can be used to tag an entity. This is an important setting. If you are not sure, ask the user to clarify. If the user doesn't know, set it to `false`.",
|
||||
"default": false
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"name",
|
||||
"description",
|
||||
"mutuallyExclusive"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "patch_entity",
|
||||
"description": "Patches an Entity based on a JSONPatch. Beforehand the Entity should be validated by finding it and creating a proper patch.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"entityType": {
|
||||
"type": "string",
|
||||
"description": "Entity Type to patch."
|
||||
},
|
||||
"entityFqn": {
|
||||
"type": "string",
|
||||
"description": "Fully Qualified Name of the Entity to be patched."
|
||||
},
|
||||
"patch": {
|
||||
"type": "string",
|
||||
"description": "JSONPatch as String format."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"entityType",
|
||||
"entityFqn",
|
||||
"patch"
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "get_entity_lineage",
|
||||
"description": "Get detailed information about lineage (upstream/downstream dependencies) of a specific entity. Use this for root cause (upstream entities) or impact (downstream entities) analysis and explaining dependencies between entities.",
|
||||
"parameters": {
|
||||
"description": "Fqn is the fully qualified name of the entity. Entity type could be table, topic etc.",
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"entity_type": {
|
||||
"type": "string",
|
||||
"description": "Type of entity"
|
||||
},
|
||||
"fqn": {
|
||||
"type": "string",
|
||||
"description": "Fully qualified name of the entity"
|
||||
},
|
||||
"upstream_depth": {
|
||||
"type": "integer",
|
||||
"description": "Depth for reaching upstream entities. Default is 5."
|
||||
},
|
||||
"downstream_depth": {
|
||||
"type": "integer",
|
||||
"description": "Depth for reaching downstream entities. Default is 5."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"entity_type",
|
||||
"fqn",
|
||||
"upstream_depth",
|
||||
"downstream_depth"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user