fix(nlq): Use dual-field search for owner queries to improve match rates (#23794)

This commit is contained in:
Bhanu Agrawal 2025-10-14 20:31:04 +05:30 committed by GitHub
parent 582f9322ab
commit 650627a025
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3646,7 +3646,7 @@
},
{
"section": "INCORRECT VS CORRECT PATTERNS",
"content": "\u274c INCORRECT: Tables query missing entityType filter\n{\"bool\": {\"must\": [{\"term\": {\"owners.name\": \"accounting\"}}]}}\n\n\u2705 CORRECT: Tables query with entityType filter\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"accounting\"}}]}}\n\n\u274c INCORRECT: Using nested query for columns\n{\"bool\": {\"must\": [{\"nested\": {\"path\": \"columns\", \"query\": {\"match\": {\"columns.name\": \"SKU\"}}}}]}}\n\n\u2705 CORRECT: Direct match query for columns\n{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"match\": {\"columns.name\": \"SKU\"}}]}}\n\n\u274c INCORRECT: Using owner.name (singular)\n{\"term\": {\"owner.name\": \"marketing\"}}\n\n\u2705 CORRECT: Using owners.name (plural)\n{\"term\": {\"owners.name\": \"marketing\"}}\n\n\u274c INCORRECT: Using tags for tier\n{\"term\": {\"tags.tagFQN\": \"Tier1\"}}\n\n\u2705 CORRECT: Using tier.tagFQN with proper format\n{\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}",
"content": "\u274c INCORRECT: Tables query missing entityType filter AND using only one owner field\n{\"bool\": {\"must\": [{\"term\": {\"owners.displayName.keyword\": \"accounting\"}}]}}\n\n\u2705 CORRECT: Tables query with entityType filter AND both owner fields\n{\"bool\": {\"must\": [{\"bool\": {\"should\": [{\"term\": {\"entityType.keyword\": \"table\"}}]}}, {\"bool\": {\"should\": [{\"term\": {\"owners.name\": \"accounting\"}}, {\"term\": {\"owners.displayName.keyword\": \"accounting\"}}]}}]}}\n\n\u274c INCORRECT: Using nested query for columns\n{\"bool\": {\"must\": [{\"nested\": {\"path\": \"columns\", \"query\": {\"match\": {\"columns.name\": \"SKU\"}}}}]}}\n\n\u2705 CORRECT: Direct match query for columns\n{\"bool\": {\"must\": [{\"bool\": {\"should\": [{\"term\": {\"entityType.keyword\": \"table\"}}]}}, {\"match\": {\"columns.name\": \"SKU\"}}]}}\n\n\u274c INCORRECT: Using owner.name (singular) or only one field\n{\"term\": {\"owner.name\": \"marketing\"}}\n{\"term\": {\"owners.name\": \"marketing\"}}\n\n\u2705 CORRECT: Search BOTH owners.name and owners.displayName.keyword\n{\"bool\": {\"should\": [{\"term\": {\"owners.name\": \"marketing\"}}, {\"term\": {\"owners.displayName.keyword\": \"marketing\"}}]}}\n\n\u274c INCORRECT: Using tags for tier\n{\"term\": {\"tags.tagFQN\": \"Tier1\"}}\n\n\u2705 CORRECT: Using tier.tagFQN with proper format\n{\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}",
"order": 6
},
{
@ -3656,9 +3656,14 @@
},
{
"section": "CRITICAL FIELD CORRECTIONS",
"content": "1. Use 'owners.name' (plural) NOT 'owner.name' (singular)\n2. Use 'tags.tagFQN' NOT just 'tags' for regular tags\n3. Use 'tier.tagFQN' NOT 'tags.tagFQN' for tier-related queries\n4. Top-level fields like 'owners', 'tags', 'tier' do NOT need nested queries\n5. Only use nested queries for 'columns', 'charts', 'tasks', etc.",
"content": "1. ALWAYS search BOTH 'owners.name' AND 'owners.displayName.keyword' in a should clause; NEVER use 'owner.name' (singular)\n2. Use 'tags.tagFQN' NOT just 'tags' for regular tags\n3. Use 'tier.tagFQN' NOT 'tags.tagFQN' for tier-related queries\n4. Top-level fields like 'owners', 'tags', 'tier' do NOT need nested queries\n5. Only use nested queries for fields documented as nested, such as 'charts', 'tasks', etc.; do NOT use a nested query for 'columns' (use a direct match query on 'columns.name')",
"order": 11
},
{
"section": "OWNER FIELD SELECTION",
"content": "When filtering by owner, ALWAYS search BOTH fields simultaneously:\n\n✅ CORRECT PATTERN (use for ALL owner queries):\n{\"bool\": {\"should\": [\n {\"term\": {\"owners.name\": \"value\"}},\n {\"term\": {\"owners.displayName.keyword\": \"value\"}}\n]}}\n\nThis matches whether the value is a username (e.g., 'jsmith') OR display name (e.g., 'John Smith').\n\n⚠ NEVER use 'owner.name' (singular) - always use 'owners' (plural)\n⚠ ALWAYS include BOTH fields in a should clause",
"order": 12
},
{
"section": "AMBIGUITY RESOLUTION",
"content": "- If the user query mentions 'domain', verify if the FIELD SUMMARY lists a specific 'domain' field (like `domain.displayName.keyword`). If yes, use it. If not, or if context suggests otherwise, reconsider. DO NOT default to `service.name` for business domains.\n- If the user query mentions 'data product', verify if the FIELD SUMMARY lists a specific 'data product' field (like `dataProducts.displayName`). If yes, use it.\n- For general terms, use the field description in the FIELD SUMMARY to determine the best fit (e.g., prefer `displayName` for user-friendly name searches over raw `name`).",
@ -3686,7 +3691,7 @@
"sections": [
{
"section": "TABLE FIELDS",
"content": "- name (text): Use match query for fuzzy text search\n Example: {\"match\": {\"name\": \"customer\"}}\n\n- name.keyword (exact): Use term query for exact matching\n Example: {\"term\": {\"name.keyword\": \"customer_orders\"}}\n\n- description (text): Use match query for content search\n Example: {\"match\": {\"description\": \"sales data\"}}\n\n- owners.name (keyword): Use term query for exact matching\n Example: {\"term\": {\"owners.name\": \"marketing\"}}\n\n- tier.tagFQN (keyword): Use term query for tier classification\n Example: {\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}\n\n- tags.tagFQN (keyword): Use term query for tags\n Example: {\"term\": {\"tags.tagFQN\": \"PII.Sensitive\"}}\n\n- columns.name (keyword): Use direct match queries for columns\n Example: {\"match\": {\"columns.name\": \"address\"}}\n \u26a0\ufe0f IMPORTANT: Do NOT use nested query syntax for columns\n\n- service.name, database.name, databaseSchema.name (keywords): Use term queries\n Example: {\"term\": {\"service.name\": \"snowflake\"}}",
"content": "- name (text): Use match query for fuzzy text search\n Example: {\"match\": {\"name\": \"customer\"}}\n\n- name.keyword (exact): Use term query for exact matching\n Example: {\"term\": {\"name.keyword\": \"customer_orders\"}}\n\n- description (text): Use match query for content search\n Example: {\"match\": {\"description\": \"sales data\"}}\n\n- owners (keyword fields): ALWAYS search BOTH owners.name and owners.displayName.keyword\n Example: {\"bool\": {\"should\": [{\"term\": {\"owners.name\": \"value\"}}, {\"term\": {\"owners.displayName.keyword\": \"value\"}}]}}\n This matches usernames OR display names in a single query\n\n- tier.tagFQN (keyword): Use term query for tier classification\n Example: {\"term\": {\"tier.tagFQN\": \"Tier.Tier1\"}}\n\n- tags.tagFQN (keyword): Use term query for tags\n Example: {\"term\": {\"tags.tagFQN\": \"PII.Sensitive\"}}\n\n- columns.name (keyword): Use direct match queries for columns\n Example: {\"match\": {\"columns.name\": \"address\"}}\n ⚠️ IMPORTANT: Do NOT use nested query syntax for columns\n\n- service.name, database.name, databaseSchema.name (keywords): Use term queries\n Example: {\"term\": {\"service.name\": \"snowflake\"}}",
"order": 10
}
]
@ -3696,7 +3701,7 @@
"sections": [
{
"section": "DASHBOARD FIELDS",
"content": "- charts.name (nested): Use nested query for searching charts\n Example: {\"nested\": {\"path\": \"charts\", \"query\": {\"match\": {\"charts.name\": \"revenue\"}}}}\n\n- service.name: Use term query for the dashboard service\n Example: {\"term\": {\"service.name\": \"looker\"}}\n\n- owners.name: Use term query for ownership\n Example: {\"term\": {\"owners.name\": \"marketing\"}}",
"content": "- charts.name (nested): Use nested query for searching charts\n Example: {\"nested\": {\"path\": \"charts\", \"query\": {\"match\": {\"charts.name\": \"revenue\"}}}}\n\n- service.name: Use term query for the dashboard service\n Example: {\"term\": {\"service.name\": \"looker\"}}\n\n- owners (keyword fields): ALWAYS search BOTH owners.name and owners.displayName.keyword\n Example: {\"bool\": {\"should\": [{\"term\": {\"owners.name\": \"value\"}}, {\"term\": {\"owners.displayName.keyword\": \"value\"}}]}}\n This matches usernames OR display names in a single query",
"order": 10
}
]
@ -3705,7 +3710,7 @@
"examples": [
{
"query": "tables owned by marketing",
"esQuery": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"term\": {\"owners.name\": \"marketing\"}}]}}",
"esQuery": "{\"bool\": {\"must\": [{\"term\": {\"entityType\": \"table\"}}, {\"bool\": {\"should\": [{\"term\": {\"owners.name\": \"marketing\"}}, {\"term\": {\"owners.displayName.keyword\": \"marketing\"}}]}}]}}",
"entityTypes": [
"table"
]
@ -3726,7 +3731,7 @@
},
{
"query": "tables owned by marketing",
"esQuery": "{\"bool\": {\"must\": [{\"term\": {\"owners.name\": \"marketing\"}}]}}",
"esQuery": "{\"bool\": {\"must\": [{\"bool\": {\"should\": [{\"term\": {\"owners.name\": \"marketing\"}}, {\"term\": {\"owners.displayName.keyword\": \"marketing\"}}]}}]}}",
"entityTypes": [
"table"
]