Issues in Search Relevancy (#21841)

* Fix explain turned on by default, use dfs_query_then_fetch in cases of sharding of search cluster

* Add exact match configs

* Add exact match configs

* Update Logic to build search source builder with exact match priority

* Revert "Update Logic to build search source builder with exact match priority"

This reverts commit 175a2e9c6b67ee90d4b2a35af89bb035e8c45131.

* Revert "Add exact match configs"

This reverts commit 3fd52606610bbb97a676170004cab6d7adc31a0d.

* revert display name change

* make boost mode sum by default

* add more fqnparts for schema and database

* revert DFS_QUERY_THEN_FETCH since sharding wasn't the issue

* use fqn split

* Refactor FQN Parts

---------

Co-authored-by: Sriharsha Chintalapani <harsha@getcollate.io>
Co-authored-by: Sriharsha Chintalapani <harshach@users.noreply.github.com>
This commit is contained in:
Mohit Yadav 2025-06-19 05:03:46 +05:30 committed by GitHub
parent 12097f15cd
commit f388e570c1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 571 additions and 45 deletions

View File

@ -32491,6 +32491,218 @@
},
"sourceUrl": "http://localhost:8080/test_long_description_table",
"tags": []
},
{
"name": "ssot_utilization_detail",
"description": "A detail of each agent's time throughout the day. Individual events in each system are collapsed by aux_code, aux_channel, worktype_name, and reporting_date.",
"version": 0.5,
"updatedAt": 1638354087391,
"updatedBy": "anonymous",
"tableType": "Regular",
"columns": [
{
"name": "reporting_date",
"dataType": "DATE",
"dataTypeDisplay": "date",
"description": "The date the event began.",
"tags": [],
"ordinalPosition": 1
},
{
"name": "agent_id",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "The unique identifier for the agent from customer_success.remitly_employee.",
"tags": [],
"ordinalPosition": 2
},
{
"name": "agent_email",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "The agent email.",
"tags": [],
"ordinalPosition": 3
}
],
"database": {
"id": "50da1ff8-4e1d-4967-8931-45edbf4dd908",
"type": "database",
"name": "sample_data.ecommerce_db",
"description": "This **mock** database contains tables related to shopify sales and orders with related dimension tables.",
"href": "http://localhost:8585/api/v1/databases/50da1ff8-4e1d-4967-8931-45edbf4dd908"
},
"tags": [],
"usageSummary": {
"dailyStats": {
"count": 0,
"percentileRank": 0
},
"weeklyStats": {
"count": 0,
"percentileRank": 0
},
"monthlyStats": {
"count": 0,
"percentileRank": 0
},
"date": "2021-12-01"
},
"followers": []
},
{
"name": "itm_utilization_detail_login_events",
"description": "An intermediate table used to create ssot_utilization_detail. This table cleans up the \"CS Platform: login\" event dimensions to be congruent with downstream analytics.",
"version": 0.3,
"updatedAt": 1638354087391,
"updatedBy": "anonymous",
"tableType": "Regular",
"columns": [
{
"name": "reporting_date",
"dataType": "DATE",
"dataTypeDisplay": "date",
"description": "Event date",
"tags": [],
"ordinalPosition": 1
},
{
"name": "agent_id",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "Agent identifier",
"tags": [],
"ordinalPosition": 2
}
],
"database": {
"id": "50da1ff8-4e1d-4967-8931-45edbf4dd908",
"type": "database",
"name": "sample_data.ecommerce_db",
"description": "This **mock** database contains tables related to shopify sales and orders with related dimension tables.",
"href": "http://localhost:8585/api/v1/databases/50da1ff8-4e1d-4967-8931-45edbf4dd908"
},
"tags": [],
"usageSummary": {
"dailyStats": {
"count": 10,
"percentileRank": 30
},
"weeklyStats": {
"count": 50,
"percentileRank": 40
},
"monthlyStats": {
"count": 200,
"percentileRank": 50
},
"date": "2021-12-01"
},
"followers": []
},
{
"name": "customer_metrics_daily",
"description": "Daily customer metrics aggregated from various sources including ssot_utilization_detail and other operational tables.",
"version": 0.2,
"updatedAt": 1638354087391,
"updatedBy": "anonymous",
"tableType": "Regular",
"columns": [
{
"name": "metric_date",
"dataType": "DATE",
"dataTypeDisplay": "date",
"description": "Date of the metric",
"tags": [],
"ordinalPosition": 1
},
{
"name": "customer_count",
"dataType": "INT",
"dataTypeDisplay": "integer",
"description": "Total number of customers",
"tags": [],
"ordinalPosition": 2
}
],
"database": {
"id": "50da1ff8-4e1d-4967-8931-45edbf4dd908",
"type": "database",
"name": "sample_data.ecommerce_db",
"description": "This **mock** database contains tables related to shopify sales and orders with related dimension tables.",
"href": "http://localhost:8585/api/v1/databases/50da1ff8-4e1d-4967-8931-45edbf4dd908"
},
"tags": [],
"usageSummary": {
"dailyStats": {
"count": 100,
"percentileRank": 70
},
"weeklyStats": {
"count": 500,
"percentileRank": 80
},
"monthlyStats": {
"count": 2000,
"percentileRank": 90
},
"date": "2021-12-01"
},
"followers": []
},
{
"name": "agent_performance_summary",
"description": "Summary of agent performance metrics derived from multiple tables including ssot_utilization_detail for comprehensive reporting.",
"version": 0.4,
"updatedAt": 1638354087391,
"updatedBy": "anonymous",
"tableType": "Regular",
"columns": [
{
"name": "agent_id",
"dataType": "VARCHAR",
"dataLength": 100,
"dataTypeDisplay": "varchar",
"description": "Agent identifier",
"tags": [],
"ordinalPosition": 1
},
{
"name": "performance_score",
"dataType": "DECIMAL",
"dataTypeDisplay": "decimal",
"description": "Overall performance score",
"tags": [],
"ordinalPosition": 2
}
],
"database": {
"id": "50da1ff8-4e1d-4967-8931-45edbf4dd908",
"type": "database",
"name": "sample_data.ecommerce_db",
"description": "This **mock** database contains tables related to shopify sales and orders with related dimension tables.",
"href": "http://localhost:8585/api/v1/databases/50da1ff8-4e1d-4967-8931-45edbf4dd908"
},
"tags": [],
"usageSummary": {
"dailyStats": {
"count": 50,
"percentileRank": 60
},
"weeklyStats": {
"count": 250,
"percentileRank": 65
},
"monthlyStats": {
"count": 1000,
"percentileRank": 70
},
"date": "2021-12-01"
},
"followers": []
}
]
}

View File

@ -948,6 +948,12 @@
<artifactId>opensearch-rest-high-level-client</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.open-metadata</groupId>
<artifactId>opensearch-deps</artifactId>
<version>1.8.0-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
<profiles>

View File

@ -72,6 +72,20 @@ public interface SearchSourceBuilderFactory<S, Q, H, F> {
* @return a search source builder configured for the specific entity type
*/
default S getSearchSourceBuilder(String index, String q, int from, int size) {
return getSearchSourceBuilder(index, q, from, size, false);
}
/**
* Get the appropriate search source builder based on the index name.
*
* @param index the index name
* @param q the search query
* @param from the starting offset
* @param size the number of results to return
* @param explain whether to include explanation of the search results
* @return a search source builder configured for the specific entity type
*/
default S getSearchSourceBuilder(String index, String q, int from, int size, boolean explain) {
String indexName = Entity.getSearchRepository().getIndexNameWithoutAlias(index);
if (isTimeSeriesIndex(indexName)) {
@ -87,7 +101,7 @@ public interface SearchSourceBuilderFactory<S, Q, H, F> {
}
if (isDataAssetIndex(indexName)) {
return buildDataAssetSearchBuilder(indexName, q, from, size);
return buildDataAssetSearchBuilder(indexName, q, from, size, explain);
}
if (indexName.equals("all") || indexName.equals("dataAsset")) {
@ -105,6 +119,9 @@ public interface SearchSourceBuilderFactory<S, Q, H, F> {
S buildDataAssetSearchBuilder(String indexName, String query, int from, int size);
S buildDataAssetSearchBuilder(
String indexName, String query, int from, int size, boolean explain);
S buildCommonSearchBuilder(String query, int from, int size);
S buildUserOrTeamSearchBuilder(String query, int from, int size);

View File

@ -366,7 +366,11 @@ public class ElasticSearchClient implements SearchClient {
new ElasticSearchSourceBuilderFactory(searchSettings);
SearchSourceBuilder searchSourceBuilder =
searchBuilderFactory.getSearchSourceBuilder(
request.getIndex(), request.getQuery(), request.getFrom(), request.getSize());
request.getIndex(),
request.getQuery(),
request.getFrom(),
request.getSize(),
request.getExplain());
buildSearchRBACQuery(subjectContext, searchSourceBuilder);
// Add Filter

View File

@ -163,6 +163,12 @@ public class ElasticSearchSourceBuilderFactory
@Override
public SearchSourceBuilder buildDataAssetSearchBuilder(
String indexName, String query, int from, int size) {
return buildDataAssetSearchBuilder(indexName, query, from, size, false);
}
@Override
public SearchSourceBuilder buildDataAssetSearchBuilder(
String indexName, String query, int from, int size, boolean explain) {
AssetTypeConfiguration assetConfig = findAssetTypeConfig(indexName, searchSettings);
Map<String, Float> fuzzyFields;
Map<String, Float> nonFuzzyFields;
@ -312,7 +318,7 @@ public class ElasticSearchSourceBuilderFactory
}
addConfiguredAggregations(searchSourceBuilder, assetConfig);
searchSourceBuilder.explain(true);
searchSourceBuilder.explain(explain);
return searchSourceBuilder;
}

View File

@ -19,11 +19,11 @@ import static org.openmetadata.service.util.FullyQualifiedName.getParentFQN;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.openmetadata.schema.EntityInterface;
@ -128,29 +128,12 @@ public interface SearchIndex {
}
default Set<String> getFQNParts(String fqn) {
Set<String> fqnParts = new HashSet<>();
fqnParts.add(fqn);
String parent = FullyQualifiedName.getParentFQN(fqn);
while (parent != null) {
fqnParts.add(parent);
parent = FullyQualifiedName.getParentFQN(parent);
}
return fqnParts;
}
var parts = FullyQualifiedName.split(fqn);
var entityName = parts[parts.length - 1];
// Add suggest inputs to fqnParts to support partial/wildcard search on names.
// In some case of basic Test suite name is not part of the fullyQualifiedName, so it must be
// added separately.
default Set<String> getFQNParts(String fqn, List<String> fqnSplits) {
Set<String> fqnParts = new HashSet<>();
fqnParts.add(fqn);
String parent = FullyQualifiedName.getParentFQN(fqn);
while (parent != null) {
fqnParts.add(parent);
parent = FullyQualifiedName.getParentFQN(parent);
}
fqnParts.addAll(fqnSplits);
return fqnParts;
return FullyQualifiedName.getAllParts(fqn).stream()
.filter(part -> !part.equals(entityName))
.collect(Collectors.toSet());
}
default List<EntityReference> getEntitiesWithDisplayName(List<EntityReference> entities) {

View File

@ -137,6 +137,7 @@ import os.org.opensearch.action.delete.DeleteRequest;
import os.org.opensearch.action.get.GetRequest;
import os.org.opensearch.action.get.GetResponse;
import os.org.opensearch.action.search.SearchResponse;
import os.org.opensearch.action.search.SearchType;
import os.org.opensearch.action.support.WriteRequest;
import os.org.opensearch.action.support.master.AcknowledgedResponse;
import os.org.opensearch.action.update.UpdateRequest;
@ -384,7 +385,11 @@ public class OpenSearchClient implements SearchClient {
new OpenSearchSourceBuilderFactory(searchSettings);
SearchSourceBuilder searchSourceBuilder =
searchBuilderFactory.getSearchSourceBuilder(
request.getIndex(), request.getQuery(), request.getFrom(), request.getSize());
request.getIndex(),
request.getQuery(),
request.getFrom(),
request.getSize(),
request.getExplain());
buildSearchRBACQuery(subjectContext, searchSourceBuilder);
@ -509,6 +514,10 @@ public class OpenSearchClient implements SearchClient {
os.org.opensearch.action.search.SearchRequest searchRequest =
new os.org.opensearch.action.search.SearchRequest(request.getIndex());
searchRequest.source(searchSourceBuilder);
// Use DFS Query Then Fetch for consistent scoring across shards
searchRequest.searchType(SearchType.DFS_QUERY_THEN_FETCH);
os.org.opensearch.action.search.SearchResponse response =
client.search(searchRequest, os.org.opensearch.client.RequestOptions.DEFAULT);
if (response.getHits() != null
@ -541,6 +550,10 @@ public class OpenSearchClient implements SearchClient {
os.org.opensearch.action.search.SearchRequest osRequest =
new os.org.opensearch.action.search.SearchRequest(request.getIndex());
osRequest.source(searchSourceBuilder);
// Use DFS Query Then Fetch for consistent scoring across shards
osRequest.searchType(SearchType.DFS_QUERY_THEN_FETCH);
getSearchBuilderFactory().addAggregationsToNLQQuery(searchSourceBuilder, request.getIndex());
SearchResponse searchResponse = client.search(osRequest, OPENSEARCH_REQUEST_OPTIONS);
return Response.status(Response.Status.OK).entity(searchResponse.toString()).build();

View File

@ -162,6 +162,12 @@ public class OpenSearchSourceBuilderFactory
@Override
public SearchSourceBuilder buildDataAssetSearchBuilder(
String indexName, String query, int from, int size) {
return buildDataAssetSearchBuilder(indexName, query, from, size, false);
}
@Override
public SearchSourceBuilder buildDataAssetSearchBuilder(
String indexName, String query, int from, int size, boolean explain) {
AssetTypeConfiguration assetConfig = findAssetTypeConfig(indexName, searchSettings);
Map<String, Float> fuzzyFields;
Map<String, Float> nonFuzzyFields;
@ -312,7 +318,7 @@ public class OpenSearchSourceBuilderFactory
}
addConfiguredAggregations(searchSourceBuilder, assetConfig);
searchSourceBuilder.explain(true);
searchSourceBuilder.explain(explain);
return searchSourceBuilder;
}

View File

@ -14,7 +14,9 @@
package org.openmetadata.service.util;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.antlr.v4.runtime.BailErrorStrategy;
@ -206,4 +208,63 @@ public class FullyQualifiedName {
public static String getColumnName(String columnFQN) {
return FullyQualifiedName.split(columnFQN)[4]; // Get from column name from FQN
}
/**
* Generates all possible FQN parts for search and matching purposes.
* For example, given FQN "service.database.schema.table", this method generates:
* - Full hierarchy: "service", "service.database", "service.database.schema", "service.database.schema.table"
* - Individual parts: "service", "database", "schema", "table"
* - Bottom-up combinations: "database.schema.table", "schema.table", "table"
*
* @param fqn The fully qualified name to generate parts from
* @return Set of all possible FQN parts
*/
public static Set<String> getAllParts(String fqn) {
var parts = split(fqn);
var fqnParts = new HashSet<String>();
// Generate all possible sub-paths
for (int start = 0; start < parts.length; start++) {
for (int end = start + 1; end <= parts.length; end++) {
var subPath =
String.join(Entity.SEPARATOR, java.util.Arrays.copyOfRange(parts, start, end));
fqnParts.add(subPath);
}
}
return fqnParts;
}
/**
* Generates hierarchical FQN parts from root to the full FQN.
* For example, given FQN "service.database.schema.table", this method generates:
* ["service", "service.database", "service.database.schema", "service.database.schema.table"]
*
* @param fqn The fully qualified name to generate hierarchy from
* @return List of hierarchical FQN parts from root to full FQN
*/
public static List<String> getHierarchicalParts(String fqn) {
var parts = split(fqn);
return java.util.stream.IntStream.rangeClosed(1, parts.length)
.mapToObj(i -> String.join(Entity.SEPARATOR, java.util.Arrays.copyOfRange(parts, 0, i)))
.toList();
}
/**
* Gets all ancestor FQNs for a given FQN.
* For example, given FQN "service.database.schema.table", this method returns:
* ["service.database.schema", "service.database", "service"]
*
* @param fqn The fully qualified name to get ancestors from
* @return List of ancestor FQNs (excluding the input FQN itself)
*/
public static List<String> getAncestors(String fqn) {
var parts = split(fqn);
return java.util.stream.IntStream.range(1, parts.length)
.mapToObj(
i ->
String.join(
Entity.SEPARATOR, java.util.Arrays.copyOfRange(parts, 0, parts.length - i)))
.toList();
}
}

View File

@ -154,7 +154,7 @@
"aggregations": [
],
"scoreMode": "sum",
"boostMode": "multiply"
"boostMode": "sum"
},
{
"assetType": "databaseSchema",
@ -200,7 +200,7 @@
"aggregations": [
],
"scoreMode": "sum",
"boostMode": "multiply"
"boostMode": "sum"
},
{
"assetType": "table",
@ -271,7 +271,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"description",
@ -341,7 +341,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"description",
@ -400,7 +400,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"description",
@ -468,7 +468,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -571,7 +571,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -641,7 +641,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -701,7 +701,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -762,7 +762,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -823,7 +823,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -888,7 +888,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -976,7 +976,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -1027,7 +1027,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -1073,7 +1073,7 @@
"aggregations": [
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -1127,7 +1127,7 @@
"aggregations": [
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",
@ -1204,7 +1204,7 @@
}
],
"scoreMode": "sum",
"boostMode": "multiply",
"boostMode": "sum",
"highlightFields": [
"name",
"displayName",

View File

@ -0,0 +1,91 @@
package org.openmetadata.service.search.indexes;
import static org.junit.jupiter.api.Assertions.*;
import java.util.Set;
import java.util.stream.Collectors;
import org.junit.jupiter.api.Test;
import org.openmetadata.service.util.FullyQualifiedName;
class SearchIndexTest {
// Test the getFQNParts logic directly without instantiating SearchIndex
private Set<String> getFQNParts(String fqn) {
var parts = FullyQualifiedName.split(fqn);
var entityName = parts[parts.length - 1];
return FullyQualifiedName.getAllParts(fqn).stream()
.filter(part -> !part.equals(entityName))
.collect(Collectors.toSet());
}
@Test
void testGetFQNParts_excludesEntityName() {
String tableFqn = "service.database.schema.table";
Set<String> parts = getFQNParts(tableFqn);
assertFalse(parts.contains("table"), "Entity name 'table' should not be included in FQN parts");
assertTrue(parts.contains("service"));
assertTrue(parts.contains("database"));
assertTrue(parts.contains("schema"));
assertTrue(parts.contains("service.database"));
assertTrue(parts.contains("service.database.schema"));
assertTrue(parts.contains("service.database.schema.table"));
assertTrue(parts.contains("database.schema"));
assertTrue(parts.contains("schema.table"));
assertTrue(parts.contains("database.schema.table"));
assertEquals(9, parts.size());
}
@Test
void testGetFQNParts_withDifferentPatterns() {
// Test pipeline pattern: service.pipeline
String pipelineFqn = "airflow.my_pipeline";
Set<String> pipelineParts = getFQNParts(pipelineFqn);
assertFalse(pipelineParts.contains("my_pipeline"), "Entity name should not be included");
assertTrue(pipelineParts.contains("airflow"));
assertEquals(2, pipelineParts.size());
// Test dashboard pattern: service.dashboard
String dashboardFqn = "looker.sales_dashboard";
Set<String> dashboardParts = getFQNParts(dashboardFqn);
assertFalse(dashboardParts.contains("sales_dashboard"), "Entity name should not be included");
assertTrue(dashboardParts.contains("looker"));
assertEquals(2, dashboardParts.size());
// Test dashboard chart pattern: service.dashboard.chart
String chartFqn = "tableau.analytics.revenue_chart";
Set<String> chartParts = getFQNParts(chartFqn);
assertFalse(chartParts.contains("revenue_chart"), "Entity name should not be included");
assertTrue(chartParts.contains("tableau"));
assertTrue(chartParts.contains("analytics"));
assertTrue(chartParts.contains("tableau.analytics"));
assertEquals(5, chartParts.size());
}
@Test
void testGetFQNParts_withQuotedNames() {
// Test with quoted names that contain dots
String quotedFqn = "\"service.v1\".database.\"schema.prod\".\"table.users\"";
Set<String> parts = getFQNParts(quotedFqn);
// Verify that the entity name is not included
assertFalse(parts.contains("\"table.users\""), "Entity name should not be included");
assertFalse(parts.contains("table.users"), "Entity name should not be included");
// Verify other parts are included
assertTrue(parts.contains("\"service.v1\""));
assertTrue(parts.contains("database"));
assertTrue(parts.contains("\"schema.prod\""));
}
@Test
void testGetFQNParts_withSinglePart() {
// Test with a single part FQN (edge case)
String singlePartFqn = "standalone_entity";
Set<String> parts = getFQNParts(singlePartFqn);
// Should return empty set since we exclude the entity name
assertTrue(parts.isEmpty(), "Single part FQN should return empty set");
}
}

View File

@ -7,6 +7,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.util.List;
import java.util.Set;
import org.antlr.v4.runtime.misc.ParseCancellationException;
import org.junit.jupiter.api.Test;
@ -96,4 +97,82 @@ class FullyQualifiedNameTest {
assertFalse(FullyQualifiedName.isParent("a.b.c", "a.b.c"));
assertFalse(FullyQualifiedName.isParent("a.b c", "a.b"));
}
@Test
void test_getAllParts() {
Set<String> parts = FullyQualifiedName.getAllParts("a.b.c.d");
assertTrue(parts.contains("a"));
assertTrue(parts.contains("b"));
assertTrue(parts.contains("c"));
assertTrue(parts.contains("d"));
// Should contain top-down hierarchy
assertTrue(parts.contains("a"));
assertTrue(parts.contains("a.b"));
assertTrue(parts.contains("a.b.c"));
assertTrue(parts.contains("a.b.c.d"));
// Should contain bottom-up combinations
assertTrue(parts.contains("b.c.d"));
assertTrue(parts.contains("c.d"));
assertEquals(10, parts.size()); // 4 individual + 4 top-down + 2 bottom-up
// Test with quoted names
Set<String> quotedParts = FullyQualifiedName.getAllParts("\"a.1\".\"b.2\".c.d");
assertTrue(quotedParts.contains("\"a.1\""));
assertTrue(quotedParts.contains("\"b.2\""));
assertTrue(quotedParts.contains("c"));
assertTrue(quotedParts.contains("d"));
assertTrue(quotedParts.contains("\"a.1\".\"b.2\".c.d"));
assertTrue(quotedParts.contains("\"b.2\".c.d"));
// Test with single part
Set<String> singlePart = FullyQualifiedName.getAllParts("service");
assertEquals(1, singlePart.size());
assertTrue(singlePart.contains("service"));
}
@Test
void test_getHierarchicalParts() {
List<String> hierarchy = FullyQualifiedName.getHierarchicalParts("a.b.c.d");
assertEquals(4, hierarchy.size());
assertEquals("a", hierarchy.get(0));
assertEquals("a.b", hierarchy.get(1));
assertEquals("a.b.c", hierarchy.get(2));
assertEquals("a.b.c.d", hierarchy.get(3));
// Test with quoted names
List<String> quotedHierarchy = FullyQualifiedName.getHierarchicalParts("\"a.1\".b.\"c.3\"");
assertEquals(3, quotedHierarchy.size());
assertEquals("\"a.1\"", quotedHierarchy.get(0));
assertEquals("\"a.1\".b", quotedHierarchy.get(1));
assertEquals("\"a.1\".b.\"c.3\"", quotedHierarchy.get(2));
// Test with single part
List<String> singleHierarchy = FullyQualifiedName.getHierarchicalParts("service");
assertEquals(1, singleHierarchy.size());
assertEquals("service", singleHierarchy.getFirst());
}
@Test
void test_getAncestors() {
List<String> ancestors = FullyQualifiedName.getAncestors("a.b.c.d");
assertEquals(3, ancestors.size());
assertEquals("a.b.c", ancestors.get(0));
assertEquals("a.b", ancestors.get(1));
assertEquals("a", ancestors.get(2));
List<String> twoPartAncestors = FullyQualifiedName.getAncestors("a.b");
assertEquals(1, twoPartAncestors.size());
assertEquals("a", twoPartAncestors.getFirst());
// Test with single part (no ancestors)
List<String> noAncestors = FullyQualifiedName.getAncestors("service");
assertEquals(0, noAncestors.size());
// Test with quoted names
List<String> quotedAncestors = FullyQualifiedName.getAncestors("\"a.1\".b.\"c.3\".d");
assertEquals(3, quotedAncestors.size());
assertEquals("\"a.1\".b.\"c.3\"", quotedAncestors.get(0));
assertEquals("\"a.1\".b", quotedAncestors.get(1));
assertEquals("\"a.1\"", quotedAncestors.get(2));
}
}

View File

@ -1,3 +1,15 @@
/*
* Copyright 2025 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This schema defines the `Database Service` is a service such as MySQL, BigQuery,
* Redshift, Postgres, or Snowflake. Alternative terms such as Database Cluster, Database

View File

@ -1,3 +1,15 @@
/*
* Copyright 2025 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Ingestion Pipeline Config is used to set up a DAG and deploy. This entity is used to
* setup metadata/quality pipelines on Apache Airflow.

View File

@ -1,3 +1,15 @@
/*
* Copyright 2025 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This schema defines the Pipeline Service entity, such as Airflow and Prefect.
*/

View File

@ -1,3 +1,15 @@
/*
* Copyright 2025 Collate.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Defines a background job that is triggered on insertion of new record in background_jobs
* table.