fix(structuredProperties): fixes underscore behavior in structured property names (#11746)

Co-authored-by: david-leifker <114954101+david-leifker@users.noreply.github.com>
This commit is contained in:
RyanHolstien 2024-10-30 13:41:09 -05:00 committed by GitHub
parent 143fc011fa
commit cf3b08fecb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 282 additions and 32 deletions

View File

@ -6,7 +6,6 @@ import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_ENTITY_NAME;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD;
import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.Status;
@ -120,12 +119,13 @@ public class StructuredPropertyUtils {
lookupDefinitionFromFilterOrFacetName(
@Nonnull String fieldOrFacetName, @Nullable AspectRetriever aspectRetriever) {
if (fieldOrFacetName.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD + ".")) {
String fqn =
fieldOrFacetName
.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)
.replace(".keyword", "")
.replace(".delimited", "");
// Coming in from the UI this is structuredProperties.<FQN> + any particular specifier for
// subfield (.keyword etc)
String fqn = fieldOrFacetToFQN(fieldOrFacetName);
// FQN Maps directly to URN with urn:li:structuredProperties:FQN
Urn urn = toURNFromFQN(fqn);
Map<Urn, Map<String, Aspect>> result =
Objects.requireNonNull(aspectRetriever)
.getLatestAspectObjects(
@ -223,9 +223,8 @@ public class StructuredPropertyUtils {
* @param fqn structured property's fqn
* @return the expected structured property urn
*/
private static Urn toURNFromFQN(@Nonnull String fqn) {
return UrnUtils.getUrn(
String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fqn.replace('_', '.')));
public static Urn toURNFromFQN(@Nonnull String fqn) {
return UrnUtils.getUrn(String.join(":", "urn:li", STRUCTURED_PROPERTY_ENTITY_NAME, fqn));
}
public static void validateFilter(
@ -235,12 +234,13 @@ public class StructuredPropertyUtils {
return;
}
Set<String> fieldNames = new HashSet<>();
Set<String> fqns = new HashSet<>();
if (filter.getCriteria() != null) {
for (Criterion c : filter.getCriteria()) {
if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
fieldNames.add(stripStructuredPropertyPrefix(c.getField()));
String fqn = fieldOrFacetToFQN(c.getField());
fqns.add(fqn);
}
}
}
@ -249,24 +249,23 @@ public class StructuredPropertyUtils {
for (ConjunctiveCriterion cc : filter.getOr()) {
for (Criterion c : cc.getAnd()) {
if (c.getField().startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
fieldNames.add(stripStructuredPropertyPrefix(c.getField()));
String fqn = fieldOrFacetToFQN(c.getField());
fqns.add(fqn);
}
}
}
}
if (!fieldNames.isEmpty()) {
validateStructuredPropertyFQN(fieldNames, Objects.requireNonNull(aspectRetriever));
if (!fqns.isEmpty()) {
validateStructuredPropertyFQN(fqns, Objects.requireNonNull(aspectRetriever));
}
}
private static String stripStructuredPropertyPrefix(String s) {
if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD_PREFIX)) {
return s.substring(STRUCTURED_PROPERTY_MAPPING_VERSIONED_FIELD.length() + 1).split("[.]")[0];
} else if (s.startsWith(STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX)) {
return s.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1).split("[.]")[0];
}
return s;
private static String fieldOrFacetToFQN(String fieldOrFacet) {
return fieldOrFacet
.substring(STRUCTURED_PROPERTY_MAPPING_FIELD.length() + 1)
.replace(".keyword", "")
.replace(".delimited", "");
}
public static Date toDate(PrimitivePropertyValue value) throws DateTimeParseException {

View File

@ -44,6 +44,8 @@ public class AggregationQueryBuilderTest {
public void setup() throws RemoteInvocationException, URISyntaxException {
Urn helloUrn = Urn.createFromString("urn:li:structuredProperty:hello");
Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten");
Urn underscoresAndDotsUrn =
Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess");
// legacy
aspectRetriever = mock(AspectRetriever.class);
@ -75,6 +77,21 @@ public class AggregationQueryBuilderTest {
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropAbFghTenDefinition.data()))));
StructuredPropertyDefinition structPropUnderscoresAndDotsDefinition =
new StructuredPropertyDefinition();
structPropUnderscoresAndDotsDefinition.setVersion(null, SetMode.REMOVE_IF_NULL);
structPropUnderscoresAndDotsDefinition.setValueType(
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string"));
structPropUnderscoresAndDotsDefinition.setQualifiedName("under.scores.and.dots_make_a_mess");
structPropUnderscoresAndDotsDefinition.setDisplayName("under.scores.and.dots_make_a_mess");
when(aspectRetriever.getLatestAspectObjects(eq(Set.of(underscoresAndDotsUrn)), anySet()))
.thenReturn(
Map.of(
underscoresAndDotsUrn,
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropUnderscoresAndDotsDefinition.data()))));
// V1
aspectRetrieverV1 = mock(AspectRetriever.class);
when(aspectRetrieverV1.getEntityRegistry())
@ -105,6 +122,21 @@ public class AggregationQueryBuilderTest {
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropAbFghTenDefinitionV1.data()))));
StructuredPropertyDefinition structPropUnderscoresAndDotsDefinitionV1 =
new StructuredPropertyDefinition();
structPropUnderscoresAndDotsDefinitionV1.setVersion("00000000000001");
structPropUnderscoresAndDotsDefinitionV1.setValueType(
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string"));
structPropUnderscoresAndDotsDefinitionV1.setQualifiedName("under.scores.and.dots_make_a_mess");
structPropUnderscoresAndDotsDefinitionV1.setDisplayName("under.scores.and.dots_make_a_mess");
when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(underscoresAndDotsUrn)), anySet()))
.thenReturn(
Map.of(
underscoresAndDotsUrn,
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropUnderscoresAndDotsDefinitionV1.data()))));
}
@Test
@ -269,6 +301,46 @@ public class AggregationQueryBuilderTest {
Set.of("structuredProperties.ab_fgh_ten.keyword", "structuredProperties.hello.keyword"));
}
@Test
public void testAggregateOverStructuredPropertyNamespaced() {
SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);
AggregationQueryBuilder builder =
new AggregationQueryBuilder(
config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));
List<AggregationBuilder> aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
Assert.assertEquals(aggs.size(), 1);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
// Check that field name is sanitized to correct field name
Assert.assertEquals(
agg.field(),
"structuredProperties.under_scores_and_dots_make_a_mess.keyword",
"Terms aggregate must be on a keyword or subfield keyword");
// Two structured properties
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetriever),
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties.hello"));
Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties.under_scores_and_dots_make_a_mess.keyword",
"structuredProperties.hello.keyword"));
}
@Test
public void testAggregateOverStructuredPropertyV1() {
SearchConfiguration config = new SearchConfiguration();
@ -309,6 +381,46 @@ public class AggregationQueryBuilderTest {
"structuredProperties._versioned.hello.00000000000001.string.keyword"));
}
@Test
public void testAggregateOverStructuredPropertyNamespacedV1() {
SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);
AggregationQueryBuilder builder =
new AggregationQueryBuilder(
config, ImmutableMap.of(mock(EntitySpec.class), ImmutableList.of()));
List<AggregationBuilder> aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of("structuredProperties.under.scores.and.dots_make_a_mess"));
Assert.assertEquals(aggs.size(), 1);
AggregationBuilder aggBuilder = aggs.get(0);
Assert.assertTrue(aggBuilder instanceof TermsAggregationBuilder);
TermsAggregationBuilder agg = (TermsAggregationBuilder) aggBuilder;
// Check that field name is sanitized to correct field name
Assert.assertEquals(
agg.field(),
"structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
"Terms aggregation must be on a keyword field or subfield.");
// Two structured properties
aggs =
builder.getAggregations(
TestOperationContexts.systemContextNoSearchAuthorization(aspectRetrieverV1),
List.of(
"structuredProperties.under.scores.and.dots_make_a_mess",
"structuredProperties._versioned.hello.00000000000001.string"));
Assert.assertEquals(aggs.size(), 2);
Assert.assertEquals(
aggs.stream()
.map(aggr -> ((TermsAggregationBuilder) aggr).field())
.collect(Collectors.toSet()),
Set.of(
"structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword",
"structuredProperties._versioned.hello.00000000000001.string.keyword"));
}
@Test
public void testAggregateOverFieldsAndStructProp() {
SearchableAnnotation annotation1 =

View File

@ -1,24 +1,34 @@
package com.linkedin.metadata.search.query.request;
import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES;
import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME;
import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion;
import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion;
import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion;
import static com.linkedin.metadata.utils.SearchUtil.*;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import static org.testng.Assert.*;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.DataMap;
import com.linkedin.data.template.StringArray;
import com.linkedin.datahub.graphql.generated.EntityType;
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
import com.linkedin.entity.Aspect;
import com.linkedin.metadata.TestEntitySpecBuilder;
import com.linkedin.metadata.aspect.AspectRetriever;
import com.linkedin.metadata.aspect.GraphRetriever;
import com.linkedin.metadata.config.search.ExactMatchConfiguration;
import com.linkedin.metadata.config.search.PartialConfiguration;
import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.config.search.WordGramConfiguration;
import com.linkedin.metadata.entity.SearchRetriever;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.StructuredPropertyUtils;
import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.ConjunctiveCriterion;
import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray;
@ -28,6 +38,8 @@ import com.linkedin.metadata.query.filter.Filter;
import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain;
import com.linkedin.metadata.search.elasticsearch.query.request.SearchRequestHandler;
import io.datahubproject.metadata.context.OperationContext;
import io.datahubproject.metadata.context.RetrieverContext;
import io.datahubproject.test.metadata.context.TestOperationContexts;
import io.datahubproject.test.search.config.SearchCommonTestConfiguration;
import java.util.ArrayList;
import java.util.Collection;
@ -633,6 +645,49 @@ public class SearchRequestHandlerTest extends AbstractTestNGSpringContextTests {
assertEquals(((ExistsQueryBuilder) mustHaveV1.must().get(0)).fieldName(), "browsePaths");
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testInvalidStructuredProperty() {
AspectRetriever aspectRetriever = mock(AspectRetriever.class);
Map<Urn, Map<String, Aspect>> aspectResponse = new HashMap<>();
DataMap statusData = new DataMap();
statusData.put("removed", true);
Aspect status = new Aspect(statusData);
Urn structPropUrn = StructuredPropertyUtils.toURNFromFQN("under.scores.and.dots.make_a_mess");
aspectResponse.put(structPropUrn, ImmutableMap.of(STATUS_ASPECT_NAME, status));
when(aspectRetriever.getLatestAspectObjects(
Collections.singleton(structPropUrn), ImmutableSet.of(STATUS_ASPECT_NAME)))
.thenReturn(aspectResponse);
OperationContext mockRetrieverContext =
TestOperationContexts.systemContextNoSearchAuthorization(
RetrieverContext.builder()
.aspectRetriever(aspectRetriever)
.graphRetriever(mock(GraphRetriever.class))
.searchRetriever(mock(SearchRetriever.class))
.build());
Criterion structuredPropCriterion =
buildExistsCriterion("structuredProperties.under.scores.and.dots.make_a_mess");
CriterionArray criterionArray = new CriterionArray();
criterionArray.add(structuredPropCriterion);
ConjunctiveCriterion conjunctiveCriterion = new ConjunctiveCriterion();
conjunctiveCriterion.setAnd(criterionArray);
ConjunctiveCriterionArray conjunctiveCriterionArray = new ConjunctiveCriterionArray();
conjunctiveCriterionArray.add(conjunctiveCriterion);
Filter filter = new Filter();
filter.setOr(conjunctiveCriterionArray);
BoolQueryBuilder test =
SearchRequestHandler.getFilterQuery(
mockRetrieverContext.withSearchFlags(flags -> flags.setFulltext(false)),
filter,
new HashMap<>(),
QueryFilterRewriteChain.EMPTY);
}
@Test
public void testQueryByDefault() {
final Set<String> COMMON =

View File

@ -43,6 +43,8 @@ public class ESUtilsTest {
@BeforeClass
public static void setup() throws RemoteInvocationException, URISyntaxException {
Urn abFghTenUrn = Urn.createFromString("urn:li:structuredProperty:ab.fgh.ten");
Urn underscoresAndDotsUrn =
Urn.createFromString("urn:li:structuredProperty:under.scores.and.dots_make_a_mess");
// legacy
aspectRetriever = mock(AspectRetriever.class);
@ -62,6 +64,20 @@ public class ESUtilsTest {
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropAbFghTenDefinition.data()))));
StructuredPropertyDefinition structPropUnderscoresAndDotsDefinition =
new StructuredPropertyDefinition();
structPropUnderscoresAndDotsDefinition.setVersion(null, SetMode.REMOVE_IF_NULL);
structPropUnderscoresAndDotsDefinition.setValueType(
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string"));
structPropUnderscoresAndDotsDefinition.setQualifiedName("under.scores.and.dots_make_a_mess");
when(aspectRetriever.getLatestAspectObjects(eq(Set.of(underscoresAndDotsUrn)), anySet()))
.thenReturn(
Map.of(
underscoresAndDotsUrn,
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropUnderscoresAndDotsDefinition.data()))));
// V1
aspectRetrieverV1 = mock(AspectRetriever.class);
when(aspectRetrieverV1.getEntityRegistry())
@ -80,6 +96,20 @@ public class ESUtilsTest {
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropAbFghTenDefinitionV1.data()))));
StructuredPropertyDefinition structPropUnderscoresAndDotsDefinitionV1 =
new StructuredPropertyDefinition();
structPropUnderscoresAndDotsDefinitionV1.setVersion("00000000000001");
structPropUnderscoresAndDotsDefinitionV1.setValueType(
Urn.createFromString(DATA_TYPE_URN_PREFIX + "string"));
structPropUnderscoresAndDotsDefinitionV1.setQualifiedName("under.scores.and.dots_make_a_mess");
when(aspectRetrieverV1.getLatestAspectObjects(eq(Set.of(underscoresAndDotsUrn)), anySet()))
.thenReturn(
Map.of(
underscoresAndDotsUrn,
Map.of(
STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME,
new Aspect(structPropUnderscoresAndDotsDefinitionV1.data()))));
}
@Test
@ -703,6 +733,31 @@ public class ESUtilsTest {
Assert.assertEquals(result.toString(), expected);
}
@Test
public void testGetQueryBuilderFromNamespacedStructPropEqualsValue() {
final Criterion singleValueCriterion =
buildCriterion(
"structuredProperties.under.scores.and.dots_make_a_mess", Condition.EQUAL, "value1");
OperationContext opContext = mock(OperationContext.class);
when(opContext.getAspectRetriever()).thenReturn(aspectRetriever);
QueryBuilder result =
ESUtils.getQueryBuilderFromCriterion(
singleValueCriterion, false, new HashMap<>(), opContext, QueryFilterRewriteChain.EMPTY);
String expected =
"{\n"
+ " \"terms\" : {\n"
+ " \"structuredProperties.under_scores_and_dots_make_a_mess.keyword\" : [\n"
+ " \"value1\"\n"
+ " ],\n"
+ " \"boost\" : 1.0,\n"
+ " \"_name\" : \"structuredProperties.under.scores.and.dots_make_a_mess\"\n"
+ " }\n"
+ "}";
Assert.assertEquals(result.toString(), expected);
}
@Test
public void testGetQueryBuilderFromStructPropEqualsValueV1() {
@ -731,6 +786,35 @@ public class ESUtilsTest {
Assert.assertEquals(result.toString(), expected);
}
@Test
public void testGetQueryBuilderFromNamespacedStructPropEqualsValueV1() {
final Criterion singleValueCriterion =
buildCriterion(
"structuredProperties.under.scores.and.dots_make_a_mess", Condition.EQUAL, "value1");
OperationContext opContextV1 = mock(OperationContext.class);
when(opContextV1.getAspectRetriever()).thenReturn(aspectRetrieverV1);
QueryBuilder result =
ESUtils.getQueryBuilderFromCriterion(
singleValueCriterion,
false,
new HashMap<>(),
opContextV1,
QueryFilterRewriteChain.EMPTY);
String expected =
"{\n"
+ " \"terms\" : {\n"
+ " \"structuredProperties._versioned.under_scores_and_dots_make_a_mess.00000000000001.string.keyword\" : [\n"
+ " \"value1\"\n"
+ " ],\n"
+ " \"boost\" : 1.0,\n"
+ " \"_name\" : \"structuredProperties.under.scores.and.dots_make_a_mess\"\n"
+ " }\n"
+ "}";
Assert.assertEquals(result.toString(), expected);
}
@Test
public void testGetQueryBuilderFromStructPropExists() {
final Criterion singleValueCriterion = buildExistsCriterion("structuredProperties.ab.fgh.ten");

View File

@ -165,13 +165,13 @@ def get_property_from_entity(
return None
def to_es_name(property_name=None, namespace=default_namespace, qualified_name=None):
def to_es_filter_name(
property_name=None, namespace=default_namespace, qualified_name=None
):
if property_name:
namespace_field = namespace.replace(".", "_")
return f"structuredProperties.{namespace_field}_{property_name}"
return f"structuredProperties.{namespace}.{property_name}"
else:
escaped_qualified_name = qualified_name.replace(".", "_")
return f"structuredProperties.{escaped_qualified_name}"
return f"structuredProperties.{qualified_name}"
# @tenacity.retry(
@ -446,7 +446,7 @@ def test_structured_property_search(
graph_client.get_urns_by_filter(
extraFilters=[
{
"field": to_es_name(dataset_property_name),
"field": to_es_filter_name(dataset_property_name),
"negated": "false",
"condition": "EXISTS",
}
@ -475,7 +475,7 @@ def test_structured_property_search(
entity_types=["tag"],
extraFilters=[
{
"field": to_es_name(
"field": to_es_filter_name(
field_property_name, namespace="io.datahubproject.test"
),
"negated": "false",
@ -492,7 +492,7 @@ def test_structured_property_search(
entity_types=["dataset", "tag"],
extraFilters=[
{
"field": to_es_name(dataset_property_name),
"field": to_es_filter_name(dataset_property_name),
"negated": "false",
"condition": "EXISTS",
}
@ -690,7 +690,7 @@ def test_dataset_structured_property_soft_delete_search_filter_validation(
graph_client.get_urns_by_filter(
extraFilters=[
{
"field": to_es_name(dataset_property_name),
"field": to_es_filter_name(property_name=dataset_property_name),
"negated": "false",
"condition": "EXISTS",
}
@ -710,7 +710,7 @@ def test_dataset_structured_property_soft_delete_search_filter_validation(
graph_client.get_urns_by_filter(
extraFilters=[
{
"field": to_es_name(dataset_property_name),
"field": to_es_filter_name(property_name=dataset_property_name),
"negated": "false",
"condition": "EXISTS",
}
@ -779,7 +779,7 @@ def test_dataset_structured_property_delete(ingest_cleanup_data, graph_client, c
graph_client.get_urns_by_filter(
extraFilters=[
{
"field": to_es_name(qualified_name=qualified_name),
"field": to_es_filter_name(qualified_name=qualified_name),
"negated": "false",
"condition": "EXISTS",
}