feat(search): Add searchable annotation to maps (#3136)

This commit is contained in:
Dexter Lee 2021-09-07 22:58:44 -07:00 committed by GitHub
parent 792a08c283
commit e30d7238c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 99 additions and 50 deletions

View File

@ -167,11 +167,11 @@ The Aspect has four key components: its properties, the @Aspect annotation, the
references to other entities, of type Urn or optionally `<Entity>Urn`
- The @Aspect annotation. This is used to declare that the record is an Aspect and can be included in an entity's
Snapshot. Unlike the other two annotations, @Aspect is applied to the entire record rather than a specific field.
Note, you can mark an aspect as a timeseries aspect. Check out
this [doc](metadata-model.md#timeseries-aspects) for details.
- The @Searchable annotation. This annotation can be applied to any primitive field to indicate that it should be
indexed in Elasticsearch and can be searched on. For a complete guide on using the search annotation, see the
annotation docs further down in this document.
Note, you can mark an aspect as a timeseries aspect. Check out this [doc](metadata-model.md#timeseries-aspects) for
details.
- The @Searchable annotation. This annotation can be applied to any primitive field or a map field to indicate that it
should be indexed in Elasticsearch and can be searched on. For a complete guide on using the search annotation, see
the annotation docs further down in this document.
- The @Relationship annotations. These annotations create edges between the Snapshot's Urn and the destination of the
annotated field when the snapshots are ingested. @Relationship annotations must be applied to fields of type Urn. In
the case of DashboardInfo, the `charts` field is an Array of Urns. The @Relationship annotation cannot be applied
@ -398,6 +398,9 @@ ranking.
Now, when Datahub ingests Dashboards, it will index the Dashboard's title in Elasticsearch. When a user searches for
Dashboards, that query will be used to search on the title index and matching Dashboards will be returned.
Note, when the @Searchable annotation is applied to a map, the map is converted into a list whose elements are strings
of the form "key.toString()=value.toString()". This allows us to index map fields without increasing the number of columns indexed.
#### @Relationship
This annotation is applied to fields inside an Aspect. This annotation creates edges between an Entity's Urn and the

View File

@ -24,6 +24,8 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
private final List<SearchableFieldSpec> _specs = new ArrayList<>();
private final Map<String, String> _searchFieldNamesToPatch = new HashMap<>();
private static final String MAP = "map";
public List<SearchableFieldSpec> getSpecs() {
return _specs;
}
@ -38,45 +40,59 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
final DataSchema currentSchema = context.getCurrentSchema().getDereferencedDataSchema();
// First, check properties for primary annotation definition.
final Map<String, Object> properties = context.getEnclosingField().getProperties();
final Object primaryAnnotationObj = properties.get(SearchableAnnotation.ANNOTATION_NAME);
final Object annotationObj = getAnnotationObj(context);
if (primaryAnnotationObj != null) {
validatePropertiesAnnotation(currentSchema, primaryAnnotationObj, context.getTraversePath().toString());
}
// Next, check resolved properties for annotations on primitives.
final Map<String, Object> resolvedProperties = FieldSpecUtils.getResolvedProperties(currentSchema);
final Object resolvedAnnotationObj = resolvedProperties.get(SearchableAnnotation.ANNOTATION_NAME);
if (resolvedAnnotationObj != null) {
if (annotationObj != null) {
if (currentSchema.getDereferencedDataSchema().isComplex()) {
final ComplexDataSchema complexSchema = (ComplexDataSchema) currentSchema;
if (isValidComplexType(complexSchema)) {
extractSearchableAnnotation(resolvedAnnotationObj, currentSchema, context);
extractSearchableAnnotation(annotationObj, currentSchema, context);
}
} else if (isValidPrimitiveType((PrimitiveDataSchema) currentSchema)) {
extractSearchableAnnotation(resolvedAnnotationObj, currentSchema, context);
extractSearchableAnnotation(annotationObj, currentSchema, context);
} else {
throw new ModelValidationException(String.format("Invalid @Searchable Annotation at %s", context.getSchemaPathSpec().toString()));
throw new ModelValidationException(
String.format("Invalid @Searchable Annotation at %s", context.getSchemaPathSpec().toString()));
}
}
}
}
private void extractSearchableAnnotation(
final Object annotationObj,
final DataSchema currentSchema,
/**
 * Resolves the @Searchable annotation object that applies to the schema node currently being visited.
 *
 * <p>Lookup order:
 * <ol>
 *   <li>The properties of the enclosing field. If an annotation is declared there it is validated, and
 *       when the current schema is a MAP and the annotation is a non-empty {@link Map}, the value of the
 *       first entry of that map is returned.</li>
 *   <li>If the traverse path contains the {@code MAP} segment, {@code null} is returned.</li>
 *   <li>Otherwise, the annotation found in the resolved properties of the current schema.</li>
 * </ol>
 *
 * @param context traverser context of the schema node being visited
 * @return the annotation object to extract, or {@code null} when none applies
 */
private Object getAnnotationObj(TraverserContext context) {
  final DataSchema schema = context.getCurrentSchema().getDereferencedDataSchema();

  // Step 1: annotation declared directly on the enclosing field.
  final Map<String, Object> fieldProperties = context.getEnclosingField().getProperties();
  final Object declaredAnnotation = fieldProperties.get(SearchableAnnotation.ANNOTATION_NAME);
  if (declaredAnnotation != null) {
    validatePropertiesAnnotation(schema, declaredAnnotation, context.getTraversePath().toString());

    // Annotations on collections always have to be a nested map (byproduct of making overrides work).
    // For map fields the annotation is therefore a single-entry map whose key carries no meaning,
    // so only the value of the first entry is used.
    final boolean isMapSchema = schema.getDereferencedType() == DataSchema.Type.MAP;
    if (isMapSchema && declaredAnnotation instanceof Map) {
      final Map<?, ?> wrapper = (Map<?, ?>) declaredAnnotation;
      if (!wrapper.isEmpty()) {
        return wrapper.entrySet().iterator().next().getValue();
      }
    }
  }

  // Step 2: anything located underneath a map (the map itself was handled above) is ignored.
  // NOTE(review): this is a plain membership test for the segment "map" in the traverse path; a field
  // that is itself named "map" would presumably match as well - confirm against the traverser's path format.
  if (context.getTraversePath().contains(MAP)) {
    return null;
  }

  // Step 3: fall back to annotations resolved onto the schema itself (annotations on primitives).
  return FieldSpecUtils.getResolvedProperties(schema).get(SearchableAnnotation.ANNOTATION_NAME);
}
private void extractSearchableAnnotation(final Object annotationObj, final DataSchema currentSchema,
final TraverserContext context) {
final PathSpec path = new PathSpec(context.getSchemaPathSpec());
final SearchableAnnotation annotation =
SearchableAnnotation.fromPegasusAnnotationObject(
annotationObj,
FieldSpecUtils.getSchemaFieldName(path),
SearchableAnnotation.fromPegasusAnnotationObject(annotationObj, FieldSpecUtils.getSchemaFieldName(path),
currentSchema.getDereferencedType(), path.toString());
if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName())
&& !_searchFieldNamesToPatch.get(annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) {
if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName()) && !_searchFieldNamesToPatch.get(
annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) {
throw new ModelValidationException(
String.format("Entity has multiple searchable fields with the same field name %s",
annotation.getFieldName()));
@ -97,7 +113,8 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
}
private Boolean isValidComplexType(final ComplexDataSchema schema) {
return DataSchema.Type.ENUM.equals(schema.getDereferencedDataSchema().getDereferencedType());
return DataSchema.Type.ENUM.equals(schema.getDereferencedDataSchema().getDereferencedType())
|| DataSchema.Type.MAP.equals(schema.getDereferencedDataSchema().getDereferencedType());
}
private Boolean isValidPrimitiveType(final PrimitiveDataSchema schema) {
@ -107,7 +124,9 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
private void validatePropertiesAnnotation(DataSchema currentSchema, Object annotationObj, String pathStr) {
// If primitive, assume the annotation is well formed until resolvedProperties reflects it.
if (currentSchema.isPrimitive() || currentSchema.getDereferencedType().equals(DataSchema.Type.ENUM)) {
if (currentSchema.isPrimitive() || currentSchema.getDereferencedType().equals(DataSchema.Type.ENUM) || currentSchema
.getDereferencedType()
.equals(DataSchema.Type.MAP)) {
return;
}
@ -115,26 +134,22 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
if (!Map.class.isAssignableFrom(annotationObj.getClass())) {
throw new ModelValidationException(String.format(
"Failed to validate @%s annotation declared inside %s: Invalid value type provided (Expected Map)",
SearchableAnnotation.ANNOTATION_NAME,
pathStr
));
SearchableAnnotation.ANNOTATION_NAME, pathStr));
}
Map<String, Object> annotationMap = (Map<String, Object>) annotationObj;
if (annotationMap.size() == 0) {
throw new ModelValidationException(
String.format("Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
pathStr,
currentSchema.getType()));
throw new ModelValidationException(String.format(
"Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
pathStr, currentSchema.getType()));
}
for (String key : annotationMap.keySet()) {
if (!key.startsWith(Character.toString(PathSpec.SEPARATOR))) {
throw new ModelValidationException(
String.format("Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
pathStr,
currentSchema.getType()));
throw new ModelValidationException(String.format(
"Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
pathStr, currentSchema.getType()));
}
}
}

View File

@ -104,6 +104,8 @@ public class SearchableAnnotation {
case INT:
case FLOAT:
return FieldType.COUNT;
case MAP:
return FieldType.KEYWORD;
default:
return FieldType.TEXT;
}

View File

@ -114,7 +114,12 @@ public class EntitySpecBuilderTest {
assertEquals(new TestEntityInfo().schema().getFullName(), testEntityInfo.getPegasusSchema().getFullName());
// Assert on Searchable Fields
assertEquals(7, testEntityInfo.getSearchableFieldSpecs().size());
assertEquals(8, testEntityInfo.getSearchableFieldSpecs().size());
assertEquals("customProperties", testEntityInfo.getSearchableFieldSpecMap().get(
new PathSpec("customProperties").toString()).getSearchableAnnotation().getFieldName());
assertEquals(SearchableAnnotation.FieldType.KEYWORD, testEntityInfo.getSearchableFieldSpecMap().get(
new PathSpec("customProperties").toString())
.getSearchableAnnotation().getFieldType());
assertEquals("textFieldOverride", testEntityInfo.getSearchableFieldSpecMap().get(
new PathSpec("textField").toString()).getSearchableAnnotation().getFieldName());
assertEquals(SearchableAnnotation.FieldType.TEXT, testEntityInfo.getSearchableFieldSpecMap().get(

View File

@ -6,6 +6,7 @@ import com.linkedin.metadata.dao.utils.RecordUtils;
import com.linkedin.metadata.models.AspectSpec;
import com.linkedin.metadata.models.EntitySpec;
import com.linkedin.metadata.models.FieldSpec;
import com.linkedin.util.Pair;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -21,6 +22,7 @@ import java.util.stream.Collectors;
public class FieldExtractor {
private static final String ARRAY_WILDCARD = "*";
private static final int MAX_VALUE_LENGTH = 200;
private FieldExtractor() {
}
@ -40,7 +42,17 @@ public class FieldExtractor {
long numArrayWildcards = getNumArrayWildcards(fieldSpec.getPath());
// Not an array field
if (numArrayWildcards == 0) {
extractedFields.put(fieldSpec, Collections.singletonList(value.get()));
// For maps, convert it into a list of the form key=value (Filter out long values)
if (value.get() instanceof Map) {
extractedFields.put(fieldSpec, ((Map<?, ?>) value.get()).entrySet()
.stream()
.map(entry -> new Pair<>(entry.getKey().toString(), entry.getValue().toString()))
.filter(entry -> entry.getValue().length() < MAX_VALUE_LENGTH)
.map(entry -> entry.getKey() + "=" + entry.getValue())
.collect(Collectors.toList()));
} else {
extractedFields.put(fieldSpec, Collections.singletonList(value.get()));
}
} else {
List<Object> valueList = (List<Object>) value.get();
// If the field is a nested list of values, flatten it

View File

@ -102,7 +102,7 @@ public class SearchDocumentTransformer {
return;
}
if (isArray) {
if (isArray || valueType == DataSchema.Type.MAP) {
ArrayNode arrayNode = JsonNodeFactory.instance.arrayNode();
fieldValues.forEach(value -> getNodeForValue(valueType, value, fieldType).ifPresent(arrayNode::add));
searchDocument.set(fieldName, arrayNode);

View File

@ -11,9 +11,11 @@ import com.datahub.test.TestEntityInfo;
import com.datahub.test.TestEntityKey;
import com.datahub.test.TestEntitySnapshot;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.linkedin.common.urn.TestEntityUrn;
import com.linkedin.common.urn.Urn;
import com.linkedin.data.template.StringArray;
import com.linkedin.data.template.StringMap;
public class TestEntityUtil {
@ -37,6 +39,7 @@ public class TestEntityUtil {
ImmutableList.of(new SimpleNestedRecord2().setNestedArrayStringField("nestedArray1"),
new SimpleNestedRecord2().setNestedArrayStringField("nestedArray2")
.setNestedArrayArrayField(new StringArray(ImmutableList.of("testNestedArray1", "testNestedArray2"))))));
testEntityInfo.setCustomProperties(new StringMap(ImmutableMap.of("key1", "value1", "key2", "value2")));
return testEntityInfo;
}

View File

@ -43,5 +43,6 @@ public class FieldExtractorTest {
assertEquals(result.get(nameToSpec.get("nestedIntegerField")), ImmutableList.of(1));
assertEquals(result.get(nameToSpec.get("nestedArrayStringField")), ImmutableList.of("nestedArray1", "nestedArray2"));
assertEquals(result.get(nameToSpec.get("nestedArrayArrayField")), ImmutableList.of("testNestedArray1", "testNestedArray2"));
assertEquals(result.get(nameToSpec.get("customProperties")), ImmutableList.of("key1=value1", "key2=value2"));
}
}

View File

@ -17,12 +17,13 @@ public class MappingsBuilderTest {
Map<String, Object> result = MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec());
assertEquals(result.size(), 1);
Map<String, Object> properties = (Map<String, Object>) result.get("properties");
assertEquals(properties.size(), 11);
assertEquals(properties.size(), 12);
assertEquals(properties.get("urn"), ImmutableMap.of("type", "keyword"));
assertTrue(properties.containsKey("browsePaths"));
// KEYWORD
assertEquals(properties.get("keyPart3"), ImmutableMap.of("type", "keyword", "normalizer", "keyword_normalizer"));
assertEquals(properties.get("customProperties"),
ImmutableMap.of("type", "keyword", "normalizer", "keyword_normalizer"));
// TEXT
Map<String, Object> nestedArrayStringField = (Map<String, Object>) properties.get("nestedArrayStringField");
assertEquals(nestedArrayStringField.get("type"), "keyword");

View File

@ -25,10 +25,11 @@ public class SearchQueryBuilderTest {
assertEquals(keywordQuery.queryString(), "testQuery");
assertEquals(keywordQuery.analyzer(), "custom_keyword");
Map<String, Float> keywordFields = keywordQuery.fields();
assertEquals(keywordFields.size(), 7);
assertEquals(keywordFields.size(), 8);
assertEquals(keywordFields.get("keyPart1").floatValue(), 10.0f);
assertFalse(keywordFields.containsKey("keyPart3"));
assertEquals(keywordFields.get("textFieldOverride").floatValue(), 1.0f);
assertEquals(keywordFields.get("customProperties").floatValue(), 1.0f);
QueryStringQueryBuilder textQuery = (QueryStringQueryBuilder) shouldQueries.get(1);
assertEquals(textQuery.queryString(), "testQuery");
assertEquals(textQuery.analyzer(), "word_delimited");

View File

@ -34,10 +34,10 @@ public class SearchRequestHandlerTest {
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
List<String> fields =
highlightBuilder.fields().stream().map(HighlightBuilder.Field::name).collect(Collectors.toList());
assertEquals(fields.size(), 14);
assertEquals(fields.size(), 16);
List<String> highlightableFields =
ImmutableList.of("keyPart1", "textArrayField", "textFieldOverride", "foreignKey", "nestedForeignKey",
"nestedArrayStringField", "nestedArrayArrayField");
"nestedArrayStringField", "nestedArrayArrayField", "customProperties");
highlightableFields.forEach(field -> {
assertTrue(fields.contains(field));
assertTrue(fields.contains(field + ".*"));

View File

@ -7,5 +7,10 @@ record CustomProperties {
/**
* Custom property bag.
*/
@Searchable = {
"/*": {
"queryByDefault": true
}
}
customProperties: map[string, string] = { }
}

View File

@ -1,6 +1,7 @@
namespace com.datahub.test
import com.linkedin.common.Urn
import com.linkedin.common.CustomProperties
/**
* Info associated with a Test Entity
@ -8,7 +9,7 @@ import com.linkedin.common.Urn
@Aspect = {
"name": "testEntityInfo"
}
record TestEntityInfo {
record TestEntityInfo includes CustomProperties {
@Searchable = {
"fieldName": "textFieldOverride",