mirror of
https://github.com/datahub-project/datahub.git
synced 2025-06-27 05:03:31 +00:00
feat(search): Add searchable annotation to maps (#3136)
This commit is contained in:
parent
792a08c283
commit
e30d7238c0
@ -167,11 +167,11 @@ The Aspect has four key components: its properties, the @Aspect annotation, the
|
||||
references to other entities, of type Urn or optionally `<Entity>Urn`
|
||||
- The @Aspect annotation. This is used to declare that the record is an Aspect and can be included in an entity’s
|
||||
Snapshot. Unlike the other two annotations, @Aspect is applied to the entire record rather than a specific field.
|
||||
Note, you can mark an aspect as a timeseries aspect. Check out
|
||||
this [doc](metadata-model.md#timeseries-aspects) for details.
|
||||
- The @Searchable annotation. This annotation can be applied to any primitive field to indicate that it should be
|
||||
indexed in Elasticsearch and can be searched on. For a complete guide on using the search annotation, see the
|
||||
annotation docs further down in this document.
|
||||
Note, you can mark an aspect as a timeseries aspect. Check out this [doc](metadata-model.md#timeseries-aspects) for
|
||||
details.
|
||||
- The @Searchable annotation. This annotation can be applied to any primitive field or a map field to indicate that it
|
||||
should be indexed in Elasticsearch and can be searched on. For a complete guide on using the search annotation, see
|
||||
the annotation docs further down in this document.
|
||||
- The @Relationship annotations. These annotations create edges between the Snapshot’s Urn and the destination of the
|
||||
annotated field when the snapshots are ingested. @Relationship annotations must be applied to fields of type Urn. In
|
||||
the case of DashboardInfo, the `charts` field is an Array of Urns. The @Relationship annotation cannot be applied
|
||||
@ -398,6 +398,9 @@ ranking.
|
||||
Now, when DataHub ingests Dashboards, it will index the Dashboard’s title in Elasticsearch. When a user searches for
|
||||
Dashboards, that query will be used to search on the title index and matching Dashboards will be returned.
|
||||
|
||||
Note, when the @Searchable annotation is applied to a map, the map is converted into a list with "key.toString()
|
||||
=value.toString()" as elements. This allows us to index map fields, while not increasing the number of columns indexed.
|
||||
|
||||
#### @Relationship
|
||||
|
||||
This annotation is applied to fields inside an Aspect. This annotation creates edges between an Entity’s Urn and the
|
||||
|
@ -24,6 +24,8 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
|
||||
private final List<SearchableFieldSpec> _specs = new ArrayList<>();
|
||||
private final Map<String, String> _searchFieldNamesToPatch = new HashMap<>();
|
||||
|
||||
private static final String MAP = "map";
|
||||
|
||||
public List<SearchableFieldSpec> getSpecs() {
|
||||
return _specs;
|
||||
}
|
||||
@ -38,45 +40,59 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
|
||||
|
||||
final DataSchema currentSchema = context.getCurrentSchema().getDereferencedDataSchema();
|
||||
|
||||
// First, check properties for primary annotation definition.
|
||||
final Map<String, Object> properties = context.getEnclosingField().getProperties();
|
||||
final Object primaryAnnotationObj = properties.get(SearchableAnnotation.ANNOTATION_NAME);
|
||||
final Object annotationObj = getAnnotationObj(context);
|
||||
|
||||
if (primaryAnnotationObj != null) {
|
||||
validatePropertiesAnnotation(currentSchema, primaryAnnotationObj, context.getTraversePath().toString());
|
||||
}
|
||||
|
||||
// Next, check resolved properties for annotations on primitives.
|
||||
final Map<String, Object> resolvedProperties = FieldSpecUtils.getResolvedProperties(currentSchema);
|
||||
final Object resolvedAnnotationObj = resolvedProperties.get(SearchableAnnotation.ANNOTATION_NAME);
|
||||
|
||||
if (resolvedAnnotationObj != null) {
|
||||
if (annotationObj != null) {
|
||||
if (currentSchema.getDereferencedDataSchema().isComplex()) {
|
||||
final ComplexDataSchema complexSchema = (ComplexDataSchema) currentSchema;
|
||||
if (isValidComplexType(complexSchema)) {
|
||||
extractSearchableAnnotation(resolvedAnnotationObj, currentSchema, context);
|
||||
extractSearchableAnnotation(annotationObj, currentSchema, context);
|
||||
}
|
||||
} else if (isValidPrimitiveType((PrimitiveDataSchema) currentSchema)) {
|
||||
extractSearchableAnnotation(resolvedAnnotationObj, currentSchema, context);
|
||||
extractSearchableAnnotation(annotationObj, currentSchema, context);
|
||||
} else {
|
||||
throw new ModelValidationException(String.format("Invalid @Searchable Annotation at %s", context.getSchemaPathSpec().toString()));
|
||||
throw new ModelValidationException(
|
||||
String.format("Invalid @Searchable Annotation at %s", context.getSchemaPathSpec().toString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void extractSearchableAnnotation(
|
||||
final Object annotationObj,
|
||||
final DataSchema currentSchema,
|
||||
/**
 * Resolves the @Searchable annotation object for the schema node currently being visited.
 *
 * <p>Lookup order:
 * <ol>
 *   <li>The enclosing field's properties. If an annotation is found there it is validated; when the
 *       dereferenced schema type is MAP and the annotation is a non-empty {@link Map}, the value of
 *       its first entry is returned.</li>
 *   <li>If the traverse path contains the {@code MAP} segment, {@code null} is returned.</li>
 *   <li>Otherwise, the annotation found in the resolved properties of the current schema.</li>
 * </ol>
 *
 * @param context traverser context of the node being visited
 * @return the annotation object, or {@code null} when no annotation applies to this node
 */
private Object getAnnotationObj(TraverserContext context) {
|
||||
final DataSchema currentSchema = context.getCurrentSchema().getDereferencedDataSchema();
|
||||
|
||||
// First, check properties for primary annotation definition.
|
||||
final Map<String, Object> properties = context.getEnclosingField().getProperties();
|
||||
final Object primaryAnnotationObj = properties.get(SearchableAnnotation.ANNOTATION_NAME);
|
||||
|
||||
if (primaryAnnotationObj != null) {
|
||||
validatePropertiesAnnotation(currentSchema, primaryAnnotationObj, context.getTraversePath().toString());
|
||||
// Unfortunately, annotations on collections always need to be a nested map (byproduct of making overrides work)
|
||||
// As such, for annotation maps, we make it a single entry map, where the key has no meaning
|
||||
if (currentSchema.getDereferencedType() == DataSchema.Type.MAP && primaryAnnotationObj instanceof Map
|
||||
&& !((Map) primaryAnnotationObj).isEmpty()) {
|
||||
return ((Map<?, ?>) primaryAnnotationObj).entrySet().stream().findFirst().get().getValue();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Check if the path has map in it. Individual values of the maps (actual maps are caught above) can be ignored
// NOTE(review): presumably this matches any path segment equal to "map", so a field literally named "map"
// would be skipped as well - confirm against the type returned by getTraversePath().
|
||||
if (context.getTraversePath().contains(MAP)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Next, check resolved properties for annotations on primitives.
|
||||
final Map<String, Object> resolvedProperties = FieldSpecUtils.getResolvedProperties(currentSchema);
|
||||
return resolvedProperties.get(SearchableAnnotation.ANNOTATION_NAME);
|
||||
}
|
||||
|
||||
private void extractSearchableAnnotation(final Object annotationObj, final DataSchema currentSchema,
|
||||
final TraverserContext context) {
|
||||
final PathSpec path = new PathSpec(context.getSchemaPathSpec());
|
||||
final SearchableAnnotation annotation =
|
||||
SearchableAnnotation.fromPegasusAnnotationObject(
|
||||
annotationObj,
|
||||
FieldSpecUtils.getSchemaFieldName(path),
|
||||
SearchableAnnotation.fromPegasusAnnotationObject(annotationObj, FieldSpecUtils.getSchemaFieldName(path),
|
||||
currentSchema.getDereferencedType(), path.toString());
|
||||
if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName())
|
||||
&& !_searchFieldNamesToPatch.get(annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) {
|
||||
if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName()) && !_searchFieldNamesToPatch.get(
|
||||
annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) {
|
||||
throw new ModelValidationException(
|
||||
String.format("Entity has multiple searchable fields with the same field name %s",
|
||||
annotation.getFieldName()));
|
||||
@ -97,7 +113,8 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
|
||||
}
|
||||
|
||||
private Boolean isValidComplexType(final ComplexDataSchema schema) {
|
||||
return DataSchema.Type.ENUM.equals(schema.getDereferencedDataSchema().getDereferencedType());
|
||||
return DataSchema.Type.ENUM.equals(schema.getDereferencedDataSchema().getDereferencedType())
|
||||
|| DataSchema.Type.MAP.equals(schema.getDereferencedDataSchema().getDereferencedType());
|
||||
}
|
||||
|
||||
private Boolean isValidPrimitiveType(final PrimitiveDataSchema schema) {
|
||||
@ -107,7 +124,9 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
|
||||
private void validatePropertiesAnnotation(DataSchema currentSchema, Object annotationObj, String pathStr) {
|
||||
|
||||
// If primitive, assume the annotation is well formed until resolvedProperties reflects it.
|
||||
if (currentSchema.isPrimitive() || currentSchema.getDereferencedType().equals(DataSchema.Type.ENUM)) {
|
||||
if (currentSchema.isPrimitive() || currentSchema.getDereferencedType().equals(DataSchema.Type.ENUM) || currentSchema
|
||||
.getDereferencedType()
|
||||
.equals(DataSchema.Type.MAP)) {
|
||||
return;
|
||||
}
|
||||
|
||||
@ -115,26 +134,22 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor {
|
||||
if (!Map.class.isAssignableFrom(annotationObj.getClass())) {
|
||||
throw new ModelValidationException(String.format(
|
||||
"Failed to validate @%s annotation declared inside %s: Invalid value type provided (Expected Map)",
|
||||
SearchableAnnotation.ANNOTATION_NAME,
|
||||
pathStr
|
||||
));
|
||||
SearchableAnnotation.ANNOTATION_NAME, pathStr));
|
||||
}
|
||||
|
||||
Map<String, Object> annotationMap = (Map<String, Object>) annotationObj;
|
||||
|
||||
if (annotationMap.size() == 0) {
|
||||
throw new ModelValidationException(
|
||||
String.format("Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
|
||||
pathStr,
|
||||
currentSchema.getType()));
|
||||
throw new ModelValidationException(String.format(
|
||||
"Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
|
||||
pathStr, currentSchema.getType()));
|
||||
}
|
||||
|
||||
for (String key : annotationMap.keySet()) {
|
||||
if (!key.startsWith(Character.toString(PathSpec.SEPARATOR))) {
|
||||
throw new ModelValidationException(
|
||||
String.format("Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
|
||||
pathStr,
|
||||
currentSchema.getType()));
|
||||
throw new ModelValidationException(String.format(
|
||||
"Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.",
|
||||
pathStr, currentSchema.getType()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -104,6 +104,8 @@ public class SearchableAnnotation {
|
||||
case INT:
|
||||
case FLOAT:
|
||||
return FieldType.COUNT;
|
||||
case MAP:
|
||||
return FieldType.KEYWORD;
|
||||
default:
|
||||
return FieldType.TEXT;
|
||||
}
|
||||
|
@ -114,7 +114,12 @@ public class EntitySpecBuilderTest {
|
||||
assertEquals(new TestEntityInfo().schema().getFullName(), testEntityInfo.getPegasusSchema().getFullName());
|
||||
|
||||
// Assert on Searchable Fields
|
||||
assertEquals(7, testEntityInfo.getSearchableFieldSpecs().size());
|
||||
assertEquals(8, testEntityInfo.getSearchableFieldSpecs().size());
|
||||
assertEquals("customProperties", testEntityInfo.getSearchableFieldSpecMap().get(
|
||||
new PathSpec("customProperties").toString()).getSearchableAnnotation().getFieldName());
|
||||
assertEquals(SearchableAnnotation.FieldType.KEYWORD, testEntityInfo.getSearchableFieldSpecMap().get(
|
||||
new PathSpec("customProperties").toString())
|
||||
.getSearchableAnnotation().getFieldType());
|
||||
assertEquals("textFieldOverride", testEntityInfo.getSearchableFieldSpecMap().get(
|
||||
new PathSpec("textField").toString()).getSearchableAnnotation().getFieldName());
|
||||
assertEquals(SearchableAnnotation.FieldType.TEXT, testEntityInfo.getSearchableFieldSpecMap().get(
|
||||
|
@ -6,6 +6,7 @@ import com.linkedin.metadata.dao.utils.RecordUtils;
|
||||
import com.linkedin.metadata.models.AspectSpec;
|
||||
import com.linkedin.metadata.models.EntitySpec;
|
||||
import com.linkedin.metadata.models.FieldSpec;
|
||||
import com.linkedin.util.Pair;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
@ -21,6 +22,7 @@ import java.util.stream.Collectors;
|
||||
public class FieldExtractor {
|
||||
|
||||
private static final String ARRAY_WILDCARD = "*";
|
||||
private static final int MAX_VALUE_LENGTH = 200;
|
||||
|
||||
private FieldExtractor() {
|
||||
}
|
||||
@ -40,7 +42,17 @@ public class FieldExtractor {
|
||||
long numArrayWildcards = getNumArrayWildcards(fieldSpec.getPath());
|
||||
// Not an array field
|
||||
if (numArrayWildcards == 0) {
|
||||
extractedFields.put(fieldSpec, Collections.singletonList(value.get()));
|
||||
// For maps, convert it into a list of the form key=value (Filter out long values)
|
||||
if (value.get() instanceof Map) {
|
||||
extractedFields.put(fieldSpec, ((Map<?, ?>) value.get()).entrySet()
|
||||
.stream()
|
||||
.map(entry -> new Pair<>(entry.getKey().toString(), entry.getValue().toString()))
|
||||
.filter(entry -> entry.getValue().length() < MAX_VALUE_LENGTH)
|
||||
.map(entry -> entry.getKey() + "=" + entry.getValue())
|
||||
.collect(Collectors.toList()));
|
||||
} else {
|
||||
extractedFields.put(fieldSpec, Collections.singletonList(value.get()));
|
||||
}
|
||||
} else {
|
||||
List<Object> valueList = (List<Object>) value.get();
|
||||
// If the field is a nested list of values, flatten it
|
||||
|
@ -102,7 +102,7 @@ public class SearchDocumentTransformer {
|
||||
return;
|
||||
}
|
||||
|
||||
if (isArray) {
|
||||
if (isArray || valueType == DataSchema.Type.MAP) {
|
||||
ArrayNode arrayNode = JsonNodeFactory.instance.arrayNode();
|
||||
fieldValues.forEach(value -> getNodeForValue(valueType, value, fieldType).ifPresent(arrayNode::add));
|
||||
searchDocument.set(fieldName, arrayNode);
|
||||
|
@ -11,9 +11,11 @@ import com.datahub.test.TestEntityInfo;
|
||||
import com.datahub.test.TestEntityKey;
|
||||
import com.datahub.test.TestEntitySnapshot;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.linkedin.common.urn.TestEntityUrn;
|
||||
import com.linkedin.common.urn.Urn;
|
||||
import com.linkedin.data.template.StringArray;
|
||||
import com.linkedin.data.template.StringMap;
|
||||
|
||||
|
||||
public class TestEntityUtil {
|
||||
@ -37,6 +39,7 @@ public class TestEntityUtil {
|
||||
ImmutableList.of(new SimpleNestedRecord2().setNestedArrayStringField("nestedArray1"),
|
||||
new SimpleNestedRecord2().setNestedArrayStringField("nestedArray2")
|
||||
.setNestedArrayArrayField(new StringArray(ImmutableList.of("testNestedArray1", "testNestedArray2"))))));
|
||||
testEntityInfo.setCustomProperties(new StringMap(ImmutableMap.of("key1", "value1", "key2", "value2")));
|
||||
return testEntityInfo;
|
||||
}
|
||||
|
||||
|
@ -43,5 +43,6 @@ public class FieldExtractorTest {
|
||||
assertEquals(result.get(nameToSpec.get("nestedIntegerField")), ImmutableList.of(1));
|
||||
assertEquals(result.get(nameToSpec.get("nestedArrayStringField")), ImmutableList.of("nestedArray1", "nestedArray2"));
|
||||
assertEquals(result.get(nameToSpec.get("nestedArrayArrayField")), ImmutableList.of("testNestedArray1", "testNestedArray2"));
|
||||
assertEquals(result.get(nameToSpec.get("customProperties")), ImmutableList.of("key1=value1", "key2=value2"));
|
||||
}
|
||||
}
|
||||
|
@ -17,12 +17,13 @@ public class MappingsBuilderTest {
|
||||
Map<String, Object> result = MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec());
|
||||
assertEquals(result.size(), 1);
|
||||
Map<String, Object> properties = (Map<String, Object>) result.get("properties");
|
||||
assertEquals(properties.size(), 11);
|
||||
assertEquals(properties.size(), 12);
|
||||
assertEquals(properties.get("urn"), ImmutableMap.of("type", "keyword"));
|
||||
assertTrue(properties.containsKey("browsePaths"));
|
||||
// KEYWORD
|
||||
assertEquals(properties.get("keyPart3"), ImmutableMap.of("type", "keyword", "normalizer", "keyword_normalizer"));
|
||||
|
||||
assertEquals(properties.get("customProperties"),
|
||||
ImmutableMap.of("type", "keyword", "normalizer", "keyword_normalizer"));
|
||||
// TEXT
|
||||
Map<String, Object> nestedArrayStringField = (Map<String, Object>) properties.get("nestedArrayStringField");
|
||||
assertEquals(nestedArrayStringField.get("type"), "keyword");
|
||||
|
@ -25,10 +25,11 @@ public class SearchQueryBuilderTest {
|
||||
assertEquals(keywordQuery.queryString(), "testQuery");
|
||||
assertEquals(keywordQuery.analyzer(), "custom_keyword");
|
||||
Map<String, Float> keywordFields = keywordQuery.fields();
|
||||
assertEquals(keywordFields.size(), 7);
|
||||
assertEquals(keywordFields.size(), 8);
|
||||
assertEquals(keywordFields.get("keyPart1").floatValue(), 10.0f);
|
||||
assertFalse(keywordFields.containsKey("keyPart3"));
|
||||
assertEquals(keywordFields.get("textFieldOverride").floatValue(), 1.0f);
|
||||
assertEquals(keywordFields.get("customProperties").floatValue(), 1.0f);
|
||||
QueryStringQueryBuilder textQuery = (QueryStringQueryBuilder) shouldQueries.get(1);
|
||||
assertEquals(textQuery.queryString(), "testQuery");
|
||||
assertEquals(textQuery.analyzer(), "word_delimited");
|
||||
|
@ -34,10 +34,10 @@ public class SearchRequestHandlerTest {
|
||||
HighlightBuilder highlightBuilder = sourceBuilder.highlighter();
|
||||
List<String> fields =
|
||||
highlightBuilder.fields().stream().map(HighlightBuilder.Field::name).collect(Collectors.toList());
|
||||
assertEquals(fields.size(), 14);
|
||||
assertEquals(fields.size(), 16);
|
||||
List<String> highlightableFields =
|
||||
ImmutableList.of("keyPart1", "textArrayField", "textFieldOverride", "foreignKey", "nestedForeignKey",
|
||||
"nestedArrayStringField", "nestedArrayArrayField");
|
||||
"nestedArrayStringField", "nestedArrayArrayField", "customProperties");
|
||||
highlightableFields.forEach(field -> {
|
||||
assertTrue(fields.contains(field));
|
||||
assertTrue(fields.contains(field + ".*"));
|
||||
|
@ -7,5 +7,10 @@ record CustomProperties {
|
||||
/**
|
||||
* Custom property bag.
|
||||
*/
|
||||
@Searchable = {
|
||||
"/*": {
|
||||
"queryByDefault": true
|
||||
}
|
||||
}
|
||||
customProperties: map[string, string] = { }
|
||||
}
|
@ -1,6 +1,7 @@
|
||||
namespace com.datahub.test
|
||||
|
||||
import com.linkedin.common.Urn
|
||||
import com.linkedin.common.CustomProperties
|
||||
|
||||
/**
|
||||
* Info associated with a Test Entity
|
||||
@ -8,7 +9,7 @@ import com.linkedin.common.Urn
|
||||
@Aspect = {
|
||||
"name": "testEntityInfo"
|
||||
}
|
||||
record TestEntityInfo {
|
||||
record TestEntityInfo includes CustomProperties {
|
||||
|
||||
@Searchable = {
|
||||
"fieldName": "textFieldOverride",
|
||||
|
Loading…
x
Reference in New Issue
Block a user