mirror of
https://github.com/datahub-project/datahub.git
synced 2025-12-28 02:17:53 +00:00
feat(dataset): Enable search of datasets by field names (#2001)
* feat(dataset): Enable search of datasets by field names
This commit is contained in:
parent
6f59a91865
commit
5d083143db
@ -57,6 +57,15 @@ export const fields: Array<ISearchEntityRenderProps> = [
|
||||
desc: 'The platform of the dataset',
|
||||
example: 'platform:kafka'
|
||||
},
|
||||
{
|
||||
showInAutoCompletion: true,
|
||||
fieldName: 'fieldPaths',
|
||||
showInResultsPreview: false,
|
||||
displayName: 'fieldPaths',
|
||||
showInFacets: true,
|
||||
desc: 'Fields of the dataset',
|
||||
example: 'fieldPaths:field1'
|
||||
},
|
||||
{
|
||||
showInAutoCompletion: false,
|
||||
fieldName: 'healthScore',
|
||||
|
||||
@ -52,6 +52,13 @@
|
||||
"type": "custom",
|
||||
"tokenizer": "dataset_pattern"
|
||||
},
|
||||
"field_pattern": {
|
||||
"filter": [
|
||||
"lowercase"
|
||||
],
|
||||
"type": "custom",
|
||||
"tokenizer": "field_pattern"
|
||||
},
|
||||
"comma_pattern": {
|
||||
"filter": [
|
||||
"lowercase"
|
||||
@ -103,6 +110,14 @@
|
||||
"type": "custom",
|
||||
"tokenizer": "dataset_pattern"
|
||||
},
|
||||
"field_pattern_ngram": {
|
||||
"filter": [
|
||||
"lowercase",
|
||||
"autocomplete_filter"
|
||||
],
|
||||
"type": "custom",
|
||||
"tokenizer": "field_pattern"
|
||||
},
|
||||
"custom_browse_slash": {
|
||||
"filter": [
|
||||
"lowercase"
|
||||
@ -133,6 +148,10 @@
|
||||
"dataset_pattern": {
|
||||
"pattern": "[./]",
|
||||
"type": "pattern"
|
||||
},
|
||||
"field_pattern": {
|
||||
"pattern": "[./]",
|
||||
"type": "pattern"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -171,7 +190,8 @@
|
||||
"keyword": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
},
|
||||
"hasOwners": {
|
||||
"type": "boolean"
|
||||
@ -201,6 +221,28 @@
|
||||
},
|
||||
"normalizer": "my_normalizer"
|
||||
},
|
||||
"fieldPaths": {
|
||||
"type": "keyword",
|
||||
"fields": {
|
||||
"field_pattern_ngram": {
|
||||
"type": "text",
|
||||
"analyzer": "field_pattern_ngram"
|
||||
},
|
||||
"delimited": {
|
||||
"type": "text",
|
||||
"analyzer": "delimit"
|
||||
},
|
||||
"ngram": {
|
||||
"type": "text",
|
||||
"analyzer": "custom_ngram"
|
||||
},
|
||||
"pattern": {
|
||||
"type": "text",
|
||||
"analyzer": "field_pattern"
|
||||
}
|
||||
},
|
||||
"normalizer": "my_normalizer"
|
||||
},
|
||||
"num_downstream_datasets": {
|
||||
"type": "long"
|
||||
},
|
||||
|
||||
@ -11,14 +11,14 @@ import com.linkedin.dataset.DatasetProperties;
|
||||
import com.linkedin.dataset.UpstreamLineage;
|
||||
import com.linkedin.metadata.search.DatasetDocument;
|
||||
import com.linkedin.metadata.snapshot.DatasetSnapshot;
|
||||
import com.linkedin.schema.SchemaField;
|
||||
import com.linkedin.schema.SchemaMetadata;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import javax.annotation.Nonnull;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
@Slf4j
|
||||
public class DatasetIndexBuilder extends BaseIndexBuilder<DatasetDocument> {
|
||||
@ -79,10 +79,11 @@ public class DatasetIndexBuilder extends BaseIndexBuilder<DatasetDocument> {
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn, @Nonnull SchemaMetadata schemaMetadata) {
|
||||
return new DatasetDocument()
|
||||
.setUrn(urn)
|
||||
.setHasSchema(true);
|
||||
private DatasetDocument getDocumentToUpdateFromAspect(@Nonnull DatasetUrn urn,
|
||||
@Nonnull SchemaMetadata schemaMetadata) {
|
||||
final StringArray fieldPaths = new StringArray(
|
||||
schemaMetadata.getFields().stream().map(SchemaField::getFieldPath).collect(Collectors.toList()));
|
||||
return new DatasetDocument().setUrn(urn).setHasSchema(true).setFieldPaths(fieldPaths);
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
|
||||
@ -23,6 +23,10 @@ import com.linkedin.metadata.aspect.DatasetAspect;
|
||||
import com.linkedin.metadata.dao.utils.ModelUtils;
|
||||
import com.linkedin.metadata.search.DatasetDocument;
|
||||
import com.linkedin.metadata.snapshot.DatasetSnapshot;
|
||||
import com.linkedin.schema.BooleanType;
|
||||
import com.linkedin.schema.SchemaField;
|
||||
import com.linkedin.schema.SchemaFieldArray;
|
||||
import com.linkedin.schema.SchemaFieldDataType;
|
||||
import com.linkedin.schema.SchemaMetadata;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
@ -178,11 +182,17 @@ public class DatasetIndexBuilderTest {
|
||||
@Test
|
||||
public void schemaMetadata() {
|
||||
// given
|
||||
final SchemaFieldArray schemaFieldArray = new SchemaFieldArray(new SchemaField().setFieldPath("foo.bar.baz")
|
||||
.setType(new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType())))
|
||||
.setNullable(false)
|
||||
.setNativeDataType("boolean")
|
||||
.setRecursive(false));
|
||||
final DatasetUrn datasetUrn = new DatasetUrn(new DataPlatformUrn("foo"), "bar", FabricType.PROD);
|
||||
final SchemaMetadata schemaMetadata = new SchemaMetadata();
|
||||
final SchemaMetadata schemaMetadata = new SchemaMetadata().setFields(schemaFieldArray);
|
||||
final DatasetSnapshot datasetSnapshot = ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn,
|
||||
Collections.singletonList(ModelUtils.newAspectUnion(DatasetAspect.class, schemaMetadata)));
|
||||
final DatasetDocument expectedDocument1 = new DatasetDocument().setUrn(datasetUrn).setHasSchema(true);
|
||||
final DatasetDocument expectedDocument1 =
|
||||
new DatasetDocument().setUrn(datasetUrn).setHasSchema(true).setFieldPaths(new StringArray("foo.bar.baz"));
|
||||
final DatasetDocument expectedDocument2 = new DatasetDocument().setUrn(datasetUrn)
|
||||
.setBrowsePaths(new StringArray("/prod/foo/bar"))
|
||||
.setOrigin(FabricType.PROD)
|
||||
|
||||
@ -43,6 +43,11 @@ record DatasetDocument includes BaseDocument {
|
||||
*/
|
||||
description: optional string
|
||||
|
||||
/**
|
||||
* Field paths of the dataset
|
||||
*/
|
||||
fieldPaths: optional array[string]
|
||||
|
||||
/**
|
||||
* Flag to indicate whether or not the dataset has non-empty corp users as owners.
|
||||
*/
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user