add aspects to VALUE model of datasets (#1940)

This commit is contained in:
Jyoti Wadhwani 2020-10-22 21:29:28 -07:00 committed by GitHub
parent e936e2b856
commit 4bfcb4b508
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 478 additions and 395 deletions

View File

@ -56,8 +56,8 @@ public class DatasetUtil {
if (dataset.hasPlatformNativeType()) {
view.setNativeType(dataset.getPlatformNativeType().name());
}
if (dataset.hasRemoved()) {
view.setRemoved(dataset.isRemoved());
if (dataset.getStatus() != null) {
view.setRemoved(dataset.getStatus().isRemoved());
}
if (dataset.hasDeprecation()) {
view.setDeprecated(dataset.getDeprecation().isDeprecated());

View File

@ -2,8 +2,12 @@ namespace com.linkedin.dataset
import com.linkedin.common.ChangeAuditStamps
import com.linkedin.common.DatasetUrn
import com.linkedin.common.InstitutionalMemory
import com.linkedin.common.Ownership
import com.linkedin.common.Status
import com.linkedin.common.Uri
import com.linkedin.common.VersionTag
import com.linkedin.schema.SchemaMetadata
/**
* Dataset spec for a data store. A collection of data conforming to a single schema that can evolve over time. This is equivalent to a Table in most data platforms. Espresso dataset: Identity.Profile; oracle dataset: member2.member_profile; hdfs dataset: /data/databases/JOBS/JOB_APPLICATIONS; kafka: PageViewEvent
@ -74,10 +78,37 @@ record Dataset includes DatasetKey, ChangeAuditStamps, VersionTag {
/**
* whether the dataset is removed or not
*/
@deprecated
removed: boolean = false
/**
* The dataset deprecation status
*/
deprecation: optional DatasetDeprecation
/**
* Institutional memory metadata of the dataset
*/
institutionalMemory: optional InstitutionalMemory
/**
* Ownership metadata of the dataset
*/
ownership: optional Ownership
/**
* Schema metadata of the dataset
*/
schemaMetadata: optional SchemaMetadata
/**
* Status metadata of the dataset
*/
status: optional Status
/**
* Upstream lineage metadata of the dataset
*/
upstreamLineage: optional UpstreamLineage
}

View File

@ -356,7 +356,8 @@
"name" : "removed",
"type" : "boolean",
"doc" : "whether the dataset is removed or not",
"default" : false
"default" : false,
"deprecated" : true
}, {
"name" : "deprecation",
"type" : {
@ -385,19 +386,408 @@
},
"doc" : "The dataset deprecation status",
"optional" : true
}, {
"name" : "institutionalMemory",
"type" : "com.linkedin.common.InstitutionalMemory",
"doc" : "Institutional memory metadata of the dataset",
"optional" : true
}, {
"name" : "ownership",
"type" : "com.linkedin.common.Ownership",
"doc" : "Ownership metadata of the dataset",
"optional" : true
}, {
"name" : "schemaMetadata",
"type" : {
"type" : "record",
"name" : "SchemaMetadata",
"namespace" : "com.linkedin.schema",
"doc" : "SchemaMetadata to describe metadata related to store schema",
"include" : [ {
"type" : "record",
"name" : "SchemaMetadataKey",
"doc" : "Key to retrieve schema metadata.",
"fields" : [ {
"name" : "schemaName",
"type" : "string",
"doc" : "Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking",
"validate" : {
"strlen" : {
"max" : 500,
"min" : 1
}
}
}, {
"name" : "platform",
"type" : "com.linkedin.common.DataPlatformUrn",
"doc" : "Standardized platform urn where schema is defined. The data platform Urn (urn:li:platform:{platform_name})"
}, {
"name" : "version",
"type" : "long",
"doc" : "Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. This version differs from the platform native schema version."
} ]
}, "com.linkedin.common.ChangeAuditStamps" ],
"fields" : [ {
"name" : "dataset",
"type" : "com.linkedin.common.DatasetUrn",
"doc" : "Dataset this schema metadata is associated with.",
"optional" : true
}, {
"name" : "cluster",
"type" : "string",
"doc" : "The cluster this schema metadata resides from",
"optional" : true
}, {
"name" : "hash",
"type" : "string",
"doc" : "the SHA1 hash of the schema content"
}, {
"name" : "platformSchema",
"type" : [ {
"type" : "record",
"name" : "EspressoSchema",
"doc" : "Schema text of an espresso table schema.",
"fields" : [ {
"name" : "documentSchema",
"type" : "string",
"doc" : "The native espresso document schema."
}, {
"name" : "tableSchema",
"type" : "string",
"doc" : "The espresso table schema definition."
} ]
}, {
"type" : "record",
"name" : "OracleDDL",
"doc" : "Schema holder for oracle data definition language that describes an oracle table.",
"fields" : [ {
"name" : "tableSchema",
"type" : "string",
"doc" : "The native schema in the dataset's platform. This is a human readable (json blob) table schema."
} ]
}, {
"type" : "record",
"name" : "MySqlDDL",
"doc" : "Schema holder for MySql data definition language that describes an MySql table.",
"fields" : [ {
"name" : "tableSchema",
"type" : "string",
"doc" : "The native schema in the dataset's platform. This is a human readable (json blob) table schema."
} ]
}, {
"type" : "record",
"name" : "PrestoDDL",
"doc" : "Schema holder for presto data definition language that describes a presto view.",
"fields" : [ {
"name" : "rawSchema",
"type" : "string",
"doc" : "The raw schema in the dataset's platform. This includes the DDL and the columns extracted from DDL."
} ]
}, {
"type" : "record",
"name" : "KafkaSchema",
"doc" : "Schema holder for kafka schema.",
"fields" : [ {
"name" : "documentSchema",
"type" : "string",
"doc" : "The native kafka document schema. This is a human readable avro document schema."
} ]
}, {
"type" : "record",
"name" : "BinaryJsonSchema",
"doc" : "Schema text of binary JSON schema.",
"fields" : [ {
"name" : "schema",
"type" : "string",
"doc" : "The native schema text for binary JSON file format."
} ]
}, {
"type" : "record",
"name" : "OrcSchema",
"doc" : "Schema text of an ORC schema.",
"fields" : [ {
"name" : "schema",
"type" : "string",
"doc" : "The native schema for ORC file format."
} ]
}, {
"type" : "record",
"name" : "Schemaless",
"doc" : "The dataset has no specific schema associated with it",
"fields" : [ ]
}, {
"type" : "record",
"name" : "KeyValueSchema",
"doc" : "Schema text of a key-value store schema.",
"fields" : [ {
"name" : "keySchema",
"type" : "string",
"doc" : "The raw schema for the key in the key-value store."
}, {
"name" : "valueSchema",
"type" : "string",
"doc" : "The raw schema for the value in the key-value store."
} ]
}, {
"type" : "record",
"name" : "OtherSchema",
"doc" : "Schema holder for undefined schema types.",
"fields" : [ {
"name" : "rawSchema",
"type" : "string",
"doc" : "The native schema in the dataset's platform."
} ]
} ],
"doc" : "The native schema in the dataset's platform."
}, {
"name" : "fields",
"type" : {
"type" : "array",
"items" : {
"type" : "record",
"name" : "SchemaField",
"doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema",
"fields" : [ {
"name" : "fieldPath",
"type" : {
"type" : "typeref",
"name" : "SchemaFieldPath",
"namespace" : "com.linkedin.dataset",
"doc" : "Schema field path as described by schema normalizations rules: http://go/tms-schema",
"ref" : "string"
},
"doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above."
}, {
"name" : "jsonPath",
"type" : "string",
"doc" : "Flattened name of a field in JSON Path notation.",
"optional" : true
}, {
"name" : "nullable",
"type" : "boolean",
"doc" : "Indicates if this field is optional or nullable",
"default" : false
}, {
"name" : "description",
"type" : "string",
"doc" : "Description",
"optional" : true
}, {
"name" : "type",
"type" : {
"type" : "record",
"name" : "SchemaFieldDataType",
"doc" : "Schema field data types",
"fields" : [ {
"name" : "type",
"type" : [ {
"type" : "record",
"name" : "BooleanType",
"doc" : "Boolean field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "FixedType",
"doc" : "Fixed field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "StringType",
"doc" : "String field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "BytesType",
"doc" : "Bytes field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "NumberType",
"doc" : "Number data type: long, integer, short, etc..",
"fields" : [ ]
}, {
"type" : "record",
"name" : "EnumType",
"doc" : "Enum field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "NullType",
"doc" : "Null field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "MapType",
"doc" : "Map field type.",
"fields" : [ {
"name" : "keyType",
"type" : "string",
"doc" : "Key type in a map",
"optional" : true
}, {
"name" : "valueType",
"type" : "string",
"doc" : "Type of the value in a map",
"optional" : true
} ]
}, {
"type" : "record",
"name" : "ArrayType",
"doc" : "Array field type.",
"fields" : [ {
"name" : "nestedType",
"type" : {
"type" : "array",
"items" : "string"
},
"doc" : "List of types this array holds.",
"optional" : true
} ]
}, {
"type" : "record",
"name" : "UnionType",
"doc" : "Union field type.",
"fields" : [ {
"name" : "nestedTypes",
"type" : {
"type" : "array",
"items" : "string"
},
"doc" : "List of types in union type.",
"optional" : true
} ]
}, {
"type" : "record",
"name" : "RecordType",
"doc" : "Record field type.",
"fields" : [ ]
} ],
"doc" : "Data platform specific types"
} ]
},
"doc" : "Platform independent field type of the field."
}, {
"name" : "nativeDataType",
"type" : "string",
"doc" : "The native type of the field in the dataset's platform as declared by platform schema."
}, {
"name" : "recursive",
"type" : "boolean",
"doc" : "There are use cases when a field in type B references type A. A field in A references field of type B. In such cases, we will mark the first field as recursive.",
"default" : false
} ]
}
},
"doc" : "Client provided a list of fields from document schema."
}, {
"name" : "primaryKeys",
"type" : {
"type" : "array",
"items" : "com.linkedin.dataset.SchemaFieldPath"
},
"doc" : "Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. An empty list indicates the absence of a primary key access pattern. Value is a SchemaField@fieldPath.",
"optional" : true
}, {
"name" : "foreignKeysSpecs",
"type" : {
"type" : "map",
"values" : {
"type" : "record",
"name" : "ForeignKeySpec",
"doc" : "Description of a foreign key in a schema.",
"fields" : [ {
"name" : "foreignKey",
"type" : [ {
"type" : "record",
"name" : "DatasetFieldForeignKey",
"doc" : "For non-urn based foreign keys.",
"fields" : [ {
"name" : "parentDataset",
"type" : "com.linkedin.common.DatasetUrn",
"doc" : "dataset that stores the resource."
}, {
"name" : "currentFieldPaths",
"type" : {
"type" : "array",
"items" : "com.linkedin.dataset.SchemaFieldPath"
},
"doc" : "List of fields in hosting(current) SchemaMetadata that conform a foreign key. List can contain a single entry or multiple entries if several entries in hosting schema conform a foreign key in a single parent dataset."
}, {
"name" : "parentField",
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "SchemaField@fieldPath that uniquely identify field in parent dataset that this field references."
} ]
}, {
"type" : "record",
"name" : "UrnForeignKey",
"doc" : "If SchemaMetadata fields make any external references and references are of type com.linkedin.common.Urn or any of its children, this model can be used to mark it.",
"fields" : [ {
"name" : "currentFieldPath",
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Field in hosting(current) SchemaMetadata."
} ]
} ],
"doc" : "Foreign key definition in metadata schema."
} ]
}
},
"doc" : "Map captures all the references schema makes to external datasets. Map key is ForeignKeySpecName typeref.",
"optional" : true
} ]
},
"doc" : "Schema metadata of the dataset",
"optional" : true
}, {
"name" : "status",
"type" : "com.linkedin.common.Status",
"doc" : "Status metadata of the dataset",
"optional" : true
}, {
"name" : "upstreamLineage",
"type" : {
"type" : "record",
"name" : "UpstreamLineage",
"doc" : "Upstream lineage of a dataset",
"fields" : [ {
"name" : "upstreams",
"type" : {
"type" : "array",
"items" : {
"type" : "record",
"name" : "Upstream",
"doc" : "Upstream lineage information about a dataset including the source reporting the lineage",
"fields" : [ {
"name" : "auditStamp",
"type" : "com.linkedin.common.AuditStamp",
"doc" : "Audit stamp containing who reported the lineage and when"
}, {
"name" : "dataset",
"type" : "com.linkedin.common.DatasetUrn",
"doc" : "The upstream dataset the lineage points to"
}, {
"name" : "type",
"type" : {
"type" : "enum",
"name" : "DatasetLineageType",
"doc" : "The various types of supported dataset lineage",
"symbols" : [ "COPY", "TRANSFORMED", "VIEW" ],
"symbolDocs" : {
"COPY" : "Direct copy without modification",
"TRANSFORMED" : "Transformed data with modification (format or content change)",
"VIEW" : "Represents a view defined on the sources e.g. Hive view defined on underlying hive tables or a Hive table pointing to a HDFS dataset or DALI view defined on multiple sources"
}
},
"doc" : "The type of the lineage"
} ]
}
},
"doc" : "List of upstream dataset lineage information"
} ]
},
"doc" : "Upstream lineage metadata of the dataset",
"optional" : true
} ]
}, "com.linkedin.dataset.DatasetDeprecation", "com.linkedin.dataset.DatasetKey", {
"type" : "enum",
"name" : "DatasetLineageType",
"namespace" : "com.linkedin.dataset",
"doc" : "The various types of supported dataset lineage",
"symbols" : [ "COPY", "TRANSFORMED", "VIEW" ],
"symbolDocs" : {
"COPY" : "Direct copy without modification",
"TRANSFORMED" : "Transformed data with modification (format or content change)",
"VIEW" : "Represents a view defined on the sources e.g. Hive view defined on underlying hive tables or a Hive table pointing to a HDFS dataset or DALI view defined on multiple sources"
}
}, {
}, "com.linkedin.dataset.DatasetDeprecation", "com.linkedin.dataset.DatasetKey", "com.linkedin.dataset.DatasetLineageType", {
"type" : "record",
"name" : "DatasetProperties",
"namespace" : "com.linkedin.dataset",
@ -460,44 +850,7 @@
},
"doc" : "List of downstream dataset lineage information"
} ]
}, "com.linkedin.dataset.PlatformNativeType", {
"type" : "typeref",
"name" : "SchemaFieldPath",
"namespace" : "com.linkedin.dataset",
"doc" : "Schema field path as described by schema normalizations rules: http://go/tms-schema",
"ref" : "string"
}, {
"type" : "record",
"name" : "Upstream",
"namespace" : "com.linkedin.dataset",
"doc" : "Upstream lineage information about a dataset including the source reporting the lineage",
"fields" : [ {
"name" : "auditStamp",
"type" : "com.linkedin.common.AuditStamp",
"doc" : "Audit stamp containing who reported the lineage and when"
}, {
"name" : "dataset",
"type" : "com.linkedin.common.DatasetUrn",
"doc" : "The upstream dataset the lineage points to"
}, {
"name" : "type",
"type" : "DatasetLineageType",
"doc" : "The type of the lineage"
} ]
}, {
"type" : "record",
"name" : "UpstreamLineage",
"namespace" : "com.linkedin.dataset",
"doc" : "Upstream lineage of a dataset",
"fields" : [ {
"name" : "upstreams",
"type" : {
"type" : "array",
"items" : "Upstream"
},
"doc" : "List of upstream dataset lineage information"
} ]
}, {
}, "com.linkedin.dataset.PlatformNativeType", "com.linkedin.dataset.SchemaFieldPath", "com.linkedin.dataset.Upstream", "com.linkedin.dataset.UpstreamLineage", {
"type" : "record",
"name" : "UpstreamLineageDelta",
"namespace" : "com.linkedin.dataset",
@ -515,337 +868,7 @@
"name" : "DatasetAspect",
"namespace" : "com.linkedin.metadata.aspect",
"doc" : "A union of all supported metadata aspects for a Dataset",
"ref" : [ "com.linkedin.dataset.DatasetProperties", "com.linkedin.dataset.DatasetDeprecation", "com.linkedin.dataset.UpstreamLineage", "com.linkedin.common.InstitutionalMemory", "com.linkedin.common.Ownership", "com.linkedin.common.Status", {
"type" : "record",
"name" : "SchemaMetadata",
"namespace" : "com.linkedin.schema",
"doc" : "SchemaMetadata to describe metadata related to store schema",
"include" : [ {
"type" : "record",
"name" : "SchemaMetadataKey",
"doc" : "Key to retrieve schema metadata.",
"fields" : [ {
"name" : "schemaName",
"type" : "string",
"doc" : "Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking",
"validate" : {
"strlen" : {
"max" : 500,
"min" : 1
}
}
}, {
"name" : "platform",
"type" : "com.linkedin.common.DataPlatformUrn",
"doc" : "Standardized platform urn where schema is defined. The data platform Urn (urn:li:platform:{platform_name})"
}, {
"name" : "version",
"type" : "long",
"doc" : "Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. This version differs from the platform native schema version."
} ]
}, "com.linkedin.common.ChangeAuditStamps" ],
"fields" : [ {
"name" : "dataset",
"type" : "com.linkedin.common.DatasetUrn",
"doc" : "Dataset this schema metadata is associated with.",
"optional" : true
}, {
"name" : "cluster",
"type" : "string",
"doc" : "The cluster this schema metadata resides from",
"optional" : true
}, {
"name" : "hash",
"type" : "string",
"doc" : "the SHA1 hash of the schema content"
}, {
"name" : "platformSchema",
"type" : [ {
"type" : "record",
"name" : "EspressoSchema",
"doc" : "Schema text of an espresso table schema.",
"fields" : [ {
"name" : "documentSchema",
"type" : "string",
"doc" : "The native espresso document schema."
}, {
"name" : "tableSchema",
"type" : "string",
"doc" : "The espresso table schema definition."
} ]
}, {
"type" : "record",
"name" : "OracleDDL",
"doc" : "Schema holder for oracle data definition language that describes an oracle table.",
"fields" : [ {
"name" : "tableSchema",
"type" : "string",
"doc" : "The native schema in the dataset's platform. This is a human readable (json blob) table schema."
} ]
}, {
"type" : "record",
"name" : "MySqlDDL",
"doc" : "Schema holder for MySql data definition language that describes an MySql table.",
"fields" : [ {
"name" : "tableSchema",
"type" : "string",
"doc" : "The native schema in the dataset's platform. This is a human readable (json blob) table schema."
} ]
}, {
"type" : "record",
"name" : "PrestoDDL",
"doc" : "Schema holder for presto data definition language that describes a presto view.",
"fields" : [ {
"name" : "rawSchema",
"type" : "string",
"doc" : "The raw schema in the dataset's platform. This includes the DDL and the columns extracted from DDL."
} ]
}, {
"type" : "record",
"name" : "KafkaSchema",
"doc" : "Schema holder for kafka schema.",
"fields" : [ {
"name" : "documentSchema",
"type" : "string",
"doc" : "The native kafka document schema. This is a human readable avro document schema."
} ]
}, {
"type" : "record",
"name" : "BinaryJsonSchema",
"doc" : "Schema text of binary JSON schema.",
"fields" : [ {
"name" : "schema",
"type" : "string",
"doc" : "The native schema text for binary JSON file format."
} ]
}, {
"type" : "record",
"name" : "OrcSchema",
"doc" : "Schema text of an ORC schema.",
"fields" : [ {
"name" : "schema",
"type" : "string",
"doc" : "The native schema for ORC file format."
} ]
}, {
"type" : "record",
"name" : "Schemaless",
"doc" : "The dataset has no specific schema associated with it",
"fields" : [ ]
}, {
"type" : "record",
"name" : "KeyValueSchema",
"doc" : "Schema text of a key-value store schema.",
"fields" : [ {
"name" : "keySchema",
"type" : "string",
"doc" : "The raw schema for the key in the key-value store."
}, {
"name" : "valueSchema",
"type" : "string",
"doc" : "The raw schema for the value in the key-value store."
} ]
}, {
"type" : "record",
"name" : "OtherSchema",
"doc" : "Schema holder for undefined schema types.",
"fields" : [ {
"name" : "rawSchema",
"type" : "string",
"doc" : "The native schema in the dataset's platform."
} ]
} ],
"doc" : "The native schema in the dataset's platform."
}, {
"name" : "fields",
"type" : {
"type" : "array",
"items" : {
"type" : "record",
"name" : "SchemaField",
"doc" : "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema",
"fields" : [ {
"name" : "fieldPath",
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above."
}, {
"name" : "jsonPath",
"type" : "string",
"doc" : "Flattened name of a field in JSON Path notation.",
"optional" : true
}, {
"name" : "nullable",
"type" : "boolean",
"doc" : "Indicates if this field is optional or nullable",
"default" : false
}, {
"name" : "description",
"type" : "string",
"doc" : "Description",
"optional" : true
}, {
"name" : "type",
"type" : {
"type" : "record",
"name" : "SchemaFieldDataType",
"doc" : "Schema field data types",
"fields" : [ {
"name" : "type",
"type" : [ {
"type" : "record",
"name" : "BooleanType",
"doc" : "Boolean field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "FixedType",
"doc" : "Fixed field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "StringType",
"doc" : "String field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "BytesType",
"doc" : "Bytes field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "NumberType",
"doc" : "Number data type: long, integer, short, etc..",
"fields" : [ ]
}, {
"type" : "record",
"name" : "EnumType",
"doc" : "Enum field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "NullType",
"doc" : "Null field type.",
"fields" : [ ]
}, {
"type" : "record",
"name" : "MapType",
"doc" : "Map field type.",
"fields" : [ {
"name" : "keyType",
"type" : "string",
"doc" : "Key type in a map",
"optional" : true
}, {
"name" : "valueType",
"type" : "string",
"doc" : "Type of the value in a map",
"optional" : true
} ]
}, {
"type" : "record",
"name" : "ArrayType",
"doc" : "Array field type.",
"fields" : [ {
"name" : "nestedType",
"type" : {
"type" : "array",
"items" : "string"
},
"doc" : "List of types this array holds.",
"optional" : true
} ]
}, {
"type" : "record",
"name" : "UnionType",
"doc" : "Union field type.",
"fields" : [ {
"name" : "nestedTypes",
"type" : {
"type" : "array",
"items" : "string"
},
"doc" : "List of types in union type.",
"optional" : true
} ]
}, {
"type" : "record",
"name" : "RecordType",
"doc" : "Record field type.",
"fields" : [ ]
} ],
"doc" : "Data platform specific types"
} ]
},
"doc" : "Platform independent field type of the field."
}, {
"name" : "nativeDataType",
"type" : "string",
"doc" : "The native type of the field in the dataset's platform as declared by platform schema."
}, {
"name" : "recursive",
"type" : "boolean",
"doc" : "There are use cases when a field in type B references type A. A field in A references field of type B. In such cases, we will mark the first field as recursive.",
"default" : false
} ]
}
},
"doc" : "Client provided a list of fields from document schema."
}, {
"name" : "primaryKeys",
"type" : {
"type" : "array",
"items" : "com.linkedin.dataset.SchemaFieldPath"
},
"doc" : "Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. An empty list indicates the absence of a primary key access pattern. Value is a SchemaField@fieldPath.",
"optional" : true
}, {
"name" : "foreignKeysSpecs",
"type" : {
"type" : "map",
"values" : {
"type" : "record",
"name" : "ForeignKeySpec",
"doc" : "Description of a foreign key in a schema.",
"fields" : [ {
"name" : "foreignKey",
"type" : [ {
"type" : "record",
"name" : "DatasetFieldForeignKey",
"doc" : "For non-urn based foreign keys.",
"fields" : [ {
"name" : "parentDataset",
"type" : "com.linkedin.common.DatasetUrn",
"doc" : "dataset that stores the resource."
}, {
"name" : "currentFieldPaths",
"type" : {
"type" : "array",
"items" : "com.linkedin.dataset.SchemaFieldPath"
},
"doc" : "List of fields in hosting(current) SchemaMetadata that conform a foreign key. List can contain a single entry or multiple entries if several entries in hosting schema conform a foreign key in a single parent dataset."
}, {
"name" : "parentField",
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "SchemaField@fieldPath that uniquely identify field in parent dataset that this field references."
} ]
}, {
"type" : "record",
"name" : "UrnForeignKey",
"doc" : "If SchemaMetadata fields make any external references and references are of type com.linkedin.common.Urn or any of its children, this model can be used to mark it.",
"fields" : [ {
"name" : "currentFieldPath",
"type" : "com.linkedin.dataset.SchemaFieldPath",
"doc" : "Field in hosting(current) SchemaMetadata."
} ]
} ],
"doc" : "Foreign key definition in metadata schema."
} ]
}
},
"doc" : "Map captures all the references schema makes to external datasets. Map key is ForeignKeySpecName typeref.",
"optional" : true
} ]
} ]
"ref" : [ "com.linkedin.dataset.DatasetProperties", "com.linkedin.dataset.DatasetDeprecation", "com.linkedin.dataset.UpstreamLineage", "com.linkedin.common.InstitutionalMemory", "com.linkedin.common.Ownership", "com.linkedin.common.Status", "com.linkedin.schema.SchemaMetadata" ]
}, {
"type" : "record",
"name" : "AggregationMetadata",

View File

@ -1,5 +1,7 @@
package com.linkedin.metadata.resources.dataset;
import com.linkedin.common.InstitutionalMemory;
import com.linkedin.common.Ownership;
import com.linkedin.common.Status;
import com.linkedin.common.urn.DatasetUrn;
import com.linkedin.common.urn.Urn;
@ -8,6 +10,7 @@ import com.linkedin.dataset.Dataset;
import com.linkedin.dataset.DatasetDeprecation;
import com.linkedin.dataset.DatasetKey;
import com.linkedin.dataset.DatasetProperties;
import com.linkedin.dataset.UpstreamLineage;
import com.linkedin.metadata.aspect.DatasetAspect;
import com.linkedin.metadata.dao.BaseBrowseDAO;
import com.linkedin.metadata.dao.BaseLocalDAO;
@ -35,6 +38,7 @@ import com.linkedin.restli.server.annotations.PagingContextParam;
import com.linkedin.restli.server.annotations.QueryParam;
import com.linkedin.restli.server.annotations.RestLiCollection;
import com.linkedin.restli.server.annotations.RestMethod;
import com.linkedin.schema.SchemaMetadata;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@ -123,19 +127,28 @@ public final class Datasets extends BaseBrowsableEntityResource<
ModelUtils.getAspectsFromSnapshot(snapshot).forEach(aspect -> {
if (aspect instanceof DatasetProperties) {
DatasetProperties datasetProperties = DatasetProperties.class.cast(aspect);
final DatasetProperties datasetProperties = (DatasetProperties) aspect;
value.setProperties(datasetProperties.getCustomProperties());
value.setTags(datasetProperties.getTags());
if (datasetProperties.hasUri()) {
if (datasetProperties.getUri() != null) {
value.setUri(datasetProperties.getUri());
}
if (datasetProperties.hasDescription()) {
if (datasetProperties.getDescription() != null) {
value.setDescription(datasetProperties.getDescription());
}
} else if (aspect instanceof DatasetDeprecation) {
value.setDeprecation(DatasetDeprecation.class.cast(aspect));
value.setDeprecation((DatasetDeprecation) aspect);
} else if (aspect instanceof InstitutionalMemory) {
value.setInstitutionalMemory((InstitutionalMemory) aspect);
} else if (aspect instanceof Ownership) {
value.setOwnership((Ownership) aspect);
} else if (aspect instanceof SchemaMetadata) {
value.setSchemaMetadata((SchemaMetadata) aspect);
} else if (aspect instanceof Status) {
value.setRemoved(Status.class.cast(aspect).isRemoved());
value.setStatus((Status) aspect);
value.setRemoved(((Status) aspect).isRemoved());
} else if (aspect instanceof UpstreamLineage) {
value.setUpstreamLineage((UpstreamLineage) aspect);
}
});
return value;
@ -145,14 +158,30 @@ public final class Datasets extends BaseBrowsableEntityResource<
@Nonnull
protected DatasetSnapshot toSnapshot(@Nonnull Dataset dataset, @Nonnull DatasetUrn datasetUrn) {
final List<DatasetAspect> aspects = new ArrayList<>();
if (dataset.hasProperties()) {
if (dataset.getProperties() != null) {
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, getDatasetPropertiesAspect(dataset)));
}
if (dataset.hasDeprecation()) {
if (dataset.getDeprecation() != null) {
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, dataset.getDeprecation()));
}
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, new Status().setRemoved(dataset.isRemoved())));
if (dataset.getInstitutionalMemory() != null) {
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, dataset.getInstitutionalMemory()));
}
if (dataset.getOwnership() != null) {
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, dataset.getOwnership()));
}
if (dataset.getSchemaMetadata() != null) {
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, dataset.getSchemaMetadata()));
}
if (dataset.getStatus() != null) {
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, dataset.getStatus()));
}
if (dataset.getUpstreamLineage() != null) {
aspects.add(ModelUtils.newAspectUnion(DatasetAspect.class, dataset.getUpstreamLineage()));
}
if (dataset.hasRemoved()) {
aspects.add(DatasetAspect.create(new Status().setRemoved(dataset.isRemoved())));
}
return ModelUtils.newSnapshot(DatasetSnapshot.class, datasetUrn, aspects);
}
@ -161,10 +190,10 @@ public final class Datasets extends BaseBrowsableEntityResource<
final DatasetProperties datasetProperties = new DatasetProperties();
datasetProperties.setDescription(dataset.getDescription());
datasetProperties.setTags(dataset.getTags());
if (dataset.hasUri()) {
if (dataset.getUri() != null) {
datasetProperties.setUri(dataset.getUri());
}
if (dataset.hasPlatform()) {
if (dataset.getProperties() != null) {
datasetProperties.setCustomProperties(dataset.getProperties());
}
return datasetProperties;