mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-18 14:16:48 +00:00
feat(models): Introducing Dataset Partitions Aspect (#10997)
Co-authored-by: John Joyce <john@Johns-MBP.lan> Co-authored-by: John Joyce <john@ip-192-168-1-200.us-west-2.compute.internal>
This commit is contained in:
parent
9321e94247
commit
6f09b96b1d
@ -0,0 +1,24 @@
|
||||
namespace com.linkedin.dataset
|
||||
|
||||
import com.linkedin.common.AuditStamp
|
||||
|
||||
/**
|
||||
* Summary of a single partition: its unique id / value plus optional created and last-modified audit stamps.
|
||||
*/
|
||||
record PartitionSummary {
|
||||
/**
|
||||
* A unique id / value for the partition for which statistics were collected,
|
||||
* generated by applying the key definition to a given row.
|
||||
*/
|
||||
partition: string
|
||||
|
||||
/**
|
||||
* The audit stamp recording when the given partition was created. Optional.
|
||||
*/
|
||||
created: optional AuditStamp
|
||||
|
||||
/**
|
||||
* The audit stamp recording when the given partition was last modified / touched. Optional.
|
||||
*/
|
||||
lastModified: optional AuditStamp
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
namespace com.linkedin.dataset
|
||||
|
||||
/**
|
||||
* Summarizes the partitions of Data Lake tables (e.g. Hive, S3, Iceberg, Delta, Hudi, etc.) by their minimum and maximum partition.
|
||||
*/
|
||||
@Aspect = {
|
||||
"name": "partitionsSummary"
|
||||
|
||||
}
|
||||
record PartitionsSummary {
|
||||
/**
|
||||
* The minimum partition, i.e. the first one in the partitions' ordering (the ordering itself is not defined in this record). Optional.
|
||||
*/
|
||||
minPartition: optional PartitionSummary
|
||||
|
||||
/**
|
||||
* The maximum partition, i.e. the last one in the partitions' ordering (the ordering itself is not defined in this record). Optional.
|
||||
*/
|
||||
maxPartition: optional PartitionSummary
|
||||
}
|
@ -150,6 +150,8 @@ record SchemaField {
|
||||
|
||||
/**
|
||||
* For Datasets which are partitioned, this determines the partitioning key.
|
||||
* Note that multiple columns can be part of a partitioning key, but currently we do not support
|
||||
* rendering the ordered partitioning key.
|
||||
*/
|
||||
isPartitioningKey: optional boolean
|
||||
|
||||
|
@ -1,24 +1,28 @@
|
||||
namespace com.linkedin.timeseries
|
||||
|
||||
/**
|
||||
* Defines how the data is partitioned
|
||||
* A reference to a specific partition in a dataset.
|
||||
*/
|
||||
record PartitionSpec {
|
||||
|
||||
type: enum PartitionType {
|
||||
FULL_TABLE,
|
||||
QUERY,
|
||||
PARTITION
|
||||
} = "PARTITION"
|
||||
|
||||
/**
|
||||
* String representation of the partition
|
||||
* A unique id / value for the partition for which statistics were collected,
|
||||
* generated by applying the key definition to a given row.
|
||||
*/
|
||||
@TimeseriesField = {}
|
||||
partition: string
|
||||
|
||||
/**
|
||||
* Time window of the partition if applicable
|
||||
* Time window of the partition, if we are able to extract it from the partition key.
|
||||
*/
|
||||
timePartition: optional TimeWindow
|
||||
|
||||
/**
|
||||
* Unused!
|
||||
*/
|
||||
@deprecated
|
||||
type: enum PartitionType {
|
||||
FULL_TABLE,
|
||||
QUERY,
|
||||
PARTITION
|
||||
} = "PARTITION"
|
||||
}
|
@ -45,6 +45,7 @@ entities:
|
||||
- access
|
||||
- structuredProperties
|
||||
- forms
|
||||
- partitionsSummary
|
||||
- name: dataHubPolicy
|
||||
doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
|
||||
category: internal
|
||||
|
Loading…
x
Reference in New Issue
Block a user