mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-18 14:16:48 +00:00
feat(models): Introducing Dataset Partitions Aspect (#10997)
Co-authored-by: John Joyce <john@Johns-MBP.lan> Co-authored-by: John Joyce <john@ip-192-168-1-200.us-west-2.compute.internal>
This commit is contained in:
parent
9321e94247
commit
6f09b96b1d
@ -0,0 +1,24 @@
|
|||||||
|
namespace com.linkedin.dataset
|
||||||
|
|
||||||
|
import com.linkedin.common.AuditStamp
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Summary information about a single partition of a dataset: its identifier and, optionally, when it was created and last modified.
|
||||||
|
*/
|
||||||
|
record PartitionSummary {
|
||||||
|
/**
|
||||||
|
* A unique id / value for the partition for which statistics were collected,
|
||||||
|
* generated by applying the key definition to a given row.
|
||||||
|
*/
|
||||||
|
partition: string
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The created time for a given partition.
|
||||||
|
*/
|
||||||
|
created: optional AuditStamp
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The last modified / touched time for a given partition.
|
||||||
|
*/
|
||||||
|
lastModified: optional AuditStamp
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
namespace com.linkedin.dataset
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Summarizes the partitions of a dataset (currently its minimum and maximum partition) for Data Lake tables (e.g. Hive, S3, Iceberg, Delta, Hudi, etc.).
|
||||||
|
*/
|
||||||
|
@Aspect = {
|
||||||
|
"name": "partitionsSummary"
|
||||||
|
}
|
||||||
|
record PartitionsSummary {
|
||||||
|
/**
|
||||||
|
* The minimum partition, i.e. the first partition when the dataset's partitions are sorted in order.
|
||||||
|
*/
|
||||||
|
minPartition: optional PartitionSummary
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The maximum partition, i.e. the last partition when the dataset's partitions are sorted in order.
|
||||||
|
*/
|
||||||
|
maxPartition: optional PartitionSummary
|
||||||
|
}
|
@ -150,6 +150,8 @@ record SchemaField {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* For Datasets which are partitioned, this determines the partitioning key.
|
* For Datasets which are partitioned, this determines the partitioning key.
|
||||||
|
* Note that multiple columns can be part of a partitioning key, but currently we do not support
|
||||||
|
* rendering the ordered partitioning key.
|
||||||
*/
|
*/
|
||||||
isPartitioningKey: optional boolean
|
isPartitioningKey: optional boolean
|
||||||
|
|
||||||
|
@ -1,24 +1,28 @@
|
|||||||
namespace com.linkedin.timeseries
|
namespace com.linkedin.timeseries
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Defines how the data is partitioned
|
* A reference to a specific partition in a dataset.
|
||||||
*/
|
*/
|
||||||
record PartitionSpec {
|
record PartitionSpec {
|
||||||
|
|
||||||
type: enum PartitionType {
|
|
||||||
FULL_TABLE,
|
|
||||||
QUERY,
|
|
||||||
PARTITION
|
|
||||||
} = "PARTITION"
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* String representation of the partition
|
* A unique id / value for the partition for which statistics were collected,
|
||||||
|
* generated by applying the key definition to a given row.
|
||||||
*/
|
*/
|
||||||
@TimeseriesField = {}
|
@TimeseriesField = {}
|
||||||
partition: string
|
partition: string
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Time window of the partition if applicable
|
* Time window of the partition, if we are able to extract it from the partition key.
|
||||||
*/
|
*/
|
||||||
timePartition: optional TimeWindow
|
timePartition: optional TimeWindow
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unused!
|
||||||
|
*/
|
||||||
|
@deprecated
|
||||||
|
type: enum PartitionType {
|
||||||
|
FULL_TABLE,
|
||||||
|
QUERY,
|
||||||
|
PARTITION
|
||||||
|
} = "PARTITION"
|
||||||
}
|
}
|
@ -45,6 +45,7 @@ entities:
|
|||||||
- access
|
- access
|
||||||
- structuredProperties
|
- structuredProperties
|
||||||
- forms
|
- forms
|
||||||
|
- partitionsSummary
|
||||||
- name: dataHubPolicy
|
- name: dataHubPolicy
|
||||||
doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
|
doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
|
||||||
category: internal
|
category: internal
|
||||||
|
Loading…
x
Reference in New Issue
Block a user