mirror of
https://github.com/datahub-project/datahub.git
synced 2025-11-01 19:25:56 +00:00
Add Retention Policy section in MCE schema (#1259)
This commit is contained in:
parent
bc9be69822
commit
89993d7591
@ -5,29 +5,34 @@
|
||||
"fields": [
|
||||
{
|
||||
"name": "compliancePurgeType",
|
||||
"type": {
|
||||
"type": "enum",
|
||||
"name": "CompliancePurgeType",
|
||||
"doc": "Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies.",
|
||||
"symbols": [
|
||||
"AUTO_PURGE",
|
||||
"CUSTOM_PURGE",
|
||||
"LIMITED_RETENTION",
|
||||
"PURGE_NOT_APPLICABLE",
|
||||
"MANUAL_PURGE",
|
||||
"MANUAL_LIMITED_RETENTION",
|
||||
"PURGE_EXEMPTED"
|
||||
],
|
||||
"symbolDocs": {
|
||||
"AUTO_PURGE": "Dataset is setup for automatic data purge with existing framework",
|
||||
"CUSTOM_PURGE": "This has been replaced by MANUAL_PURGE. Do NOT use it any more.",
|
||||
"MANUAL_PURGE": "Data owners setup a custom data purge flow to accomplish data clean-up",
|
||||
"LIMITED_RETENTION": "Dataset contains PII but is automatically deleted by the system within a time limit",
|
||||
"MANUAL_LIMITED_RETENTION": "Data owners setup a custom system to delete or regenerate the data within a time limit",
|
||||
"PURGE_NOT_APPLICABLE": "Dataset does not contain PII and does not require clean-up",
|
||||
"PURGE_EXEMPTED": "Dataset exempted from purging"
|
||||
"type": [
|
||||
"null",
|
||||
{
|
||||
"type": "enum",
|
||||
"name": "CompliancePurgeType",
|
||||
"doc": "Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies.",
|
||||
"symbols": [
|
||||
"AUTO_PURGE",
|
||||
"CUSTOM_PURGE",
|
||||
"LIMITED_RETENTION",
|
||||
"PURGE_NOT_APPLICABLE",
|
||||
"MANUAL_PURGE",
|
||||
"MANUAL_LIMITED_RETENTION",
|
||||
"PURGE_EXEMPTED"
|
||||
],
|
||||
"symbolDocs": {
|
||||
"AUTO_PURGE": "Dataset is setup for automatic data purge with existing framework",
|
||||
"CUSTOM_PURGE": "This has been replaced by MANUAL_PURGE. Do NOT use it any more.",
|
||||
"MANUAL_PURGE": "Data owners setup a custom data purge flow to accomplish data clean-up",
|
||||
"LIMITED_RETENTION": "Dataset contains PII but is automatically deleted by the system within a time limit",
|
||||
"MANUAL_LIMITED_RETENTION": "Data owners setup a custom system to delete or regenerate the data within a time limit",
|
||||
"PURGE_NOT_APPLICABLE": "Dataset does not contain PII and does not require clean-up",
|
||||
"PURGE_EXEMPTED": "Dataset exempted from purging"
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"doc": "[Deprecated] Please use RetentionPolicy to set purge info instead",
|
||||
"default": null
|
||||
},
|
||||
{
|
||||
"name": "retentionWindow",
|
||||
@ -35,7 +40,7 @@
|
||||
"null",
|
||||
"long"
|
||||
],
|
||||
"doc": "How long data is retained in seconds for the case of MANUAL_LIMITED_RETENTION",
|
||||
"doc": "[Deprecated] Please use RetentionPolicy to set purge info instead",
|
||||
"default": null
|
||||
},
|
||||
{
|
||||
@ -44,7 +49,7 @@
|
||||
"null",
|
||||
"string"
|
||||
],
|
||||
"doc": "The additional information about purging if the purge type is PURGE_EXEMPTED",
|
||||
"doc": "[Deprecated] Please use RetentionPolicy to set purge info instead",
|
||||
"default": null
|
||||
},
|
||||
{
|
||||
|
||||
@ -119,6 +119,15 @@
|
||||
],
|
||||
"doc": "Human-entered compliance metadata"
|
||||
},
|
||||
{
|
||||
"name": "retentionPolicy",
|
||||
"type": [
|
||||
"null",
|
||||
"RetentionPolicy"
|
||||
],
|
||||
"default": null,
|
||||
"doc": "Human-entered retention policy metadata"
|
||||
},
|
||||
{
|
||||
"name": "suggestedCompliancePolicy",
|
||||
"type": [
|
||||
|
||||
@ -2,35 +2,47 @@
|
||||
"type": "record",
|
||||
"name": "RetentionPolicy",
|
||||
"namespace": "com.linkedin.events.metadata",
|
||||
"doc" : "Retention policy at the dataset level",
|
||||
"fields" : [
|
||||
{ "name" : "retentionType",
|
||||
"type" : {
|
||||
"type" : "enum",
|
||||
"name" : "RetentionType",
|
||||
"doc" : "types of different retention policies",
|
||||
"symbols" : [ "LIMITED", "LEGAL_HOLD", "UNLIMITED" ],
|
||||
"symbolDocs" : {
|
||||
"LIMITED" : "Data is stored for limited time only",
|
||||
"LEGAL_HOLD" : "Data is held for legal compliance or investigation",
|
||||
"UNLIMITED" : "Data can be held indefinitely"
|
||||
}
|
||||
},
|
||||
"doc" : "Retention type on dataset"
|
||||
},
|
||||
{ "name" : "retentionWindow",
|
||||
"type" : [ "null", "long" ],
|
||||
"doc" : "Time in (unit) how long data is retained for in case of LIMITED retention",
|
||||
"default" : null
|
||||
},
|
||||
{ "name" : "retentionWindowUnit",
|
||||
"type" : [ "null", {
|
||||
"doc": "Retention policy at the dataset level",
|
||||
"fields": [
|
||||
{
|
||||
"name": "purgeType",
|
||||
"type": {
|
||||
"type": "enum",
|
||||
"name": "TimePeriodUnit",
|
||||
"doc": "Unit of time period",
|
||||
"symbols": [ "YEAR", "MONTH", "WEEK", "DAY", "HOUR", "MINUTE", "SECOND", "MILLISECOND"]
|
||||
}],
|
||||
"default" : null
|
||||
"name": "PurgeType",
|
||||
"doc": "Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies.",
|
||||
"symbols": [
|
||||
"AUTO_PURGE",
|
||||
"LIMITED_RETENTION",
|
||||
"MANUAL_PURGE",
|
||||
"MANUAL_LIMITED_RETENTION",
|
||||
"PURGE_NOT_APPLICABLE",
|
||||
"PURGE_EXEMPTED"
|
||||
],
|
||||
"symbolDocs": {
|
||||
"AUTO_PURGE": "Dataset is setup for automatic data purge with existing framework",
|
||||
"LIMITED_RETENTION": "Dataset contains PII but is automatically deleted by the system within a time limit",
|
||||
"MANUAL_PURGE": "Data owners setup a custom data purge flow to accomplish data clean-up",
|
||||
"MANUAL_LIMITED_RETENTION": "Data owners setup a custom system to delete or regenerate the data within a time limit",
|
||||
"PURGE_NOT_APPLICABLE": "Dataset does not contain PII and does not require clean-up",
|
||||
"PURGE_EXEMPTED": "Dataset exempted from purging"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "retentionWindow",
|
||||
"type": [
|
||||
"null",
|
||||
"long"
|
||||
],
|
||||
"doc": "How long data is retained in seconds for the case of MANUAL_LIMITED_RETENTION"
|
||||
},
|
||||
{
|
||||
"name": "purgeNote",
|
||||
"type": [
|
||||
"null",
|
||||
"string"
|
||||
],
|
||||
"doc": "The additional information about purging if the purge type is PURGE_EXEMPTED"
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -1,87 +0,0 @@
|
||||
{
|
||||
"type": "record",
|
||||
"name": "SecuritySpecification",
|
||||
"namespace": "com.linkedin.events.metadata",
|
||||
"doc": "Security related specification, such as Privacy Compliance, Confidential, Retention",
|
||||
"fields": [
|
||||
{ "name": "classification",
|
||||
"type": {
|
||||
"type": "record",
|
||||
"name": "ConfidentialClassification",
|
||||
"fields": [
|
||||
{ "name": "highlyConfidential",
|
||||
"type": ["null", { "type": "array", "items": "string" }],
|
||||
"doc": "list of highly confidential fields",
|
||||
"default": null },
|
||||
{ "name": "confidential",
|
||||
"type": ["null", { "type": "array", "items": "string" }],
|
||||
"doc": "list of confidential fields", "default": null },
|
||||
{ "name": "limitedDistribution",
|
||||
"type": ["null", { "type": "array", "items": "string" }],
|
||||
"doc": "list of limited distribution fields", "default": null },
|
||||
{ "name": "mustBeEncrypted",
|
||||
"type": ["null", { "type": "array", "items": "string" }],
|
||||
"doc": "list of fields that must be encrypted", "default": null },
|
||||
{ "name": "mustBeMasked",
|
||||
"type": ["null", { "type": "array", "items": "string" }],
|
||||
"doc": "list of fields that must be masked", "default": null }
|
||||
]
|
||||
},
|
||||
"doc": "Classify the sensitive fields into 5 categories: HighlyConfidential, Confidential, LimitedDistribution, MustBeEncrypted, MustBeMasked. If every field is confidential, * can be used."
|
||||
},
|
||||
{ "name": "recordOwnerType", "type" :
|
||||
{ "type": "enum",
|
||||
"name": "RecordOwnerType",
|
||||
"doc": "Record ownership spec that marks the owner of a record",
|
||||
"symbols" : [ "MEMBER", "CUSTOMER", "JOINT", "INTERNAL", "COMPANY" ],
|
||||
"symbolDocs" : {
|
||||
"INTERNAL" : "data is generated by an internal entity, such as internal app, internal service",
|
||||
"CUSTOMER" : "If the data is generated through a customer or enterprise product integration, that data is owned by a Customer.",
|
||||
"JOINT" : "If the data is generated by an action of a member to a customer object, such as a member clicks the ad posted by a customer, the ownership is joint.",
|
||||
"COMPANY" : "If data is generated/produced by company",
|
||||
"MEMBER" : "If data is generated/produced by member. All datasets that marked as member, should comply with legal retention policies."
|
||||
}
|
||||
}
|
||||
},
|
||||
{ "name" : "retentionPolicy",
|
||||
"type" : [ "null", "RetentionPolicy" ],
|
||||
"doc": "Retention"
|
||||
},
|
||||
{ "name" : "geographicAffinity",
|
||||
"type" : [ "null", {
|
||||
"type" : "record",
|
||||
"name" : "GeographicAffinity",
|
||||
"fields" : [ {
|
||||
"name" : "affinity",
|
||||
"type" : [ "null", {
|
||||
"type" : "enum",
|
||||
"name" : "AffinityType",
|
||||
"symbols" : [ "LIMITED", "EXCLUDED" ]
|
||||
} ],
|
||||
"doc" : "Affinity type",
|
||||
"default" : null
|
||||
},
|
||||
{ "name" : "locations",
|
||||
"type" : {
|
||||
"type" : "array",
|
||||
"items" : {
|
||||
"type" : "record",
|
||||
"name" : "Locale",
|
||||
"namespace" : "com.linkedin.common",
|
||||
"doc" : "Motivated by java.util.Locale",
|
||||
"fields" : [
|
||||
{ "name" : "language", "type" : "string", "doc" : "A lowercase two-letter language code as defined by ISO-639." },
|
||||
{ "name" : "country", "type" : [ "null", "string" ], "doc" : "An uppercase two-letter country code as defined by ISO-3166.", "default" : null },
|
||||
{ "name" : "variant", "type" : [ "null", "string" ], "doc" : "Vendor or browser-specific code.", "default" : null }
|
||||
]
|
||||
}
|
||||
},
|
||||
"doc" : "List of locations data should be stored at"
|
||||
}
|
||||
]
|
||||
}],
|
||||
"doc" : "Geographic affinity if applicable",
|
||||
"default" : null
|
||||
}
|
||||
]
|
||||
}
|
||||
@ -2,7 +2,7 @@
|
||||
"type": "record",
|
||||
"name": "SuggestedCompliancePolicy",
|
||||
"namespace": "com.linkedin.events.metadata",
|
||||
"doc" : "Compliance metadata suggested by algorithms.",
|
||||
"doc": "Compliance metadata suggested by algorithms.",
|
||||
"fields": [
|
||||
{
|
||||
"name": "suggestedFieldClassifications",
|
||||
@ -15,7 +15,10 @@
|
||||
"fields": [
|
||||
{
|
||||
"name": "uid",
|
||||
"type": [ "null", "string" ],
|
||||
"type": [
|
||||
"null",
|
||||
"string"
|
||||
],
|
||||
"doc": "Unique identifier for the suggestion.",
|
||||
"default": null
|
||||
},
|
||||
@ -45,14 +48,17 @@
|
||||
"fields": [
|
||||
{
|
||||
"name": "uid",
|
||||
"type": [ "null", "string" ],
|
||||
"type": [
|
||||
"null",
|
||||
"string"
|
||||
],
|
||||
"doc": "Unique identifier for the suggestion.",
|
||||
"default": null
|
||||
},
|
||||
{
|
||||
"name": "isContaining",
|
||||
"type": "boolean",
|
||||
"doc": "Whether the dataset contains the specific kind of data."
|
||||
"doc": "Whether the dataset contains the specific kind of data."
|
||||
},
|
||||
{
|
||||
"name": "confidenceLevel",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user