Add Retention Policy section in MCE schema (#1259)

This commit is contained in:
Yi (Alan) Wang 2018-07-17 17:35:08 -07:00 committed by GitHub
parent bc9be69822
commit 89993d7591
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 88 additions and 143 deletions

View File

@ -5,29 +5,34 @@
"fields": [
{
"name": "compliancePurgeType",
"type": {
"type": "enum",
"name": "CompliancePurgeType",
"doc": "Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies.",
"symbols": [
"AUTO_PURGE",
"CUSTOM_PURGE",
"LIMITED_RETENTION",
"PURGE_NOT_APPLICABLE",
"MANUAL_PURGE",
"MANUAL_LIMITED_RETENTION",
"PURGE_EXEMPTED"
],
"symbolDocs": {
"AUTO_PURGE": "Dataset is setup for automatic data purge with existing framework",
"CUSTOM_PURGE": "This has been replaced by MANUAL_PURGE. Do NOT use it any more.",
"MANUAL_PURGE": "Data owners setup a custom data purge flow to accomplish data clean-up",
"LIMITED_RETENTION": "Dataset contains PII but are automatically deleted by the system within a time limit",
"MANUAL_LIMITED_RETENTION": "Data owners setup a custom system the delete or regenerate the data within a time limit",
"PURGE_NOT_APPLICABLE": "Dataset does not contain PII and does not require clean-up",
"PURGE_EXEMPTED": "Dataset exempted from purging"
"type": [
"null",
{
"type": "enum",
"name": "CompliancePurgeType",
"doc": "Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies.",
"symbols": [
"AUTO_PURGE",
"CUSTOM_PURGE",
"LIMITED_RETENTION",
"PURGE_NOT_APPLICABLE",
"MANUAL_PURGE",
"MANUAL_LIMITED_RETENTION",
"PURGE_EXEMPTED"
],
"symbolDocs": {
"AUTO_PURGE": "Dataset is setup for automatic data purge with existing framework",
"CUSTOM_PURGE": "This has been replaced by MANUAL_PURGE. Do NOT use it any more.",
"MANUAL_PURGE": "Data owners setup a custom data purge flow to accomplish data clean-up",
"LIMITED_RETENTION": "Dataset contains PII but are automatically deleted by the system within a time limit",
"MANUAL_LIMITED_RETENTION": "Data owners setup a custom system the delete or regenerate the data within a time limit",
"PURGE_NOT_APPLICABLE": "Dataset does not contain PII and does not require clean-up",
"PURGE_EXEMPTED": "Dataset exempted from purging"
}
}
}
],
"doc": "[Deprecated] Please use RetentionPolicy to set purge info instead",
"default": null
},
{
"name": "retentionWindow",
@ -35,7 +40,7 @@
"null",
"long"
],
"doc": "How long data is retained in seconds for the case of MANUAL_LIMITED_RETENTION",
"doc": "[Deprecated] Please use RetentionPolicy to set purge info instead",
"default": null
},
{
@ -44,7 +49,7 @@
"null",
"string"
],
"doc": "The additional information about purging if the purge type is PURGE_EXEMPTED",
"doc": "[Deprecated] Please use RetentionPolicy to set purge info instead",
"default": null
},
{

View File

@ -119,6 +119,15 @@
],
"doc": "Human-entered compliance metadata"
},
{
"name": "retentionPolicy",
"type": [
"null",
"RetentionPolicy"
],
"default": null,
"doc": "Human-entered retention policy metadata"
},
{
"name": "suggestedCompliancePolicy",
"type": [

View File

@ -2,35 +2,47 @@
"type": "record",
"name": "RetentionPolicy",
"namespace": "com.linkedin.events.metadata",
"doc" : "Retention policy at the dataset level",
"fields" : [
{ "name" : "retentionType",
"type" : {
"type" : "enum",
"name" : "RetentionType",
"doc" : "types of different retention policies",
"symbols" : [ "LIMITED", "LEGAL_HOLD", "UNLIMITED" ],
"symbolDocs" : {
"LIMITED" : "Data is stored for limited time only",
"LEGAL_HOLD" : "Data is held for legal compliance or investigation",
"UNLIMITED" : "Data can be held indefinitely"
}
},
"doc" : "Retention type on dataset"
},
{ "name" : "retentionWindow",
"type" : [ "null", "long" ],
"doc" : "Time in (unit) how long data is retained for in case of LIMITED retention",
"default" : null
},
{ "name" : "retentionWindowUnit",
"type" : [ "null", {
"doc": "Retention policy at the dataset level",
"fields": [
{
"name": "purgeType",
"type": {
"type": "enum",
"name": "TimePeriodUnit",
"doc": "Unit of time period",
"symbols": [ "YEAR", "MONTH", "WEEK", "DAY", "HOUR", "MINUTE", "SECOND", "MILLISECOND"]
}],
"default" : null
"name": "PurgeType",
"doc": "Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies.",
"symbols": [
"AUTO_PURGE",
"LIMITED_RETENTION",
"MANUAL_PURGE",
"MANUAL_LIMITED_RETENTION",
"PURGE_NOT_APPLICABLE",
"PURGE_EXEMPTED"
],
"symbolDocs": {
"AUTO_PURGE": "Dataset is setup for automatic data purge with existing framework",
"LIMITED_RETENTION": "Dataset contains PII but are automatically deleted by the system within a time limit",
"MANUAL_PURGE": "Data owners setup a custom data purge flow to accomplish data clean-up",
"MANUAL_LIMITED_RETENTION": "Data owners setup a custom system the delete or regenerate the data within a time limit",
"PURGE_NOT_APPLICABLE": "Dataset does not contain PII and does not require clean-up",
"PURGE_EXEMPTED": "Dataset exempted from purging"
}
}
},
{
"name": "retentionWindow",
"type": [
"null",
"long"
],
"doc": "How long data is retained in seconds for the case of MANUAL_LIMITED_RETENTION"
},
{
"name": "purgeNote",
"type": [
"null",
"string"
],
"doc": "The additional information about purging if the purge type is PURGE_EXEMPTED"
}
]
}

View File

@ -1,87 +0,0 @@
{
"type": "record",
"name": "SecuritySpecification",
"namespace": "com.linkedin.events.metadata",
"doc": "Security related specification, such as Privacy Compliance, Confidentail, Retention",
"fields": [
{ "name": "classification",
"type": {
"type": "record",
"name": "ConfidentialClassification",
"fields": [
{ "name": "highlyConfidential",
"type": ["null", { "type": "array", "items": "string" }],
"doc": "list of highly confidential fields",
"default": null },
{ "name": "confidential",
"type": ["null", { "type": "array", "items": "string" }],
"doc": "list of confidential fields", "default": null },
{ "name": "limitedDistribution",
"type": ["null", { "type": "array", "items": "string" }],
"doc": "list of limited distribution fields", "default": null },
{ "name": "mustBeEncrypted",
"type": ["null", { "type": "array", "items": "string" }],
"doc": "list of fields that must be encrypted", "default": null },
{ "name": "mustBeMasked",
"type": ["null", { "type": "array", "items": "string" }],
"doc": "list of fields that must be masked", "default": null }
]
},
"doc": "Classify the sensitive fields into 5 categories: HighlyConfidential, Confidential, LimitedDistribution, MustBeEncrypted, MustBeMasked. If every field is confidential, * can be used."
},
{ "name": "recordOwnerType", "type" :
{ "type": "enum",
"name": "RecordOwnerType",
"doc": "Record ownership spec that marks the owner of a record",
"symbols" : [ "MEMBER", "CUSTOMER", "JOINT", "INTERNAL", "COMPANY" ],
"symbolDocs" : {
"INTERNAL" : "data is generated by an internal entity, such as internal app, internal service",
"CUSTOMER" : "If the data is generated through a customer or enterprise product integration, that data is owned by a Customer.",
"JOINT" : "If the data is generated by an action of a member to a customer object, such as a member clicks the ad posted by a customer, the ownership is joint.",
"COMPANY" : "If data is generated/produced by company",
"MEMBER" : "If data is generated/produced by member. All datasets that marked as member, should comply with legal retention policies."
}
}
},
{ "name" : "retentionPolicy",
"type" : [ "null", "RetentionPolicy" ],
"doc": "Retention"
},
{ "name" : "geographicAffinity",
"type" : [ "null", {
"type" : "record",
"name" : "GeographicAffinity",
"fields" : [ {
"name" : "affinity",
"type" : [ "null", {
"type" : "enum",
"name" : "AffinityType",
"symbols" : [ "LIMITED", "EXCLUDED" ]
} ],
"doc" : "Affinity type",
"default" : null
},
{ "name" : "locations",
"type" : {
"type" : "array",
"items" : {
"type" : "record",
"name" : "Locale",
"namespace" : "com.linkedin.common",
"doc" : "Motivated by java.util.Locale",
"fields" : [
{ "name" : "language", "type" : "string", "doc" : "A lowercase two-letter language code as defined by ISO-639." },
{ "name" : "country", "type" : [ "null", "string" ], "doc" : "An uppercase two-letter country code as defined by ISO-3166.", "default" : null },
{ "name" : "variant", "type" : [ "null", "string" ], "doc" : "Vendor or browser-specific code.", "default" : null }
]
}
},
"doc" : "List of locations data should be stored at"
}
]
}],
"doc" : "Geographic affinity if applicable",
"default" : null
}
]
}

View File

@ -2,7 +2,7 @@
"type": "record",
"name": "SuggestedCompliancePolicy",
"namespace": "com.linkedin.events.metadata",
"doc" : "Compliance metadata suggested by algorithms.",
"doc": "Compliance metadata suggested by algorithms.",
"fields": [
{
"name": "suggestedFieldClassifications",
@ -15,7 +15,10 @@
"fields": [
{
"name": "uid",
"type": [ "null", "string" ],
"type": [
"null",
"string"
],
"doc": "Unique identifier for the suggestion.",
"default": null
},
@ -45,14 +48,17 @@
"fields": [
{
"name": "uid",
"type": [ "null", "string" ],
"type": [
"null",
"string"
],
"doc": "Unique identifier for the suggestion.",
"default": null
},
{
"name": "isContaining",
"type": "boolean",
"doc": "Whether the dataset contains the specific kind of data."
"doc": "Whether the dataset contains the specific kind of data."
},
{
"name": "confidenceLevel",