Add kafka MQE schema (#1016)

This commit is contained in:
Yi (Alan) Wang 2018-03-09 15:45:28 -08:00 committed by GitHub
parent 523b203dd6
commit c20b9879f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 138 additions and 1 deletions

View File

@ -34,7 +34,7 @@
"symbolDocs": { "symbolDocs": {
"DIRECT_COPY": "Direct copy without modification", "DIRECT_COPY": "Direct copy without modification",
"TRANSFORMED": "Transformed data with modification (format or content change)", "TRANSFORMED": "Transformed data with modification (format or content change)",
"VIEW": "Represents a view relationship where source is the view, and destination is the table.", "VIEW": "Represents a view relationship where source is the table, and destination is the view (e.g. Teradata, Dali)",
"MANAGE": "Represents a higher level of source dataset (e.g. HIVE), managing a lower level of destination dataset (e.g. HDFS)" "MANAGE": "Represents a higher level of source dataset (e.g. HIVE), managing a lower level of destination dataset (e.g. HDFS)"
} }
}, },

View File

@ -0,0 +1,103 @@
{
"type": "record",
"name": "MetadataQualityEvent",
"namespace": "com.linkedin.events.metadata",
"doc": "Metadata quality event is emitted by data processing job to capture potential metadata quality issues.",
"fields": [
{
"name": "auditHeader",
"type": "com.linkedin.events.KafkaAuditHeader",
"doc": "This header records information about the context of an event as it is emitted into kafka and is intended to be used by the kafka audit application. For more information see go/kafkaauditheader"
},
{
"name": "datasetIdentifier",
"type": "DatasetIdentifier",
"doc": "Unique Identifier of a dataset, which contains three parts: dataPlatform, name, origin"
},
{
"name": "changeAuditStamp",
"type": "ChangeAuditStamp",
"doc": "The change auditing information, including actor, change type and timestamp, etc"
},
{
"name": "issueType",
"type": {
"type": "enum",
"name": "MetadataQualityIssueType",
"doc": "",
"symbols": [
"DQF_VIOLATION",
"MISSING_METADATA",
"METADATA_SCHEMA_MISMATCH",
"METADATA_DATA_MISMATCH",
"EVALUATION_ERROR",
"OTHER"
],
"symbolDocs": {
"DQF_VIOLATION": "DQF rule violation",
"MISSING_METADATA": "Metadata is missing where it's required",
"METADATA_SCHEMA_MISMATCH": "Metadata doesn't match up with the schema",
"METADATA_DATA_MISMATCH": "Metadata doesn't match up with the actual data",
"EVALUATION_ERROR": "Error encounter when evaluating expression",
"OTHER": "Other type of issues"
}
}
},
{
"name": "severity",
"type": {
"type": "enum",
"name": "MetadataQualityIssueSeverity",
"doc": "",
"symbols": [
"INFO",
"WARNING",
"CRITICAL"
],
"symbolDocs": {
"INFO": "The issue is for information only",
"WARNING": "The issue is non-blocking but should be monitored",
"CRITICAL": "The issue is blocking the process"
}
}
},
{
"name": "message",
"type": [
"null",
"string"
],
"default": null,
"doc": "An optional message associated with this issue."
},
{
"name": "totalInputRecords",
"type": "int",
"doc": "Total number of input records"
},
{
"name": "DqfRule",
"type": [
"null",
"string"
],
"doc": "The violated DQF rule. Only applicable if issueType is set to DQF_VIOLATION"
},
{
"name": "issues",
"doc": "List of metadata quality issues",
"type": [
"null",
{
"type": "array",
"items": "MetadataQualityIssue"
}
]
},
{
"name": "reference",
"type": "string",
"doc": "Reference for this issue, e.g. URL to DQF dashboard or Azkaban execution"
}
]
}

View File

@ -0,0 +1,34 @@
{
"type": "record",
"name": "MetadataQualityIssue",
"namespace": "com.linkedin.events.metadata",
"doc": "Metadata quality issue",
"fields": [
{
"name": "complianceMetadata",
"type": [
"null",
"ComplianceEntity"
],
"doc": "The compliance metadata provided"
},
{
"name": "message",
"type": "string",
"doc": "Error message for the issue"
},
{
"name": "counts",
"type": "int",
"doc": "Number of records affected by the issue"
},
{
"name": "quarantinedLocation",
"type": [
"null",
"string"
],
"doc": "Location where affected data have been moved to"
}
]
}