diff --git a/li-utils/src/main/java/com/linkedin/common/urn/RegisteredSchemaUrn.java b/li-utils/src/main/java/com/linkedin/common/urn/RegisteredSchemaUrn.java deleted file mode 100644 index d834b90ee0..0000000000 --- a/li-utils/src/main/java/com/linkedin/common/urn/RegisteredSchemaUrn.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.linkedin.common.urn; - -import com.linkedin.common.RegisteredSchemaType; - - -public final class RegisteredSchemaUrn extends Urn { - - public static final String ENTITY_TYPE = "registeredSchema"; - - private static final String CONTENT_FORMAT = "(%s,%s)"; - - private final RegisteredSchemaType typeEntity; - - private final String nameEntity; - - public RegisteredSchemaUrn(RegisteredSchemaType type, String name) { - super(ENTITY_TYPE, String.format(CONTENT_FORMAT, type, name)); - this.typeEntity = type; - this.nameEntity = name; - } - - public RegisteredSchemaType getTypeEntity() { - return typeEntity; - } - - public String getNameEntity() { - return nameEntity; - } -} diff --git a/li-utils/src/main/pegasus/com/linkedin/common/RegisteredSchemaType.pdsc b/li-utils/src/main/pegasus/com/linkedin/common/RegisteredSchemaType.pdsc deleted file mode 100644 index fd37e3411c..0000000000 --- a/li-utils/src/main/pegasus/com/linkedin/common/RegisteredSchemaType.pdsc +++ /dev/null @@ -1,18 +0,0 @@ -{ - "name": "RegisteredSchemaType", - "namespace": "com.linkedin.common", - "type": "enum", - "symbols": [ - "KAFKA", - "ESPRESSO_DOCUMENT", - "ESPRESSO_KEY", - "DATA_DERIVED" - ], - "doc": "Type of registered schema, e.g. Kafka schema or Espresso Document schema. Name length of the enum is limited to 20.", - "symbolDocs": { - "KAFKA": "Schema for Kafka topics", - "ESPRESSO_DOCUMENT": "Schema for Espresso Document", - "ESPRESSO_KEY": "Schema for Espresso Key", - "DATA_DERIVED": "Schema for /data/derived HDFS datasets" - } -} diff --git a/li-utils/src/main/pegasus/com/linkedin/common/RegisteredSchemaUrn.pdsc b/li-utils/src/main/pegasus/com/linkedin/common/RegisteredSchemaUrn.pdsc deleted file mode 100644 index 8b8fac9bd0..0000000000 --- a/li-utils/src/main/pegasus/com/linkedin/common/RegisteredSchemaUrn.pdsc +++ /dev/null @@ -1,33 +0,0 @@ -{ - "ref" : "string", - "java" : { - "class" : "com.linkedin.common.urn.RegisteredSchemaUrn" - }, - "name" : "RegisteredSchemaUrn", - "namespace" : "com.linkedin.common", - "doc" : "Standardized identifier for registered schema metadata. Examples include Kafka, Espresso and Oracle schemas.", - "type" : "typeref", - "validate" : { - "com.linkedin.common.validator.TypedUrnValidator" : { - "accessible" : true, - "entityType" : "registeredSchema", - "owningTeam" : "urn:li:internalTeam:datahub", - "constructable" : true, - "name" : "RegisteredSchema", - "namespace" : "li", - "doc" : "Standardized identifier for registered schema metadata. Examples include Kafka, Espresso and Oracle schemas.", - "owners" : [ "urn:li:corpuser:fbar", "urn:li:corpuser:bfoo" ], - "fields" : [ { - "name" : "type", - "doc" : "Type of the registered schema, e.g. Kafka, Espresso Documnet.", - "type" : "com.linkedin.common.RegisteredSchemaType" - }, { - "name" : "name", - "doc" : "Name of the registered schema, e.g. for kafka, . for Espresso", - "type" : "string", - "maxLength" : 300 - } ], - "maxLength" : 347 - } - } -} \ No newline at end of file diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/old-mce-schema.avsc b/metadata-events/mxe-utils-avro-1.7/src/test/resources/old-mce-schema.avsc deleted file mode 100644 index fd06c932cf..0000000000 --- a/metadata-events/mxe-utils-avro-1.7/src/test/resources/old-mce-schema.avsc +++ /dev/null @@ -1 +0,0 @@ -{"type":"record","name":"MetadataChangeEvent","namespace":"com.linkedin.pegasus2avro.mxe","fields":[{"name":"auditHeader","type":["null",{"type":"record","name":"KafkaAuditHeader","namespace":"com.linkedin.events","fields":[{"name":"time","type":"long","doc":"The time at which the event was emitted into kafka."},{"name":"server","type":"string","doc":"The fully qualified name of the host from which the event is being emitted.","compliance":"NONE"},{"name":"instance","type":["null","string"],"doc":"The instance on the server from which the event is being emitted. e.g. i001","default":null,"compliance":"NONE"},{"name":"appName","type":"string","doc":"The name of the application from which the event is being emitted. see go/appname","compliance":"NONE"},{"name":"messageId","type":{"type":"fixed","name":"UUID","namespace":"com.linkedin.events","size":16},"doc":"A unique identifier for the message","compliance":"NONE"},{"name":"auditVersion","type":["null","int"],"doc":"The version that is being used for auditing. In version 0, the audit trail buckets events into 10 minute audit windows based on the EventHeader timestamp. In version 1, the audit trail buckets events as follows: if the schema has an outer KafkaAuditHeader, use the outer audit header timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader use that inner audit header's timestamp for bucketing","default":null,"compliance":"NONE"},{"name":"fabricUrn","type":["null","string"],"doc":"The fabricUrn of the host from which the event is being emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See go/fabric.","default":null,"compliance":"NONE"},{"name":"clusterConnectionString","type":["null","string"],"doc":"This is a String that the client uses to establish some kind of connection with the Kafka cluster. The exact format of it depends on specific versions of clients and brokers. This information could potentially identify the fabric and cluster with which the client is producing to or consuming from.","default":null,"compliance":"NONE"}]}],"doc":"Kafka audit header. See go/kafkaauditheader for more info.","default":null},{"name":"proposedSnapshot","type":["null",{"type":"record","name":"CorpUserSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"CorpUserInfo","namespace":"com.linkedin.pegasus2avro.identity","fields":[{"name":"active","type":"boolean","doc":"Whether the corpUser is active"},{"name":"displayName","type":["null","string"],"doc":"displayName of this user , e.g. Hang Zhang(DataHQ)","default":null},{"name":"email","type":"string","doc":"email address of this user"},{"name":"title","type":["null","string"],"doc":"title of this user","default":null},{"name":"managerUrn","type":["null","string"],"doc":"direct manager of this user","default":null},{"name":"departmentId","type":["null","long"],"doc":"department id this user belong to","default":null},{"name":"departmentName","type":["null","string"],"doc":"department name this user belong to","default":null},{"name":"firstName","type":["null","string"],"doc":"first name of this user","default":null},{"name":"lastName","type":["null","string"],"doc":"last name of this user","default":null},{"name":"fullName","type":["null","string"],"doc":"Common name of this user, format is firstName + lastName (split by a whitespace)","default":null},{"name":"countryCode","type":["null","string"],"doc":"two uppercase letters country code. e.g. US","default":null}]}]},"doc":"The list of metadata aspects associated with the CorpUser. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"CorpGroupSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"CorpGroupInfo","namespace":"com.linkedin.pegasus2avro.identity","fields":[{"name":"email","type":"string","doc":"email of this group"},{"name":"admins","type":{"type":"array","items":"string"},"doc":"owners of this group"},{"name":"members","type":{"type":"array","items":"string"},"doc":"List of ldap urn in this group."},{"name":"groups","type":{"type":"array","items":"string"},"doc":"List of groups in this group."}]}]},"doc":"The list of metadata aspects associated with the LdapUser. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"DataConceptSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"InstitutionalMemory","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"elements","type":{"type":"array","items":{"type":"record","name":"InstitutionalMemoryMetadata","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"url","type":"string","doc":"Link to an engineering design document or a wiki page."},{"name":"description","type":"string","doc":"Description of the link."},{"name":"createStamp","type":{"type":"record","name":"AuditStamp","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"time","type":"long","doc":"When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."},{"name":"actor","type":"string","doc":"The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change."},{"name":"impersonator","type":["null","string"],"doc":"The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.","default":null}]},"doc":"Audit stamp associated with creation of this record"}]}},"doc":"List of records that represent institutional memory of an entity. Each record consists of a link, description, creator and timestamps associated with that record."}]},{"type":"record","name":"Ownership","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"owners","type":{"type":"array","items":{"type":"record","name":"Owner","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"owner","type":"string","doc":"Owner URN, e.g. urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name"},{"name":"type","type":{"type":"enum","name":"OwnershipType","namespace":"com.linkedin.pegasus2avro.common","symbols":["DEVELOPER","SUBJECT_MATTER_EXPERT","DELEGATE","PRODUCER","CONSUMER","STAKEHOLDER"]},"doc":"The type of the ownership"},{"name":"source","type":["null",{"type":"record","name":"OwnershipSource","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"type","type":{"type":"enum","name":"OwnershipSourceType","namespace":"com.linkedin.pegasus2avro.common","symbols":["AUDIT","DATABASE","FILE_SYSTEM","ISSUE_TRACKING_SYSTEM","MANUAL","SERVICE","SOURCE_CONTROL","OTHER"]},"doc":"The type of the source"},{"name":"url","type":["null","string"],"doc":"A reference URL for the source","default":null}]}],"doc":"Source information for the ownership","default":null}]}},"doc":"List of owners of the entity."},{"name":"lastModified","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"Audit stamp containing who last modified the record and when.","default":null}]},{"type":"record","name":"DataConceptProperties","namespace":"com.linkedin.pegasus2avro.dataConcept","fields":[{"name":"name","type":"string","doc":"Name of the Data Concept"},{"name":"tags","type":{"type":"array","items":"string"},"doc":"Tags associated with the Data Concept"},{"name":"lastUpdatedTimeStamp","type":"com.linkedin.pegasus2avro.common.AuditStamp","doc":"Audit stamp associated with modification of this record"},{"name":"description","type":"string","doc":"Description of the Data Concept"}]},{"type":"record","name":"RelatedEntities","namespace":"com.linkedin.pegasus2avro.dataConcept","fields":[{"name":"createStamp","type":"com.linkedin.pegasus2avro.common.AuditStamp","doc":"Audit stamp associated with creation of this record"},{"name":"relatedDatasets","type":{"type":"array","items":{"type":"record","name":"RelatedDatasetEntity","namespace":"com.linkedin.pegasus2avro.dataConcept","fields":[{"name":"datasetUrn","type":"string","doc":"Urn for the related Dataset entity"},{"name":"description","type":"string","doc":"Description of how this Dataset entity is related to this Data Concept."}]}},"doc":"List of datasets related to this data concept"},{"name":"relatedMetrics","type":{"type":"array","items":{"type":"record","name":"RelatedMetricEntity","namespace":"com.linkedin.pegasus2avro.dataConcept","fields":[{"name":"metricUrn","type":"string","doc":"Urn for the related Metric entity"},{"name":"description","type":"string","doc":"Description of how this Metric entity is related to this Data Concept."}]}},"doc":"List of metrics related to this data concept"},{"name":"relatedFeatures","type":{"type":"array","items":{"type":"record","name":"RelatedFeatureEntity","namespace":"com.linkedin.pegasus2avro.dataConcept","fields":[{"name":"featureUrn","type":"string","doc":"Urn for the related Feature entity"},{"name":"description","type":"string","doc":"Description of how this Feature entity is related to this Data Concept."}]}},"doc":"List of features related to this data concept"},{"name":"relatedInchartsDashboards","type":{"type":"array","items":{"type":"record","name":"RelatedInchartsDashboardEntity","namespace":"com.linkedin.pegasus2avro.dataConcept","fields":[{"name":"inchartsDashboardUrn","type":"string","doc":"Urn for the related Incharts Dashboard entity"},{"name":"description","type":"string","doc":"Description of how this Incharts Dashboard entity is related to this Data Concept."}]}},"doc":"List of Incharts dashboards related to this data concept"}]}]},"doc":"The list of metadata aspects associated with the data concept. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"DatasetGroupSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"DatasetGroupInfo","namespace":"com.linkedin.pegasus2avro.datasetGroup","fields":[{"name":"purpose","type":"string","doc":"Purpose for this dataset group"},{"name":"contact","type":"string","doc":"Person to contact regarding this group"}]},{"type":"record","name":"DatasetGroupMembership","namespace":"com.linkedin.pegasus2avro.datasetGroup","fields":[{"name":"datasets","type":{"type":"array","items":"string"},"doc":"The list of datasets that this dataset group contains"}]}]},"doc":"The list of metadata aspects associated with the dataset group. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"DatasetSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":["com.linkedin.pegasus2avro.common.InstitutionalMemory","com.linkedin.pegasus2avro.common.Ownership",{"type":"record","name":"OwnershipSuggestion","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"owners","type":{"type":"array","items":"com.linkedin.pegasus2avro.common.Owner"},"doc":"List of owners of the entity."}]},{"type":"record","name":"Status","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"removed","type":"boolean","doc":"whether the entity is removed or not","default":false}]},{"type":"record","name":"ComplianceInfo","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"originUrns","type":["null",{"type":"array","items":"string"}],"doc":"To present a list of origin (upstream) datasets for resolved compliance relationship like VIEW lineage type. i.e. multiple source datasets go to 1 or multiple dataset(s)","default":null},{"name":"complianceFields","type":{"type":"array","items":{"type":"record","name":"FieldCompliance","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"fieldPath","type":"string","doc":"Schema field path to identify a specific dataset field"},{"name":"dataType","type":{"type":"enum","name":"ComplianceDataType","namespace":"com.linkedin.pegasus2avro.dataset","symbols":["ADDRESS","ADVERTISER_ID","ARTICLE_ID","AUTHENTICATION_TOKEN","CITY_STATE_REGION","COMPANY_ID","CONTENT_TOPIC_ID","CONTRACT_ID","COOKIE_BEACON_BROWSER_ID","CUSTOM_ID","DATE_OF_BIRTH","DEVICE_ID_ADVERTISING_ID","ELEVATE_CONTRACT_ID","ELEVATE_SEAT_ID","EMAIL","ENTERPRISE_ACCOUNT_ID","ENTERPRISE_PROFILE_ID","EVENT_TIME","FINANCIAL_NUMBER","FREEFORMED_UGC","GROUP_ID","HANDLES","HIRE_STREAM_ID","INGESTED_CONTENT_ID","INTEREST_ID","IP_ADDRESS","JOB_ID","LATITUDE_LONGITUDE","LOGS_PII","LYNDA_MASTER_ADMIN_ID","LYNDA_USER_ID","MEMBER_ID","MEMBER_PHOTO","MESSAGE","MIXED_ID","NAME","NATIONAL_ID","NONE","OTHER_PII","PASSWORD_CREDENTIAL","PAYMENT_INFO","PHONE","PROFILE_URL","SALARY","SEAT_ID","SLIDESHARE_USER_ID","SOCIAL_NETWORK_ID","TRANSACTION_TIME","UGC_ID","UNSTRUCTURED_PII","ZUORA_ACCOUNT_ID"]},"doc":"The The business / semantic meaning or data type of data fields"},{"name":"fieldFormat","type":["null",{"type":"enum","name":"FieldFormat","namespace":"com.linkedin.pegasus2avro.dataset","symbols":["NUMERIC","URN","REVERSED_URN","COMPOSITE_URN","RAW","CUSTOM","ENCODED","HASHED"]}],"doc":"Specify the optional field format (go/gdpr-taxonomy). When data classification type is of ID type (MemberId, GroupId, CompanyId, CustomId, MixedId), the field format needs to be specified, otherwise it's optional","default":null},{"name":"valuePattern","type":["null","string"],"doc":"Optional pattern for the value. Required for CUSTOM fieldFormat","default":null},{"name":"nonOwner","type":["boolean","null"],"doc":"[Deprecated] use purgeKey instead. Set this field to true if the field doesn't owns the entire row/record","default":false},{"name":"purgeKey","type":["null","boolean"],"doc":"Set this to true if the field is the purge key of the entire row/record, and false otherwise. Applicable if compliance data type is of ID type.","default":null},{"name":"securityClassification","type":["null",{"type":"enum","name":"SecurityClassification","namespace":"com.linkedin.pegasus2avro.dataset","symbols":["HIGHLY_CONFIDENTIAL","CONFIDENTIAL","LIMITED_DISTRIBUTION","GENERAL","PUBLIC"]}],"doc":"Security classification that governs handling of this field (go/dht). Optional when dataTypes is NONE.","default":null},{"name":"providedByUser","type":["null","boolean"],"doc":"Whether this field contains data directly provided by LinkedIn users.","default":null},{"name":"containingPersonalData","type":["null","boolean"],"doc":"Whether this field contains personal data. This is always derived from dataType and should never be set explicitly.","default":null},{"name":"readonly","type":["null","boolean"],"doc":"Whether this field is system generated and thus readonly. This should never be set explicitly.","default":null}]}},"doc":"A complete list of dataset schema fields and their corresponding compliance information"},{"name":"datasetClassification","type":["null",{"type":"record","name":"DatasetClassification","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"connectionsOrFollowersOrFollowing","type":["null","boolean"],"doc":"Dataset contains information about member connections, followers or following","default":null},{"name":"profile","type":["null","boolean"],"doc":"Dataset contains member profile data","default":null},{"name":"messaging","type":["null","boolean"],"doc":"Dataset contains messaging data, including both the message content and metadata (sender, receiver, time, etc)","default":null},{"name":"thirdPartyIntegrationsInUse","type":["null","boolean"],"doc":"Dataset contains third party integration usage data","default":null},{"name":"activity","type":["null","boolean"],"doc":"Dataset contains member activity information that is viewable to other members (newsfeed posts, blog posts, shares, likes, etc)","default":null},{"name":"settings","type":["null","boolean"],"doc":"Dataset contains member personal settings","default":null},{"name":"jobApplicationFlow","type":["null","boolean"],"doc":"Dataset contains job application flow information: job applications, resumes, application status, etc","default":null},{"name":"enterpriseProduct","type":["null","boolean"],"doc":"Dataset contains enterprise product data: recruiter search data, sales navigator data, etc","default":null},{"name":"accountStatus","type":["null","boolean"],"doc":"Dataset contains member account status: premium, sales navigator user, etc","default":null},{"name":"addressBookImports","type":["null","boolean"],"doc":"Dataset contains address book import data or data derived from imported address book data","default":null},{"name":"microsoftData","type":["null","boolean"],"doc":"Dataset contains data coming over from Microsoft","default":null},{"name":"subsidiaryData","type":["null","boolean"],"doc":"Dataset contains data from companies LinkedIn acquired (Lynda, Slideshare, etc)","default":null},{"name":"otherThirdPartyIntegrations","type":["null","boolean"],"doc":"Dataset contains other third party integrations: cell phone uploads, etc","default":null},{"name":"device","type":["null","boolean"],"doc":"Dataset contains member device information: IP address, device ID, browser ID, etc","default":null},{"name":"searchHistory","type":["null","boolean"],"doc":"Dataset contains search history on LinkedIn platforms","default":null},{"name":"courseViewingHistory","type":["null","boolean"],"doc":"Dataset contains course viewing history on LinkedIn Learning","default":null},{"name":"whoViewedMyProfile","type":["null","boolean"],"doc":"Dataset contains data that is ingested/created/inferred/derived in relation to providing the WVMP feature","default":null},{"name":"profileViewsByMe","type":["null","boolean"],"doc":"Dataset contains data regarding which profiles a member viewed (member IDs for the other profiles, referring URLs, etc)","default":null},{"name":"advertising","type":["null","boolean"],"doc":"Dataset contains advertising data: data from advertising partners, ad clicks/views, etc","default":null},{"name":"usageOrErrorOrConnectivity","type":["null","boolean"],"doc":"Dataset contains member usage, error reporting or connectivity data","default":null},{"name":"otherClickstreamOrBrowsingData","type":["null","boolean"],"doc":"Dataset contains other click-related data that doesn't fit into a bucket above.","default":null},{"name":"employeeData","type":["null","boolean"],"doc":"Dataset contains only employee data.","default":null}]}],"doc":"Dataset level description of whether the dataset contains certain types of information","default":null},{"name":"datasetConfidentiality","type":["null","com.linkedin.pegasus2avro.dataset.SecurityClassification"],"doc":"Overall dataset confidentiality or security classification. Classification is derived from the list of sensitive fields spec.","default":null},{"name":"containingPersonalData","type":["null","boolean"],"doc":"Whether this dataset contains personal data. This can be explicitly set for schemaless system or derived from complianceFields.","default":null},{"name":"lastModified","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"Audit stamp containing who last modified the record and when.","default":null}]},{"type":"record","name":"ComplianceInfoSuggestion","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"suggestedContainingPersonalData","type":["boolean","null"],"doc":"Suggestion for whether this dataset contains personal data. This is derived from suggestedFieldClassifications.","default":false},{"name":"suggestedFieldClassifications","type":{"type":"array","items":{"type":"record","name":"SuggestedFieldClassification","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"suggestion","type":"com.linkedin.pegasus2avro.dataset.FieldCompliance","doc":"Suggestion for the field level compliance metadata."},{"name":"confidenceLevel","type":"float","doc":"The confidence level for this suggestion. The range is [0.0, 1.0]"},{"name":"uid","type":["null","string"],"doc":"Unique identifier of this suggestion","default":null}]}},"doc":"A list of suggested field-level compliance metadata."}]},{"type":"record","name":"DataValidationConfig","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"configName","type":"string","doc":"Configuration name"},{"name":"configVersion","type":"string","doc":"Configuration version specified by user, to be used for internal documentation purposes by the user; this field is not used by Data Sentinel","default":"0.0.1"},{"name":"columnDefinitions","type":{"type":"array","items":{"type":"record","name":"ColumnDefinition","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"definitionName","type":"string","doc":"Definition name"},{"name":"columnPath","type":"string","doc":"Path to the (nested) column, where null must be encoded as an empty string","default":""},{"name":"udfPath","type":"string","doc":"Fully qualified class name of the UDF. Must extend appropriate trait from com.linkedin.pegasus2avro.datasentinel.core.udf, where null must be encoded as an empty string","default":""},{"name":"sqlExpr","type":"string","doc":"SQL expression from original DataFrame or columnPath DataFrame, where null must be encoded as an empty string","default":""},{"name":"computeQuantiles","type":"boolean","doc":"Indicates if quantile values are to be computed; default is false; if computed, we compute min(1000, recordCount) quantile values","default":false},{"name":"computeFrequentValues","type":"boolean","doc":"Indicates if frequent values are to be computed; default is false; if computed, we compute min(100, record count) most frequent values","default":false},{"name":"minFrequency","type":"long","doc":"Indicates the minimum count frequency (at least 1000) of the column definition value in order for the value's frequent count to be computed; if minimum count frequency < 1000, compute min(100, record count) most frequent values","default":0},{"name":"explode","type":"boolean","doc":"Assuming the column values are collections of primitive types, indicates whether all collections are to be unrolled for frequent values to be computed","default":false},{"name":"pii","type":"boolean","doc":"Indicates if this column definition contains PII; default is false; if true, we strip out stats and assertions that could contain PII before pushing the data out of HDFS","default":false},{"name":"groupByInfo","type":{"type":"array","items":{"type":"record","name":"GroupByDefinition","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"definitionName","type":"string","doc":"The column definition to group on"},{"name":"definitionValues","type":{"type":"array","items":"string"},"doc":"List of values of interest for this column definition; an empty list implies all values are of interest","default":[]}]}},"doc":"The column definitions to group by","default":[]},{"name":"constraints","type":{"type":"array","items":{"type":"record","name":"DataAssertion","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"dataAssertionName","type":"string","doc":"Data assertion name"},{"name":"dataAssertionDescription","type":"string","doc":"Data assertion description to provide context, where null must be encoded as an empty string","default":""},{"name":"consumerAssertion","type":"boolean","doc":"Indicates if assertion is not supported by data stewards","default":false},{"name":"dataAssertionTags","type":"string","doc":"Comma separated tags to allow users to categorize data assertions, where null must be encoded as an empty string","default":""},{"name":"dataAssertionType","type":{"type":"enum","name":"DataAssertionType","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","symbols":["DEFINITION_ALL_TRUE","DEFINITION_NOT_ALL_TRUE","DEFINITION_ALL_FALSE","DEFINITION_NOT_ALL_FALSE","DEFINITION_ALL_NULLS","DEFINITION_NOT_ALL_NULLS","DEFINITION_EXCLUDE_NULLS","DEFINITION_VALUES_ARE_NUMERIC","DEFINITION_NOT_ALL_ZEROS","DEFINITION_NOT_ALL_POSITIVES","DEFINITION_NOT_ALL_NEGATIVES","DEFINITION_VALUES_IN_RANGE","DEFINITION_METRIC_IN_RANGE","DEFINITION_METRICS_RATIO_BOUNDED","DEFINITION_VALUES_ARE_DISTINCT","DEFINITION_VALUES_NOT_IDENTICAL","DEFINITION_MATCHES_REGEX","DEFINITION_DOES_NOT_MATCH_REGEX","DEFINITION_NOT_EMPTY","DEFINITION_NOT_ALL_EMPTY","DEFINITION_EXCLUDE_VALUES","DEFINITION_INCLUDE_VALUES","DEFINITION_ENUMERATE_VALUES","CHECK_DATASET_TIMESTAMP","COMPARE_SCHEMAS","COMPARE_RECORD_COUNTS","COMPARE_DATA_VOLUMES","COMPARE_DEFINITION_METRICS","COMPARE_DISTRIBUTIONS","COMPARE_FREQUENT_VALUES"]},"doc":"Data assertion type"},{"name":"dataAssertionParameters","type":{"type":"array","items":{"type":"record","name":"DataAssertionParameter","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"dataAssertionParameterType","type":{"type":"enum","name":"DataAssertionParameterType","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","symbols":["DEFINITION_NAME","DEFINITION_METRIC","DEFINITION_METRIC_INFO","DEFINITION_VALUES","REGEX","IGNORE_CASE","NO_TRIM_SPACE","SIGNIFICANCE_LEVEL","LOWER_BOUND_FRACTION","UPPER_BOUND_FRACTION","LOWER_BOUND_OF_RANGE","UPPER_BOUND_OF_RANGE","DURATION_DAYS","DURATION_HOURS","IGNORE_DOCSTRINGS","NEGATE_ASSERTION","DISABLE_ASSERTION","DISABLE_ASSERTION_ON_VERSION_CHANGE","MAX_FAILURE_COUNT","MAX_FAILURE_FRACTION","ABORT_ON_FAILURE","ALERT_ON_FAILURE","SAMPLE_ON_FAILURE","TOP_MATCHES","INSPECT_ELEMENTS","ELEMENT_INFO","DISTRIBUTION_TEST","FAIL_ON_DEPRECATED_VALUES","IGNORE_NEW_VALUES","VALUE_COUNT_CHANGE_THRESHOLD","VALUE_COUNT_INCREASE_THRESHOLD","CHURN_THRESHOLD"]},"doc":"Data assertion parameter type"},{"name":"dataAssertionParameterValues","type":{"type":"array","items":"string"},"doc":"Data assertion parameter values","default":[]},{"name":"groupByInfo","type":{"type":"array","items":{"type":"record","name":"GroupByInfoDefinitionNameValuePair","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"definitionName","type":"string","doc":"The column definition that the definition has been grouped on"},{"name":"definitionValue","type":"string","doc":"The value of interest for this GROUP BY column definition"}]}},"doc":"List of GROUP BY definition name-value pairs this definition has been filtered by","default":[]},{"name":"definitionMetricInfoValues","type":{"type":"array","items":{"type":"record","name":"DefinitionMetricInfo","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"definitionName","type":"string","doc":"Definition name of referenced column definition"},{"name":"groupByInfo","type":{"type":"array","items":"com.linkedin.pegasus2avro.dataset.datasentinel.GroupByInfoDefinitionNameValuePair"},"doc":"List of GROUP BY definition name-value pairs this definition has been filtered by","default":[]},{"name":"definitionMetric","type":{"type":"enum","name":"DefinitionMetricType","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","symbols":["COUNT_DISTINCT","COUNT_EMPTY","COUNT_FALSE","COUNT_NEGATIVE","COUNT_NOTNULL","COUNT_NULL","COUNT_NUMERIC","COUNT_POSITIVE","COUNT_TOTAL","COUNT_TRUE","COUNT_ZERO","MIN","MAX","MEAN","STD_DEV","SKEWNESS","KURTOSIS","SUM","MEDIAN","PERCENTILE_99_5","PERCENTILE_99","PERCENTILE_95","PERCENTILE_90","PERCENTILE_75","PERCENTILE_25","IQR","MAD"]},"doc":"Metric for column definition referenced in definitionName field"}]}},"doc":"Information about the column definitions metrics and to use for the assertion","default":[]},{"name":"elementInfo","type":["null",{"type":"record","name":"ElementInfo","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"collectionEvaluation","type":{"type":"enum","name":"CollectionEvaluationType","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","symbols":["ALL_TRUE","ALL_FALSE","ANY_TRUE","ANY_FALSE"]},"doc":"If the referenced column definition is a collection of records, specifies evaluation semantics (e.g. ALL_TRUE, ALL_FALSE, ANY_TRUE, ANY_FALSE)","default":"ALL_TRUE"},{"name":"elementColumnPath","type":"string","doc":"If the referenced column definition is a collection of records, specifies path (within record) to element, where null must be encoded as an empty string","default":""}]}],"doc":"Information about whether and how to evaluate column definitions of type collection","default":null}]}},"doc":"Information about data assertion parameters","default":[]}]}},"doc":"The data assertions applied to this column definition","default":[]}]}},"doc":"Definition set containing path expressions & UDFs used in the config. Metrics are computed for all definitions."},{"name":"dataAssertions","type":{"type":"array","items":"com.linkedin.pegasus2avro.dataset.datasentinel.DataAssertion"},"doc":"Set containing names, types, and parameters for data assertions","default":[]},{"name":"sliceDefinitions","type":{"type":"array","items":{"type":"record","name":"SliceDefinition","namespace":"com.linkedin.pegasus2avro.dataset.datasentinel","fields":[{"name":"sliceName","type":"string","doc":"Slice name"},{"name":"udfPath","type":"string","doc":"Fully qualified class name of the UDF. Must extend appropriate trait from com.linkedin.pegasus2avro.datasentinel.core.udf, where null must be encoded as an empty string","default":""},{"name":"sqlExpr","type":"string","doc":"SQL expression that filters records from original DataFrame, in other words what would be a SQL WHERE expression, where null must be encoded as an empty string","default":""}]}},"doc":"Set containing UDFs or SQL expressions for slices","default":[]}]},{"type":"record","name":"DatasetProperties","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"description","type":["null","string"],"doc":"Documentation of the dataset","default":null},{"name":"uri","type":["null","string"],"doc":"The abstracted URI such as hdfs:///data/tracking/PageViewEvent, file:///dir/file_name. Uri should not include any environment specific properties. Some datasets might not have a standardized uri, which makes this field optional (i.e. kafka topic).","default":null},{"name":"tags","type":{"type":"array","items":"string"},"doc":"tags for the dataset","default":[]},{"name":"customProperties","type":{"type":"map","values":"string"},"doc":"A key-value map to capture any other non-standardized properties for the dataset","default":{}}]},{"type":"record","name":"ExportPolicy","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"containsUserGeneratedContent","type":"boolean","doc":"Indicates that the dataset contains data directly created by the member"},{"name":"containsUserActionGeneratedContent","type":"boolean","doc":"Indicates that the dataset contains data created as a result of direct member action on the site"},{"name":"containsUserDerivedContent","type":"boolean","doc":"Indicates that the dataset contains data owned by the member but not directly created by the member or due to member actions on the site"},{"name":"dataset","type":["null","string"],"doc":"[Deprecated] Do NOT use. Dataset this export policy is associated with.","default":null},{"name":"version","type":["long","null"],"doc":"[Deprecated] Do NOT use. The version of the export policy.","default":0},{"name":"created","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"[Deprecated] Do NOT use.","default":null},{"name":"lastModified","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"Audit stamp containing who last modified the record and when.","default":null},{"name":"deleted","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"[Deprecated] Do NOT use.","default":null}]},{"type":"record","name":"ReplicationConfig","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"gobblinConfig","type":["null","string"],"doc":"The raw gobblin HOCON configuration as a string","default":null}]},{"type":"record","name":"RetentionPolicy","namespace":"com.linkedin.pegasus2avro.dataset","fields":[{"name":"purgeType","type":{"type":"enum","name":"CompliancePurgeType","namespace":"com.linkedin.pegasus2avro.dataset","symbols":["AUTO_PURGE","MANUAL_PURGE","LIMITED_RETENTION","LIMITED_RETENTION_WITH_LOCKING","MANUAL_LIMITED_RETENTION","PURGE_NOT_APPLICABLE","PURGE_EXEMPTED"]},"doc":"Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies."},{"name":"purgeNote","type":["null","string"],"doc":"The additional information about purging if the purge type is PURGE_EXEMPTED","default":null},{"name":"lastModified","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"Audit stamp containing who last modified the record and when.","default":null}]},{"type":"record","name":"UMPDatasetProperties","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"name","type":"string","doc":"Name of the UMP dataset"},{"name":"description","type":"string","doc":"Documentation of the UMP dataset"},{"name":"bucket","type":"string","doc":"Relevant bucket of the metric"},{"name":"domain","type":"string","doc":"Domain name of the metric. One domain contains several buckets"},{"name":"tags","type":{"type":"array","items":"string"},"doc":"Tags associated with the UMP dataset this metric was obtained from","default":[]},{"name":"datasetClassifications","type":{"type":"array","items":"string"},"doc":"List of dataset classifications associated with the UMP dataset this metric was obtained from","default":[]},{"name":"timeSeries","type":"boolean","doc":"Indicates whether the UMP dataset is date partitioned or snapshot","default":false},{"name":"schedules","type":{"type":"array","items":{"type":"record","name":"ScheduleProperties","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"value","type":"string","doc":"Schedule time when the flow is run"},{"name":"cluster","type":{"type":"enum","name":"HadoopClusterType","namespace":"com.linkedin.pegasus2avro.common","symbols":["HOLDEM","WAR","FARO"]},"doc":"Cluster qualifier for this schedule"}]}},"doc":"List of schedule properties describing the execution schedules of the dataset","default":[]},{"name":"downstreamApps","type":{"type":"array","items":{"type":"record","name":"DownstreamAppData","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"downstreamAppName","type":{"type":"enum","name":"MetricDownstreamAppType","namespace":"com.linkedin.pegasus2avro.metric.ump","symbols":["RAPTOR","THIRDEYE","XLNT","PINOT","VOLDEMORT"]},"doc":"Type of the downstream app"},{"name":"downstreamContext","type":{"type":"record","name":"UMPDatasetDownstreamAppInfo","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"params","type":["null",{"type":"map","values":["string","int"]}],"doc":"Downstream app parameters","default":null},{"name":"attributes","type":["null",{"type":"array","items":"string"}],"doc":"Attributes to be onboarded on the downstream app","default":null},{"name":"topKDimensions","type":["null",{"type":"map","values":{"type":"array","items":{"type":"record","name":"UMPTopKMetric","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"metric","type":"string","doc":"Metric name associated with topK metric"},{"name":"value","type":"string","doc":"Metric value associated with topK metric"}]}}}],"doc":"Top K dimensions associated with the downstream app","default":null},{"name":"topKThresholdMetrics","type":["null",{"type":"array","items":"com.linkedin.pegasus2avro.dataset.ump.UMPTopKMetric"}],"doc":"Top K threshold metrics associated with downstream app","default":null},{"name":"disabledMetrics","type":["null",{"type":"array","items":"string"}],"doc":"Disabled metrics associated with downstream app","default":null},{"name":"dimensionValues","type":["null",{"type":"map","values":{"type":"array","items":"string"}}],"doc":"Dimensions to add along with the cardinality. This is if you wish to onboard metric on Raptor. Currently, UMP only supports adding dimensions for the member attribute in your data","default":null},{"name":"thirdEye","type":["null","boolean"],"doc":"Flag to indicate if the metric is onboarded on Third eye","default":null},{"name":"groupingSets","type":["null",{"type":"array","items":{"type":"array","items":"string"}}],"doc":"Custom grouping sets against which non additive metrics needs to be computed","default":null},{"name":"rollUps","type":["null",{"type":"array","items":{"type":"array","items":"string"}}],"doc":"Rollups provides a way to slice and dice across the hierarchy of dimensions","default":null},{"name":"voldemortBNPJobs","type":["null",{"type":"array","items":{"type":"record","name":"UMPDatasetVoldemortBNPJob","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"name","type":"string","doc":"Name of Voldemort BNP job"},{"name":"dataFile","type":"string","doc":"Name of the datafile associated with Voldemort BNP job"},{"name":"avroKeyField","type":"string","doc":"AVRO key field name"},{"name":"avroValueField","type":"string","doc":"AVRO field value"},{"name":"pushCluster","type":"string","doc":"Voldemort cluster to push"},{"name":"pushStoreName","type":"string","doc":"Voldemort store name"},{"name":"pushStoreDesc","type":"string","doc":"Voldemort store description"},{"name":"pushStoreOwners","type":"string","doc":"Voldemort store owners"}]}}],"doc":"Voldemort BuildAndPush jobs that are responsible for pushing UMP's datafile output to Voldemort","default":null},{"name":"nonAdditiveCubeFilters","type":["null",{"type":"map","values":"string"}],"doc":"Non additive cube filters associated with the downstream app","default":null}]},"doc":"Metadata associated with the downstream app"}]}},"doc":"List of supported downstream apps where the metrics are visualized","default":[]},{"name":"metrics","type":{"type":"array","items":"string"},"doc":"List of metrics associated with the UMP dataset","default":[]},{"name":"frequency","type":{"type":"array","items":{"type":"enum","name":"MetricFrequencyType","namespace":"com.linkedin.pegasus2avro.metric","symbols":["REALTIME","HOURLY","DAILY","WEEKLY","MONTHLY"]}},"doc":"Indicates how frequently metrics are generated from the UMP dataset","default":["DAILY"]},{"name":"highPriority","type":"boolean","doc":"Indicates whether this UMP dataset is onboarded to high priority (HP) queue (refer to go/dfsreonboarding for guidelines)","default":false},{"name":"dataLocations","type":{"type":"array","items":{"type":"record","name":"LocationProperties","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"platform","type":"string","doc":"Standardized platform urn where generated metrics' output data is stored/located"},{"name":"dataset","type":"string","doc":"Display name/path of the actual dataset where generated metrics output data is stored/located as specified by the user. Path can contain snapshot info like #LATEST which is captured by this field"},{"name":"datasetUrn","type":"string","doc":"Dataset Urn corresponding to data location. This is used for linking to the underlying Dataset entity."},{"name":"cluster","type":["null","com.linkedin.pegasus2avro.common.HadoopClusterType"],"doc":"Cluster on which the generated metrics' output data is located. Among the two fields cluster and fabric, only one should be set","default":null},{"name":"fabric","type":["null",{"type":"enum","name":"FabricType","namespace":"com.linkedin.pegasus2avro.common","symbols":["DEV","EI","PROD","CORP","LIT","PRIME","MANDA","AZURECONTROL","AZUREPROD","AZUREEI"]}],"doc":"Fabric on which the generated metrics' output data is located. Among the two fields cluster and fabric, only one should be set","default":null},{"name":"retention","type":"string","doc":"Retention of generated metrics' output storage. This could be in days or hours"},{"name":"frequency","type":"com.linkedin.pegasus2avro.metric.MetricFrequencyType","doc":"Frequency type that qualifies this data location","default":"DAILY"}]}},"doc":"List of locations where the output data of generated metrics is stored","default":[]},{"name":"dataInput","type":["null",{"type":"record","name":"UMPDataInput","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"sources","type":{"type":"array","items":{"type":"record","name":"SourceInfo","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"name","type":"string","doc":"Name of the input dataset, as defined in dataset.conf"},{"name":"path","type":"string","doc":"Display name or path of the dataset as specified in dataset.conf."},{"name":"platform","type":"string","doc":"Standardized platform urn where this input dataset is defined"},{"name":"datasetUrn","type":"string","doc":"Dataset Urn corresponding to this input. This is used for linking to the underlying Dataset entity."},{"name":"frequency","type":"com.linkedin.pegasus2avro.metric.MetricFrequencyType","doc":"Frequency type that qualifies this metric source","default":"DAILY"},{"name":"cluster","type":["null","com.linkedin.pegasus2avro.common.HadoopClusterType"],"doc":"Cluster on which this input dataset is defined. Among the two fields cluster and fabric, only one should be set","default":null},{"name":"fabric","type":["null","com.linkedin.pegasus2avro.common.FabricType"],"doc":"Fabric on which this input dataset is defined. Among the two fields cluster and fabric, only one should be set","default":null},{"name":"range","type":"int","doc":"Range of the input data source","default":1},{"name":"metaHiveTable","type":["null","string"],"doc":"Hive table associated with the input data source","default":null},{"name":"window","type":"int","doc":"Window is the number of days of data that you write","default":1}]}},"doc":"List of input data sources along with their paths and platforms","default":[]}]}],"doc":"Input data sources to the UMP dataset used in metric computation. This is the union of all input sources used in the computation of all metrics associated with this UMP dataset","default":null},{"name":"dimensions","type":{"type":"array","items":{"type":"record","name":"DimensionProperties","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"name","type":"string","doc":"Name of the field"},{"name":"description","type":"string","doc":"Description of the field"},{"name":"fieldFormat","type":["null","com.linkedin.pegasus2avro.dataset.FieldFormat"],"doc":"Format associated with the field","default":null},{"name":"complianceType","type":"com.linkedin.pegasus2avro.dataset.ComplianceDataType","doc":"Compliance type associated with the field"},{"name":"freq","type":"com.linkedin.pegasus2avro.metric.MetricFrequencyType","doc":"Frequency type that qualifies this dimension","default":"DAILY"}]}},"doc":"Dimensions associated with UMP dataset. Dimensions are the context under which the measurements were made (e.g. device type - iphone vs android vs destop, pageKey). The list of dimensions here must be the same as or a subset of the dimensions specified at the root dataset level","default":[]},{"name":"code","type":{"type":"record","name":"UMPMetricCode","namespace":"com.linkedin.pegasus2avro.metric.ump","fields":[{"name":"directory","type":{"type":"record","name":"CodeInfo","namespace":"com.linkedin.pegasus2avro.metric.ump","fields":[{"name":"name","type":"string","doc":"Name of the file/directory where the code is located e.g. metric-defs/provider/src/hp/careers_sa/careers/dataset.conf"},{"name":"codeSearchUrl","type":"string","doc":"Link to the codesearch location where the file/directory is located e.g. https://jarvis.corp.linkedin.com/codesearch/result/?name=dataset.conf&path=metric-defs%2Fprovider%2Fsrc%2Fhp%2Fcareers_sa%2Fcareers&reponame=metrics%2Fmetric-defs"},{"name":"changeLogUrl","type":"string","doc":"Link to the change log of the file/directory e.g. https://git.corp.linkedin.com:1367/a/plugins/gitiles/metrics/metric-defs/+log/master/provider/src/hp/careers_sa/careers/dataset.conf"},{"name":"gitUrl","type":"string","doc":"Link to the git location where the file/directory is located e.g. https://git.corp.linkedin.com:1367/a/plugins/gitiles/metrics/metric-defs/+/master/provider/src/hp/careers_sa/careers/dataset.conf"}]},"doc":"Directory info inside which the source code for metric computation is kept"},{"name":"files","type":{"type":"array","items":"com.linkedin.pegasus2avro.metric.ump.CodeInfo"},"doc":"Files inside the code directory which includes the pig script, source code, etc","default":[]},{"name":"aclOwners","type":{"type":"array","items":"string"},"doc":"List of ACL owners of the code directory","default":[]}]},"doc":"Code metadata associated with the UMP dataset"},{"name":"executionFlows","type":{"type":"array","items":{"type":"record","name":"UMPDatasetExecutionFlowData","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"flowType","type":{"type":"enum","name":"UMPDatasetAzkabanFlowType","namespace":"com.linkedin.pegasus2avro.dataset.ump","symbols":["PRODUCTION","BACKFILL"]},"doc":"Azkaban project type"},{"name":"flowURLs","type":{"type":"array","items":{"type":"record","name":"UMPDatasetAzkabanFlowInfo","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"workflowUrl","type":"string","doc":"Azkaban workflow link"},{"name":"projectName","type":"string","doc":"Display name of the Azkaban project"}]}},"doc":"Details about UMP dataset Azkaban workflow links"}]}},"doc":"Execution workflows of UMP dataset"},{"name":"enabled","type":"boolean","doc":"Flag to indicate if the UMP dataset is enabled/active or not","default":true},{"name":"artifactACL","type":["null",{"type":"record","name":"ArtifactACL","namespace":"com.linkedin.pegasus2avro.dataset.ump","fields":[{"name":"preApproved","type":{"type":"array","items":"string"},"doc":"This list contains end user ldap accounts. Users in this list get automatic approval for JIT access","default":[]},{"name":"prod","type":{"type":"array","items":"string"},"doc":"This list contains services/applications (thirdeye-controller, raptor-frontend, etc) or grid headless accounts which can access data associated with UMP dataset","default":[]}]}],"doc":"This field controls who can access output of UMP dataset in Pinot and HDFS/Hive. There are two lists in artifact_acl: prod and pre_approved.","default":null}]},"com.linkedin.pegasus2avro.dataset.datasentinel.ColumnDefinition","com.linkedin.pegasus2avro.dataset.datasentinel.DataAssertion","com.linkedin.pegasus2avro.dataset.datasentinel.SliceDefinition"]},"doc":"The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"DatasetInstanceSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for specific dataset instance."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"DatasetInstanceAlert","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","fields":[{"name":"alerts","type":{"type":"array","items":{"type":"record","name":"DataFidelityAlertDetails","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","fields":[{"name":"alertInstanceId","type":"int","doc":"the id of the alert instance"},{"name":"alertConfigurationId","type":"int","doc":"the foreign key to the configuration for this alert"},{"name":"alertType","type":{"type":"enum","name":"AlterTypeEnum","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","symbols":["MISSING_PARTITION","SLA_VIOLATION","THRESHOLD_VIOLATION"]},"doc":"type of alert"},{"name":"alertMessage","type":"string","doc":"a description of the alert"},{"name":"severity","type":{"type":"enum","name":"SeverityEnum","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","symbols":["LOW","MEDIUM","HIGH"]},"doc":"indicate how severe the alert is"},{"name":"created","type":"com.linkedin.pegasus2avro.common.AuditStamp","doc":"the epoch time in millisecond indicating when this alert is created"},{"name":"ownedPerson","type":"string","doc":"the person who configured the definition of this alert. e.g. urn:li:Corpuser:chguo"}]}},"doc":"all open alerts at dataset level for current dataset instance"},{"name":"latestAlerts","type":{"type":"array","items":"com.linkedin.pegasus2avro.dataset.datafidelity.DataFidelityAlertDetails"},"doc":"latest 10 alerts, at both instance level and partition level, for current dataset. In terms of snapshot tables, it only keeps records for the latest snapshot."}]},{"type":"record","name":"DatasetInstanceStats","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","fields":[{"name":"numRows","type":"long","doc":"For partition tables, it's the number of rows for all partitions of this dataset. For snapshot tables, it's for the latest snapshot of this dataset."},{"name":"numFiles","type":"long","doc":"For partition tables, it's the number of files for current dataset. For snapshot tables, it's for the latest snapshot of this dataset."},{"name":"numBytes","type":"long","doc":"For partition tables, it's the data file size for current dataset. For snapshot tables, it's for the latest snapshot of this dataset."},{"name":"dailyUsageStats","type":{"type":"array","items":{"type":"record","name":"DataFidelityUsageStats","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","fields":[{"name":"startAt","type":"long","doc":"the inclusive start time for current range"},{"name":"endAt","type":"long","doc":"the exclusive end time for current range"},{"name":"timeGranularity","type":{"type":"enum","name":"TimeGranularityEnum","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","symbols":["HOUR","DAY","WEEK","MONTH"]},"doc":"the granularity of startTime and endTime"},{"name":"totalQueries","type":"int","doc":"count of queries executed on a dataset within the specified range [startTime, endTime)"},{"name":"totalUniqueUsers","type":"int","doc":"count of unique users within the specified range [startTime, endTime)"}]}},"doc":"daily usage statistics for the past 7 days"},{"name":"weeklyUsageStats","type":{"type":"array","items":"com.linkedin.pegasus2avro.dataset.datafidelity.DataFidelityUsageStats"},"doc":"weekly usage statistics for the past 4 weeks"},{"name":"createdAt","type":"long","doc":"the epoch time when this statistic is created"},{"name":"currentHighWatermark","type":["null","long"],"doc":"the current high watermark when the statistics are taken","default":null},{"name":"highWatermarkColumn","type":["null","string"],"doc":"the column used as the watermark","default":null}]},{"type":"record","name":"WIMDReport","namespace":"com.linkedin.pegasus2avro.datasetInstance.wimd","fields":[{"name":"hourlyMetric","type":["null",{"type":"record","name":"WIMDMetric","namespace":"com.linkedin.pegasus2avro.datasetInstance.wimd","fields":[{"name":"freshness","type":["null","double"],"doc":"freshness of the dataset. Represents how fresh the data w.r.t to SLA defined for a dataset. Optional when freshness cannot be computed","default":null},{"name":"lastPublishedAt","type":"long","doc":"last published timestamp of the dataset instance and partition type"},{"name":"partitionReports","type":{"type":"array","items":{"type":"record","name":"DatasetPartitionReport","namespace":"com.linkedin.pegasus2avro.datasetInstance.wimd","fields":[{"name":"datasetPartition","type":"string","doc":"URN for specific dataset partition."},{"name":"wimdURL","type":"string","doc":"URL for the partition WIMD is referring to. This link points to the WIMD UI of this dataset partition"},{"name":"createdAt","type":"long","doc":"Timestamp of the current status of the partition."},{"name":"version","type":"long","doc":"version of dataset partition. Represents the number of times the current dataset partition is altered after it's creation. First version starts from 1. This is counted inside WIMD"},{"name":"availabilityStatus","type":{"type":"enum","name":"PartitionAvailabilityStatus","namespace":"com.linkedin.pegasus2avro.datasetInstance.wimd","symbols":["NOT_PUBLISHED","PUBLISHED","REMOVED"]},"doc":"Status of Dataset Partition. eg., PUBLISHED, REMOVED"},{"name":"publisher","type":{"type":"enum","name":"PlatformName","namespace":"com.linkedin.events.common.datamonitor","symbols":["UMP","GOBBLIN","TRACKING","FETL","XLNT","GDPR_OBFUSCATION","GOBBLIN_GDPR_PURGE","SCANNER","AUDIT"]},"doc":"The name of the publisher who produced this dataset partition. e.g., UMP, PURGE, OBUFSCATION, XLNT"}]}},"doc":"partitions and their properties reported by WIMD"},{"name":"wimdURL","type":"string","doc":"URL of the partition WIMD is referring to. This url points to the WIMD UI of this dataset partition group"}]}],"doc":"Metrics computed for partitions at hourly granularity","default":null},{"name":"dailyMetric","type":["null","com.linkedin.pegasus2avro.datasetInstance.wimd.WIMDMetric"],"doc":"Metrics computed for partitions at daily granularity","default":null},{"name":"weeklyMetric","type":["null","com.linkedin.pegasus2avro.datasetInstance.wimd.WIMDMetric"],"doc":"Metrics computed for partitions at weekly granularity","default":null},{"name":"biWeeklyMetric","type":["null","com.linkedin.pegasus2avro.datasetInstance.wimd.WIMDMetric"],"doc":"Metrics computed for partitions at bi-weekly granularity","default":null},{"name":"monthlyMetric","type":["null","com.linkedin.pegasus2avro.datasetInstance.wimd.WIMDMetric"],"doc":"Metrics computed for partitions at monthly granularity","default":null}]}]},"doc":"The list of metadata aspects associated with the dataset instance. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"DatasetPartitionSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for specific dataset partition."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"DataValidationReports","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"dataValidationReports","type":{"type":"array","items":{"type":"record","name":"DataValidationReport","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"dataSentinelVersion","type":"string","doc":"Data Sentinel version","default":"0.6.0"},{"name":"dataSentinelReportVersion","type":"string","doc":"Data Sentinel report version","default":"0.4.0"},{"name":"datasetVersion","type":"string","doc":"Dataset version","default":"0.0.1"},{"name":"reportSummary","type":{"type":"record","name":"DataAssertionResultsSummary","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"numberAssertionsPassed","type":"int","doc":"Number of assertions (PASS)","default":0},{"name":"numberAssertionsFailed","type":"int","doc":"Number of assertions (FAIL)","default":0},{"name":"numberAssertionsFailedWithSampleOnFailure","type":"int","doc":"Number of assertions (FAIL) and sampleOnFailure flag set","default":0},{"name":"numberAssertionsFailedWithAbortOnFailure","type":"int","doc":"Number of assertions (FAIL) and abortOnFailure flag set","default":0},{"name":"numberAssertionsFailedWithAlertOnFailure","type":"int","doc":"Number of assertions (FAIL) and alertOnFailure flag set","default":0},{"name":"numberAssertionsUnknown","type":"int","doc":"Number of assertions (UNKNOWN)","default":0}]},"doc":"Data validation report summary"},{"name":"dataValidationConfig","type":"com.linkedin.pegasus2avro.dataset.datasentinel.DataValidationConfig","doc":"Data validation configuration"},{"name":"sliceName","type":"string","doc":"Name of the slice used on the dataset"},{"name":"groupBySummary","type":{"type":"array","items":{"type":"record","name":"GroupBySummary","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"definitionName","type":"string","doc":"The column definition whose GROUP BY summary this is"},{"name":"groupByInfo","type":{"type":"array","items":"com.linkedin.pegasus2avro.dataset.datasentinel.GroupByDefinition"},"doc":"The GROUP BY info for the above column definition, with definitionValues populated","default":[]}]}},"doc":"GROUP BY summary for the column definitions specified in the validation config","default":[]},{"name":"priorProfilePath","type":"string","doc":"Prior profile path"},{"name":"newDataPathOriginal","type":"string","doc":"New data path (as originally specified)"},{"name":"newDataPath","type":"string","doc":"New data path (after resolution of #LATEST modifiers)"},{"name":"workflowDetails","type":{"type":"record","name":"WorkflowDetails","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"dataCenter","type":"string","doc":"Data center in which the flow was run"},{"name":"cluster","type":"string","doc":"Hadoop cluster on which the flow was run"},{"name":"gateway","type":"string","doc":"Gateway through which the flow was run"},{"name":"projectName","type":"string","doc":"Name of the Azkaban project that the flow belongs to"},{"name":"parentWorkflowName","type":"string","doc":"Name of the workflow in which the Data Sentinel flow is present"},{"name":"dataSentinelWorkflowName","type":"string","doc":"Name of the Data Sentinel workflow"},{"name":"jobExecutionUrl","type":"string","doc":"Azkaban URL (with attempt ID) for the Data Sentinel Spark job"}]},"doc":"Details about the workflow that ran Data Sentinel"},{"name":"dataSpecificationConfig","type":{"type":"record","name":"DataSpecificationConfig","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"dataFormat","type":{"type":"enum","name":"DataFormat","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","symbols":["AVRO","ORC","DALI"]},"doc":"Format of the dataset"},{"name":"timePeriod","type":{"type":"enum","name":"TimePeriod","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","symbols":["DAILY","HOURLY"]},"doc":"Time period: daily or hourly"},{"name":"dateTimeStart","type":"string","doc":"Start datetime of the period to read from (inclusive)"},{"name":"dateTimeEnd","type":"string","doc":"End datetime of the period to read from (inclusive)"},{"name":"daliFilterExp","type":"string","doc":"Filtering criteria (other than datepartition ranges) for the Dali dataset"},{"name":"daliDatepartitionColumn","type":"string","doc":"Name of the datepartition column for the input Dali dataset."},{"name":"daliDatepartitionFormat","type":"string","doc":"Datetime format of the datepartition column for the input Dali dataset."},{"name":"samplePercent","type":"double","doc":"Sample percentage"},{"name":"sampleSeed","type":"long","doc":"Sampling seed (must be > 0)"},{"name":"newDataFlexibleFormat","type":{"type":"map","values":"string"},"doc":"FlexibleAvroStorage configs for loading newDataPath"}]},"doc":"Data specification configuration"},{"name":"dataAssertionResults","type":{"type":"array","items":{"type":"record","name":"DataAssertionResult","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"dataAssertion","type":"com.linkedin.pegasus2avro.dataset.datasentinel.DataAssertion","doc":"Information about a single data assertion"},{"name":"outcome","type":{"type":"enum","name":"AssertionValue","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","symbols":["FAIL","PASS","UNKNOWN"]},"doc":"Result of testing data assertion (UNKNOWN if test not run)","default":"UNKNOWN"},{"name":"evaluationDetails","type":{"type":"record","name":"DataAssertionEvaluationDetails","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"assertionSummary","type":"string","doc":"Summary of assertion information: type, name, column definitions, where null must be encoded as an empty string","default":""},{"name":"outcomeSummary","type":"string","doc":"Description of assertion evaluation outcome, where null must be encoded as an empty string","default":""},{"name":"baseValue","type":"string","doc":"Base value used in comparison, e.g., for COMPARE_RECORD_COUNTS, this would be prior dataset number of records, where null must be encoded as an empty string","default":""},{"name":"baseDescription","type":"string","doc":"Description of base value, where null must be encoded as an empty string","default":""},{"name":"failureValue","type":"string","doc":"Failure value used in evaluation, where null must be encoded as an empty string","default":""},{"name":"failureDescription","type":"string","doc":"Description of failure value, where null must be encoded as an empty string","default":""},{"name":"actualValue","type":"string","doc":"Actual value used in comparison, e.g., for COMPARE_RECORD_COUNTS, this would be new dataset number of records, where null must be encoded as an empty string","default":""},{"name":"actualDescription","type":"string","doc":"Description of actual value, where null must be encoded as an empty string","default":""},{"name":"percentageChange","type":"string","doc":"Percentage change in value from base value to actual value, where null must be encoded as an empty string","default":""},{"name":"percentageChangeDescription","type":"string","doc":"Percentage change in value from base value to actual value","default":"Percentage change"},{"name":"thresholdValue","type":"string","doc":"Threshold value used in evaluation, e.g., for DEFINITION_ALL_TRUE with a MAX_FAILURE_COUNT, it would be the value of MAX_FAILURE_COUNT, where null must be encoded as an empty string","default":""},{"name":"thresholdDescription","type":"string","doc":"Description of threshold value, where null must be encoded as an empty string","default":""},{"name":"deprecatedValues","type":{"type":"array","items":{"type":"record","name":"LMSValueInfo","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"value","type":"string","doc":"Value of interest"},{"name":"priorCount","type":"string","doc":"Number of occurrences of value in prior dataset"},{"name":"newCount","type":"string","doc":"Number of occurrences of value in current dataset"},{"name":"percentThreshold","type":"string","doc":"Percent threshold for comparing the current count to the prior count"}]}},"doc":"Information about deprecated values","default":[]},{"name":"newValues","type":{"type":"array","items":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.LMSValueInfo"},"doc":"Information about new values","default":[]},{"name":"decreasedValues","type":{"type":"array","items":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.LMSValueInfo"},"doc":"Information about decreased values","default":[]},{"name":"increasedValues","type":{"type":"array","items":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.LMSValueInfo"},"doc":"Information about increased values","default":[]}]},"doc":"Information about data assertion evaluation"}]}},"doc":"Information about data assertion results","default":[]},{"name":"failureSamplesPath","type":"string","doc":"Failure samples path, where null must be encoded as an empty string","default":""}]}},"doc":"A list of DatasetValidationReport, this is outcome of one DSS execution","default":[]},{"name":"dssExecutionURL","type":["null","string"],"doc":"DSS web Execution URL(go/dssweb), one execution generates one DataValidationReports. E.g https://data-sentinel-webapp.corp.linkedin.com/#/datasets/778/executions/23377/validation-report","default":null},{"name":"dssDatasetURL","type":["null","string"],"doc":"DSS web Dataset URL(go/dssweb), one execution(DataValidationReports) is for one dataset, e.g. https://data-sentinel-webapp.corp.linkedin.com/#/datasets/778/ ","default":null}]},{"type":"record","name":"DatasetProfile","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"dataSentinelVersion","type":"string","doc":"Data Sentinel version","default":"0.0.1"},{"name":"dataSentinelProfileVersion","type":"string","doc":"Data Sentinel profile version","default":"0.0.1"},{"name":"datasetVersion","type":"string","doc":"Dataset version","default":"0.0.1"},{"name":"profilePath","type":"string","doc":"Path to where profile information is stored and retrieved"},{"name":"datasetPathOriginal","type":"string","doc":"Original path to dataset used to generate profile information"},{"name":"datasetPath","type":"string","doc":"Resolved path to dataset used to generate profile information"},{"name":"workflowDetails","type":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.WorkflowDetails","doc":"Details about the workflow that ran Data Sentinel"},{"name":"dataSpecificationConfig","type":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.DataSpecificationConfig","doc":"Data specification configuration"},{"name":"datasetTimestamp","type":"long","doc":"Dataset last modified time in Unix epoch time"},{"name":"schemaString","type":"string","doc":"Schema as a string"},{"name":"dataValidationConfig","type":"com.linkedin.pegasus2avro.dataset.datasentinel.DataValidationConfig","doc":"Data validation configuration"},{"name":"sliceName","type":"string","doc":"Name of the slice used on the dataset"},{"name":"groupBySummary","type":{"type":"array","items":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.GroupBySummary"},"doc":"GROUP BY summary for the column definitions specified in the validation config","default":[]},{"name":"datasetMetrics","type":{"type":"array","items":{"type":"record","name":"DatasetMetric","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"metricType","type":{"type":"enum","name":"DatasetMetricType","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","symbols":["RECORD_COUNT","DATA_VOLUME"]},"doc":"Dataset metric type"},{"name":"metricValue","type":"string","doc":"Metric value"}]}},"doc":"Metrics pertaining to the entire dataset","default":[]},{"name":"definitionMetrics","type":{"type":"array","items":{"type":"record","name":"DefinitionMetric","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"definitionName","type":"string","doc":"Column definition name"},{"name":"groupByInfo","type":{"type":"array","items":"com.linkedin.pegasus2avro.dataset.datasentinel.GroupByInfoDefinitionNameValuePair"},"doc":"List of GROUP BY definition name-value pairs this definition has been filtered by","default":[]},{"name":"metricType","type":"com.linkedin.pegasus2avro.dataset.datasentinel.DefinitionMetricType","doc":"Definition metric type"},{"name":"metricValue","type":"string","doc":"Metric value"}]}},"doc":"Metrics pertaining to definitions given in columnDefinitions","default":[]},{"name":"definitionValuesMetrics","type":{"type":"array","items":{"type":"record","name":"DefinitionValuesMetric","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"definitionName","type":"string","doc":"Column definition name"},{"name":"metricType","type":{"type":"enum","name":"DefinitionMultiValuedMetricType","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","symbols":["TOP_VALUES_BY_COUNT","QUANTILES"]},"doc":"Definition multi-valued metric type"},{"name":"metricParameterType","type":{"type":"enum","name":"DefinitionMetricParameterType","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","symbols":["TOP_VALUES_COUNT","QUANTILES_COUNT"]},"doc":"Definition metric parameter type"},{"name":"metricParameterValues","type":{"type":"array","items":"string"},"doc":"Definition metric parameter values","default":[]},{"name":"metricValues","type":{"type":"array","items":{"type":"record","name":"DefinitionMultiValuedMetric","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"value","type":"string","doc":"Column definition value"},{"name":"count","type":"string","doc":"Count of value; if count is not meaningful as for quantiles, then empty"}]}},"doc":"Set of definition multi-valued metrics","default":[]}]}},"doc":"Metrics pertaining to definitions given in columnDefinitions","default":[]},{"name":"numericFeatureMetrics","type":{"type":"array","items":{"type":"record","name":"FeatureMetric","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","fields":[{"name":"featureName","type":"string","doc":"Feature name"},{"name":"term","type":"string","doc":"Term name"},{"name":"metricType","type":{"type":"enum","name":"FeatureMetricType","namespace":"com.linkedin.pegasus2avro.datasetPartition.datasentinel","symbols":["COUNT_DISTINCT_TERM","COUNT_FEATURE","COUNT_RATIO_TERM","COUNT_RATIO_TOP_K_TERM","COUNT_TERM","COUNT_TOP_K_TERM","MAX","MEAN","MIN","PERCENTILE_25","PERCENTILE_50","PERCENTILE_75","PERCENTILE_90","PERCENTILE_95","PERCENTILE_99","STD_DEV"]},"doc":"Feature metric type"},{"name":"metricValue","type":"string","doc":"Metric value"}]}},"doc":"Metrics pertaining to numeric frame features","default":[]},{"name":"specifiedCategoricalFeatureMetrics","type":{"type":"array","items":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.FeatureMetric"},"doc":"Metrics pertaining to specified categorical frame features","default":[]},{"name":"lowCardinalityCategoricalFeatureMetrics","type":{"type":"array","items":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.FeatureMetric"},"doc":"Metrics pertaining to low cardinality categorical frame features","default":[]},{"name":"highCardinalityCategoricalFeatureMetrics","type":{"type":"array","items":"com.linkedin.pegasus2avro.datasetPartition.datasentinel.FeatureMetric"},"doc":"Metrics pertaining to high cardinality categorical frame features","default":[]}]},{"type":"record","name":"DatasetPartitionAlert","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","fields":[{"name":"alerts","type":{"type":"array","items":"com.linkedin.pegasus2avro.dataset.datafidelity.DataFidelityAlertDetails"},"doc":"all open alerts at the partition level for current dataset partition instance. In terms of snapshot tables, a partition means a snapshot."}]},{"type":"record","name":"DatasetPartitionStats","namespace":"com.linkedin.pegasus2avro.dataset.datafidelity","fields":[{"name":"numRows","type":"long","doc":"the number of rows for current partition"},{"name":"numFiles","type":"int","doc":"the number of files for current partition"},{"name":"numBytes","type":"long","doc":"the data file size for current partition"},{"name":"firstAvailableAt","type":"long","doc":"the epoch time in millisecond when this partition is first available"},{"name":"lastAvailableAt","type":"long","doc":"the epoch time in millisecond when the latest version of this partition is available"},{"name":"createdAt","type":"long","doc":"the epoch time when the statistic is created"}]}]},"doc":"The list of metadata aspects associated with the dataset partition. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"FeatureSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":["com.linkedin.pegasus2avro.common.Ownership",{"type":"record","name":"FrameFeatureConfig","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"namespace","type":"string","doc":"Namespace of the feature, e.g. jymbii, waterloo, careers etc"},{"name":"name","type":"string","doc":"Name of the feature"},{"name":"expression","type":["null","string"],"doc":"Feature expression","default":null},{"name":"type","type":["null",{"type":"enum","name":"FrameFeatureType","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["CATEGORICAL","CATEGORICAL_SET","TERM_VECTOR","NUMERIC","DENSE_VECTOR"]}],"doc":"Type of the feature","default":null},{"name":"multiproductInfo","type":["null",{"type":"record","name":"FeatureMultiproductInfo","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"multiproduct","type":"string","doc":"Urn of the feature multiproduct"},{"name":"name","type":"string","doc":"The canonical name of the frame feature Multiproduct"},{"name":"version","type":"string","doc":"Version of the multiproduct for onboarding the feature"},{"name":"offlineModuleName","type":["null","string"],"doc":"Name of the offline module of frame-feature multiproduct in which this feature has been defined","default":null},{"name":"onlineModuleName","type":["null","string"],"doc":"Name of the online module of frame-feature multiproduct in which this feature has been defined","default":null}]}],"doc":"Properties of the multiproduct in which this feature is defined","default":null},{"name":"globalProperties","type":{"type":"record","name":"FrameGlobalProperties","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"strictMode","type":"boolean","doc":"control whether Frame would ignore syntax NPE errors from MVEL expressions. See https://iwww.corp.linkedin.com/wiki/cf/display/ENGS/Frame+User+Manual#FrameUserManual-MVEL","default":false}]},"doc":"Global properties as defined in the globals section of frame config"},{"name":"udf","type":["null","string"],"doc":"MVEL funtion or UDF defined in feature expression","default":null},{"name":"anchors","type":{"type":"array","items":{"type":"record","name":"FrameAnchorConfig","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"name","type":"string","doc":"Name of the anchor"},{"name":"source","type":{"type":"record","name":"FrameSourceConfig","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"name","type":"string","doc":"Name of the source"},{"name":"type","type":{"type":"enum","name":"FrameSourceType","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["HDFS","ESPRESSO","VOLDEMORT","RESTLI","VENICE","PASSTHROUGH"]},"doc":"Supported source types in Frame"},{"name":"datasetUrn","type":["null","string"],"doc":"Dataset urn corresponding to the source. Some sources may not have datasetUrn like passthrough sources.","default":null},{"name":"properties","type":[{"type":"record","name":"HDFSSourceProperties","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"path","type":"string","doc":"File URI (HDFS path) of the dataset, or Dali URI (dalids:///)"},{"name":"extraParameters","type":["null",{"type":"map","values":["string",{"type":"array","items":"string"}]}],"doc":"Extra parameters for Frame's generic data manipulation","default":null},{"name":"timeseries","type":"boolean","doc":"Set as true to identify a fact dataset for aggregation","default":false},{"name":"timestamp","type":["null","string"],"doc":"Field name of timestamp column in fact data","default":null},{"name":"timestampFormat","type":["null","string"],"doc":"Format of the timestamp value, in java.time.DateTimeFormatter format","default":null},{"name":"hasTimeSnapshot","type":"boolean","doc":"True or false to specify if the source is pointing to a time-sensitive path","default":false}]},{"type":"record","name":"EspressoSourceProperties","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"database","type":"string","doc":"Espresso database name"},{"name":"table","type":"string","doc":"Espresso table name"},{"name":"d2Uri","type":"string","doc":"D2 URI of the espresso database"}]},{"type":"record","name":"VoldemortSourceProperties","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"storeName","type":"string","doc":"Voldemort Store name"},{"name":"keyExpression","type":"string","doc":"MVEL key expression"},{"name":"bootstrapUrl","type":"string","doc":"The connection point for the voldemort store"}]},{"type":"record","name":"RestliSourceProperties","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"restResourceName","type":"string","doc":"Restli resource name"},{"name":"keyExpression","type":"string","doc":"MVEL key expression"},{"name":"pathSpec","type":"string","doc":"Path spec, a comma separated string"},{"name":"restRequestParams","type":{"type":"map","values":["string",{"type":"array","items":"string"},{"type":"map","values":"string"}]},"doc":"A map for specifying additional parameters needed by Rest.li"}]},{"type":"record","name":"VeniceSourceProperties","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"storeName","type":"string","doc":"Venice database name"},{"name":"keyExpression","type":"string","doc":"MVEL key expression"}]},{"type":"record","name":"PassthroughSourceProperties","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"dataModel","type":["null","string"],"doc":"Describes the class that the features will be extracted from. It can POJO or pegasus objects","default":null}]}],"doc":"Properties associated with the source"}]},"doc":"Source associated with this anchor"},{"name":"key","type":["null","string",{"type":"array","items":"string"}],"doc":"Key expression for the anchor. For online feature, there is no need to define a key","default":null},{"name":"extractor","type":["null","string"],"doc":"Fully qualified extractor class name","default":null},{"name":"availability","type":{"type":"record","name":"AvailabilityInfo","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"environment","type":{"type":"enum","name":"AvailabilityEnvironmentType","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["ONLINE","OFFLINE","NEARLINE"]},"doc":"Represents the environment where the feature/anchor is available, e.g. OFFLINE, ONLINE and NEARLINE"},{"name":"fabric","type":"com.linkedin.pegasus2avro.common.FabricType","doc":"Fabric where feature/anchor is available in combination with the environment"}]},"doc":"Environment and fabric where anchor is defined, e.g. in ONLINE EI fabric or PROD"}]}},"doc":"Anchors of the feature as defined in all feature configs"},{"name":"defaultValue","type":["null","string","int","long","double","float",{"type":"array","items":"string"},{"type":"map","values":"string"}],"doc":"Default value of a feature","default":null},{"name":"slidingWindowAggregationInfo","type":["null",{"type":"record","name":"FrameSlidingWindowAggregationInfo","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"aggregation","type":{"type":"enum","name":"FeatureAggregationType","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["AVG","COUNT","LATEST","MAX","SUM","TIMESINCE"]},"doc":"Aggregation function to be applied for sliding window aggregation"},{"name":"window","type":"string","doc":"Length of window time, supports 4 type of units: d(day), h(hour), m(minute), s(second). The example value are 7d, 5h, 3m or 1s"},{"name":"filter","type":["null","string"],"doc":"A Spark SQL expression for filtering the fact data before aggregation","default":null},{"name":"groupBy","type":["null","string"],"doc":"The column/field on which the data will be grouped by before aggregation","default":null},{"name":"limit","type":["null","int"],"doc":"A number specifying for each group, taking the records with the TOP k aggregation value","default":null}]}],"doc":"Properties associated with Sliding Window Aggregation feature","default":null}]},{"type":"record","name":"FrameFeatureEditableConfig","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"description","type":["null","string"],"doc":"Description of the feature","default":null},{"name":"documentationLink","type":["null","string"],"doc":"Link to the documentation of the feature","default":null},{"name":"category","type":["null",{"type":"record","name":"FrameFeatureEditableConfigCategory","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"categoryString","type":["null","string"],"default":null},{"name":"fieldDiscriminator","type":{"type":"enum","name":"FrameFeatureEditableConfigCategoryDiscriminator","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["categoryString"]},"doc":"Contains the name of the field that has its value set."}]}],"doc":"Dot (.) separated feature category path, e.g. Job.ApplySave","default":null},{"name":"baseEntity","type":["null",{"type":"record","name":"FrameFeatureEditableConfigBaseEntity","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"baseEntityString","type":["null","string"],"default":null},{"name":"fieldDiscriminator","type":{"type":"enum","name":"FrameFeatureEditableConfigBaseEntityDiscriminator","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["baseEntityString"]},"doc":"Contains the name of the field that has its value set."}]}],"doc":"Base category of the feature, e.g. Member, Job etc","default":null},{"name":"classification","type":["null",{"type":"record","name":"FrameFeatureEditableConfigClassification","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"classificationString","type":["null","string"],"default":null},{"name":"fieldDiscriminator","type":{"type":"enum","name":"FrameFeatureEditableConfigClassificationDiscriminator","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["classificationString"]},"doc":"Contains the name of the field that has its value set."}]}],"doc":"Classification category of the feature, e.g. Characteristic, Activity etc","default":null},{"name":"inferType","type":["null",{"type":"enum","name":"FeatureInferType","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["FACT","INFERRED"]}],"doc":"Infer type of the feature, e.g. Fact or Inferred","default":null}]},{"type":"record","name":"FrameDerivedFeatureConfig","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"namespace","type":"string","doc":"Namespace of the feature, e.g. jymbii, waterloo, careers etc"},{"name":"name","type":"string","doc":"Name of the feature"},{"name":"expression","type":["null","string"],"doc":"Feature expression","default":null},{"name":"type","type":["null","com.linkedin.pegasus2avro.feature.frame.FrameFeatureType"],"doc":"Type of the feature","default":null},{"name":"multiproductInfo","type":["null","com.linkedin.pegasus2avro.feature.frame.FeatureMultiproductInfo"],"doc":"Properties of the multiproduct in which this feature is defined","default":null},{"name":"globalProperties","type":"com.linkedin.pegasus2avro.feature.frame.FrameGlobalProperties","doc":"Global properties as defined in the globals section of frame config"},{"name":"key","type":["null","string",{"type":"array","items":"string"}],"doc":"Key expression for the feature","default":null},{"name":"inputs","type":["null",{"type":"array","items":{"type":"record","name":"FrameDerivedFeatureInput","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"name","type":"string","doc":"Name of the input"},{"name":"key","type":["string",{"type":"array","items":"string"}],"doc":"key associated with the input"},{"name":"feature","type":"string","doc":"Feature associated with this input"}]}}],"doc":"Inputs for this derived feature","default":null},{"name":"baseFeatures","type":{"type":"array","items":"string"},"doc":"Base or upstream features associated with this derived feature"}]},{"type":"record","name":"FrameFeatureStatusConfig","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"status","type":{"type":"enum","name":"FeatureStatusType","namespace":"com.linkedin.pegasus2avro.feature.frame","symbols":["PUBLISHED","UNPUBLISHED","DELETED","HIDDEN","INCOMPLETE"]},"doc":"Status of the feature","default":"UNPUBLISHED"}]},{"type":"record","name":"FrameFeatureAvailabilityConfig","namespace":"com.linkedin.pegasus2avro.feature.frame","fields":[{"name":"availability","type":{"type":"array","items":"com.linkedin.pegasus2avro.feature.frame.AvailabilityInfo"},"doc":"Availability of the feature in different environments like offline, online and nearline and fabrics like ei, prod etc"}]}]},"doc":"The list of metadata aspects associated with the feature. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"GridUserSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"GridUserInfo","namespace":"com.linkedin.pegasus2avro.identity","fields":[{"name":"corpUser","type":"string","doc":"ldap user of the GridUser account, in most case corpUser name equals to griduser name."}]}]},"doc":"The list of metadata aspects associated with the GridUser. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"GridGroupSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"GridGroupInfo","namespace":"com.linkedin.pegasus2avro.identity","fields":[{"name":"admins","type":{"type":"array","items":"string"},"doc":"List of GridUser urns of this HEADLESS user."},{"name":"memberUsers","type":{"type":"array","items":"string"},"doc":"List of GridUser urns member in this account, note, admins are part of the members."},{"name":"memberGroups","type":{"type":"array","items":"string"},"doc":"List of GridGroup urns in this account."}]}]},"doc":"The list of metadata aspects associated with the GridGroup. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"InchartsChartInstanceSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity which the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"InchartsChartInstanceMetadata","namespace":"com.linkedin.pegasus2avro.chart","fields":[{"name":"created","type":"com.linkedin.pegasus2avro.common.AuditStamp","doc":"An AuditStamp corresponding to the creation of this resource/association/sub-resource"},{"name":"lastModified","type":"com.linkedin.pegasus2avro.common.AuditStamp","doc":"An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created"},{"name":"deleted","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.","default":null},{"name":"title","type":{"type":"record","name":"MultiLocaleString","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"localized","type":{"type":"map","values":"string"},"doc":"Maps a locale to a localized version of the string. Each key is a Locale record converted to string format, with the language, country and variant separated by underscores. Examples: 'en', 'de_DE', 'en_US_WIN', 'de__POSIX', 'fr__MAC'."},{"name":"preferredLocale","type":["null",{"type":"record","name":"Locale","namespace":"com.linkedin.pegasus2avro.common","fields":[{"name":"language","type":"string","doc":"A lowercase two-letter language code as defined by ISO-639."},{"name":"country","type":["null","string"],"doc":"An uppercase two-letter country code as defined by ISO-3166.","default":null},{"name":"variant","type":["null","string"],"doc":"Vendor or browser-specific code.","default":null}]}],"doc":"The preferred locale to use, based on standard rules","default":null}]},"doc":"The chart title, displayed to the user in the user's preferred language."},{"name":"description","type":["null","com.linkedin.pegasus2avro.common.MultiLocaleString"],"doc":"The chart description, displayed to the user in the user's preferred language.","default":null},{"name":"versionDescription","type":["null","com.linkedin.pegasus2avro.common.MultiLocaleString"],"doc":"The chart version description, displayed to the user in the user's preferred language.","default":null},{"name":"chartQuery","type":{"type":"record","name":"InchartsChartQuery","namespace":"com.linkedin.pegasus2avro.chart","fields":[{"name":"chartQuery","type":"string","doc":"A query representing the data associated with the chart."},{"name":"protocol","type":{"type":"enum","name":"ProtocolType","namespace":"com.linkedin.pegasus2avro.chart","symbols":["RQL","PRISM"]},"doc":"The type of protocol for the chart query."},{"name":"version","type":"string","doc":"The version of query language in the chart query, e.g., 2.5; 1.0."}]},"doc":"Information of chart query representing the data associated with the chart."}]},"com.linkedin.pegasus2avro.common.Status"]},"doc":"The list of metadata aspects associated with the chart instance. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"InchartsChartSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity which the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":[{"type":"record","name":"InchartsChartEdit","namespace":"com.linkedin.pegasus2avro.chart","fields":[{"name":"editMode","type":["null","com.linkedin.pegasus2avro.common.AuditStamp"],"doc":"Data captured giving insight into when the chart is being edited, and who is currently editing.","default":null}]},{"type":"record","name":"InchartsChartInstances","namespace":"com.linkedin.pegasus2avro.chart","fields":[{"name":"instances","type":{"type":"array","items":"string"},"doc":"Instances of the chart."}]},"com.linkedin.pegasus2avro.common.Ownership","com.linkedin.pegasus2avro.common.Status"]},"doc":"The list of metadata aspects associated with the chart. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"MetricSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":["com.linkedin.pegasus2avro.common.Ownership",{"type":"record","name":"UMPMetricProperties","namespace":"com.linkedin.pegasus2avro.metric.ump","fields":[{"name":"name","type":"string","doc":"Name of the UMP metric"},{"name":"dataset","type":"string","doc":"Name of the UMP dataset from which the metric was obtained"},{"name":"bucket","type":"string","doc":"Relevant bucket of the metric, refer to go/umpmetricsubjectareas e.g. sales_navigator"},{"name":"domain","type":["null","string"],"doc":"Domain name of the metric. One domain contains several buckets. This mapping can be found in raptor metadata (raptor/categorylist endpoint) e.g. lss","default":null},{"name":"displayName","type":["null","string"],"doc":"Display name of the UMP metric","default":null},{"name":"displayGroup","type":["null","string"],"doc":"Display group name of the UMP metric","default":null},{"name":"datafiles","type":{"type":"array","items":{"type":"record","name":"UMPDatafileProperties","namespace":"com.linkedin.pegasus2avro.metric.ump","fields":[{"name":"name","type":"string","doc":"Name of the datafile"},{"name":"frequency","type":"com.linkedin.pegasus2avro.metric.MetricFrequencyType","doc":"Frequency type associated with the datafile","default":"DAILY"},{"name":"enabled","type":"boolean","doc":"Flag to indicate if the datafile is enabled or not","default":true},{"name":"delay","type":"int","doc":"Delay is specified as number of days, and indicates that both inputs and outputs are delayed by this amount","default":1},{"name":"window","type":"int","doc":"Window is the number of days of output data that you write","default":1},{"name":"scriptType","type":{"type":"enum","name":"UMPMetricScriptType","namespace":"com.linkedin.pegasus2avro.metric.ump","symbols":["PIG","HIVE","SPARK","NONE"]},"doc":"Type of script used to generate the metric. For each datafile in your UMP dataset, there will be a Pig, hive or spark script.","default":"NONE"}]}},"doc":"Datafile properties used to generate the metric","default":[]},{"name":"description","type":"string","doc":"Documentation of the metric"},{"name":"tier","type":{"type":"array","items":{"type":"record","name":"TierInfo","namespace":"com.linkedin.pegasus2avro.metric.ump","fields":[{"name":"frequency","type":"com.linkedin.pegasus2avro.metric.MetricFrequencyType","doc":"Frequency type that qualifies this tier","default":"DAILY"},{"name":"value","type":"int","doc":"Tier value associated with the metric. Tier 0 is the executive metric, Tier 1 and 2 are high priority metrics, Tier 3 is the normal one","default":2}]}},"doc":"Importance of the metric"},{"name":"frequency","type":{"type":"array","items":"com.linkedin.pegasus2avro.metric.MetricFrequencyType"},"doc":"Indicates how frequently the metric is generated","default":["DAILY"]},{"name":"derived","type":"boolean","doc":"Indicates whether this metric was derived from other metrics","default":false},{"name":"highPriority","type":"boolean","doc":"Indicates whether this metric is in UMP HP queue (refer to go/dfsreonboarding for guidelines on onboarding a UMP metric to High Priority queue)","default":false},{"name":"pii","type":"boolean","doc":"Indicates whether this metric contains PII information. This is when any of the measures or attributes contains PII information","default":false},{"name":"tags","type":{"type":"array","items":"string"},"doc":"Tags associated with the UMP dataset this metric was obtained from","default":[]},{"name":"goodDirection","type":{"type":"enum","name":"UMPMetricDirectionType","namespace":"com.linkedin.pegasus2avro.metric.ump","symbols":["UP","DOWN"]},"doc":"Good direction for the metric. This can take values UP or DOWN","default":"UP"},{"name":"formulaData","type":{"type":"record","name":"UMPMetricFormula","namespace":"com.linkedin.pegasus2avro.metric.ump","fields":[{"name":"formula","type":"string","doc":"The formula used to define the metric"},{"name":"aggregationFunction","type":"string","doc":"The function used in the formula e.g. SUM, MIN, MAX, COUNT, etc"},{"name":"attributes","type":{"type":"array","items":{"type":"record","name":"MetricColumnInfo","namespace":"com.linkedin.pegasus2avro.metric.ump","fields":[{"name":"name","type":"string","doc":"Name of the field"},{"name":"description","type":"string","doc":"Description of the field"},{"name":"fieldFormat","type":["null","com.linkedin.pegasus2avro.dataset.FieldFormat"],"doc":"Format associated with the field","default":null},{"name":"complianceType","type":"com.linkedin.pegasus2avro.dataset.ComplianceDataType","doc":"Compliance type associated with the field"}]}},"doc":"Attributes are the entities associated with the metric. MemberId is the most common attribute","default":[]},{"name":"measures","type":{"type":"array","items":"com.linkedin.pegasus2avro.metric.ump.MetricColumnInfo"},"doc":"Additive metrics are SUM, MIN or MAX aggregation of measures. Measures are some numerical fields in dataset, and the formula declares a metric that is an aggregation of those measures","default":[]},{"name":"upstreamMetrics","type":{"type":"array","items":"string"},"doc":"The upstream metrics from which this metric was derived from, if it was","default":[]}]},"doc":"Formula associated with UMP metric"}]}]},"doc":"The list of metadata aspects associated with the metric. Depending on the use case, this can either be all, or a selection, of supported aspects."}]},{"type":"record","name":"RegisteredSchemaSnapshot","namespace":"com.linkedin.pegasus2avro.metadata.snapshot","fields":[{"name":"urn","type":"string","doc":"URN for the schema the metadata snapshot is associated with."},{"name":"aspects","type":{"type":"array","items":["com.linkedin.pegasus2avro.dataset.ComplianceInfo",{"type":"record","name":"SchemaDefinition","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"rawSchema","type":["null",{"type":"record","name":"AvroSchema","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"schema","type":"string","doc":"The native Avro schema text."}]},{"type":"record","name":"BinaryJsonSchema","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"schema","type":"string","doc":"The native schema text for binary JSON file format."}]},{"type":"record","name":"DDL","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"tableDDL","type":"string","doc":"The native SQL DDL that describe an Relational DBMS table."}]},{"type":"record","name":"EspressoSchema","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"documentSchema","type":"string","doc":"The native espresso document schema."},{"name":"tableSchema","type":"string","doc":"The espresso table schema definition."}]},{"type":"record","name":"KeyValueSchema","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"keySchema","type":"string","doc":"The raw schema for the key in the key-value store."},{"name":"valueSchema","type":"string","doc":"The raw schema for the value in the key-value store."}]},{"type":"record","name":"OrcSchema","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"schema","type":"string","doc":"The native schema for ORC file format."}]},{"type":"record","name":"ParquetSchema","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"schema","type":"string","doc":"The native Parquet schema text."}]},{"type":"record","name":"Schemaless","namespace":"com.linkedin.pegasus2avro.schema","fields":[]}],"doc":"The dataset schema as observed from the data platform.","default":null},{"name":"normalizedSchema","type":["null",{"type":"record","name":"NormalizedSchema","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"normalizedFields","type":{"type":"array","items":{"type":"record","name":"SchemaField","namespace":"com.linkedin.pegasus2avro.schema","fields":[{"name":"fieldPath","type":"string","doc":"Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above."},{"name":"jsonPath","type":["null","string"],"doc":"Flattened name of a field in JSON Path notation.","default":null},{"name":"nullable","type":"boolean","doc":"Indicates if this field is optional or nullable","default":false},{"name":"description","type":["null","string"],"doc":"Description","default":null},{"name":"type","type":{"type":"enum","name":"SchemaFieldDataType","namespace":"com.linkedin.pegasus2avro.schema","symbols":["BOOLEAN","BYTES","ENUM","NULL","NUMBER","STRING","ARRAY","MAP","RECORD","UNION"]},"doc":"Platform independent field type of the field."},{"name":"nativeDataType","type":"string","doc":"The native type of the field in the dataset's platform as declared by platform schema."},{"name":"recursive","type":"boolean","doc":"There are use cases when a field in type B references type A. A field in A references field of type B. In such cases, we will mark the first field as recursive.","default":false}]}},"doc":"List of normalized schema fields."}]}],"doc":"The standardized schema after normalization process.","default":null}]}]},"doc":"The list of metadata aspects associated with the schema. Depending on the use case, this can either be all, or a selection, of supported aspects."}]}],"doc":"Snapshot of the proposed metadata change. Include only the aspects affected by the change in the snapshot.","default":null},{"name":"proposedDelta","type":["null",{"type":"record","name":"DatasetGroupDelta","namespace":"com.linkedin.pegasus2avro.metadata.delta","fields":[{"name":"urn","type":"string","doc":"URN for the entity the metadata delta is associated with."},{"name":"delta","type":[{"type":"record","name":"MembershipDelta","namespace":"com.linkedin.pegasus2avro.datasetGroup","fields":[{"name":"membersToAdd","type":{"type":"array","items":"string"},"doc":"The list of dataset to be added to the group"},{"name":"membersToRemove","type":{"type":"array","items":"string"},"doc":"The list of dataset to be removed from the group"}]}],"doc":"The specific type of metadata delta to apply."}]}],"doc":"Delta of the proposed metadata partial update.","default":null}]} \ No newline at end of file diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/old-mce.json b/metadata-events/mxe-utils-avro-1.7/src/test/resources/old-mce.json deleted file mode 100644 index f2ec01a5d2..0000000000 --- a/metadata-events/mxe-utils-avro-1.7/src/test/resources/old-mce.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MetricSnapshot": { - "urn": "urn:li:metric:(UMP,seo.seo_bot_crawls.mynameisjyoti)", - "aspects": [ - { - "com.linkedin.pegasus2avro.metric.ump.UMPMetricProperties": { - "name": "mynameisjyoti", - "dataset": "seo_bot_crawls", - "bucket": "seo", - "domain": { - "string": "" - }, - "displayName": null, - "displayGroup": null, - "datafiles": [ - { - "name": "bot_crawl_metrics", - "frequency": "DAILY", - "enabled": true, - "delay": 1, - "window": 1, - "scriptType": "PIG" - } - ], - "description": "All crawls by Google bots", - "tier": [ - { - "frequency": "DAILY", - "value": 2 - } - ], - "frequency": [ - "DAILY" - ], - "derived": false, - "highPriority": false, - "pii": false, - "tags": [ - "growth", - "seo" - ], - "goodDirection": "UP", - "formulaData": { - "formula": "sum(google_crawl_count)", - "aggregationFunction": "SUM", - "attributes": [ - ], - "measures": [ - { - "name": "google_crawl_count", - "description": "google crawler visit count", - "fieldFormat": null, - "complianceType": "NONE" - } - ], - "upstreamMetrics": [ - ] - } - } - } - ] - } - }, - "proposedDelta": null -} \ No newline at end of file diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json b/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json deleted file mode 100644 index 03acfb43e6..0000000000 --- a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mae.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "auditHeader": null, - "oldSnapshot": null, - "newSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MetricSnapshot": { - "urn": "urn:li:metric:(foo,bar)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:foo", - "type": "DEVELOPER", - "source": null - } - ], - "lastModified": null - } - } - ] - } - } -} \ No newline at end of file diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json b/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json deleted file mode 100644 index e027e74bf6..0000000000 --- a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-avro2pegasus-mce.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "auditHeader": null, - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.MetricSnapshot": { - "urn": "urn:li:metric:(foo,bar)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:foo", - "type": "DEVELOPER", - "source": null - } - ], - "lastModified": null - } - } - ] - } - }, - "proposedDelta": null -} \ No newline at end of file diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json b/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json deleted file mode 100644 index 8c5aeba6b8..0000000000 --- a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mae.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "newSnapshot": { - "com.linkedin.metadata.snapshot.MetricSnapshot": { - "urn": "urn:li:metric:(foo,bar)", - "aspects": [ - { - "com.linkedin.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:foo", - "type": "DEVELOPER" - } - ] - } - } - ] - } - } -} \ No newline at end of file diff --git a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json b/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json deleted file mode 100644 index a86963a249..0000000000 --- a/metadata-events/mxe-utils-avro-1.7/src/test/resources/test-pegasus2avro-mce.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "proposedSnapshot": { - "com.linkedin.metadata.snapshot.MetricSnapshot": { - "urn": "urn:li:metric:(foo,bar)", - "aspects": [ - { - "com.linkedin.common.Ownership": { - "owners": [ - { - "owner": "urn:li:corpuser:foo", - "type": "DEVELOPER" - } - ] - } - } - ] - } - } -} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/schema/RegisteredSchemaKey.pdsc b/metadata-models/src/main/pegasus/com/linkedin/schema/RegisteredSchemaKey.pdsc deleted file mode 100644 index 59e3169ece..0000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/schema/RegisteredSchemaKey.pdsc +++ /dev/null @@ -1,24 +0,0 @@ -{ - "name": "RegisteredSchemaKey", - "type": "record", - "namespace": "com.linkedin.schema", - "doc": "Key for Registered Schema resource", - "fields": [ - { - "name": "type", - "type": "com.linkedin.common.RegisteredSchemaType", - "doc": "Type of the registered schema, e.g. Kafka, Espresso Documnet." - }, - { - "name": "name", - "type": "string", - "doc": "Name of the registered schema, e.g. for kafka, .
for Espresso", - "validate": { - "strlen": { - "min": 1, - "max": 300 - } - } - } - ] -} \ No newline at end of file diff --git a/metadata-test-utils/src/main/java/com/linkedin/metadata/utils/TestUtils.java b/metadata-test-utils/src/main/java/com/linkedin/metadata/utils/TestUtils.java index 94d0c3c6d6..0b9ddddfe9 100644 --- a/metadata-test-utils/src/main/java/com/linkedin/metadata/utils/TestUtils.java +++ b/metadata-test-utils/src/main/java/com/linkedin/metadata/utils/TestUtils.java @@ -9,12 +9,10 @@ import com.linkedin.common.OwnershipSource; import com.linkedin.common.OwnershipSourceType; import com.linkedin.common.OwnershipSuggestion; import com.linkedin.common.OwnershipType; -import com.linkedin.common.RegisteredSchemaType; import com.linkedin.common.urn.CorpuserUrn; import com.linkedin.common.urn.DataPlatformUrn; import com.linkedin.common.urn.DatasetGroupUrn; import com.linkedin.common.urn.DatasetUrn; -import com.linkedin.common.urn.RegisteredSchemaUrn; import com.linkedin.common.urn.Urn; import java.io.IOException; import java.nio.charset.Charset; @@ -45,11 +43,6 @@ public class TestUtils { return new DatasetGroupUrn("foo", name); } - @Nonnull - public static RegisteredSchemaUrn makeRegisteredSchemaUrn(@Nonnull String name) { - return new RegisteredSchemaUrn(RegisteredSchemaType.KAFKA, name); - } - @Nonnull public static Owner makeOwner(@Nonnull String ldap) { return makeOwner(ldap, OwnershipType.DEVELOPER);