diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
index 6fb9649eb0..0ff1d19451 100644
--- a/.github/workflows/build-and-test.yml
+++ b/.github/workflows/build-and-test.yml
@@ -63,6 +63,7 @@ jobs:
         run: ./metadata-ingestion/scripts/install_deps.sh
       - name: Run metadata-ingestion tests
         run: ./gradlew :metadata-ingestion:build :metadata-ingestion:check
+
   metadata-ingestion-by-version:
     runs-on: ubuntu-latest
     strategy:
@@ -75,6 +76,8 @@
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: ./metadata-ingestion/scripts/install_deps.sh && python -m pip install --upgrade pip && pip install tox tox-gh-actions
+      - name: Codegen
+        run: ./gradlew :metadata-ingestion:codegen
       - name: Run tox tests
        run: cd metadata-ingestion && tox
 
diff --git a/.gitignore b/.gitignore
index 1b691ad0aa..2470df4910 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@
 **/src/mainGenerated*
 **/src/testGenerated*
 metadata-events/mxe-registration/src/main/resources/**/*.avsc
+metadata-ingestion/src/datahub/metadata
 
 # Java
 .java-version
diff --git a/metadata-ingestion/adding-source.md b/metadata-ingestion/adding-source.md
index aaeb87dcc9..a505321605 100644
--- a/metadata-ingestion/adding-source.md
+++ b/metadata-ingestion/adding-source.md
@@ -18,7 +18,7 @@ The reporter interface enables the source to report statistics, warnings, failur
 
 The core for the source is the `get_workunits` method, which produces a stream of MCE objects. The [file source](./src/datahub/ingestion/source/file.py) is a good and simple example.
 
-The MetadataChangeEventClass is defined in the [metadata models](./src/datahub/metadata/schema_classes.py). There are also some [convenience methods](./src/datahub/emitter/mce_builder.py) for commonly used operations.
+The MetadataChangeEventClass is defined in the metadata models, which are generated under `metadata-ingestion/src/datahub/metadata/schema_classes.py`. There are also some [convenience methods](./src/datahub/emitter/mce_builder.py) for commonly used operations.
 
 ### 4. Set up the dependencies
 
diff --git a/metadata-ingestion/src/datahub/metadata/__init__.py b/metadata-ingestion/src/datahub/metadata/__init__.py
deleted file mode 100644
index 144057cdbf..0000000000
--- a/metadata-ingestion/src/datahub/metadata/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/__init__.py b/metadata-ingestion/src/datahub/metadata/com/__init__.py
deleted file mode 100644
index 144057cdbf..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/__init__.py
deleted file mode 100644
index 144057cdbf..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/events/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/events/__init__.py
deleted file mode 100644
index 6bb82c355b..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/events/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from ....schema_classes import KafkaAuditHeaderClass
-
-
-KafkaAuditHeader = KafkaAuditHeaderClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/__init__.py
deleted file mode 100644
index 144057cdbf..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py
deleted file mode 100644
index 0c13f9f9da..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/chart/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import ChartInfoClass
-from .....schema_classes import ChartQueryClass
-from .....schema_classes import ChartQueryTypeClass
-from .....schema_classes import ChartTypeClass
-from .....schema_classes import EditableChartPropertiesClass
-
-
-ChartInfo = ChartInfoClass
-ChartQuery = ChartQueryClass
-ChartQueryType = ChartQueryTypeClass
-ChartType = ChartTypeClass
-EditableChartProperties = EditableChartPropertiesClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py
deleted file mode 100644
index ad1779a931..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import AccessLevelClass
-from .....schema_classes import AuditStampClass
-from .....schema_classes import BrowsePathsClass
-from .....schema_classes import ChangeAuditStampsClass
-from .....schema_classes import CostClass
-from .....schema_classes import CostCostClass
-from .....schema_classes import CostCostDiscriminatorClass
-from .....schema_classes import CostTypeClass
-from .....schema_classes import DeprecationClass
-from .....schema_classes import FabricTypeClass
-from .....schema_classes import GlobalTagsClass
-from .....schema_classes import GlossaryTermAssociationClass
-from .....schema_classes import GlossaryTermsClass
-from .....schema_classes import InstitutionalMemoryClass
-from .....schema_classes import InstitutionalMemoryMetadataClass
-from .....schema_classes import MLFeatureDataTypeClass
-from .....schema_classes import OwnerClass
-from .....schema_classes import OwnershipClass
-from .....schema_classes import OwnershipSourceClass
-from .....schema_classes import OwnershipSourceTypeClass
-from .....schema_classes import OwnershipTypeClass
-from .....schema_classes import StatusClass
-from .....schema_classes import TagAssociationClass
-from .....schema_classes import VersionTagClass
-from .....schema_classes import WindowDurationClass
-
-
-AccessLevel = AccessLevelClass
-AuditStamp = AuditStampClass
-BrowsePaths = BrowsePathsClass
-ChangeAuditStamps = ChangeAuditStampsClass
-Cost = CostClass
-CostCost = CostCostClass
-CostCostDiscriminator = CostCostDiscriminatorClass
-CostType = CostTypeClass
-Deprecation = DeprecationClass
-FabricType = FabricTypeClass
-GlobalTags = GlobalTagsClass
-GlossaryTermAssociation = GlossaryTermAssociationClass
-GlossaryTerms = GlossaryTermsClass
-InstitutionalMemory = InstitutionalMemoryClass
-InstitutionalMemoryMetadata = InstitutionalMemoryMetadataClass
-MLFeatureDataType = MLFeatureDataTypeClass
-Owner = OwnerClass
-Ownership = OwnershipClass
-OwnershipSource = OwnershipSourceClass
-OwnershipSourceType = OwnershipSourceTypeClass
-OwnershipType = OwnershipTypeClass
-Status = StatusClass
-TagAssociation = TagAssociationClass
-VersionTag = VersionTagClass
-WindowDuration = WindowDurationClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py
deleted file mode 100644
index 4f1e2f797a..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/common/fieldtransformer/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from ......schema_classes import TransformationTypeClass
-from ......schema_classes import UDFTransformerClass
-
-
-TransformationType = TransformationTypeClass
-UDFTransformer = UDFTransformerClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dashboard/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dashboard/__init__.py
deleted file mode 100644
index 986c6e048d..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dashboard/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import DashboardInfoClass
-from .....schema_classes import EditableDashboardPropertiesClass
-
-
-DashboardInfo = DashboardInfoClass
-EditableDashboardProperties = EditableDashboardPropertiesClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/datajob/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/datajob/__init__.py
deleted file mode 100644
index 80e8e082d1..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/datajob/__init__.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import DataFlowInfoClass
-from .....schema_classes import DataJobInfoClass
-from .....schema_classes import DataJobInputOutputClass
-from .....schema_classes import EditableDataFlowPropertiesClass
-from .....schema_classes import EditableDataJobPropertiesClass
-from .....schema_classes import JobStatusClass
-
-
-DataFlowInfo = DataFlowInfoClass
-DataJobInfo = DataJobInfoClass
-DataJobInputOutput = DataJobInputOutputClass
-EditableDataFlowProperties = EditableDataFlowPropertiesClass
-EditableDataJobProperties = EditableDataJobPropertiesClass
-JobStatus = JobStatusClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/datajob/azkaban/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/datajob/azkaban/__init__.py
deleted file mode 100644
index 13c26a929b..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/datajob/azkaban/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from ......schema_classes import AzkabanJobTypeClass
-
-
-AzkabanJobType = AzkabanJobTypeClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataplatform/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataplatform/__init__.py
deleted file mode 100644
index 30b0a188a2..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataplatform/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import DataPlatformInfoClass
-from .....schema_classes import PlatformTypeClass
-
-
-DataPlatformInfo = DataPlatformInfoClass
-PlatformType = PlatformTypeClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataprocess/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataprocess/__init__.py
deleted file mode 100644
index e7abece20e..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataprocess/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import DataProcessInfoClass
-
-
-DataProcessInfo = DataProcessInfoClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py
deleted file mode 100644
index 48b6cd4b36..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/dataset/__init__.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import DatasetDeprecationClass
-from .....schema_classes import DatasetFieldMappingClass
-from .....schema_classes import DatasetFieldProfileClass
-from .....schema_classes import DatasetFieldUsageCountsClass
-from .....schema_classes import DatasetLineageTypeClass
-from .....schema_classes import DatasetProfileClass
-from .....schema_classes import DatasetPropertiesClass
-from .....schema_classes import DatasetUpstreamLineageClass
-from .....schema_classes import DatasetUsageStatisticsClass
-from .....schema_classes import DatasetUserUsageCountsClass
-from .....schema_classes import EditableDatasetPropertiesClass
-from .....schema_classes import HistogramClass
-from .....schema_classes import QuantileClass
-from .....schema_classes import UpstreamClass
-from .....schema_classes import UpstreamLineageClass
-from .....schema_classes import ValueFrequencyClass
-
-
-DatasetDeprecation = DatasetDeprecationClass
-DatasetFieldMapping = DatasetFieldMappingClass
-DatasetFieldProfile = DatasetFieldProfileClass
-DatasetFieldUsageCounts = DatasetFieldUsageCountsClass
-DatasetLineageType = DatasetLineageTypeClass
-DatasetProfile = DatasetProfileClass
-DatasetProperties = DatasetPropertiesClass
-DatasetUpstreamLineage = DatasetUpstreamLineageClass
-DatasetUsageStatistics = DatasetUsageStatisticsClass
-DatasetUserUsageCounts = DatasetUserUsageCountsClass
-EditableDatasetProperties = EditableDatasetPropertiesClass
-Histogram = HistogramClass
-Quantile = QuantileClass
-Upstream = UpstreamClass
-UpstreamLineage = UpstreamLineageClass
-ValueFrequency = ValueFrequencyClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/events/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/events/__init__.py
deleted file mode 100644
index 144057cdbf..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/events/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/events/metadata/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/events/metadata/__init__.py
deleted file mode 100644
index 50637ea6d7..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/events/metadata/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from ......schema_classes import ChangeTypeClass
-
-
-ChangeType = ChangeTypeClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/glossary/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/glossary/__init__.py
deleted file mode 100644
index 776bbaf792..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/glossary/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import GlossaryNodeInfoClass
-from .....schema_classes import GlossaryRelatedTermsClass
-from .....schema_classes import GlossaryTermInfoClass
-
-
-GlossaryNodeInfo = GlossaryNodeInfoClass
-GlossaryRelatedTerms = GlossaryRelatedTermsClass
-GlossaryTermInfo = GlossaryTermInfoClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py
deleted file mode 100644
index bdf0d17d1f..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import CorpGroupInfoClass
-from .....schema_classes import CorpUserEditableInfoClass
-from .....schema_classes import CorpUserInfoClass
-from .....schema_classes import GroupMembershipClass
-
-
-CorpGroupInfo = CorpGroupInfoClass
-CorpUserEditableInfo = CorpUserEditableInfoClass
-CorpUserInfo = CorpUserInfoClass
-GroupMembership = GroupMembershipClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/__init__.py
deleted file mode 100644
index 144057cdbf..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py
deleted file mode 100644
index e6b3cbe481..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from ......schema_classes import ChartKeyClass
-from ......schema_classes import CorpGroupKeyClass
-from ......schema_classes import CorpUserKeyClass
-from ......schema_classes import DashboardKeyClass
-from ......schema_classes import DataFlowKeyClass
-from ......schema_classes import DataHubPolicyKeyClass
-from ......schema_classes import DataJobKeyClass
-from ......schema_classes import DataPlatformKeyClass
-from ......schema_classes import DataProcessKeyClass
-from ......schema_classes import DatasetKeyClass
-from ......schema_classes import GlossaryNodeKeyClass
-from ......schema_classes import GlossaryTermKeyClass
-from ......schema_classes import MLFeatureKeyClass
-from ......schema_classes import MLFeatureTableKeyClass
-from ......schema_classes import MLModelDeploymentKeyClass
-from ......schema_classes import MLModelGroupKeyClass
-from ......schema_classes import MLModelKeyClass
-from ......schema_classes import MLPrimaryKeyKeyClass
-from ......schema_classes import SchemaFieldKeyClass
-from ......schema_classes import TagKeyClass
-
-
-ChartKey = ChartKeyClass
-CorpGroupKey = CorpGroupKeyClass
-CorpUserKey = CorpUserKeyClass
-DashboardKey = DashboardKeyClass
-DataFlowKey = DataFlowKeyClass
-DataHubPolicyKey = DataHubPolicyKeyClass
-DataJobKey = DataJobKeyClass
-DataPlatformKey = DataPlatformKeyClass
-DataProcessKey = DataProcessKeyClass
-DatasetKey = DatasetKeyClass
-GlossaryNodeKey = GlossaryNodeKeyClass
-GlossaryTermKey = GlossaryTermKeyClass
-MLFeatureKey = MLFeatureKeyClass
-MLFeatureTableKey = MLFeatureTableKeyClass
-MLModelDeploymentKey = MLModelDeploymentKeyClass
-MLModelGroupKey = MLModelGroupKeyClass
-MLModelKey = MLModelKeyClass
-MLPrimaryKeyKey = MLPrimaryKeyKeyClass
-SchemaFieldKey = SchemaFieldKeyClass
-TagKey = TagKeyClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py
deleted file mode 100644
index 2736983e26..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from ......schema_classes import ChartSnapshotClass
-from ......schema_classes import CorpGroupSnapshotClass
-from ......schema_classes import CorpUserSnapshotClass
-from ......schema_classes import DashboardSnapshotClass
-from ......schema_classes import DataFlowSnapshotClass
-from ......schema_classes import DataHubPolicySnapshotClass
-from ......schema_classes import DataJobSnapshotClass
-from ......schema_classes import DataPlatformSnapshotClass
-from ......schema_classes import DataProcessSnapshotClass
-from ......schema_classes import DatasetSnapshotClass
-from ......schema_classes import GlossaryNodeSnapshotClass
-from ......schema_classes import GlossaryTermSnapshotClass
-from ......schema_classes import MLFeatureSnapshotClass
-from ......schema_classes import MLFeatureTableSnapshotClass
-from ......schema_classes import MLModelDeploymentSnapshotClass
-from ......schema_classes import MLModelGroupSnapshotClass
-from ......schema_classes import MLModelSnapshotClass
-from ......schema_classes import MLPrimaryKeySnapshotClass
-from ......schema_classes import SchemaFieldSnapshotClass
-from ......schema_classes import TagSnapshotClass
-
-
-ChartSnapshot = ChartSnapshotClass
-CorpGroupSnapshot = CorpGroupSnapshotClass
-CorpUserSnapshot = CorpUserSnapshotClass
-DashboardSnapshot = DashboardSnapshotClass
-DataFlowSnapshot = DataFlowSnapshotClass
-DataHubPolicySnapshot = DataHubPolicySnapshotClass
-DataJobSnapshot = DataJobSnapshotClass
-DataPlatformSnapshot = DataPlatformSnapshotClass
-DataProcessSnapshot = DataProcessSnapshotClass
-DatasetSnapshot = DatasetSnapshotClass
-GlossaryNodeSnapshot = GlossaryNodeSnapshotClass
-GlossaryTermSnapshot = GlossaryTermSnapshotClass
-MLFeatureSnapshot = MLFeatureSnapshotClass
-MLFeatureTableSnapshot = MLFeatureTableSnapshotClass
-MLModelDeploymentSnapshot = MLModelDeploymentSnapshotClass
-MLModelGroupSnapshot = MLModelGroupSnapshotClass
-MLModelSnapshot = MLModelSnapshotClass
-MLPrimaryKeySnapshot = MLPrimaryKeySnapshotClass
-SchemaFieldSnapshot = SchemaFieldSnapshotClass
-TagSnapshot = TagSnapshotClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/ml/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/ml/__init__.py
deleted file mode 100644
index 144057cdbf..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/ml/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py
deleted file mode 100644
index 06a94dde84..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from ......schema_classes import BaseDataClass
-from ......schema_classes import CaveatDetailsClass
-from ......schema_classes import CaveatsAndRecommendationsClass
-from ......schema_classes import DeploymentStatusClass
-from ......schema_classes import EthicalConsiderationsClass
-from ......schema_classes import EvaluationDataClass
-from ......schema_classes import IntendedUseClass
-from ......schema_classes import IntendedUserTypeClass
-from ......schema_classes import MLFeaturePropertiesClass
-from ......schema_classes import MLFeatureTablePropertiesClass
-from ......schema_classes import MLHyperParamClass
-from ......schema_classes import MLMetricClass
-from ......schema_classes import MLModelDeploymentPropertiesClass
-from ......schema_classes import MLModelFactorPromptsClass
-from ......schema_classes import MLModelFactorsClass
-from ......schema_classes import MLModelGroupPropertiesClass
-from ......schema_classes import MLModelPropertiesClass
-from ......schema_classes import MLPrimaryKeyPropertiesClass
-from ......schema_classes import MetricsClass
-from ......schema_classes import QuantitativeAnalysesClass
-from ......schema_classes import SourceCodeClass
-from ......schema_classes import SourceCodeUrlClass
-from ......schema_classes import SourceCodeUrlTypeClass
-from ......schema_classes import TrainingDataClass
-
-
-BaseData = BaseDataClass
-CaveatDetails = CaveatDetailsClass
-CaveatsAndRecommendations = CaveatsAndRecommendationsClass
-DeploymentStatus = DeploymentStatusClass
-EthicalConsiderations = EthicalConsiderationsClass
-EvaluationData = EvaluationDataClass
-IntendedUse = IntendedUseClass
-IntendedUserType = IntendedUserTypeClass
-MLFeatureProperties = MLFeaturePropertiesClass
-MLFeatureTableProperties = MLFeatureTablePropertiesClass
-MLHyperParam = MLHyperParamClass
-MLMetric = MLMetricClass
-MLModelDeploymentProperties = MLModelDeploymentPropertiesClass
-MLModelFactorPrompts = MLModelFactorPromptsClass
-MLModelFactors = MLModelFactorsClass
-MLModelGroupProperties = MLModelGroupPropertiesClass
-MLModelProperties = MLModelPropertiesClass
-MLPrimaryKeyProperties = MLPrimaryKeyPropertiesClass
-Metrics = MetricsClass
-QuantitativeAnalyses = QuantitativeAnalysesClass
-SourceCode = SourceCodeClass
-SourceCodeUrl = SourceCodeUrlClass
-SourceCodeUrlType = SourceCodeUrlTypeClass
-TrainingData = TrainingDataClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/mxe/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/mxe/__init__.py
deleted file mode 100644
index 269d4a3127..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/mxe/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import GenericAspectClass
-from .....schema_classes import MetadataChangeEventClass
-from .....schema_classes import MetadataChangeProposalClass
-from .....schema_classes import SystemMetadataClass
-
-
-GenericAspect = GenericAspectClass
-MetadataChangeEvent = MetadataChangeEventClass
-MetadataChangeProposal = MetadataChangeProposalClass
-SystemMetadata = SystemMetadataClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/policy/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/policy/__init__.py
deleted file mode 100644
index c54f86ee3f..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/policy/__init__.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import DataHubActorFilterClass
-from .....schema_classes import DataHubPolicyInfoClass
-from .....schema_classes import DataHubResourceFilterClass
-
-
-DataHubActorFilter = DataHubActorFilterClass
-DataHubPolicyInfo = DataHubPolicyInfoClass
-DataHubResourceFilter = DataHubResourceFilterClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/schema/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/schema/__init__.py
deleted file mode 100644
index cf06a2f2bb..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/schema/__init__.py
+++ /dev/null
@@ -1,73 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import ArrayTypeClass
-from .....schema_classes import BinaryJsonSchemaClass
-from .....schema_classes import BooleanTypeClass
-from .....schema_classes import BytesTypeClass
-from .....schema_classes import DatasetFieldForeignKeyClass
-from .....schema_classes import DateTypeClass
-from .....schema_classes import EditableSchemaFieldInfoClass
-from .....schema_classes import EditableSchemaMetadataClass
-from .....schema_classes import EnumTypeClass
-from .....schema_classes import EspressoSchemaClass
-from .....schema_classes import FixedTypeClass
-from .....schema_classes import ForeignKeyConstraintClass
-from .....schema_classes import ForeignKeySpecClass
-from .....schema_classes import KafkaSchemaClass
-from .....schema_classes import KeyValueSchemaClass
-from .....schema_classes import MapTypeClass
-from .....schema_classes import MySqlDDLClass
-from .....schema_classes import NullTypeClass
-from .....schema_classes import NumberTypeClass
-from .....schema_classes import OracleDDLClass
-from .....schema_classes import OrcSchemaClass
-from .....schema_classes import OtherSchemaClass
-from .....schema_classes import PrestoDDLClass
-from .....schema_classes import RecordTypeClass
-from .....schema_classes import SchemaFieldClass
-from .....schema_classes import SchemaFieldDataTypeClass
-from .....schema_classes import SchemaMetadataClass
-from .....schema_classes import SchemalessClass
-from .....schema_classes import StringTypeClass
-from .....schema_classes import TimeTypeClass
-from .....schema_classes import UnionTypeClass
-from .....schema_classes import UrnForeignKeyClass
-
-
-ArrayType = ArrayTypeClass
-BinaryJsonSchema = BinaryJsonSchemaClass
-BooleanType = BooleanTypeClass
-BytesType = BytesTypeClass
-DatasetFieldForeignKey = DatasetFieldForeignKeyClass
-DateType = DateTypeClass
-EditableSchemaFieldInfo = EditableSchemaFieldInfoClass
-EditableSchemaMetadata = EditableSchemaMetadataClass
-EnumType = EnumTypeClass
-EspressoSchema = EspressoSchemaClass
-FixedType = FixedTypeClass
-ForeignKeyConstraint = ForeignKeyConstraintClass
-ForeignKeySpec = ForeignKeySpecClass
-KafkaSchema = KafkaSchemaClass
-KeyValueSchema = KeyValueSchemaClass
-MapType = MapTypeClass
-MySqlDDL = MySqlDDLClass
-NullType = NullTypeClass
-NumberType = NumberTypeClass
-OracleDDL = OracleDDLClass
-OrcSchema = OrcSchemaClass
-OtherSchema = OtherSchemaClass
-PrestoDDL = PrestoDDLClass
-RecordType = RecordTypeClass
-SchemaField = SchemaFieldClass
-SchemaFieldDataType = SchemaFieldDataTypeClass
-SchemaMetadata = SchemaMetadataClass
-Schemaless = SchemalessClass
-StringType = StringTypeClass
-TimeType = TimeTypeClass
-UnionType = UnionTypeClass
-UrnForeignKey = UrnForeignKeyClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/tag/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/tag/__init__.py
deleted file mode 100644
index 7f2005d47e..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/tag/__init__.py
+++ /dev/null
@@ -1,11 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import TagPropertiesClass
-
-
-TagProperties = TagPropertiesClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/timeseries/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/timeseries/__init__.py
deleted file mode 100644
index bb7d8e9b02..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/timeseries/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
-
-# fmt: off
-from .....schema_classes import CalendarIntervalClass
-from .....schema_classes import PartitionSpecClass
-from .....schema_classes import TimeWindowClass
-from .....schema_classes import TimeWindowSizeClass
-
-
-CalendarInterval = CalendarIntervalClass
-PartitionSpec = PartitionSpecClass
-TimeWindow = TimeWindowClass
-TimeWindowSize = TimeWindowSizeClass
-# fmt: on
diff --git a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/usage/__init__.py b/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/usage/__init__.py
deleted file mode 100644
index f5482018ce..0000000000
--- a/metadata-ingestion/src/datahub/metadata/com/linkedin/pegasus2avro/usage/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# flake8: noqa
-
-# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
-# Do not modify manually!
- -# fmt: off -from .....schema_classes import FieldUsageCountsClass -from .....schema_classes import UsageAggregationClass -from .....schema_classes import UsageAggregationMetricsClass -from .....schema_classes import UserUsageCountsClass - - -FieldUsageCounts = FieldUsageCountsClass -UsageAggregation = UsageAggregationClass -UsageAggregationMetrics = UsageAggregationMetricsClass -UserUsageCounts = UserUsageCountsClass -# fmt: on diff --git a/metadata-ingestion/src/datahub/metadata/schema.avsc b/metadata-ingestion/src/datahub/metadata/schema.avsc deleted file mode 100644 index 55c62f4732..0000000000 --- a/metadata-ingestion/src/datahub/metadata/schema.avsc +++ /dev/null @@ -1,6579 +0,0 @@ -[ - "null", - { - "type": "record", - "name": "MetadataChangeEvent", - "namespace": "com.linkedin.pegasus2avro.mxe", - "fields": [ - { - "type": [ - "null", - { - "type": "record", - "name": "KafkaAuditHeader", - "namespace": "com.linkedin.events", - "fields": [ - { - "compliance": [ - { - "policy": "EVENT_TIME" - } - ], - "type": "long", - "name": "time", - "doc": "The time at which the event was emitted into kafka." - }, - { - "compliance": "NONE", - "type": "string", - "name": "server", - "doc": "The fully qualified name of the host from which the event is being emitted." - }, - { - "compliance": "NONE", - "type": [ - "null", - "string" - ], - "name": "instance", - "default": null, - "doc": "The instance on the server from which the event is being emitted. e.g. i001" - }, - { - "compliance": "NONE", - "type": "string", - "name": "appName", - "doc": "The name of the application from which the event is being emitted. see go/appname" - }, - { - "compliance": "NONE", - "type": { - "type": "fixed", - "name": "UUID", - "namespace": "com.linkedin.events", - "size": 16 - }, - "name": "messageId", - "doc": "A unique identifier for the message" - }, - { - "compliance": "NONE", - "type": [ - "null", - "int" - ], - "name": "auditVersion", - "default": null, - "doc": "The version that is being used for auditing. In version 0, the audit trail buckets events into 10 minute audit windows based on the EventHeader timestamp. In version 1, the audit trail buckets events as follows: if the schema has an outer KafkaAuditHeader, use the outer audit header timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader use that inner audit header's timestamp for bucketing" - }, - { - "compliance": "NONE", - "type": [ - "null", - "string" - ], - "name": "fabricUrn", - "default": null, - "doc": "The fabricUrn of the host from which the event is being emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See go/fabric." - }, - { - "compliance": "NONE", - "type": [ - "null", - "string" - ], - "name": "clusterConnectionString", - "default": null, - "doc": "This is a String that the client uses to establish some kind of connection with the Kafka cluster. The exact format of it depends on specific versions of clients and brokers. This information could potentially identify the fabric and cluster with which the client is producing to or consuming from." - } - ], - "doc": "This header records information about the context of an event as it is emitted into kafka and is intended to be used by the kafka audit application. For more information see go/kafkaauditheader" - } - ], - "name": "auditHeader", - "default": null, - "doc": "Kafka audit header. See go/kafkaauditheader for more info." 
- }, - { - "type": [ - { - "type": "record", - "Entity": { - "keyAspect": "chartKey", - "name": "chart" - }, - "name": "ChartSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.ChartUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "chartKey" - }, - "name": "ChartKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "addToFilters": true, - "boostScore": 4.0, - "fieldName": "tool", - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "dashboardTool", - "doc": "The name of the dashboard tool such as looker, redash etc." - }, - { - "type": "string", - "name": "chartId", - "doc": "Unique id for the chart. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, chart URL could be used here for Looker such as 'looker.linkedin.com/looks/1234'" - } - ], - "doc": "Key for a Chart" - }, - { - "type": "record", - "Aspect": { - "name": "chartInfo" - }, - "name": "ChartInfo", - "namespace": "com.linkedin.pegasus2avro.chart", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "externalUrl", - "default": null, - "doc": "URL where the reference exist" - }, - { - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "title", - "doc": "Title of the chart" - }, - { - "Searchable": {}, - "type": "string", - "name": "description", - "doc": "Detailed description about the chart" - }, - { - "type": { - "type": "record", - "name": "ChangeAuditStamps", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": { - "type": "record", - "name": "AuditStamp", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": "long", - "name": "time", - "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "actor", - "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": [ - "null", - "string" - ], - "name": "impersonator", - "default": null, - "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor." - } - ], - "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage." 
- }, - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." - } - ], - "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into various lifecycle stages, and who acted to move it into those lifecycle stages. The recommended best practice is to include this record in your record schema, and annotate its fields as @readOnly in your resource. See https://github.com/linkedin/rest.li/wiki/Validation-in-Rest.li#restli-validation-annotations" - }, - "name": "lastModified", - "doc": "Captures information about who created/last modified/deleted this chart and when" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "chartUrl", - "default": null, - "doc": "URL for the chart. 
This could be used as an external link on DataHub to allow users access/view the chart" - }, - { - "Relationship": { - "/*/string": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - }, - "type": [ - "null", - { - "type": "array", - "items": [ - "string" - ] - } - ], - "name": "inputs", - "default": null, - "doc": "Data sources for the chart" - }, - { - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - }, - "type": [ - "null", - { - "type": "enum", - "symbolDocs": { - "BAR": "Chart showing a Bar chart", - "PIE": "Chart showing a Pie chart", - "SCATTER": "Chart showing a Scatter plot", - "TABLE": "Chart showing a table", - "TEXT": "Chart showing Markdown formatted text" - }, - "name": "ChartType", - "namespace": "com.linkedin.pegasus2avro.chart", - "symbols": [ - "BAR", - "PIE", - "SCATTER", - "TABLE", - "TEXT", - "LINE", - "AREA", - "HISTOGRAM", - "BOX_PLOT" - ], - "doc": "The various types of charts" - } - ], - "name": "type", - "default": null, - "doc": "Type of the chart" - }, - { - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - }, - "type": [ - "null", - { - "type": "enum", - "symbolDocs": { - "PRIVATE": "Private availability to certain set of users", - "PUBLIC": "Publicly available access level" - }, - "name": "AccessLevel", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "PUBLIC", - "PRIVATE" - ], - "doc": "The various access levels" - } - ], - "name": "access", - "default": null, - "doc": "Access level for the chart" - }, - { - "type": [ - "null", - "long" - ], - "name": "lastRefreshed", - "default": null, - "doc": "The time when this chart last refreshed" - } - ], - "doc": "Information about a chart" - }, - { - "type": "record", - "Aspect": { - "name": "chartQuery" - }, - "name": "ChartQuery", - "namespace": "com.linkedin.pegasus2avro.chart", - "fields": [ - { - "type": "string", - "name": "rawQuery", - "doc": "Raw query to build a chart from input datasets" - }, - { - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - }, - "type": { - "type": "enum", - "symbolDocs": { - "LOOKML": "LookML queries", - "SQL": "SQL type queries" - }, - "name": "ChartQueryType", - "namespace": "com.linkedin.pegasus2avro.chart", - "symbols": [ - "LOOKML", - "SQL" - ] - }, - "name": "type", - "doc": "Chart query type" - } - ], - "doc": "Information for chart query which is used for getting data of the chart" - }, - { - "type": "record", - "Aspect": { - "name": "editableChartProperties" - }, - "name": "EditableChartProperties", - "namespace": "com.linkedin.pegasus2avro.chart", - "fields": [ - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." 
- }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." - }, - { - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Edited documentation of the chart " - } - ], - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines" - }, - { - "type": "record", - "Aspect": { - "name": "ownership" - }, - "name": "Ownership", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Owner", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "Relationship": { - "entityTypes": [ - "corpUser", - "corpGroup" - ], - "name": "OwnedBy" - }, - "Searchable": { - "fieldName": "owners", - "fieldType": "URN", - "hasValuesFieldName": "hasOwners", - "queryByDefault": false - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "owner", - "doc": "Owner URN, e.g. urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name\n(Caveat: only corpuser is currently supported in the frontend.)" - }, - { - "type": { - "type": "enum", - "symbolDocs": { - "CONSUMER": "A person, group, or service that consumes the data", - "DATAOWNER": "A person or group that is owning the data", - "DELEGATE": "A person or a group that overseas the operation, e.g. a DBA or SRE.", - "DEVELOPER": "A person or group that is in charge of developing the code", - "PRODUCER": "A person, group, or service that produces/generates the data", - "STAKEHOLDER": "A person or a group that has direct business interest" - }, - "name": "OwnershipType", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "DEVELOPER", - "DATAOWNER", - "DELEGATE", - "PRODUCER", - "CONSUMER", - "STAKEHOLDER" - ], - "doc": "Owner category or owner role" - }, - "name": "type", - "doc": "The type of the ownership" - }, - { - "type": [ - "null", - { - "type": "record", - "name": "OwnershipSource", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": { - "type": "enum", - "symbolDocs": { - "AUDIT": "Auditing system or audit logs", - "DATABASE": "Database, e.g. GRANTS table", - "FILE_SYSTEM": "File system, e.g. file/directory owner", - "ISSUE_TRACKING_SYSTEM": "Issue tracking system, e.g. Jira", - "MANUAL": "Manually provided by a user", - "OTHER": "Other sources", - "SERVICE": "Other ownership-like service, e.g. Nuage, ACL service etc", - "SOURCE_CONTROL": "SCM system, e.g. 
GIT, SVN" - }, - "name": "OwnershipSourceType", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "AUDIT", - "DATABASE", - "FILE_SYSTEM", - "ISSUE_TRACKING_SYSTEM", - "MANUAL", - "SERVICE", - "SOURCE_CONTROL", - "OTHER" - ] - }, - "name": "type", - "doc": "The type of the source" - }, - { - "type": [ - "null", - "string" - ], - "name": "url", - "default": null, - "doc": "A reference URL for the source" - } - ], - "doc": "Source/provider of the ownership information" - } - ], - "name": "source", - "default": null, - "doc": "Source information for the ownership" - } - ], - "doc": "Ownership information" - } - }, - "name": "owners", - "doc": "List of owners of the entity." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "Audit stamp containing who last modified the record and when. A value of 0 in the time field indicates missing data." - } - ], - "doc": "Ownership information of an entity." - }, - { - "type": "record", - "Aspect": { - "name": "status" - }, - "name": "Status", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "Searchable": { - "fieldType": "BOOLEAN" - }, - "type": "boolean", - "name": "removed", - "default": false, - "doc": "whether the entity is removed or not" - } - ], - "doc": "The status metadata of an entity, e.g. dataset, metric, feature, etc.\nThis aspect is used to represent soft deletes conventionally." - }, - { - "type": "record", - "Aspect": { - "name": "globalTags" - }, - "name": "GlobalTags", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "TagAssociation", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "Searchable": { - "fieldName": "tags", - "fieldType": "URN_PARTIAL", - "hasValuesFieldName": "hasTags" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.TagUrn" - }, - "type": "string", - "name": "tag", - "doc": "Urn of the applied tag" - } - ], - "doc": "Properties of an applied tag. For now, just an Urn. In the future we can extend this with other properties, e.g.\npropagation parameters." - } - }, - "name": "tags", - "doc": "Tags associated with a given entity" - } - ], - "doc": "Tag aspect used for applying tags to an entity" - }, - { - "type": "record", - "Aspect": { - "name": "browsePaths" - }, - "name": "BrowsePaths", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "Searchable": { - "/*": { - "fieldName": "browsePaths", - "fieldType": "BROWSE_PATH" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "paths", - "doc": "A list of valid browse paths for the entity.\n\nBrowse paths are expected to be backslash-separated strings. For example: 'prod/snowflake/datasetName'" - } - ], - "doc": "Shared aspect containing Browse Paths to be indexed for an entity." 
- }, - { - "type": "record", - "Aspect": { - "name": "glossaryTerms" - }, - "name": "GlossaryTerms", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "GlossaryTermAssociation", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "Searchable": { - "fieldName": "glossaryTerms", - "fieldType": "URN_PARTIAL" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn" - }, - "type": "string", - "name": "urn", - "doc": "Urn of the applied glossary term" - } - ], - "doc": "Properties of an applied glossary term." - } - }, - "name": "terms", - "doc": "The related business terms" - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "auditStamp", - "doc": "Audit stamp containing who reported the related business term" - } - ], - "doc": "Related business terms information" - }, - { - "type": "record", - "Aspect": { - "name": "institutionalMemory" - }, - "name": "InstitutionalMemory", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "InstitutionalMemoryMetadata", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": "string", - "name": "url", - "doc": "Link to an engineering design document or a wiki page." - }, - { - "type": "string", - "name": "description", - "doc": "Description of the link." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "createStamp", - "doc": "Audit stamp associated with creation of this record" - } - ], - "doc": "Metadata corresponding to a record of institutional memory." - } - }, - "name": "elements", - "doc": "List of records that represent institutional memory of an entity. Each record consists of a link, description, creator and timestamps associated with that record." - } - ], - "doc": "Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity." - } - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the chart. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific Chart entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "corpGroupKey", - "name": "corpGroup" - }, - "name": "CorpGroupSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpGroupUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "corpGroupKey" - }, - "name": "CorpGroupKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "The URL-encoded name of the AD/LDAP group. Serves as a globally unique identifier within DataHub." 
- } - ], - "doc": "Key for a CorpGroup" - }, - { - "type": "record", - "Aspect": { - "EntityUrns": [ - "com.linkedin.pegasus2avro.common.CorpGroupUrn" - ], - "name": "corpGroupInfo" - }, - "name": "CorpGroupInfo", - "namespace": "com.linkedin.pegasus2avro.identity", - "fields": [ - { - "Searchable": { - "fieldType": "TEXT_PARTIAL" - }, - "type": [ - "null", - "string" - ], - "name": "displayName", - "default": null, - "doc": "The name to use when displaying the group." - }, - { - "type": [ - "null", - "string" - ], - "name": "email", - "default": null, - "doc": "email of this group" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "corpUser" - ], - "name": "OwnedBy" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "admins", - "doc": "owners of this group" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "corpUser" - ], - "name": "IsPartOf" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "members", - "doc": "List of ldap urn in this group." - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "corpGroup" - ], - "name": "IsPartOf" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "groups", - "doc": "List of groups in this group." - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "A description of the group." - } - ], - "doc": "group of corpUser, it may contains nested group" - }, - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the LdapUser. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific CorpGroup entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "corpUserKey", - "name": "corpuser" - }, - "name": "CorpUserSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpuserUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "corpUserKey" - }, - "name": "CorpUserKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "boostScore": 2.0, - "enableAutocomplete": true, - "fieldName": "ldap", - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "username", - "doc": "The name of the AD/LDAP user." - } - ], - "doc": "Key for a CorpUser" - }, - { - "type": "record", - "Aspect": { - "EntityUrns": [ - "com.linkedin.pegasus2avro.common.CorpuserUrn" - ], - "name": "corpUserInfo" - }, - "name": "CorpUserInfo", - "namespace": "com.linkedin.pegasus2avro.identity", - "fields": [ - { - "Searchable": { - "fieldType": "BOOLEAN", - "weightsPerFieldValue": { - "true": 2.0 - } - }, - "type": "boolean", - "name": "active", - "doc": "Whether the corpUser is active, ref: https://iwww.corp.linkedin.com/wiki/cf/display/GTSD/Accessing+Active+Directory+via+LDAP+tools" - }, - { - "type": [ - "null", - "string" - ], - "name": "displayName", - "default": null, - "doc": "displayName of this user , e.g. 
Hang Zhang(DataHQ)" - }, - { - "Searchable": { - "fieldType": "KEYWORD", - "queryByDefault": true - }, - "type": [ - "null", - "string" - ], - "name": "email", - "default": null, - "doc": "email address of this user" - }, - { - "Searchable": { - "fieldType": "KEYWORD", - "queryByDefault": true - }, - "type": [ - "null", - "string" - ], - "name": "title", - "default": null, - "doc": "title of this user" - }, - { - "Relationship": { - "entityTypes": [ - "corpUser" - ], - "name": "ReportsTo" - }, - "Searchable": { - "fieldName": "managerLdap", - "fieldType": "URN", - "queryByDefault": true - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpuserUrn" - }, - "type": [ - "null", - "string" - ], - "name": "managerUrn", - "default": null, - "doc": "direct manager of this user" - }, - { - "type": [ - "null", - "long" - ], - "name": "departmentId", - "default": null, - "doc": "department id this user belong to" - }, - { - "type": [ - "null", - "string" - ], - "name": "departmentName", - "default": null, - "doc": "department name this user belong to" - }, - { - "type": [ - "null", - "string" - ], - "name": "firstName", - "default": null, - "doc": "first name of this user" - }, - { - "type": [ - "null", - "string" - ], - "name": "lastName", - "default": null, - "doc": "last name of this user" - }, - { - "Searchable": { - "boostScore": 10.0, - "fieldType": "TEXT_PARTIAL", - "queryByDefault": true - }, - "type": [ - "null", - "string" - ], - "name": "fullName", - "default": null, - "doc": "Common name of this user, format is firstName + lastName (split by a whitespace)" - }, - { - "type": [ - "null", - "string" - ], - "name": "countryCode", - "default": null, - "doc": "two uppercase letters country code. e.g. US" - } - ], - "doc": "Linkedin corp user information" - }, - { - "type": "record", - "Aspect": { - "EntityUrns": [ - "com.linkedin.pegasus2avro.common.CorpuserUrn" - ], - "name": "corpUserEditableInfo" - }, - "name": "CorpUserEditableInfo", - "namespace": "com.linkedin.pegasus2avro.identity", - "fields": [ - { - "type": [ - "null", - "string" - ], - "name": "aboutMe", - "default": null, - "doc": "About me section of the user" - }, - { - "Searchable": { - "/*": { - "fieldType": "TEXT" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "teams", - "default": [], - "doc": "Teams that the user belongs to e.g. Metadata" - }, - { - "Searchable": { - "/*": { - "fieldType": "TEXT" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "skills", - "default": [], - "doc": "Skills that the user possesses e.g. Machine Learning" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": "string", - "name": "pictureLink", - "default": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/default_avatar.png", - "doc": "A URL which points to a picture which user wants to set as a profile photo" - } - ], - "doc": "Linkedin corp user information that can be edited from UI" - }, - { - "type": "record", - "Aspect": { - "name": "groupMembership" - }, - "name": "GroupMembership", - "namespace": "com.linkedin.pegasus2avro.identity", - "fields": [ - { - "Relationship": { - "/*": { - "entityTypes": [ - "corpGroup" - ], - "name": "IsMemberOfGroup" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "groups" - } - ], - "doc": "Carries information about the CorpGroups a user is in." 
- }, - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the CorpUser. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific CorpUser entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "dashboardKey", - "name": "dashboard" - }, - "name": "DashboardSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DashboardUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "dashboardKey" - }, - "name": "DashboardKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "addToFilters": true, - "boostScore": 4.0, - "fieldName": "tool", - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "dashboardTool", - "doc": "The name of the dashboard tool such as looker, redash etc." - }, - { - "type": "string", - "name": "dashboardId", - "doc": "Unique id for the dashboard. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, dashboard URL could be used here for Looker such as 'looker.linkedin.com/dashboards/1234'" - } - ], - "doc": "Key for a Dashboard" - }, - { - "type": "record", - "Aspect": { - "name": "dashboardInfo" - }, - "name": "DashboardInfo", - "namespace": "com.linkedin.pegasus2avro.dashboard", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "externalUrl", - "default": null, - "doc": "URL where the reference exist" - }, - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "title", - "doc": "Title of the dashboard" - }, - { - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - }, - "type": "string", - "name": "description", - "doc": "Detailed description about the dashboard" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "chart" - ], - "name": "Contains" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "charts", - "default": [], - "doc": "Charts in a dashboard" - }, - { - "type": "com.linkedin.pegasus2avro.common.ChangeAuditStamps", - "name": "lastModified", - "doc": "Captures information about who created/last modified/deleted this dashboard and when" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "dashboardUrl", - "default": null, - "doc": "URL for the dashboard. 
This could be used as an external link on DataHub to allow users access/view the dashboard" - }, - { - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - }, - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AccessLevel" - ], - "name": "access", - "default": null, - "doc": "Access level for the dashboard" - }, - { - "type": [ - "null", - "long" - ], - "name": "lastRefreshed", - "default": null, - "doc": "The time when this dashboard last refreshed" - } - ], - "doc": "Information about a dashboard" - }, - { - "type": "record", - "Aspect": { - "name": "editableDashboardProperties" - }, - "name": "EditableDashboardProperties", - "namespace": "com.linkedin.pegasus2avro.dashboard", - "fields": [ - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." - }, - { - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Edited documentation of the dashboard" - } - ], - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.BrowsePaths", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.InstitutionalMemory" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the dashboard. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific Dashboard entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "dataFlowKey", - "name": "dataFlow" - }, - "name": "DataFlowSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataFlowUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." 
- }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "dataFlowKey" - }, - "name": "DataFlowKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "orchestrator", - "doc": "Workflow manager like azkaban, airflow which orchestrates the flow" - }, - { - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "flowId", - "doc": "Unique Identifier of the data flow" - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "cluster", - "doc": "Cluster where the flow is executed" - } - ], - "doc": "Key for a Data Flow" - }, - { - "type": "record", - "Aspect": { - "name": "dataFlowInfo" - }, - "name": "DataFlowInfo", - "namespace": "com.linkedin.pegasus2avro.datajob", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "externalUrl", - "default": null, - "doc": "URL where the reference exist" - }, - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Flow name" - }, - { - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Flow description" - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - }, - "type": [ - "null", - "string" - ], - "name": "project", - "default": null, - "doc": "Optional project/namespace associated with the flow" - } - ], - "doc": "Information about a Data processing flow" - }, - { - "type": "record", - "Aspect": { - "name": "editableDataFlowProperties" - }, - "name": "EditableDataFlowProperties", - "namespace": "com.linkedin.pegasus2avro.datajob", - "fields": [ - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." 
- }, - { - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Edited documentation of the data flow" - } - ], - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.BrowsePaths", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.InstitutionalMemory" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the data flow. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific DataFlow entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "dataJobKey", - "name": "dataJob" - }, - "name": "DataJobSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataJobUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "dataJobKey" - }, - "name": "DataJobKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Relationship": { - "entityTypes": [ - "dataFlow" - ], - "name": "IsPartOf" - }, - "Searchable": { - "fieldName": "dataFlow", - "fieldType": "URN_PARTIAL", - "queryByDefault": false - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "flow", - "doc": "Standardized data processing flow urn representing the flow for the job" - }, - { - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "jobId", - "doc": "Unique Identifier of the data job" - } - ], - "doc": "Key for a Data Job" - }, - { - "type": "record", - "Aspect": { - "name": "dataJobInfo" - }, - "name": "DataJobInfo", - "namespace": "com.linkedin.pegasus2avro.datajob", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "externalUrl", - "default": null, - "doc": "URL where the reference exist" - }, - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Job name" - }, - { - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Job description" - }, - { - "type": [ - { - "type": "enum", - "symbolDocs": { - "COMMAND": "The command job type is one of the basic built-in types. 
It runs multiple UNIX commands using java processbuilder.\nUpon execution, Azkaban spawns off a process to run the command.", - "GLUE": "Glue type is for running AWS Glue job transforms.", - "HADOOP_JAVA": "Runs a java program with ability to access Hadoop cluster.\nhttps://azkaban.readthedocs.io/en/latest/jobTypes.html#java-job-type", - "HADOOP_SHELL": "In large part, this is the same Command type. The difference is its ability to talk to a Hadoop cluster\nsecurely, via Hadoop tokens.", - "HIVE": "Hive type is for running Hive jobs.", - "PIG": "Pig type is for running Pig jobs.", - "SQL": "SQL is for running Presto, mysql queries etc" - }, - "name": "AzkabanJobType", - "namespace": "com.linkedin.pegasus2avro.datajob.azkaban", - "symbols": [ - "COMMAND", - "HADOOP_JAVA", - "HADOOP_SHELL", - "HIVE", - "PIG", - "SQL", - "GLUE" - ], - "doc": "The various types of support azkaban jobs" - }, - "string" - ], - "name": "type", - "doc": "Datajob type\n**NOTE**: AzkabanJobType is deprecated. Please use strings instead." - }, - { - "Relationship": { - "entityTypes": [ - "dataFlow" - ], - "name": "IsPartOf" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataFlowUrn" - }, - "type": [ - "null", - "string" - ], - "name": "flowUrn", - "default": null, - "doc": "DataFlow urn that this job is part of" - }, - { - "type": [ - "null", - { - "type": "enum", - "symbolDocs": { - "COMPLETED": "Jobs with successful completion.", - "FAILED": "Jobs that have failed.", - "IN_PROGRESS": "Jobs currently running.", - "STARTING": "Jobs being initialized.", - "STOPPED": "Jobs that have stopped.", - "STOPPING": "Jobs being stopped.", - "UNKNOWN": "Jobs with unknown status (either unmappable or unavailable)" - }, - "name": "JobStatus", - "namespace": "com.linkedin.pegasus2avro.datajob", - "symbols": [ - "STARTING", - "IN_PROGRESS", - "STOPPING", - "STOPPED", - "COMPLETED", - "FAILED", - "UNKNOWN" - ], - "doc": "Job statuses" - } - ], - "name": "status", - "default": null, - "doc": "Status of the job" - } - ], - "doc": "Information about a Data processing job" - }, - { - "type": "record", - "Aspect": { - "name": "dataJobInputOutput" - }, - "name": "DataJobInputOutput", - "namespace": "com.linkedin.pegasus2avro.datajob", - "fields": [ - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - }, - "Searchable": { - "/*": { - "fieldName": "inputs", - "fieldType": "URN", - "numValuesFieldName": "numInputDatasets", - "queryByDefault": false - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "inputDatasets", - "doc": "Input datasets consumed by the data job during processing" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Produces" - } - }, - "Searchable": { - "/*": { - "fieldName": "outputs", - "fieldType": "URN", - "numValuesFieldName": "numOutputDatasets", - "queryByDefault": false - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "outputDatasets", - "doc": "Output datasets produced by the data job during processing" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataJob" - ], - "name": "DownstreamOf" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "inputDatajobs", - "default": null, - "doc": "Input datajobs that this data job depends on" - } - ], - "doc": "Information about the inputs and outputs of a Data processing job" - }, - { - "type": "record", - "Aspect": { - "name": "editableDataJobProperties" - }, - "name": 
"EditableDataJobProperties", - "namespace": "com.linkedin.pegasus2avro.datajob", - "fields": [ - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." - }, - { - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Edited documentation of the data job " - } - ], - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.BrowsePaths", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.InstitutionalMemory" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the data job. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific DataJob entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "datasetKey", - "name": "dataset" - }, - "name": "DatasetSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "datasetKey" - }, - "name": "DatasetKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "platform", - "doc": "Data platform urn associated with the dataset" - }, - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Dataset native name e.g. 
., /dir/subdir/, or " - }, - { - "Searchable": { - "addToFilters": true, - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - }, - "type": { - "type": "enum", - "symbolDocs": { - "CORP": "Designates corporation fabrics", - "DEV": "Designates development fabrics", - "EI": "Designates early-integration (staging) fabrics", - "PROD": "Designates production fabrics" - }, - "name": "FabricType", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "DEV", - "EI", - "PROD", - "CORP" - ], - "doc": "Fabric group type" - }, - "name": "origin", - "doc": "Fabric type where dataset belongs to or where it was generated." - } - ], - "doc": "Key for a Dataset" - }, - { - "type": "record", - "Aspect": { - "name": "datasetProperties" - }, - "name": "DatasetProperties", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "externalUrl", - "default": null, - "doc": "URL where the reference exist" - }, - { - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the dataset" - }, - { - "java": { - "class": "java.net.URI" - }, - "type": [ - "null", - "string" - ], - "name": "uri", - "default": null, - "doc": "The abstracted URI such as hdfs:///data/tracking/PageViewEvent, file:///dir/file_name. Uri should not include any environment specific properties. Some datasets might not have a standardized uri, which makes this field optional (i.e. kafka topic)." - }, - { - "type": { - "type": "array", - "items": "string" - }, - "name": "tags", - "default": [], - "doc": "[Legacy] Unstructured tags for the dataset. Structured tags can be applied via the `GlobalTags` aspect." - } - ], - "doc": "Properties associated with a Dataset" - }, - { - "type": "record", - "Aspect": { - "name": "editableDatasetProperties" - }, - "name": "EditableDatasetProperties", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." 
- }, - { - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the dataset" - } - ], - "doc": "EditableDatasetProperties stores editable changes made to dataset properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines" - }, - { - "type": "record", - "Aspect": { - "name": "datasetDeprecation" - }, - "name": "DatasetDeprecation", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "Searchable": { - "fieldType": "BOOLEAN", - "weightsPerFieldValue": { - "true": 0.5 - } - }, - "type": "boolean", - "name": "deprecated", - "doc": "Whether the dataset is deprecated by owner." - }, - { - "type": [ - "null", - "long" - ], - "name": "decommissionTime", - "default": null, - "doc": "The time user plan to decommission this dataset." - }, - { - "type": "string", - "name": "note", - "doc": "Additional information about the dataset deprecation plan, such as the wiki, doc, RB." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": [ - "null", - "string" - ], - "name": "actor", - "default": null, - "doc": "The corpuser URN which will be credited for modifying this deprecation content." - } - ], - "doc": "Dataset deprecation status" - }, - { - "type": "record", - "Aspect": { - "name": "datasetUpstreamLineage" - }, - "name": "DatasetUpstreamLineage", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "DatasetFieldMapping", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "doc": "Audit stamp containing who reported the field mapping and when" - }, - { - "type": [ - { - "type": "enum", - "symbolDocs": { - "BLACKBOX": "Field transformation expressed as unknown black box function.", - "IDENTITY": "Field transformation expressed as Identity function." - }, - "name": "TransformationType", - "namespace": "com.linkedin.pegasus2avro.common.fieldtransformer", - "symbols": [ - "BLACKBOX", - "IDENTITY" - ], - "doc": "Type of the transformation involved in generating destination fields from source fields." - }, - { - "type": "record", - "name": "UDFTransformer", - "namespace": "com.linkedin.pegasus2avro.common.fieldtransformer", - "fields": [ - { - "type": "string", - "name": "udf", - "doc": "A UDF mentioning how the source fields got transformed to destination field. This is the FQCN(Fully Qualified Class Name) of the udf." 
- } - ], - "doc": "Field transformation expressed in UDF" - } - ], - "name": "transformation", - "doc": "Transfomration function between the fields involved" - }, - { - "type": { - "type": "array", - "items": [ - "string" - ] - }, - "name": "sourceFields", - "doc": "Source fields from which the fine grained lineage is derived" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetFieldUrn" - }, - "type": "string", - "name": "destinationField", - "doc": "Destination field which is derived from source fields" - } - ], - "doc": "Representation of mapping between fields in source dataset to the field in destination dataset" - } - }, - "name": "fieldMappings", - "doc": "Upstream to downstream field level lineage mappings" - } - ], - "doc": "Fine Grained upstream lineage for fields in a dataset" - }, - { - "type": "record", - "Aspect": { - "name": "upstreamLineage" - }, - "name": "UpstreamLineage", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Upstream", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "deprecated": "we no longer associate a timestamp per upstream edge", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "auditStamp", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "Audit stamp containing who reported the lineage and when.\nWARNING: this field is deprecated and may be removed in a future release." - }, - { - "Relationship": { - "entityTypes": [ - "dataset" - ], - "name": "DownstreamOf" - }, - "Searchable": { - "fieldName": "upstreams", - "fieldType": "URN", - "queryByDefault": false - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - }, - "type": "string", - "name": "dataset", - "doc": "The upstream dataset the lineage points to" - }, - { - "type": { - "type": "enum", - "symbolDocs": { - "COPY": "Direct copy without modification", - "TRANSFORMED": "Transformed data with modification (format or content change)", - "VIEW": "Represents a view defined on the sources e.g. Hive view defined on underlying hive tables or a Hive table pointing to a HDFS dataset or DALI view defined on multiple sources" - }, - "name": "DatasetLineageType", - "namespace": "com.linkedin.pegasus2avro.dataset", - "symbols": [ - "COPY", - "TRANSFORMED", - "VIEW" - ], - "doc": "The various types of supported dataset lineage" - }, - "name": "type", - "doc": "The type of the lineage" - } - ], - "doc": "Upstream lineage information about a dataset including the source reporting the lineage" - } - }, - "name": "upstreams", - "doc": "List of upstream dataset lineage information" - } - ], - "doc": "Upstream lineage of a dataset" - }, - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - { - "type": "record", - "Aspect": { - "name": "schemaMetadata" - }, - "name": "SchemaMetadata", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "validate": { - "strlen": { - "max": 500, - "min": 1 - } - }, - "type": "string", - "name": "schemaName", - "doc": "Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataPlatformUrn" - }, - "type": "string", - "name": "platform", - "doc": "Standardized platform urn where schema is defined. 
The data platform Urn (urn:li:platform:{platform_name})" - }, - { - "type": "long", - "name": "version", - "doc": "Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. This version is differ from platform native schema version." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - }, - "type": [ - "null", - "string" - ], - "name": "dataset", - "default": null, - "doc": "Dataset this schema metadata is associated with." - }, - { - "type": [ - "null", - "string" - ], - "name": "cluster", - "default": null, - "doc": "The cluster this schema metadata resides from" - }, - { - "type": "string", - "name": "hash", - "doc": "the SHA1 hash of the schema content" - }, - { - "type": [ - { - "type": "record", - "name": "EspressoSchema", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "documentSchema", - "doc": "The native espresso document schema." - }, - { - "type": "string", - "name": "tableSchema", - "doc": "The espresso table schema definition." - } - ], - "doc": "Schema text of an espresso table schema." - }, - { - "type": "record", - "name": "OracleDDL", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "tableSchema", - "doc": "The native schema in the dataset's platform. This is a human readable (json blob) table schema." - } - ], - "doc": "Schema holder for oracle data definition language that describes an oracle table." - }, - { - "type": "record", - "name": "MySqlDDL", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "tableSchema", - "doc": "The native schema in the dataset's platform. This is a human readable (json blob) table schema." - } - ], - "doc": "Schema holder for MySql data definition language that describes an MySql table." - }, - { - "type": "record", - "name": "PrestoDDL", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "rawSchema", - "doc": "The raw schema in the dataset's platform. This includes the DDL and the columns extracted from DDL." - } - ], - "doc": "Schema holder for presto data definition language that describes a presto view." 
- }, - { - "type": "record", - "name": "KafkaSchema", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "documentSchema", - "doc": "The native kafka document schema. This is a human readable avro document schema." - }, - { - "type": [ - "null", - "string" - ], - "name": "keySchema", - "default": null, - "doc": "The native kafka key schema as retrieved from Schema Registry" - } - ], - "doc": "Schema holder for kafka schema." - }, - { - "type": "record", - "name": "BinaryJsonSchema", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "schema", - "doc": "The native schema text for binary JSON file format." - } - ], - "doc": "Schema text of binary JSON schema." - }, - { - "type": "record", - "name": "OrcSchema", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "schema", - "doc": "The native schema for ORC file format." - } - ], - "doc": "Schema text of an ORC schema." - }, - { - "type": "record", - "name": "Schemaless", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "The dataset has no specific schema associated with it" - }, - { - "type": "record", - "name": "KeyValueSchema", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "keySchema", - "doc": "The raw schema for the key in the key-value store." - }, - { - "type": "string", - "name": "valueSchema", - "doc": "The raw schema for the value in the key-value store." - } - ], - "doc": "Schema text of a key-value store schema." - }, - { - "type": "record", - "name": "OtherSchema", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "rawSchema", - "doc": "The native schema in the dataset's platform." - } - ], - "doc": "Schema holder for undefined schema types." - } - ], - "name": "platformSchema", - "doc": "The native schema in the dataset's platform." - }, - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "SchemaField", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "Searchable": { - "fieldName": "fieldPaths", - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "fieldPath", - "doc": "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above." - }, - { - "type": [ - "null", - "string" - ], - "name": "jsonPath", - "default": null, - "doc": "Flattened name of a field in JSON Path notation." - }, - { - "type": "boolean", - "name": "nullable", - "default": false, - "doc": "Indicates if this field is optional or nullable" - }, - { - "Searchable": { - "boostScore": 0.1, - "fieldName": "fieldDescriptions", - "fieldType": "TEXT" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Description" - }, - { - "type": { - "type": "record", - "name": "SchemaFieldDataType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": [ - { - "type": "record", - "name": "BooleanType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Boolean field type." - }, - { - "type": "record", - "name": "FixedType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Fixed field type." - }, - { - "type": "record", - "name": "StringType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "String field type." 
- }, - { - "type": "record", - "name": "BytesType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Bytes field type." - }, - { - "type": "record", - "name": "NumberType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Number data type: long, integer, short, etc.." - }, - { - "type": "record", - "name": "DateType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Date field type." - }, - { - "type": "record", - "name": "TimeType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Time field type. This should also be used for datetimes." - }, - { - "type": "record", - "name": "EnumType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Enum field type." - }, - { - "type": "record", - "name": "NullType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Null field type." - }, - { - "type": "record", - "name": "MapType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": [ - "null", - "string" - ], - "name": "keyType", - "default": null, - "doc": "Key type in a map" - }, - { - "type": [ - "null", - "string" - ], - "name": "valueType", - "default": null, - "doc": "Type of the value in a map" - } - ], - "doc": "Map field type." - }, - { - "type": "record", - "name": "ArrayType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "nestedType", - "default": null, - "doc": "List of types this array holds." - } - ], - "doc": "Array field type." - }, - { - "type": "record", - "name": "UnionType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "nestedTypes", - "default": null, - "doc": "List of types in union type." - } - ], - "doc": "Union field type." - }, - { - "type": "record", - "name": "RecordType", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [], - "doc": "Record field type." - } - ], - "name": "type", - "doc": "Data platform specific types" - } - ], - "doc": "Schema field data types" - }, - "name": "type", - "doc": "Platform independent field type of the field." - }, - { - "type": "string", - "name": "nativeDataType", - "doc": "The native type of the field in the dataset's platform as declared by platform schema." - }, - { - "type": "boolean", - "name": "recursive", - "default": false, - "doc": "There are use cases when a field in type B references type A. A field in A references field of type B. In such cases, we will mark the first field as recursive." 
- }, - { - "Searchable": { - "/tags/*/tag": { - "boostScore": 0.5, - "fieldName": "fieldTags", - "fieldType": "URN_PARTIAL" - } - }, - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlobalTags" - ], - "name": "globalTags", - "default": null, - "doc": "Tags associated with the field" - }, - { - "Searchable": { - "/terms/*/urn": { - "boostScore": 0.5, - "fieldName": "fieldGlossaryTerms", - "fieldType": "URN_PARTIAL" - } - }, - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlossaryTerms" - ], - "name": "glossaryTerms", - "default": null, - "doc": "Glossary terms associated with the field" - }, - { - "type": "boolean", - "name": "isPartOfKey", - "default": false, - "doc": "For schema fields that are part of complex keys, set this field to true\nWe do this to easily distinguish between value and key fields" - } - ], - "doc": "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema" - } - }, - "name": "fields", - "doc": "Client provided a list of fields from document schema." - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "primaryKeys", - "default": null, - "doc": "Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. Empty lists indicates absence of primary key access patter. Value is a SchemaField@fieldPath." - }, - { - "deprecated": "Use foreignKeys instead.", - "type": [ - "null", - { - "type": "map", - "values": { - "type": "record", - "name": "ForeignKeySpec", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": [ - { - "type": "record", - "name": "DatasetFieldForeignKey", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - }, - "type": "string", - "name": "parentDataset", - "doc": "dataset that stores the resource." - }, - { - "type": { - "type": "array", - "items": "string" - }, - "name": "currentFieldPaths", - "doc": "List of fields in hosting(current) SchemaMetadata that conform a foreign key. List can contain a single entry or multiple entries if several entries in hosting schema conform a foreign key in a single parent dataset." - }, - { - "type": "string", - "name": "parentField", - "doc": "SchemaField@fieldPath that uniquely identify field in parent dataset that this field references." - } - ], - "doc": "For non-urn based foregin keys." - }, - { - "type": "record", - "name": "UrnForeignKey", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "currentFieldPath", - "doc": "Field in hosting(current) SchemaMetadata." - } - ], - "doc": "If SchemaMetadata fields make any external references and references are of type com.linkedin.pegasus2avro.common.Urn or any children, this models can be used to mark it." - } - ], - "name": "foreignKey", - "doc": "Foreign key definition in metadata schema." - } - ], - "doc": "Description of a foreign key in a schema." - } - } - ], - "name": "foreignKeysSpecs", - "default": null, - "doc": "Map captures all the references schema makes to external datasets. Map key is ForeignKeySpecName typeref." 
- }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "ForeignKeyConstraint", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "name", - "doc": "Name of the constraint, likely provided from the source" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "schemaField" - ], - "name": "ForeignKeyTo" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "foreignFields", - "doc": "Fields the constraint maps to on the foreign dataset" - }, - { - "type": { - "type": "array", - "items": "string" - }, - "name": "sourceFields", - "doc": "Fields the constraint maps to on the source dataset" - }, - { - "Relationship": { - "entityTypes": [ - "dataset" - ], - "name": "ForeignKeyToDataset" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "foreignDataset", - "doc": "Reference to the foreign dataset for ease of lookup" - } - ], - "doc": "Description of a foreign key constraint in a schema." - } - } - ], - "name": "foreignKeys", - "default": null, - "doc": "List of foreign key constraints for the schema" - } - ], - "doc": "SchemaMetadata to describe metadata related to store schema" - }, - { - "type": "record", - "Aspect": { - "name": "editableSchemaMetadata" - }, - "name": "EditableSchemaMetadata", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "created", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data." - }, - { - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "name": "lastModified", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "name": "deleted", - "default": null, - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics." 
- }, - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "EditableSchemaFieldInfo", - "namespace": "com.linkedin.pegasus2avro.schema", - "fields": [ - { - "type": "string", - "name": "fieldPath", - "doc": "FieldPath uniquely identifying the SchemaField this metadata is associated with" - }, - { - "Searchable": { - "boostScore": 0.1, - "fieldName": "editedFieldDescriptions", - "fieldType": "TEXT" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Description" - }, - { - "Searchable": { - "/tags/*/tag": { - "boostScore": 0.5, - "fieldName": "editedFieldTags", - "fieldType": "URN_PARTIAL" - } - }, - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlobalTags" - ], - "name": "globalTags", - "default": null, - "doc": "Tags associated with the field" - }, - { - "Searchable": { - "/terms/*/urn": { - "boostScore": 0.5, - "fieldName": "editedFieldGlossaryTerms", - "fieldType": "URN_PARTIAL" - } - }, - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlossaryTerms" - ], - "name": "glossaryTerms", - "default": null, - "doc": "Glossary terms associated with the field" - } - ], - "doc": "SchemaField to describe metadata related to dataset schema." - } - }, - "name": "editableSchemaFieldInfo", - "doc": "Client provided a list of fields from document schema." - } - ], - "doc": "EditableSchemaMetadata stores editable changes made to schema metadata. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines." - }, - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific dataset entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "dataProcessKey", - "name": "dataProcess" - }, - "deprecated": "Use DataJob instead.", - "name": "DataProcessSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataProcessUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "dataProcessKey" - }, - "name": "DataProcessKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "boostScore": 4.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Process name i.e. an ETL job name" - }, - { - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "orchestrator", - "doc": "Standardized Orchestrator where data process is defined.\nTODO: Migrate towards something that can be validated like DataPlatform urn" - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - }, - "type": "com.linkedin.pegasus2avro.common.FabricType", - "name": "origin", - "doc": "Fabric type where dataset belongs to or where it was generated." 
- } - ], - "doc": "Key for a Data Process" - }, - "com.linkedin.pegasus2avro.common.Ownership", - { - "type": "record", - "Aspect": { - "name": "dataProcessInfo" - }, - "name": "DataProcessInfo", - "namespace": "com.linkedin.pegasus2avro.dataprocess", - "fields": [ - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - }, - "Searchable": { - "/*": { - "fieldName": "inputs", - "fieldType": "URN", - "numValuesFieldName": "numInputDatasets", - "queryByDefault": false - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "inputs", - "default": null, - "doc": "the inputs of the data process" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - }, - "Searchable": { - "/*": { - "fieldName": "outputs", - "fieldType": "URN", - "numValuesFieldName": "numOutputDatasets", - "queryByDefault": false - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "outputs", - "default": null, - "doc": "the outputs of the data process" - } - ], - "doc": "The inputs and outputs of this data process" - }, - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the data process. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific Data process entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "dataPlatformKey", - "name": "dataPlatform" - }, - "name": "DataPlatformSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataPlatformUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "dataPlatformKey" - }, - "name": "DataPlatformKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "type": "string", - "name": "platformName", - "doc": "Data platform name i.e. hdfs, oracle, espresso" - } - ], - "doc": "Key for a Data Platform" - }, - { - "type": "record", - "Aspect": { - "name": "dataPlatformInfo" - }, - "name": "DataPlatformInfo", - "namespace": "com.linkedin.pegasus2avro.dataplatform", - "fields": [ - { - "validate": { - "strlen": { - "max": 15 - } - }, - "type": "string", - "name": "name", - "doc": "Name of the data platform" - }, - { - "type": [ - "null", - "string" - ], - "name": "displayName", - "default": null, - "doc": "The name that will be used for displaying a platform type." - }, - { - "type": { - "type": "enum", - "symbolDocs": { - "FILE_SYSTEM": "Value for a file system, e.g. hdfs", - "KEY_VALUE_STORE": "Value for a key value store, e.g. espresso, voldemort", - "MESSAGE_BROKER": "Value for a message broker, e.g. kafka", - "OBJECT_STORE": "Value for an object store, e.g. ambry", - "OLAP_DATASTORE": "Value for an OLAP datastore, e.g. pinot", - "OTHERS": "Value for other platforms, e.g salesforce, dovetail", - "QUERY_ENGINE": "Value for a query engine, e.g. presto", - "RELATIONAL_DB": "Value for a relational database, e.g. 
oracle, mysql", - "SEARCH_ENGINE": "Value for a search engine, e.g seas" - }, - "name": "PlatformType", - "namespace": "com.linkedin.pegasus2avro.dataplatform", - "symbols": [ - "FILE_SYSTEM", - "KEY_VALUE_STORE", - "MESSAGE_BROKER", - "OBJECT_STORE", - "OLAP_DATASTORE", - "OTHERS", - "QUERY_ENGINE", - "RELATIONAL_DB", - "SEARCH_ENGINE" - ], - "doc": "Platform types available at LinkedIn" - }, - "name": "type", - "doc": "Platform type this data platform describes" - }, - { - "type": "string", - "name": "datasetNameDelimiter", - "doc": "The delimiter in the dataset names on the data platform, e.g. '/' for HDFS and '.' for Oracle" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "logoUrl", - "default": null, - "doc": "The URL for a logo associated with the platform" - } - ], - "doc": "Information about a data platform" - } - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the data platform. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific dataplatform entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "mlModelKey", - "name": "mlModel" - }, - "name": "MLModelSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.MLModelUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "mlModelKey" - }, - "name": "MLModelKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "platform", - "doc": "Standardized platform urn for the model" - }, - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Name of the MLModel" - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - }, - "type": "com.linkedin.pegasus2avro.common.FabricType", - "name": "origin", - "doc": "Fabric type where model belongs to or where it was generated" - } - ], - "doc": "Key for an ML model" - }, - "com.linkedin.pegasus2avro.common.Ownership", - { - "type": "record", - "Aspect": { - "name": "mlModelProperties" - }, - "name": "MLModelProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." 
- }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "externalUrl", - "default": null, - "doc": "URL where the reference exist" - }, - { - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the MLModel" - }, - { - "type": [ - "null", - "long" - ], - "name": "date", - "default": null, - "doc": "Date when the MLModel was developed" - }, - { - "type": [ - "null", - { - "type": "record", - "name": "VersionTag", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": [ - "null", - "string" - ], - "name": "versionTag", - "default": null - } - ], - "doc": "A resource-defined string representing the resource state for the purpose of concurrency control" - } - ], - "name": "version", - "default": null, - "doc": "Version of the MLModel" - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL" - }, - "type": [ - "null", - "string" - ], - "name": "type", - "default": null, - "doc": "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc" - }, - { - "type": [ - "null", - { - "type": "map", - "values": [ - "string", - "int", - "float", - "double", - "boolean" - ] - } - ], - "name": "hyperParameters", - "default": null, - "doc": "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams" - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "Aspect": { - "name": "mlHyperParam" - }, - "name": "MLHyperParam", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": "string", - "name": "name", - "doc": "Name of the MLHyperParam" - }, - { - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the MLHyperParam" - }, - { - "type": [ - "null", - "string" - ], - "name": "value", - "default": null, - "doc": "The value of the MLHyperParam" - }, - { - "type": [ - "null", - "long" - ], - "name": "createdAt", - "default": null, - "doc": "Date when the MLHyperParam was developed" - } - ], - "doc": "Properties associated with an ML Hyper Param" - } - } - ], - "name": "hyperParams", - "default": null, - "doc": "Hyperparameters of the MLModel" - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "Aspect": { - "name": "mlMetric" - }, - "name": "MLMetric", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": "string", - "name": "name", - "doc": "Name of the mlMetric" - }, - { - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the mlMetric" - }, - { - "type": [ - "null", - "string" - ], - "name": "value", - "default": null, - "doc": "The value of the mlMetric" - }, - { - "type": [ - "null", - "long" - ], - "name": "createdAt", - "default": null, - "doc": "Date when the mlMetric was developed" - } - ], - "doc": "Properties associated with an ML Metric" - } - } - ], - "name": "trainingMetrics", - "default": null, - "doc": "Metrics of the MLModel used in training" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "com.linkedin.pegasus2avro.ml.metadata.MLMetric" - } - ], - "name": "onlineMetrics", - "default": null, - "doc": "Metrics of the MLModel used in production" - }, - { - "type": [ - 
"null", - { - "type": "array", - "items": "string" - } - ], - "name": "mlFeatures", - "default": null, - "doc": "List of features used for MLModel training" - }, - { - "type": { - "type": "array", - "items": "string" - }, - "name": "tags", - "default": [], - "doc": "Tags for the MLModel" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "mlModelDeployment" - ], - "name": "DeployedTo" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "deployments", - "default": null, - "doc": "Deployments for the MLModel" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataJob" - ], - "name": "TrainedBy" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "trainingJobs", - "default": null, - "doc": "List of jobs (if any) used to train the model" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataJob" - ], - "name": "UsedBy" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "downstreamJobs", - "default": null, - "doc": "List of jobs (if any) that use the model" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "mlModelGroup" - ], - "name": "MemberOf" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "groups", - "default": null, - "doc": "Groups the model belongs to" - } - ], - "doc": "Properties associated with a ML Model" - }, - { - "type": "record", - "Aspect": { - "name": "intendedUse" - }, - "name": "IntendedUse", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "primaryUses", - "default": null, - "doc": "Primary Use cases for the MLModel." - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "enum", - "name": "IntendedUserType", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "symbols": [ - "ENTERPRISE", - "HOBBY", - "ENTERTAINMENT" - ] - } - } - ], - "name": "primaryUsers", - "default": null, - "doc": "Primary Intended Users - For example, was the MLModel developed for entertainment purposes, for hobbyists, or enterprise solutions?" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "outOfScopeUses", - "default": null, - "doc": "Highlight technology that the MLModel might easily be confused with, or related contexts that users could try to apply the MLModel to." - } - ], - "doc": "Intended Use for the ML Model" - }, - { - "type": "record", - "Aspect": { - "name": "mlModelFactorPrompts" - }, - "name": "MLModelFactorPrompts", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "MLModelFactors", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "groups", - "default": null, - "doc": "Groups refers to distinct categories with similar characteristics that are present in the evaluation data instances.\nFor human-centric machine learning MLModels, groups are people who share one or multiple characteristics." 
- }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "instrumentation", - "default": null, - "doc": "The performance of a MLModel can vary depending on what instruments were used to capture the input to the MLModel.\nFor example, a face detection model may perform differently depending on the camera\u2019s hardware and software,\nincluding lens, image stabilization, high dynamic range techniques, and background blurring for portrait mode." - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "environment", - "default": null, - "doc": "A further factor affecting MLModel performance is the environment in which it is deployed." - } - ], - "doc": "Factors affecting the performance of the MLModel." - } - } - ], - "name": "relevantFactors", - "default": null, - "doc": "What are foreseeable salient factors for which MLModel performance may vary, and how were these determined?" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "com.linkedin.pegasus2avro.ml.metadata.MLModelFactors" - } - ], - "name": "evaluationFactors", - "default": null, - "doc": "Which factors are being reported, and why were these chosen?" - } - ], - "doc": "Prompts which affect the performance of the MLModel" - }, - { - "type": "record", - "Aspect": { - "name": "mlModelMetrics" - }, - "name": "Metrics", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "performanceMeasures", - "default": null, - "doc": "Measures of MLModel performance" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "decisionThreshold", - "default": null, - "doc": "Decision Thresholds used (if any)?" - } - ], - "doc": "Metrics to be featured for the MLModel." - }, - { - "type": "record", - "Aspect": { - "name": "mlModelEvaluationData" - }, - "name": "EvaluationData", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "BaseData", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - }, - "type": "string", - "name": "dataset", - "doc": "What dataset were used in the MLModel?" - }, - { - "type": [ - "null", - "string" - ], - "name": "motivation", - "default": null, - "doc": "Why was this dataset chosen?" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "preProcessing", - "default": null, - "doc": "How was the data preprocessed (e.g., tokenization of sentences, cropping of images, any filtering such as dropping images without faces)?" - } - ], - "doc": "BaseData record" - } - }, - "name": "evaluationData", - "doc": "Details on the dataset(s) used for the quantitative analyses in the MLModel" - } - ], - "doc": "All referenced datasets would ideally point to any set of documents that provide visibility into the source and composition of the dataset." 
- }, - { - "type": "record", - "Aspect": { - "name": "mlModelTrainingData" - }, - "name": "TrainingData", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": { - "type": "array", - "items": "com.linkedin.pegasus2avro.ml.metadata.BaseData" - }, - "name": "trainingData", - "doc": "Details on the dataset(s) used for training the MLModel" - } - ], - "doc": "Ideally, the MLModel card would contain as much information about the training data as the evaluation data. However, there might be cases where it is not feasible to provide this level of detailed information about the training data. For example, the data may be proprietary, or require a non-disclosure agreement. In these cases, we advocate for basic details about the distributions over groups in the data, as well as any other details that could inform stakeholders on the kinds of biases the model may have encoded." - }, - { - "type": "record", - "Aspect": { - "name": "mlModelQuantitativeAnalyses" - }, - "name": "QuantitativeAnalyses", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - "string" - ], - "name": "unitaryResults", - "default": null, - "doc": "Link to a dashboard with results showing how the MLModel performed with respect to each factor" - }, - { - "type": [ - "null", - "string" - ], - "name": "intersectionalResults", - "default": null, - "doc": "Link to a dashboard with results showing how the MLModel performed with respect to the intersection of evaluated factors?" - } - ], - "doc": "Quantitative analyses should be disaggregated, that is, broken down by the chosen factors. Quantitative analyses should provide the results of evaluating the MLModel according to the chosen metrics, providing confidence interval values when possible." - }, - { - "type": "record", - "Aspect": { - "name": "mlModelEthicalConsiderations" - }, - "name": "EthicalConsiderations", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "data", - "default": null, - "doc": "Does the MLModel use any sensitive data (e.g., protected classes)?" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "humanLife", - "default": null, - "doc": " Is the MLModel intended to inform decisions about matters central to human life or flourishing \u2013 e.g., health or safety? Or could it be used in such a way?" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "mitigations", - "default": null, - "doc": "What risk mitigation strategies were used during MLModel development?" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "risksAndHarms", - "default": null, - "doc": "What risks may be present in MLModel usage? Try to identify the potential recipients, likelihood, and magnitude of harms. If these cannot be determined, note that they were considered but remain unknown." - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "useCases", - "default": null, - "doc": "Are there any known MLModel use cases that are especially fraught? This may connect directly to the intended use section" - } - ], - "doc": "This section is intended to demonstrate the ethical considerations that went into MLModel development, surfacing ethical challenges and solutions to stakeholders." 
- }, - { - "type": "record", - "Aspect": { - "name": "mlModelCaveatsAndRecommendations" - }, - "name": "CaveatsAndRecommendations", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - { - "type": "record", - "name": "CaveatDetails", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - "boolean" - ], - "name": "needsFurtherTesting", - "default": null, - "doc": "Did the results suggest any further testing?" - }, - { - "type": [ - "null", - "string" - ], - "name": "caveatDescription", - "default": null, - "doc": "Caveat Description\nFor ex: Given gender classes are binary (male/not male), which we include as male/female. Further work needed to evaluate across a spectrum of genders." - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "groupsNotRepresented", - "default": null, - "doc": "Relevant groups that were not represented in the evaluation dataset?" - } - ], - "doc": "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?" - } - ], - "name": "caveats", - "default": null, - "doc": "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset?" - }, - { - "type": [ - "null", - "string" - ], - "name": "recommendations", - "default": null, - "doc": "Recommendations on where this MLModel should be used." - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "idealDatasetCharacteristics", - "default": null, - "doc": "Ideal characteristics of an evaluation dataset for this MLModel" - } - ], - "doc": "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?" 
- }, - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - { - "type": "record", - "Aspect": { - "name": "sourceCode" - }, - "name": "SourceCode", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": { - "type": "array", - "items": { - "type": "record", - "name": "SourceCodeUrl", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": { - "type": "enum", - "name": "SourceCodeUrlType", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "symbols": [ - "ML_MODEL_SOURCE_CODE", - "TRAINING_PIPELINE_SOURCE_CODE", - "EVALUATION_PIPELINE_SOURCE_CODE" - ] - }, - "name": "type", - "doc": "Source Code Url Types" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": "string", - "name": "sourceCodeUrl", - "doc": "Source Code Url" - } - ], - "doc": "Source Code Url Entity" - } - }, - "name": "sourceCode", - "doc": "Source Code along with types" - } - ], - "doc": "Source Code" - }, - "com.linkedin.pegasus2avro.common.Status", - { - "type": "record", - "Aspect": { - "name": "cost" - }, - "name": "Cost", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": { - "type": "enum", - "symbolDocs": { - "ORG_COST_TYPE": "Org Cost Type to which the Cost of this entity should be attributed to" - }, - "name": "CostType", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "ORG_COST_TYPE" - ], - "doc": "Type of Cost Code" - }, - "name": "costType" - }, - { - "type": { - "type": "record", - "name": "CostCost", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "type": [ - "null", - "double" - ], - "name": "costId", - "default": null - }, - { - "type": [ - "null", - "string" - ], - "name": "costCode", - "default": null - }, - { - "type": { - "type": "enum", - "name": "CostCostDiscriminator", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "costId", - "costCode" - ] - }, - "name": "fieldDiscriminator", - "doc": "Contains the name of the field that has its value set." - } - ] - }, - "name": "cost" - } - ] - }, - { - "type": "record", - "Aspect": { - "name": "deprecation" - }, - "name": "Deprecation", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "Searchable": { - "fieldType": "BOOLEAN", - "weightsPerFieldValue": { - "true": 0.5 - } - }, - "type": "boolean", - "name": "deprecated", - "doc": "Whether the entity is deprecated." - }, - { - "type": [ - "null", - "long" - ], - "name": "decommissionTime", - "default": null, - "doc": "The time user plan to decommission this entity." - }, - { - "type": "string", - "name": "note", - "doc": "Additional information about the entity deprecation plan, such as the wiki, doc, RB." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpuserUrn" - }, - "type": "string", - "name": "actor", - "doc": "The corpuser URN which will be credited for modifying this deprecation content." - } - ], - "doc": "Deprecation status of an entity" - }, - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the MLModel. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "MLModel Snapshot entity details." 
- }, - { - "type": "record", - "Entity": { - "keyAspect": "mlPrimaryKeyKey", - "name": "mlPrimaryKey" - }, - "name": "MLPrimaryKeySnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "mlPrimaryKeyKey" - }, - "name": "MLPrimaryKeyKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "addToFilters": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "featureNamespace", - "doc": "Namespace for the primary key" - }, - { - "Searchable": { - "boostScore": 8.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Name of the primary key" - } - ], - "doc": "Key for an MLPrimaryKey" - }, - { - "type": "record", - "Aspect": { - "name": "mlPrimaryKeyProperties" - }, - "name": "MLPrimaryKeyProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the MLPrimaryKey" - }, - { - "type": [ - "null", - { - "type": "enum", - "symbolDocs": { - "AUDIO": "Audio Data", - "BINARY": "Binary data is discrete data that can be in only one of two categories \u2014 either yes or no, 1 or 0, off or on, etc", - "BYTE": "Bytes data are binary-encoded values that can represent complex objects.", - "CONTINUOUS": "Continuous data are made of uncountable values, often the result of a measurement such as height, weight, age etc.", - "COUNT": "Count data is discrete whole number data \u2014 no negative numbers here.\nCount data often has many small values, such as zero and one.", - "IMAGE": "Image Data", - "INTERVAL": "Interval data has equal spaces between the numbers and does not represent a temporal pattern.\nExamples include percentages, temperatures, and income.", - "MAP": "Mapping Data Type ex: dict, map", - "NOMINAL": "Nominal data is made of discrete values with no numerical relationship between the different categories \u2014 mean and median are meaningless.\nAnimal species is one example. For example, pig is not higher than bird and lower than fish.", - "ORDINAL": "Ordinal data are discrete integers that can be ranked or sorted.\nFor example, the distance between first and second may not be the same as the distance between second and third.", - "SEQUENCE": "Sequence Data Type ex: list, tuple, range", - "SET": "Set Data Type ex: set, frozenset", - "TEXT": "Text Data", - "TIME": "Time data is a cyclical, repeating continuous form of data.\nThe relevant time features can be any period\u2014 daily, weekly, monthly, annual, etc.", - "UNKNOWN": "Unknown data are data that we don't know the type for.", - "USELESS": "Useless data is unique, discrete data with no potential relationship with the outcome variable.\nA useless feature has high cardinality. 
An example would be bank account numbers that were generated randomly.", - "VIDEO": "Video Data" - }, - "name": "MLFeatureDataType", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "USELESS", - "NOMINAL", - "ORDINAL", - "BINARY", - "COUNT", - "TIME", - "INTERVAL", - "IMAGE", - "VIDEO", - "AUDIO", - "TEXT", - "MAP", - "SEQUENCE", - "SET", - "CONTINUOUS", - "BYTE", - "UNKNOWN" - ], - "doc": "MLFeature Data Type" - } - ], - "name": "dataType", - "default": null, - "doc": "Data Type of the MLPrimaryKey" - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "name": "version", - "default": null, - "doc": "Version of the MLPrimaryKey" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "DerivedFrom" - } - }, - "type": { - "type": "array", - "items": "string" - }, - "name": "sources", - "doc": "Source of the MLPrimaryKey" - } - ], - "doc": "Properties associated with a MLPrimaryKey" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the MLPrimaryKey. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ] - }, - { - "type": "record", - "Entity": { - "keyAspect": "mlFeatureKey", - "name": "mlFeature" - }, - "name": "MLFeatureSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.MLFeatureUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." 
- }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "mlFeatureKey" - }, - "name": "MLFeatureKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "featureNamespace", - "doc": "Namespace for the feature" - }, - { - "Searchable": { - "boostScore": 8.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Name of the feature" - } - ], - "doc": "Key for an MLFeature" - }, - { - "type": "record", - "Aspect": { - "name": "mlFeatureProperties" - }, - "name": "MLFeatureProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the MLFeature" - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.MLFeatureDataType" - ], - "name": "dataType", - "default": null, - "doc": "Data Type of the MLFeature" - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "name": "version", - "default": null, - "doc": "Version of the MLFeature" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "DerivedFrom" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "sources", - "default": null, - "doc": "Source of the MLFeature" - } - ], - "doc": "Properties associated with a MLFeature" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the MLFeature. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ] - }, - { - "type": "record", - "Entity": { - "keyAspect": "mlFeatureTableKey", - "name": "mlFeatureTable" - }, - "name": "MLFeatureTableSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." 
- }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "mlFeatureTableKey" - }, - "name": "MLFeatureTableKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Relationship": { - "entityTypes": [ - "dataPlatform" - ], - "name": "SourcePlatform" - }, - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "platform", - "doc": "Data platform urn associated with the feature table" - }, - { - "Searchable": { - "boostScore": 8.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Name of the feature table" - } - ], - "doc": "Key for an MLFeatureTable" - }, - { - "type": "record", - "Aspect": { - "name": "mlFeatureTableProperties" - }, - "name": "MLFeatureTableProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the MLFeatureTable" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "mlFeature" - ], - "name": "Contains" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "mlFeatures", - "default": null, - "doc": "List of features contained in the feature table" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "mlPrimaryKey" - ], - "name": "KeyedBy" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "mlPrimaryKeys", - "default": null, - "doc": "List of primary keys in the feature table (if multiple, assumed to act as a composite key)" - } - ], - "doc": "Properties associated with a MLFeatureTable" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the MLFeatureTable. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ] - }, - { - "type": "record", - "Entity": { - "keyAspect": "mlModelDeploymentKey", - "name": "mlModelDeployment" - }, - "name": "MLModelDeploymentSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." 
- }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "mlModelDeploymentKey" - }, - "name": "MLModelDeploymentKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "platform", - "doc": "Standardized platform urn for the model Deployment" - }, - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Name of the MLModelDeployment" - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - }, - "type": "com.linkedin.pegasus2avro.common.FabricType", - "name": "origin", - "doc": "Fabric type where model Deployment belongs to or where it was generated" - } - ], - "doc": "Key for an ML model deployment" - }, - { - "type": "record", - "Aspect": { - "name": "mlModelDeploymentProperties" - }, - "name": "MLModelDeploymentProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "externalUrl", - "default": null, - "doc": "URL where the reference exist" - }, - { - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the MLModelDeployment" - }, - { - "type": [ - "null", - "long" - ], - "name": "createdAt", - "default": null, - "doc": "Date when the MLModelDeployment was developed" - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "name": "version", - "default": null, - "doc": "Version of the MLModelDeployment" - }, - { - "type": [ - "null", - { - "type": "enum", - "symbolDocs": { - "CREATING": "Deployments being created.", - "DELETING": "Deployments being deleted.", - "FAILED": "Deployments with an error state.", - "IN_SERVICE": "Deployments that are active.", - "OUT_OF_SERVICE": "Deployments out of service.", - "ROLLING_BACK": "Deployments being reverted to a previous version.", - "UNKNOWN": "Deployments with unknown/unmappable state.", - "UPDATING": "Deployments being updated." - }, - "name": "DeploymentStatus", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "symbols": [ - "OUT_OF_SERVICE", - "CREATING", - "UPDATING", - "ROLLING_BACK", - "IN_SERVICE", - "DELETING", - "FAILED", - "UNKNOWN" - ], - "doc": "Model endpoint statuses" - } - ], - "name": "status", - "default": null, - "doc": "Status of the deployment" - } - ], - "doc": "Properties associated with an ML Model Deployment" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the MLModelDeployment. Depending on the use case, this can either be all, or a selection, of supported aspects." 
- } - ] - }, - { - "type": "record", - "Entity": { - "keyAspect": "mlModelGroupKey", - "name": "mlModelGroup" - }, - "name": "MLModelGroupSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "mlModelGroupKey" - }, - "name": "MLModelGroupKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "platform", - "doc": "Standardized platform urn for the model group" - }, - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "Name of the MLModelGroup" - }, - { - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - }, - "type": "com.linkedin.pegasus2avro.common.FabricType", - "name": "origin", - "doc": "Fabric type where model group belongs to or where it was generated" - } - ], - "doc": "Key for an ML model group" - }, - { - "type": "record", - "Aspect": { - "name": "mlModelGroupProperties" - }, - "name": "MLModelGroupProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "fields": [ - { - "Searchable": { - "/*": { - "queryByDefault": true - } - }, - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "Custom property bag." - }, - { - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - }, - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the MLModelGroup" - }, - { - "type": [ - "null", - "long" - ], - "name": "createdAt", - "default": null, - "doc": "Date when the MLModelGroup was developed" - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "name": "version", - "default": null, - "doc": "Version of the MLModelGroup" - } - ], - "doc": "Properties associated with an ML Model Group" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the MLModelGroup. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ] - }, - { - "type": "record", - "Entity": { - "keyAspect": "tagKey", - "name": "tag" - }, - "name": "TagSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.TagUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." 
- }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "tagKey" - }, - "name": "TagKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name", - "doc": "The unique tag name" - } - ], - "doc": "Key for a Tag" - }, - "com.linkedin.pegasus2avro.common.Ownership", - { - "type": "record", - "Aspect": { - "name": "tagProperties" - }, - "name": "TagProperties", - "namespace": "com.linkedin.pegasus2avro.tag", - "fields": [ - { - "type": "string", - "name": "name", - "doc": "Name of the tag" - }, - { - "type": [ - "null", - "string" - ], - "name": "description", - "default": null, - "doc": "Documentation of the tag" - } - ], - "doc": "Properties associated with a Tag" - }, - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific dataset entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "glossaryTermKey", - "name": "glossaryTerm" - }, - "name": "GlossaryTermSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "glossaryTermKey" - }, - "name": "GlossaryTermKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name" - } - ], - "doc": "Key for a GlossaryTerm" - }, - { - "type": "record", - "Aspect": { - "name": "glossaryTermInfo" - }, - "name": "GlossaryTermInfo", - "namespace": "com.linkedin.pegasus2avro.glossary", - "fields": [ - { - "Searchable": {}, - "type": "string", - "name": "definition", - "doc": "Definition of business term" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryNodeUrn" - }, - "type": [ - "null", - "string" - ], - "name": "parentNode", - "default": null, - "doc": "Parent node of the glossary term" - }, - { - "Searchable": { - "fieldType": "KEYWORD" - }, - "type": "string", - "name": "termSource", - "doc": "Source of the Business Term (INTERNAL or EXTERNAL) with default value as INTERNAL" - }, - { - "Searchable": { - "fieldType": "KEYWORD" - }, - "type": [ - "null", - "string" - ], - "name": "sourceRef", - "default": null, - "doc": "External Reference to the business-term" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - }, - "type": [ - "null", - "string" - ], - "name": "sourceUrl", - "default": null, - "doc": "The abstracted URL such as https://spec.edmcouncil.org/fibo/ontology/FBC/FinancialInstruments/FinancialInstruments/CashInstrument." 
- }, - { - "type": { - "type": "map", - "values": "string" - }, - "name": "customProperties", - "default": {}, - "doc": "A key-value map to capture any other non-standardized properties for the glossary term" - }, - { - "type": [ - "null", - "string" - ], - "name": "rawSchema", - "default": null, - "doc": "Schema definition of the glossary term" - } - ], - "doc": "Properties associated with a GlossaryTerm" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.BrowsePaths", - { - "type": "record", - "Aspect": { - "name": "glossaryRelatedTerms" - }, - "name": "GlossaryRelatedTerms", - "namespace": "com.linkedin.pegasus2avro.glossary", - "fields": [ - { - "Relationship": { - "/*": { - "entityTypes": [ - "glossaryTerm" - ], - "name": "IsA" - } - }, - "Searchable": { - "/*": { - "boostScore": 2.0, - "fieldName": "isRelatedTerms", - "fieldType": "URN" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "isRelatedTerms", - "default": null, - "doc": "The relationship Is A with glossary term" - }, - { - "Relationship": { - "/*": { - "entityTypes": [ - "glossaryTerm" - ], - "name": "HasA" - } - }, - "Searchable": { - "/*": { - "boostScore": 2.0, - "fieldName": "hasRelatedTerms", - "fieldType": "URN" - } - }, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "hasRelatedTerms", - "default": null, - "doc": "The relationship Has A with glossary term" - } - ], - "doc": "Has A / Is A lineage information about a glossary Term reporting the lineage" - } - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the GlossaryTerm. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific GlossaryTerm entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "glossaryNodeKey", - "name": "glossaryNode" - }, - "name": "GlossaryNodeSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryNodeUrn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "glossaryNodeKey" - }, - "name": "GlossaryNodeKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - }, - "type": "string", - "name": "name" - } - ], - "doc": "Key for a GlossaryNode" - }, - { - "type": "record", - "Aspect": { - "name": "glossaryNodeInfo" - }, - "name": "GlossaryNodeInfo", - "namespace": "com.linkedin.pegasus2avro.glossary", - "fields": [ - { - "Searchable": {}, - "type": "string", - "name": "definition", - "doc": "Definition of business node" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryNodeUrn" - }, - "type": [ - "null", - "string" - ], - "name": "parentNode", - "default": null, - "doc": "Parent node of the glossary term" - } - ], - "doc": "Properties associated with a GlossaryNode" - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the GlossaryNode. Depending on the use case, this can either be all, or a selection, of supported aspects." 
- } - ], - "doc": "A metadata snapshot for a specific GlossaryNode entity." - }, - { - "type": "record", - "Entity": { - "keyAspect": "dataHubPolicyKey", - "name": "dataHubPolicy" - }, - "name": "DataHubPolicySnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "dataHubPolicyKey" - }, - "name": "DataHubPolicyKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "type": "string", - "name": "id", - "doc": "A unique id for the DataHub access policy record. Generated on the server side at policy creation time." - } - ], - "doc": "Key for a DataHub Policy" - }, - { - "type": "record", - "Aspect": { - "name": "dataHubPolicyInfo" - }, - "name": "DataHubPolicyInfo", - "namespace": "com.linkedin.pegasus2avro.policy", - "fields": [ - { - "type": "string", - "name": "displayName", - "doc": "Display name of the Policy" - }, - { - "type": "string", - "name": "description", - "doc": "Description of the Policy" - }, - { - "type": "string", - "name": "type", - "doc": "The type of policy" - }, - { - "type": "string", - "name": "state", - "doc": "The state of policy, ACTIVE or INACTIVE" - }, - { - "type": [ - "null", - { - "type": "record", - "name": "DataHubResourceFilter", - "namespace": "com.linkedin.pegasus2avro.policy", - "fields": [ - { - "type": [ - "null", - "string" - ], - "name": "type", - "default": null, - "doc": "The type of resource that the policy applies to. This will most often be a data asset entity name, for\nexample 'dataset'. It is not strictly required because in the future we will want to support filtering a resource\nby domain, as well." - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "resources", - "default": null, - "doc": "A specific set of resources to apply the policy to, e.g. asset urns" - }, - { - "type": "boolean", - "name": "allResources", - "default": false, - "doc": "Whether the policy should be applied to all assets matching the filter." - } - ], - "doc": "Information used to filter DataHub resource." - } - ], - "name": "resources", - "default": null, - "doc": "The resource that the policy applies to. Not required for some 'Platform' privileges." - }, - { - "type": { - "type": "array", - "items": "string" - }, - "name": "privileges", - "doc": "The privileges that the policy grants." - }, - { - "type": { - "type": "record", - "name": "DataHubActorFilter", - "namespace": "com.linkedin.pegasus2avro.policy", - "fields": [ - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "users", - "default": null, - "doc": "A specific set of users to apply the policy to (disjunctive)" - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "groups", - "default": null, - "doc": "A specific set of groups to apply the policy to (disjunctive)" - }, - { - "type": "boolean", - "name": "resourceOwners", - "default": false, - "doc": "Whether the filter should return true for owners of a particular resource.\nOnly applies to policies of type 'Metadata', which have a resource associated with them." - }, - { - "type": "boolean", - "name": "allUsers", - "default": false, - "doc": "Whether the filter should apply to all users." 
- }, - { - "type": "boolean", - "name": "allGroups", - "default": false, - "doc": "Whether the filter should apply to all groups." - } - ], - "doc": "Information used to filter DataHub actors." - }, - "name": "actors", - "doc": "The actors that the policy applies to." - }, - { - "type": "boolean", - "name": "editable", - "default": true, - "doc": "Whether the policy should be editable via the UI" - } - ], - "doc": "Information about a DataHub (UI) access policy." - } - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the DataHub access policy." - } - ], - "doc": "A metadata snapshot for DataHub Access Policy data." - }, - { - "type": "record", - "Entity": { - "keyAspect": "schemaFieldKey", - "name": "schemaField" - }, - "name": "SchemaFieldSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "urn", - "doc": "URN for the entity the metadata snapshot is associated with." - }, - { - "type": { - "type": "array", - "items": [ - { - "type": "record", - "Aspect": { - "name": "schemaFieldKey" - }, - "name": "SchemaFieldKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "fields": [ - { - "Searchable": { - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "parent", - "doc": "Parent associated with the schema field" - }, - { - "Searchable": { - "fieldType": "KEYWORD" - }, - "type": "string", - "name": "fieldPath", - "doc": "fieldPath identifying the schema field" - } - ], - "doc": "Key for a SchemaField" - } - ] - }, - "name": "aspects", - "doc": "The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "doc": "A metadata snapshot for a specific schema field entity." - } - ], - "name": "proposedSnapshot", - "doc": "Snapshot of the proposed metadata change. Include only the aspects affected by the change in the snapshot." - }, - { - "type": [ - "null" - ], - "name": "proposedDelta", - "default": null, - "doc": "Delta of the proposed metadata partial update." - }, - { - "type": [ - "null", - { - "type": "record", - "name": "SystemMetadata", - "namespace": "com.linkedin.pegasus2avro.mxe", - "fields": [ - { - "type": [ - "long", - "null" - ], - "name": "lastObserved", - "default": 0, - "doc": "The timestamp the metadata was observed at" - }, - { - "type": [ - "string", - "null" - ], - "name": "runId", - "default": "no-run-id-provided", - "doc": "The run id that produced the metadata" - }, - { - "type": [ - "null", - { - "type": "map", - "values": "string" - } - ], - "name": "properties", - "default": null, - "doc": "Additional properties" - } - ], - "doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead." - } - ], - "name": "systemMetadata", - "default": null, - "doc": "Metadata around how the snapshot was ingested" - } - ], - "doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead." 
- }, - { - "type": "record", - "name": "MetadataChangeProposal", - "namespace": "com.linkedin.pegasus2avro.mxe", - "fields": [ - { - "type": [ - "null", - "com.linkedin.events.KafkaAuditHeader" - ], - "name": "auditHeader", - "default": null, - "doc": "Kafka audit header. See go/kafkaauditheader for more info." - }, - { - "type": "string", - "name": "entityType", - "doc": "Type of the entity being written to" - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": [ - "null", - "string" - ], - "name": "entityUrn", - "default": null, - "doc": "Urn of the entity being written\n" - }, - { - "type": [ - "null", - { - "type": "record", - "name": "GenericAspect", - "namespace": "com.linkedin.pegasus2avro.mxe", - "fields": [ - { - "type": "bytes", - "name": "value" - }, - { - "type": "string", - "name": "contentType" - } - ], - "doc": "Generic record structure for serializing an Aspect\n" - } - ], - "name": "entityKeyAspect", - "default": null, - "doc": "Key aspect of the entity being written" - }, - { - "type": { - "type": "enum", - "symbolDocs": { - "CREATE": "NOT SUPPORTED YET\ninsert if not exists. otherwise fail", - "DELETE": "NOT SUPPORTED YET\ndelete action", - "PATCH": "NOT SUPPORTED YET\npatch the changes instead of full replace", - "UPDATE": "NOT SUPPORTED YET\nupdate if exists. otherwise fail", - "UPSERT": "insert if not exists. otherwise update" - }, - "name": "ChangeType", - "namespace": "com.linkedin.pegasus2avro.events.metadata", - "symbols": [ - "UPSERT", - "CREATE", - "UPDATE", - "DELETE", - "PATCH" - ], - "doc": "Descriptor for a change action" - }, - "name": "changeType", - "doc": "Type of change being proposed" - }, - { - "type": [ - "null", - "string" - ], - "name": "aspectName", - "default": null, - "doc": "Aspect of the entity being written to\nNot filling this out implies that the writer wants to affect the entire entity\nNote: This is only valid for CREATE and DELETE operations.\n" - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.mxe.GenericAspect" - ], - "name": "aspect", - "default": null - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.mxe.SystemMetadata" - ], - "name": "systemMetadata", - "default": null, - "doc": "A string->string map of custom properties that one might want to attach to an event\n" - } - ], - "doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataChangeLog is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeProposal will be emitted instead." 
- }, - { - "type": "record", - "name": "UsageAggregation", - "namespace": "com.linkedin.pegasus2avro.usage", - "fields": [ - { - "type": "long", - "name": "bucket", - "doc": " Bucket start time in milliseconds " - }, - { - "type": { - "type": "enum", - "name": "WindowDuration", - "namespace": "com.linkedin.pegasus2avro.common", - "symbols": [ - "YEAR", - "MONTH", - "WEEK", - "DAY", - "HOUR" - ], - "doc": "Enum to define the length of a bucket when doing aggregations" - }, - "name": "duration", - "doc": " Bucket duration " - }, - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "resource", - "doc": " Resource associated with these usage stats " - }, - { - "type": { - "type": "record", - "name": "UsageAggregationMetrics", - "namespace": "com.linkedin.pegasus2avro.usage", - "fields": [ - { - "type": [ - "null", - "int" - ], - "name": "uniqueUserCount", - "default": null, - "doc": " Unique user count " - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "UserUsageCounts", - "namespace": "com.linkedin.pegasus2avro.usage", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": [ - "null", - "string" - ], - "name": "user", - "default": null - }, - { - "type": "int", - "name": "count" - }, - { - "type": [ - "null", - "string" - ], - "name": "userEmail", - "default": null, - "doc": " If user_email is set, we attempt to resolve the user's urn upon ingest " - } - ], - "doc": " Records a single user's usage counts for a given resource " - } - } - ], - "name": "users", - "default": null, - "doc": " Users within this bucket, with frequency counts " - }, - { - "type": [ - "null", - "int" - ], - "name": "totalSqlQueries", - "default": null, - "doc": " Total SQL query count " - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "topSqlQueries", - "default": null, - "doc": " Frequent SQL queries; mostly makes sense for datasets in SQL databases " - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "FieldUsageCounts", - "namespace": "com.linkedin.pegasus2avro.usage", - "fields": [ - { - "type": "string", - "name": "fieldName" - }, - { - "type": "int", - "name": "count" - } - ], - "doc": " Records field-level usage counts for a given resource " - } - } - ], - "name": "fields", - "default": null, - "doc": " Field-level usage stats " - } - ], - "doc": "Metrics for usage data for a given resource and bucket. Not all fields\nmake sense for all buckets, so every field is optional." - }, - "name": "metrics", - "doc": " Metrics associated with this bucket " - } - ], - "doc": "Usage data for a given resource, rolled up into a bucket." 
- }, - "com.linkedin.pegasus2avro.chart.ChartInfo", - "com.linkedin.pegasus2avro.chart.ChartQuery", - "com.linkedin.pegasus2avro.chart.EditableChartProperties", - "com.linkedin.pegasus2avro.common.BrowsePaths", - "com.linkedin.pegasus2avro.common.Cost", - "com.linkedin.pegasus2avro.common.Deprecation", - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.dashboard.DashboardInfo", - "com.linkedin.pegasus2avro.dashboard.EditableDashboardProperties", - "com.linkedin.pegasus2avro.datajob.DataFlowInfo", - "com.linkedin.pegasus2avro.datajob.DataJobInfo", - "com.linkedin.pegasus2avro.datajob.DataJobInputOutput", - "com.linkedin.pegasus2avro.datajob.EditableDataFlowProperties", - "com.linkedin.pegasus2avro.datajob.EditableDataJobProperties", - "com.linkedin.pegasus2avro.dataplatform.DataPlatformInfo", - "com.linkedin.pegasus2avro.dataprocess.DataProcessInfo", - "com.linkedin.pegasus2avro.dataset.DatasetDeprecation", - { - "type": "record", - "Aspect": { - "name": "datasetProfile", - "type": "timeseries" - }, - "name": "DatasetProfile", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "long", - "name": "timestampMillis", - "doc": "The event timestamp field as epoch at UTC in milli seconds." - }, - { - "type": [ - "null", - { - "type": "record", - "name": "TimeWindowSize", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "fields": [ - { - "type": { - "type": "enum", - "name": "CalendarInterval", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "symbols": [ - "SECOND", - "MINUTE", - "HOUR", - "DAY", - "WEEK", - "MONTH", - "QUARTER", - "YEAR" - ] - }, - "name": "unit", - "doc": "Interval unit such as minute/hour/day etc." - }, - { - "type": "int", - "name": "multiple", - "default": 1, - "doc": "How many units. Defaults to 1." - } - ], - "doc": "Defines the size of a time window." - } - ], - "name": "eventGranularity", - "default": null, - "doc": "Granularity of the event if applicable" - }, - { - "type": [ - "null", - { - "type": "record", - "name": "PartitionSpec", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "fields": [ - { - "type": "string", - "name": "partition", - "doc": "String representation of the partition" - }, - { - "type": [ - "null", - { - "type": "record", - "name": "TimeWindow", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "fields": [ - { - "type": "long", - "name": "startTimeMillis", - "doc": "Start time as epoch at UTC." - }, - { - "type": "com.linkedin.pegasus2avro.timeseries.TimeWindowSize", - "name": "length", - "doc": "The length of the window." - } - ] - } - ], - "name": "timePartition", - "default": null, - "doc": "Time window of the partition if applicable" - } - ], - "doc": "Defines how the data is partitioned" - } - ], - "name": "partitionSpec", - "default": null, - "doc": "The optional partition specification." 
- }, - { - "type": [ - "null", - "long" - ], - "name": "rowCount", - "default": null - }, - { - "type": [ - "null", - "long" - ], - "name": "columnCount", - "default": null - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "DatasetFieldProfile", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "string", - "name": "fieldPath" - }, - { - "type": [ - "null", - "long" - ], - "name": "uniqueCount", - "default": null - }, - { - "type": [ - "null", - "float" - ], - "name": "uniqueProportion", - "default": null - }, - { - "type": [ - "null", - "long" - ], - "name": "nullCount", - "default": null - }, - { - "type": [ - "null", - "float" - ], - "name": "nullProportion", - "default": null - }, - { - "type": [ - "null", - "string" - ], - "name": "min", - "default": null - }, - { - "type": [ - "null", - "string" - ], - "name": "max", - "default": null - }, - { - "type": [ - "null", - "string" - ], - "name": "mean", - "default": null - }, - { - "type": [ - "null", - "string" - ], - "name": "median", - "default": null - }, - { - "type": [ - "null", - "string" - ], - "name": "stdev", - "default": null - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "Quantile", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "string", - "name": "quantile" - }, - { - "type": "string", - "name": "value" - } - ] - } - } - ], - "name": "quantiles", - "default": null - }, - { - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "ValueFrequency", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "string", - "name": "value" - }, - { - "type": "long", - "name": "frequency" - } - ] - } - } - ], - "name": "distinctValueFrequencies", - "default": null - }, - { - "type": [ - "null", - { - "type": "record", - "name": "Histogram", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": { - "type": "array", - "items": "string" - }, - "name": "boundaries" - }, - { - "type": { - "type": "array", - "items": "float" - }, - "name": "heights" - } - ] - } - ], - "name": "histogram", - "default": null - }, - { - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "sampleValues", - "default": null - } - ], - "doc": "Stats corresponding to fields in a dataset" - } - } - ], - "name": "fieldProfiles", - "default": null - } - ], - "doc": "Stats corresponding to datasets" - }, - "com.linkedin.pegasus2avro.dataset.DatasetProperties", - "com.linkedin.pegasus2avro.dataset.DatasetUpstreamLineage", - { - "type": "record", - "Aspect": { - "name": "datasetUsageStatistics", - "type": "timeseries" - }, - "name": "DatasetUsageStatistics", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "long", - "name": "timestampMillis", - "doc": "The event timestamp field as epoch at UTC in milli seconds." - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.timeseries.TimeWindowSize" - ], - "name": "eventGranularity", - "default": null, - "doc": "Granularity of the event if applicable" - }, - { - "type": [ - "null", - "com.linkedin.pegasus2avro.timeseries.PartitionSpec" - ], - "name": "partitionSpec", - "default": null, - "doc": "The optional partition specification." 
- }, - { - "TimeseriesField": {}, - "type": [ - "null", - "int" - ], - "name": "uniqueUserCount", - "default": null, - "doc": "Unique user count" - }, - { - "TimeseriesField": {}, - "type": [ - "null", - "int" - ], - "name": "totalSqlQueries", - "default": null, - "doc": "Total SQL query count" - }, - { - "TimeseriesField": {}, - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "name": "topSqlQueries", - "default": null, - "doc": "Frequent SQL queries; mostly makes sense for datasets in SQL databases" - }, - { - "TimeseriesFieldCollection": { - "key": "user" - }, - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "DatasetUserUsageCounts", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - }, - "type": "string", - "name": "user", - "doc": "The unique id of the user." - }, - { - "TimeseriesField": {}, - "type": "int", - "name": "count", - "doc": "Number of times the dataset has been used by the user." - }, - { - "TimeseriesField": {}, - "type": [ - "null", - "string" - ], - "name": "userEmail", - "default": null, - "doc": "If user_email is set, we attempt to resolve the user's urn upon ingest" - } - ], - "doc": "Records a single user's usage counts for a given resource" - } - } - ], - "name": "userCounts", - "default": null, - "doc": "Users within this bucket, with frequency counts" - }, - { - "TimeseriesFieldCollection": { - "key": "fieldPath" - }, - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "DatasetFieldUsageCounts", - "namespace": "com.linkedin.pegasus2avro.dataset", - "fields": [ - { - "type": "string", - "name": "fieldPath", - "doc": "The name of the field." - }, - { - "TimeseriesField": {}, - "type": "int", - "name": "count", - "doc": "Number of times the field has been used." - } - ], - "doc": "Records field-level usage counts for a given dataset" - } - } - ], - "name": "fieldCounts", - "default": null, - "doc": "Field-level usage stats" - } - ], - "doc": "Stats corresponding to dataset's usage." 
- }, - "com.linkedin.pegasus2avro.dataset.EditableDatasetProperties", - "com.linkedin.pegasus2avro.dataset.UpstreamLineage", - "com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo", - "com.linkedin.pegasus2avro.glossary.GlossaryRelatedTerms", - "com.linkedin.pegasus2avro.glossary.GlossaryTermInfo", - "com.linkedin.pegasus2avro.identity.CorpGroupInfo", - "com.linkedin.pegasus2avro.identity.CorpUserEditableInfo", - "com.linkedin.pegasus2avro.identity.CorpUserInfo", - "com.linkedin.pegasus2avro.identity.GroupMembership", - "com.linkedin.pegasus2avro.metadata.key.ChartKey", - "com.linkedin.pegasus2avro.metadata.key.CorpGroupKey", - "com.linkedin.pegasus2avro.metadata.key.CorpUserKey", - "com.linkedin.pegasus2avro.metadata.key.DashboardKey", - "com.linkedin.pegasus2avro.metadata.key.DataFlowKey", - "com.linkedin.pegasus2avro.metadata.key.DataHubPolicyKey", - "com.linkedin.pegasus2avro.metadata.key.DataJobKey", - "com.linkedin.pegasus2avro.metadata.key.DataPlatformKey", - "com.linkedin.pegasus2avro.metadata.key.DataProcessKey", - "com.linkedin.pegasus2avro.metadata.key.DatasetKey", - "com.linkedin.pegasus2avro.metadata.key.GlossaryNodeKey", - "com.linkedin.pegasus2avro.metadata.key.GlossaryTermKey", - "com.linkedin.pegasus2avro.metadata.key.MLFeatureKey", - "com.linkedin.pegasus2avro.metadata.key.MLFeatureTableKey", - "com.linkedin.pegasus2avro.metadata.key.MLModelDeploymentKey", - "com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey", - "com.linkedin.pegasus2avro.metadata.key.MLModelKey", - "com.linkedin.pegasus2avro.metadata.key.MLPrimaryKeyKey", - "com.linkedin.pegasus2avro.metadata.key.SchemaFieldKey", - "com.linkedin.pegasus2avro.metadata.key.TagKey", - "com.linkedin.pegasus2avro.ml.metadata.CaveatsAndRecommendations", - "com.linkedin.pegasus2avro.ml.metadata.EthicalConsiderations", - "com.linkedin.pegasus2avro.ml.metadata.EvaluationData", - "com.linkedin.pegasus2avro.ml.metadata.IntendedUse", - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties", - "com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties", - "com.linkedin.pegasus2avro.ml.metadata.MLHyperParam", - "com.linkedin.pegasus2avro.ml.metadata.MLMetric", - "com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties", - "com.linkedin.pegasus2avro.ml.metadata.MLModelFactorPrompts", - "com.linkedin.pegasus2avro.ml.metadata.MLModelGroupProperties", - "com.linkedin.pegasus2avro.ml.metadata.MLModelProperties", - "com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties", - "com.linkedin.pegasus2avro.ml.metadata.Metrics", - "com.linkedin.pegasus2avro.ml.metadata.QuantitativeAnalyses", - "com.linkedin.pegasus2avro.ml.metadata.SourceCode", - "com.linkedin.pegasus2avro.ml.metadata.TrainingData", - "com.linkedin.pegasus2avro.policy.DataHubPolicyInfo", - "com.linkedin.pegasus2avro.schema.EditableSchemaMetadata", - "com.linkedin.pegasus2avro.schema.SchemaMetadata", - "com.linkedin.pegasus2avro.tag.TagProperties" -] \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/metadata/schema_classes.py b/metadata-ingestion/src/datahub/metadata/schema_classes.py deleted file mode 100644 index 0578dd2ea4..0000000000 --- a/metadata-ingestion/src/datahub/metadata/schema_classes.py +++ /dev/null @@ -1,10626 +0,0 @@ -# flake8: noqa - -# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py -# Do not modify manually! 
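# A minimal sketch, for orientation only, of how ingestion code constructs the
# records defined in the schema.avsc content above. It assumes the regenerated
# module keeps the `*Class` naming used throughout this file and stays importable
# as `datahub.metadata.schema_classes` (the path of the file deleted here);
# `DatasetProfileClass` and `DatasetFieldProfileClass` are assumed names that
# mirror the DatasetProfile / DatasetFieldProfile records shown earlier in this diff.
from datahub.metadata.schema_classes import (
    DatasetFieldProfileClass,
    DatasetProfileClass,
)

profile = DatasetProfileClass(
    timestampMillis=1620000000000,   # required: event time as epoch millis at UTC
    rowCount=1000,                   # optional fields default to null
    columnCount=2,
    fieldProfiles=[
        DatasetFieldProfileClass(
            fieldPath="id",          # required per the DatasetFieldProfile record
            uniqueCount=1000,
            nullCount=0,
        ),
    ],
)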
- -# fmt: off -import json -import os.path -import decimal -import datetime -import six -from avrogen.dict_wrapper import DictWrapper -from avrogen import avrojson -from avro.schema import RecordSchema, SchemaFromJSONData as make_avsc_object -from avro import schema as avro_schema -from typing import List, Dict, Union, Optional - - -def __read_file(file_name): - with open(file_name, "r") as f: - return f.read() - - -def __get_names_and_schema(json_str): - names = avro_schema.Names() - schema = make_avsc_object(json.loads(json_str), names) - return names, schema - - -SCHEMA_JSON_STR = __read_file(os.path.join(os.path.dirname(__file__), "schema.avsc")) - - -__NAMES, SCHEMA = __get_names_and_schema(SCHEMA_JSON_STR) -__SCHEMAS: Dict[str, RecordSchema] = {} - - -def get_schema_type(fullname): - return __SCHEMAS.get(fullname) - - -__SCHEMAS = dict((n.fullname.lstrip("."), n) for n in six.itervalues(__NAMES.names)) - -class KafkaAuditHeaderClass(DictWrapper): - """This header records information about the context of an event as it is emitted into kafka and is intended to be used by the kafka audit application. For more information see go/kafkaauditheader""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.events.KafkaAuditHeader") - def __init__(self, - time: int, - server: str, - appName: str, - messageId: bytes, - instance: Union[None, str]=None, - auditVersion: Union[None, int]=None, - fabricUrn: Union[None, str]=None, - clusterConnectionString: Union[None, str]=None, - ): - super().__init__() - - self.time = time - self.server = server - self.instance = instance - self.appName = appName - self.messageId = messageId - self.auditVersion = auditVersion - self.fabricUrn = fabricUrn - self.clusterConnectionString = clusterConnectionString - - @classmethod - def construct_with_defaults(cls) -> "KafkaAuditHeaderClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.time = int() - self.server = str() - self.instance = self.RECORD_SCHEMA.field_map["instance"].default - self.appName = str() - self.messageId = bytes() - self.auditVersion = self.RECORD_SCHEMA.field_map["auditVersion"].default - self.fabricUrn = self.RECORD_SCHEMA.field_map["fabricUrn"].default - self.clusterConnectionString = self.RECORD_SCHEMA.field_map["clusterConnectionString"].default - - - @property - def time(self) -> int: - """Getter: The time at which the event was emitted into kafka.""" - return self._inner_dict.get('time') # type: ignore - - @time.setter - def time(self, value: int) -> None: - """Setter: The time at which the event was emitted into kafka.""" - self._inner_dict['time'] = value - - - @property - def server(self) -> str: - """Getter: The fully qualified name of the host from which the event is being emitted.""" - return self._inner_dict.get('server') # type: ignore - - @server.setter - def server(self, value: str) -> None: - """Setter: The fully qualified name of the host from which the event is being emitted.""" - self._inner_dict['server'] = value - - - @property - def instance(self) -> Union[None, str]: - """Getter: The instance on the server from which the event is being emitted. e.g. i001""" - return self._inner_dict.get('instance') # type: ignore - - @instance.setter - def instance(self, value: Union[None, str]) -> None: - """Setter: The instance on the server from which the event is being emitted. e.g. 
i001""" - self._inner_dict['instance'] = value - - - @property - def appName(self) -> str: - """Getter: The name of the application from which the event is being emitted. see go/appname""" - return self._inner_dict.get('appName') # type: ignore - - @appName.setter - def appName(self, value: str) -> None: - """Setter: The name of the application from which the event is being emitted. see go/appname""" - self._inner_dict['appName'] = value - - - @property - def messageId(self) -> bytes: - """Getter: A unique identifier for the message""" - return self._inner_dict.get('messageId') # type: ignore - - @messageId.setter - def messageId(self, value: bytes) -> None: - """Setter: A unique identifier for the message""" - self._inner_dict['messageId'] = value - - - @property - def auditVersion(self) -> Union[None, int]: - """Getter: The version that is being used for auditing. In version 0, the audit trail buckets events into 10 minute audit windows based on the EventHeader timestamp. In version 1, the audit trail buckets events as follows: if the schema has an outer KafkaAuditHeader, use the outer audit header timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader use that inner audit header's timestamp for bucketing""" - return self._inner_dict.get('auditVersion') # type: ignore - - @auditVersion.setter - def auditVersion(self, value: Union[None, int]) -> None: - """Setter: The version that is being used for auditing. In version 0, the audit trail buckets events into 10 minute audit windows based on the EventHeader timestamp. In version 1, the audit trail buckets events as follows: if the schema has an outer KafkaAuditHeader, use the outer audit header timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader use that inner audit header's timestamp for bucketing""" - self._inner_dict['auditVersion'] = value - - - @property - def fabricUrn(self) -> Union[None, str]: - """Getter: The fabricUrn of the host from which the event is being emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See go/fabric.""" - return self._inner_dict.get('fabricUrn') # type: ignore - - @fabricUrn.setter - def fabricUrn(self, value: Union[None, str]) -> None: - """Setter: The fabricUrn of the host from which the event is being emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See go/fabric.""" - self._inner_dict['fabricUrn'] = value - - - @property - def clusterConnectionString(self) -> Union[None, str]: - """Getter: This is a String that the client uses to establish some kind of connection with the Kafka cluster. The exact format of it depends on specific versions of clients and brokers. This information could potentially identify the fabric and cluster with which the client is producing to or consuming from.""" - return self._inner_dict.get('clusterConnectionString') # type: ignore - - @clusterConnectionString.setter - def clusterConnectionString(self, value: Union[None, str]) -> None: - """Setter: This is a String that the client uses to establish some kind of connection with the Kafka cluster. The exact format of it depends on specific versions of clients and brokers. 
This information could potentially identify the fabric and cluster with which the client is producing to or consuming from.""" - self._inner_dict['clusterConnectionString'] = value - - -class ChartInfoClass(DictWrapper): - """Information about a chart""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.chart.ChartInfo") - def __init__(self, - title: str, - description: str, - lastModified: "ChangeAuditStampsClass", - customProperties: Optional[Dict[str, str]]=None, - externalUrl: Union[None, str]=None, - chartUrl: Union[None, str]=None, - inputs: Union[None, List[str]]=None, - type: Union[None, Union[str, "ChartTypeClass"]]=None, - access: Union[None, Union[str, "AccessLevelClass"]]=None, - lastRefreshed: Union[None, int]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.externalUrl = externalUrl - self.title = title - self.description = description - self.lastModified = lastModified - self.chartUrl = chartUrl - self.inputs = inputs - self.type = type - self.access = access - self.lastRefreshed = lastRefreshed - - @classmethod - def construct_with_defaults(cls) -> "ChartInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.externalUrl = self.RECORD_SCHEMA.field_map["externalUrl"].default - self.title = str() - self.description = str() - self.lastModified = ChangeAuditStampsClass.construct_with_defaults() - self.chartUrl = self.RECORD_SCHEMA.field_map["chartUrl"].default - self.inputs = self.RECORD_SCHEMA.field_map["inputs"].default - self.type = self.RECORD_SCHEMA.field_map["type"].default - self.access = self.RECORD_SCHEMA.field_map["access"].default - self.lastRefreshed = self.RECORD_SCHEMA.field_map["lastRefreshed"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def externalUrl(self) -> Union[None, str]: - """Getter: URL where the reference exist""" - return self._inner_dict.get('externalUrl') # type: ignore - - @externalUrl.setter - def externalUrl(self, value: Union[None, str]) -> None: - """Setter: URL where the reference exist""" - self._inner_dict['externalUrl'] = value - - - @property - def title(self) -> str: - """Getter: Title of the chart""" - return self._inner_dict.get('title') # type: ignore - - @title.setter - def title(self, value: str) -> None: - """Setter: Title of the chart""" - self._inner_dict['title'] = value - - - @property - def description(self) -> str: - """Getter: Detailed description about the chart""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: str) -> None: - """Setter: Detailed description about the chart""" - self._inner_dict['description'] = value - - - @property - def lastModified(self) -> "ChangeAuditStampsClass": - """Getter: Captures information about who created/last modified/deleted this chart and when""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "ChangeAuditStampsClass") -> None: - """Setter: Captures information about who created/last modified/deleted 
this chart and when""" - self._inner_dict['lastModified'] = value - - - @property - def chartUrl(self) -> Union[None, str]: - """Getter: URL for the chart. This could be used as an external link on DataHub to allow users access/view the chart""" - return self._inner_dict.get('chartUrl') # type: ignore - - @chartUrl.setter - def chartUrl(self, value: Union[None, str]) -> None: - """Setter: URL for the chart. This could be used as an external link on DataHub to allow users access/view the chart""" - self._inner_dict['chartUrl'] = value - - - @property - def inputs(self) -> Union[None, List[str]]: - """Getter: Data sources for the chart""" - return self._inner_dict.get('inputs') # type: ignore - - @inputs.setter - def inputs(self, value: Union[None, List[str]]) -> None: - """Setter: Data sources for the chart""" - self._inner_dict['inputs'] = value - - - @property - def type(self) -> Union[None, Union[str, "ChartTypeClass"]]: - """Getter: Type of the chart""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[None, Union[str, "ChartTypeClass"]]) -> None: - """Setter: Type of the chart""" - self._inner_dict['type'] = value - - - @property - def access(self) -> Union[None, Union[str, "AccessLevelClass"]]: - """Getter: Access level for the chart""" - return self._inner_dict.get('access') # type: ignore - - @access.setter - def access(self, value: Union[None, Union[str, "AccessLevelClass"]]) -> None: - """Setter: Access level for the chart""" - self._inner_dict['access'] = value - - - @property - def lastRefreshed(self) -> Union[None, int]: - """Getter: The time when this chart last refreshed""" - return self._inner_dict.get('lastRefreshed') # type: ignore - - @lastRefreshed.setter - def lastRefreshed(self, value: Union[None, int]) -> None: - """Setter: The time when this chart last refreshed""" - self._inner_dict['lastRefreshed'] = value - - -class ChartQueryClass(DictWrapper): - """Information for chart query which is used for getting data of the chart""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.chart.ChartQuery") - def __init__(self, - rawQuery: str, - type: Union[str, "ChartQueryTypeClass"], - ): - super().__init__() - - self.rawQuery = rawQuery - self.type = type - - @classmethod - def construct_with_defaults(cls) -> "ChartQueryClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.rawQuery = str() - self.type = ChartQueryTypeClass.LOOKML - - - @property - def rawQuery(self) -> str: - """Getter: Raw query to build a chart from input datasets""" - return self._inner_dict.get('rawQuery') # type: ignore - - @rawQuery.setter - def rawQuery(self, value: str) -> None: - """Setter: Raw query to build a chart from input datasets""" - self._inner_dict['rawQuery'] = value - - - @property - def type(self) -> Union[str, "ChartQueryTypeClass"]: - """Getter: Chart query type""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[str, "ChartQueryTypeClass"]) -> None: - """Setter: Chart query type""" - self._inner_dict['type'] = value - - -class ChartQueryTypeClass(object): - # No docs available. 
- - - """LookML queries""" - LOOKML = "LOOKML" - - """SQL type queries""" - SQL = "SQL" - - -class ChartTypeClass(object): - """The various types of charts""" - - - """Chart showing a Bar chart""" - BAR = "BAR" - - """Chart showing a Pie chart""" - PIE = "PIE" - - """Chart showing a Scatter plot""" - SCATTER = "SCATTER" - - """Chart showing a table""" - TABLE = "TABLE" - - """Chart showing Markdown formatted text""" - TEXT = "TEXT" - - LINE = "LINE" - - AREA = "AREA" - - HISTOGRAM = "HISTOGRAM" - - BOX_PLOT = "BOX_PLOT" - - -class EditableChartPropertiesClass(DictWrapper): - """Stores editable changes made to properties. This separates changes made from - ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.chart.EditableChartProperties") - def __init__(self, - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, "AuditStampClass"]=None, - description: Union[None, str]=None, - ): - super().__init__() - - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - self.description = description - - @classmethod - def construct_with_defaults(cls) -> "EditableChartPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. 
A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Edited documentation of the chart """ - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Edited documentation of the chart """ - self._inner_dict['description'] = value - - -class AccessLevelClass(object): - """The various access levels""" - - - """Publicly available access level""" - PUBLIC = "PUBLIC" - - """Private availability to certain set of users""" - PRIVATE = "PRIVATE" - - -class AuditStampClass(DictWrapper): - """Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.AuditStamp") - def __init__(self, - time: int, - actor: str, - impersonator: Union[None, str]=None, - ): - super().__init__() - - self.time = time - self.actor = actor - self.impersonator = impersonator - - @classmethod - def construct_with_defaults(cls) -> "AuditStampClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.time = int() - self.actor = str() - self.impersonator = self.RECORD_SCHEMA.field_map["impersonator"].default - - - @property - def time(self) -> int: - """Getter: When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent.""" - return self._inner_dict.get('time') # type: ignore - - @time.setter - def time(self, value: int) -> None: - """Setter: When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent.""" - self._inner_dict['time'] = value - - - @property - def actor(self) -> str: - """Getter: The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.""" - return self._inner_dict.get('actor') # type: ignore - - @actor.setter - def actor(self, value: str) -> None: - """Setter: The entity (e.g. 
a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.""" - self._inner_dict['actor'] = value - - - @property - def impersonator(self) -> Union[None, str]: - """Getter: The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.""" - return self._inner_dict.get('impersonator') # type: ignore - - @impersonator.setter - def impersonator(self, value: Union[None, str]) -> None: - """Setter: The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.""" - self._inner_dict['impersonator'] = value - - -class BrowsePathsClass(DictWrapper): - """Shared aspect containing Browse Paths to be indexed for an entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.BrowsePaths") - def __init__(self, - paths: List[str], - ): - super().__init__() - - self.paths = paths - - @classmethod - def construct_with_defaults(cls) -> "BrowsePathsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.paths = list() - - - @property - def paths(self) -> List[str]: - """Getter: A list of valid browse paths for the entity. - - Browse paths are expected to be backslash-separated strings. For example: 'prod/snowflake/datasetName'""" - return self._inner_dict.get('paths') # type: ignore - - @paths.setter - def paths(self, value: List[str]) -> None: - """Setter: A list of valid browse paths for the entity. - - Browse paths are expected to be backslash-separated strings. For example: 'prod/snowflake/datasetName'""" - self._inner_dict['paths'] = value - - -class ChangeAuditStampsClass(DictWrapper): - """Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into various lifecycle stages, and who acted to move it into those lifecycle stages. The recommended best practice is to include this record in your record schema, and annotate its fields as @readOnly in your resource. 
See https://github.com/linkedin/rest.li/wiki/Validation-in-Rest.li#restli-validation-annotations""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.ChangeAuditStamps") - def __init__(self, - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, "AuditStampClass"]=None, - ): - super().__init__() - - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - - @classmethod - def construct_with_defaults(cls) -> "ChangeAuditStampsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. 
Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - -class CostClass(DictWrapper): - # No docs available. - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.Cost") - def __init__(self, - costType: Union[str, "CostTypeClass"], - cost: "CostCostClass", - ): - super().__init__() - - self.costType = costType - self.cost = cost - - @classmethod - def construct_with_defaults(cls) -> "CostClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.costType = CostTypeClass.ORG_COST_TYPE - self.cost = CostCostClass.construct_with_defaults() - - - @property - def costType(self) -> Union[str, "CostTypeClass"]: - # No docs available. - return self._inner_dict.get('costType') # type: ignore - - @costType.setter - def costType(self, value: Union[str, "CostTypeClass"]) -> None: - # No docs available. - self._inner_dict['costType'] = value - - - @property - def cost(self) -> "CostCostClass": - # No docs available. - return self._inner_dict.get('cost') # type: ignore - - @cost.setter - def cost(self, value: "CostCostClass") -> None: - # No docs available. - self._inner_dict['cost'] = value - - -class CostCostClass(DictWrapper): - # No docs available. - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.CostCost") - def __init__(self, - fieldDiscriminator: Union[str, "CostCostDiscriminatorClass"], - costId: Union[None, float]=None, - costCode: Union[None, str]=None, - ): - super().__init__() - - self.costId = costId - self.costCode = costCode - self.fieldDiscriminator = fieldDiscriminator - - @classmethod - def construct_with_defaults(cls) -> "CostCostClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.costId = self.RECORD_SCHEMA.field_map["costId"].default - self.costCode = self.RECORD_SCHEMA.field_map["costCode"].default - self.fieldDiscriminator = CostCostDiscriminatorClass.costId - - - @property - def costId(self) -> Union[None, float]: - # No docs available. - return self._inner_dict.get('costId') # type: ignore - - @costId.setter - def costId(self, value: Union[None, float]) -> None: - # No docs available. - self._inner_dict['costId'] = value - - - @property - def costCode(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('costCode') # type: ignore - - @costCode.setter - def costCode(self, value: Union[None, str]) -> None: - # No docs available. - self._inner_dict['costCode'] = value - - - @property - def fieldDiscriminator(self) -> Union[str, "CostCostDiscriminatorClass"]: - """Getter: Contains the name of the field that has its value set.""" - return self._inner_dict.get('fieldDiscriminator') # type: ignore - - @fieldDiscriminator.setter - def fieldDiscriminator(self, value: Union[str, "CostCostDiscriminatorClass"]) -> None: - """Setter: Contains the name of the field that has its value set.""" - self._inner_dict['fieldDiscriminator'] = value - - -class CostCostDiscriminatorClass(object): - # No docs available. 
- - costId = "costId" - costCode = "costCode" - - -class CostTypeClass(object): - """Type of Cost Code""" - - - """Org Cost Type to which the Cost of this entity should be attributed to""" - ORG_COST_TYPE = "ORG_COST_TYPE" - - -class DeprecationClass(DictWrapper): - """Deprecation status of an entity""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.Deprecation") - def __init__(self, - deprecated: bool, - note: str, - actor: str, - decommissionTime: Union[None, int]=None, - ): - super().__init__() - - self.deprecated = deprecated - self.decommissionTime = decommissionTime - self.note = note - self.actor = actor - - @classmethod - def construct_with_defaults(cls) -> "DeprecationClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.deprecated = bool() - self.decommissionTime = self.RECORD_SCHEMA.field_map["decommissionTime"].default - self.note = str() - self.actor = str() - - - @property - def deprecated(self) -> bool: - """Getter: Whether the entity is deprecated.""" - return self._inner_dict.get('deprecated') # type: ignore - - @deprecated.setter - def deprecated(self, value: bool) -> None: - """Setter: Whether the entity is deprecated.""" - self._inner_dict['deprecated'] = value - - - @property - def decommissionTime(self) -> Union[None, int]: - """Getter: The time user plan to decommission this entity.""" - return self._inner_dict.get('decommissionTime') # type: ignore - - @decommissionTime.setter - def decommissionTime(self, value: Union[None, int]) -> None: - """Setter: The time user plan to decommission this entity.""" - self._inner_dict['decommissionTime'] = value - - - @property - def note(self) -> str: - """Getter: Additional information about the entity deprecation plan, such as the wiki, doc, RB.""" - return self._inner_dict.get('note') # type: ignore - - @note.setter - def note(self, value: str) -> None: - """Setter: Additional information about the entity deprecation plan, such as the wiki, doc, RB.""" - self._inner_dict['note'] = value - - - @property - def actor(self) -> str: - """Getter: The corpuser URN which will be credited for modifying this deprecation content.""" - return self._inner_dict.get('actor') # type: ignore - - @actor.setter - def actor(self, value: str) -> None: - """Setter: The corpuser URN which will be credited for modifying this deprecation content.""" - self._inner_dict['actor'] = value - - -class FabricTypeClass(object): - """Fabric group type""" - - - """Designates development fabrics""" - DEV = "DEV" - - """Designates early-integration (staging) fabrics""" - EI = "EI" - - """Designates production fabrics""" - PROD = "PROD" - - """Designates corporation fabrics""" - CORP = "CORP" - - -class GlobalTagsClass(DictWrapper): - """Tag aspect used for applying tags to an entity""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.GlobalTags") - def __init__(self, - tags: List["TagAssociationClass"], - ): - super().__init__() - - self.tags = tags - - @classmethod - def construct_with_defaults(cls) -> "GlobalTagsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.tags = list() - - - @property - def tags(self) -> List["TagAssociationClass"]: - """Getter: Tags associated with a given entity""" - return self._inner_dict.get('tags') # type: ignore - - @tags.setter - def tags(self, value: List["TagAssociationClass"]) -> None: - """Setter: Tags associated with a given entity""" - 
self._inner_dict['tags'] = value - - -class GlossaryTermAssociationClass(DictWrapper): - """Properties of an applied glossary term.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.GlossaryTermAssociation") - def __init__(self, - urn: str, - ): - super().__init__() - - self.urn = urn - - @classmethod - def construct_with_defaults(cls) -> "GlossaryTermAssociationClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - - - @property - def urn(self) -> str: - """Getter: Urn of the applied glossary term""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: Urn of the applied glossary term""" - self._inner_dict['urn'] = value - - -class GlossaryTermsClass(DictWrapper): - """Related business terms information""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.GlossaryTerms") - def __init__(self, - terms: List["GlossaryTermAssociationClass"], - auditStamp: "AuditStampClass", - ): - super().__init__() - - self.terms = terms - self.auditStamp = auditStamp - - @classmethod - def construct_with_defaults(cls) -> "GlossaryTermsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.terms = list() - self.auditStamp = AuditStampClass.construct_with_defaults() - - - @property - def terms(self) -> List["GlossaryTermAssociationClass"]: - """Getter: The related business terms""" - return self._inner_dict.get('terms') # type: ignore - - @terms.setter - def terms(self, value: List["GlossaryTermAssociationClass"]) -> None: - """Setter: The related business terms""" - self._inner_dict['terms'] = value - - - @property - def auditStamp(self) -> "AuditStampClass": - """Getter: Audit stamp containing who reported the related business term""" - return self._inner_dict.get('auditStamp') # type: ignore - - @auditStamp.setter - def auditStamp(self, value: "AuditStampClass") -> None: - """Setter: Audit stamp containing who reported the related business term""" - self._inner_dict['auditStamp'] = value - - -class InstitutionalMemoryClass(DictWrapper): - """Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. Institutional or tribal knowledge is very important for users to leverage the entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.InstitutionalMemory") - def __init__(self, - elements: List["InstitutionalMemoryMetadataClass"], - ): - super().__init__() - - self.elements = elements - - @classmethod - def construct_with_defaults(cls) -> "InstitutionalMemoryClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.elements = list() - - - @property - def elements(self) -> List["InstitutionalMemoryMetadataClass"]: - """Getter: List of records that represent institutional memory of an entity. Each record consists of a link, description, creator and timestamps associated with that record.""" - return self._inner_dict.get('elements') # type: ignore - - @elements.setter - def elements(self, value: List["InstitutionalMemoryMetadataClass"]) -> None: - """Setter: List of records that represent institutional memory of an entity. 
Each record consists of a link, description, creator and timestamps associated with that record.""" - self._inner_dict['elements'] = value - - -class InstitutionalMemoryMetadataClass(DictWrapper): - """Metadata corresponding to a record of institutional memory.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.InstitutionalMemoryMetadata") - def __init__(self, - url: str, - description: str, - createStamp: "AuditStampClass", - ): - super().__init__() - - self.url = url - self.description = description - self.createStamp = createStamp - - @classmethod - def construct_with_defaults(cls) -> "InstitutionalMemoryMetadataClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.url = str() - self.description = str() - self.createStamp = AuditStampClass.construct_with_defaults() - - - @property - def url(self) -> str: - """Getter: Link to an engineering design document or a wiki page.""" - return self._inner_dict.get('url') # type: ignore - - @url.setter - def url(self, value: str) -> None: - """Setter: Link to an engineering design document or a wiki page.""" - self._inner_dict['url'] = value - - - @property - def description(self) -> str: - """Getter: Description of the link.""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: str) -> None: - """Setter: Description of the link.""" - self._inner_dict['description'] = value - - - @property - def createStamp(self) -> "AuditStampClass": - """Getter: Audit stamp associated with creation of this record""" - return self._inner_dict.get('createStamp') # type: ignore - - @createStamp.setter - def createStamp(self, value: "AuditStampClass") -> None: - """Setter: Audit stamp associated with creation of this record""" - self._inner_dict['createStamp'] = value - - -class MLFeatureDataTypeClass(object): - """MLFeature Data Type""" - - - """Useless data is unique, discrete data with no potential relationship with the outcome variable. - A useless feature has high cardinality. An example would be bank account numbers that were generated randomly.""" - USELESS = "USELESS" - - """Nominal data is made of discrete values with no numerical relationship between the different categories — mean and median are meaningless. - Animal species is one example. For example, pig is not higher than bird and lower than fish.""" - NOMINAL = "NOMINAL" - - """Ordinal data are discrete integers that can be ranked or sorted. - For example, the distance between first and second may not be the same as the distance between second and third.""" - ORDINAL = "ORDINAL" - - """Binary data is discrete data that can be in only one of two categories — either yes or no, 1 or 0, off or on, etc""" - BINARY = "BINARY" - - """Count data is discrete whole number data — no negative numbers here. - Count data often has many small values, such as zero and one.""" - COUNT = "COUNT" - - """Time data is a cyclical, repeating continuous form of data. - The relevant time features can be any period— daily, weekly, monthly, annual, etc.""" - TIME = "TIME" - - """Interval data has equal spaces between the numbers and does not represent a temporal pattern. 
- Examples include percentages, temperatures, and income.""" - INTERVAL = "INTERVAL" - - """Image Data""" - IMAGE = "IMAGE" - - """Video Data""" - VIDEO = "VIDEO" - - """Audio Data""" - AUDIO = "AUDIO" - - """Text Data""" - TEXT = "TEXT" - - """Mapping Data Type ex: dict, map""" - MAP = "MAP" - - """Sequence Data Type ex: list, tuple, range""" - SEQUENCE = "SEQUENCE" - - """Set Data Type ex: set, frozenset""" - SET = "SET" - - """Continuous data are made of uncountable values, often the result of a measurement such as height, weight, age etc.""" - CONTINUOUS = "CONTINUOUS" - - """Bytes data are binary-encoded values that can represent complex objects.""" - BYTE = "BYTE" - - """Unknown data are data that we don't know the type for.""" - UNKNOWN = "UNKNOWN" - - -class OwnerClass(DictWrapper): - """Ownership information""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.Owner") - def __init__(self, - owner: str, - type: Union[str, "OwnershipTypeClass"], - source: Union[None, "OwnershipSourceClass"]=None, - ): - super().__init__() - - self.owner = owner - self.type = type - self.source = source - - @classmethod - def construct_with_defaults(cls) -> "OwnerClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.owner = str() - self.type = OwnershipTypeClass.DEVELOPER - self.source = self.RECORD_SCHEMA.field_map["source"].default - - - @property - def owner(self) -> str: - """Getter: Owner URN, e.g. urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name - (Caveat: only corpuser is currently supported in the frontend.)""" - return self._inner_dict.get('owner') # type: ignore - - @owner.setter - def owner(self, value: str) -> None: - """Setter: Owner URN, e.g. 
urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name - (Caveat: only corpuser is currently supported in the frontend.)""" - self._inner_dict['owner'] = value - - - @property - def type(self) -> Union[str, "OwnershipTypeClass"]: - """Getter: The type of the ownership""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[str, "OwnershipTypeClass"]) -> None: - """Setter: The type of the ownership""" - self._inner_dict['type'] = value - - - @property - def source(self) -> Union[None, "OwnershipSourceClass"]: - """Getter: Source information for the ownership""" - return self._inner_dict.get('source') # type: ignore - - @source.setter - def source(self, value: Union[None, "OwnershipSourceClass"]) -> None: - """Setter: Source information for the ownership""" - self._inner_dict['source'] = value - - -class OwnershipClass(DictWrapper): - """Ownership information of an entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.Ownership") - def __init__(self, - owners: List["OwnerClass"], - lastModified: Optional["AuditStampClass"]=None, - ): - super().__init__() - - self.owners = owners - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - - @classmethod - def construct_with_defaults(cls) -> "OwnershipClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.owners = list() - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - - - @property - def owners(self) -> List["OwnerClass"]: - """Getter: List of owners of the entity.""" - return self._inner_dict.get('owners') # type: ignore - - @owners.setter - def owners(self, value: List["OwnerClass"]) -> None: - """Setter: List of owners of the entity.""" - self._inner_dict['owners'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: Audit stamp containing who last modified the record and when. A value of 0 in the time field indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: Audit stamp containing who last modified the record and when. 
A value of 0 in the time field indicates missing data.""" - self._inner_dict['lastModified'] = value - - -class OwnershipSourceClass(DictWrapper): - """Source/provider of the ownership information""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.OwnershipSource") - def __init__(self, - type: Union[str, "OwnershipSourceTypeClass"], - url: Union[None, str]=None, - ): - super().__init__() - - self.type = type - self.url = url - - @classmethod - def construct_with_defaults(cls) -> "OwnershipSourceClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.type = OwnershipSourceTypeClass.AUDIT - self.url = self.RECORD_SCHEMA.field_map["url"].default - - - @property - def type(self) -> Union[str, "OwnershipSourceTypeClass"]: - """Getter: The type of the source""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[str, "OwnershipSourceTypeClass"]) -> None: - """Setter: The type of the source""" - self._inner_dict['type'] = value - - - @property - def url(self) -> Union[None, str]: - """Getter: A reference URL for the source""" - return self._inner_dict.get('url') # type: ignore - - @url.setter - def url(self, value: Union[None, str]) -> None: - """Setter: A reference URL for the source""" - self._inner_dict['url'] = value - - -class OwnershipSourceTypeClass(object): - # No docs available. - - - """Auditing system or audit logs""" - AUDIT = "AUDIT" - - """Database, e.g. GRANTS table""" - DATABASE = "DATABASE" - - """File system, e.g. file/directory owner""" - FILE_SYSTEM = "FILE_SYSTEM" - - """Issue tracking system, e.g. Jira""" - ISSUE_TRACKING_SYSTEM = "ISSUE_TRACKING_SYSTEM" - - """Manually provided by a user""" - MANUAL = "MANUAL" - - """Other ownership-like service, e.g. Nuage, ACL service etc""" - SERVICE = "SERVICE" - - """SCM system, e.g. GIT, SVN""" - SOURCE_CONTROL = "SOURCE_CONTROL" - - """Other sources""" - OTHER = "OTHER" - - -class OwnershipTypeClass(object): - """Owner category or owner role""" - - - """A person or group that is in charge of developing the code""" - DEVELOPER = "DEVELOPER" - - """A person or group that is owning the data""" - DATAOWNER = "DATAOWNER" - - """A person or a group that overseas the operation, e.g. a DBA or SRE.""" - DELEGATE = "DELEGATE" - - """A person, group, or service that produces/generates the data""" - PRODUCER = "PRODUCER" - - """A person, group, or service that consumes the data""" - CONSUMER = "CONSUMER" - - """A person or a group that has direct business interest""" - STAKEHOLDER = "STAKEHOLDER" - - -class StatusClass(DictWrapper): - """The status metadata of an entity, e.g. dataset, metric, feature, etc. 
- This aspect is used to represent soft deletes conventionally.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.Status") - def __init__(self, - removed: Optional[bool]=None, - ): - super().__init__() - - if removed is None: - # default: False - self.removed = self.RECORD_SCHEMA.field_map["removed"].default - else: - self.removed = removed - - @classmethod - def construct_with_defaults(cls) -> "StatusClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.removed = self.RECORD_SCHEMA.field_map["removed"].default - - - @property - def removed(self) -> bool: - """Getter: whether the entity is removed or not""" - return self._inner_dict.get('removed') # type: ignore - - @removed.setter - def removed(self, value: bool) -> None: - """Setter: whether the entity is removed or not""" - self._inner_dict['removed'] = value - - -class TagAssociationClass(DictWrapper): - """Properties of an applied tag. For now, just an Urn. In the future we can extend this with other properties, e.g. - propagation parameters.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.TagAssociation") - def __init__(self, - tag: str, - ): - super().__init__() - - self.tag = tag - - @classmethod - def construct_with_defaults(cls) -> "TagAssociationClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.tag = str() - - - @property - def tag(self) -> str: - """Getter: Urn of the applied tag""" - return self._inner_dict.get('tag') # type: ignore - - @tag.setter - def tag(self, value: str) -> None: - """Setter: Urn of the applied tag""" - self._inner_dict['tag'] = value - - -class VersionTagClass(DictWrapper): - """A resource-defined string representing the resource state for the purpose of concurrency control""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.VersionTag") - def __init__(self, - versionTag: Union[None, str]=None, - ): - super().__init__() - - self.versionTag = versionTag - - @classmethod - def construct_with_defaults(cls) -> "VersionTagClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.versionTag = self.RECORD_SCHEMA.field_map["versionTag"].default - - - @property - def versionTag(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('versionTag') # type: ignore - - @versionTag.setter - def versionTag(self, value: Union[None, str]) -> None: - # No docs available. 
- self._inner_dict['versionTag'] = value - - -class WindowDurationClass(object): - """Enum to define the length of a bucket when doing aggregations""" - - YEAR = "YEAR" - MONTH = "MONTH" - WEEK = "WEEK" - DAY = "DAY" - HOUR = "HOUR" - - -class TransformationTypeClass(object): - """Type of the transformation involved in generating destination fields from source fields.""" - - - """Field transformation expressed as unknown black box function.""" - BLACKBOX = "BLACKBOX" - - """Field transformation expressed as Identity function.""" - IDENTITY = "IDENTITY" - - -class UDFTransformerClass(DictWrapper): - """Field transformation expressed in UDF""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.common.fieldtransformer.UDFTransformer") - def __init__(self, - udf: str, - ): - super().__init__() - - self.udf = udf - - @classmethod - def construct_with_defaults(cls) -> "UDFTransformerClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.udf = str() - - - @property - def udf(self) -> str: - """Getter: A UDF mentioning how the source fields got transformed to destination field. This is the FQCN(Fully Qualified Class Name) of the udf.""" - return self._inner_dict.get('udf') # type: ignore - - @udf.setter - def udf(self, value: str) -> None: - """Setter: A UDF mentioning how the source fields got transformed to destination field. This is the FQCN(Fully Qualified Class Name) of the udf.""" - self._inner_dict['udf'] = value - - -class DashboardInfoClass(DictWrapper): - """Information about a dashboard""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dashboard.DashboardInfo") - def __init__(self, - title: str, - description: str, - lastModified: "ChangeAuditStampsClass", - customProperties: Optional[Dict[str, str]]=None, - externalUrl: Union[None, str]=None, - charts: Optional[List[str]]=None, - dashboardUrl: Union[None, str]=None, - access: Union[None, Union[str, "AccessLevelClass"]]=None, - lastRefreshed: Union[None, int]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.externalUrl = externalUrl - self.title = title - self.description = description - if charts is None: - # default: [] - self.charts = list() - else: - self.charts = charts - self.lastModified = lastModified - self.dashboardUrl = dashboardUrl - self.access = access - self.lastRefreshed = lastRefreshed - - @classmethod - def construct_with_defaults(cls) -> "DashboardInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.externalUrl = self.RECORD_SCHEMA.field_map["externalUrl"].default - self.title = str() - self.description = str() - self.charts = list() - self.lastModified = ChangeAuditStampsClass.construct_with_defaults() - self.dashboardUrl = self.RECORD_SCHEMA.field_map["dashboardUrl"].default - self.access = self.RECORD_SCHEMA.field_map["access"].default - self.lastRefreshed = self.RECORD_SCHEMA.field_map["lastRefreshed"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def externalUrl(self) -> 
Union[None, str]: - """Getter: URL where the reference exist""" - return self._inner_dict.get('externalUrl') # type: ignore - - @externalUrl.setter - def externalUrl(self, value: Union[None, str]) -> None: - """Setter: URL where the reference exist""" - self._inner_dict['externalUrl'] = value - - - @property - def title(self) -> str: - """Getter: Title of the dashboard""" - return self._inner_dict.get('title') # type: ignore - - @title.setter - def title(self, value: str) -> None: - """Setter: Title of the dashboard""" - self._inner_dict['title'] = value - - - @property - def description(self) -> str: - """Getter: Detailed description about the dashboard""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: str) -> None: - """Setter: Detailed description about the dashboard""" - self._inner_dict['description'] = value - - - @property - def charts(self) -> List[str]: - """Getter: Charts in a dashboard""" - return self._inner_dict.get('charts') # type: ignore - - @charts.setter - def charts(self, value: List[str]) -> None: - """Setter: Charts in a dashboard""" - self._inner_dict['charts'] = value - - - @property - def lastModified(self) -> "ChangeAuditStampsClass": - """Getter: Captures information about who created/last modified/deleted this dashboard and when""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "ChangeAuditStampsClass") -> None: - """Setter: Captures information about who created/last modified/deleted this dashboard and when""" - self._inner_dict['lastModified'] = value - - - @property - def dashboardUrl(self) -> Union[None, str]: - """Getter: URL for the dashboard. This could be used as an external link on DataHub to allow users access/view the dashboard""" - return self._inner_dict.get('dashboardUrl') # type: ignore - - @dashboardUrl.setter - def dashboardUrl(self, value: Union[None, str]) -> None: - """Setter: URL for the dashboard. This could be used as an external link on DataHub to allow users access/view the dashboard""" - self._inner_dict['dashboardUrl'] = value - - - @property - def access(self) -> Union[None, Union[str, "AccessLevelClass"]]: - """Getter: Access level for the dashboard""" - return self._inner_dict.get('access') # type: ignore - - @access.setter - def access(self, value: Union[None, Union[str, "AccessLevelClass"]]) -> None: - """Setter: Access level for the dashboard""" - self._inner_dict['access'] = value - - - @property - def lastRefreshed(self) -> Union[None, int]: - """Getter: The time when this dashboard last refreshed""" - return self._inner_dict.get('lastRefreshed') # type: ignore - - @lastRefreshed.setter - def lastRefreshed(self, value: Union[None, int]) -> None: - """Setter: The time when this dashboard last refreshed""" - self._inner_dict['lastRefreshed'] = value - - -class EditableDashboardPropertiesClass(DictWrapper): - """Stores editable changes made to properties. 
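A short sketch of constructing the DashboardInfoClass aspect defined above, under the same import-path assumption; the title and description are made up:

# Hedged sketch; ChangeAuditStampsClass is defined elsewhere in the generated module.
from datahub.metadata.schema_classes import ChangeAuditStampsClass, DashboardInfoClass

info = DashboardInfoClass(
    title="Weekly Active Users",                         # placeholder title
    description="Example dashboard",                     # placeholder description
    lastModified=ChangeAuditStampsClass.construct_with_defaults(),
    charts=[],                                           # list of chart urns, empty here
)
print(info.title, info.access)                           # access was not set, so it is None
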
This separates changes made from - ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dashboard.EditableDashboardProperties") - def __init__(self, - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, "AuditStampClass"]=None, - description: Union[None, str]=None, - ): - super().__init__() - - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - self.description = description - - @classmethod - def construct_with_defaults(cls) -> "EditableDashboardPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. 
It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Edited documentation of the dashboard""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Edited documentation of the dashboard""" - self._inner_dict['description'] = value - - -class DataFlowInfoClass(DictWrapper): - """Information about a Data processing flow""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.datajob.DataFlowInfo") - def __init__(self, - name: str, - customProperties: Optional[Dict[str, str]]=None, - externalUrl: Union[None, str]=None, - description: Union[None, str]=None, - project: Union[None, str]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.externalUrl = externalUrl - self.name = name - self.description = description - self.project = project - - @classmethod - def construct_with_defaults(cls) -> "DataFlowInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.externalUrl = self.RECORD_SCHEMA.field_map["externalUrl"].default - self.name = str() - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.project = self.RECORD_SCHEMA.field_map["project"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def externalUrl(self) -> Union[None, str]: - """Getter: URL where the reference exist""" - return self._inner_dict.get('externalUrl') # type: ignore - - @externalUrl.setter - def externalUrl(self, value: Union[None, str]) -> None: - """Setter: URL where the reference exist""" - self._inner_dict['externalUrl'] = value - - - @property - def name(self) -> str: - """Getter: Flow name""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Flow name""" - self._inner_dict['name'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Flow description""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Flow description""" - self._inner_dict['description'] = value - - - @property - def project(self) -> Union[None, str]: - """Getter: Optional project/namespace associated with the flow""" - return self._inner_dict.get('project') # type: ignore - - @project.setter - def project(self, value: Union[None, str]) -> None: - """Setter: Optional project/namespace 
associated with the flow""" - self._inner_dict['project'] = value - - -class DataJobInfoClass(DictWrapper): - """Information about a Data processing job""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.datajob.DataJobInfo") - def __init__(self, - name: str, - type: Union[Union[str, "AzkabanJobTypeClass"], str], - customProperties: Optional[Dict[str, str]]=None, - externalUrl: Union[None, str]=None, - description: Union[None, str]=None, - flowUrn: Union[None, str]=None, - status: Union[None, Union[str, "JobStatusClass"]]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.externalUrl = externalUrl - self.name = name - self.description = description - self.type = type - self.flowUrn = flowUrn - self.status = status - - @classmethod - def construct_with_defaults(cls) -> "DataJobInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.externalUrl = self.RECORD_SCHEMA.field_map["externalUrl"].default - self.name = str() - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.type = AzkabanJobTypeClass.COMMAND - self.flowUrn = self.RECORD_SCHEMA.field_map["flowUrn"].default - self.status = self.RECORD_SCHEMA.field_map["status"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def externalUrl(self) -> Union[None, str]: - """Getter: URL where the reference exist""" - return self._inner_dict.get('externalUrl') # type: ignore - - @externalUrl.setter - def externalUrl(self, value: Union[None, str]) -> None: - """Setter: URL where the reference exist""" - self._inner_dict['externalUrl'] = value - - - @property - def name(self) -> str: - """Getter: Job name""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Job name""" - self._inner_dict['name'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Job description""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Job description""" - self._inner_dict['description'] = value - - - @property - def type(self) -> Union[Union[str, "AzkabanJobTypeClass"], str]: - """Getter: Datajob type - **NOTE**: AzkabanJobType is deprecated. Please use strings instead.""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[Union[str, "AzkabanJobTypeClass"], str]) -> None: - """Setter: Datajob type - **NOTE**: AzkabanJobType is deprecated. 
Please use strings instead.""" - self._inner_dict['type'] = value - - - @property - def flowUrn(self) -> Union[None, str]: - """Getter: DataFlow urn that this job is part of""" - return self._inner_dict.get('flowUrn') # type: ignore - - @flowUrn.setter - def flowUrn(self, value: Union[None, str]) -> None: - """Setter: DataFlow urn that this job is part of""" - self._inner_dict['flowUrn'] = value - - - @property - def status(self) -> Union[None, Union[str, "JobStatusClass"]]: - """Getter: Status of the job""" - return self._inner_dict.get('status') # type: ignore - - @status.setter - def status(self, value: Union[None, Union[str, "JobStatusClass"]]) -> None: - """Setter: Status of the job""" - self._inner_dict['status'] = value - - -class DataJobInputOutputClass(DictWrapper): - """Information about the inputs and outputs of a Data processing job""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.datajob.DataJobInputOutput") - def __init__(self, - inputDatasets: List[str], - outputDatasets: List[str], - inputDatajobs: Union[None, List[str]]=None, - ): - super().__init__() - - self.inputDatasets = inputDatasets - self.outputDatasets = outputDatasets - self.inputDatajobs = inputDatajobs - - @classmethod - def construct_with_defaults(cls) -> "DataJobInputOutputClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.inputDatasets = list() - self.outputDatasets = list() - self.inputDatajobs = self.RECORD_SCHEMA.field_map["inputDatajobs"].default - - - @property - def inputDatasets(self) -> List[str]: - """Getter: Input datasets consumed by the data job during processing""" - return self._inner_dict.get('inputDatasets') # type: ignore - - @inputDatasets.setter - def inputDatasets(self, value: List[str]) -> None: - """Setter: Input datasets consumed by the data job during processing""" - self._inner_dict['inputDatasets'] = value - - - @property - def outputDatasets(self) -> List[str]: - """Getter: Output datasets produced by the data job during processing""" - return self._inner_dict.get('outputDatasets') # type: ignore - - @outputDatasets.setter - def outputDatasets(self, value: List[str]) -> None: - """Setter: Output datasets produced by the data job during processing""" - self._inner_dict['outputDatasets'] = value - - - @property - def inputDatajobs(self) -> Union[None, List[str]]: - """Getter: Input datajobs that this data job depends on""" - return self._inner_dict.get('inputDatajobs') # type: ignore - - @inputDatajobs.setter - def inputDatajobs(self, value: Union[None, List[str]]) -> None: - """Setter: Input datajobs that this data job depends on""" - self._inner_dict['inputDatajobs'] = value - - -class EditableDataFlowPropertiesClass(DictWrapper): - """Stores editable changes made to properties. 
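A brief sketch of the data job aspects above; per the docstring note, type is passed as a plain string, and the dataset urns are placeholders:

# Hedged sketch, assuming the generated module import path.
from datahub.metadata.schema_classes import DataJobInfoClass, DataJobInputOutputClass

job_info = DataJobInfoClass(name="nightly_rollup", type="COMMAND")  # type as a string
io = DataJobInputOutputClass(
    inputDatasets=["urn:li:dataset:(urn:li:dataPlatform:hive,db.events,PROD)"],   # placeholder urn
    outputDatasets=["urn:li:dataset:(urn:li:dataPlatform:hive,db.rollup,PROD)"],  # placeholder urn
)
print(job_info.name, len(io.inputDatasets))
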
This separates changes made from - ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.datajob.EditableDataFlowProperties") - def __init__(self, - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, "AuditStampClass"]=None, - description: Union[None, str]=None, - ): - super().__init__() - - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - self.description = description - - @classmethod - def construct_with_defaults(cls) -> "EditableDataFlowPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. 
It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Edited documentation of the data flow""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Edited documentation of the data flow""" - self._inner_dict['description'] = value - - -class EditableDataJobPropertiesClass(DictWrapper): - """Stores editable changes made to properties. This separates changes made from - ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.datajob.EditableDataJobProperties") - def __init__(self, - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, "AuditStampClass"]=None, - description: Union[None, str]=None, - ): - super().__init__() - - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - self.description = description - - @classmethod - def construct_with_defaults(cls) -> "EditableDataJobPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. 
A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Edited documentation of the data job """ - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Edited documentation of the data job """ - self._inner_dict['description'] = value - - -class JobStatusClass(object): - """Job statuses""" - - - """Jobs being initialized.""" - STARTING = "STARTING" - - """Jobs currently running.""" - IN_PROGRESS = "IN_PROGRESS" - - """Jobs being stopped.""" - STOPPING = "STOPPING" - - """Jobs that have stopped.""" - STOPPED = "STOPPED" - - """Jobs with successful completion.""" - COMPLETED = "COMPLETED" - - """Jobs that have failed.""" - FAILED = "FAILED" - - """Jobs with unknown status (either unmappable or unavailable)""" - UNKNOWN = "UNKNOWN" - - -class AzkabanJobTypeClass(object): - """The various types of support azkaban jobs""" - - - """The command job type is one of the basic built-in types. It runs multiple UNIX commands using java processbuilder. - Upon execution, Azkaban spawns off a process to run the command.""" - COMMAND = "COMMAND" - - """Runs a java program with ability to access Hadoop cluster. - https://azkaban.readthedocs.io/en/latest/jobTypes.html#java-job-type""" - HADOOP_JAVA = "HADOOP_JAVA" - - """In large part, this is the same Command type. 
The difference is its ability to talk to a Hadoop cluster - securely, via Hadoop tokens.""" - HADOOP_SHELL = "HADOOP_SHELL" - - """Hive type is for running Hive jobs.""" - HIVE = "HIVE" - - """Pig type is for running Pig jobs.""" - PIG = "PIG" - - """SQL is for running Presto, mysql queries etc""" - SQL = "SQL" - - """Glue type is for running AWS Glue job transforms.""" - GLUE = "GLUE" - - -class DataPlatformInfoClass(DictWrapper): - """Information about a data platform""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataplatform.DataPlatformInfo") - def __init__(self, - name: str, - type: Union[str, "PlatformTypeClass"], - datasetNameDelimiter: str, - displayName: Union[None, str]=None, - logoUrl: Union[None, str]=None, - ): - super().__init__() - - self.name = name - self.displayName = displayName - self.type = type - self.datasetNameDelimiter = datasetNameDelimiter - self.logoUrl = logoUrl - - @classmethod - def construct_with_defaults(cls) -> "DataPlatformInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - self.displayName = self.RECORD_SCHEMA.field_map["displayName"].default - self.type = PlatformTypeClass.FILE_SYSTEM - self.datasetNameDelimiter = str() - self.logoUrl = self.RECORD_SCHEMA.field_map["logoUrl"].default - - - @property - def name(self) -> str: - """Getter: Name of the data platform""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the data platform""" - self._inner_dict['name'] = value - - - @property - def displayName(self) -> Union[None, str]: - """Getter: The name that will be used for displaying a platform type.""" - return self._inner_dict.get('displayName') # type: ignore - - @displayName.setter - def displayName(self, value: Union[None, str]) -> None: - """Setter: The name that will be used for displaying a platform type.""" - self._inner_dict['displayName'] = value - - - @property - def type(self) -> Union[str, "PlatformTypeClass"]: - """Getter: Platform type this data platform describes""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[str, "PlatformTypeClass"]) -> None: - """Setter: Platform type this data platform describes""" - self._inner_dict['type'] = value - - - @property - def datasetNameDelimiter(self) -> str: - """Getter: The delimiter in the dataset names on the data platform, e.g. '/' for HDFS and '.' for Oracle""" - return self._inner_dict.get('datasetNameDelimiter') # type: ignore - - @datasetNameDelimiter.setter - def datasetNameDelimiter(self, value: str) -> None: - """Setter: The delimiter in the dataset names on the data platform, e.g. '/' for HDFS and '.' for Oracle""" - self._inner_dict['datasetNameDelimiter'] = value - - - @property - def logoUrl(self) -> Union[None, str]: - """Getter: The URL for a logo associated with the platform""" - return self._inner_dict.get('logoUrl') # type: ignore - - @logoUrl.setter - def logoUrl(self, value: Union[None, str]) -> None: - """Setter: The URL for a logo associated with the platform""" - self._inner_dict['logoUrl'] = value - - -class PlatformTypeClass(object): - """Platform types available at LinkedIn""" - - - """Value for a file system, e.g. hdfs""" - FILE_SYSTEM = "FILE_SYSTEM" - - """Value for a key value store, e.g. espresso, voldemort""" - KEY_VALUE_STORE = "KEY_VALUE_STORE" - - """Value for a message broker, e.g. 
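A small sketch of DataPlatformInfoClass with one of the PlatformTypeClass constants defined nearby; the platform values are illustrative only:

# Hedged sketch, assuming the generated module import path.
from datahub.metadata.schema_classes import DataPlatformInfoClass, PlatformTypeClass

platform = DataPlatformInfoClass(
    name="mysql",                            # placeholder platform name
    type=PlatformTypeClass.RELATIONAL_DB,
    datasetNameDelimiter=".",                # e.g. '.' for relational databases
    displayName="MySQL",
)
print(platform.type, platform.datasetNameDelimiter)
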
kafka""" - MESSAGE_BROKER = "MESSAGE_BROKER" - - """Value for an object store, e.g. ambry""" - OBJECT_STORE = "OBJECT_STORE" - - """Value for an OLAP datastore, e.g. pinot""" - OLAP_DATASTORE = "OLAP_DATASTORE" - - """Value for other platforms, e.g salesforce, dovetail""" - OTHERS = "OTHERS" - - """Value for a query engine, e.g. presto""" - QUERY_ENGINE = "QUERY_ENGINE" - - """Value for a relational database, e.g. oracle, mysql""" - RELATIONAL_DB = "RELATIONAL_DB" - - """Value for a search engine, e.g seas""" - SEARCH_ENGINE = "SEARCH_ENGINE" - - -class DataProcessInfoClass(DictWrapper): - """The inputs and outputs of this data process""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataprocess.DataProcessInfo") - def __init__(self, - inputs: Union[None, List[str]]=None, - outputs: Union[None, List[str]]=None, - ): - super().__init__() - - self.inputs = inputs - self.outputs = outputs - - @classmethod - def construct_with_defaults(cls) -> "DataProcessInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.inputs = self.RECORD_SCHEMA.field_map["inputs"].default - self.outputs = self.RECORD_SCHEMA.field_map["outputs"].default - - - @property - def inputs(self) -> Union[None, List[str]]: - """Getter: the inputs of the data process""" - return self._inner_dict.get('inputs') # type: ignore - - @inputs.setter - def inputs(self, value: Union[None, List[str]]) -> None: - """Setter: the inputs of the data process""" - self._inner_dict['inputs'] = value - - - @property - def outputs(self) -> Union[None, List[str]]: - """Getter: the outputs of the data process""" - return self._inner_dict.get('outputs') # type: ignore - - @outputs.setter - def outputs(self, value: Union[None, List[str]]) -> None: - """Setter: the outputs of the data process""" - self._inner_dict['outputs'] = value - - -class DatasetDeprecationClass(DictWrapper): - """Dataset deprecation status""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetDeprecation") - def __init__(self, - deprecated: bool, - note: str, - decommissionTime: Union[None, int]=None, - actor: Union[None, str]=None, - ): - super().__init__() - - self.deprecated = deprecated - self.decommissionTime = decommissionTime - self.note = note - self.actor = actor - - @classmethod - def construct_with_defaults(cls) -> "DatasetDeprecationClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.deprecated = bool() - self.decommissionTime = self.RECORD_SCHEMA.field_map["decommissionTime"].default - self.note = str() - self.actor = self.RECORD_SCHEMA.field_map["actor"].default - - - @property - def deprecated(self) -> bool: - """Getter: Whether the dataset is deprecated by owner.""" - return self._inner_dict.get('deprecated') # type: ignore - - @deprecated.setter - def deprecated(self, value: bool) -> None: - """Setter: Whether the dataset is deprecated by owner.""" - self._inner_dict['deprecated'] = value - - - @property - def decommissionTime(self) -> Union[None, int]: - """Getter: The time user plan to decommission this dataset.""" - return self._inner_dict.get('decommissionTime') # type: ignore - - @decommissionTime.setter - def decommissionTime(self, value: Union[None, int]) -> None: - """Setter: The time user plan to decommission this dataset.""" - self._inner_dict['decommissionTime'] = value - - - @property - def note(self) -> str: - """Getter: Additional information about the dataset 
deprecation plan, such as the wiki, doc, RB.""" - return self._inner_dict.get('note') # type: ignore - - @note.setter - def note(self, value: str) -> None: - """Setter: Additional information about the dataset deprecation plan, such as the wiki, doc, RB.""" - self._inner_dict['note'] = value - - - @property - def actor(self) -> Union[None, str]: - """Getter: The corpuser URN which will be credited for modifying this deprecation content.""" - return self._inner_dict.get('actor') # type: ignore - - @actor.setter - def actor(self, value: Union[None, str]) -> None: - """Setter: The corpuser URN which will be credited for modifying this deprecation content.""" - self._inner_dict['actor'] = value - - -class DatasetFieldMappingClass(DictWrapper): - """Representation of mapping between fields in source dataset to the field in destination dataset""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetFieldMapping") - def __init__(self, - created: "AuditStampClass", - transformation: Union[Union[str, "TransformationTypeClass"], "UDFTransformerClass"], - sourceFields: List[str], - destinationField: str, - ): - super().__init__() - - self.created = created - self.transformation = transformation - self.sourceFields = sourceFields - self.destinationField = destinationField - - @classmethod - def construct_with_defaults(cls) -> "DatasetFieldMappingClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = AuditStampClass.construct_with_defaults() - self.transformation = TransformationTypeClass.BLACKBOX - self.sourceFields = list() - self.destinationField = str() - - - @property - def created(self) -> "AuditStampClass": - """Getter: Audit stamp containing who reported the field mapping and when""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: Audit stamp containing who reported the field mapping and when""" - self._inner_dict['created'] = value - - - @property - def transformation(self) -> Union[Union[str, "TransformationTypeClass"], "UDFTransformerClass"]: - """Getter: Transfomration function between the fields involved""" - return self._inner_dict.get('transformation') # type: ignore - - @transformation.setter - def transformation(self, value: Union[Union[str, "TransformationTypeClass"], "UDFTransformerClass"]) -> None: - """Setter: Transfomration function between the fields involved""" - self._inner_dict['transformation'] = value - - - @property - def sourceFields(self) -> List[str]: - """Getter: Source fields from which the fine grained lineage is derived""" - return self._inner_dict.get('sourceFields') # type: ignore - - @sourceFields.setter - def sourceFields(self, value: List[str]) -> None: - """Setter: Source fields from which the fine grained lineage is derived""" - self._inner_dict['sourceFields'] = value - - - @property - def destinationField(self) -> str: - """Getter: Destination field which is derived from source fields""" - return self._inner_dict.get('destinationField') # type: ignore - - @destinationField.setter - def destinationField(self, value: str) -> None: - """Setter: Destination field which is derived from source fields""" - self._inner_dict['destinationField'] = value - - -class DatasetFieldProfileClass(DictWrapper): - """Stats corresponding to fields in a dataset""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetFieldProfile") - def __init__(self, - 
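A short sketch of the DatasetDeprecationClass aspect defined above; the note and actor urn are placeholders:

# Hedged sketch, assuming the generated module import path.
from datahub.metadata.schema_classes import DatasetDeprecationClass

deprecation = DatasetDeprecationClass(
    deprecated=True,
    note="Superseded by the v2 table",       # free-form note, placeholder text
    actor="urn:li:corpuser:datahub",         # placeholder corpuser urn
)
print(deprecation.deprecated, deprecation.decommissionTime)  # decommissionTime stays None
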
fieldPath: str, - uniqueCount: Union[None, int]=None, - uniqueProportion: Union[None, float]=None, - nullCount: Union[None, int]=None, - nullProportion: Union[None, float]=None, - min: Union[None, str]=None, - max: Union[None, str]=None, - mean: Union[None, str]=None, - median: Union[None, str]=None, - stdev: Union[None, str]=None, - quantiles: Union[None, List["QuantileClass"]]=None, - distinctValueFrequencies: Union[None, List["ValueFrequencyClass"]]=None, - histogram: Union[None, "HistogramClass"]=None, - sampleValues: Union[None, List[str]]=None, - ): - super().__init__() - - self.fieldPath = fieldPath - self.uniqueCount = uniqueCount - self.uniqueProportion = uniqueProportion - self.nullCount = nullCount - self.nullProportion = nullProportion - self.min = min - self.max = max - self.mean = mean - self.median = median - self.stdev = stdev - self.quantiles = quantiles - self.distinctValueFrequencies = distinctValueFrequencies - self.histogram = histogram - self.sampleValues = sampleValues - - @classmethod - def construct_with_defaults(cls) -> "DatasetFieldProfileClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.fieldPath = str() - self.uniqueCount = self.RECORD_SCHEMA.field_map["uniqueCount"].default - self.uniqueProportion = self.RECORD_SCHEMA.field_map["uniqueProportion"].default - self.nullCount = self.RECORD_SCHEMA.field_map["nullCount"].default - self.nullProportion = self.RECORD_SCHEMA.field_map["nullProportion"].default - self.min = self.RECORD_SCHEMA.field_map["min"].default - self.max = self.RECORD_SCHEMA.field_map["max"].default - self.mean = self.RECORD_SCHEMA.field_map["mean"].default - self.median = self.RECORD_SCHEMA.field_map["median"].default - self.stdev = self.RECORD_SCHEMA.field_map["stdev"].default - self.quantiles = self.RECORD_SCHEMA.field_map["quantiles"].default - self.distinctValueFrequencies = self.RECORD_SCHEMA.field_map["distinctValueFrequencies"].default - self.histogram = self.RECORD_SCHEMA.field_map["histogram"].default - self.sampleValues = self.RECORD_SCHEMA.field_map["sampleValues"].default - - - @property - def fieldPath(self) -> str: - # No docs available. - return self._inner_dict.get('fieldPath') # type: ignore - - @fieldPath.setter - def fieldPath(self, value: str) -> None: - # No docs available. - self._inner_dict['fieldPath'] = value - - - @property - def uniqueCount(self) -> Union[None, int]: - # No docs available. - return self._inner_dict.get('uniqueCount') # type: ignore - - @uniqueCount.setter - def uniqueCount(self, value: Union[None, int]) -> None: - # No docs available. - self._inner_dict['uniqueCount'] = value - - - @property - def uniqueProportion(self) -> Union[None, float]: - # No docs available. - return self._inner_dict.get('uniqueProportion') # type: ignore - - @uniqueProportion.setter - def uniqueProportion(self, value: Union[None, float]) -> None: - # No docs available. - self._inner_dict['uniqueProportion'] = value - - - @property - def nullCount(self) -> Union[None, int]: - # No docs available. - return self._inner_dict.get('nullCount') # type: ignore - - @nullCount.setter - def nullCount(self, value: Union[None, int]) -> None: - # No docs available. - self._inner_dict['nullCount'] = value - - - @property - def nullProportion(self) -> Union[None, float]: - # No docs available. - return self._inner_dict.get('nullProportion') # type: ignore - - @nullProportion.setter - def nullProportion(self, value: Union[None, float]) -> None: - # No docs available. 
- self._inner_dict['nullProportion'] = value - - - @property - def min(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('min') # type: ignore - - @min.setter - def min(self, value: Union[None, str]) -> None: - # No docs available. - self._inner_dict['min'] = value - - - @property - def max(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('max') # type: ignore - - @max.setter - def max(self, value: Union[None, str]) -> None: - # No docs available. - self._inner_dict['max'] = value - - - @property - def mean(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('mean') # type: ignore - - @mean.setter - def mean(self, value: Union[None, str]) -> None: - # No docs available. - self._inner_dict['mean'] = value - - - @property - def median(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('median') # type: ignore - - @median.setter - def median(self, value: Union[None, str]) -> None: - # No docs available. - self._inner_dict['median'] = value - - - @property - def stdev(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('stdev') # type: ignore - - @stdev.setter - def stdev(self, value: Union[None, str]) -> None: - # No docs available. - self._inner_dict['stdev'] = value - - - @property - def quantiles(self) -> Union[None, List["QuantileClass"]]: - # No docs available. - return self._inner_dict.get('quantiles') # type: ignore - - @quantiles.setter - def quantiles(self, value: Union[None, List["QuantileClass"]]) -> None: - # No docs available. - self._inner_dict['quantiles'] = value - - - @property - def distinctValueFrequencies(self) -> Union[None, List["ValueFrequencyClass"]]: - # No docs available. - return self._inner_dict.get('distinctValueFrequencies') # type: ignore - - @distinctValueFrequencies.setter - def distinctValueFrequencies(self, value: Union[None, List["ValueFrequencyClass"]]) -> None: - # No docs available. - self._inner_dict['distinctValueFrequencies'] = value - - - @property - def histogram(self) -> Union[None, "HistogramClass"]: - # No docs available. - return self._inner_dict.get('histogram') # type: ignore - - @histogram.setter - def histogram(self, value: Union[None, "HistogramClass"]) -> None: - # No docs available. - self._inner_dict['histogram'] = value - - - @property - def sampleValues(self) -> Union[None, List[str]]: - # No docs available. - return self._inner_dict.get('sampleValues') # type: ignore - - @sampleValues.setter - def sampleValues(self, value: Union[None, List[str]]) -> None: - # No docs available. 
- self._inner_dict['sampleValues'] = value - - -class DatasetFieldUsageCountsClass(DictWrapper): - """Records field-level usage counts for a given dataset""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetFieldUsageCounts") - def __init__(self, - fieldPath: str, - count: int, - ): - super().__init__() - - self.fieldPath = fieldPath - self.count = count - - @classmethod - def construct_with_defaults(cls) -> "DatasetFieldUsageCountsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.fieldPath = str() - self.count = int() - - - @property - def fieldPath(self) -> str: - """Getter: The name of the field.""" - return self._inner_dict.get('fieldPath') # type: ignore - - @fieldPath.setter - def fieldPath(self, value: str) -> None: - """Setter: The name of the field.""" - self._inner_dict['fieldPath'] = value - - - @property - def count(self) -> int: - """Getter: Number of times the field has been used.""" - return self._inner_dict.get('count') # type: ignore - - @count.setter - def count(self, value: int) -> None: - """Setter: Number of times the field has been used.""" - self._inner_dict['count'] = value - - -class DatasetLineageTypeClass(object): - """The various types of supported dataset lineage""" - - - """Direct copy without modification""" - COPY = "COPY" - - """Transformed data with modification (format or content change)""" - TRANSFORMED = "TRANSFORMED" - - """Represents a view defined on the sources e.g. Hive view defined on underlying hive tables or a Hive table pointing to a HDFS dataset or DALI view defined on multiple sources""" - VIEW = "VIEW" - - -class DatasetProfileClass(DictWrapper): - """Stats corresponding to datasets""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetProfile") - def __init__(self, - timestampMillis: int, - eventGranularity: Union[None, "TimeWindowSizeClass"]=None, - partitionSpec: Union[None, "PartitionSpecClass"]=None, - rowCount: Union[None, int]=None, - columnCount: Union[None, int]=None, - fieldProfiles: Union[None, List["DatasetFieldProfileClass"]]=None, - ): - super().__init__() - - self.timestampMillis = timestampMillis - self.eventGranularity = eventGranularity - self.partitionSpec = partitionSpec - self.rowCount = rowCount - self.columnCount = columnCount - self.fieldProfiles = fieldProfiles - - @classmethod - def construct_with_defaults(cls) -> "DatasetProfileClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.timestampMillis = int() - self.eventGranularity = self.RECORD_SCHEMA.field_map["eventGranularity"].default - self.partitionSpec = self.RECORD_SCHEMA.field_map["partitionSpec"].default - self.rowCount = self.RECORD_SCHEMA.field_map["rowCount"].default - self.columnCount = self.RECORD_SCHEMA.field_map["columnCount"].default - self.fieldProfiles = self.RECORD_SCHEMA.field_map["fieldProfiles"].default - - - @property - def timestampMillis(self) -> int: - """Getter: The event timestamp field as epoch at UTC in milli seconds.""" - return self._inner_dict.get('timestampMillis') # type: ignore - - @timestampMillis.setter - def timestampMillis(self, value: int) -> None: - """Setter: The event timestamp field as epoch at UTC in milli seconds.""" - self._inner_dict['timestampMillis'] = value - - - @property - def eventGranularity(self) -> Union[None, "TimeWindowSizeClass"]: - """Getter: Granularity of the event if applicable""" - return 
self._inner_dict.get('eventGranularity') # type: ignore - - @eventGranularity.setter - def eventGranularity(self, value: Union[None, "TimeWindowSizeClass"]) -> None: - """Setter: Granularity of the event if applicable""" - self._inner_dict['eventGranularity'] = value - - - @property - def partitionSpec(self) -> Union[None, "PartitionSpecClass"]: - """Getter: The optional partition specification.""" - return self._inner_dict.get('partitionSpec') # type: ignore - - @partitionSpec.setter - def partitionSpec(self, value: Union[None, "PartitionSpecClass"]) -> None: - """Setter: The optional partition specification.""" - self._inner_dict['partitionSpec'] = value - - - @property - def rowCount(self) -> Union[None, int]: - # No docs available. - return self._inner_dict.get('rowCount') # type: ignore - - @rowCount.setter - def rowCount(self, value: Union[None, int]) -> None: - # No docs available. - self._inner_dict['rowCount'] = value - - - @property - def columnCount(self) -> Union[None, int]: - # No docs available. - return self._inner_dict.get('columnCount') # type: ignore - - @columnCount.setter - def columnCount(self, value: Union[None, int]) -> None: - # No docs available. - self._inner_dict['columnCount'] = value - - - @property - def fieldProfiles(self) -> Union[None, List["DatasetFieldProfileClass"]]: - # No docs available. - return self._inner_dict.get('fieldProfiles') # type: ignore - - @fieldProfiles.setter - def fieldProfiles(self, value: Union[None, List["DatasetFieldProfileClass"]]) -> None: - # No docs available. - self._inner_dict['fieldProfiles'] = value - - -class DatasetPropertiesClass(DictWrapper): - """Properties associated with a Dataset""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetProperties") - def __init__(self, - customProperties: Optional[Dict[str, str]]=None, - externalUrl: Union[None, str]=None, - description: Union[None, str]=None, - uri: Union[None, str]=None, - tags: Optional[List[str]]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.externalUrl = externalUrl - self.description = description - self.uri = uri - if tags is None: - # default: [] - self.tags = list() - else: - self.tags = tags - - @classmethod - def construct_with_defaults(cls) -> "DatasetPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.externalUrl = self.RECORD_SCHEMA.field_map["externalUrl"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.uri = self.RECORD_SCHEMA.field_map["uri"].default - self.tags = list() - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def externalUrl(self) -> Union[None, str]: - """Getter: URL where the reference exist""" - return self._inner_dict.get('externalUrl') # type: ignore - - @externalUrl.setter - def externalUrl(self, value: Union[None, str]) -> None: - """Setter: URL where the reference exist""" - self._inner_dict['externalUrl'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the dataset""" - return 
self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the dataset""" - self._inner_dict['description'] = value - - - @property - def uri(self) -> Union[None, str]: - """Getter: The abstracted URI such as hdfs:///data/tracking/PageViewEvent, file:///dir/file_name. Uri should not include any environment specific properties. Some datasets might not have a standardized uri, which makes this field optional (i.e. kafka topic).""" - return self._inner_dict.get('uri') # type: ignore - - @uri.setter - def uri(self, value: Union[None, str]) -> None: - """Setter: The abstracted URI such as hdfs:///data/tracking/PageViewEvent, file:///dir/file_name. Uri should not include any environment specific properties. Some datasets might not have a standardized uri, which makes this field optional (i.e. kafka topic).""" - self._inner_dict['uri'] = value - - - @property - def tags(self) -> List[str]: - """Getter: [Legacy] Unstructured tags for the dataset. Structured tags can be applied via the `GlobalTags` aspect.""" - return self._inner_dict.get('tags') # type: ignore - - @tags.setter - def tags(self, value: List[str]) -> None: - """Setter: [Legacy] Unstructured tags for the dataset. Structured tags can be applied via the `GlobalTags` aspect.""" - self._inner_dict['tags'] = value - - -class DatasetUpstreamLineageClass(DictWrapper): - """Fine Grained upstream lineage for fields in a dataset""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetUpstreamLineage") - def __init__(self, - fieldMappings: List["DatasetFieldMappingClass"], - ): - super().__init__() - - self.fieldMappings = fieldMappings - - @classmethod - def construct_with_defaults(cls) -> "DatasetUpstreamLineageClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.fieldMappings = list() - - - @property - def fieldMappings(self) -> List["DatasetFieldMappingClass"]: - """Getter: Upstream to downstream field level lineage mappings""" - return self._inner_dict.get('fieldMappings') # type: ignore - - @fieldMappings.setter - def fieldMappings(self, value: List["DatasetFieldMappingClass"]) -> None: - """Setter: Upstream to downstream field level lineage mappings""" - self._inner_dict['fieldMappings'] = value - - -class DatasetUsageStatisticsClass(DictWrapper): - """Stats corresponding to dataset's usage.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetUsageStatistics") - def __init__(self, - timestampMillis: int, - eventGranularity: Union[None, "TimeWindowSizeClass"]=None, - partitionSpec: Union[None, "PartitionSpecClass"]=None, - uniqueUserCount: Union[None, int]=None, - totalSqlQueries: Union[None, int]=None, - topSqlQueries: Union[None, List[str]]=None, - userCounts: Union[None, List["DatasetUserUsageCountsClass"]]=None, - fieldCounts: Union[None, List["DatasetFieldUsageCountsClass"]]=None, - ): - super().__init__() - - self.timestampMillis = timestampMillis - self.eventGranularity = eventGranularity - self.partitionSpec = partitionSpec - self.uniqueUserCount = uniqueUserCount - self.totalSqlQueries = totalSqlQueries - self.topSqlQueries = topSqlQueries - self.userCounts = userCounts - self.fieldCounts = fieldCounts - - @classmethod - def construct_with_defaults(cls) -> "DatasetUsageStatisticsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - 
self.timestampMillis = int() - self.eventGranularity = self.RECORD_SCHEMA.field_map["eventGranularity"].default - self.partitionSpec = self.RECORD_SCHEMA.field_map["partitionSpec"].default - self.uniqueUserCount = self.RECORD_SCHEMA.field_map["uniqueUserCount"].default - self.totalSqlQueries = self.RECORD_SCHEMA.field_map["totalSqlQueries"].default - self.topSqlQueries = self.RECORD_SCHEMA.field_map["topSqlQueries"].default - self.userCounts = self.RECORD_SCHEMA.field_map["userCounts"].default - self.fieldCounts = self.RECORD_SCHEMA.field_map["fieldCounts"].default - - - @property - def timestampMillis(self) -> int: - """Getter: The event timestamp field as epoch at UTC in milli seconds.""" - return self._inner_dict.get('timestampMillis') # type: ignore - - @timestampMillis.setter - def timestampMillis(self, value: int) -> None: - """Setter: The event timestamp field as epoch at UTC in milli seconds.""" - self._inner_dict['timestampMillis'] = value - - - @property - def eventGranularity(self) -> Union[None, "TimeWindowSizeClass"]: - """Getter: Granularity of the event if applicable""" - return self._inner_dict.get('eventGranularity') # type: ignore - - @eventGranularity.setter - def eventGranularity(self, value: Union[None, "TimeWindowSizeClass"]) -> None: - """Setter: Granularity of the event if applicable""" - self._inner_dict['eventGranularity'] = value - - - @property - def partitionSpec(self) -> Union[None, "PartitionSpecClass"]: - """Getter: The optional partition specification.""" - return self._inner_dict.get('partitionSpec') # type: ignore - - @partitionSpec.setter - def partitionSpec(self, value: Union[None, "PartitionSpecClass"]) -> None: - """Setter: The optional partition specification.""" - self._inner_dict['partitionSpec'] = value - - - @property - def uniqueUserCount(self) -> Union[None, int]: - """Getter: Unique user count""" - return self._inner_dict.get('uniqueUserCount') # type: ignore - - @uniqueUserCount.setter - def uniqueUserCount(self, value: Union[None, int]) -> None: - """Setter: Unique user count""" - self._inner_dict['uniqueUserCount'] = value - - - @property - def totalSqlQueries(self) -> Union[None, int]: - """Getter: Total SQL query count""" - return self._inner_dict.get('totalSqlQueries') # type: ignore - - @totalSqlQueries.setter - def totalSqlQueries(self, value: Union[None, int]) -> None: - """Setter: Total SQL query count""" - self._inner_dict['totalSqlQueries'] = value - - - @property - def topSqlQueries(self) -> Union[None, List[str]]: - """Getter: Frequent SQL queries; mostly makes sense for datasets in SQL databases""" - return self._inner_dict.get('topSqlQueries') # type: ignore - - @topSqlQueries.setter - def topSqlQueries(self, value: Union[None, List[str]]) -> None: - """Setter: Frequent SQL queries; mostly makes sense for datasets in SQL databases""" - self._inner_dict['topSqlQueries'] = value - - - @property - def userCounts(self) -> Union[None, List["DatasetUserUsageCountsClass"]]: - """Getter: Users within this bucket, with frequency counts""" - return self._inner_dict.get('userCounts') # type: ignore - - @userCounts.setter - def userCounts(self, value: Union[None, List["DatasetUserUsageCountsClass"]]) -> None: - """Setter: Users within this bucket, with frequency counts""" - self._inner_dict['userCounts'] = value - - - @property - def fieldCounts(self) -> Union[None, List["DatasetFieldUsageCountsClass"]]: - """Getter: Field-level usage stats""" - return self._inner_dict.get('fieldCounts') # type: ignore - - @fieldCounts.setter - def 
fieldCounts(self, value: Union[None, List["DatasetFieldUsageCountsClass"]]) -> None: - """Setter: Field-level usage stats""" - self._inner_dict['fieldCounts'] = value - - -class DatasetUserUsageCountsClass(DictWrapper): - """Records a single user's usage counts for a given resource""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.DatasetUserUsageCounts") - def __init__(self, - user: str, - count: int, - userEmail: Union[None, str]=None, - ): - super().__init__() - - self.user = user - self.count = count - self.userEmail = userEmail - - @classmethod - def construct_with_defaults(cls) -> "DatasetUserUsageCountsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.user = str() - self.count = int() - self.userEmail = self.RECORD_SCHEMA.field_map["userEmail"].default - - - @property - def user(self) -> str: - """Getter: The unique id of the user.""" - return self._inner_dict.get('user') # type: ignore - - @user.setter - def user(self, value: str) -> None: - """Setter: The unique id of the user.""" - self._inner_dict['user'] = value - - - @property - def count(self) -> int: - """Getter: Number of times the dataset has been used by the user.""" - return self._inner_dict.get('count') # type: ignore - - @count.setter - def count(self, value: int) -> None: - """Setter: Number of times the dataset has been used by the user.""" - self._inner_dict['count'] = value - - - @property - def userEmail(self) -> Union[None, str]: - """Getter: If user_email is set, we attempt to resolve the user's urn upon ingest""" - return self._inner_dict.get('userEmail') # type: ignore - - @userEmail.setter - def userEmail(self, value: Union[None, str]) -> None: - """Setter: If user_email is set, we attempt to resolve the user's urn upon ingest""" - self._inner_dict['userEmail'] = value - - -class EditableDatasetPropertiesClass(DictWrapper): - """EditableDatasetProperties stores editable changes made to dataset properties. 
This separates changes made from - ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.EditableDatasetProperties") - def __init__(self, - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, "AuditStampClass"]=None, - description: Union[None, str]=None, - ): - super().__init__() - - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - self.description = description - - @classmethod - def construct_with_defaults(cls) -> "EditableDatasetPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. 
It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the dataset""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the dataset""" - self._inner_dict['description'] = value - - -class HistogramClass(DictWrapper): - # No docs available. - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.Histogram") - def __init__(self, - boundaries: List[str], - heights: List[float], - ): - super().__init__() - - self.boundaries = boundaries - self.heights = heights - - @classmethod - def construct_with_defaults(cls) -> "HistogramClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.boundaries = list() - self.heights = list() - - - @property - def boundaries(self) -> List[str]: - # No docs available. - return self._inner_dict.get('boundaries') # type: ignore - - @boundaries.setter - def boundaries(self, value: List[str]) -> None: - # No docs available. - self._inner_dict['boundaries'] = value - - - @property - def heights(self) -> List[float]: - # No docs available. - return self._inner_dict.get('heights') # type: ignore - - @heights.setter - def heights(self, value: List[float]) -> None: - # No docs available. - self._inner_dict['heights'] = value - - -class QuantileClass(DictWrapper): - # No docs available. - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.Quantile") - def __init__(self, - quantile: str, - value: str, - ): - super().__init__() - - self.quantile = quantile - self.value = value - - @classmethod - def construct_with_defaults(cls) -> "QuantileClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.quantile = str() - self.value = str() - - - @property - def quantile(self) -> str: - # No docs available. - return self._inner_dict.get('quantile') # type: ignore - - @quantile.setter - def quantile(self, value: str) -> None: - # No docs available. - self._inner_dict['quantile'] = value - - - @property - def value(self) -> str: - # No docs available. - return self._inner_dict.get('value') # type: ignore - - @value.setter - def value(self, value: str) -> None: - # No docs available. 
- self._inner_dict['value'] = value - - -class UpstreamClass(DictWrapper): - """Upstream lineage information about a dataset including the source reporting the lineage""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.Upstream") - def __init__(self, - dataset: str, - type: Union[str, "DatasetLineageTypeClass"], - auditStamp: Optional["AuditStampClass"]=None, - ): - super().__init__() - - if auditStamp is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.auditStamp = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["auditStamp"].default, writers_schema=self.RECORD_SCHEMA.field_map["auditStamp"].type) - else: - self.auditStamp = auditStamp - self.dataset = dataset - self.type = type - - @classmethod - def construct_with_defaults(cls) -> "UpstreamClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.auditStamp = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["auditStamp"].default, writers_schema=self.RECORD_SCHEMA.field_map["auditStamp"].type) - self.dataset = str() - self.type = DatasetLineageTypeClass.COPY - - - @property - def auditStamp(self) -> "AuditStampClass": - """Getter: Audit stamp containing who reported the lineage and when. - WARNING: this field is deprecated and may be removed in a future release.""" - return self._inner_dict.get('auditStamp') # type: ignore - - @auditStamp.setter - def auditStamp(self, value: "AuditStampClass") -> None: - """Setter: Audit stamp containing who reported the lineage and when. - WARNING: this field is deprecated and may be removed in a future release.""" - self._inner_dict['auditStamp'] = value - - - @property - def dataset(self) -> str: - """Getter: The upstream dataset the lineage points to""" - return self._inner_dict.get('dataset') # type: ignore - - @dataset.setter - def dataset(self, value: str) -> None: - """Setter: The upstream dataset the lineage points to""" - self._inner_dict['dataset'] = value - - - @property - def type(self) -> Union[str, "DatasetLineageTypeClass"]: - """Getter: The type of the lineage""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[str, "DatasetLineageTypeClass"]) -> None: - """Setter: The type of the lineage""" - self._inner_dict['type'] = value - - -class UpstreamLineageClass(DictWrapper): - """Upstream lineage of a dataset""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.UpstreamLineage") - def __init__(self, - upstreams: List["UpstreamClass"], - ): - super().__init__() - - self.upstreams = upstreams - - @classmethod - def construct_with_defaults(cls) -> "UpstreamLineageClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.upstreams = list() - - - @property - def upstreams(self) -> List["UpstreamClass"]: - """Getter: List of upstream dataset lineage information""" - return self._inner_dict.get('upstreams') # type: ignore - - @upstreams.setter - def upstreams(self, value: List["UpstreamClass"]) -> None: - """Setter: List of upstream dataset lineage information""" - self._inner_dict['upstreams'] = value - - -class ValueFrequencyClass(DictWrapper): - # No docs available. 
- - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.dataset.ValueFrequency") - def __init__(self, - value: str, - frequency: int, - ): - super().__init__() - - self.value = value - self.frequency = frequency - - @classmethod - def construct_with_defaults(cls) -> "ValueFrequencyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.value = str() - self.frequency = int() - - - @property - def value(self) -> str: - # No docs available. - return self._inner_dict.get('value') # type: ignore - - @value.setter - def value(self, value: str) -> None: - # No docs available. - self._inner_dict['value'] = value - - - @property - def frequency(self) -> int: - # No docs available. - return self._inner_dict.get('frequency') # type: ignore - - @frequency.setter - def frequency(self, value: int) -> None: - # No docs available. - self._inner_dict['frequency'] = value - - -class ChangeTypeClass(object): - """Descriptor for a change action""" - - - """insert if not exists. otherwise update""" - UPSERT = "UPSERT" - - """NOT SUPPORTED YET - insert if not exists. otherwise fail""" - CREATE = "CREATE" - - """NOT SUPPORTED YET - update if exists. otherwise fail""" - UPDATE = "UPDATE" - - """NOT SUPPORTED YET - delete action""" - DELETE = "DELETE" - - """NOT SUPPORTED YET - patch the changes instead of full replace""" - PATCH = "PATCH" - - -class GlossaryNodeInfoClass(DictWrapper): - """Properties associated with a GlossaryNode""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo") - def __init__(self, - definition: str, - parentNode: Union[None, str]=None, - ): - super().__init__() - - self.definition = definition - self.parentNode = parentNode - - @classmethod - def construct_with_defaults(cls) -> "GlossaryNodeInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.definition = str() - self.parentNode = self.RECORD_SCHEMA.field_map["parentNode"].default - - - @property - def definition(self) -> str: - """Getter: Definition of business node""" - return self._inner_dict.get('definition') # type: ignore - - @definition.setter - def definition(self, value: str) -> None: - """Setter: Definition of business node""" - self._inner_dict['definition'] = value - - - @property - def parentNode(self) -> Union[None, str]: - """Getter: Parent node of the glossary term""" - return self._inner_dict.get('parentNode') # type: ignore - - @parentNode.setter - def parentNode(self, value: Union[None, str]) -> None: - """Setter: Parent node of the glossary term""" - self._inner_dict['parentNode'] = value - - -class GlossaryRelatedTermsClass(DictWrapper): - """Has A / Is A lineage information about a glossary Term reporting the lineage""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.glossary.GlossaryRelatedTerms") - def __init__(self, - isRelatedTerms: Union[None, List[str]]=None, - hasRelatedTerms: Union[None, List[str]]=None, - ): - super().__init__() - - self.isRelatedTerms = isRelatedTerms - self.hasRelatedTerms = hasRelatedTerms - - @classmethod - def construct_with_defaults(cls) -> "GlossaryRelatedTermsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.isRelatedTerms = self.RECORD_SCHEMA.field_map["isRelatedTerms"].default - self.hasRelatedTerms = self.RECORD_SCHEMA.field_map["hasRelatedTerms"].default - - - @property - def isRelatedTerms(self) -> 
Union[None, List[str]]: - """Getter: The relationship Is A with glossary term""" - return self._inner_dict.get('isRelatedTerms') # type: ignore - - @isRelatedTerms.setter - def isRelatedTerms(self, value: Union[None, List[str]]) -> None: - """Setter: The relationship Is A with glossary term""" - self._inner_dict['isRelatedTerms'] = value - - - @property - def hasRelatedTerms(self) -> Union[None, List[str]]: - """Getter: The relationship Has A with glossary term""" - return self._inner_dict.get('hasRelatedTerms') # type: ignore - - @hasRelatedTerms.setter - def hasRelatedTerms(self, value: Union[None, List[str]]) -> None: - """Setter: The relationship Has A with glossary term""" - self._inner_dict['hasRelatedTerms'] = value - - -class GlossaryTermInfoClass(DictWrapper): - """Properties associated with a GlossaryTerm""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.glossary.GlossaryTermInfo") - def __init__(self, - definition: str, - termSource: str, - parentNode: Union[None, str]=None, - sourceRef: Union[None, str]=None, - sourceUrl: Union[None, str]=None, - customProperties: Optional[Dict[str, str]]=None, - rawSchema: Union[None, str]=None, - ): - super().__init__() - - self.definition = definition - self.parentNode = parentNode - self.termSource = termSource - self.sourceRef = sourceRef - self.sourceUrl = sourceUrl - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.rawSchema = rawSchema - - @classmethod - def construct_with_defaults(cls) -> "GlossaryTermInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.definition = str() - self.parentNode = self.RECORD_SCHEMA.field_map["parentNode"].default - self.termSource = str() - self.sourceRef = self.RECORD_SCHEMA.field_map["sourceRef"].default - self.sourceUrl = self.RECORD_SCHEMA.field_map["sourceUrl"].default - self.customProperties = dict() - self.rawSchema = self.RECORD_SCHEMA.field_map["rawSchema"].default - - - @property - def definition(self) -> str: - """Getter: Definition of business term""" - return self._inner_dict.get('definition') # type: ignore - - @definition.setter - def definition(self, value: str) -> None: - """Setter: Definition of business term""" - self._inner_dict['definition'] = value - - - @property - def parentNode(self) -> Union[None, str]: - """Getter: Parent node of the glossary term""" - return self._inner_dict.get('parentNode') # type: ignore - - @parentNode.setter - def parentNode(self, value: Union[None, str]) -> None: - """Setter: Parent node of the glossary term""" - self._inner_dict['parentNode'] = value - - - @property - def termSource(self) -> str: - """Getter: Source of the Business Term (INTERNAL or EXTERNAL) with default value as INTERNAL""" - return self._inner_dict.get('termSource') # type: ignore - - @termSource.setter - def termSource(self, value: str) -> None: - """Setter: Source of the Business Term (INTERNAL or EXTERNAL) with default value as INTERNAL""" - self._inner_dict['termSource'] = value - - - @property - def sourceRef(self) -> Union[None, str]: - """Getter: External Reference to the business-term""" - return self._inner_dict.get('sourceRef') # type: ignore - - @sourceRef.setter - def sourceRef(self, value: Union[None, str]) -> None: - """Setter: External Reference to the business-term""" - self._inner_dict['sourceRef'] = value - - - @property - def sourceUrl(self) -> Union[None, str]: - """Getter: The abstracted 
URL such as https://spec.edmcouncil.org/fibo/ontology/FBC/FinancialInstruments/FinancialInstruments/CashInstrument.""" - return self._inner_dict.get('sourceUrl') # type: ignore - - @sourceUrl.setter - def sourceUrl(self, value: Union[None, str]) -> None: - """Setter: The abstracted URL such as https://spec.edmcouncil.org/fibo/ontology/FBC/FinancialInstruments/FinancialInstruments/CashInstrument.""" - self._inner_dict['sourceUrl'] = value - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: A key-value map to capture any other non-standardized properties for the glossary term""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: A key-value map to capture any other non-standardized properties for the glossary term""" - self._inner_dict['customProperties'] = value - - - @property - def rawSchema(self) -> Union[None, str]: - """Getter: Schema definition of the glossary term""" - return self._inner_dict.get('rawSchema') # type: ignore - - @rawSchema.setter - def rawSchema(self, value: Union[None, str]) -> None: - """Setter: Schema definition of the glossary term""" - self._inner_dict['rawSchema'] = value - - -class CorpGroupInfoClass(DictWrapper): - """group of corpUser, it may contains nested group""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.identity.CorpGroupInfo") - def __init__(self, - admins: List[str], - members: List[str], - groups: List[str], - displayName: Union[None, str]=None, - email: Union[None, str]=None, - description: Union[None, str]=None, - ): - super().__init__() - - self.displayName = displayName - self.email = email - self.admins = admins - self.members = members - self.groups = groups - self.description = description - - @classmethod - def construct_with_defaults(cls) -> "CorpGroupInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.displayName = self.RECORD_SCHEMA.field_map["displayName"].default - self.email = self.RECORD_SCHEMA.field_map["email"].default - self.admins = list() - self.members = list() - self.groups = list() - self.description = self.RECORD_SCHEMA.field_map["description"].default - - - @property - def displayName(self) -> Union[None, str]: - """Getter: The name to use when displaying the group.""" - return self._inner_dict.get('displayName') # type: ignore - - @displayName.setter - def displayName(self, value: Union[None, str]) -> None: - """Setter: The name to use when displaying the group.""" - self._inner_dict['displayName'] = value - - - @property - def email(self) -> Union[None, str]: - """Getter: email of this group""" - return self._inner_dict.get('email') # type: ignore - - @email.setter - def email(self, value: Union[None, str]) -> None: - """Setter: email of this group""" - self._inner_dict['email'] = value - - - @property - def admins(self) -> List[str]: - """Getter: owners of this group""" - return self._inner_dict.get('admins') # type: ignore - - @admins.setter - def admins(self, value: List[str]) -> None: - """Setter: owners of this group""" - self._inner_dict['admins'] = value - - - @property - def members(self) -> List[str]: - """Getter: List of ldap urn in this group.""" - return self._inner_dict.get('members') # type: ignore - - @members.setter - def members(self, value: List[str]) -> None: - """Setter: List of ldap urn in this group.""" - self._inner_dict['members'] = value - - - @property - def 
groups(self) -> List[str]: - """Getter: List of groups in this group.""" - return self._inner_dict.get('groups') # type: ignore - - @groups.setter - def groups(self, value: List[str]) -> None: - """Setter: List of groups in this group.""" - self._inner_dict['groups'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: A description of the group.""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: A description of the group.""" - self._inner_dict['description'] = value - - -class CorpUserEditableInfoClass(DictWrapper): - """Linkedin corp user information that can be edited from UI""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.identity.CorpUserEditableInfo") - def __init__(self, - aboutMe: Union[None, str]=None, - teams: Optional[List[str]]=None, - skills: Optional[List[str]]=None, - pictureLink: Optional[str]=None, - ): - super().__init__() - - self.aboutMe = aboutMe - if teams is None: - # default: [] - self.teams = list() - else: - self.teams = teams - if skills is None: - # default: [] - self.skills = list() - else: - self.skills = skills - if pictureLink is None: - # default: 'https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/default_avatar.png' - self.pictureLink = self.RECORD_SCHEMA.field_map["pictureLink"].default - else: - self.pictureLink = pictureLink - - @classmethod - def construct_with_defaults(cls) -> "CorpUserEditableInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.aboutMe = self.RECORD_SCHEMA.field_map["aboutMe"].default - self.teams = list() - self.skills = list() - self.pictureLink = self.RECORD_SCHEMA.field_map["pictureLink"].default - - - @property - def aboutMe(self) -> Union[None, str]: - """Getter: About me section of the user""" - return self._inner_dict.get('aboutMe') # type: ignore - - @aboutMe.setter - def aboutMe(self, value: Union[None, str]) -> None: - """Setter: About me section of the user""" - self._inner_dict['aboutMe'] = value - - - @property - def teams(self) -> List[str]: - """Getter: Teams that the user belongs to e.g. Metadata""" - return self._inner_dict.get('teams') # type: ignore - - @teams.setter - def teams(self, value: List[str]) -> None: - """Setter: Teams that the user belongs to e.g. Metadata""" - self._inner_dict['teams'] = value - - - @property - def skills(self) -> List[str]: - """Getter: Skills that the user possesses e.g. Machine Learning""" - return self._inner_dict.get('skills') # type: ignore - - @skills.setter - def skills(self, value: List[str]) -> None: - """Setter: Skills that the user possesses e.g. 
Machine Learning""" - self._inner_dict['skills'] = value - - - @property - def pictureLink(self) -> str: - """Getter: A URL which points to a picture which user wants to set as a profile photo""" - return self._inner_dict.get('pictureLink') # type: ignore - - @pictureLink.setter - def pictureLink(self, value: str) -> None: - """Setter: A URL which points to a picture which user wants to set as a profile photo""" - self._inner_dict['pictureLink'] = value - - -class CorpUserInfoClass(DictWrapper): - """Linkedin corp user information""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.identity.CorpUserInfo") - def __init__(self, - active: bool, - displayName: Union[None, str]=None, - email: Union[None, str]=None, - title: Union[None, str]=None, - managerUrn: Union[None, str]=None, - departmentId: Union[None, int]=None, - departmentName: Union[None, str]=None, - firstName: Union[None, str]=None, - lastName: Union[None, str]=None, - fullName: Union[None, str]=None, - countryCode: Union[None, str]=None, - ): - super().__init__() - - self.active = active - self.displayName = displayName - self.email = email - self.title = title - self.managerUrn = managerUrn - self.departmentId = departmentId - self.departmentName = departmentName - self.firstName = firstName - self.lastName = lastName - self.fullName = fullName - self.countryCode = countryCode - - @classmethod - def construct_with_defaults(cls) -> "CorpUserInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.active = bool() - self.displayName = self.RECORD_SCHEMA.field_map["displayName"].default - self.email = self.RECORD_SCHEMA.field_map["email"].default - self.title = self.RECORD_SCHEMA.field_map["title"].default - self.managerUrn = self.RECORD_SCHEMA.field_map["managerUrn"].default - self.departmentId = self.RECORD_SCHEMA.field_map["departmentId"].default - self.departmentName = self.RECORD_SCHEMA.field_map["departmentName"].default - self.firstName = self.RECORD_SCHEMA.field_map["firstName"].default - self.lastName = self.RECORD_SCHEMA.field_map["lastName"].default - self.fullName = self.RECORD_SCHEMA.field_map["fullName"].default - self.countryCode = self.RECORD_SCHEMA.field_map["countryCode"].default - - - @property - def active(self) -> bool: - """Getter: Whether the corpUser is active, ref: https://iwww.corp.linkedin.com/wiki/cf/display/GTSD/Accessing+Active+Directory+via+LDAP+tools""" - return self._inner_dict.get('active') # type: ignore - - @active.setter - def active(self, value: bool) -> None: - """Setter: Whether the corpUser is active, ref: https://iwww.corp.linkedin.com/wiki/cf/display/GTSD/Accessing+Active+Directory+via+LDAP+tools""" - self._inner_dict['active'] = value - - - @property - def displayName(self) -> Union[None, str]: - """Getter: displayName of this user , e.g. Hang Zhang(DataHQ)""" - return self._inner_dict.get('displayName') # type: ignore - - @displayName.setter - def displayName(self, value: Union[None, str]) -> None: - """Setter: displayName of this user , e.g. 
Hang Zhang(DataHQ)""" - self._inner_dict['displayName'] = value - - - @property - def email(self) -> Union[None, str]: - """Getter: email address of this user""" - return self._inner_dict.get('email') # type: ignore - - @email.setter - def email(self, value: Union[None, str]) -> None: - """Setter: email address of this user""" - self._inner_dict['email'] = value - - - @property - def title(self) -> Union[None, str]: - """Getter: title of this user""" - return self._inner_dict.get('title') # type: ignore - - @title.setter - def title(self, value: Union[None, str]) -> None: - """Setter: title of this user""" - self._inner_dict['title'] = value - - - @property - def managerUrn(self) -> Union[None, str]: - """Getter: direct manager of this user""" - return self._inner_dict.get('managerUrn') # type: ignore - - @managerUrn.setter - def managerUrn(self, value: Union[None, str]) -> None: - """Setter: direct manager of this user""" - self._inner_dict['managerUrn'] = value - - - @property - def departmentId(self) -> Union[None, int]: - """Getter: department id this user belong to""" - return self._inner_dict.get('departmentId') # type: ignore - - @departmentId.setter - def departmentId(self, value: Union[None, int]) -> None: - """Setter: department id this user belong to""" - self._inner_dict['departmentId'] = value - - - @property - def departmentName(self) -> Union[None, str]: - """Getter: department name this user belong to""" - return self._inner_dict.get('departmentName') # type: ignore - - @departmentName.setter - def departmentName(self, value: Union[None, str]) -> None: - """Setter: department name this user belong to""" - self._inner_dict['departmentName'] = value - - - @property - def firstName(self) -> Union[None, str]: - """Getter: first name of this user""" - return self._inner_dict.get('firstName') # type: ignore - - @firstName.setter - def firstName(self, value: Union[None, str]) -> None: - """Setter: first name of this user""" - self._inner_dict['firstName'] = value - - - @property - def lastName(self) -> Union[None, str]: - """Getter: last name of this user""" - return self._inner_dict.get('lastName') # type: ignore - - @lastName.setter - def lastName(self, value: Union[None, str]) -> None: - """Setter: last name of this user""" - self._inner_dict['lastName'] = value - - - @property - def fullName(self) -> Union[None, str]: - """Getter: Common name of this user, format is firstName + lastName (split by a whitespace)""" - return self._inner_dict.get('fullName') # type: ignore - - @fullName.setter - def fullName(self, value: Union[None, str]) -> None: - """Setter: Common name of this user, format is firstName + lastName (split by a whitespace)""" - self._inner_dict['fullName'] = value - - - @property - def countryCode(self) -> Union[None, str]: - """Getter: two uppercase letters country code. e.g. US""" - return self._inner_dict.get('countryCode') # type: ignore - - @countryCode.setter - def countryCode(self, value: Union[None, str]) -> None: - """Setter: two uppercase letters country code. e.g. 
US""" - self._inner_dict['countryCode'] = value - - -class GroupMembershipClass(DictWrapper): - """Carries information about the CorpGroups a user is in.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.identity.GroupMembership") - def __init__(self, - groups: List[str], - ): - super().__init__() - - self.groups = groups - - @classmethod - def construct_with_defaults(cls) -> "GroupMembershipClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.groups = list() - - - @property - def groups(self) -> List[str]: - # No docs available. - return self._inner_dict.get('groups') # type: ignore - - @groups.setter - def groups(self, value: List[str]) -> None: - # No docs available. - self._inner_dict['groups'] = value - - -class ChartKeyClass(DictWrapper): - """Key for a Chart""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.ChartKey") - def __init__(self, - dashboardTool: str, - chartId: str, - ): - super().__init__() - - self.dashboardTool = dashboardTool - self.chartId = chartId - - @classmethod - def construct_with_defaults(cls) -> "ChartKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.dashboardTool = str() - self.chartId = str() - - - @property - def dashboardTool(self) -> str: - """Getter: The name of the dashboard tool such as looker, redash etc.""" - return self._inner_dict.get('dashboardTool') # type: ignore - - @dashboardTool.setter - def dashboardTool(self, value: str) -> None: - """Setter: The name of the dashboard tool such as looker, redash etc.""" - self._inner_dict['dashboardTool'] = value - - - @property - def chartId(self) -> str: - """Getter: Unique id for the chart. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, chart URL could be used here for Looker such as 'looker.linkedin.com/looks/1234'""" - return self._inner_dict.get('chartId') # type: ignore - - @chartId.setter - def chartId(self, value: str) -> None: - """Setter: Unique id for the chart. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, chart URL could be used here for Looker such as 'looker.linkedin.com/looks/1234'""" - self._inner_dict['chartId'] = value - - -class CorpGroupKeyClass(DictWrapper): - """Key for a CorpGroup""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.CorpGroupKey") - def __init__(self, - name: str, - ): - super().__init__() - - self.name = name - - @classmethod - def construct_with_defaults(cls) -> "CorpGroupKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - - - @property - def name(self) -> str: - """Getter: The URL-encoded name of the AD/LDAP group. Serves as a globally unique identifier within DataHub.""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: The URL-encoded name of the AD/LDAP group. 
Serves as a globally unique identifier within DataHub.""" - self._inner_dict['name'] = value - - -class CorpUserKeyClass(DictWrapper): - """Key for a CorpUser""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.CorpUserKey") - def __init__(self, - username: str, - ): - super().__init__() - - self.username = username - - @classmethod - def construct_with_defaults(cls) -> "CorpUserKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.username = str() - - - @property - def username(self) -> str: - """Getter: The name of the AD/LDAP user.""" - return self._inner_dict.get('username') # type: ignore - - @username.setter - def username(self, value: str) -> None: - """Setter: The name of the AD/LDAP user.""" - self._inner_dict['username'] = value - - -class DashboardKeyClass(DictWrapper): - """Key for a Dashboard""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DashboardKey") - def __init__(self, - dashboardTool: str, - dashboardId: str, - ): - super().__init__() - - self.dashboardTool = dashboardTool - self.dashboardId = dashboardId - - @classmethod - def construct_with_defaults(cls) -> "DashboardKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.dashboardTool = str() - self.dashboardId = str() - - - @property - def dashboardTool(self) -> str: - """Getter: The name of the dashboard tool such as looker, redash etc.""" - return self._inner_dict.get('dashboardTool') # type: ignore - - @dashboardTool.setter - def dashboardTool(self, value: str) -> None: - """Setter: The name of the dashboard tool such as looker, redash etc.""" - self._inner_dict['dashboardTool'] = value - - - @property - def dashboardId(self) -> str: - """Getter: Unique id for the dashboard. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, dashboard URL could be used here for Looker such as 'looker.linkedin.com/dashboards/1234'""" - return self._inner_dict.get('dashboardId') # type: ignore - - @dashboardId.setter - def dashboardId(self, value: str) -> None: - """Setter: Unique id for the dashboard. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. 
As an example, dashboard URL could be used here for Looker such as 'looker.linkedin.com/dashboards/1234'""" - self._inner_dict['dashboardId'] = value - - -class DataFlowKeyClass(DictWrapper): - """Key for a Data Flow""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataFlowKey") - def __init__(self, - orchestrator: str, - flowId: str, - cluster: str, - ): - super().__init__() - - self.orchestrator = orchestrator - self.flowId = flowId - self.cluster = cluster - - @classmethod - def construct_with_defaults(cls) -> "DataFlowKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.orchestrator = str() - self.flowId = str() - self.cluster = str() - - - @property - def orchestrator(self) -> str: - """Getter: Workflow manager like azkaban, airflow which orchestrates the flow""" - return self._inner_dict.get('orchestrator') # type: ignore - - @orchestrator.setter - def orchestrator(self, value: str) -> None: - """Setter: Workflow manager like azkaban, airflow which orchestrates the flow""" - self._inner_dict['orchestrator'] = value - - - @property - def flowId(self) -> str: - """Getter: Unique Identifier of the data flow""" - return self._inner_dict.get('flowId') # type: ignore - - @flowId.setter - def flowId(self, value: str) -> None: - """Setter: Unique Identifier of the data flow""" - self._inner_dict['flowId'] = value - - - @property - def cluster(self) -> str: - """Getter: Cluster where the flow is executed""" - return self._inner_dict.get('cluster') # type: ignore - - @cluster.setter - def cluster(self, value: str) -> None: - """Setter: Cluster where the flow is executed""" - self._inner_dict['cluster'] = value - - -class DataHubPolicyKeyClass(DictWrapper): - """Key for a DataHub Policy""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataHubPolicyKey") - def __init__(self, - id: str, - ): - super().__init__() - - self.id = id - - @classmethod - def construct_with_defaults(cls) -> "DataHubPolicyKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.id = str() - - - @property - def id(self) -> str: - """Getter: A unique id for the DataHub access policy record. Generated on the server side at policy creation time.""" - return self._inner_dict.get('id') # type: ignore - - @id.setter - def id(self, value: str) -> None: - """Setter: A unique id for the DataHub access policy record. 
Generated on the server side at policy creation time.""" - self._inner_dict['id'] = value - - -class DataJobKeyClass(DictWrapper): - """Key for a Data Job""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataJobKey") - def __init__(self, - flow: str, - jobId: str, - ): - super().__init__() - - self.flow = flow - self.jobId = jobId - - @classmethod - def construct_with_defaults(cls) -> "DataJobKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.flow = str() - self.jobId = str() - - - @property - def flow(self) -> str: - """Getter: Standardized data processing flow urn representing the flow for the job""" - return self._inner_dict.get('flow') # type: ignore - - @flow.setter - def flow(self, value: str) -> None: - """Setter: Standardized data processing flow urn representing the flow for the job""" - self._inner_dict['flow'] = value - - - @property - def jobId(self) -> str: - """Getter: Unique Identifier of the data job""" - return self._inner_dict.get('jobId') # type: ignore - - @jobId.setter - def jobId(self, value: str) -> None: - """Setter: Unique Identifier of the data job""" - self._inner_dict['jobId'] = value - - -class DataPlatformKeyClass(DictWrapper): - """Key for a Data Platform""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataPlatformKey") - def __init__(self, - platformName: str, - ): - super().__init__() - - self.platformName = platformName - - @classmethod - def construct_with_defaults(cls) -> "DataPlatformKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.platformName = str() - - - @property - def platformName(self) -> str: - """Getter: Data platform name i.e. hdfs, oracle, espresso""" - return self._inner_dict.get('platformName') # type: ignore - - @platformName.setter - def platformName(self, value: str) -> None: - """Setter: Data platform name i.e. hdfs, oracle, espresso""" - self._inner_dict['platformName'] = value - - -class DataProcessKeyClass(DictWrapper): - """Key for a Data Process""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataProcessKey") - def __init__(self, - name: str, - orchestrator: str, - origin: Union[str, "FabricTypeClass"], - ): - super().__init__() - - self.name = name - self.orchestrator = orchestrator - self.origin = origin - - @classmethod - def construct_with_defaults(cls) -> "DataProcessKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - self.orchestrator = str() - self.origin = FabricTypeClass.DEV - - - @property - def name(self) -> str: - """Getter: Process name i.e. an ETL job name""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Process name i.e. an ETL job name""" - self._inner_dict['name'] = value - - - @property - def orchestrator(self) -> str: - """Getter: Standardized Orchestrator where data process is defined. - TODO: Migrate towards something that can be validated like DataPlatform urn""" - return self._inner_dict.get('orchestrator') # type: ignore - - @orchestrator.setter - def orchestrator(self, value: str) -> None: - """Setter: Standardized Orchestrator where data process is defined. 
- TODO: Migrate towards something that can be validated like DataPlatform urn""" - self._inner_dict['orchestrator'] = value - - - @property - def origin(self) -> Union[str, "FabricTypeClass"]: - """Getter: Fabric type where dataset belongs to or where it was generated.""" - return self._inner_dict.get('origin') # type: ignore - - @origin.setter - def origin(self, value: Union[str, "FabricTypeClass"]) -> None: - """Setter: Fabric type where dataset belongs to or where it was generated.""" - self._inner_dict['origin'] = value - - -class DatasetKeyClass(DictWrapper): - """Key for a Dataset""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DatasetKey") - def __init__(self, - platform: str, - name: str, - origin: Union[str, "FabricTypeClass"], - ): - super().__init__() - - self.platform = platform - self.name = name - self.origin = origin - - @classmethod - def construct_with_defaults(cls) -> "DatasetKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.platform = str() - self.name = str() - self.origin = FabricTypeClass.DEV - - - @property - def platform(self) -> str: - """Getter: Data platform urn associated with the dataset""" - return self._inner_dict.get('platform') # type: ignore - - @platform.setter - def platform(self, value: str) -> None: - """Setter: Data platform urn associated with the dataset""" - self._inner_dict['platform'] = value - - - @property - def name(self) -> str: - """Getter: Dataset native name e.g. <db>.<table>, /dir/subdir/<name>, or <name>"""
- return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Dataset native name e.g. <db>.<table>, /dir/subdir/<name>, or <name>"""
- self._inner_dict['name'] = value - - - @property - def origin(self) -> Union[str, "FabricTypeClass"]: - """Getter: Fabric type where dataset belongs to or where it was generated.""" - return self._inner_dict.get('origin') # type: ignore - - @origin.setter - def origin(self, value: Union[str, "FabricTypeClass"]) -> None: - """Setter: Fabric type where dataset belongs to or where it was generated.""" - self._inner_dict['origin'] = value - - -class GlossaryNodeKeyClass(DictWrapper): - """Key for a GlossaryNode""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.GlossaryNodeKey") - def __init__(self, - name: str, - ): - super().__init__() - - self.name = name - - @classmethod - def construct_with_defaults(cls) -> "GlossaryNodeKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - - - @property - def name(self) -> str: - # No docs available. - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - # No docs available. - self._inner_dict['name'] = value - - -class GlossaryTermKeyClass(DictWrapper): - """Key for a GlossaryTerm""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.GlossaryTermKey") - def __init__(self, - name: str, - ): - super().__init__() - - self.name = name - - @classmethod - def construct_with_defaults(cls) -> "GlossaryTermKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - - - @property - def name(self) -> str: - # No docs available. - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - # No docs available.
- self._inner_dict['name'] = value - - -class MLFeatureKeyClass(DictWrapper): - """Key for an MLFeature""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLFeatureKey") - def __init__(self, - featureNamespace: str, - name: str, - ): - super().__init__() - - self.featureNamespace = featureNamespace - self.name = name - - @classmethod - def construct_with_defaults(cls) -> "MLFeatureKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.featureNamespace = str() - self.name = str() - - - @property - def featureNamespace(self) -> str: - """Getter: Namespace for the feature""" - return self._inner_dict.get('featureNamespace') # type: ignore - - @featureNamespace.setter - def featureNamespace(self, value: str) -> None: - """Setter: Namespace for the feature""" - self._inner_dict['featureNamespace'] = value - - - @property - def name(self) -> str: - """Getter: Name of the feature""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the feature""" - self._inner_dict['name'] = value - - -class MLFeatureTableKeyClass(DictWrapper): - """Key for an MLFeatureTable""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLFeatureTableKey") - def __init__(self, - platform: str, - name: str, - ): - super().__init__() - - self.platform = platform - self.name = name - - @classmethod - def construct_with_defaults(cls) -> "MLFeatureTableKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.platform = str() - self.name = str() - - - @property - def platform(self) -> str: - """Getter: Data platform urn associated with the feature table""" - return self._inner_dict.get('platform') # type: ignore - - @platform.setter - def platform(self, value: str) -> None: - """Setter: Data platform urn associated with the feature table""" - self._inner_dict['platform'] = value - - - @property - def name(self) -> str: - """Getter: Name of the feature table""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the feature table""" - self._inner_dict['name'] = value - - -class MLModelDeploymentKeyClass(DictWrapper): - """Key for an ML model deployment""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelDeploymentKey") - def __init__(self, - platform: str, - name: str, - origin: Union[str, "FabricTypeClass"], - ): - super().__init__() - - self.platform = platform - self.name = name - self.origin = origin - - @classmethod - def construct_with_defaults(cls) -> "MLModelDeploymentKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.platform = str() - self.name = str() - self.origin = FabricTypeClass.DEV - - - @property - def platform(self) -> str: - """Getter: Standardized platform urn for the model Deployment""" - return self._inner_dict.get('platform') # type: ignore - - @platform.setter - def platform(self, value: str) -> None: - """Setter: Standardized platform urn for the model Deployment""" - self._inner_dict['platform'] = value - - - @property - def name(self) -> str: - """Getter: Name of the MLModelDeployment""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the MLModelDeployment""" - 
self._inner_dict['name'] = value - - - @property - def origin(self) -> Union[str, "FabricTypeClass"]: - """Getter: Fabric type where model Deployment belongs to or where it was generated""" - return self._inner_dict.get('origin') # type: ignore - - @origin.setter - def origin(self, value: Union[str, "FabricTypeClass"]) -> None: - """Setter: Fabric type where model Deployment belongs to or where it was generated""" - self._inner_dict['origin'] = value - - -class MLModelGroupKeyClass(DictWrapper): - """Key for an ML model group""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey") - def __init__(self, - platform: str, - name: str, - origin: Union[str, "FabricTypeClass"], - ): - super().__init__() - - self.platform = platform - self.name = name - self.origin = origin - - @classmethod - def construct_with_defaults(cls) -> "MLModelGroupKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.platform = str() - self.name = str() - self.origin = FabricTypeClass.DEV - - - @property - def platform(self) -> str: - """Getter: Standardized platform urn for the model group""" - return self._inner_dict.get('platform') # type: ignore - - @platform.setter - def platform(self, value: str) -> None: - """Setter: Standardized platform urn for the model group""" - self._inner_dict['platform'] = value - - - @property - def name(self) -> str: - """Getter: Name of the MLModelGroup""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the MLModelGroup""" - self._inner_dict['name'] = value - - - @property - def origin(self) -> Union[str, "FabricTypeClass"]: - """Getter: Fabric type where model group belongs to or where it was generated""" - return self._inner_dict.get('origin') # type: ignore - - @origin.setter - def origin(self, value: Union[str, "FabricTypeClass"]) -> None: - """Setter: Fabric type where model group belongs to or where it was generated""" - self._inner_dict['origin'] = value - - -class MLModelKeyClass(DictWrapper): - """Key for an ML model""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelKey") - def __init__(self, - platform: str, - name: str, - origin: Union[str, "FabricTypeClass"], - ): - super().__init__() - - self.platform = platform - self.name = name - self.origin = origin - - @classmethod - def construct_with_defaults(cls) -> "MLModelKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.platform = str() - self.name = str() - self.origin = FabricTypeClass.DEV - - - @property - def platform(self) -> str: - """Getter: Standardized platform urn for the model""" - return self._inner_dict.get('platform') # type: ignore - - @platform.setter - def platform(self, value: str) -> None: - """Setter: Standardized platform urn for the model""" - self._inner_dict['platform'] = value - - - @property - def name(self) -> str: - """Getter: Name of the MLModel""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the MLModel""" - self._inner_dict['name'] = value - - - @property - def origin(self) -> Union[str, "FabricTypeClass"]: - """Getter: Fabric type where model belongs to or where it was generated""" - return self._inner_dict.get('origin') # type: ignore - - @origin.setter - def origin(self, value: Union[str, "FabricTypeClass"]) 
-> None: - """Setter: Fabric type where model belongs to or where it was generated""" - self._inner_dict['origin'] = value - - -class MLPrimaryKeyKeyClass(DictWrapper): - """Key for an MLPrimaryKey""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLPrimaryKeyKey") - def __init__(self, - featureNamespace: str, - name: str, - ): - super().__init__() - - self.featureNamespace = featureNamespace - self.name = name - - @classmethod - def construct_with_defaults(cls) -> "MLPrimaryKeyKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.featureNamespace = str() - self.name = str() - - - @property - def featureNamespace(self) -> str: - """Getter: Namespace for the primary key""" - return self._inner_dict.get('featureNamespace') # type: ignore - - @featureNamespace.setter - def featureNamespace(self, value: str) -> None: - """Setter: Namespace for the primary key""" - self._inner_dict['featureNamespace'] = value - - - @property - def name(self) -> str: - """Getter: Name of the primary key""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the primary key""" - self._inner_dict['name'] = value - - -class SchemaFieldKeyClass(DictWrapper): - """Key for a SchemaField""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.SchemaFieldKey") - def __init__(self, - parent: str, - fieldPath: str, - ): - super().__init__() - - self.parent = parent - self.fieldPath = fieldPath - - @classmethod - def construct_with_defaults(cls) -> "SchemaFieldKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.parent = str() - self.fieldPath = str() - - - @property - def parent(self) -> str: - """Getter: Parent associated with the schema field""" - return self._inner_dict.get('parent') # type: ignore - - @parent.setter - def parent(self, value: str) -> None: - """Setter: Parent associated with the schema field""" - self._inner_dict['parent'] = value - - - @property - def fieldPath(self) -> str: - """Getter: fieldPath identifying the schema field""" - return self._inner_dict.get('fieldPath') # type: ignore - - @fieldPath.setter - def fieldPath(self, value: str) -> None: - """Setter: fieldPath identifying the schema field""" - self._inner_dict['fieldPath'] = value - - -class TagKeyClass(DictWrapper): - """Key for a Tag""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.TagKey") - def __init__(self, - name: str, - ): - super().__init__() - - self.name = name - - @classmethod - def construct_with_defaults(cls) -> "TagKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - - - @property - def name(self) -> str: - """Getter: The unique tag name""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: The unique tag name""" - self._inner_dict['name'] = value - - -class ChartSnapshotClass(DictWrapper): - """A metadata snapshot for a specific Chart entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["ChartKeyClass", "ChartInfoClass", "ChartQueryClass", "EditableChartPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", 
"GlossaryTermsClass", "InstitutionalMemoryClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "ChartSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["ChartKeyClass", "ChartInfoClass", "ChartQueryClass", "EditableChartPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]: - """Getter: The list of metadata aspects associated with the chart. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["ChartKeyClass", "ChartInfoClass", "ChartQueryClass", "EditableChartPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]) -> None: - """Setter: The list of metadata aspects associated with the chart. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class CorpGroupSnapshotClass(DictWrapper): - """A metadata snapshot for a specific CorpGroup entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["CorpGroupKeyClass", "CorpGroupInfoClass", "GlobalTagsClass", "StatusClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "CorpGroupSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["CorpGroupKeyClass", "CorpGroupInfoClass", "GlobalTagsClass", "StatusClass"]]: - """Getter: The list of metadata aspects associated with the LdapUser. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["CorpGroupKeyClass", "CorpGroupInfoClass", "GlobalTagsClass", "StatusClass"]]) -> None: - """Setter: The list of metadata aspects associated with the LdapUser. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class CorpUserSnapshotClass(DictWrapper): - """A metadata snapshot for a specific CorpUser entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["CorpUserKeyClass", "CorpUserInfoClass", "CorpUserEditableInfoClass", "GroupMembershipClass", "GlobalTagsClass", "StatusClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "CorpUserSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["CorpUserKeyClass", "CorpUserInfoClass", "CorpUserEditableInfoClass", "GroupMembershipClass", "GlobalTagsClass", "StatusClass"]]: - """Getter: The list of metadata aspects associated with the CorpUser. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["CorpUserKeyClass", "CorpUserInfoClass", "CorpUserEditableInfoClass", "GroupMembershipClass", "GlobalTagsClass", "StatusClass"]]) -> None: - """Setter: The list of metadata aspects associated with the CorpUser. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class DashboardSnapshotClass(DictWrapper): - """A metadata snapshot for a specific Dashboard entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["DashboardKeyClass", "DashboardInfoClass", "EditableDashboardPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "DashboardSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["DashboardKeyClass", "DashboardInfoClass", "EditableDashboardPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]: - """Getter: The list of metadata aspects associated with the dashboard. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["DashboardKeyClass", "DashboardInfoClass", "EditableDashboardPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]) -> None: - """Setter: The list of metadata aspects associated with the dashboard. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class DataFlowSnapshotClass(DictWrapper): - """A metadata snapshot for a specific DataFlow entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["DataFlowKeyClass", "DataFlowInfoClass", "EditableDataFlowPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "DataFlowSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["DataFlowKeyClass", "DataFlowInfoClass", "EditableDataFlowPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]: - """Getter: The list of metadata aspects associated with the data flow. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["DataFlowKeyClass", "DataFlowInfoClass", "EditableDataFlowPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]) -> None: - """Setter: The list of metadata aspects associated with the data flow. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class DataHubPolicySnapshotClass(DictWrapper): - """A metadata snapshot for DataHub Access Policy data.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.DataHubPolicySnapshot") - def __init__(self, - urn: str, - aspects: List[Union["DataHubPolicyKeyClass", "DataHubPolicyInfoClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "DataHubPolicySnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["DataHubPolicyKeyClass", "DataHubPolicyInfoClass"]]: - """Getter: The list of metadata aspects associated with the DataHub access policy.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["DataHubPolicyKeyClass", "DataHubPolicyInfoClass"]]) -> None: - """Setter: The list of metadata aspects associated with the DataHub access policy.""" - self._inner_dict['aspects'] = value - - -class DataJobSnapshotClass(DictWrapper): - """A metadata snapshot for a specific DataJob entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["DataJobKeyClass", "DataJobInfoClass", "DataJobInputOutputClass", "EditableDataJobPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "DataJobSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["DataJobKeyClass", "DataJobInfoClass", "DataJobInputOutputClass", "EditableDataJobPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]: - """Getter: The list of metadata aspects associated with the data job. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["DataJobKeyClass", "DataJobInfoClass", "DataJobInputOutputClass", "EditableDataJobPropertiesClass", "OwnershipClass", "StatusClass", "GlobalTagsClass", "BrowsePathsClass", "GlossaryTermsClass", "InstitutionalMemoryClass"]]) -> None: - """Setter: The list of metadata aspects associated with the data job. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class DataPlatformSnapshotClass(DictWrapper): - """A metadata snapshot for a specific dataplatform entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.DataPlatformSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["DataPlatformKeyClass", "DataPlatformInfoClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "DataPlatformSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["DataPlatformKeyClass", "DataPlatformInfoClass"]]: - """Getter: The list of metadata aspects associated with the data platform. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["DataPlatformKeyClass", "DataPlatformInfoClass"]]) -> None: - """Setter: The list of metadata aspects associated with the data platform. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class DataProcessSnapshotClass(DictWrapper): - """A metadata snapshot for a specific Data process entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.DataProcessSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["DataProcessKeyClass", "OwnershipClass", "DataProcessInfoClass", "StatusClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "DataProcessSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["DataProcessKeyClass", "OwnershipClass", "DataProcessInfoClass", "StatusClass"]]: - """Getter: The list of metadata aspects associated with the data process. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["DataProcessKeyClass", "OwnershipClass", "DataProcessInfoClass", "StatusClass"]]) -> None: - """Setter: The list of metadata aspects associated with the data process. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class DatasetSnapshotClass(DictWrapper): - """A metadata snapshot for a specific dataset entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["DatasetKeyClass", "DatasetPropertiesClass", "EditableDatasetPropertiesClass", "DatasetDeprecationClass", "DatasetUpstreamLineageClass", "UpstreamLineageClass", "InstitutionalMemoryClass", "OwnershipClass", "StatusClass", "SchemaMetadataClass", "EditableSchemaMetadataClass", "GlobalTagsClass", "GlossaryTermsClass", "BrowsePathsClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "DatasetSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["DatasetKeyClass", "DatasetPropertiesClass", "EditableDatasetPropertiesClass", "DatasetDeprecationClass", "DatasetUpstreamLineageClass", "UpstreamLineageClass", "InstitutionalMemoryClass", "OwnershipClass", "StatusClass", "SchemaMetadataClass", "EditableSchemaMetadataClass", "GlobalTagsClass", "GlossaryTermsClass", "BrowsePathsClass"]]: - """Getter: The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["DatasetKeyClass", "DatasetPropertiesClass", "EditableDatasetPropertiesClass", "DatasetDeprecationClass", "DatasetUpstreamLineageClass", "UpstreamLineageClass", "InstitutionalMemoryClass", "OwnershipClass", "StatusClass", "SchemaMetadataClass", "EditableSchemaMetadataClass", "GlobalTagsClass", "GlossaryTermsClass", "BrowsePathsClass"]]) -> None: - """Setter: The list of metadata aspects associated with the dataset. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class GlossaryNodeSnapshotClass(DictWrapper): - """A metadata snapshot for a specific GlossaryNode entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["GlossaryNodeKeyClass", "GlossaryNodeInfoClass", "OwnershipClass", "StatusClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "GlossaryNodeSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["GlossaryNodeKeyClass", "GlossaryNodeInfoClass", "OwnershipClass", "StatusClass"]]: - """Getter: The list of metadata aspects associated with the GlossaryNode. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["GlossaryNodeKeyClass", "GlossaryNodeInfoClass", "OwnershipClass", "StatusClass"]]) -> None: - """Setter: The list of metadata aspects associated with the GlossaryNode. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class GlossaryTermSnapshotClass(DictWrapper): - """A metadata snapshot for a specific GlossaryTerm entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["GlossaryTermKeyClass", "GlossaryTermInfoClass", "OwnershipClass", "StatusClass", "BrowsePathsClass", "GlossaryRelatedTermsClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "GlossaryTermSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["GlossaryTermKeyClass", "GlossaryTermInfoClass", "OwnershipClass", "StatusClass", "BrowsePathsClass", "GlossaryRelatedTermsClass"]]: - """Getter: The list of metadata aspects associated with the GlossaryTerm. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["GlossaryTermKeyClass", "GlossaryTermInfoClass", "OwnershipClass", "StatusClass", "BrowsePathsClass", "GlossaryRelatedTermsClass"]]) -> None: - """Setter: The list of metadata aspects associated with the GlossaryTerm. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class MLFeatureSnapshotClass(DictWrapper): - # No docs available. - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["MLFeatureKeyClass", "MLFeaturePropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "MLFeatureSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["MLFeatureKeyClass", "MLFeaturePropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]]: - """Getter: The list of metadata aspects associated with the MLFeature. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["MLFeatureKeyClass", "MLFeaturePropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]]) -> None: - """Setter: The list of metadata aspects associated with the MLFeature. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class MLFeatureTableSnapshotClass(DictWrapper): - # No docs available. 
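# --- Illustrative usage sketch (assumptions noted below, not generated output) ---
# The generated key and snapshot classes in this module offer two construction
# paths: the typed __init__ shown in the definitions, and construct_with_defaults()
# followed by the property setters. The import path, the tag name, and the URN
# below are assumptions made for the example; adjust them to your environment.
from datahub.metadata.schema_classes import TagKeyClass, TagSnapshotClass

# Build the key aspect with Avro-default values, then overwrite the field we need.
tag_key = TagKeyClass.construct_with_defaults()
tag_key.name = "PII"  # hypothetical tag name

# Snapshot classes take the entity URN plus a list of aspect instances.
tag_snapshot = TagSnapshotClass(
    urn="urn:li:tag:PII",  # illustrative URN
    aspects=[tag_key],
)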
- - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["MLFeatureTableKeyClass", "MLFeatureTablePropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "MLFeatureTableSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["MLFeatureTableKeyClass", "MLFeatureTablePropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]]: - """Getter: The list of metadata aspects associated with the MLFeatureTable. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["MLFeatureTableKeyClass", "MLFeatureTablePropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]]) -> None: - """Setter: The list of metadata aspects associated with the MLFeatureTable. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class MLModelDeploymentSnapshotClass(DictWrapper): - # No docs available. - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.MLModelDeploymentSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["MLModelDeploymentKeyClass", "MLModelDeploymentPropertiesClass", "OwnershipClass", "StatusClass", "DeprecationClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "MLModelDeploymentSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["MLModelDeploymentKeyClass", "MLModelDeploymentPropertiesClass", "OwnershipClass", "StatusClass", "DeprecationClass"]]: - """Getter: The list of metadata aspects associated with the MLModelDeployment. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["MLModelDeploymentKeyClass", "MLModelDeploymentPropertiesClass", "OwnershipClass", "StatusClass", "DeprecationClass"]]) -> None: - """Setter: The list of metadata aspects associated with the MLModelDeployment. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class MLModelGroupSnapshotClass(DictWrapper): - # No docs available. - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.MLModelGroupSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["MLModelGroupKeyClass", "MLModelGroupPropertiesClass", "OwnershipClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "MLModelGroupSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["MLModelGroupKeyClass", "MLModelGroupPropertiesClass", "OwnershipClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]]: - """Getter: The list of metadata aspects associated with the MLModelGroup. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["MLModelGroupKeyClass", "MLModelGroupPropertiesClass", "OwnershipClass", "StatusClass", "DeprecationClass", "BrowsePathsClass"]]) -> None: - """Setter: The list of metadata aspects associated with the MLModelGroup. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class MLModelSnapshotClass(DictWrapper): - """MLModel Snapshot entity details.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.MLModelSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["MLModelKeyClass", "OwnershipClass", "MLModelPropertiesClass", "IntendedUseClass", "MLModelFactorPromptsClass", "MetricsClass", "EvaluationDataClass", "TrainingDataClass", "QuantitativeAnalysesClass", "EthicalConsiderationsClass", "CaveatsAndRecommendationsClass", "InstitutionalMemoryClass", "SourceCodeClass", "StatusClass", "CostClass", "DeprecationClass", "BrowsePathsClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "MLModelSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["MLModelKeyClass", "OwnershipClass", "MLModelPropertiesClass", "IntendedUseClass", "MLModelFactorPromptsClass", "MetricsClass", "EvaluationDataClass", "TrainingDataClass", "QuantitativeAnalysesClass", "EthicalConsiderationsClass", "CaveatsAndRecommendationsClass", "InstitutionalMemoryClass", "SourceCodeClass", "StatusClass", "CostClass", "DeprecationClass", "BrowsePathsClass"]]: - """Getter: The list of metadata aspects associated with the MLModel. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["MLModelKeyClass", "OwnershipClass", "MLModelPropertiesClass", "IntendedUseClass", "MLModelFactorPromptsClass", "MetricsClass", "EvaluationDataClass", "TrainingDataClass", "QuantitativeAnalysesClass", "EthicalConsiderationsClass", "CaveatsAndRecommendationsClass", "InstitutionalMemoryClass", "SourceCodeClass", "StatusClass", "CostClass", "DeprecationClass", "BrowsePathsClass"]]) -> None: - """Setter: The list of metadata aspects associated with the MLModel. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class MLPrimaryKeySnapshotClass(DictWrapper): - # No docs available. 
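# --- Illustrative usage sketch (assumed usage) ---
# MLPrimaryKeySnapshotClass below follows the same (urn, aspects) constructor
# pattern as the other snapshot classes; MLPrimaryKeyKeyClass is the key aspect
# defined earlier in this module. The import path, feature namespace, primary key
# name, and URN are assumptions made for the example.
from datahub.metadata.schema_classes import (
    MLPrimaryKeyKeyClass,
    MLPrimaryKeySnapshotClass,
)

pk_snapshot = MLPrimaryKeySnapshotClass(
    urn="urn:li:mlPrimaryKey:(user_features,user_id)",  # illustrative URN
    aspects=[
        MLPrimaryKeyKeyClass(featureNamespace="user_features", name="user_id"),
    ],
)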
- - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot") - def __init__(self, - urn: str, - aspects: List[Union["MLPrimaryKeyKeyClass", "MLPrimaryKeyPropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "MLPrimaryKeySnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["MLPrimaryKeyKeyClass", "MLPrimaryKeyPropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass"]]: - """Getter: The list of metadata aspects associated with the MLPrimaryKey. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["MLPrimaryKeyKeyClass", "MLPrimaryKeyPropertiesClass", "OwnershipClass", "InstitutionalMemoryClass", "StatusClass", "DeprecationClass"]]) -> None: - """Setter: The list of metadata aspects associated with the MLPrimaryKey. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class SchemaFieldSnapshotClass(DictWrapper): - """A metadata snapshot for a specific schema field entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.SchemaFieldSnapshot") - def __init__(self, - urn: str, - aspects: List["SchemaFieldKeyClass"], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "SchemaFieldSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List["SchemaFieldKeyClass"]: - """Getter: The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List["SchemaFieldKeyClass"]) -> None: - """Setter: The list of metadata aspects associated with the dataset. 
Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class TagSnapshotClass(DictWrapper): - """A metadata snapshot for a specific dataset entity.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot") - def __init__(self, - urn: str, - aspects: List[Union["TagKeyClass", "OwnershipClass", "TagPropertiesClass", "StatusClass"]], - ): - super().__init__() - - self.urn = urn - self.aspects = aspects - - @classmethod - def construct_with_defaults(cls) -> "TagSnapshotClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.urn = str() - self.aspects = list() - - - @property - def urn(self) -> str: - """Getter: URN for the entity the metadata snapshot is associated with.""" - return self._inner_dict.get('urn') # type: ignore - - @urn.setter - def urn(self, value: str) -> None: - """Setter: URN for the entity the metadata snapshot is associated with.""" - self._inner_dict['urn'] = value - - - @property - def aspects(self) -> List[Union["TagKeyClass", "OwnershipClass", "TagPropertiesClass", "StatusClass"]]: - """Getter: The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - return self._inner_dict.get('aspects') # type: ignore - - @aspects.setter - def aspects(self, value: List[Union["TagKeyClass", "OwnershipClass", "TagPropertiesClass", "StatusClass"]]) -> None: - """Setter: The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects.""" - self._inner_dict['aspects'] = value - - -class BaseDataClass(DictWrapper): - """BaseData record""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.BaseData") - def __init__(self, - dataset: str, - motivation: Union[None, str]=None, - preProcessing: Union[None, List[str]]=None, - ): - super().__init__() - - self.dataset = dataset - self.motivation = motivation - self.preProcessing = preProcessing - - @classmethod - def construct_with_defaults(cls) -> "BaseDataClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.dataset = str() - self.motivation = self.RECORD_SCHEMA.field_map["motivation"].default - self.preProcessing = self.RECORD_SCHEMA.field_map["preProcessing"].default - - - @property - def dataset(self) -> str: - """Getter: What dataset were used in the MLModel?""" - return self._inner_dict.get('dataset') # type: ignore - - @dataset.setter - def dataset(self, value: str) -> None: - """Setter: What dataset were used in the MLModel?""" - self._inner_dict['dataset'] = value - - - @property - def motivation(self) -> Union[None, str]: - """Getter: Why was this dataset chosen?""" - return self._inner_dict.get('motivation') # type: ignore - - @motivation.setter - def motivation(self, value: Union[None, str]) -> None: - """Setter: Why was this dataset chosen?""" - self._inner_dict['motivation'] = value - - - @property - def preProcessing(self) -> Union[None, List[str]]: - """Getter: How was the data preprocessed (e.g., tokenization of sentences, cropping of images, any filtering such as dropping images without faces)?""" - return self._inner_dict.get('preProcessing') # type: ignore - - @preProcessing.setter - def preProcessing(self, value: Union[None, List[str]]) -> None: - """Setter: How was the 
data preprocessed (e.g., tokenization of sentences, cropping of images, any filtering such as dropping images without faces)?""" - self._inner_dict['preProcessing'] = value - - -class CaveatDetailsClass(DictWrapper): - """This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.CaveatDetails") - def __init__(self, - needsFurtherTesting: Union[None, bool]=None, - caveatDescription: Union[None, str]=None, - groupsNotRepresented: Union[None, List[str]]=None, - ): - super().__init__() - - self.needsFurtherTesting = needsFurtherTesting - self.caveatDescription = caveatDescription - self.groupsNotRepresented = groupsNotRepresented - - @classmethod - def construct_with_defaults(cls) -> "CaveatDetailsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.needsFurtherTesting = self.RECORD_SCHEMA.field_map["needsFurtherTesting"].default - self.caveatDescription = self.RECORD_SCHEMA.field_map["caveatDescription"].default - self.groupsNotRepresented = self.RECORD_SCHEMA.field_map["groupsNotRepresented"].default - - - @property - def needsFurtherTesting(self) -> Union[None, bool]: - """Getter: Did the results suggest any further testing?""" - return self._inner_dict.get('needsFurtherTesting') # type: ignore - - @needsFurtherTesting.setter - def needsFurtherTesting(self, value: Union[None, bool]) -> None: - """Setter: Did the results suggest any further testing?""" - self._inner_dict['needsFurtherTesting'] = value - - - @property - def caveatDescription(self) -> Union[None, str]: - """Getter: Caveat Description - For ex: Given gender classes are binary (male/not male), which we include as male/female. Further work needed to evaluate across a spectrum of genders.""" - return self._inner_dict.get('caveatDescription') # type: ignore - - @caveatDescription.setter - def caveatDescription(self, value: Union[None, str]) -> None: - """Setter: Caveat Description - For ex: Given gender classes are binary (male/not male), which we include as male/female. Further work needed to evaluate across a spectrum of genders.""" - self._inner_dict['caveatDescription'] = value - - - @property - def groupsNotRepresented(self) -> Union[None, List[str]]: - """Getter: Relevant groups that were not represented in the evaluation dataset?""" - return self._inner_dict.get('groupsNotRepresented') # type: ignore - - @groupsNotRepresented.setter - def groupsNotRepresented(self, value: Union[None, List[str]]) -> None: - """Setter: Relevant groups that were not represented in the evaluation dataset?""" - self._inner_dict['groupsNotRepresented'] = value - - -class CaveatsAndRecommendationsClass(DictWrapper): - """This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? 
Are there additional recommendations for model use?""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.CaveatsAndRecommendations") - def __init__(self, - caveats: Union[None, "CaveatDetailsClass"]=None, - recommendations: Union[None, str]=None, - idealDatasetCharacteristics: Union[None, List[str]]=None, - ): - super().__init__() - - self.caveats = caveats - self.recommendations = recommendations - self.idealDatasetCharacteristics = idealDatasetCharacteristics - - @classmethod - def construct_with_defaults(cls) -> "CaveatsAndRecommendationsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.caveats = self.RECORD_SCHEMA.field_map["caveats"].default - self.recommendations = self.RECORD_SCHEMA.field_map["recommendations"].default - self.idealDatasetCharacteristics = self.RECORD_SCHEMA.field_map["idealDatasetCharacteristics"].default - - - @property - def caveats(self) -> Union[None, "CaveatDetailsClass"]: - """Getter: This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset?""" - return self._inner_dict.get('caveats') # type: ignore - - @caveats.setter - def caveats(self, value: Union[None, "CaveatDetailsClass"]) -> None: - """Setter: This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset?""" - self._inner_dict['caveats'] = value - - - @property - def recommendations(self) -> Union[None, str]: - """Getter: Recommendations on where this MLModel should be used.""" - return self._inner_dict.get('recommendations') # type: ignore - - @recommendations.setter - def recommendations(self, value: Union[None, str]) -> None: - """Setter: Recommendations on where this MLModel should be used.""" - self._inner_dict['recommendations'] = value - - - @property - def idealDatasetCharacteristics(self) -> Union[None, List[str]]: - """Getter: Ideal characteristics of an evaluation dataset for this MLModel""" - return self._inner_dict.get('idealDatasetCharacteristics') # type: ignore - - @idealDatasetCharacteristics.setter - def idealDatasetCharacteristics(self, value: Union[None, List[str]]) -> None: - """Setter: Ideal characteristics of an evaluation dataset for this MLModel""" - self._inner_dict['idealDatasetCharacteristics'] = value - - -class DeploymentStatusClass(object): - """Model endpoint statuses""" - - - """Deployments out of service.""" - OUT_OF_SERVICE = "OUT_OF_SERVICE" - - """Deployments being created.""" - CREATING = "CREATING" - - """Deployments being updated.""" - UPDATING = "UPDATING" - - """Deployments being reverted to a previous version.""" - ROLLING_BACK = "ROLLING_BACK" - - """Deployments that are active.""" - IN_SERVICE = "IN_SERVICE" - - """Deployments being deleted.""" - DELETING = "DELETING" - - """Deployments with an error state.""" - FAILED = "FAILED" - - """Deployments with unknown/unmappable state.""" - UNKNOWN = "UNKNOWN" - - -class EthicalConsiderationsClass(DictWrapper): - """This section is intended to demonstrate the ethical considerations that went into MLModel development, surfacing ethical challenges and solutions to stakeholders.""" - - RECORD_SCHEMA = 
get_schema_type("com.linkedin.pegasus2avro.ml.metadata.EthicalConsiderations") - def __init__(self, - data: Union[None, List[str]]=None, - humanLife: Union[None, List[str]]=None, - mitigations: Union[None, List[str]]=None, - risksAndHarms: Union[None, List[str]]=None, - useCases: Union[None, List[str]]=None, - ): - super().__init__() - - self.data = data - self.humanLife = humanLife - self.mitigations = mitigations - self.risksAndHarms = risksAndHarms - self.useCases = useCases - - @classmethod - def construct_with_defaults(cls) -> "EthicalConsiderationsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.data = self.RECORD_SCHEMA.field_map["data"].default - self.humanLife = self.RECORD_SCHEMA.field_map["humanLife"].default - self.mitigations = self.RECORD_SCHEMA.field_map["mitigations"].default - self.risksAndHarms = self.RECORD_SCHEMA.field_map["risksAndHarms"].default - self.useCases = self.RECORD_SCHEMA.field_map["useCases"].default - - - @property - def data(self) -> Union[None, List[str]]: - """Getter: Does the MLModel use any sensitive data (e.g., protected classes)?""" - return self._inner_dict.get('data') # type: ignore - - @data.setter - def data(self, value: Union[None, List[str]]) -> None: - """Setter: Does the MLModel use any sensitive data (e.g., protected classes)?""" - self._inner_dict['data'] = value - - - @property - def humanLife(self) -> Union[None, List[str]]: - """Getter: Is the MLModel intended to inform decisions about matters central to human life or flourishing – e.g., health or safety? Or could it be used in such a way?""" - return self._inner_dict.get('humanLife') # type: ignore - - @humanLife.setter - def humanLife(self, value: Union[None, List[str]]) -> None: - """Setter: Is the MLModel intended to inform decisions about matters central to human life or flourishing – e.g., health or safety? Or could it be used in such a way?""" - self._inner_dict['humanLife'] = value - - - @property - def mitigations(self) -> Union[None, List[str]]: - """Getter: What risk mitigation strategies were used during MLModel development?""" - return self._inner_dict.get('mitigations') # type: ignore - - @mitigations.setter - def mitigations(self, value: Union[None, List[str]]) -> None: - """Setter: What risk mitigation strategies were used during MLModel development?""" - self._inner_dict['mitigations'] = value - - - @property - def risksAndHarms(self) -> Union[None, List[str]]: - """Getter: What risks may be present in MLModel usage? Try to identify the potential recipients, likelihood, and magnitude of harms. If these cannot be determined, note that they were considered but remain unknown.""" - return self._inner_dict.get('risksAndHarms') # type: ignore - - @risksAndHarms.setter - def risksAndHarms(self, value: Union[None, List[str]]) -> None: - """Setter: What risks may be present in MLModel usage? Try to identify the potential recipients, likelihood, and magnitude of harms. If these cannot be determined, note that they were considered but remain unknown.""" - self._inner_dict['risksAndHarms'] = value - - - @property - def useCases(self) -> Union[None, List[str]]: - """Getter: Are there any known MLModel use cases that are especially fraught? 
This may connect directly to the intended use section""" - return self._inner_dict.get('useCases') # type: ignore - - @useCases.setter - def useCases(self, value: Union[None, List[str]]) -> None: - """Setter: Are there any known MLModel use cases that are especially fraught? This may connect directly to the intended use section""" - self._inner_dict['useCases'] = value - - -class EvaluationDataClass(DictWrapper): - """All referenced datasets would ideally point to any set of documents that provide visibility into the source and composition of the dataset.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.EvaluationData") - def __init__(self, - evaluationData: List["BaseDataClass"], - ): - super().__init__() - - self.evaluationData = evaluationData - - @classmethod - def construct_with_defaults(cls) -> "EvaluationDataClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.evaluationData = list() - - - @property - def evaluationData(self) -> List["BaseDataClass"]: - """Getter: Details on the dataset(s) used for the quantitative analyses in the MLModel""" - return self._inner_dict.get('evaluationData') # type: ignore - - @evaluationData.setter - def evaluationData(self, value: List["BaseDataClass"]) -> None: - """Setter: Details on the dataset(s) used for the quantitative analyses in the MLModel""" - self._inner_dict['evaluationData'] = value - - -class IntendedUseClass(DictWrapper): - """Intended Use for the ML Model""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.IntendedUse") - def __init__(self, - primaryUses: Union[None, List[str]]=None, - primaryUsers: Union[None, List[Union[str, "IntendedUserTypeClass"]]]=None, - outOfScopeUses: Union[None, List[str]]=None, - ): - super().__init__() - - self.primaryUses = primaryUses - self.primaryUsers = primaryUsers - self.outOfScopeUses = outOfScopeUses - - @classmethod - def construct_with_defaults(cls) -> "IntendedUseClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.primaryUses = self.RECORD_SCHEMA.field_map["primaryUses"].default - self.primaryUsers = self.RECORD_SCHEMA.field_map["primaryUsers"].default - self.outOfScopeUses = self.RECORD_SCHEMA.field_map["outOfScopeUses"].default - - - @property - def primaryUses(self) -> Union[None, List[str]]: - """Getter: Primary Use cases for the MLModel.""" - return self._inner_dict.get('primaryUses') # type: ignore - - @primaryUses.setter - def primaryUses(self, value: Union[None, List[str]]) -> None: - """Setter: Primary Use cases for the MLModel.""" - self._inner_dict['primaryUses'] = value - - - @property - def primaryUsers(self) -> Union[None, List[Union[str, "IntendedUserTypeClass"]]]: - """Getter: Primary Intended Users - For example, was the MLModel developed for entertainment purposes, for hobbyists, or enterprise solutions?""" - return self._inner_dict.get('primaryUsers') # type: ignore - - @primaryUsers.setter - def primaryUsers(self, value: Union[None, List[Union[str, "IntendedUserTypeClass"]]]) -> None: - """Setter: Primary Intended Users - For example, was the MLModel developed for entertainment purposes, for hobbyists, or enterprise solutions?""" - self._inner_dict['primaryUsers'] = value - - - @property - def outOfScopeUses(self) -> Union[None, List[str]]: - """Getter: Highlight technology that the MLModel might easily be confused with, or related contexts that users could try to apply the 
MLModel to.""" - return self._inner_dict.get('outOfScopeUses') # type: ignore - - @outOfScopeUses.setter - def outOfScopeUses(self, value: Union[None, List[str]]) -> None: - """Setter: Highlight technology that the MLModel might easily be confused with, or related contexts that users could try to apply the MLModel to.""" - self._inner_dict['outOfScopeUses'] = value - - -class IntendedUserTypeClass(object): - # No docs available. - - ENTERPRISE = "ENTERPRISE" - HOBBY = "HOBBY" - ENTERTAINMENT = "ENTERTAINMENT" - - -class MLFeaturePropertiesClass(DictWrapper): - """Properties associated with a MLFeature""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties") - def __init__(self, - description: Union[None, str]=None, - dataType: Union[None, Union[str, "MLFeatureDataTypeClass"]]=None, - version: Union[None, "VersionTagClass"]=None, - sources: Union[None, List[str]]=None, - ): - super().__init__() - - self.description = description - self.dataType = dataType - self.version = version - self.sources = sources - - @classmethod - def construct_with_defaults(cls) -> "MLFeaturePropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.dataType = self.RECORD_SCHEMA.field_map["dataType"].default - self.version = self.RECORD_SCHEMA.field_map["version"].default - self.sources = self.RECORD_SCHEMA.field_map["sources"].default - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the MLFeature""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the MLFeature""" - self._inner_dict['description'] = value - - - @property - def dataType(self) -> Union[None, Union[str, "MLFeatureDataTypeClass"]]: - """Getter: Data Type of the MLFeature""" - return self._inner_dict.get('dataType') # type: ignore - - @dataType.setter - def dataType(self, value: Union[None, Union[str, "MLFeatureDataTypeClass"]]) -> None: - """Setter: Data Type of the MLFeature""" - self._inner_dict['dataType'] = value - - - @property - def version(self) -> Union[None, "VersionTagClass"]: - """Getter: Version of the MLFeature""" - return self._inner_dict.get('version') # type: ignore - - @version.setter - def version(self, value: Union[None, "VersionTagClass"]) -> None: - """Setter: Version of the MLFeature""" - self._inner_dict['version'] = value - - - @property - def sources(self) -> Union[None, List[str]]: - """Getter: Source of the MLFeature""" - return self._inner_dict.get('sources') # type: ignore - - @sources.setter - def sources(self, value: Union[None, List[str]]) -> None: - """Setter: Source of the MLFeature""" - self._inner_dict['sources'] = value - - -class MLFeatureTablePropertiesClass(DictWrapper): - """Properties associated with a MLFeatureTable""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties") - def __init__(self, - customProperties: Optional[Dict[str, str]]=None, - description: Union[None, str]=None, - mlFeatures: Union[None, List[str]]=None, - mlPrimaryKeys: Union[None, List[str]]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.description = description - self.mlFeatures = mlFeatures - self.mlPrimaryKeys = 
mlPrimaryKeys - - @classmethod - def construct_with_defaults(cls) -> "MLFeatureTablePropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.mlFeatures = self.RECORD_SCHEMA.field_map["mlFeatures"].default - self.mlPrimaryKeys = self.RECORD_SCHEMA.field_map["mlPrimaryKeys"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the MLFeatureTable""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the MLFeatureTable""" - self._inner_dict['description'] = value - - - @property - def mlFeatures(self) -> Union[None, List[str]]: - """Getter: List of features contained in the feature table""" - return self._inner_dict.get('mlFeatures') # type: ignore - - @mlFeatures.setter - def mlFeatures(self, value: Union[None, List[str]]) -> None: - """Setter: List of features contained in the feature table""" - self._inner_dict['mlFeatures'] = value - - - @property - def mlPrimaryKeys(self) -> Union[None, List[str]]: - """Getter: List of primary keys in the feature table (if multiple, assumed to act as a composite key)""" - return self._inner_dict.get('mlPrimaryKeys') # type: ignore - - @mlPrimaryKeys.setter - def mlPrimaryKeys(self, value: Union[None, List[str]]) -> None: - """Setter: List of primary keys in the feature table (if multiple, assumed to act as a composite key)""" - self._inner_dict['mlPrimaryKeys'] = value - - -class MLHyperParamClass(DictWrapper): - """Properties associated with an ML Hyper Param""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLHyperParam") - def __init__(self, - name: str, - description: Union[None, str]=None, - value: Union[None, str]=None, - createdAt: Union[None, int]=None, - ): - super().__init__() - - self.name = name - self.description = description - self.value = value - self.createdAt = createdAt - - @classmethod - def construct_with_defaults(cls) -> "MLHyperParamClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.value = self.RECORD_SCHEMA.field_map["value"].default - self.createdAt = self.RECORD_SCHEMA.field_map["createdAt"].default - - - @property - def name(self) -> str: - """Getter: Name of the MLHyperParam""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the MLHyperParam""" - self._inner_dict['name'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the MLHyperParam""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the MLHyperParam""" - self._inner_dict['description'] = value - - - @property - def value(self) -> Union[None, str]: - 
"""Getter: The value of the MLHyperParam""" - return self._inner_dict.get('value') # type: ignore - - @value.setter - def value(self, value: Union[None, str]) -> None: - """Setter: The value of the MLHyperParam""" - self._inner_dict['value'] = value - - - @property - def createdAt(self) -> Union[None, int]: - """Getter: Date when the MLHyperParam was developed""" - return self._inner_dict.get('createdAt') # type: ignore - - @createdAt.setter - def createdAt(self, value: Union[None, int]) -> None: - """Setter: Date when the MLHyperParam was developed""" - self._inner_dict['createdAt'] = value - - -class MLMetricClass(DictWrapper): - """Properties associated with an ML Metric""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLMetric") - def __init__(self, - name: str, - description: Union[None, str]=None, - value: Union[None, str]=None, - createdAt: Union[None, int]=None, - ): - super().__init__() - - self.name = name - self.description = description - self.value = value - self.createdAt = createdAt - - @classmethod - def construct_with_defaults(cls) -> "MLMetricClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.value = self.RECORD_SCHEMA.field_map["value"].default - self.createdAt = self.RECORD_SCHEMA.field_map["createdAt"].default - - - @property - def name(self) -> str: - """Getter: Name of the mlMetric""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the mlMetric""" - self._inner_dict['name'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the mlMetric""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the mlMetric""" - self._inner_dict['description'] = value - - - @property - def value(self) -> Union[None, str]: - """Getter: The value of the mlMetric""" - return self._inner_dict.get('value') # type: ignore - - @value.setter - def value(self, value: Union[None, str]) -> None: - """Setter: The value of the mlMetric""" - self._inner_dict['value'] = value - - - @property - def createdAt(self) -> Union[None, int]: - """Getter: Date when the mlMetric was developed""" - return self._inner_dict.get('createdAt') # type: ignore - - @createdAt.setter - def createdAt(self, value: Union[None, int]) -> None: - """Setter: Date when the mlMetric was developed""" - self._inner_dict['createdAt'] = value - - -class MLModelDeploymentPropertiesClass(DictWrapper): - """Properties associated with an ML Model Deployment""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties") - def __init__(self, - customProperties: Optional[Dict[str, str]]=None, - externalUrl: Union[None, str]=None, - description: Union[None, str]=None, - createdAt: Union[None, int]=None, - version: Union[None, "VersionTagClass"]=None, - status: Union[None, Union[str, "DeploymentStatusClass"]]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.externalUrl = externalUrl - self.description = description - self.createdAt = createdAt - self.version = version - self.status = status - - @classmethod - def construct_with_defaults(cls) 
-> "MLModelDeploymentPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.externalUrl = self.RECORD_SCHEMA.field_map["externalUrl"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.createdAt = self.RECORD_SCHEMA.field_map["createdAt"].default - self.version = self.RECORD_SCHEMA.field_map["version"].default - self.status = self.RECORD_SCHEMA.field_map["status"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def externalUrl(self) -> Union[None, str]: - """Getter: URL where the reference exist""" - return self._inner_dict.get('externalUrl') # type: ignore - - @externalUrl.setter - def externalUrl(self, value: Union[None, str]) -> None: - """Setter: URL where the reference exist""" - self._inner_dict['externalUrl'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the MLModelDeployment""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the MLModelDeployment""" - self._inner_dict['description'] = value - - - @property - def createdAt(self) -> Union[None, int]: - """Getter: Date when the MLModelDeployment was developed""" - return self._inner_dict.get('createdAt') # type: ignore - - @createdAt.setter - def createdAt(self, value: Union[None, int]) -> None: - """Setter: Date when the MLModelDeployment was developed""" - self._inner_dict['createdAt'] = value - - - @property - def version(self) -> Union[None, "VersionTagClass"]: - """Getter: Version of the MLModelDeployment""" - return self._inner_dict.get('version') # type: ignore - - @version.setter - def version(self, value: Union[None, "VersionTagClass"]) -> None: - """Setter: Version of the MLModelDeployment""" - self._inner_dict['version'] = value - - - @property - def status(self) -> Union[None, Union[str, "DeploymentStatusClass"]]: - """Getter: Status of the deployment""" - return self._inner_dict.get('status') # type: ignore - - @status.setter - def status(self, value: Union[None, Union[str, "DeploymentStatusClass"]]) -> None: - """Setter: Status of the deployment""" - self._inner_dict['status'] = value - - -class MLModelFactorPromptsClass(DictWrapper): - """Prompts which affect the performance of the MLModel""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLModelFactorPrompts") - def __init__(self, - relevantFactors: Union[None, List["MLModelFactorsClass"]]=None, - evaluationFactors: Union[None, List["MLModelFactorsClass"]]=None, - ): - super().__init__() - - self.relevantFactors = relevantFactors - self.evaluationFactors = evaluationFactors - - @classmethod - def construct_with_defaults(cls) -> "MLModelFactorPromptsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.relevantFactors = self.RECORD_SCHEMA.field_map["relevantFactors"].default - self.evaluationFactors = self.RECORD_SCHEMA.field_map["evaluationFactors"].default - - - @property - def relevantFactors(self) -> Union[None, 
List["MLModelFactorsClass"]]: - """Getter: What are foreseeable salient factors for which MLModel performance may vary, and how were these determined?""" - return self._inner_dict.get('relevantFactors') # type: ignore - - @relevantFactors.setter - def relevantFactors(self, value: Union[None, List["MLModelFactorsClass"]]) -> None: - """Setter: What are foreseeable salient factors for which MLModel performance may vary, and how were these determined?""" - self._inner_dict['relevantFactors'] = value - - - @property - def evaluationFactors(self) -> Union[None, List["MLModelFactorsClass"]]: - """Getter: Which factors are being reported, and why were these chosen?""" - return self._inner_dict.get('evaluationFactors') # type: ignore - - @evaluationFactors.setter - def evaluationFactors(self, value: Union[None, List["MLModelFactorsClass"]]) -> None: - """Setter: Which factors are being reported, and why were these chosen?""" - self._inner_dict['evaluationFactors'] = value - - -class MLModelFactorsClass(DictWrapper): - """Factors affecting the performance of the MLModel.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLModelFactors") - def __init__(self, - groups: Union[None, List[str]]=None, - instrumentation: Union[None, List[str]]=None, - environment: Union[None, List[str]]=None, - ): - super().__init__() - - self.groups = groups - self.instrumentation = instrumentation - self.environment = environment - - @classmethod - def construct_with_defaults(cls) -> "MLModelFactorsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.groups = self.RECORD_SCHEMA.field_map["groups"].default - self.instrumentation = self.RECORD_SCHEMA.field_map["instrumentation"].default - self.environment = self.RECORD_SCHEMA.field_map["environment"].default - - - @property - def groups(self) -> Union[None, List[str]]: - """Getter: Groups refers to distinct categories with similar characteristics that are present in the evaluation data instances. - For human-centric machine learning MLModels, groups are people who share one or multiple characteristics.""" - return self._inner_dict.get('groups') # type: ignore - - @groups.setter - def groups(self, value: Union[None, List[str]]) -> None: - """Setter: Groups refers to distinct categories with similar characteristics that are present in the evaluation data instances. - For human-centric machine learning MLModels, groups are people who share one or multiple characteristics.""" - self._inner_dict['groups'] = value - - - @property - def instrumentation(self) -> Union[None, List[str]]: - """Getter: The performance of a MLModel can vary depending on what instruments were used to capture the input to the MLModel. - For example, a face detection model may perform differently depending on the camera’s hardware and software, - including lens, image stabilization, high dynamic range techniques, and background blurring for portrait mode.""" - return self._inner_dict.get('instrumentation') # type: ignore - - @instrumentation.setter - def instrumentation(self, value: Union[None, List[str]]) -> None: - """Setter: The performance of a MLModel can vary depending on what instruments were used to capture the input to the MLModel. 
- For example, a face detection model may perform differently depending on the camera’s hardware and software, - including lens, image stabilization, high dynamic range techniques, and background blurring for portrait mode.""" - self._inner_dict['instrumentation'] = value - - - @property - def environment(self) -> Union[None, List[str]]: - """Getter: A further factor affecting MLModel performance is the environment in which it is deployed.""" - return self._inner_dict.get('environment') # type: ignore - - @environment.setter - def environment(self, value: Union[None, List[str]]) -> None: - """Setter: A further factor affecting MLModel performance is the environment in which it is deployed.""" - self._inner_dict['environment'] = value - - -class MLModelGroupPropertiesClass(DictWrapper): - """Properties associated with an ML Model Group""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLModelGroupProperties") - def __init__(self, - customProperties: Optional[Dict[str, str]]=None, - description: Union[None, str]=None, - createdAt: Union[None, int]=None, - version: Union[None, "VersionTagClass"]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.description = description - self.createdAt = createdAt - self.version = version - - @classmethod - def construct_with_defaults(cls) -> "MLModelGroupPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.createdAt = self.RECORD_SCHEMA.field_map["createdAt"].default - self.version = self.RECORD_SCHEMA.field_map["version"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the MLModelGroup""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the MLModelGroup""" - self._inner_dict['description'] = value - - - @property - def createdAt(self) -> Union[None, int]: - """Getter: Date when the MLModelGroup was developed""" - return self._inner_dict.get('createdAt') # type: ignore - - @createdAt.setter - def createdAt(self, value: Union[None, int]) -> None: - """Setter: Date when the MLModelGroup was developed""" - self._inner_dict['createdAt'] = value - - - @property - def version(self) -> Union[None, "VersionTagClass"]: - """Getter: Version of the MLModelGroup""" - return self._inner_dict.get('version') # type: ignore - - @version.setter - def version(self, value: Union[None, "VersionTagClass"]) -> None: - """Setter: Version of the MLModelGroup""" - self._inner_dict['version'] = value - - -class MLModelPropertiesClass(DictWrapper): - """Properties associated with a ML Model""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLModelProperties") - def __init__(self, - customProperties: Optional[Dict[str, str]]=None, - externalUrl: Union[None, str]=None, - description: Union[None, str]=None, - date: 
Union[None, int]=None, - version: Union[None, "VersionTagClass"]=None, - type: Union[None, str]=None, - hyperParameters: Union[None, Dict[str, Union[str, int, float, float, bool]]]=None, - hyperParams: Union[None, List["MLHyperParamClass"]]=None, - trainingMetrics: Union[None, List["MLMetricClass"]]=None, - onlineMetrics: Union[None, List["MLMetricClass"]]=None, - mlFeatures: Union[None, List[str]]=None, - tags: Optional[List[str]]=None, - deployments: Union[None, List[str]]=None, - trainingJobs: Union[None, List[str]]=None, - downstreamJobs: Union[None, List[str]]=None, - groups: Union[None, List[str]]=None, - ): - super().__init__() - - if customProperties is None: - # default: {} - self.customProperties = dict() - else: - self.customProperties = customProperties - self.externalUrl = externalUrl - self.description = description - self.date = date - self.version = version - self.type = type - self.hyperParameters = hyperParameters - self.hyperParams = hyperParams - self.trainingMetrics = trainingMetrics - self.onlineMetrics = onlineMetrics - self.mlFeatures = mlFeatures - if tags is None: - # default: [] - self.tags = list() - else: - self.tags = tags - self.deployments = deployments - self.trainingJobs = trainingJobs - self.downstreamJobs = downstreamJobs - self.groups = groups - - @classmethod - def construct_with_defaults(cls) -> "MLModelPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.customProperties = dict() - self.externalUrl = self.RECORD_SCHEMA.field_map["externalUrl"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.date = self.RECORD_SCHEMA.field_map["date"].default - self.version = self.RECORD_SCHEMA.field_map["version"].default - self.type = self.RECORD_SCHEMA.field_map["type"].default - self.hyperParameters = self.RECORD_SCHEMA.field_map["hyperParameters"].default - self.hyperParams = self.RECORD_SCHEMA.field_map["hyperParams"].default - self.trainingMetrics = self.RECORD_SCHEMA.field_map["trainingMetrics"].default - self.onlineMetrics = self.RECORD_SCHEMA.field_map["onlineMetrics"].default - self.mlFeatures = self.RECORD_SCHEMA.field_map["mlFeatures"].default - self.tags = list() - self.deployments = self.RECORD_SCHEMA.field_map["deployments"].default - self.trainingJobs = self.RECORD_SCHEMA.field_map["trainingJobs"].default - self.downstreamJobs = self.RECORD_SCHEMA.field_map["downstreamJobs"].default - self.groups = self.RECORD_SCHEMA.field_map["groups"].default - - - @property - def customProperties(self) -> Dict[str, str]: - """Getter: Custom property bag.""" - return self._inner_dict.get('customProperties') # type: ignore - - @customProperties.setter - def customProperties(self, value: Dict[str, str]) -> None: - """Setter: Custom property bag.""" - self._inner_dict['customProperties'] = value - - - @property - def externalUrl(self) -> Union[None, str]: - """Getter: URL where the reference exist""" - return self._inner_dict.get('externalUrl') # type: ignore - - @externalUrl.setter - def externalUrl(self, value: Union[None, str]) -> None: - """Setter: URL where the reference exist""" - self._inner_dict['externalUrl'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the MLModel""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the MLModel""" - self._inner_dict['description'] = 
value - - - @property - def date(self) -> Union[None, int]: - """Getter: Date when the MLModel was developed""" - return self._inner_dict.get('date') # type: ignore - - @date.setter - def date(self, value: Union[None, int]) -> None: - """Setter: Date when the MLModel was developed""" - self._inner_dict['date'] = value - - - @property - def version(self) -> Union[None, "VersionTagClass"]: - """Getter: Version of the MLModel""" - return self._inner_dict.get('version') # type: ignore - - @version.setter - def version(self, value: Union[None, "VersionTagClass"]) -> None: - """Setter: Version of the MLModel""" - self._inner_dict['version'] = value - - - @property - def type(self) -> Union[None, str]: - """Getter: Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[None, str]) -> None: - """Setter: Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc""" - self._inner_dict['type'] = value - - - @property - def hyperParameters(self) -> Union[None, Dict[str, Union[str, int, float, float, bool]]]: - """Getter: Hyper Parameters of the MLModel - - NOTE: these are deprecated in favor of hyperParams""" - return self._inner_dict.get('hyperParameters') # type: ignore - - @hyperParameters.setter - def hyperParameters(self, value: Union[None, Dict[str, Union[str, int, float, float, bool]]]) -> None: - """Setter: Hyper Parameters of the MLModel - - NOTE: these are deprecated in favor of hyperParams""" - self._inner_dict['hyperParameters'] = value - - - @property - def hyperParams(self) -> Union[None, List["MLHyperParamClass"]]: - """Getter: Hyperparameters of the MLModel""" - return self._inner_dict.get('hyperParams') # type: ignore - - @hyperParams.setter - def hyperParams(self, value: Union[None, List["MLHyperParamClass"]]) -> None: - """Setter: Hyperparameters of the MLModel""" - self._inner_dict['hyperParams'] = value - - - @property - def trainingMetrics(self) -> Union[None, List["MLMetricClass"]]: - """Getter: Metrics of the MLModel used in training""" - return self._inner_dict.get('trainingMetrics') # type: ignore - - @trainingMetrics.setter - def trainingMetrics(self, value: Union[None, List["MLMetricClass"]]) -> None: - """Setter: Metrics of the MLModel used in training""" - self._inner_dict['trainingMetrics'] = value - - - @property - def onlineMetrics(self) -> Union[None, List["MLMetricClass"]]: - """Getter: Metrics of the MLModel used in production""" - return self._inner_dict.get('onlineMetrics') # type: ignore - - @onlineMetrics.setter - def onlineMetrics(self, value: Union[None, List["MLMetricClass"]]) -> None: - """Setter: Metrics of the MLModel used in production""" - self._inner_dict['onlineMetrics'] = value - - - @property - def mlFeatures(self) -> Union[None, List[str]]: - """Getter: List of features used for MLModel training""" - return self._inner_dict.get('mlFeatures') # type: ignore - - @mlFeatures.setter - def mlFeatures(self, value: Union[None, List[str]]) -> None: - """Setter: List of features used for MLModel training""" - self._inner_dict['mlFeatures'] = value - - - @property - def tags(self) -> List[str]: - """Getter: Tags for the MLModel""" - return self._inner_dict.get('tags') # type: ignore - - @tags.setter - def tags(self, value: List[str]) -> None: - """Setter: Tags for the MLModel""" - self._inner_dict['tags'] = value - - - @property - def deployments(self) 
-> Union[None, List[str]]: - """Getter: Deployments for the MLModel""" - return self._inner_dict.get('deployments') # type: ignore - - @deployments.setter - def deployments(self, value: Union[None, List[str]]) -> None: - """Setter: Deployments for the MLModel""" - self._inner_dict['deployments'] = value - - - @property - def trainingJobs(self) -> Union[None, List[str]]: - """Getter: List of jobs (if any) used to train the model""" - return self._inner_dict.get('trainingJobs') # type: ignore - - @trainingJobs.setter - def trainingJobs(self, value: Union[None, List[str]]) -> None: - """Setter: List of jobs (if any) used to train the model""" - self._inner_dict['trainingJobs'] = value - - - @property - def downstreamJobs(self) -> Union[None, List[str]]: - """Getter: List of jobs (if any) that use the model""" - return self._inner_dict.get('downstreamJobs') # type: ignore - - @downstreamJobs.setter - def downstreamJobs(self, value: Union[None, List[str]]) -> None: - """Setter: List of jobs (if any) that use the model""" - self._inner_dict['downstreamJobs'] = value - - - @property - def groups(self) -> Union[None, List[str]]: - """Getter: Groups the model belongs to""" - return self._inner_dict.get('groups') # type: ignore - - @groups.setter - def groups(self, value: Union[None, List[str]]) -> None: - """Setter: Groups the model belongs to""" - self._inner_dict['groups'] = value - - -class MLPrimaryKeyPropertiesClass(DictWrapper): - """Properties associated with a MLPrimaryKey""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties") - def __init__(self, - sources: List[str], - description: Union[None, str]=None, - dataType: Union[None, Union[str, "MLFeatureDataTypeClass"]]=None, - version: Union[None, "VersionTagClass"]=None, - ): - super().__init__() - - self.description = description - self.dataType = dataType - self.version = version - self.sources = sources - - @classmethod - def construct_with_defaults(cls) -> "MLPrimaryKeyPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.dataType = self.RECORD_SCHEMA.field_map["dataType"].default - self.version = self.RECORD_SCHEMA.field_map["version"].default - self.sources = list() - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the MLPrimaryKey""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the MLPrimaryKey""" - self._inner_dict['description'] = value - - - @property - def dataType(self) -> Union[None, Union[str, "MLFeatureDataTypeClass"]]: - """Getter: Data Type of the MLPrimaryKey""" - return self._inner_dict.get('dataType') # type: ignore - - @dataType.setter - def dataType(self, value: Union[None, Union[str, "MLFeatureDataTypeClass"]]) -> None: - """Setter: Data Type of the MLPrimaryKey""" - self._inner_dict['dataType'] = value - - - @property - def version(self) -> Union[None, "VersionTagClass"]: - """Getter: Version of the MLPrimaryKey""" - return self._inner_dict.get('version') # type: ignore - - @version.setter - def version(self, value: Union[None, "VersionTagClass"]) -> None: - """Setter: Version of the MLPrimaryKey""" - self._inner_dict['version'] = value - - - @property - def sources(self) -> List[str]: - """Getter: Source of the MLPrimaryKey""" - return 
self._inner_dict.get('sources') # type: ignore - - @sources.setter - def sources(self, value: List[str]) -> None: - """Setter: Source of the MLPrimaryKey""" - self._inner_dict['sources'] = value - - -class MetricsClass(DictWrapper): - """Metrics to be featured for the MLModel.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.Metrics") - def __init__(self, - performanceMeasures: Union[None, List[str]]=None, - decisionThreshold: Union[None, List[str]]=None, - ): - super().__init__() - - self.performanceMeasures = performanceMeasures - self.decisionThreshold = decisionThreshold - - @classmethod - def construct_with_defaults(cls) -> "MetricsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.performanceMeasures = self.RECORD_SCHEMA.field_map["performanceMeasures"].default - self.decisionThreshold = self.RECORD_SCHEMA.field_map["decisionThreshold"].default - - - @property - def performanceMeasures(self) -> Union[None, List[str]]: - """Getter: Measures of MLModel performance""" - return self._inner_dict.get('performanceMeasures') # type: ignore - - @performanceMeasures.setter - def performanceMeasures(self, value: Union[None, List[str]]) -> None: - """Setter: Measures of MLModel performance""" - self._inner_dict['performanceMeasures'] = value - - - @property - def decisionThreshold(self) -> Union[None, List[str]]: - """Getter: Decision Thresholds used (if any)?""" - return self._inner_dict.get('decisionThreshold') # type: ignore - - @decisionThreshold.setter - def decisionThreshold(self, value: Union[None, List[str]]) -> None: - """Setter: Decision Thresholds used (if any)?""" - self._inner_dict['decisionThreshold'] = value - - -class QuantitativeAnalysesClass(DictWrapper): - """Quantitative analyses should be disaggregated, that is, broken down by the chosen factors. 
Quantitative analyses should provide the results of evaluating the MLModel according to the chosen metrics, providing confidence interval values when possible.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.QuantitativeAnalyses") - def __init__(self, - unitaryResults: Union[None, str]=None, - intersectionalResults: Union[None, str]=None, - ): - super().__init__() - - self.unitaryResults = unitaryResults - self.intersectionalResults = intersectionalResults - - @classmethod - def construct_with_defaults(cls) -> "QuantitativeAnalysesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.unitaryResults = self.RECORD_SCHEMA.field_map["unitaryResults"].default - self.intersectionalResults = self.RECORD_SCHEMA.field_map["intersectionalResults"].default - - - @property - def unitaryResults(self) -> Union[None, str]: - """Getter: Link to a dashboard with results showing how the MLModel performed with respect to each factor""" - return self._inner_dict.get('unitaryResults') # type: ignore - - @unitaryResults.setter - def unitaryResults(self, value: Union[None, str]) -> None: - """Setter: Link to a dashboard with results showing how the MLModel performed with respect to each factor""" - self._inner_dict['unitaryResults'] = value - - - @property - def intersectionalResults(self) -> Union[None, str]: - """Getter: Link to a dashboard with results showing how the MLModel performed with respect to the intersection of evaluated factors?""" - return self._inner_dict.get('intersectionalResults') # type: ignore - - @intersectionalResults.setter - def intersectionalResults(self, value: Union[None, str]) -> None: - """Setter: Link to a dashboard with results showing how the MLModel performed with respect to the intersection of evaluated factors?""" - self._inner_dict['intersectionalResults'] = value - - -class SourceCodeClass(DictWrapper): - """Source Code""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.SourceCode") - def __init__(self, - sourceCode: List["SourceCodeUrlClass"], - ): - super().__init__() - - self.sourceCode = sourceCode - - @classmethod - def construct_with_defaults(cls) -> "SourceCodeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.sourceCode = list() - - - @property - def sourceCode(self) -> List["SourceCodeUrlClass"]: - """Getter: Source Code along with types""" - return self._inner_dict.get('sourceCode') # type: ignore - - @sourceCode.setter - def sourceCode(self, value: List["SourceCodeUrlClass"]) -> None: - """Setter: Source Code along with types""" - self._inner_dict['sourceCode'] = value - - -class SourceCodeUrlClass(DictWrapper): - """Source Code Url Entity""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.SourceCodeUrl") - def __init__(self, - type: Union[str, "SourceCodeUrlTypeClass"], - sourceCodeUrl: str, - ): - super().__init__() - - self.type = type - self.sourceCodeUrl = sourceCodeUrl - - @classmethod - def construct_with_defaults(cls) -> "SourceCodeUrlClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.type = SourceCodeUrlTypeClass.ML_MODEL_SOURCE_CODE - self.sourceCodeUrl = str() - - - @property - def type(self) -> Union[str, "SourceCodeUrlTypeClass"]: - """Getter: Source Code Url Types""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def 
type(self, value: Union[str, "SourceCodeUrlTypeClass"]) -> None: - """Setter: Source Code Url Types""" - self._inner_dict['type'] = value - - - @property - def sourceCodeUrl(self) -> str: - """Getter: Source Code Url""" - return self._inner_dict.get('sourceCodeUrl') # type: ignore - - @sourceCodeUrl.setter - def sourceCodeUrl(self, value: str) -> None: - """Setter: Source Code Url""" - self._inner_dict['sourceCodeUrl'] = value - - -class SourceCodeUrlTypeClass(object): - # No docs available. - - ML_MODEL_SOURCE_CODE = "ML_MODEL_SOURCE_CODE" - TRAINING_PIPELINE_SOURCE_CODE = "TRAINING_PIPELINE_SOURCE_CODE" - EVALUATION_PIPELINE_SOURCE_CODE = "EVALUATION_PIPELINE_SOURCE_CODE" - - -class TrainingDataClass(DictWrapper): - """Ideally, the MLModel card would contain as much information about the training data as the evaluation data. However, there might be cases where it is not feasible to provide this level of detailed information about the training data. For example, the data may be proprietary, or require a non-disclosure agreement. In these cases, we advocate for basic details about the distributions over groups in the data, as well as any other details that could inform stakeholders on the kinds of biases the model may have encoded.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.ml.metadata.TrainingData") - def __init__(self, - trainingData: List["BaseDataClass"], - ): - super().__init__() - - self.trainingData = trainingData - - @classmethod - def construct_with_defaults(cls) -> "TrainingDataClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.trainingData = list() - - - @property - def trainingData(self) -> List["BaseDataClass"]: - """Getter: Details on the dataset(s) used for training the MLModel""" - return self._inner_dict.get('trainingData') # type: ignore - - @trainingData.setter - def trainingData(self, value: List["BaseDataClass"]) -> None: - """Setter: Details on the dataset(s) used for training the MLModel""" - self._inner_dict['trainingData'] = value - - -class GenericAspectClass(DictWrapper): - """Generic record structure for serializing an Aspect - """ - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.mxe.GenericAspect") - def __init__(self, - value: bytes, - contentType: str, - ): - super().__init__() - - self.value = value - self.contentType = contentType - - @classmethod - def construct_with_defaults(cls) -> "GenericAspectClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.value = bytes() - self.contentType = str() - - - @property - def value(self) -> bytes: - # No docs available. - return self._inner_dict.get('value') # type: ignore - - @value.setter - def value(self, value: bytes) -> None: - # No docs available. - self._inner_dict['value'] = value - - - @property - def contentType(self) -> str: - # No docs available. - return self._inner_dict.get('contentType') # type: ignore - - @contentType.setter - def contentType(self, value: str) -> None: - # No docs available. - self._inner_dict['contentType'] = value - - -class MetadataChangeEventClass(DictWrapper): - """Kafka event for proposing a metadata change for an entity. 
A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.mxe.MetadataChangeEvent") - def __init__(self, - proposedSnapshot: Union["ChartSnapshotClass", "CorpGroupSnapshotClass", "CorpUserSnapshotClass", "DashboardSnapshotClass", "DataFlowSnapshotClass", "DataJobSnapshotClass", "DatasetSnapshotClass", "DataProcessSnapshotClass", "DataPlatformSnapshotClass", "MLModelSnapshotClass", "MLPrimaryKeySnapshotClass", "MLFeatureSnapshotClass", "MLFeatureTableSnapshotClass", "MLModelDeploymentSnapshotClass", "MLModelGroupSnapshotClass", "TagSnapshotClass", "GlossaryTermSnapshotClass", "GlossaryNodeSnapshotClass", "DataHubPolicySnapshotClass", "SchemaFieldSnapshotClass"], - auditHeader: Union[None, "KafkaAuditHeaderClass"]=None, - proposedDelta: None=None, - systemMetadata: Union[None, "SystemMetadataClass"]=None, - ): - super().__init__() - - self.auditHeader = auditHeader - self.proposedSnapshot = proposedSnapshot - self.proposedDelta = proposedDelta - self.systemMetadata = systemMetadata - - @classmethod - def construct_with_defaults(cls) -> "MetadataChangeEventClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.auditHeader = self.RECORD_SCHEMA.field_map["auditHeader"].default - self.proposedSnapshot = ChartSnapshotClass.construct_with_defaults() - self.proposedDelta = self.RECORD_SCHEMA.field_map["proposedDelta"].default - self.systemMetadata = self.RECORD_SCHEMA.field_map["systemMetadata"].default - - - @property - def auditHeader(self) -> Union[None, "KafkaAuditHeaderClass"]: - """Getter: Kafka audit header. See go/kafkaauditheader for more info.""" - return self._inner_dict.get('auditHeader') # type: ignore - - @auditHeader.setter - def auditHeader(self, value: Union[None, "KafkaAuditHeaderClass"]) -> None: - """Setter: Kafka audit header. See go/kafkaauditheader for more info.""" - self._inner_dict['auditHeader'] = value - - - @property - def proposedSnapshot(self) -> Union["ChartSnapshotClass", "CorpGroupSnapshotClass", "CorpUserSnapshotClass", "DashboardSnapshotClass", "DataFlowSnapshotClass", "DataJobSnapshotClass", "DatasetSnapshotClass", "DataProcessSnapshotClass", "DataPlatformSnapshotClass", "MLModelSnapshotClass", "MLPrimaryKeySnapshotClass", "MLFeatureSnapshotClass", "MLFeatureTableSnapshotClass", "MLModelDeploymentSnapshotClass", "MLModelGroupSnapshotClass", "TagSnapshotClass", "GlossaryTermSnapshotClass", "GlossaryNodeSnapshotClass", "DataHubPolicySnapshotClass", "SchemaFieldSnapshotClass"]: - """Getter: Snapshot of the proposed metadata change. Include only the aspects affected by the change in the snapshot.""" - return self._inner_dict.get('proposedSnapshot') # type: ignore - - @proposedSnapshot.setter - def proposedSnapshot(self, value: Union["ChartSnapshotClass", "CorpGroupSnapshotClass", "CorpUserSnapshotClass", "DashboardSnapshotClass", "DataFlowSnapshotClass", "DataJobSnapshotClass", "DatasetSnapshotClass", "DataProcessSnapshotClass", "DataPlatformSnapshotClass", "MLModelSnapshotClass", "MLPrimaryKeySnapshotClass", "MLFeatureSnapshotClass", "MLFeatureTableSnapshotClass", "MLModelDeploymentSnapshotClass", "MLModelGroupSnapshotClass", "TagSnapshotClass", "GlossaryTermSnapshotClass", "GlossaryNodeSnapshotClass", "DataHubPolicySnapshotClass", "SchemaFieldSnapshotClass"]) -> None: - """Setter: Snapshot of the proposed metadata change. 
Include only the aspects affected by the change in the snapshot.""" - self._inner_dict['proposedSnapshot'] = value - - - @property - def proposedDelta(self) -> None: - """Getter: Delta of the proposed metadata partial update.""" - return self._inner_dict.get('proposedDelta') # type: ignore - - @proposedDelta.setter - def proposedDelta(self, value: None) -> None: - """Setter: Delta of the proposed metadata partial update.""" - self._inner_dict['proposedDelta'] = value - - - @property - def systemMetadata(self) -> Union[None, "SystemMetadataClass"]: - """Getter: Metadata around how the snapshot was ingested""" - return self._inner_dict.get('systemMetadata') # type: ignore - - @systemMetadata.setter - def systemMetadata(self, value: Union[None, "SystemMetadataClass"]) -> None: - """Setter: Metadata around how the snapshot was ingested""" - self._inner_dict['systemMetadata'] = value - - -class MetadataChangeProposalClass(DictWrapper): - """Kafka event for proposing a metadata change for an entity. A corresponding MetadataChangeLog is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeProposal will be emitted instead.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.mxe.MetadataChangeProposal") - def __init__(self, - entityType: str, - changeType: Union[str, "ChangeTypeClass"], - auditHeader: Union[None, "KafkaAuditHeaderClass"]=None, - entityUrn: Union[None, str]=None, - entityKeyAspect: Union[None, "GenericAspectClass"]=None, - aspectName: Union[None, str]=None, - aspect: Union[None, "GenericAspectClass"]=None, - systemMetadata: Union[None, "SystemMetadataClass"]=None, - ): - super().__init__() - - self.auditHeader = auditHeader - self.entityType = entityType - self.entityUrn = entityUrn - self.entityKeyAspect = entityKeyAspect - self.changeType = changeType - self.aspectName = aspectName - self.aspect = aspect - self.systemMetadata = systemMetadata - - @classmethod - def construct_with_defaults(cls) -> "MetadataChangeProposalClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.auditHeader = self.RECORD_SCHEMA.field_map["auditHeader"].default - self.entityType = str() - self.entityUrn = self.RECORD_SCHEMA.field_map["entityUrn"].default - self.entityKeyAspect = self.RECORD_SCHEMA.field_map["entityKeyAspect"].default - self.changeType = ChangeTypeClass.UPSERT - self.aspectName = self.RECORD_SCHEMA.field_map["aspectName"].default - self.aspect = self.RECORD_SCHEMA.field_map["aspect"].default - self.systemMetadata = self.RECORD_SCHEMA.field_map["systemMetadata"].default - - - @property - def auditHeader(self) -> Union[None, "KafkaAuditHeaderClass"]: - """Getter: Kafka audit header. See go/kafkaauditheader for more info.""" - return self._inner_dict.get('auditHeader') # type: ignore - - @auditHeader.setter - def auditHeader(self, value: Union[None, "KafkaAuditHeaderClass"]) -> None: - """Setter: Kafka audit header. 
See go/kafkaauditheader for more info.""" - self._inner_dict['auditHeader'] = value - - - @property - def entityType(self) -> str: - """Getter: Type of the entity being written to""" - return self._inner_dict.get('entityType') # type: ignore - - @entityType.setter - def entityType(self, value: str) -> None: - """Setter: Type of the entity being written to""" - self._inner_dict['entityType'] = value - - - @property - def entityUrn(self) -> Union[None, str]: - """Getter: Urn of the entity being written - """ - return self._inner_dict.get('entityUrn') # type: ignore - - @entityUrn.setter - def entityUrn(self, value: Union[None, str]) -> None: - """Setter: Urn of the entity being written - """ - self._inner_dict['entityUrn'] = value - - - @property - def entityKeyAspect(self) -> Union[None, "GenericAspectClass"]: - """Getter: Key aspect of the entity being written""" - return self._inner_dict.get('entityKeyAspect') # type: ignore - - @entityKeyAspect.setter - def entityKeyAspect(self, value: Union[None, "GenericAspectClass"]) -> None: - """Setter: Key aspect of the entity being written""" - self._inner_dict['entityKeyAspect'] = value - - - @property - def changeType(self) -> Union[str, "ChangeTypeClass"]: - """Getter: Type of change being proposed""" - return self._inner_dict.get('changeType') # type: ignore - - @changeType.setter - def changeType(self, value: Union[str, "ChangeTypeClass"]) -> None: - """Setter: Type of change being proposed""" - self._inner_dict['changeType'] = value - - - @property - def aspectName(self) -> Union[None, str]: - """Getter: Aspect of the entity being written to - Not filling this out implies that the writer wants to affect the entire entity - Note: This is only valid for CREATE and DELETE operations. - """ - return self._inner_dict.get('aspectName') # type: ignore - - @aspectName.setter - def aspectName(self, value: Union[None, str]) -> None: - """Setter: Aspect of the entity being written to - Not filling this out implies that the writer wants to affect the entire entity - Note: This is only valid for CREATE and DELETE operations. - """ - self._inner_dict['aspectName'] = value - - - @property - def aspect(self) -> Union[None, "GenericAspectClass"]: - # No docs available. - return self._inner_dict.get('aspect') # type: ignore - - @aspect.setter - def aspect(self, value: Union[None, "GenericAspectClass"]) -> None: - # No docs available. - self._inner_dict['aspect'] = value - - - @property - def systemMetadata(self) -> Union[None, "SystemMetadataClass"]: - """Getter: A string->string map of custom properties that one might want to attach to an event - """ - return self._inner_dict.get('systemMetadata') # type: ignore - - @systemMetadata.setter - def systemMetadata(self, value: Union[None, "SystemMetadataClass"]) -> None: - """Setter: A string->string map of custom properties that one might want to attach to an event - """ - self._inner_dict['systemMetadata'] = value - - -class SystemMetadataClass(DictWrapper): - """Kafka event for proposing a metadata change for an entity. 
A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.mxe.SystemMetadata") - def __init__(self, - lastObserved: Optional[Union[int, None]]=None, - runId: Optional[Union[str, None]]=None, - properties: Union[None, Dict[str, str]]=None, - ): - super().__init__() - - if lastObserved is None: - # default: 0 - self.lastObserved = self.RECORD_SCHEMA.field_map["lastObserved"].default - else: - self.lastObserved = lastObserved - if runId is None: - # default: 'no-run-id-provided' - self.runId = self.RECORD_SCHEMA.field_map["runId"].default - else: - self.runId = runId - self.properties = properties - - @classmethod - def construct_with_defaults(cls) -> "SystemMetadataClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.lastObserved = self.RECORD_SCHEMA.field_map["lastObserved"].default - self.runId = self.RECORD_SCHEMA.field_map["runId"].default - self.properties = self.RECORD_SCHEMA.field_map["properties"].default - - - @property - def lastObserved(self) -> Union[int, None]: - """Getter: The timestamp the metadata was observed at""" - return self._inner_dict.get('lastObserved') # type: ignore - - @lastObserved.setter - def lastObserved(self, value: Union[int, None]) -> None: - """Setter: The timestamp the metadata was observed at""" - self._inner_dict['lastObserved'] = value - - - @property - def runId(self) -> Union[str, None]: - """Getter: The run id that produced the metadata""" - return self._inner_dict.get('runId') # type: ignore - - @runId.setter - def runId(self, value: Union[str, None]) -> None: - """Setter: The run id that produced the metadata""" - self._inner_dict['runId'] = value - - - @property - def properties(self) -> Union[None, Dict[str, str]]: - """Getter: Additional properties""" - return self._inner_dict.get('properties') # type: ignore - - @properties.setter - def properties(self, value: Union[None, Dict[str, str]]) -> None: - """Setter: Additional properties""" - self._inner_dict['properties'] = value - - -class DataHubActorFilterClass(DictWrapper): - """Information used to filter DataHub actors.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.policy.DataHubActorFilter") - def __init__(self, - users: Union[None, List[str]]=None, - groups: Union[None, List[str]]=None, - resourceOwners: Optional[bool]=None, - allUsers: Optional[bool]=None, - allGroups: Optional[bool]=None, - ): - super().__init__() - - self.users = users - self.groups = groups - if resourceOwners is None: - # default: False - self.resourceOwners = self.RECORD_SCHEMA.field_map["resourceOwners"].default - else: - self.resourceOwners = resourceOwners - if allUsers is None: - # default: False - self.allUsers = self.RECORD_SCHEMA.field_map["allUsers"].default - else: - self.allUsers = allUsers - if allGroups is None: - # default: False - self.allGroups = self.RECORD_SCHEMA.field_map["allGroups"].default - else: - self.allGroups = allGroups - - @classmethod - def construct_with_defaults(cls) -> "DataHubActorFilterClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.users = self.RECORD_SCHEMA.field_map["users"].default - self.groups = self.RECORD_SCHEMA.field_map["groups"].default - self.resourceOwners = self.RECORD_SCHEMA.field_map["resourceOwners"].default - self.allUsers = 
self.RECORD_SCHEMA.field_map["allUsers"].default - self.allGroups = self.RECORD_SCHEMA.field_map["allGroups"].default - - - @property - def users(self) -> Union[None, List[str]]: - """Getter: A specific set of users to apply the policy to (disjunctive)""" - return self._inner_dict.get('users') # type: ignore - - @users.setter - def users(self, value: Union[None, List[str]]) -> None: - """Setter: A specific set of users to apply the policy to (disjunctive)""" - self._inner_dict['users'] = value - - - @property - def groups(self) -> Union[None, List[str]]: - """Getter: A specific set of groups to apply the policy to (disjunctive)""" - return self._inner_dict.get('groups') # type: ignore - - @groups.setter - def groups(self, value: Union[None, List[str]]) -> None: - """Setter: A specific set of groups to apply the policy to (disjunctive)""" - self._inner_dict['groups'] = value - - - @property - def resourceOwners(self) -> bool: - """Getter: Whether the filter should return true for owners of a particular resource. - Only applies to policies of type 'Metadata', which have a resource associated with them.""" - return self._inner_dict.get('resourceOwners') # type: ignore - - @resourceOwners.setter - def resourceOwners(self, value: bool) -> None: - """Setter: Whether the filter should return true for owners of a particular resource. - Only applies to policies of type 'Metadata', which have a resource associated with them.""" - self._inner_dict['resourceOwners'] = value - - - @property - def allUsers(self) -> bool: - """Getter: Whether the filter should apply to all users.""" - return self._inner_dict.get('allUsers') # type: ignore - - @allUsers.setter - def allUsers(self, value: bool) -> None: - """Setter: Whether the filter should apply to all users.""" - self._inner_dict['allUsers'] = value - - - @property - def allGroups(self) -> bool: - """Getter: Whether the filter should apply to all groups.""" - return self._inner_dict.get('allGroups') # type: ignore - - @allGroups.setter - def allGroups(self, value: bool) -> None: - """Setter: Whether the filter should apply to all groups.""" - self._inner_dict['allGroups'] = value - - -class DataHubPolicyInfoClass(DictWrapper): - """Information about a DataHub (UI) access policy.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.policy.DataHubPolicyInfo") - def __init__(self, - displayName: str, - description: str, - type: str, - state: str, - privileges: List[str], - actors: "DataHubActorFilterClass", - resources: Union[None, "DataHubResourceFilterClass"]=None, - editable: Optional[bool]=None, - ): - super().__init__() - - self.displayName = displayName - self.description = description - self.type = type - self.state = state - self.resources = resources - self.privileges = privileges - self.actors = actors - if editable is None: - # default: True - self.editable = self.RECORD_SCHEMA.field_map["editable"].default - else: - self.editable = editable - - @classmethod - def construct_with_defaults(cls) -> "DataHubPolicyInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.displayName = str() - self.description = str() - self.type = str() - self.state = str() - self.resources = self.RECORD_SCHEMA.field_map["resources"].default - self.privileges = list() - self.actors = DataHubActorFilterClass.construct_with_defaults() - self.editable = self.RECORD_SCHEMA.field_map["editable"].default - - - @property - def displayName(self) -> str: - """Getter: Display name of the 
Policy""" - return self._inner_dict.get('displayName') # type: ignore - - @displayName.setter - def displayName(self, value: str) -> None: - """Setter: Display name of the Policy""" - self._inner_dict['displayName'] = value - - - @property - def description(self) -> str: - """Getter: Description of the Policy""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: str) -> None: - """Setter: Description of the Policy""" - self._inner_dict['description'] = value - - - @property - def type(self) -> str: - """Getter: The type of policy""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: str) -> None: - """Setter: The type of policy""" - self._inner_dict['type'] = value - - - @property - def state(self) -> str: - """Getter: The state of policy, ACTIVE or INACTIVE""" - return self._inner_dict.get('state') # type: ignore - - @state.setter - def state(self, value: str) -> None: - """Setter: The state of policy, ACTIVE or INACTIVE""" - self._inner_dict['state'] = value - - - @property - def resources(self) -> Union[None, "DataHubResourceFilterClass"]: - """Getter: The resource that the policy applies to. Not required for some 'Platform' privileges.""" - return self._inner_dict.get('resources') # type: ignore - - @resources.setter - def resources(self, value: Union[None, "DataHubResourceFilterClass"]) -> None: - """Setter: The resource that the policy applies to. Not required for some 'Platform' privileges.""" - self._inner_dict['resources'] = value - - - @property - def privileges(self) -> List[str]: - """Getter: The privileges that the policy grants.""" - return self._inner_dict.get('privileges') # type: ignore - - @privileges.setter - def privileges(self, value: List[str]) -> None: - """Setter: The privileges that the policy grants.""" - self._inner_dict['privileges'] = value - - - @property - def actors(self) -> "DataHubActorFilterClass": - """Getter: The actors that the policy applies to.""" - return self._inner_dict.get('actors') # type: ignore - - @actors.setter - def actors(self, value: "DataHubActorFilterClass") -> None: - """Setter: The actors that the policy applies to.""" - self._inner_dict['actors'] = value - - - @property - def editable(self) -> bool: - """Getter: Whether the policy should be editable via the UI""" - return self._inner_dict.get('editable') # type: ignore - - @editable.setter - def editable(self, value: bool) -> None: - """Setter: Whether the policy should be editable via the UI""" - self._inner_dict['editable'] = value - - -class DataHubResourceFilterClass(DictWrapper): - """Information used to filter DataHub resource.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.policy.DataHubResourceFilter") - def __init__(self, - type: Union[None, str]=None, - resources: Union[None, List[str]]=None, - allResources: Optional[bool]=None, - ): - super().__init__() - - self.type = type - self.resources = resources - if allResources is None: - # default: False - self.allResources = self.RECORD_SCHEMA.field_map["allResources"].default - else: - self.allResources = allResources - - @classmethod - def construct_with_defaults(cls) -> "DataHubResourceFilterClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.type = self.RECORD_SCHEMA.field_map["type"].default - self.resources = self.RECORD_SCHEMA.field_map["resources"].default - self.allResources = 
self.RECORD_SCHEMA.field_map["allResources"].default - - - @property - def type(self) -> Union[None, str]: - """Getter: The type of resource that the policy applies to. This will most often be a data asset entity name, for - example 'dataset'. It is not strictly required because in the future we will want to support filtering a resource - by domain, as well.""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union[None, str]) -> None: - """Setter: The type of resource that the policy applies to. This will most often be a data asset entity name, for - example 'dataset'. It is not strictly required because in the future we will want to support filtering a resource - by domain, as well.""" - self._inner_dict['type'] = value - - - @property - def resources(self) -> Union[None, List[str]]: - """Getter: A specific set of resources to apply the policy to, e.g. asset urns""" - return self._inner_dict.get('resources') # type: ignore - - @resources.setter - def resources(self, value: Union[None, List[str]]) -> None: - """Setter: A specific set of resources to apply the policy to, e.g. asset urns""" - self._inner_dict['resources'] = value - - - @property - def allResources(self) -> bool: - """Getter: Whether the policy should be applied to all assets matching the filter.""" - return self._inner_dict.get('allResources') # type: ignore - - @allResources.setter - def allResources(self, value: bool) -> None: - """Setter: Whether the policy should be applied to all assets matching the filter.""" - self._inner_dict['allResources'] = value - - -class ArrayTypeClass(DictWrapper): - """Array field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.ArrayType") - def __init__(self, - nestedType: Union[None, List[str]]=None, - ): - super().__init__() - - self.nestedType = nestedType - - @classmethod - def construct_with_defaults(cls) -> "ArrayTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.nestedType = self.RECORD_SCHEMA.field_map["nestedType"].default - - - @property - def nestedType(self) -> Union[None, List[str]]: - """Getter: List of types this array holds.""" - return self._inner_dict.get('nestedType') # type: ignore - - @nestedType.setter - def nestedType(self, value: Union[None, List[str]]) -> None: - """Setter: List of types this array holds.""" - self._inner_dict['nestedType'] = value - - -class BinaryJsonSchemaClass(DictWrapper): - """Schema text of binary JSON schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.BinaryJsonSchema") - def __init__(self, - schema: str, - ): - super().__init__() - - self.schema = schema - - @classmethod - def construct_with_defaults(cls) -> "BinaryJsonSchemaClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.schema = str() - - - @property - def schema(self) -> str: - """Getter: The native schema text for binary JSON file format.""" - return self._inner_dict.get('schema') # type: ignore - - @schema.setter - def schema(self, value: str) -> None: - """Setter: The native schema text for binary JSON file format.""" - self._inner_dict['schema'] = value - - -class BooleanTypeClass(DictWrapper): - """Boolean field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.BooleanType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "BooleanTypeClass": 
- self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class BytesTypeClass(DictWrapper): - """Bytes field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.BytesType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "BytesTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class DatasetFieldForeignKeyClass(DictWrapper): - """For non-urn based foregin keys.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.DatasetFieldForeignKey") - def __init__(self, - parentDataset: str, - currentFieldPaths: List[str], - parentField: str, - ): - super().__init__() - - self.parentDataset = parentDataset - self.currentFieldPaths = currentFieldPaths - self.parentField = parentField - - @classmethod - def construct_with_defaults(cls) -> "DatasetFieldForeignKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.parentDataset = str() - self.currentFieldPaths = list() - self.parentField = str() - - - @property - def parentDataset(self) -> str: - """Getter: dataset that stores the resource.""" - return self._inner_dict.get('parentDataset') # type: ignore - - @parentDataset.setter - def parentDataset(self, value: str) -> None: - """Setter: dataset that stores the resource.""" - self._inner_dict['parentDataset'] = value - - - @property - def currentFieldPaths(self) -> List[str]: - """Getter: List of fields in hosting(current) SchemaMetadata that conform a foreign key. List can contain a single entry or multiple entries if several entries in hosting schema conform a foreign key in a single parent dataset.""" - return self._inner_dict.get('currentFieldPaths') # type: ignore - - @currentFieldPaths.setter - def currentFieldPaths(self, value: List[str]) -> None: - """Setter: List of fields in hosting(current) SchemaMetadata that conform a foreign key. 
List can contain a single entry or multiple entries if several entries in hosting schema conform a foreign key in a single parent dataset.""" - self._inner_dict['currentFieldPaths'] = value - - - @property - def parentField(self) -> str: - """Getter: SchemaField@fieldPath that uniquely identify field in parent dataset that this field references.""" - return self._inner_dict.get('parentField') # type: ignore - - @parentField.setter - def parentField(self, value: str) -> None: - """Setter: SchemaField@fieldPath that uniquely identify field in parent dataset that this field references.""" - self._inner_dict['parentField'] = value - - -class DateTypeClass(DictWrapper): - """Date field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.DateType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "DateTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class EditableSchemaFieldInfoClass(DictWrapper): - """SchemaField to describe metadata related to dataset schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.EditableSchemaFieldInfo") - def __init__(self, - fieldPath: str, - description: Union[None, str]=None, - globalTags: Union[None, "GlobalTagsClass"]=None, - glossaryTerms: Union[None, "GlossaryTermsClass"]=None, - ): - super().__init__() - - self.fieldPath = fieldPath - self.description = description - self.globalTags = globalTags - self.glossaryTerms = glossaryTerms - - @classmethod - def construct_with_defaults(cls) -> "EditableSchemaFieldInfoClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.fieldPath = str() - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.globalTags = self.RECORD_SCHEMA.field_map["globalTags"].default - self.glossaryTerms = self.RECORD_SCHEMA.field_map["glossaryTerms"].default - - - @property - def fieldPath(self) -> str: - """Getter: FieldPath uniquely identifying the SchemaField this metadata is associated with""" - return self._inner_dict.get('fieldPath') # type: ignore - - @fieldPath.setter - def fieldPath(self, value: str) -> None: - """Setter: FieldPath uniquely identifying the SchemaField this metadata is associated with""" - self._inner_dict['fieldPath'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Description""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Description""" - self._inner_dict['description'] = value - - - @property - def globalTags(self) -> Union[None, "GlobalTagsClass"]: - """Getter: Tags associated with the field""" - return self._inner_dict.get('globalTags') # type: ignore - - @globalTags.setter - def globalTags(self, value: Union[None, "GlobalTagsClass"]) -> None: - """Setter: Tags associated with the field""" - self._inner_dict['globalTags'] = value - - - @property - def glossaryTerms(self) -> Union[None, "GlossaryTermsClass"]: - """Getter: Glossary terms associated with the field""" - return self._inner_dict.get('glossaryTerms') # type: ignore - - @glossaryTerms.setter - def glossaryTerms(self, value: Union[None, "GlossaryTermsClass"]) -> None: - """Setter: Glossary terms associated with the field""" - self._inner_dict['glossaryTerms'] = value - - -class EditableSchemaMetadataClass(DictWrapper): - 
"""EditableSchemaMetadata stores editable changes made to schema metadata. This separates changes made from - ingestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.EditableSchemaMetadata") - def __init__(self, - editableSchemaFieldInfo: List["EditableSchemaFieldInfoClass"], - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, "AuditStampClass"]=None, - ): - super().__init__() - - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - self.editableSchemaFieldInfo = editableSchemaFieldInfo - - @classmethod - def construct_with_defaults(cls) -> "EditableSchemaMetadataClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - self.editableSchemaFieldInfo = list() - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. 
It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - - @property - def editableSchemaFieldInfo(self) -> List["EditableSchemaFieldInfoClass"]: - """Getter: Client provided a list of fields from document schema.""" - return self._inner_dict.get('editableSchemaFieldInfo') # type: ignore - - @editableSchemaFieldInfo.setter - def editableSchemaFieldInfo(self, value: List["EditableSchemaFieldInfoClass"]) -> None: - """Setter: Client provided a list of fields from document schema.""" - self._inner_dict['editableSchemaFieldInfo'] = value - - -class EnumTypeClass(DictWrapper): - """Enum field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.EnumType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "EnumTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class EspressoSchemaClass(DictWrapper): - """Schema text of an espresso table schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.EspressoSchema") - def __init__(self, - documentSchema: str, - tableSchema: str, - ): - super().__init__() - - self.documentSchema = documentSchema - self.tableSchema = tableSchema - - @classmethod - def construct_with_defaults(cls) -> "EspressoSchemaClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.documentSchema = str() - self.tableSchema = str() - - - @property - def documentSchema(self) -> str: - """Getter: The native espresso document schema.""" - return self._inner_dict.get('documentSchema') # type: ignore - - @documentSchema.setter - def documentSchema(self, value: str) -> None: - """Setter: The native espresso document schema.""" - self._inner_dict['documentSchema'] = value - - - @property - def tableSchema(self) -> str: - """Getter: The espresso table schema definition.""" - return self._inner_dict.get('tableSchema') # type: ignore - - @tableSchema.setter - def tableSchema(self, value: str) -> None: - """Setter: The espresso table schema definition.""" - self._inner_dict['tableSchema'] = value - - -class FixedTypeClass(DictWrapper): - """Fixed field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.FixedType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "FixedTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class ForeignKeyConstraintClass(DictWrapper): - """Description of a foreign key constraint in a schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.ForeignKeyConstraint") - def __init__(self, - name: str, - foreignFields: List[str], - sourceFields: List[str], - foreignDataset: str, - ): - super().__init__() - - self.name = name - self.foreignFields = foreignFields - self.sourceFields = sourceFields - 
self.foreignDataset = foreignDataset - - @classmethod - def construct_with_defaults(cls) -> "ForeignKeyConstraintClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - self.foreignFields = list() - self.sourceFields = list() - self.foreignDataset = str() - - - @property - def name(self) -> str: - """Getter: Name of the constraint, likely provided from the source""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the constraint, likely provided from the source""" - self._inner_dict['name'] = value - - - @property - def foreignFields(self) -> List[str]: - """Getter: Fields the constraint maps to on the foreign dataset""" - return self._inner_dict.get('foreignFields') # type: ignore - - @foreignFields.setter - def foreignFields(self, value: List[str]) -> None: - """Setter: Fields the constraint maps to on the foreign dataset""" - self._inner_dict['foreignFields'] = value - - - @property - def sourceFields(self) -> List[str]: - """Getter: Fields the constraint maps to on the source dataset""" - return self._inner_dict.get('sourceFields') # type: ignore - - @sourceFields.setter - def sourceFields(self, value: List[str]) -> None: - """Setter: Fields the constraint maps to on the source dataset""" - self._inner_dict['sourceFields'] = value - - - @property - def foreignDataset(self) -> str: - """Getter: Reference to the foreign dataset for ease of lookup""" - return self._inner_dict.get('foreignDataset') # type: ignore - - @foreignDataset.setter - def foreignDataset(self, value: str) -> None: - """Setter: Reference to the foreign dataset for ease of lookup""" - self._inner_dict['foreignDataset'] = value - - -class ForeignKeySpecClass(DictWrapper): - """Description of a foreign key in a schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.ForeignKeySpec") - def __init__(self, - foreignKey: Union["DatasetFieldForeignKeyClass", "UrnForeignKeyClass"], - ): - super().__init__() - - self.foreignKey = foreignKey - - @classmethod - def construct_with_defaults(cls) -> "ForeignKeySpecClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.foreignKey = DatasetFieldForeignKeyClass.construct_with_defaults() - - - @property - def foreignKey(self) -> Union["DatasetFieldForeignKeyClass", "UrnForeignKeyClass"]: - """Getter: Foreign key definition in metadata schema.""" - return self._inner_dict.get('foreignKey') # type: ignore - - @foreignKey.setter - def foreignKey(self, value: Union["DatasetFieldForeignKeyClass", "UrnForeignKeyClass"]) -> None: - """Setter: Foreign key definition in metadata schema.""" - self._inner_dict['foreignKey'] = value - - -class KafkaSchemaClass(DictWrapper): - """Schema holder for kafka schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.KafkaSchema") - def __init__(self, - documentSchema: str, - keySchema: Union[None, str]=None, - ): - super().__init__() - - self.documentSchema = documentSchema - self.keySchema = keySchema - - @classmethod - def construct_with_defaults(cls) -> "KafkaSchemaClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.documentSchema = str() - self.keySchema = self.RECORD_SCHEMA.field_map["keySchema"].default - - - @property - def documentSchema(self) -> str: - """Getter: The native kafka document 
schema. This is a human readable avro document schema.""" - return self._inner_dict.get('documentSchema') # type: ignore - - @documentSchema.setter - def documentSchema(self, value: str) -> None: - """Setter: The native kafka document schema. This is a human readable avro document schema.""" - self._inner_dict['documentSchema'] = value - - - @property - def keySchema(self) -> Union[None, str]: - """Getter: The native kafka key schema as retrieved from Schema Registry""" - return self._inner_dict.get('keySchema') # type: ignore - - @keySchema.setter - def keySchema(self, value: Union[None, str]) -> None: - """Setter: The native kafka key schema as retrieved from Schema Registry""" - self._inner_dict['keySchema'] = value - - -class KeyValueSchemaClass(DictWrapper): - """Schema text of a key-value store schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.KeyValueSchema") - def __init__(self, - keySchema: str, - valueSchema: str, - ): - super().__init__() - - self.keySchema = keySchema - self.valueSchema = valueSchema - - @classmethod - def construct_with_defaults(cls) -> "KeyValueSchemaClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.keySchema = str() - self.valueSchema = str() - - - @property - def keySchema(self) -> str: - """Getter: The raw schema for the key in the key-value store.""" - return self._inner_dict.get('keySchema') # type: ignore - - @keySchema.setter - def keySchema(self, value: str) -> None: - """Setter: The raw schema for the key in the key-value store.""" - self._inner_dict['keySchema'] = value - - - @property - def valueSchema(self) -> str: - """Getter: The raw schema for the value in the key-value store.""" - return self._inner_dict.get('valueSchema') # type: ignore - - @valueSchema.setter - def valueSchema(self, value: str) -> None: - """Setter: The raw schema for the value in the key-value store.""" - self._inner_dict['valueSchema'] = value - - -class MapTypeClass(DictWrapper): - """Map field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.MapType") - def __init__(self, - keyType: Union[None, str]=None, - valueType: Union[None, str]=None, - ): - super().__init__() - - self.keyType = keyType - self.valueType = valueType - - @classmethod - def construct_with_defaults(cls) -> "MapTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.keyType = self.RECORD_SCHEMA.field_map["keyType"].default - self.valueType = self.RECORD_SCHEMA.field_map["valueType"].default - - - @property - def keyType(self) -> Union[None, str]: - """Getter: Key type in a map""" - return self._inner_dict.get('keyType') # type: ignore - - @keyType.setter - def keyType(self, value: Union[None, str]) -> None: - """Setter: Key type in a map""" - self._inner_dict['keyType'] = value - - - @property - def valueType(self) -> Union[None, str]: - """Getter: Type of the value in a map""" - return self._inner_dict.get('valueType') # type: ignore - - @valueType.setter - def valueType(self, value: Union[None, str]) -> None: - """Setter: Type of the value in a map""" - self._inner_dict['valueType'] = value - - -class MySqlDDLClass(DictWrapper): - """Schema holder for MySql data definition language that describes an MySql table.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.MySqlDDL") - def __init__(self, - tableSchema: str, - ): - super().__init__() - - self.tableSchema = tableSchema 
- - @classmethod - def construct_with_defaults(cls) -> "MySqlDDLClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.tableSchema = str() - - - @property - def tableSchema(self) -> str: - """Getter: The native schema in the dataset's platform. This is a human readable (json blob) table schema.""" - return self._inner_dict.get('tableSchema') # type: ignore - - @tableSchema.setter - def tableSchema(self, value: str) -> None: - """Setter: The native schema in the dataset's platform. This is a human readable (json blob) table schema.""" - self._inner_dict['tableSchema'] = value - - -class NullTypeClass(DictWrapper): - """Null field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.NullType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "NullTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class NumberTypeClass(DictWrapper): - """Number data type: long, integer, short, etc..""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.NumberType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "NumberTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class OracleDDLClass(DictWrapper): - """Schema holder for oracle data definition language that describes an oracle table.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.OracleDDL") - def __init__(self, - tableSchema: str, - ): - super().__init__() - - self.tableSchema = tableSchema - - @classmethod - def construct_with_defaults(cls) -> "OracleDDLClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.tableSchema = str() - - - @property - def tableSchema(self) -> str: - """Getter: The native schema in the dataset's platform. This is a human readable (json blob) table schema.""" - return self._inner_dict.get('tableSchema') # type: ignore - - @tableSchema.setter - def tableSchema(self, value: str) -> None: - """Setter: The native schema in the dataset's platform. 
This is a human readable (json blob) table schema.""" - self._inner_dict['tableSchema'] = value - - -class OrcSchemaClass(DictWrapper): - """Schema text of an ORC schema.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.OrcSchema") - def __init__(self, - schema: str, - ): - super().__init__() - - self.schema = schema - - @classmethod - def construct_with_defaults(cls) -> "OrcSchemaClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.schema = str() - - - @property - def schema(self) -> str: - """Getter: The native schema for ORC file format.""" - return self._inner_dict.get('schema') # type: ignore - - @schema.setter - def schema(self, value: str) -> None: - """Setter: The native schema for ORC file format.""" - self._inner_dict['schema'] = value - - -class OtherSchemaClass(DictWrapper): - """Schema holder for undefined schema types.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.OtherSchema") - def __init__(self, - rawSchema: str, - ): - super().__init__() - - self.rawSchema = rawSchema - - @classmethod - def construct_with_defaults(cls) -> "OtherSchemaClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.rawSchema = str() - - - @property - def rawSchema(self) -> str: - """Getter: The native schema in the dataset's platform.""" - return self._inner_dict.get('rawSchema') # type: ignore - - @rawSchema.setter - def rawSchema(self, value: str) -> None: - """Setter: The native schema in the dataset's platform.""" - self._inner_dict['rawSchema'] = value - - -class PrestoDDLClass(DictWrapper): - """Schema holder for presto data definition language that describes a presto view.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.PrestoDDL") - def __init__(self, - rawSchema: str, - ): - super().__init__() - - self.rawSchema = rawSchema - - @classmethod - def construct_with_defaults(cls) -> "PrestoDDLClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.rawSchema = str() - - - @property - def rawSchema(self) -> str: - """Getter: The raw schema in the dataset's platform. This includes the DDL and the columns extracted from DDL.""" - return self._inner_dict.get('rawSchema') # type: ignore - - @rawSchema.setter - def rawSchema(self, value: str) -> None: - """Setter: The raw schema in the dataset's platform. This includes the DDL and the columns extracted from DDL.""" - self._inner_dict['rawSchema'] = value - - -class RecordTypeClass(DictWrapper): - """Record field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.RecordType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "RecordTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class SchemaFieldClass(DictWrapper): - """SchemaField to describe metadata related to dataset schema. 
Schema normalization rules: http://go/tms-schema""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.SchemaField") - def __init__(self, - fieldPath: str, - type: "SchemaFieldDataTypeClass", - nativeDataType: str, - jsonPath: Union[None, str]=None, - nullable: Optional[bool]=None, - description: Union[None, str]=None, - recursive: Optional[bool]=None, - globalTags: Union[None, "GlobalTagsClass"]=None, - glossaryTerms: Union[None, "GlossaryTermsClass"]=None, - isPartOfKey: Optional[bool]=None, - ): - super().__init__() - - self.fieldPath = fieldPath - self.jsonPath = jsonPath - if nullable is None: - # default: False - self.nullable = self.RECORD_SCHEMA.field_map["nullable"].default - else: - self.nullable = nullable - self.description = description - self.type = type - self.nativeDataType = nativeDataType - if recursive is None: - # default: False - self.recursive = self.RECORD_SCHEMA.field_map["recursive"].default - else: - self.recursive = recursive - self.globalTags = globalTags - self.glossaryTerms = glossaryTerms - if isPartOfKey is None: - # default: False - self.isPartOfKey = self.RECORD_SCHEMA.field_map["isPartOfKey"].default - else: - self.isPartOfKey = isPartOfKey - - @classmethod - def construct_with_defaults(cls) -> "SchemaFieldClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.fieldPath = str() - self.jsonPath = self.RECORD_SCHEMA.field_map["jsonPath"].default - self.nullable = self.RECORD_SCHEMA.field_map["nullable"].default - self.description = self.RECORD_SCHEMA.field_map["description"].default - self.type = SchemaFieldDataTypeClass.construct_with_defaults() - self.nativeDataType = str() - self.recursive = self.RECORD_SCHEMA.field_map["recursive"].default - self.globalTags = self.RECORD_SCHEMA.field_map["globalTags"].default - self.glossaryTerms = self.RECORD_SCHEMA.field_map["glossaryTerms"].default - self.isPartOfKey = self.RECORD_SCHEMA.field_map["isPartOfKey"].default - - - @property - def fieldPath(self) -> str: - """Getter: Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.""" - return self._inner_dict.get('fieldPath') # type: ignore - - @fieldPath.setter - def fieldPath(self, value: str) -> None: - """Setter: Flattened name of the field. Field is computed from jsonPath field. 
For data translation rules refer to wiki page above.""" - self._inner_dict['fieldPath'] = value - - - @property - def jsonPath(self) -> Union[None, str]: - """Getter: Flattened name of a field in JSON Path notation.""" - return self._inner_dict.get('jsonPath') # type: ignore - - @jsonPath.setter - def jsonPath(self, value: Union[None, str]) -> None: - """Setter: Flattened name of a field in JSON Path notation.""" - self._inner_dict['jsonPath'] = value - - - @property - def nullable(self) -> bool: - """Getter: Indicates if this field is optional or nullable""" - return self._inner_dict.get('nullable') # type: ignore - - @nullable.setter - def nullable(self, value: bool) -> None: - """Setter: Indicates if this field is optional or nullable""" - self._inner_dict['nullable'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Description""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Description""" - self._inner_dict['description'] = value - - - @property - def type(self) -> "SchemaFieldDataTypeClass": - """Getter: Platform independent field type of the field.""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: "SchemaFieldDataTypeClass") -> None: - """Setter: Platform independent field type of the field.""" - self._inner_dict['type'] = value - - - @property - def nativeDataType(self) -> str: - """Getter: The native type of the field in the dataset's platform as declared by platform schema.""" - return self._inner_dict.get('nativeDataType') # type: ignore - - @nativeDataType.setter - def nativeDataType(self, value: str) -> None: - """Setter: The native type of the field in the dataset's platform as declared by platform schema.""" - self._inner_dict['nativeDataType'] = value - - - @property - def recursive(self) -> bool: - """Getter: There are use cases when a field in type B references type A. A field in A references field of type B. In such cases, we will mark the first field as recursive.""" - return self._inner_dict.get('recursive') # type: ignore - - @recursive.setter - def recursive(self, value: bool) -> None: - """Setter: There are use cases when a field in type B references type A. A field in A references field of type B. 
In such cases, we will mark the first field as recursive.""" - self._inner_dict['recursive'] = value - - - @property - def globalTags(self) -> Union[None, "GlobalTagsClass"]: - """Getter: Tags associated with the field""" - return self._inner_dict.get('globalTags') # type: ignore - - @globalTags.setter - def globalTags(self, value: Union[None, "GlobalTagsClass"]) -> None: - """Setter: Tags associated with the field""" - self._inner_dict['globalTags'] = value - - - @property - def glossaryTerms(self) -> Union[None, "GlossaryTermsClass"]: - """Getter: Glossary terms associated with the field""" - return self._inner_dict.get('glossaryTerms') # type: ignore - - @glossaryTerms.setter - def glossaryTerms(self, value: Union[None, "GlossaryTermsClass"]) -> None: - """Setter: Glossary terms associated with the field""" - self._inner_dict['glossaryTerms'] = value - - - @property - def isPartOfKey(self) -> bool: - """Getter: For schema fields that are part of complex keys, set this field to true - We do this to easily distinguish between value and key fields""" - return self._inner_dict.get('isPartOfKey') # type: ignore - - @isPartOfKey.setter - def isPartOfKey(self, value: bool) -> None: - """Setter: For schema fields that are part of complex keys, set this field to true - We do this to easily distinguish between value and key fields""" - self._inner_dict['isPartOfKey'] = value - - -class SchemaFieldDataTypeClass(DictWrapper): - """Schema field data types""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.SchemaFieldDataType") - def __init__(self, - type: Union["BooleanTypeClass", "FixedTypeClass", "StringTypeClass", "BytesTypeClass", "NumberTypeClass", "DateTypeClass", "TimeTypeClass", "EnumTypeClass", "NullTypeClass", "MapTypeClass", "ArrayTypeClass", "UnionTypeClass", "RecordTypeClass"], - ): - super().__init__() - - self.type = type - - @classmethod - def construct_with_defaults(cls) -> "SchemaFieldDataTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.type = BooleanTypeClass.construct_with_defaults() - - - @property - def type(self) -> Union["BooleanTypeClass", "FixedTypeClass", "StringTypeClass", "BytesTypeClass", "NumberTypeClass", "DateTypeClass", "TimeTypeClass", "EnumTypeClass", "NullTypeClass", "MapTypeClass", "ArrayTypeClass", "UnionTypeClass", "RecordTypeClass"]: - """Getter: Data platform specific types""" - return self._inner_dict.get('type') # type: ignore - - @type.setter - def type(self, value: Union["BooleanTypeClass", "FixedTypeClass", "StringTypeClass", "BytesTypeClass", "NumberTypeClass", "DateTypeClass", "TimeTypeClass", "EnumTypeClass", "NullTypeClass", "MapTypeClass", "ArrayTypeClass", "UnionTypeClass", "RecordTypeClass"]) -> None: - """Setter: Data platform specific types""" - self._inner_dict['type'] = value - - -class SchemaMetadataClass(DictWrapper): - """SchemaMetadata to describe metadata related to store schema""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.SchemaMetadata") - def __init__(self, - schemaName: str, - platform: str, - version: int, - hash: str, - platformSchema: Union["EspressoSchemaClass", "OracleDDLClass", "MySqlDDLClass", "PrestoDDLClass", "KafkaSchemaClass", "BinaryJsonSchemaClass", "OrcSchemaClass", "SchemalessClass", "KeyValueSchemaClass", "OtherSchemaClass"], - fields: List["SchemaFieldClass"], - created: Optional["AuditStampClass"]=None, - lastModified: Optional["AuditStampClass"]=None, - deleted: Union[None, 
"AuditStampClass"]=None, - dataset: Union[None, str]=None, - cluster: Union[None, str]=None, - primaryKeys: Union[None, List[str]]=None, - foreignKeysSpecs: Union[None, Dict[str, "ForeignKeySpecClass"]]=None, - foreignKeys: Union[None, List["ForeignKeyConstraintClass"]]=None, - ): - super().__init__() - - self.schemaName = schemaName - self.platform = platform - self.version = version - if created is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - else: - self.created = created - if lastModified is None: - # default: {'actor': 'urn:li:corpuser:unknown', 'impersonator': None, 'time': 0} - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - else: - self.lastModified = lastModified - self.deleted = deleted - self.dataset = dataset - self.cluster = cluster - self.hash = hash - self.platformSchema = platformSchema - self.fields = fields - self.primaryKeys = primaryKeys - self.foreignKeysSpecs = foreignKeysSpecs - self.foreignKeys = foreignKeys - - @classmethod - def construct_with_defaults(cls) -> "SchemaMetadataClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.schemaName = str() - self.platform = str() - self.version = int() - self.created = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["created"].default, writers_schema=self.RECORD_SCHEMA.field_map["created"].type) - self.lastModified = _json_converter.from_json_object(self.RECORD_SCHEMA.field_map["lastModified"].default, writers_schema=self.RECORD_SCHEMA.field_map["lastModified"].type) - self.deleted = self.RECORD_SCHEMA.field_map["deleted"].default - self.dataset = self.RECORD_SCHEMA.field_map["dataset"].default - self.cluster = self.RECORD_SCHEMA.field_map["cluster"].default - self.hash = str() - self.platformSchema = EspressoSchemaClass.construct_with_defaults() - self.fields = list() - self.primaryKeys = self.RECORD_SCHEMA.field_map["primaryKeys"].default - self.foreignKeysSpecs = self.RECORD_SCHEMA.field_map["foreignKeysSpecs"].default - self.foreignKeys = self.RECORD_SCHEMA.field_map["foreignKeys"].default - - - @property - def schemaName(self) -> str: - """Getter: Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking""" - return self._inner_dict.get('schemaName') # type: ignore - - @schemaName.setter - def schemaName(self, value: str) -> None: - """Setter: Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking""" - self._inner_dict['schemaName'] = value - - - @property - def platform(self) -> str: - """Getter: Standardized platform urn where schema is defined. The data platform Urn (urn:li:platform:{platform_name})""" - return self._inner_dict.get('platform') # type: ignore - - @platform.setter - def platform(self, value: str) -> None: - """Setter: Standardized platform urn where schema is defined. The data platform Urn (urn:li:platform:{platform_name})""" - self._inner_dict['platform'] = value - - - @property - def version(self) -> int: - """Getter: Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. 
This version is differ from platform native schema version.""" - return self._inner_dict.get('version') # type: ignore - - @version.setter - def version(self, value: int) -> None: - """Setter: Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. This version is differ from platform native schema version.""" - self._inner_dict['version'] = value - - - @property - def created(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('created') # type: ignore - - @created.setter - def created(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.""" - self._inner_dict['created'] = value - - - @property - def lastModified(self) -> "AuditStampClass": - """Getter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - return self._inner_dict.get('lastModified') # type: ignore - - @lastModified.setter - def lastModified(self, value: "AuditStampClass") -> None: - """Setter: An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.""" - self._inner_dict['lastModified'] = value - - - @property - def deleted(self) -> Union[None, "AuditStampClass"]: - """Getter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - return self._inner_dict.get('deleted') # type: ignore - - @deleted.setter - def deleted(self, value: Union[None, "AuditStampClass"]) -> None: - """Setter: An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. 
It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.""" - self._inner_dict['deleted'] = value - - - @property - def dataset(self) -> Union[None, str]: - """Getter: Dataset this schema metadata is associated with.""" - return self._inner_dict.get('dataset') # type: ignore - - @dataset.setter - def dataset(self, value: Union[None, str]) -> None: - """Setter: Dataset this schema metadata is associated with.""" - self._inner_dict['dataset'] = value - - - @property - def cluster(self) -> Union[None, str]: - """Getter: The cluster this schema metadata resides from""" - return self._inner_dict.get('cluster') # type: ignore - - @cluster.setter - def cluster(self, value: Union[None, str]) -> None: - """Setter: The cluster this schema metadata resides from""" - self._inner_dict['cluster'] = value - - - @property - def hash(self) -> str: - """Getter: the SHA1 hash of the schema content""" - return self._inner_dict.get('hash') # type: ignore - - @hash.setter - def hash(self, value: str) -> None: - """Setter: the SHA1 hash of the schema content""" - self._inner_dict['hash'] = value - - - @property - def platformSchema(self) -> Union["EspressoSchemaClass", "OracleDDLClass", "MySqlDDLClass", "PrestoDDLClass", "KafkaSchemaClass", "BinaryJsonSchemaClass", "OrcSchemaClass", "SchemalessClass", "KeyValueSchemaClass", "OtherSchemaClass"]: - """Getter: The native schema in the dataset's platform.""" - return self._inner_dict.get('platformSchema') # type: ignore - - @platformSchema.setter - def platformSchema(self, value: Union["EspressoSchemaClass", "OracleDDLClass", "MySqlDDLClass", "PrestoDDLClass", "KafkaSchemaClass", "BinaryJsonSchemaClass", "OrcSchemaClass", "SchemalessClass", "KeyValueSchemaClass", "OtherSchemaClass"]) -> None: - """Setter: The native schema in the dataset's platform.""" - self._inner_dict['platformSchema'] = value - - - @property - def fields(self) -> List["SchemaFieldClass"]: - """Getter: Client provided a list of fields from document schema.""" - return self._inner_dict.get('fields') # type: ignore - - @fields.setter - def fields(self, value: List["SchemaFieldClass"]) -> None: - """Setter: Client provided a list of fields from document schema.""" - self._inner_dict['fields'] = value - - - @property - def primaryKeys(self) -> Union[None, List[str]]: - """Getter: Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. Empty lists indicates absence of primary key access patter. Value is a SchemaField@fieldPath.""" - return self._inner_dict.get('primaryKeys') # type: ignore - - @primaryKeys.setter - def primaryKeys(self, value: Union[None, List[str]]) -> None: - """Setter: Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. Empty lists indicates absence of primary key access patter. Value is a SchemaField@fieldPath.""" - self._inner_dict['primaryKeys'] = value - - - @property - def foreignKeysSpecs(self) -> Union[None, Dict[str, "ForeignKeySpecClass"]]: - """Getter: Map captures all the references schema makes to external datasets. Map key is ForeignKeySpecName typeref.""" - return self._inner_dict.get('foreignKeysSpecs') # type: ignore - - @foreignKeysSpecs.setter - def foreignKeysSpecs(self, value: Union[None, Dict[str, "ForeignKeySpecClass"]]) -> None: - """Setter: Map captures all the references schema makes to external datasets. 
Map key is ForeignKeySpecName typeref.""" - self._inner_dict['foreignKeysSpecs'] = value - - - @property - def foreignKeys(self) -> Union[None, List["ForeignKeyConstraintClass"]]: - """Getter: List of foreign key constraints for the schema""" - return self._inner_dict.get('foreignKeys') # type: ignore - - @foreignKeys.setter - def foreignKeys(self, value: Union[None, List["ForeignKeyConstraintClass"]]) -> None: - """Setter: List of foreign key constraints for the schema""" - self._inner_dict['foreignKeys'] = value - - -class SchemalessClass(DictWrapper): - """The dataset has no specific schema associated with it""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.Schemaless") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "SchemalessClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class StringTypeClass(DictWrapper): - """String field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.StringType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "StringTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class TimeTypeClass(DictWrapper): - """Time field type. This should also be used for datetimes.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.TimeType") - def __init__(self, - ): - super().__init__() - - - @classmethod - def construct_with_defaults(cls) -> "TimeTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - pass - - -class UnionTypeClass(DictWrapper): - """Union field type.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.UnionType") - def __init__(self, - nestedTypes: Union[None, List[str]]=None, - ): - super().__init__() - - self.nestedTypes = nestedTypes - - @classmethod - def construct_with_defaults(cls) -> "UnionTypeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.nestedTypes = self.RECORD_SCHEMA.field_map["nestedTypes"].default - - - @property - def nestedTypes(self) -> Union[None, List[str]]: - """Getter: List of types in union type.""" - return self._inner_dict.get('nestedTypes') # type: ignore - - @nestedTypes.setter - def nestedTypes(self, value: Union[None, List[str]]) -> None: - """Setter: List of types in union type.""" - self._inner_dict['nestedTypes'] = value - - -class UrnForeignKeyClass(DictWrapper): - """If SchemaMetadata fields make any external references and references are of type com.linkedin.pegasus2avro.common.Urn or any children, this models can be used to mark it.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.schema.UrnForeignKey") - def __init__(self, - currentFieldPath: str, - ): - super().__init__() - - self.currentFieldPath = currentFieldPath - - @classmethod - def construct_with_defaults(cls) -> "UrnForeignKeyClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.currentFieldPath = str() - - - @property - def currentFieldPath(self) -> str: - """Getter: Field in hosting(current) SchemaMetadata.""" - return self._inner_dict.get('currentFieldPath') # type: ignore - - @currentFieldPath.setter - def currentFieldPath(self, value: str) -> None: - """Setter: 
Field in hosting(current) SchemaMetadata.""" - self._inner_dict['currentFieldPath'] = value - - -class TagPropertiesClass(DictWrapper): - """Properties associated with a Tag""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.tag.TagProperties") - def __init__(self, - name: str, - description: Union[None, str]=None, - ): - super().__init__() - - self.name = name - self.description = description - - @classmethod - def construct_with_defaults(cls) -> "TagPropertiesClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.name = str() - self.description = self.RECORD_SCHEMA.field_map["description"].default - - - @property - def name(self) -> str: - """Getter: Name of the tag""" - return self._inner_dict.get('name') # type: ignore - - @name.setter - def name(self, value: str) -> None: - """Setter: Name of the tag""" - self._inner_dict['name'] = value - - - @property - def description(self) -> Union[None, str]: - """Getter: Documentation of the tag""" - return self._inner_dict.get('description') # type: ignore - - @description.setter - def description(self, value: Union[None, str]) -> None: - """Setter: Documentation of the tag""" - self._inner_dict['description'] = value - - -class CalendarIntervalClass(object): - # No docs available. - - SECOND = "SECOND" - MINUTE = "MINUTE" - HOUR = "HOUR" - DAY = "DAY" - WEEK = "WEEK" - MONTH = "MONTH" - QUARTER = "QUARTER" - YEAR = "YEAR" - - -class PartitionSpecClass(DictWrapper): - """Defines how the data is partitioned""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.timeseries.PartitionSpec") - def __init__(self, - partition: str, - timePartition: Union[None, "TimeWindowClass"]=None, - ): - super().__init__() - - self.partition = partition - self.timePartition = timePartition - - @classmethod - def construct_with_defaults(cls) -> "PartitionSpecClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.partition = str() - self.timePartition = self.RECORD_SCHEMA.field_map["timePartition"].default - - - @property - def partition(self) -> str: - """Getter: String representation of the partition""" - return self._inner_dict.get('partition') # type: ignore - - @partition.setter - def partition(self, value: str) -> None: - """Setter: String representation of the partition""" - self._inner_dict['partition'] = value - - - @property - def timePartition(self) -> Union[None, "TimeWindowClass"]: - """Getter: Time window of the partition if applicable""" - return self._inner_dict.get('timePartition') # type: ignore - - @timePartition.setter - def timePartition(self, value: Union[None, "TimeWindowClass"]) -> None: - """Setter: Time window of the partition if applicable""" - self._inner_dict['timePartition'] = value - - -class TimeWindowClass(DictWrapper): - # No docs available. 
- - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.timeseries.TimeWindow") - def __init__(self, - startTimeMillis: int, - length: "TimeWindowSizeClass", - ): - super().__init__() - - self.startTimeMillis = startTimeMillis - self.length = length - - @classmethod - def construct_with_defaults(cls) -> "TimeWindowClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.startTimeMillis = int() - self.length = TimeWindowSizeClass.construct_with_defaults() - - - @property - def startTimeMillis(self) -> int: - """Getter: Start time as epoch at UTC.""" - return self._inner_dict.get('startTimeMillis') # type: ignore - - @startTimeMillis.setter - def startTimeMillis(self, value: int) -> None: - """Setter: Start time as epoch at UTC.""" - self._inner_dict['startTimeMillis'] = value - - - @property - def length(self) -> "TimeWindowSizeClass": - """Getter: The length of the window.""" - return self._inner_dict.get('length') # type: ignore - - @length.setter - def length(self, value: "TimeWindowSizeClass") -> None: - """Setter: The length of the window.""" - self._inner_dict['length'] = value - - -class TimeWindowSizeClass(DictWrapper): - """Defines the size of a time window.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.timeseries.TimeWindowSize") - def __init__(self, - unit: Union[str, "CalendarIntervalClass"], - multiple: Optional[int]=None, - ): - super().__init__() - - self.unit = unit - if multiple is None: - # default: 1 - self.multiple = self.RECORD_SCHEMA.field_map["multiple"].default - else: - self.multiple = multiple - - @classmethod - def construct_with_defaults(cls) -> "TimeWindowSizeClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.unit = CalendarIntervalClass.SECOND - self.multiple = self.RECORD_SCHEMA.field_map["multiple"].default - - - @property - def unit(self) -> Union[str, "CalendarIntervalClass"]: - """Getter: Interval unit such as minute/hour/day etc.""" - return self._inner_dict.get('unit') # type: ignore - - @unit.setter - def unit(self, value: Union[str, "CalendarIntervalClass"]) -> None: - """Setter: Interval unit such as minute/hour/day etc.""" - self._inner_dict['unit'] = value - - - @property - def multiple(self) -> int: - """Getter: How many units. Defaults to 1.""" - return self._inner_dict.get('multiple') # type: ignore - - @multiple.setter - def multiple(self, value: int) -> None: - """Setter: How many units. Defaults to 1.""" - self._inner_dict['multiple'] = value - - -class FieldUsageCountsClass(DictWrapper): - """ Records field-level usage counts for a given resource """ - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.usage.FieldUsageCounts") - def __init__(self, - fieldName: str, - count: int, - ): - super().__init__() - - self.fieldName = fieldName - self.count = count - - @classmethod - def construct_with_defaults(cls) -> "FieldUsageCountsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.fieldName = str() - self.count = int() - - - @property - def fieldName(self) -> str: - # No docs available. - return self._inner_dict.get('fieldName') # type: ignore - - @fieldName.setter - def fieldName(self, value: str) -> None: - # No docs available. - self._inner_dict['fieldName'] = value - - - @property - def count(self) -> int: - # No docs available. 
- return self._inner_dict.get('count') # type: ignore - - @count.setter - def count(self, value: int) -> None: - # No docs available. - self._inner_dict['count'] = value - - -class UsageAggregationClass(DictWrapper): - """Usage data for a given resource, rolled up into a bucket.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.usage.UsageAggregation") - def __init__(self, - bucket: int, - duration: Union[str, "WindowDurationClass"], - resource: str, - metrics: "UsageAggregationMetricsClass", - ): - super().__init__() - - self.bucket = bucket - self.duration = duration - self.resource = resource - self.metrics = metrics - - @classmethod - def construct_with_defaults(cls) -> "UsageAggregationClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.bucket = int() - self.duration = WindowDurationClass.YEAR - self.resource = str() - self.metrics = UsageAggregationMetricsClass.construct_with_defaults() - - - @property - def bucket(self) -> int: - """Getter: Bucket start time in milliseconds """ - return self._inner_dict.get('bucket') # type: ignore - - @bucket.setter - def bucket(self, value: int) -> None: - """Setter: Bucket start time in milliseconds """ - self._inner_dict['bucket'] = value - - - @property - def duration(self) -> Union[str, "WindowDurationClass"]: - """Getter: Bucket duration """ - return self._inner_dict.get('duration') # type: ignore - - @duration.setter - def duration(self, value: Union[str, "WindowDurationClass"]) -> None: - """Setter: Bucket duration """ - self._inner_dict['duration'] = value - - - @property - def resource(self) -> str: - """Getter: Resource associated with these usage stats """ - return self._inner_dict.get('resource') # type: ignore - - @resource.setter - def resource(self, value: str) -> None: - """Setter: Resource associated with these usage stats """ - self._inner_dict['resource'] = value - - - @property - def metrics(self) -> "UsageAggregationMetricsClass": - """Getter: Metrics associated with this bucket """ - return self._inner_dict.get('metrics') # type: ignore - - @metrics.setter - def metrics(self, value: "UsageAggregationMetricsClass") -> None: - """Setter: Metrics associated with this bucket """ - self._inner_dict['metrics'] = value - - -class UsageAggregationMetricsClass(DictWrapper): - """Metrics for usage data for a given resource and bucket. 
Not all fields - make sense for all buckets, so every field is optional.""" - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.usage.UsageAggregationMetrics") - def __init__(self, - uniqueUserCount: Union[None, int]=None, - users: Union[None, List["UserUsageCountsClass"]]=None, - totalSqlQueries: Union[None, int]=None, - topSqlQueries: Union[None, List[str]]=None, - fields: Union[None, List["FieldUsageCountsClass"]]=None, - ): - super().__init__() - - self.uniqueUserCount = uniqueUserCount - self.users = users - self.totalSqlQueries = totalSqlQueries - self.topSqlQueries = topSqlQueries - self.fields = fields - - @classmethod - def construct_with_defaults(cls) -> "UsageAggregationMetricsClass": - self = cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.uniqueUserCount = self.RECORD_SCHEMA.field_map["uniqueUserCount"].default - self.users = self.RECORD_SCHEMA.field_map["users"].default - self.totalSqlQueries = self.RECORD_SCHEMA.field_map["totalSqlQueries"].default - self.topSqlQueries = self.RECORD_SCHEMA.field_map["topSqlQueries"].default - self.fields = self.RECORD_SCHEMA.field_map["fields"].default - - - @property - def uniqueUserCount(self) -> Union[None, int]: - """Getter: Unique user count """ - return self._inner_dict.get('uniqueUserCount') # type: ignore - - @uniqueUserCount.setter - def uniqueUserCount(self, value: Union[None, int]) -> None: - """Setter: Unique user count """ - self._inner_dict['uniqueUserCount'] = value - - - @property - def users(self) -> Union[None, List["UserUsageCountsClass"]]: - """Getter: Users within this bucket, with frequency counts """ - return self._inner_dict.get('users') # type: ignore - - @users.setter - def users(self, value: Union[None, List["UserUsageCountsClass"]]) -> None: - """Setter: Users within this bucket, with frequency counts """ - self._inner_dict['users'] = value - - - @property - def totalSqlQueries(self) -> Union[None, int]: - """Getter: Total SQL query count """ - return self._inner_dict.get('totalSqlQueries') # type: ignore - - @totalSqlQueries.setter - def totalSqlQueries(self, value: Union[None, int]) -> None: - """Setter: Total SQL query count """ - self._inner_dict['totalSqlQueries'] = value - - - @property - def topSqlQueries(self) -> Union[None, List[str]]: - """Getter: Frequent SQL queries; mostly makes sense for datasets in SQL databases """ - return self._inner_dict.get('topSqlQueries') # type: ignore - - @topSqlQueries.setter - def topSqlQueries(self, value: Union[None, List[str]]) -> None: - """Setter: Frequent SQL queries; mostly makes sense for datasets in SQL databases """ - self._inner_dict['topSqlQueries'] = value - - - @property - def fields(self) -> Union[None, List["FieldUsageCountsClass"]]: - """Getter: Field-level usage stats """ - return self._inner_dict.get('fields') # type: ignore - - @fields.setter - def fields(self, value: Union[None, List["FieldUsageCountsClass"]]) -> None: - """Setter: Field-level usage stats """ - self._inner_dict['fields'] = value - - -class UserUsageCountsClass(DictWrapper): - """ Records a single user's usage counts for a given resource """ - - RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.usage.UserUsageCounts") - def __init__(self, - count: int, - user: Union[None, str]=None, - userEmail: Union[None, str]=None, - ): - super().__init__() - - self.user = user - self.count = count - self.userEmail = userEmail - - @classmethod - def construct_with_defaults(cls) -> "UserUsageCountsClass": - self = 
cls.construct({}) - self._restore_defaults() - - return self - - def _restore_defaults(self) -> None: - self.user = self.RECORD_SCHEMA.field_map["user"].default - self.count = int() - self.userEmail = self.RECORD_SCHEMA.field_map["userEmail"].default - - - @property - def user(self) -> Union[None, str]: - # No docs available. - return self._inner_dict.get('user') # type: ignore - - @user.setter - def user(self, value: Union[None, str]) -> None: - # No docs available. - self._inner_dict['user'] = value - - - @property - def count(self) -> int: - # No docs available. - return self._inner_dict.get('count') # type: ignore - - @count.setter - def count(self, value: int) -> None: - # No docs available. - self._inner_dict['count'] = value - - - @property - def userEmail(self) -> Union[None, str]: - """Getter: If user_email is set, we attempt to resolve the user's urn upon ingest """ - return self._inner_dict.get('userEmail') # type: ignore - - @userEmail.setter - def userEmail(self, value: Union[None, str]) -> None: - """Setter: If user_email is set, we attempt to resolve the user's urn upon ingest """ - self._inner_dict['userEmail'] = value - - -__SCHEMA_TYPES = { - 'com.linkedin.events.KafkaAuditHeader': KafkaAuditHeaderClass, - 'com.linkedin.pegasus2avro.chart.ChartInfo': ChartInfoClass, - 'com.linkedin.pegasus2avro.chart.ChartQuery': ChartQueryClass, - 'com.linkedin.pegasus2avro.chart.ChartQueryType': ChartQueryTypeClass, - 'com.linkedin.pegasus2avro.chart.ChartType': ChartTypeClass, - 'com.linkedin.pegasus2avro.chart.EditableChartProperties': EditableChartPropertiesClass, - 'com.linkedin.pegasus2avro.common.AccessLevel': AccessLevelClass, - 'com.linkedin.pegasus2avro.common.AuditStamp': AuditStampClass, - 'com.linkedin.pegasus2avro.common.BrowsePaths': BrowsePathsClass, - 'com.linkedin.pegasus2avro.common.ChangeAuditStamps': ChangeAuditStampsClass, - 'com.linkedin.pegasus2avro.common.Cost': CostClass, - 'com.linkedin.pegasus2avro.common.CostCost': CostCostClass, - 'com.linkedin.pegasus2avro.common.CostCostDiscriminator': CostCostDiscriminatorClass, - 'com.linkedin.pegasus2avro.common.CostType': CostTypeClass, - 'com.linkedin.pegasus2avro.common.Deprecation': DeprecationClass, - 'com.linkedin.pegasus2avro.common.FabricType': FabricTypeClass, - 'com.linkedin.pegasus2avro.common.GlobalTags': GlobalTagsClass, - 'com.linkedin.pegasus2avro.common.GlossaryTermAssociation': GlossaryTermAssociationClass, - 'com.linkedin.pegasus2avro.common.GlossaryTerms': GlossaryTermsClass, - 'com.linkedin.pegasus2avro.common.InstitutionalMemory': InstitutionalMemoryClass, - 'com.linkedin.pegasus2avro.common.InstitutionalMemoryMetadata': InstitutionalMemoryMetadataClass, - 'com.linkedin.pegasus2avro.common.MLFeatureDataType': MLFeatureDataTypeClass, - 'com.linkedin.pegasus2avro.common.Owner': OwnerClass, - 'com.linkedin.pegasus2avro.common.Ownership': OwnershipClass, - 'com.linkedin.pegasus2avro.common.OwnershipSource': OwnershipSourceClass, - 'com.linkedin.pegasus2avro.common.OwnershipSourceType': OwnershipSourceTypeClass, - 'com.linkedin.pegasus2avro.common.OwnershipType': OwnershipTypeClass, - 'com.linkedin.pegasus2avro.common.Status': StatusClass, - 'com.linkedin.pegasus2avro.common.TagAssociation': TagAssociationClass, - 'com.linkedin.pegasus2avro.common.VersionTag': VersionTagClass, - 'com.linkedin.pegasus2avro.common.WindowDuration': WindowDurationClass, - 'com.linkedin.pegasus2avro.common.fieldtransformer.TransformationType': TransformationTypeClass, - 
'com.linkedin.pegasus2avro.common.fieldtransformer.UDFTransformer': UDFTransformerClass, - 'com.linkedin.pegasus2avro.dashboard.DashboardInfo': DashboardInfoClass, - 'com.linkedin.pegasus2avro.dashboard.EditableDashboardProperties': EditableDashboardPropertiesClass, - 'com.linkedin.pegasus2avro.datajob.DataFlowInfo': DataFlowInfoClass, - 'com.linkedin.pegasus2avro.datajob.DataJobInfo': DataJobInfoClass, - 'com.linkedin.pegasus2avro.datajob.DataJobInputOutput': DataJobInputOutputClass, - 'com.linkedin.pegasus2avro.datajob.EditableDataFlowProperties': EditableDataFlowPropertiesClass, - 'com.linkedin.pegasus2avro.datajob.EditableDataJobProperties': EditableDataJobPropertiesClass, - 'com.linkedin.pegasus2avro.datajob.JobStatus': JobStatusClass, - 'com.linkedin.pegasus2avro.datajob.azkaban.AzkabanJobType': AzkabanJobTypeClass, - 'com.linkedin.pegasus2avro.dataplatform.DataPlatformInfo': DataPlatformInfoClass, - 'com.linkedin.pegasus2avro.dataplatform.PlatformType': PlatformTypeClass, - 'com.linkedin.pegasus2avro.dataprocess.DataProcessInfo': DataProcessInfoClass, - 'com.linkedin.pegasus2avro.dataset.DatasetDeprecation': DatasetDeprecationClass, - 'com.linkedin.pegasus2avro.dataset.DatasetFieldMapping': DatasetFieldMappingClass, - 'com.linkedin.pegasus2avro.dataset.DatasetFieldProfile': DatasetFieldProfileClass, - 'com.linkedin.pegasus2avro.dataset.DatasetFieldUsageCounts': DatasetFieldUsageCountsClass, - 'com.linkedin.pegasus2avro.dataset.DatasetLineageType': DatasetLineageTypeClass, - 'com.linkedin.pegasus2avro.dataset.DatasetProfile': DatasetProfileClass, - 'com.linkedin.pegasus2avro.dataset.DatasetProperties': DatasetPropertiesClass, - 'com.linkedin.pegasus2avro.dataset.DatasetUpstreamLineage': DatasetUpstreamLineageClass, - 'com.linkedin.pegasus2avro.dataset.DatasetUsageStatistics': DatasetUsageStatisticsClass, - 'com.linkedin.pegasus2avro.dataset.DatasetUserUsageCounts': DatasetUserUsageCountsClass, - 'com.linkedin.pegasus2avro.dataset.EditableDatasetProperties': EditableDatasetPropertiesClass, - 'com.linkedin.pegasus2avro.dataset.Histogram': HistogramClass, - 'com.linkedin.pegasus2avro.dataset.Quantile': QuantileClass, - 'com.linkedin.pegasus2avro.dataset.Upstream': UpstreamClass, - 'com.linkedin.pegasus2avro.dataset.UpstreamLineage': UpstreamLineageClass, - 'com.linkedin.pegasus2avro.dataset.ValueFrequency': ValueFrequencyClass, - 'com.linkedin.pegasus2avro.events.metadata.ChangeType': ChangeTypeClass, - 'com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo': GlossaryNodeInfoClass, - 'com.linkedin.pegasus2avro.glossary.GlossaryRelatedTerms': GlossaryRelatedTermsClass, - 'com.linkedin.pegasus2avro.glossary.GlossaryTermInfo': GlossaryTermInfoClass, - 'com.linkedin.pegasus2avro.identity.CorpGroupInfo': CorpGroupInfoClass, - 'com.linkedin.pegasus2avro.identity.CorpUserEditableInfo': CorpUserEditableInfoClass, - 'com.linkedin.pegasus2avro.identity.CorpUserInfo': CorpUserInfoClass, - 'com.linkedin.pegasus2avro.identity.GroupMembership': GroupMembershipClass, - 'com.linkedin.pegasus2avro.metadata.key.ChartKey': ChartKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.CorpGroupKey': CorpGroupKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.CorpUserKey': CorpUserKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.DashboardKey': DashboardKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.DataFlowKey': DataFlowKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.DataHubPolicyKey': DataHubPolicyKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.DataJobKey': DataJobKeyClass, - 
'com.linkedin.pegasus2avro.metadata.key.DataPlatformKey': DataPlatformKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.DataProcessKey': DataProcessKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.DatasetKey': DatasetKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.GlossaryNodeKey': GlossaryNodeKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.GlossaryTermKey': GlossaryTermKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.MLFeatureKey': MLFeatureKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.MLFeatureTableKey': MLFeatureTableKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.MLModelDeploymentKey': MLModelDeploymentKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey': MLModelGroupKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.MLModelKey': MLModelKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.MLPrimaryKeyKey': MLPrimaryKeyKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.SchemaFieldKey': SchemaFieldKeyClass, - 'com.linkedin.pegasus2avro.metadata.key.TagKey': TagKeyClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot': ChartSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.CorpGroupSnapshot': CorpGroupSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.CorpUserSnapshot': CorpUserSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot': DashboardSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.DataFlowSnapshot': DataFlowSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.DataHubPolicySnapshot': DataHubPolicySnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.DataJobSnapshot': DataJobSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.DataPlatformSnapshot': DataPlatformSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.DataProcessSnapshot': DataProcessSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot': DatasetSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot': GlossaryNodeSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot': GlossaryTermSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureSnapshot': MLFeatureSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.MLFeatureTableSnapshot': MLFeatureTableSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.MLModelDeploymentSnapshot': MLModelDeploymentSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.MLModelGroupSnapshot': MLModelGroupSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.MLModelSnapshot': MLModelSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.MLPrimaryKeySnapshot': MLPrimaryKeySnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.SchemaFieldSnapshot': SchemaFieldSnapshotClass, - 'com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot': TagSnapshotClass, - 'com.linkedin.pegasus2avro.ml.metadata.BaseData': BaseDataClass, - 'com.linkedin.pegasus2avro.ml.metadata.CaveatDetails': CaveatDetailsClass, - 'com.linkedin.pegasus2avro.ml.metadata.CaveatsAndRecommendations': CaveatsAndRecommendationsClass, - 'com.linkedin.pegasus2avro.ml.metadata.DeploymentStatus': DeploymentStatusClass, - 'com.linkedin.pegasus2avro.ml.metadata.EthicalConsiderations': EthicalConsiderationsClass, - 'com.linkedin.pegasus2avro.ml.metadata.EvaluationData': EvaluationDataClass, - 'com.linkedin.pegasus2avro.ml.metadata.IntendedUse': IntendedUseClass, - 'com.linkedin.pegasus2avro.ml.metadata.IntendedUserType': IntendedUserTypeClass, - 
'com.linkedin.pegasus2avro.ml.metadata.MLFeatureProperties': MLFeaturePropertiesClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLFeatureTableProperties': MLFeatureTablePropertiesClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLHyperParam': MLHyperParamClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLMetric': MLMetricClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLModelDeploymentProperties': MLModelDeploymentPropertiesClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLModelFactorPrompts': MLModelFactorPromptsClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLModelFactors': MLModelFactorsClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLModelGroupProperties': MLModelGroupPropertiesClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLModelProperties': MLModelPropertiesClass, - 'com.linkedin.pegasus2avro.ml.metadata.MLPrimaryKeyProperties': MLPrimaryKeyPropertiesClass, - 'com.linkedin.pegasus2avro.ml.metadata.Metrics': MetricsClass, - 'com.linkedin.pegasus2avro.ml.metadata.QuantitativeAnalyses': QuantitativeAnalysesClass, - 'com.linkedin.pegasus2avro.ml.metadata.SourceCode': SourceCodeClass, - 'com.linkedin.pegasus2avro.ml.metadata.SourceCodeUrl': SourceCodeUrlClass, - 'com.linkedin.pegasus2avro.ml.metadata.SourceCodeUrlType': SourceCodeUrlTypeClass, - 'com.linkedin.pegasus2avro.ml.metadata.TrainingData': TrainingDataClass, - 'com.linkedin.pegasus2avro.mxe.GenericAspect': GenericAspectClass, - 'com.linkedin.pegasus2avro.mxe.MetadataChangeEvent': MetadataChangeEventClass, - 'com.linkedin.pegasus2avro.mxe.MetadataChangeProposal': MetadataChangeProposalClass, - 'com.linkedin.pegasus2avro.mxe.SystemMetadata': SystemMetadataClass, - 'com.linkedin.pegasus2avro.policy.DataHubActorFilter': DataHubActorFilterClass, - 'com.linkedin.pegasus2avro.policy.DataHubPolicyInfo': DataHubPolicyInfoClass, - 'com.linkedin.pegasus2avro.policy.DataHubResourceFilter': DataHubResourceFilterClass, - 'com.linkedin.pegasus2avro.schema.ArrayType': ArrayTypeClass, - 'com.linkedin.pegasus2avro.schema.BinaryJsonSchema': BinaryJsonSchemaClass, - 'com.linkedin.pegasus2avro.schema.BooleanType': BooleanTypeClass, - 'com.linkedin.pegasus2avro.schema.BytesType': BytesTypeClass, - 'com.linkedin.pegasus2avro.schema.DatasetFieldForeignKey': DatasetFieldForeignKeyClass, - 'com.linkedin.pegasus2avro.schema.DateType': DateTypeClass, - 'com.linkedin.pegasus2avro.schema.EditableSchemaFieldInfo': EditableSchemaFieldInfoClass, - 'com.linkedin.pegasus2avro.schema.EditableSchemaMetadata': EditableSchemaMetadataClass, - 'com.linkedin.pegasus2avro.schema.EnumType': EnumTypeClass, - 'com.linkedin.pegasus2avro.schema.EspressoSchema': EspressoSchemaClass, - 'com.linkedin.pegasus2avro.schema.FixedType': FixedTypeClass, - 'com.linkedin.pegasus2avro.schema.ForeignKeyConstraint': ForeignKeyConstraintClass, - 'com.linkedin.pegasus2avro.schema.ForeignKeySpec': ForeignKeySpecClass, - 'com.linkedin.pegasus2avro.schema.KafkaSchema': KafkaSchemaClass, - 'com.linkedin.pegasus2avro.schema.KeyValueSchema': KeyValueSchemaClass, - 'com.linkedin.pegasus2avro.schema.MapType': MapTypeClass, - 'com.linkedin.pegasus2avro.schema.MySqlDDL': MySqlDDLClass, - 'com.linkedin.pegasus2avro.schema.NullType': NullTypeClass, - 'com.linkedin.pegasus2avro.schema.NumberType': NumberTypeClass, - 'com.linkedin.pegasus2avro.schema.OracleDDL': OracleDDLClass, - 'com.linkedin.pegasus2avro.schema.OrcSchema': OrcSchemaClass, - 'com.linkedin.pegasus2avro.schema.OtherSchema': OtherSchemaClass, - 'com.linkedin.pegasus2avro.schema.PrestoDDL': PrestoDDLClass, - 
'com.linkedin.pegasus2avro.schema.RecordType': RecordTypeClass, - 'com.linkedin.pegasus2avro.schema.SchemaField': SchemaFieldClass, - 'com.linkedin.pegasus2avro.schema.SchemaFieldDataType': SchemaFieldDataTypeClass, - 'com.linkedin.pegasus2avro.schema.SchemaMetadata': SchemaMetadataClass, - 'com.linkedin.pegasus2avro.schema.Schemaless': SchemalessClass, - 'com.linkedin.pegasus2avro.schema.StringType': StringTypeClass, - 'com.linkedin.pegasus2avro.schema.TimeType': TimeTypeClass, - 'com.linkedin.pegasus2avro.schema.UnionType': UnionTypeClass, - 'com.linkedin.pegasus2avro.schema.UrnForeignKey': UrnForeignKeyClass, - 'com.linkedin.pegasus2avro.tag.TagProperties': TagPropertiesClass, - 'com.linkedin.pegasus2avro.timeseries.CalendarInterval': CalendarIntervalClass, - 'com.linkedin.pegasus2avro.timeseries.PartitionSpec': PartitionSpecClass, - 'com.linkedin.pegasus2avro.timeseries.TimeWindow': TimeWindowClass, - 'com.linkedin.pegasus2avro.timeseries.TimeWindowSize': TimeWindowSizeClass, - 'com.linkedin.pegasus2avro.usage.FieldUsageCounts': FieldUsageCountsClass, - 'com.linkedin.pegasus2avro.usage.UsageAggregation': UsageAggregationClass, - 'com.linkedin.pegasus2avro.usage.UsageAggregationMetrics': UsageAggregationMetricsClass, - 'com.linkedin.pegasus2avro.usage.UserUsageCounts': UserUsageCountsClass, - 'KafkaAuditHeader': KafkaAuditHeaderClass, - 'ChartInfo': ChartInfoClass, - 'ChartQuery': ChartQueryClass, - 'ChartQueryType': ChartQueryTypeClass, - 'ChartType': ChartTypeClass, - 'EditableChartProperties': EditableChartPropertiesClass, - 'AccessLevel': AccessLevelClass, - 'AuditStamp': AuditStampClass, - 'BrowsePaths': BrowsePathsClass, - 'ChangeAuditStamps': ChangeAuditStampsClass, - 'Cost': CostClass, - 'CostCost': CostCostClass, - 'CostCostDiscriminator': CostCostDiscriminatorClass, - 'CostType': CostTypeClass, - 'Deprecation': DeprecationClass, - 'FabricType': FabricTypeClass, - 'GlobalTags': GlobalTagsClass, - 'GlossaryTermAssociation': GlossaryTermAssociationClass, - 'GlossaryTerms': GlossaryTermsClass, - 'InstitutionalMemory': InstitutionalMemoryClass, - 'InstitutionalMemoryMetadata': InstitutionalMemoryMetadataClass, - 'MLFeatureDataType': MLFeatureDataTypeClass, - 'Owner': OwnerClass, - 'Ownership': OwnershipClass, - 'OwnershipSource': OwnershipSourceClass, - 'OwnershipSourceType': OwnershipSourceTypeClass, - 'OwnershipType': OwnershipTypeClass, - 'Status': StatusClass, - 'TagAssociation': TagAssociationClass, - 'VersionTag': VersionTagClass, - 'WindowDuration': WindowDurationClass, - 'TransformationType': TransformationTypeClass, - 'UDFTransformer': UDFTransformerClass, - 'DashboardInfo': DashboardInfoClass, - 'EditableDashboardProperties': EditableDashboardPropertiesClass, - 'DataFlowInfo': DataFlowInfoClass, - 'DataJobInfo': DataJobInfoClass, - 'DataJobInputOutput': DataJobInputOutputClass, - 'EditableDataFlowProperties': EditableDataFlowPropertiesClass, - 'EditableDataJobProperties': EditableDataJobPropertiesClass, - 'JobStatus': JobStatusClass, - 'AzkabanJobType': AzkabanJobTypeClass, - 'DataPlatformInfo': DataPlatformInfoClass, - 'PlatformType': PlatformTypeClass, - 'DataProcessInfo': DataProcessInfoClass, - 'DatasetDeprecation': DatasetDeprecationClass, - 'DatasetFieldMapping': DatasetFieldMappingClass, - 'DatasetFieldProfile': DatasetFieldProfileClass, - 'DatasetFieldUsageCounts': DatasetFieldUsageCountsClass, - 'DatasetLineageType': DatasetLineageTypeClass, - 'DatasetProfile': DatasetProfileClass, - 'DatasetProperties': DatasetPropertiesClass, - 'DatasetUpstreamLineage': 
DatasetUpstreamLineageClass, - 'DatasetUsageStatistics': DatasetUsageStatisticsClass, - 'DatasetUserUsageCounts': DatasetUserUsageCountsClass, - 'EditableDatasetProperties': EditableDatasetPropertiesClass, - 'Histogram': HistogramClass, - 'Quantile': QuantileClass, - 'Upstream': UpstreamClass, - 'UpstreamLineage': UpstreamLineageClass, - 'ValueFrequency': ValueFrequencyClass, - 'ChangeType': ChangeTypeClass, - 'GlossaryNodeInfo': GlossaryNodeInfoClass, - 'GlossaryRelatedTerms': GlossaryRelatedTermsClass, - 'GlossaryTermInfo': GlossaryTermInfoClass, - 'CorpGroupInfo': CorpGroupInfoClass, - 'CorpUserEditableInfo': CorpUserEditableInfoClass, - 'CorpUserInfo': CorpUserInfoClass, - 'GroupMembership': GroupMembershipClass, - 'ChartKey': ChartKeyClass, - 'CorpGroupKey': CorpGroupKeyClass, - 'CorpUserKey': CorpUserKeyClass, - 'DashboardKey': DashboardKeyClass, - 'DataFlowKey': DataFlowKeyClass, - 'DataHubPolicyKey': DataHubPolicyKeyClass, - 'DataJobKey': DataJobKeyClass, - 'DataPlatformKey': DataPlatformKeyClass, - 'DataProcessKey': DataProcessKeyClass, - 'DatasetKey': DatasetKeyClass, - 'GlossaryNodeKey': GlossaryNodeKeyClass, - 'GlossaryTermKey': GlossaryTermKeyClass, - 'MLFeatureKey': MLFeatureKeyClass, - 'MLFeatureTableKey': MLFeatureTableKeyClass, - 'MLModelDeploymentKey': MLModelDeploymentKeyClass, - 'MLModelGroupKey': MLModelGroupKeyClass, - 'MLModelKey': MLModelKeyClass, - 'MLPrimaryKeyKey': MLPrimaryKeyKeyClass, - 'SchemaFieldKey': SchemaFieldKeyClass, - 'TagKey': TagKeyClass, - 'ChartSnapshot': ChartSnapshotClass, - 'CorpGroupSnapshot': CorpGroupSnapshotClass, - 'CorpUserSnapshot': CorpUserSnapshotClass, - 'DashboardSnapshot': DashboardSnapshotClass, - 'DataFlowSnapshot': DataFlowSnapshotClass, - 'DataHubPolicySnapshot': DataHubPolicySnapshotClass, - 'DataJobSnapshot': DataJobSnapshotClass, - 'DataPlatformSnapshot': DataPlatformSnapshotClass, - 'DataProcessSnapshot': DataProcessSnapshotClass, - 'DatasetSnapshot': DatasetSnapshotClass, - 'GlossaryNodeSnapshot': GlossaryNodeSnapshotClass, - 'GlossaryTermSnapshot': GlossaryTermSnapshotClass, - 'MLFeatureSnapshot': MLFeatureSnapshotClass, - 'MLFeatureTableSnapshot': MLFeatureTableSnapshotClass, - 'MLModelDeploymentSnapshot': MLModelDeploymentSnapshotClass, - 'MLModelGroupSnapshot': MLModelGroupSnapshotClass, - 'MLModelSnapshot': MLModelSnapshotClass, - 'MLPrimaryKeySnapshot': MLPrimaryKeySnapshotClass, - 'SchemaFieldSnapshot': SchemaFieldSnapshotClass, - 'TagSnapshot': TagSnapshotClass, - 'BaseData': BaseDataClass, - 'CaveatDetails': CaveatDetailsClass, - 'CaveatsAndRecommendations': CaveatsAndRecommendationsClass, - 'DeploymentStatus': DeploymentStatusClass, - 'EthicalConsiderations': EthicalConsiderationsClass, - 'EvaluationData': EvaluationDataClass, - 'IntendedUse': IntendedUseClass, - 'IntendedUserType': IntendedUserTypeClass, - 'MLFeatureProperties': MLFeaturePropertiesClass, - 'MLFeatureTableProperties': MLFeatureTablePropertiesClass, - 'MLHyperParam': MLHyperParamClass, - 'MLMetric': MLMetricClass, - 'MLModelDeploymentProperties': MLModelDeploymentPropertiesClass, - 'MLModelFactorPrompts': MLModelFactorPromptsClass, - 'MLModelFactors': MLModelFactorsClass, - 'MLModelGroupProperties': MLModelGroupPropertiesClass, - 'MLModelProperties': MLModelPropertiesClass, - 'MLPrimaryKeyProperties': MLPrimaryKeyPropertiesClass, - 'Metrics': MetricsClass, - 'QuantitativeAnalyses': QuantitativeAnalysesClass, - 'SourceCode': SourceCodeClass, - 'SourceCodeUrl': SourceCodeUrlClass, - 'SourceCodeUrlType': SourceCodeUrlTypeClass, - 'TrainingData': 
TrainingDataClass, - 'GenericAspect': GenericAspectClass, - 'MetadataChangeEvent': MetadataChangeEventClass, - 'MetadataChangeProposal': MetadataChangeProposalClass, - 'SystemMetadata': SystemMetadataClass, - 'DataHubActorFilter': DataHubActorFilterClass, - 'DataHubPolicyInfo': DataHubPolicyInfoClass, - 'DataHubResourceFilter': DataHubResourceFilterClass, - 'ArrayType': ArrayTypeClass, - 'BinaryJsonSchema': BinaryJsonSchemaClass, - 'BooleanType': BooleanTypeClass, - 'BytesType': BytesTypeClass, - 'DatasetFieldForeignKey': DatasetFieldForeignKeyClass, - 'DateType': DateTypeClass, - 'EditableSchemaFieldInfo': EditableSchemaFieldInfoClass, - 'EditableSchemaMetadata': EditableSchemaMetadataClass, - 'EnumType': EnumTypeClass, - 'EspressoSchema': EspressoSchemaClass, - 'FixedType': FixedTypeClass, - 'ForeignKeyConstraint': ForeignKeyConstraintClass, - 'ForeignKeySpec': ForeignKeySpecClass, - 'KafkaSchema': KafkaSchemaClass, - 'KeyValueSchema': KeyValueSchemaClass, - 'MapType': MapTypeClass, - 'MySqlDDL': MySqlDDLClass, - 'NullType': NullTypeClass, - 'NumberType': NumberTypeClass, - 'OracleDDL': OracleDDLClass, - 'OrcSchema': OrcSchemaClass, - 'OtherSchema': OtherSchemaClass, - 'PrestoDDL': PrestoDDLClass, - 'RecordType': RecordTypeClass, - 'SchemaField': SchemaFieldClass, - 'SchemaFieldDataType': SchemaFieldDataTypeClass, - 'SchemaMetadata': SchemaMetadataClass, - 'Schemaless': SchemalessClass, - 'StringType': StringTypeClass, - 'TimeType': TimeTypeClass, - 'UnionType': UnionTypeClass, - 'UrnForeignKey': UrnForeignKeyClass, - 'TagProperties': TagPropertiesClass, - 'CalendarInterval': CalendarIntervalClass, - 'PartitionSpec': PartitionSpecClass, - 'TimeWindow': TimeWindowClass, - 'TimeWindowSize': TimeWindowSizeClass, - 'FieldUsageCounts': FieldUsageCountsClass, - 'UsageAggregation': UsageAggregationClass, - 'UsageAggregationMetrics': UsageAggregationMetricsClass, - 'UserUsageCounts': UserUsageCountsClass, -} - -_json_converter = avrojson.AvroJsonConverter(use_logical_types=False, schema_types=__SCHEMA_TYPES) - -# fmt: on diff --git a/metadata-ingestion/src/datahub/metadata/schemas/DatasetProfile.avsc b/metadata-ingestion/src/datahub/metadata/schemas/DatasetProfile.avsc deleted file mode 100644 index f34872e337..0000000000 --- a/metadata-ingestion/src/datahub/metadata/schemas/DatasetProfile.avsc +++ /dev/null @@ -1,297 +0,0 @@ -{ - "type": "record", - "name": "DatasetProfile", - "namespace": "com.linkedin.pegasus2avro.dataset", - "doc": "Stats corresponding to datasets", - "fields": [ - { - "name": "timestampMillis", - "type": "long", - "doc": "The event timestamp field as epoch at UTC in milli seconds." - }, - { - "name": "eventGranularity", - "type": [ - "null", - { - "type": "record", - "name": "TimeWindowSize", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "doc": "Defines the size of a time window.", - "fields": [ - { - "name": "unit", - "type": { - "type": "enum", - "name": "CalendarInterval", - "symbols": [ - "SECOND", - "MINUTE", - "HOUR", - "DAY", - "WEEK", - "MONTH", - "QUARTER", - "YEAR" - ] - }, - "doc": "Interval unit such as minute/hour/day etc." - }, - { - "name": "multiple", - "type": "int", - "doc": "How many units. 
Defaults to 1.", - "default": 1 - } - ] - } - ], - "doc": "Granularity of the event if applicable", - "default": null - }, - { - "name": "partitionSpec", - "type": [ - "null", - { - "type": "record", - "name": "PartitionSpec", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "doc": "Defines how the data is partitioned", - "fields": [ - { - "name": "partition", - "type": "string", - "doc": "String representation of the partition" - }, - { - "name": "timePartition", - "type": [ - "null", - { - "type": "record", - "name": "TimeWindow", - "fields": [ - { - "name": "startTimeMillis", - "type": "long", - "doc": "Start time as epoch at UTC." - }, - { - "name": "length", - "type": "TimeWindowSize", - "doc": "The length of the window." - } - ] - } - ], - "doc": "Time window of the partition if applicable", - "default": null - } - ] - } - ], - "doc": "The optional partition specification.", - "default": null - }, - { - "name": "rowCount", - "type": [ - "null", - "long" - ], - "default": null - }, - { - "name": "columnCount", - "type": [ - "null", - "long" - ], - "default": null - }, - { - "name": "fieldProfiles", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "DatasetFieldProfile", - "doc": "Stats corresponding to fields in a dataset", - "fields": [ - { - "name": "fieldPath", - "type": "string" - }, - { - "name": "uniqueCount", - "type": [ - "null", - "long" - ], - "default": null - }, - { - "name": "uniqueProportion", - "type": [ - "null", - "float" - ], - "default": null - }, - { - "name": "nullCount", - "type": [ - "null", - "long" - ], - "default": null - }, - { - "name": "nullProportion", - "type": [ - "null", - "float" - ], - "default": null - }, - { - "name": "min", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "max", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "mean", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "median", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "stdev", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "quantiles", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "Quantile", - "fields": [ - { - "name": "quantile", - "type": "string" - }, - { - "name": "value", - "type": "string" - } - ] - } - } - ], - "default": null - }, - { - "name": "distinctValueFrequencies", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "ValueFrequency", - "fields": [ - { - "name": "value", - "type": "string" - }, - { - "name": "frequency", - "type": "long" - } - ] - } - } - ], - "default": null - }, - { - "name": "histogram", - "type": [ - "null", - { - "type": "record", - "name": "Histogram", - "fields": [ - { - "name": "boundaries", - "type": { - "type": "array", - "items": "string" - } - }, - { - "name": "heights", - "type": { - "type": "array", - "items": "float" - } - } - ] - } - ], - "default": null - }, - { - "name": "sampleValues", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "default": null - } - ] - } - } - ], - "default": null - } - ], - "Aspect": { - "name": "datasetProfile", - "type": "timeseries" - } -} \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/metadata/schemas/DatasetUsageStatistics.avsc b/metadata-ingestion/src/datahub/metadata/schemas/DatasetUsageStatistics.avsc deleted file mode 100644 index 2ec87cdd14..0000000000 --- 
a/metadata-ingestion/src/datahub/metadata/schemas/DatasetUsageStatistics.avsc +++ /dev/null @@ -1,212 +0,0 @@ -{ - "type": "record", - "name": "DatasetUsageStatistics", - "namespace": "com.linkedin.pegasus2avro.dataset", - "doc": "Stats corresponding to dataset's usage.", - "fields": [ - { - "name": "timestampMillis", - "type": "long", - "doc": "The event timestamp field as epoch at UTC in milli seconds." - }, - { - "name": "eventGranularity", - "type": [ - "null", - { - "type": "record", - "name": "TimeWindowSize", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "doc": "Defines the size of a time window.", - "fields": [ - { - "name": "unit", - "type": { - "type": "enum", - "name": "CalendarInterval", - "symbols": [ - "SECOND", - "MINUTE", - "HOUR", - "DAY", - "WEEK", - "MONTH", - "QUARTER", - "YEAR" - ] - }, - "doc": "Interval unit such as minute/hour/day etc." - }, - { - "name": "multiple", - "type": "int", - "doc": "How many units. Defaults to 1.", - "default": 1 - } - ] - } - ], - "doc": "Granularity of the event if applicable", - "default": null - }, - { - "name": "partitionSpec", - "type": [ - "null", - { - "type": "record", - "name": "PartitionSpec", - "namespace": "com.linkedin.pegasus2avro.timeseries", - "doc": "Defines how the data is partitioned", - "fields": [ - { - "name": "partition", - "type": "string", - "doc": "String representation of the partition" - }, - { - "name": "timePartition", - "type": [ - "null", - { - "type": "record", - "name": "TimeWindow", - "fields": [ - { - "name": "startTimeMillis", - "type": "long", - "doc": "Start time as epoch at UTC." - }, - { - "name": "length", - "type": "TimeWindowSize", - "doc": "The length of the window." - } - ] - } - ], - "doc": "Time window of the partition if applicable", - "default": null - } - ] - } - ], - "doc": "The optional partition specification.", - "default": null - }, - { - "name": "uniqueUserCount", - "type": [ - "null", - "int" - ], - "doc": "Unique user count", - "default": null, - "TimeseriesField": {} - }, - { - "name": "totalSqlQueries", - "type": [ - "null", - "int" - ], - "doc": "Total SQL query count", - "default": null, - "TimeseriesField": {} - }, - { - "name": "topSqlQueries", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Frequent SQL queries; mostly makes sense for datasets in SQL databases", - "default": null, - "TimeseriesField": {} - }, - { - "name": "userCounts", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "DatasetUserUsageCounts", - "doc": "Records a single user's usage counts for a given resource", - "fields": [ - { - "name": "user", - "type": "string", - "doc": "The unique id of the user.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "count", - "type": "int", - "doc": "Number of times the dataset has been used by the user.", - "TimeseriesField": {} - }, - { - "name": "userEmail", - "type": [ - "null", - "string" - ], - "doc": "If user_email is set, we attempt to resolve the user's urn upon ingest", - "default": null, - "TimeseriesField": {} - } - ] - } - } - ], - "doc": "Users within this bucket, with frequency counts", - "default": null, - "TimeseriesFieldCollection": { - "key": "user" - } - }, - { - "name": "fieldCounts", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "DatasetFieldUsageCounts", - "doc": "Records field-level usage counts for a given dataset", - "fields": [ - { - "name": "fieldPath", - "type": "string", - 
"doc": "The name of the field." - }, - { - "name": "count", - "type": "int", - "doc": "Number of times the field has been used.", - "TimeseriesField": {} - } - ] - } - } - ], - "doc": "Field-level usage stats", - "default": null, - "TimeseriesFieldCollection": { - "key": "fieldPath" - } - } - ], - "Aspect": { - "name": "datasetUsageStatistics", - "type": "timeseries" - } -} \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/metadata/schemas/MetadataChangeEvent.avsc b/metadata-ingestion/src/datahub/metadata/schemas/MetadataChangeEvent.avsc deleted file mode 100644 index adacd5c624..0000000000 --- a/metadata-ingestion/src/datahub/metadata/schemas/MetadataChangeEvent.avsc +++ /dev/null @@ -1,5734 +0,0 @@ -{ - "type": "record", - "name": "MetadataChangeEvent", - "namespace": "com.linkedin.pegasus2avro.mxe", - "doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead.", - "fields": [ - { - "name": "auditHeader", - "type": [ - "null", - { - "type": "record", - "name": "KafkaAuditHeader", - "namespace": "com.linkedin.events", - "doc": "This header records information about the context of an event as it is emitted into kafka and is intended to be used by the kafka audit application. For more information see go/kafkaauditheader", - "fields": [ - { - "name": "time", - "type": "long", - "doc": "The time at which the event was emitted into kafka.", - "compliance": [ - { - "policy": "EVENT_TIME" - } - ] - }, - { - "name": "server", - "type": "string", - "doc": "The fully qualified name of the host from which the event is being emitted.", - "compliance": "NONE" - }, - { - "name": "instance", - "type": [ - "null", - "string" - ], - "doc": "The instance on the server from which the event is being emitted. e.g. i001", - "default": null, - "compliance": "NONE" - }, - { - "name": "appName", - "type": "string", - "doc": "The name of the application from which the event is being emitted. see go/appname", - "compliance": "NONE" - }, - { - "name": "messageId", - "type": { - "type": "fixed", - "name": "UUID", - "size": 16 - }, - "doc": "A unique identifier for the message", - "compliance": "NONE" - }, - { - "name": "auditVersion", - "type": [ - "null", - "int" - ], - "doc": "The version that is being used for auditing. In version 0, the audit trail buckets events into 10 minute audit windows based on the EventHeader timestamp. In version 1, the audit trail buckets events as follows: if the schema has an outer KafkaAuditHeader, use the outer audit header timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader use that inner audit header's timestamp for bucketing", - "default": null, - "compliance": "NONE" - }, - { - "name": "fabricUrn", - "type": [ - "null", - "string" - ], - "doc": "The fabricUrn of the host from which the event is being emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See go/fabric.", - "default": null, - "compliance": "NONE" - }, - { - "name": "clusterConnectionString", - "type": [ - "null", - "string" - ], - "doc": "This is a String that the client uses to establish some kind of connection with the Kafka cluster. The exact format of it depends on specific versions of clients and brokers. 
This information could potentially identify the fabric and cluster with which the client is producing to or consuming from.", - "default": null, - "compliance": "NONE" - } - ] - } - ], - "doc": "Kafka audit header. See go/kafkaauditheader for more info.", - "default": null - }, - { - "name": "proposedSnapshot", - "type": [ - { - "type": "record", - "name": "ChartSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific Chart entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.ChartUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "ChartKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Chart", - "fields": [ - { - "name": "dashboardTool", - "type": "string", - "doc": "The name of the dashboard tool such as looker, redash etc.", - "Searchable": { - "addToFilters": true, - "boostScore": 4.0, - "fieldName": "tool", - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "chartId", - "type": "string", - "doc": "Unique id for the chart. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. As an example, chart URL could be used here for Looker such as 'looker.linkedin.com/looks/1234'" - } - ], - "Aspect": { - "name": "chartKey" - } - }, - { - "type": "record", - "name": "ChartInfo", - "namespace": "com.linkedin.pegasus2avro.chart", - "doc": "Information about a chart", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "externalUrl", - "type": [ - "null", - "string" - ], - "doc": "URL where the reference exist", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "title", - "type": "string", - "doc": "Title of the chart", - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "description", - "type": "string", - "doc": "Detailed description about the chart", - "Searchable": {} - }, - { - "name": "lastModified", - "type": { - "type": "record", - "name": "ChangeAuditStamps", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into various lifecycle stages, and who acted to move it into those lifecycle stages. The recommended best practice is to include this record in your record schema, and annotate its fields as @readOnly in your resource. See https://github.com/linkedin/rest.li/wiki/Validation-in-Rest.li#restli-validation-annotations", - "fields": [ - { - "name": "created", - "type": { - "type": "record", - "name": "AuditStamp", - "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage.", - "fields": [ - { - "name": "time", - "type": "long", - "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent." 
- }, - { - "name": "actor", - "type": "string", - "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "impersonator", - "type": [ - "null", - "string" - ], - "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - } - ] - }, - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - } - ] - }, - "doc": "Captures information about who created/last modified/deleted this chart and when" - }, - { - "name": "chartUrl", - "type": [ - "null", - "string" - ], - "doc": "URL for the chart. 
This could be used as an external link on DataHub to allow users access/view the chart", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "inputs", - "type": [ - "null", - { - "type": "array", - "items": [ - "string" - ] - } - ], - "doc": "Data sources for the chart", - "default": null, - "Relationship": { - "/*/string": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - } - }, - { - "name": "type", - "type": [ - "null", - { - "type": "enum", - "name": "ChartType", - "doc": "The various types of charts", - "symbols": [ - "BAR", - "PIE", - "SCATTER", - "TABLE", - "TEXT", - "LINE", - "AREA", - "HISTOGRAM", - "BOX_PLOT" - ], - "symbolDocs": { - "BAR": "Chart showing a Bar chart", - "PIE": "Chart showing a Pie chart", - "SCATTER": "Chart showing a Scatter plot", - "TABLE": "Chart showing a table", - "TEXT": "Chart showing Markdown formatted text" - } - } - ], - "doc": "Type of the chart", - "default": null, - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - } - }, - { - "name": "access", - "type": [ - "null", - { - "type": "enum", - "name": "AccessLevel", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "The various access levels", - "symbols": [ - "PUBLIC", - "PRIVATE" - ], - "symbolDocs": { - "PRIVATE": "Private availability to certain set of users", - "PUBLIC": "Publicly available access level" - } - } - ], - "doc": "Access level for the chart", - "default": null, - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - } - }, - { - "name": "lastRefreshed", - "type": [ - "null", - "long" - ], - "doc": "The time when this chart last refreshed", - "default": null - } - ], - "Aspect": { - "name": "chartInfo" - } - }, - { - "type": "record", - "name": "ChartQuery", - "namespace": "com.linkedin.pegasus2avro.chart", - "doc": "Information for chart query which is used for getting data of the chart", - "fields": [ - { - "name": "rawQuery", - "type": "string", - "doc": "Raw query to build a chart from input datasets" - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "ChartQueryType", - "symbols": [ - "LOOKML", - "SQL" - ], - "symbolDocs": { - "LOOKML": "LookML queries", - "SQL": "SQL type queries" - } - }, - "doc": "Chart query type", - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - } - } - ], - "Aspect": { - "name": "chartQuery" - } - }, - { - "type": "record", - "name": "EditableChartProperties", - "namespace": "com.linkedin.pegasus2avro.chart", - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines", - "fields": [ - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. 
A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Edited documentation of the chart ", - "default": null, - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - } - } - ], - "Aspect": { - "name": "editableChartProperties" - } - }, - { - "type": "record", - "name": "Ownership", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Ownership information of an entity.", - "fields": [ - { - "name": "owners", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Owner", - "doc": "Ownership information", - "fields": [ - { - "name": "owner", - "type": "string", - "doc": "Owner URN, e.g. urn:li:corpuser:ldap, urn:li:corpGroup:group_name, and urn:li:multiProduct:mp_name\n(Caveat: only corpuser is currently supported in the frontend.)", - "Relationship": { - "entityTypes": [ - "corpUser", - "corpGroup" - ], - "name": "OwnedBy" - }, - "Searchable": { - "fieldName": "owners", - "fieldType": "URN", - "hasValuesFieldName": "hasOwners", - "queryByDefault": false - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "OwnershipType", - "doc": "Owner category or owner role", - "symbols": [ - "DEVELOPER", - "DATAOWNER", - "DELEGATE", - "PRODUCER", - "CONSUMER", - "STAKEHOLDER" - ], - "symbolDocs": { - "CONSUMER": "A person, group, or service that consumes the data", - "DATAOWNER": "A person or group that is owning the data", - "DELEGATE": "A person or a group that overseas the operation, e.g. a DBA or SRE.", - "DEVELOPER": "A person or group that is in charge of developing the code", - "PRODUCER": "A person, group, or service that produces/generates the data", - "STAKEHOLDER": "A person or a group that has direct business interest" - } - }, - "doc": "The type of the ownership" - }, - { - "name": "source", - "type": [ - "null", - { - "type": "record", - "name": "OwnershipSource", - "doc": "Source/provider of the ownership information", - "fields": [ - { - "name": "type", - "type": { - "type": "enum", - "name": "OwnershipSourceType", - "symbols": [ - "AUDIT", - "DATABASE", - "FILE_SYSTEM", - "ISSUE_TRACKING_SYSTEM", - "MANUAL", - "SERVICE", - "SOURCE_CONTROL", - "OTHER" - ], - "symbolDocs": { - "AUDIT": "Auditing system or audit logs", - "DATABASE": "Database, e.g. GRANTS table", - "FILE_SYSTEM": "File system, e.g. file/directory owner", - "ISSUE_TRACKING_SYSTEM": "Issue tracking system, e.g. Jira", - "MANUAL": "Manually provided by a user", - "OTHER": "Other sources", - "SERVICE": "Other ownership-like service, e.g. Nuage, ACL service etc", - "SOURCE_CONTROL": "SCM system, e.g. GIT, SVN" - } - }, - "doc": "The type of the source" - }, - { - "name": "url", - "type": [ - "null", - "string" - ], - "doc": "A reference URL for the source", - "default": null - } - ] - } - ], - "doc": "Source information for the ownership", - "default": null - } - ] - } - }, - "doc": "List of owners of the entity." 
- }, - { - "name": "lastModified", - "type": "AuditStamp", - "doc": "Audit stamp containing who last modified the record and when. A value of 0 in the time field indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - } - ], - "Aspect": { - "name": "ownership" - } - }, - { - "type": "record", - "name": "Status", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "The status metadata of an entity, e.g. dataset, metric, feature, etc.\nThis aspect is used to represent soft deletes conventionally.", - "fields": [ - { - "name": "removed", - "type": "boolean", - "doc": "whether the entity is removed or not", - "default": false, - "Searchable": { - "fieldType": "BOOLEAN" - } - } - ], - "Aspect": { - "name": "status" - } - }, - { - "type": "record", - "name": "GlobalTags", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Tag aspect used for applying tags to an entity", - "fields": [ - { - "name": "tags", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "TagAssociation", - "doc": "Properties of an applied tag. For now, just an Urn. In the future we can extend this with other properties, e.g.\npropagation parameters.", - "fields": [ - { - "name": "tag", - "type": "string", - "doc": "Urn of the applied tag", - "Searchable": { - "fieldName": "tags", - "fieldType": "URN_PARTIAL", - "hasValuesFieldName": "hasTags" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.TagUrn" - } - } - ] - } - }, - "doc": "Tags associated with a given entity" - } - ], - "Aspect": { - "name": "globalTags" - } - }, - { - "type": "record", - "name": "BrowsePaths", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Shared aspect containing Browse Paths to be indexed for an entity.", - "fields": [ - { - "name": "paths", - "type": { - "type": "array", - "items": "string" - }, - "doc": "A list of valid browse paths for the entity.\n\nBrowse paths are expected to be backslash-separated strings. For example: 'prod/snowflake/datasetName'", - "Searchable": { - "/*": { - "fieldName": "browsePaths", - "fieldType": "BROWSE_PATH" - } - } - } - ], - "Aspect": { - "name": "browsePaths" - } - }, - { - "type": "record", - "name": "GlossaryTerms", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Related business terms information", - "fields": [ - { - "name": "terms", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "GlossaryTermAssociation", - "doc": "Properties of an applied glossary term.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "Urn of the applied glossary term", - "Searchable": { - "fieldName": "glossaryTerms", - "fieldType": "URN_PARTIAL" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn" - } - } - ] - } - }, - "doc": "The related business terms" - }, - { - "name": "auditStamp", - "type": "AuditStamp", - "doc": "Audit stamp containing who reported the related business term" - } - ], - "Aspect": { - "name": "glossaryTerms" - } - }, - { - "type": "record", - "name": "InstitutionalMemory", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Institutional memory of an entity. This is a way to link to relevant documentation and provide description of the documentation. 
Institutional or tribal knowledge is very important for users to leverage the entity.", - "fields": [ - { - "name": "elements", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "InstitutionalMemoryMetadata", - "doc": "Metadata corresponding to a record of institutional memory.", - "fields": [ - { - "name": "url", - "type": "string", - "doc": "Link to an engineering design document or a wiki page.", - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "description", - "type": "string", - "doc": "Description of the link." - }, - { - "name": "createStamp", - "type": "AuditStamp", - "doc": "Audit stamp associated with creation of this record" - } - ] - } - }, - "doc": "List of records that represent institutional memory of an entity. Each record consists of a link, description, creator and timestamps associated with that record." - } - ], - "Aspect": { - "name": "institutionalMemory" - } - } - ] - }, - "doc": "The list of metadata aspects associated with the chart. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "chartKey", - "name": "chart" - } - }, - { - "type": "record", - "name": "CorpGroupSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific CorpGroup entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpGroupUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "CorpGroupKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a CorpGroup", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "The URL-encoded name of the AD/LDAP group. 
Serves as a globally unique identifier within DataHub.", - "Searchable": { - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "corpGroupKey" - } - }, - { - "type": "record", - "name": "CorpGroupInfo", - "namespace": "com.linkedin.pegasus2avro.identity", - "doc": "group of corpUser, it may contains nested group", - "fields": [ - { - "name": "displayName", - "type": [ - "null", - "string" - ], - "doc": "The name to use when displaying the group.", - "default": null, - "Searchable": { - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "email", - "type": [ - "null", - "string" - ], - "doc": "email of this group", - "default": null - }, - { - "name": "admins", - "type": { - "type": "array", - "items": "string" - }, - "doc": "owners of this group", - "Relationship": { - "/*": { - "entityTypes": [ - "corpUser" - ], - "name": "OwnedBy" - } - } - }, - { - "name": "members", - "type": { - "type": "array", - "items": "string" - }, - "doc": "List of ldap urn in this group.", - "Relationship": { - "/*": { - "entityTypes": [ - "corpUser" - ], - "name": "IsPartOf" - } - } - }, - { - "name": "groups", - "type": { - "type": "array", - "items": "string" - }, - "doc": "List of groups in this group.", - "Relationship": { - "/*": { - "entityTypes": [ - "corpGroup" - ], - "name": "IsPartOf" - } - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "A description of the group.", - "default": null, - "Searchable": { - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "EntityUrns": [ - "com.linkedin.pegasus2avro.common.CorpGroupUrn" - ], - "name": "corpGroupInfo" - } - }, - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "doc": "The list of metadata aspects associated with the LdapUser. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "corpGroupKey", - "name": "corpGroup" - } - }, - { - "type": "record", - "name": "CorpUserSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific CorpUser entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpuserUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "CorpUserKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a CorpUser", - "fields": [ - { - "name": "username", - "type": "string", - "doc": "The name of the AD/LDAP user.", - "Searchable": { - "boostScore": 2.0, - "enableAutocomplete": true, - "fieldName": "ldap", - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "corpUserKey" - } - }, - { - "type": "record", - "name": "CorpUserInfo", - "namespace": "com.linkedin.pegasus2avro.identity", - "doc": "Linkedin corp user information", - "fields": [ - { - "name": "active", - "type": "boolean", - "doc": "Whether the corpUser is active, ref: https://iwww.corp.linkedin.com/wiki/cf/display/GTSD/Accessing+Active+Directory+via+LDAP+tools", - "Searchable": { - "fieldType": "BOOLEAN", - "weightsPerFieldValue": { - "true": 2.0 - } - } - }, - { - "name": "displayName", - "type": [ - "null", - "string" - ], - "doc": "displayName of this user , e.g. 
Hang Zhang(DataHQ)", - "default": null - }, - { - "name": "email", - "type": [ - "null", - "string" - ], - "doc": "email address of this user", - "default": null, - "Searchable": { - "fieldType": "KEYWORD", - "queryByDefault": true - } - }, - { - "name": "title", - "type": [ - "null", - "string" - ], - "doc": "title of this user", - "default": null, - "Searchable": { - "fieldType": "KEYWORD", - "queryByDefault": true - } - }, - { - "name": "managerUrn", - "type": [ - "null", - "string" - ], - "doc": "direct manager of this user", - "default": null, - "Relationship": { - "entityTypes": [ - "corpUser" - ], - "name": "ReportsTo" - }, - "Searchable": { - "fieldName": "managerLdap", - "fieldType": "URN", - "queryByDefault": true - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpuserUrn" - } - }, - { - "name": "departmentId", - "type": [ - "null", - "long" - ], - "doc": "department id this user belong to", - "default": null - }, - { - "name": "departmentName", - "type": [ - "null", - "string" - ], - "doc": "department name this user belong to", - "default": null - }, - { - "name": "firstName", - "type": [ - "null", - "string" - ], - "doc": "first name of this user", - "default": null - }, - { - "name": "lastName", - "type": [ - "null", - "string" - ], - "doc": "last name of this user", - "default": null - }, - { - "name": "fullName", - "type": [ - "null", - "string" - ], - "doc": "Common name of this user, format is firstName + lastName (split by a whitespace)", - "default": null, - "Searchable": { - "boostScore": 10.0, - "fieldType": "TEXT_PARTIAL", - "queryByDefault": true - } - }, - { - "name": "countryCode", - "type": [ - "null", - "string" - ], - "doc": "two uppercase letters country code. e.g. US", - "default": null - } - ], - "Aspect": { - "EntityUrns": [ - "com.linkedin.pegasus2avro.common.CorpuserUrn" - ], - "name": "corpUserInfo" - } - }, - { - "type": "record", - "name": "CorpUserEditableInfo", - "namespace": "com.linkedin.pegasus2avro.identity", - "doc": "Linkedin corp user information that can be edited from UI", - "fields": [ - { - "name": "aboutMe", - "type": [ - "null", - "string" - ], - "doc": "About me section of the user", - "default": null - }, - { - "name": "teams", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Teams that the user belongs to e.g. Metadata", - "default": [], - "Searchable": { - "/*": { - "fieldType": "TEXT" - } - } - }, - { - "name": "skills", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Skills that the user possesses e.g. 
Machine Learning", - "default": [], - "Searchable": { - "/*": { - "fieldType": "TEXT" - } - } - }, - { - "name": "pictureLink", - "type": "string", - "doc": "A URL which points to a picture which user wants to set as a profile photo", - "default": "https://raw.githubusercontent.com/linkedin/datahub/master/datahub-web-react/src/images/default_avatar.png", - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - } - ], - "Aspect": { - "EntityUrns": [ - "com.linkedin.pegasus2avro.common.CorpuserUrn" - ], - "name": "corpUserEditableInfo" - } - }, - { - "type": "record", - "name": "GroupMembership", - "namespace": "com.linkedin.pegasus2avro.identity", - "doc": "Carries information about the CorpGroups a user is in.", - "fields": [ - { - "name": "groups", - "type": { - "type": "array", - "items": "string" - }, - "Relationship": { - "/*": { - "entityTypes": [ - "corpGroup" - ], - "name": "IsMemberOfGroup" - } - } - } - ], - "Aspect": { - "name": "groupMembership" - } - }, - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "doc": "The list of metadata aspects associated with the CorpUser. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "corpUserKey", - "name": "corpuser" - } - }, - { - "type": "record", - "name": "DashboardSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific Dashboard entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DashboardUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "DashboardKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Dashboard", - "fields": [ - { - "name": "dashboardTool", - "type": "string", - "doc": "The name of the dashboard tool such as looker, redash etc.", - "Searchable": { - "addToFilters": true, - "boostScore": 4.0, - "fieldName": "tool", - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "dashboardId", - "type": "string", - "doc": "Unique id for the dashboard. This id should be globally unique for a dashboarding tool even when there are multiple deployments of it. 
As an example, dashboard URL could be used here for Looker such as 'looker.linkedin.com/dashboards/1234'" - } - ], - "Aspect": { - "name": "dashboardKey" - } - }, - { - "type": "record", - "name": "DashboardInfo", - "namespace": "com.linkedin.pegasus2avro.dashboard", - "doc": "Information about a dashboard", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "externalUrl", - "type": [ - "null", - "string" - ], - "doc": "URL where the reference exist", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "title", - "type": "string", - "doc": "Title of the dashboard", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "description", - "type": "string", - "doc": "Detailed description about the dashboard", - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - } - }, - { - "name": "charts", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Charts in a dashboard", - "default": [], - "Relationship": { - "/*": { - "entityTypes": [ - "chart" - ], - "name": "Contains" - } - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.ChangeAuditStamps", - "doc": "Captures information about who created/last modified/deleted this dashboard and when" - }, - { - "name": "dashboardUrl", - "type": [ - "null", - "string" - ], - "doc": "URL for the dashboard. This could be used as an external link on DataHub to allow users access/view the dashboard", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "access", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AccessLevel" - ], - "doc": "Access level for the dashboard", - "default": null, - "Searchable": { - "addToFilters": true, - "fieldType": "KEYWORD" - } - }, - { - "name": "lastRefreshed", - "type": [ - "null", - "long" - ], - "doc": "The time when this dashboard last refreshed", - "default": null - } - ], - "Aspect": { - "name": "dashboardInfo" - } - }, - { - "type": "record", - "name": "EditableDashboardProperties", - "namespace": "com.linkedin.pegasus2avro.dashboard", - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines", - "fields": [ - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. 
A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Edited documentation of the dashboard", - "default": null, - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - } - } - ], - "Aspect": { - "name": "editableDashboardProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.BrowsePaths", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.InstitutionalMemory" - ] - }, - "doc": "The list of metadata aspects associated with the dashboard. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "dashboardKey", - "name": "dashboard" - } - }, - { - "type": "record", - "name": "DataFlowSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific DataFlow entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataFlowUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "DataFlowKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Data Flow", - "fields": [ - { - "name": "orchestrator", - "type": "string", - "doc": "Workflow manager like azkaban, airflow which orchestrates the flow", - "Searchable": { - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "flowId", - "type": "string", - "doc": "Unique Identifier of the data flow", - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "cluster", - "type": "string", - "doc": "Cluster where the flow is executed", - "Searchable": { - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "dataFlowKey" - } - }, - { - "type": "record", - "name": "DataFlowInfo", - "namespace": "com.linkedin.pegasus2avro.datajob", - "doc": "Information about a Data processing flow", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "externalUrl", - "type": [ - "null", - "string" - ], - "doc": "URL where the reference exist", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "name", - "type": "string", - "doc": "Flow name", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Flow description", - "default": null, - "Searchable": { - "fieldType": "TEXT", - 
"hasValuesFieldName": "hasDescription" - } - }, - { - "name": "project", - "type": [ - "null", - "string" - ], - "doc": "Optional project/namespace associated with the flow", - "default": null, - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - } - } - ], - "Aspect": { - "name": "dataFlowInfo" - } - }, - { - "type": "record", - "name": "EditableDataFlowProperties", - "namespace": "com.linkedin.pegasus2avro.datajob", - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines", - "fields": [ - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Edited documentation of the data flow", - "default": null, - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - } - } - ], - "Aspect": { - "name": "editableDataFlowProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.BrowsePaths", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.InstitutionalMemory" - ] - }, - "doc": "The list of metadata aspects associated with the data flow. Depending on the use case, this can either be all, or a selection, of supported aspects." 
- } - ], - "Entity": { - "keyAspect": "dataFlowKey", - "name": "dataFlow" - } - }, - { - "type": "record", - "name": "DataJobSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific DataJob entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataJobUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "DataJobKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Data Job", - "fields": [ - { - "name": "flow", - "type": "string", - "doc": "Standardized data processing flow urn representing the flow for the job", - "Relationship": { - "entityTypes": [ - "dataFlow" - ], - "name": "IsPartOf" - }, - "Searchable": { - "fieldName": "dataFlow", - "fieldType": "URN_PARTIAL", - "queryByDefault": false - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "jobId", - "type": "string", - "doc": "Unique Identifier of the data job", - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "dataJobKey" - } - }, - { - "type": "record", - "name": "DataJobInfo", - "namespace": "com.linkedin.pegasus2avro.datajob", - "doc": "Information about a Data processing job", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "externalUrl", - "type": [ - "null", - "string" - ], - "doc": "URL where the reference exist", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "name", - "type": "string", - "doc": "Job name", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Job description", - "default": null, - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - } - }, - { - "name": "type", - "type": [ - { - "type": "enum", - "name": "AzkabanJobType", - "namespace": "com.linkedin.pegasus2avro.datajob.azkaban", - "doc": "The various types of support azkaban jobs", - "symbols": [ - "COMMAND", - "HADOOP_JAVA", - "HADOOP_SHELL", - "HIVE", - "PIG", - "SQL", - "GLUE" - ], - "symbolDocs": { - "COMMAND": "The command job type is one of the basic built-in types. It runs multiple UNIX commands using java processbuilder.\nUpon execution, Azkaban spawns off a process to run the command.", - "GLUE": "Glue type is for running AWS Glue job transforms.", - "HADOOP_JAVA": "Runs a java program with ability to access Hadoop cluster.\nhttps://azkaban.readthedocs.io/en/latest/jobTypes.html#java-job-type", - "HADOOP_SHELL": "In large part, this is the same Command type. The difference is its ability to talk to a Hadoop cluster\nsecurely, via Hadoop tokens.", - "HIVE": "Hive type is for running Hive jobs.", - "PIG": "Pig type is for running Pig jobs.", - "SQL": "SQL is for running Presto, mysql queries etc" - } - }, - "string" - ], - "doc": "Datajob type\n**NOTE**: AzkabanJobType is deprecated. Please use strings instead." 
- }, - { - "name": "flowUrn", - "type": [ - "null", - "string" - ], - "doc": "DataFlow urn that this job is part of", - "default": null, - "Relationship": { - "entityTypes": [ - "dataFlow" - ], - "name": "IsPartOf" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataFlowUrn" - } - }, - { - "name": "status", - "type": [ - "null", - { - "type": "enum", - "name": "JobStatus", - "doc": "Job statuses", - "symbols": [ - "STARTING", - "IN_PROGRESS", - "STOPPING", - "STOPPED", - "COMPLETED", - "FAILED", - "UNKNOWN" - ], - "symbolDocs": { - "COMPLETED": "Jobs with successful completion.", - "FAILED": "Jobs that have failed.", - "IN_PROGRESS": "Jobs currently running.", - "STARTING": "Jobs being initialized.", - "STOPPED": "Jobs that have stopped.", - "STOPPING": "Jobs being stopped.", - "UNKNOWN": "Jobs with unknown status (either unmappable or unavailable)" - } - } - ], - "doc": "Status of the job", - "default": null - } - ], - "Aspect": { - "name": "dataJobInfo" - } - }, - { - "type": "record", - "name": "DataJobInputOutput", - "namespace": "com.linkedin.pegasus2avro.datajob", - "doc": "Information about the inputs and outputs of a Data processing job", - "fields": [ - { - "name": "inputDatasets", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Input datasets consumed by the data job during processing", - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - }, - "Searchable": { - "/*": { - "fieldName": "inputs", - "fieldType": "URN", - "numValuesFieldName": "numInputDatasets", - "queryByDefault": false - } - } - }, - { - "name": "outputDatasets", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Output datasets produced by the data job during processing", - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Produces" - } - }, - "Searchable": { - "/*": { - "fieldName": "outputs", - "fieldType": "URN", - "numValuesFieldName": "numOutputDatasets", - "queryByDefault": false - } - } - }, - { - "name": "inputDatajobs", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Input datajobs that this data job depends on", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "dataJob" - ], - "name": "DownstreamOf" - } - } - } - ], - "Aspect": { - "name": "dataJobInputOutput" - } - }, - { - "type": "record", - "name": "EditableDataJobProperties", - "namespace": "com.linkedin.pegasus2avro.datajob", - "doc": "Stores editable changes made to properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines", - "fields": [ - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. 
A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Edited documentation of the data job ", - "default": null, - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - } - } - ], - "Aspect": { - "name": "editableDataJobProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.BrowsePaths", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.InstitutionalMemory" - ] - }, - "doc": "The list of metadata aspects associated with the data job. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "dataJobKey", - "name": "dataJob" - } - }, - { - "type": "record", - "name": "DatasetSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific dataset entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "DatasetKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Dataset", - "fields": [ - { - "name": "platform", - "type": "string", - "doc": "Data platform urn associated with the dataset", - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "name", - "type": "string", - "doc": "Dataset native name e.g. .
, /dir/subdir/, or ", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "origin", - "type": { - "type": "enum", - "name": "FabricType", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Fabric group type", - "symbols": [ - "DEV", - "EI", - "PROD", - "CORP" - ], - "symbolDocs": { - "CORP": "Designates corporation fabrics", - "DEV": "Designates development fabrics", - "EI": "Designates early-integration (staging) fabrics", - "PROD": "Designates production fabrics" - } - }, - "doc": "Fabric type where dataset belongs to or where it was generated.", - "Searchable": { - "addToFilters": true, - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - } - } - ], - "Aspect": { - "name": "datasetKey" - } - }, - { - "type": "record", - "name": "DatasetProperties", - "namespace": "com.linkedin.pegasus2avro.dataset", - "doc": "Properties associated with a Dataset", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "externalUrl", - "type": [ - "null", - "string" - ], - "doc": "URL where the reference exist", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the dataset", - "default": null, - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - } - }, - { - "name": "uri", - "type": [ - "null", - "string" - ], - "doc": "The abstracted URI such as hdfs:///data/tracking/PageViewEvent, file:///dir/file_name. Uri should not include any environment specific properties. Some datasets might not have a standardized uri, which makes this field optional (i.e. kafka topic).", - "default": null, - "java": { - "class": "java.net.URI" - } - }, - { - "name": "tags", - "type": { - "type": "array", - "items": "string" - }, - "doc": "[Legacy] Unstructured tags for the dataset. Structured tags can be applied via the `GlobalTags` aspect.", - "default": [] - } - ], - "Aspect": { - "name": "datasetProperties" - } - }, - { - "type": "record", - "name": "EditableDatasetProperties", - "namespace": "com.linkedin.pegasus2avro.dataset", - "doc": "EditableDatasetProperties stores editable changes made to dataset properties. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines", - "fields": [ - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. 
A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the dataset", - "default": null, - "Searchable": { - "fieldName": "editedDescription", - "fieldType": "TEXT" - } - } - ], - "Aspect": { - "name": "editableDatasetProperties" - } - }, - { - "type": "record", - "name": "DatasetDeprecation", - "namespace": "com.linkedin.pegasus2avro.dataset", - "doc": "Dataset deprecation status", - "fields": [ - { - "name": "deprecated", - "type": "boolean", - "doc": "Whether the dataset is deprecated by owner.", - "Searchable": { - "fieldType": "BOOLEAN", - "weightsPerFieldValue": { - "true": 0.5 - } - } - }, - { - "name": "decommissionTime", - "type": [ - "null", - "long" - ], - "doc": "The time user plan to decommission this dataset.", - "default": null - }, - { - "name": "note", - "type": "string", - "doc": "Additional information about the dataset deprecation plan, such as the wiki, doc, RB." - }, - { - "name": "actor", - "type": [ - "null", - "string" - ], - "doc": "The corpuser URN which will be credited for modifying this deprecation content.", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - } - ], - "Aspect": { - "name": "datasetDeprecation" - } - }, - { - "type": "record", - "name": "DatasetUpstreamLineage", - "namespace": "com.linkedin.pegasus2avro.dataset", - "doc": "Fine Grained upstream lineage for fields in a dataset", - "fields": [ - { - "name": "fieldMappings", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "DatasetFieldMapping", - "doc": "Representation of mapping between fields in source dataset to the field in destination dataset", - "fields": [ - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "Audit stamp containing who reported the field mapping and when" - }, - { - "name": "transformation", - "type": [ - { - "type": "enum", - "name": "TransformationType", - "namespace": "com.linkedin.pegasus2avro.common.fieldtransformer", - "doc": "Type of the transformation involved in generating destination fields from source fields.", - "symbols": [ - "BLACKBOX", - "IDENTITY" - ], - "symbolDocs": { - "BLACKBOX": "Field transformation expressed as unknown black box function.", - "IDENTITY": "Field transformation expressed as Identity function." - } - }, - { - "type": "record", - "name": "UDFTransformer", - "namespace": "com.linkedin.pegasus2avro.common.fieldtransformer", - "doc": "Field transformation expressed in UDF", - "fields": [ - { - "name": "udf", - "type": "string", - "doc": "A UDF mentioning how the source fields got transformed to destination field. This is the FQCN(Fully Qualified Class Name) of the udf." 
- } - ] - } - ], - "doc": "Transfomration function between the fields involved" - }, - { - "name": "sourceFields", - "type": { - "type": "array", - "items": [ - "string" - ] - }, - "doc": "Source fields from which the fine grained lineage is derived" - }, - { - "name": "destinationField", - "type": "string", - "doc": "Destination field which is derived from source fields", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetFieldUrn" - } - } - ] - } - }, - "doc": "Upstream to downstream field level lineage mappings" - } - ], - "Aspect": { - "name": "datasetUpstreamLineage" - } - }, - { - "type": "record", - "name": "UpstreamLineage", - "namespace": "com.linkedin.pegasus2avro.dataset", - "doc": "Upstream lineage of a dataset", - "fields": [ - { - "name": "upstreams", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "Upstream", - "doc": "Upstream lineage information about a dataset including the source reporting the lineage", - "fields": [ - { - "name": "auditStamp", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "Audit stamp containing who reported the lineage and when.\nWARNING: this field is deprecated and may be removed in a future release.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - }, - "deprecated": "we no longer associate a timestamp per upstream edge" - }, - { - "name": "dataset", - "type": "string", - "doc": "The upstream dataset the lineage points to", - "Relationship": { - "entityTypes": [ - "dataset" - ], - "name": "DownstreamOf" - }, - "Searchable": { - "fieldName": "upstreams", - "fieldType": "URN", - "queryByDefault": false - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - } - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "DatasetLineageType", - "doc": "The various types of supported dataset lineage", - "symbols": [ - "COPY", - "TRANSFORMED", - "VIEW" - ], - "symbolDocs": { - "COPY": "Direct copy without modification", - "TRANSFORMED": "Transformed data with modification (format or content change)", - "VIEW": "Represents a view defined on the sources e.g. Hive view defined on underlying hive tables or a Hive table pointing to a HDFS dataset or DALI view defined on multiple sources" - } - }, - "doc": "The type of the lineage" - } - ] - } - }, - "doc": "List of upstream dataset lineage information" - } - ], - "Aspect": { - "name": "upstreamLineage" - } - }, - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - { - "type": "record", - "name": "SchemaMetadata", - "namespace": "com.linkedin.pegasus2avro.schema", - "doc": "SchemaMetadata to describe metadata related to store schema", - "fields": [ - { - "name": "schemaName", - "type": "string", - "doc": "Schema name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking", - "validate": { - "strlen": { - "max": 500, - "min": 1 - } - } - }, - { - "name": "platform", - "type": "string", - "doc": "Standardized platform urn where schema is defined. The data platform Urn (urn:li:platform:{platform_name})", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataPlatformUrn" - } - }, - { - "name": "version", - "type": "long", - "doc": "Every change to SchemaMetadata in the resource results in a new version. Version is server assigned. This version is differ from platform native schema version." 
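The UpstreamLineage, Upstream, and DatasetLineageType records above are how dataset-level lineage is expressed. A rough sketch of building that aspect with the generated classes (the dataset urn is an invented example; exact signatures come from the generated `schema_classes.py`):

```python
# Sketch only: assumes generated datahub.metadata.schema_classes is available;
# the upstream dataset urn is an invented example.
from datahub.metadata.schema_classes import (
    DatasetLineageTypeClass,
    UpstreamClass,
    UpstreamLineageClass,
)

lineage = UpstreamLineageClass(
    upstreams=[
        UpstreamClass(
            dataset="urn:li:dataset:(urn:li:dataPlatform:hive,db.raw_events,PROD)",
            type=DatasetLineageTypeClass.TRANSFORMED,  # COPY | TRANSFORMED | VIEW
        )
    ]
)
```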
- }, - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - }, - { - "name": "dataset", - "type": [ - "null", - "string" - ], - "doc": "Dataset this schema metadata is associated with.", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - } - }, - { - "name": "cluster", - "type": [ - "null", - "string" - ], - "doc": "The cluster this schema metadata resides from", - "default": null - }, - { - "name": "hash", - "type": "string", - "doc": "the SHA1 hash of the schema content" - }, - { - "name": "platformSchema", - "type": [ - { - "type": "record", - "name": "EspressoSchema", - "doc": "Schema text of an espresso table schema.", - "fields": [ - { - "name": "documentSchema", - "type": "string", - "doc": "The native espresso document schema." - }, - { - "name": "tableSchema", - "type": "string", - "doc": "The espresso table schema definition." - } - ] - }, - { - "type": "record", - "name": "OracleDDL", - "doc": "Schema holder for oracle data definition language that describes an oracle table.", - "fields": [ - { - "name": "tableSchema", - "type": "string", - "doc": "The native schema in the dataset's platform. This is a human readable (json blob) table schema." - } - ] - }, - { - "type": "record", - "name": "MySqlDDL", - "doc": "Schema holder for MySql data definition language that describes an MySql table.", - "fields": [ - { - "name": "tableSchema", - "type": "string", - "doc": "The native schema in the dataset's platform. This is a human readable (json blob) table schema." - } - ] - }, - { - "type": "record", - "name": "PrestoDDL", - "doc": "Schema holder for presto data definition language that describes a presto view.", - "fields": [ - { - "name": "rawSchema", - "type": "string", - "doc": "The raw schema in the dataset's platform. This includes the DDL and the columns extracted from DDL." - } - ] - }, - { - "type": "record", - "name": "KafkaSchema", - "doc": "Schema holder for kafka schema.", - "fields": [ - { - "name": "documentSchema", - "type": "string", - "doc": "The native kafka document schema. This is a human readable avro document schema." 
- }, - { - "name": "keySchema", - "type": [ - "null", - "string" - ], - "doc": "The native kafka key schema as retrieved from Schema Registry", - "default": null - } - ] - }, - { - "type": "record", - "name": "BinaryJsonSchema", - "doc": "Schema text of binary JSON schema.", - "fields": [ - { - "name": "schema", - "type": "string", - "doc": "The native schema text for binary JSON file format." - } - ] - }, - { - "type": "record", - "name": "OrcSchema", - "doc": "Schema text of an ORC schema.", - "fields": [ - { - "name": "schema", - "type": "string", - "doc": "The native schema for ORC file format." - } - ] - }, - { - "type": "record", - "name": "Schemaless", - "doc": "The dataset has no specific schema associated with it", - "fields": [] - }, - { - "type": "record", - "name": "KeyValueSchema", - "doc": "Schema text of a key-value store schema.", - "fields": [ - { - "name": "keySchema", - "type": "string", - "doc": "The raw schema for the key in the key-value store." - }, - { - "name": "valueSchema", - "type": "string", - "doc": "The raw schema for the value in the key-value store." - } - ] - }, - { - "type": "record", - "name": "OtherSchema", - "doc": "Schema holder for undefined schema types.", - "fields": [ - { - "name": "rawSchema", - "type": "string", - "doc": "The native schema in the dataset's platform." - } - ] - } - ], - "doc": "The native schema in the dataset's platform." - }, - { - "name": "fields", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "SchemaField", - "doc": "SchemaField to describe metadata related to dataset schema. Schema normalization rules: http://go/tms-schema", - "fields": [ - { - "name": "fieldPath", - "type": "string", - "doc": "Flattened name of the field. Field is computed from jsonPath field. For data translation rules refer to wiki page above.", - "Searchable": { - "fieldName": "fieldPaths", - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "jsonPath", - "type": [ - "null", - "string" - ], - "doc": "Flattened name of a field in JSON Path notation.", - "default": null - }, - { - "name": "nullable", - "type": "boolean", - "doc": "Indicates if this field is optional or nullable", - "default": false - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Description", - "default": null, - "Searchable": { - "boostScore": 0.1, - "fieldName": "fieldDescriptions", - "fieldType": "TEXT" - } - }, - { - "name": "type", - "type": { - "type": "record", - "name": "SchemaFieldDataType", - "doc": "Schema field data types", - "fields": [ - { - "name": "type", - "type": [ - { - "type": "record", - "name": "BooleanType", - "doc": "Boolean field type.", - "fields": [] - }, - { - "type": "record", - "name": "FixedType", - "doc": "Fixed field type.", - "fields": [] - }, - { - "type": "record", - "name": "StringType", - "doc": "String field type.", - "fields": [] - }, - { - "type": "record", - "name": "BytesType", - "doc": "Bytes field type.", - "fields": [] - }, - { - "type": "record", - "name": "NumberType", - "doc": "Number data type: long, integer, short, etc..", - "fields": [] - }, - { - "type": "record", - "name": "DateType", - "doc": "Date field type.", - "fields": [] - }, - { - "type": "record", - "name": "TimeType", - "doc": "Time field type. 
This should also be used for datetimes.", - "fields": [] - }, - { - "type": "record", - "name": "EnumType", - "doc": "Enum field type.", - "fields": [] - }, - { - "type": "record", - "name": "NullType", - "doc": "Null field type.", - "fields": [] - }, - { - "type": "record", - "name": "MapType", - "doc": "Map field type.", - "fields": [ - { - "name": "keyType", - "type": [ - "null", - "string" - ], - "doc": "Key type in a map", - "default": null - }, - { - "name": "valueType", - "type": [ - "null", - "string" - ], - "doc": "Type of the value in a map", - "default": null - } - ] - }, - { - "type": "record", - "name": "ArrayType", - "doc": "Array field type.", - "fields": [ - { - "name": "nestedType", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "List of types this array holds.", - "default": null - } - ] - }, - { - "type": "record", - "name": "UnionType", - "doc": "Union field type.", - "fields": [ - { - "name": "nestedTypes", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "List of types in union type.", - "default": null - } - ] - }, - { - "type": "record", - "name": "RecordType", - "doc": "Record field type.", - "fields": [] - } - ], - "doc": "Data platform specific types" - } - ] - }, - "doc": "Platform independent field type of the field." - }, - { - "name": "nativeDataType", - "type": "string", - "doc": "The native type of the field in the dataset's platform as declared by platform schema." - }, - { - "name": "recursive", - "type": "boolean", - "doc": "There are use cases when a field in type B references type A. A field in A references field of type B. In such cases, we will mark the first field as recursive.", - "default": false - }, - { - "name": "globalTags", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlobalTags" - ], - "doc": "Tags associated with the field", - "default": null, - "Searchable": { - "/tags/*/tag": { - "boostScore": 0.5, - "fieldName": "fieldTags", - "fieldType": "URN_PARTIAL" - } - } - }, - { - "name": "glossaryTerms", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlossaryTerms" - ], - "doc": "Glossary terms associated with the field", - "default": null, - "Searchable": { - "/terms/*/urn": { - "boostScore": 0.5, - "fieldName": "fieldGlossaryTerms", - "fieldType": "URN_PARTIAL" - } - } - }, - { - "name": "isPartOfKey", - "type": "boolean", - "doc": "For schema fields that are part of complex keys, set this field to true\nWe do this to easily distinguish between value and key fields", - "default": false - } - ] - } - }, - "doc": "Client provided a list of fields from document schema." - }, - { - "name": "primaryKeys", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Client provided list of fields that define primary keys to access record. Field order defines hierarchical espresso keys. Empty lists indicates absence of primary key access patter. 
Value is a SchemaField@fieldPath.", - "default": null - }, - { - "name": "foreignKeysSpecs", - "type": [ - "null", - { - "type": "map", - "values": { - "type": "record", - "name": "ForeignKeySpec", - "doc": "Description of a foreign key in a schema.", - "fields": [ - { - "name": "foreignKey", - "type": [ - { - "type": "record", - "name": "DatasetFieldForeignKey", - "doc": "For non-urn based foregin keys.", - "fields": [ - { - "name": "parentDataset", - "type": "string", - "doc": "dataset that stores the resource.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - } - }, - { - "name": "currentFieldPaths", - "type": { - "type": "array", - "items": "string" - }, - "doc": "List of fields in hosting(current) SchemaMetadata that conform a foreign key. List can contain a single entry or multiple entries if several entries in hosting schema conform a foreign key in a single parent dataset." - }, - { - "name": "parentField", - "type": "string", - "doc": "SchemaField@fieldPath that uniquely identify field in parent dataset that this field references." - } - ] - }, - { - "type": "record", - "name": "UrnForeignKey", - "doc": "If SchemaMetadata fields make any external references and references are of type com.linkedin.pegasus2avro.common.Urn or any children, this models can be used to mark it.", - "fields": [ - { - "name": "currentFieldPath", - "type": "string", - "doc": "Field in hosting(current) SchemaMetadata." - } - ] - } - ], - "doc": "Foreign key definition in metadata schema." - } - ] - } - } - ], - "doc": "Map captures all the references schema makes to external datasets. Map key is ForeignKeySpecName typeref.", - "default": null, - "deprecated": "Use foreignKeys instead." - }, - { - "name": "foreignKeys", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "ForeignKeyConstraint", - "doc": "Description of a foreign key constraint in a schema.", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "Name of the constraint, likely provided from the source" - }, - { - "name": "foreignFields", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Fields the constraint maps to on the foreign dataset", - "Relationship": { - "/*": { - "entityTypes": [ - "schemaField" - ], - "name": "ForeignKeyTo" - } - } - }, - { - "name": "sourceFields", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Fields the constraint maps to on the source dataset" - }, - { - "name": "foreignDataset", - "type": "string", - "doc": "Reference to the foreign dataset for ease of lookup", - "Relationship": { - "entityTypes": [ - "dataset" - ], - "name": "ForeignKeyToDataset" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - } - ] - } - } - ], - "doc": "List of foreign key constraints for the schema", - "default": null - } - ], - "Aspect": { - "name": "schemaMetadata" - } - }, - { - "type": "record", - "name": "EditableSchemaMetadata", - "namespace": "com.linkedin.pegasus2avro.schema", - "doc": "EditableSchemaMetadata stores editable changes made to schema metadata. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines.", - "fields": [ - { - "name": "created", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the creation of this resource/association/sub-resource. 
A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "lastModified", - "type": "com.linkedin.pegasus2avro.common.AuditStamp", - "doc": "An AuditStamp corresponding to the last modification of this resource/association/sub-resource. If no modification has happened since creation, lastModified should be the same as created. A value of 0 for time indicates missing data.", - "default": { - "actor": "urn:li:corpuser:unknown", - "impersonator": null, - "time": 0 - } - }, - { - "name": "deleted", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.AuditStamp" - ], - "doc": "An AuditStamp corresponding to the deletion of this resource/association/sub-resource. Logically, deleted MUST have a later timestamp than creation. It may or may not have the same time as lastModified depending upon the resource/association/sub-resource semantics.", - "default": null - }, - { - "name": "editableSchemaFieldInfo", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "EditableSchemaFieldInfo", - "doc": "SchemaField to describe metadata related to dataset schema.", - "fields": [ - { - "name": "fieldPath", - "type": "string", - "doc": "FieldPath uniquely identifying the SchemaField this metadata is associated with" - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Description", - "default": null, - "Searchable": { - "boostScore": 0.1, - "fieldName": "editedFieldDescriptions", - "fieldType": "TEXT" - } - }, - { - "name": "globalTags", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlobalTags" - ], - "doc": "Tags associated with the field", - "default": null, - "Searchable": { - "/tags/*/tag": { - "boostScore": 0.5, - "fieldName": "editedFieldTags", - "fieldType": "URN_PARTIAL" - } - } - }, - { - "name": "glossaryTerms", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.GlossaryTerms" - ], - "doc": "Glossary terms associated with the field", - "default": null, - "Searchable": { - "/terms/*/urn": { - "boostScore": 0.5, - "fieldName": "editedFieldGlossaryTerms", - "fieldType": "URN_PARTIAL" - } - } - } - ] - } - }, - "doc": "Client provided a list of fields from document schema." - } - ], - "Aspect": { - "name": "editableSchemaMetadata" - } - }, - "com.linkedin.pegasus2avro.common.GlobalTags", - "com.linkedin.pegasus2avro.common.GlossaryTerms", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "doc": "The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "datasetKey", - "name": "dataset" - } - }, - { - "type": "record", - "name": "DataProcessSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific Data process entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataProcessUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "DataProcessKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Data Process", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "Process name i.e. 
an ETL job name", - "Searchable": { - "boostScore": 4.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "orchestrator", - "type": "string", - "doc": "Standardized Orchestrator where data process is defined.\nTODO: Migrate towards something that can be validated like DataPlatform urn", - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "origin", - "type": "com.linkedin.pegasus2avro.common.FabricType", - "doc": "Fabric type where dataset belongs to or where it was generated.", - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - } - } - ], - "Aspect": { - "name": "dataProcessKey" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - { - "type": "record", - "name": "DataProcessInfo", - "namespace": "com.linkedin.pegasus2avro.dataprocess", - "doc": "The inputs and outputs of this data process", - "fields": [ - { - "name": "inputs", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "the inputs of the data process", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - }, - "Searchable": { - "/*": { - "fieldName": "inputs", - "fieldType": "URN", - "numValuesFieldName": "numInputDatasets", - "queryByDefault": false - } - } - }, - { - "name": "outputs", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "the outputs of the data process", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "Consumes" - } - }, - "Searchable": { - "/*": { - "fieldName": "outputs", - "fieldType": "URN", - "numValuesFieldName": "numOutputDatasets", - "queryByDefault": false - } - } - } - ], - "Aspect": { - "name": "dataProcessInfo" - } - }, - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "doc": "The list of metadata aspects associated with the data process. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "dataProcessKey", - "name": "dataProcess" - }, - "deprecated": "Use DataJob instead." - }, - { - "type": "record", - "name": "DataPlatformSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific dataplatform entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DataPlatformUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "DataPlatformKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Data Platform", - "fields": [ - { - "name": "platformName", - "type": "string", - "doc": "Data platform name i.e. 
hdfs, oracle, espresso" - } - ], - "Aspect": { - "name": "dataPlatformKey" - } - }, - { - "type": "record", - "name": "DataPlatformInfo", - "namespace": "com.linkedin.pegasus2avro.dataplatform", - "doc": "Information about a data platform", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "Name of the data platform", - "validate": { - "strlen": { - "max": 15 - } - } - }, - { - "name": "displayName", - "type": [ - "null", - "string" - ], - "doc": "The name that will be used for displaying a platform type.", - "default": null - }, - { - "name": "type", - "type": { - "type": "enum", - "name": "PlatformType", - "doc": "Platform types available at LinkedIn", - "symbols": [ - "FILE_SYSTEM", - "KEY_VALUE_STORE", - "MESSAGE_BROKER", - "OBJECT_STORE", - "OLAP_DATASTORE", - "OTHERS", - "QUERY_ENGINE", - "RELATIONAL_DB", - "SEARCH_ENGINE" - ], - "symbolDocs": { - "FILE_SYSTEM": "Value for a file system, e.g. hdfs", - "KEY_VALUE_STORE": "Value for a key value store, e.g. espresso, voldemort", - "MESSAGE_BROKER": "Value for a message broker, e.g. kafka", - "OBJECT_STORE": "Value for an object store, e.g. ambry", - "OLAP_DATASTORE": "Value for an OLAP datastore, e.g. pinot", - "OTHERS": "Value for other platforms, e.g salesforce, dovetail", - "QUERY_ENGINE": "Value for a query engine, e.g. presto", - "RELATIONAL_DB": "Value for a relational database, e.g. oracle, mysql", - "SEARCH_ENGINE": "Value for a search engine, e.g seas" - } - }, - "doc": "Platform type this data platform describes" - }, - { - "name": "datasetNameDelimiter", - "type": "string", - "doc": "The delimiter in the dataset names on the data platform, e.g. '/' for HDFS and '.' for Oracle" - }, - { - "name": "logoUrl", - "type": [ - "null", - "string" - ], - "doc": "The URL for a logo associated with the platform", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - } - ], - "Aspect": { - "name": "dataPlatformInfo" - } - } - ] - }, - "doc": "The list of metadata aspects associated with the data platform. Depending on the use case, this can either be all, or a selection, of supported aspects." 
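Tying together the SchemaMetadata, SchemaField, and platformSchema records defined earlier in this file, a hedged sketch of how a source might describe a simple Kafka topic schema (the topic name, field names, and document schema string are invented; constructor details come from the generated `schema_classes.py`):

```python
# Sketch only: assumes generated datahub.metadata.schema_classes is available;
# topic/field names and the document schema below are invented examples.
from datahub.metadata.schema_classes import (
    KafkaSchemaClass,
    NumberTypeClass,
    SchemaFieldClass,
    SchemaFieldDataTypeClass,
    SchemaMetadataClass,
    StringTypeClass,
)

schema_metadata = SchemaMetadataClass(
    schemaName="user_signups",
    platform="urn:li:dataPlatform:kafka",
    version=0,
    hash="",  # "the SHA1 hash of the schema content"; left empty when unknown
    platformSchema=KafkaSchemaClass(
        documentSchema='{"type": "record", "name": "UserSignup", "fields": []}'
    ),
    fields=[
        SchemaFieldClass(
            fieldPath="user_id",
            type=SchemaFieldDataTypeClass(type=StringTypeClass()),
            nativeDataType="string",
            description="Primary identifier for the user",
        ),
        SchemaFieldClass(
            fieldPath="signup_ts",
            type=SchemaFieldDataTypeClass(type=NumberTypeClass()),
            nativeDataType="long",
        ),
    ],
)
```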
- } - ], - "Entity": { - "keyAspect": "dataPlatformKey", - "name": "dataPlatform" - } - }, - { - "type": "record", - "name": "MLModelSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "MLModel Snapshot entity details.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.MLModelUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "MLModelKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for an ML model", - "fields": [ - { - "name": "platform", - "type": "string", - "doc": "Standardized platform urn for the model", - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "name", - "type": "string", - "doc": "Name of the MLModel", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "origin", - "type": "com.linkedin.pegasus2avro.common.FabricType", - "doc": "Fabric type where model belongs to or where it was generated", - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - } - } - ], - "Aspect": { - "name": "mlModelKey" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - { - "type": "record", - "name": "MLModelProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Properties associated with a ML Model", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "externalUrl", - "type": [ - "null", - "string" - ], - "doc": "URL where the reference exist", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the MLModel", - "default": null, - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - } - }, - { - "name": "date", - "type": [ - "null", - "long" - ], - "doc": "Date when the MLModel was developed", - "default": null - }, - { - "name": "version", - "type": [ - "null", - { - "type": "record", - "name": "VersionTag", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "A resource-defined string representing the resource state for the purpose of concurrency control", - "fields": [ - { - "name": "versionTag", - "type": [ - "null", - "string" - ], - "default": null - } - ] - } - ], - "doc": "Version of the MLModel", - "default": null - }, - { - "name": "type", - "type": [ - "null", - "string" - ], - "doc": "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc", - "default": null, - "Searchable": { - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "hyperParameters", - "type": [ - "null", - { - "type": "map", - "values": [ - "string", - "int", - "float", - "double", - "boolean" - ] - } - ], - "doc": "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams", - "default": null - }, - { - "name": "hyperParams", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "MLHyperParam", - "doc": 
"Properties associated with an ML Hyper Param", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "Name of the MLHyperParam" - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the MLHyperParam", - "default": null - }, - { - "name": "value", - "type": [ - "null", - "string" - ], - "doc": "The value of the MLHyperParam", - "default": null - }, - { - "name": "createdAt", - "type": [ - "null", - "long" - ], - "doc": "Date when the MLHyperParam was developed", - "default": null - } - ], - "Aspect": { - "name": "mlHyperParam" - } - } - } - ], - "doc": "Hyperparameters of the MLModel", - "default": null - }, - { - "name": "trainingMetrics", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "MLMetric", - "doc": "Properties associated with an ML Metric", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "Name of the mlMetric" - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the mlMetric", - "default": null - }, - { - "name": "value", - "type": [ - "null", - "string" - ], - "doc": "The value of the mlMetric", - "default": null - }, - { - "name": "createdAt", - "type": [ - "null", - "long" - ], - "doc": "Date when the mlMetric was developed", - "default": null - } - ], - "Aspect": { - "name": "mlMetric" - } - } - } - ], - "doc": "Metrics of the MLModel used in training", - "default": null - }, - { - "name": "onlineMetrics", - "type": [ - "null", - { - "type": "array", - "items": "MLMetric" - } - ], - "doc": "Metrics of the MLModel used in production", - "default": null - }, - { - "name": "mlFeatures", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "List of features used for MLModel training", - "default": null - }, - { - "name": "tags", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Tags for the MLModel", - "default": [] - }, - { - "name": "deployments", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Deployments for the MLModel", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "mlModelDeployment" - ], - "name": "DeployedTo" - } - } - }, - { - "name": "trainingJobs", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "List of jobs (if any) used to train the model", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "dataJob" - ], - "name": "TrainedBy" - } - } - }, - { - "name": "downstreamJobs", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "List of jobs (if any) that use the model", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "dataJob" - ], - "name": "UsedBy" - } - } - }, - { - "name": "groups", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Groups the model belongs to", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "mlModelGroup" - ], - "name": "MemberOf" - } - } - } - ], - "Aspect": { - "name": "mlModelProperties" - } - }, - { - "type": "record", - "name": "IntendedUse", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Intended Use for the ML Model", - "fields": [ - { - "name": "primaryUses", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Primary Use cases for the MLModel.", - "default": null - }, - { - "name": "primaryUsers", - "type": [ - "null", - { - "type": "array", - "items": { - "type": 
"enum", - "name": "IntendedUserType", - "symbols": [ - "ENTERPRISE", - "HOBBY", - "ENTERTAINMENT" - ] - } - } - ], - "doc": "Primary Intended Users - For example, was the MLModel developed for entertainment purposes, for hobbyists, or enterprise solutions?", - "default": null - }, - { - "name": "outOfScopeUses", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Highlight technology that the MLModel might easily be confused with, or related contexts that users could try to apply the MLModel to.", - "default": null - } - ], - "Aspect": { - "name": "intendedUse" - } - }, - { - "type": "record", - "name": "MLModelFactorPrompts", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Prompts which affect the performance of the MLModel", - "fields": [ - { - "name": "relevantFactors", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "MLModelFactors", - "doc": "Factors affecting the performance of the MLModel.", - "fields": [ - { - "name": "groups", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Groups refers to distinct categories with similar characteristics that are present in the evaluation data instances.\nFor human-centric machine learning MLModels, groups are people who share one or multiple characteristics.", - "default": null - }, - { - "name": "instrumentation", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "The performance of a MLModel can vary depending on what instruments were used to capture the input to the MLModel.\nFor example, a face detection model may perform differently depending on the camera\u2019s hardware and software,\nincluding lens, image stabilization, high dynamic range techniques, and background blurring for portrait mode.", - "default": null - }, - { - "name": "environment", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "A further factor affecting MLModel performance is the environment in which it is deployed.", - "default": null - } - ] - } - } - ], - "doc": "What are foreseeable salient factors for which MLModel performance may vary, and how were these determined?", - "default": null - }, - { - "name": "evaluationFactors", - "type": [ - "null", - { - "type": "array", - "items": "MLModelFactors" - } - ], - "doc": "Which factors are being reported, and why were these chosen?", - "default": null - } - ], - "Aspect": { - "name": "mlModelFactorPrompts" - } - }, - { - "type": "record", - "name": "Metrics", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Metrics to be featured for the MLModel.", - "fields": [ - { - "name": "performanceMeasures", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Measures of MLModel performance", - "default": null - }, - { - "name": "decisionThreshold", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Decision Thresholds used (if any)?", - "default": null - } - ], - "Aspect": { - "name": "mlModelMetrics" - } - }, - { - "type": "record", - "name": "EvaluationData", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "All referenced datasets would ideally point to any set of documents that provide visibility into the source and composition of the dataset.", - "fields": [ - { - "name": "evaluationData", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "BaseData", - "doc": "BaseData record", - "fields": [ - { - "name": "dataset", 
- "type": "string", - "doc": "What dataset were used in the MLModel?", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.DatasetUrn" - } - }, - { - "name": "motivation", - "type": [ - "null", - "string" - ], - "doc": "Why was this dataset chosen?", - "default": null - }, - { - "name": "preProcessing", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "How was the data preprocessed (e.g., tokenization of sentences, cropping of images, any filtering such as dropping images without faces)?", - "default": null - } - ] - } - }, - "doc": "Details on the dataset(s) used for the quantitative analyses in the MLModel" - } - ], - "Aspect": { - "name": "mlModelEvaluationData" - } - }, - { - "type": "record", - "name": "TrainingData", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Ideally, the MLModel card would contain as much information about the training data as the evaluation data. However, there might be cases where it is not feasible to provide this level of detailed information about the training data. For example, the data may be proprietary, or require a non-disclosure agreement. In these cases, we advocate for basic details about the distributions over groups in the data, as well as any other details that could inform stakeholders on the kinds of biases the model may have encoded.", - "fields": [ - { - "name": "trainingData", - "type": { - "type": "array", - "items": "BaseData" - }, - "doc": "Details on the dataset(s) used for training the MLModel" - } - ], - "Aspect": { - "name": "mlModelTrainingData" - } - }, - { - "type": "record", - "name": "QuantitativeAnalyses", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Quantitative analyses should be disaggregated, that is, broken down by the chosen factors. Quantitative analyses should provide the results of evaluating the MLModel according to the chosen metrics, providing confidence interval values when possible.", - "fields": [ - { - "name": "unitaryResults", - "type": [ - "null", - "string" - ], - "doc": "Link to a dashboard with results showing how the MLModel performed with respect to each factor", - "default": null - }, - { - "name": "intersectionalResults", - "type": [ - "null", - "string" - ], - "doc": "Link to a dashboard with results showing how the MLModel performed with respect to the intersection of evaluated factors?", - "default": null - } - ], - "Aspect": { - "name": "mlModelQuantitativeAnalyses" - } - }, - { - "type": "record", - "name": "EthicalConsiderations", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "This section is intended to demonstrate the ethical considerations that went into MLModel development, surfacing ethical challenges and solutions to stakeholders.", - "fields": [ - { - "name": "data", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Does the MLModel use any sensitive data (e.g., protected classes)?", - "default": null - }, - { - "name": "humanLife", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": " Is the MLModel intended to inform decisions about matters central to human life or flourishing \u2013 e.g., health or safety? 
Or could it be used in such a way?", - "default": null - }, - { - "name": "mitigations", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "What risk mitigation strategies were used during MLModel development?", - "default": null - }, - { - "name": "risksAndHarms", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "What risks may be present in MLModel usage? Try to identify the potential recipients, likelihood, and magnitude of harms. If these cannot be determined, note that they were considered but remain unknown.", - "default": null - }, - { - "name": "useCases", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Are there any known MLModel use cases that are especially fraught? This may connect directly to the intended use section", - "default": null - } - ], - "Aspect": { - "name": "mlModelEthicalConsiderations" - } - }, - { - "type": "record", - "name": "CaveatsAndRecommendations", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?", - "fields": [ - { - "name": "caveats", - "type": [ - "null", - { - "type": "record", - "name": "CaveatDetails", - "doc": "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? Were there any relevant groups that were not represented in the evaluation dataset? Are there additional recommendations for model use?", - "fields": [ - { - "name": "needsFurtherTesting", - "type": [ - "null", - "boolean" - ], - "doc": "Did the results suggest any further testing?", - "default": null - }, - { - "name": "caveatDescription", - "type": [ - "null", - "string" - ], - "doc": "Caveat Description\nFor ex: Given gender classes are binary (male/not male), which we include as male/female. Further work needed to evaluate across a spectrum of genders.", - "default": null - }, - { - "name": "groupsNotRepresented", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Relevant groups that were not represented in the evaluation dataset?", - "default": null - } - ] - } - ], - "doc": "This section should list additional concerns that were not covered in the previous sections. For example, did the results suggest any further testing? 
Were there any relevant groups that were not represented in the evaluation dataset?", - "default": null - }, - { - "name": "recommendations", - "type": [ - "null", - "string" - ], - "doc": "Recommendations on where this MLModel should be used.", - "default": null - }, - { - "name": "idealDatasetCharacteristics", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Ideal characteristics of an evaluation dataset for this MLModel", - "default": null - } - ], - "Aspect": { - "name": "mlModelCaveatsAndRecommendations" - } - }, - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - { - "type": "record", - "name": "SourceCode", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Source Code", - "fields": [ - { - "name": "sourceCode", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "SourceCodeUrl", - "doc": "Source Code Url Entity", - "fields": [ - { - "name": "type", - "type": { - "type": "enum", - "name": "SourceCodeUrlType", - "symbols": [ - "ML_MODEL_SOURCE_CODE", - "TRAINING_PIPELINE_SOURCE_CODE", - "EVALUATION_PIPELINE_SOURCE_CODE" - ] - }, - "doc": "Source Code Url Types" - }, - { - "name": "sourceCodeUrl", - "type": "string", - "doc": "Source Code Url", - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - } - ] - } - }, - "doc": "Source Code along with types" - } - ], - "Aspect": { - "name": "sourceCode" - } - }, - "com.linkedin.pegasus2avro.common.Status", - { - "type": "record", - "name": "Cost", - "namespace": "com.linkedin.pegasus2avro.common", - "fields": [ - { - "name": "costType", - "type": { - "type": "enum", - "name": "CostType", - "doc": "Type of Cost Code", - "symbols": [ - "ORG_COST_TYPE" - ], - "symbolDocs": { - "ORG_COST_TYPE": "Org Cost Type to which the Cost of this entity should be attributed to" - } - } - }, - { - "name": "cost", - "type": { - "type": "record", - "name": "CostCost", - "fields": [ - { - "name": "costId", - "type": [ - "null", - "double" - ], - "default": null - }, - { - "name": "costCode", - "type": [ - "null", - "string" - ], - "default": null - }, - { - "name": "fieldDiscriminator", - "type": { - "type": "enum", - "name": "CostCostDiscriminator", - "symbols": [ - "costId", - "costCode" - ] - }, - "doc": "Contains the name of the field that has its value set." - } - ] - } - } - ], - "Aspect": { - "name": "cost" - } - }, - { - "type": "record", - "name": "Deprecation", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Deprecation status of an entity", - "fields": [ - { - "name": "deprecated", - "type": "boolean", - "doc": "Whether the entity is deprecated.", - "Searchable": { - "fieldType": "BOOLEAN", - "weightsPerFieldValue": { - "true": 0.5 - } - } - }, - { - "name": "decommissionTime", - "type": [ - "null", - "long" - ], - "doc": "The time user plan to decommission this entity.", - "default": null - }, - { - "name": "note", - "type": "string", - "doc": "Additional information about the entity deprecation plan, such as the wiki, doc, RB." - }, - { - "name": "actor", - "type": "string", - "doc": "The corpuser URN which will be credited for modifying this deprecation content.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.CorpuserUrn" - } - } - ], - "Aspect": { - "name": "deprecation" - } - }, - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "doc": "The list of metadata aspects associated with the MLModel. 
Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "mlModelKey", - "name": "mlModel" - } - }, - { - "type": "record", - "name": "MLPrimaryKeySnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "MLPrimaryKeyKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for an MLPrimaryKey", - "fields": [ - { - "name": "featureNamespace", - "type": "string", - "doc": "Namespace for the primary key", - "Searchable": { - "addToFilters": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "name", - "type": "string", - "doc": "Name of the primary key", - "Searchable": { - "boostScore": 8.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "mlPrimaryKeyKey" - } - }, - { - "type": "record", - "name": "MLPrimaryKeyProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Properties associated with a MLPrimaryKey", - "fields": [ - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the MLPrimaryKey", - "default": null - }, - { - "name": "dataType", - "type": [ - "null", - { - "type": "enum", - "name": "MLFeatureDataType", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "MLFeature Data Type", - "symbols": [ - "USELESS", - "NOMINAL", - "ORDINAL", - "BINARY", - "COUNT", - "TIME", - "INTERVAL", - "IMAGE", - "VIDEO", - "AUDIO", - "TEXT", - "MAP", - "SEQUENCE", - "SET", - "CONTINUOUS", - "BYTE", - "UNKNOWN" - ], - "symbolDocs": { - "AUDIO": "Audio Data", - "BINARY": "Binary data is discrete data that can be in only one of two categories \u2014 either yes or no, 1 or 0, off or on, etc", - "BYTE": "Bytes data are binary-encoded values that can represent complex objects.", - "CONTINUOUS": "Continuous data are made of uncountable values, often the result of a measurement such as height, weight, age etc.", - "COUNT": "Count data is discrete whole number data \u2014 no negative numbers here.\nCount data often has many small values, such as zero and one.", - "IMAGE": "Image Data", - "INTERVAL": "Interval data has equal spaces between the numbers and does not represent a temporal pattern.\nExamples include percentages, temperatures, and income.", - "MAP": "Mapping Data Type ex: dict, map", - "NOMINAL": "Nominal data is made of discrete values with no numerical relationship between the different categories \u2014 mean and median are meaningless.\nAnimal species is one example. 
For example, pig is not higher than bird and lower than fish.", - "ORDINAL": "Ordinal data are discrete integers that can be ranked or sorted.\nFor example, the distance between first and second may not be the same as the distance between second and third.", - "SEQUENCE": "Sequence Data Type ex: list, tuple, range", - "SET": "Set Data Type ex: set, frozenset", - "TEXT": "Text Data", - "TIME": "Time data is a cyclical, repeating continuous form of data.\nThe relevant time features can be any period\u2014 daily, weekly, monthly, annual, etc.", - "UNKNOWN": "Unknown data are data that we don't know the type for.", - "USELESS": "Useless data is unique, discrete data with no potential relationship with the outcome variable.\nA useless feature has high cardinality. An example would be bank account numbers that were generated randomly.", - "VIDEO": "Video Data" - } - } - ], - "doc": "Data Type of the MLPrimaryKey", - "default": null - }, - { - "name": "version", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "doc": "Version of the MLPrimaryKey", - "default": null - }, - { - "name": "sources", - "type": { - "type": "array", - "items": "string" - }, - "doc": "Source of the MLPrimaryKey", - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "DerivedFrom" - } - } - } - ], - "Aspect": { - "name": "mlPrimaryKeyProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation" - ] - }, - "doc": "The list of metadata aspects associated with the MLPrimaryKey. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "mlPrimaryKeyKey", - "name": "mlPrimaryKey" - } - }, - { - "type": "record", - "name": "MLFeatureSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.MLFeatureUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "MLFeatureKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for an MLFeature", - "fields": [ - { - "name": "featureNamespace", - "type": "string", - "doc": "Namespace for the feature", - "Searchable": { - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "name", - "type": "string", - "doc": "Name of the feature", - "Searchable": { - "boostScore": 8.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "mlFeatureKey" - } - }, - { - "type": "record", - "name": "MLFeatureProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Properties associated with a MLFeature", - "fields": [ - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the MLFeature", - "default": null - }, - { - "name": "dataType", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.MLFeatureDataType" - ], - "doc": "Data Type of the MLFeature", - "default": null - }, - { - "name": "version", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "doc": "Version of the MLFeature", - "default": null - }, - { - "name": "sources", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "Source of the 
MLFeature", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "dataset" - ], - "name": "DerivedFrom" - } - } - } - ], - "Aspect": { - "name": "mlFeatureProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "doc": "The list of metadata aspects associated with the MLFeature. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "mlFeatureKey", - "name": "mlFeature" - } - }, - { - "type": "record", - "name": "MLFeatureTableSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "MLFeatureTableKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for an MLFeatureTable", - "fields": [ - { - "name": "platform", - "type": "string", - "doc": "Data platform urn associated with the feature table", - "Relationship": { - "entityTypes": [ - "dataPlatform" - ], - "name": "SourcePlatform" - }, - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "name", - "type": "string", - "doc": "Name of the feature table", - "Searchable": { - "boostScore": 8.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "mlFeatureTableKey" - } - }, - { - "type": "record", - "name": "MLFeatureTableProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Properties associated with a MLFeatureTable", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the MLFeatureTable", - "default": null - }, - { - "name": "mlFeatures", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "List of features contained in the feature table", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "mlFeature" - ], - "name": "Contains" - } - } - }, - { - "name": "mlPrimaryKeys", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "List of primary keys in the feature table (if multiple, assumed to act as a composite key)", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "mlPrimaryKey" - ], - "name": "KeyedBy" - } - } - } - ], - "Aspect": { - "name": "mlFeatureTableProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.InstitutionalMemory", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "doc": "The list of metadata aspects associated with the MLFeatureTable. Depending on the use case, this can either be all, or a selection, of supported aspects." 
- } - ], - "Entity": { - "keyAspect": "mlFeatureTableKey", - "name": "mlFeatureTable" - } - }, - { - "type": "record", - "name": "MLModelDeploymentSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "MLModelDeploymentKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for an ML model deployment", - "fields": [ - { - "name": "platform", - "type": "string", - "doc": "Standardized platform urn for the model Deployment", - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "name", - "type": "string", - "doc": "Name of the MLModelDeployment", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "origin", - "type": "com.linkedin.pegasus2avro.common.FabricType", - "doc": "Fabric type where model Deployment belongs to or where it was generated", - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - } - } - ], - "Aspect": { - "name": "mlModelDeploymentKey" - } - }, - { - "type": "record", - "name": "MLModelDeploymentProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Properties associated with an ML Model Deployment", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "externalUrl", - "type": [ - "null", - "string" - ], - "doc": "URL where the reference exist", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the MLModelDeployment", - "default": null, - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - } - }, - { - "name": "createdAt", - "type": [ - "null", - "long" - ], - "doc": "Date when the MLModelDeployment was developed", - "default": null - }, - { - "name": "version", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "doc": "Version of the MLModelDeployment", - "default": null - }, - { - "name": "status", - "type": [ - "null", - { - "type": "enum", - "name": "DeploymentStatus", - "doc": "Model endpoint statuses", - "symbols": [ - "OUT_OF_SERVICE", - "CREATING", - "UPDATING", - "ROLLING_BACK", - "IN_SERVICE", - "DELETING", - "FAILED", - "UNKNOWN" - ], - "symbolDocs": { - "CREATING": "Deployments being created.", - "DELETING": "Deployments being deleted.", - "FAILED": "Deployments with an error state.", - "IN_SERVICE": "Deployments that are active.", - "OUT_OF_SERVICE": "Deployments out of service.", - "ROLLING_BACK": "Deployments being reverted to a previous version.", - "UNKNOWN": "Deployments with unknown/unmappable state.", - "UPDATING": "Deployments being updated." 
- } - } - ], - "doc": "Status of the deployment", - "default": null - } - ], - "Aspect": { - "name": "mlModelDeploymentProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation" - ] - }, - "doc": "The list of metadata aspects associated with the MLModelDeployment. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "mlModelDeploymentKey", - "name": "mlModelDeployment" - } - }, - { - "type": "record", - "name": "MLModelGroupSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "MLModelGroupKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for an ML model group", - "fields": [ - { - "name": "platform", - "type": "string", - "doc": "Standardized platform urn for the model group", - "Searchable": { - "addToFilters": true, - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "name", - "type": "string", - "doc": "Name of the MLModelGroup", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - }, - { - "name": "origin", - "type": "com.linkedin.pegasus2avro.common.FabricType", - "doc": "Fabric type where model group belongs to or where it was generated", - "Searchable": { - "fieldType": "TEXT_PARTIAL", - "queryByDefault": false - } - } - ], - "Aspect": { - "name": "mlModelGroupKey" - } - }, - { - "type": "record", - "name": "MLModelGroupProperties", - "namespace": "com.linkedin.pegasus2avro.ml.metadata", - "doc": "Properties associated with an ML Model Group", - "fields": [ - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "Custom property bag.", - "default": {}, - "Searchable": { - "/*": { - "queryByDefault": true - } - } - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the MLModelGroup", - "default": null, - "Searchable": { - "fieldType": "TEXT", - "hasValuesFieldName": "hasDescription" - } - }, - { - "name": "createdAt", - "type": [ - "null", - "long" - ], - "doc": "Date when the MLModelGroup was developed", - "default": null - }, - { - "name": "version", - "type": [ - "null", - "com.linkedin.pegasus2avro.common.VersionTag" - ], - "doc": "Version of the MLModelGroup", - "default": null - } - ], - "Aspect": { - "name": "mlModelGroupProperties" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.Deprecation", - "com.linkedin.pegasus2avro.common.BrowsePaths" - ] - }, - "doc": "The list of metadata aspects associated with the MLModelGroup. Depending on the use case, this can either be all, or a selection, of supported aspects." 
- } - ], - "Entity": { - "keyAspect": "mlModelGroupKey", - "name": "mlModelGroup" - } - }, - { - "type": "record", - "name": "TagSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific dataset entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.TagUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "TagKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a Tag", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "The unique tag name", - "Searchable": { - "boostScore": 10.0, - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "tagKey" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - { - "type": "record", - "name": "TagProperties", - "namespace": "com.linkedin.pegasus2avro.tag", - "doc": "Properties associated with a Tag", - "fields": [ - { - "name": "name", - "type": "string", - "doc": "Name of the tag" - }, - { - "name": "description", - "type": [ - "null", - "string" - ], - "doc": "Documentation of the tag", - "default": null - } - ], - "Aspect": { - "name": "tagProperties" - } - }, - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "doc": "The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "tagKey", - "name": "tag" - } - }, - { - "type": "record", - "name": "GlossaryTermSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific GlossaryTerm entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "GlossaryTermKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a GlossaryTerm", - "fields": [ - { - "name": "name", - "type": "string", - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "glossaryTermKey" - } - }, - { - "type": "record", - "name": "GlossaryTermInfo", - "namespace": "com.linkedin.pegasus2avro.glossary", - "doc": "Properties associated with a GlossaryTerm", - "fields": [ - { - "name": "definition", - "type": "string", - "doc": "Definition of business term", - "Searchable": {} - }, - { - "name": "parentNode", - "type": [ - "null", - "string" - ], - "doc": "Parent node of the glossary term", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryNodeUrn" - } - }, - { - "name": "termSource", - "type": "string", - "doc": "Source of the Business Term (INTERNAL or EXTERNAL) with default value as INTERNAL", - "Searchable": { - "fieldType": "KEYWORD" - } - }, - { - "name": "sourceRef", - "type": [ - "null", - "string" - ], - "doc": "External Reference to the business-term", - "default": null, - "Searchable": { - "fieldType": "KEYWORD" - } - }, - { - "name": "sourceUrl", - "type": [ - "null", - "string" - ], - "doc": "The abstracted URL such as 
https://spec.edmcouncil.org/fibo/ontology/FBC/FinancialInstruments/FinancialInstruments/CashInstrument.", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.url.Url", - "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer" - } - }, - { - "name": "customProperties", - "type": { - "type": "map", - "values": "string" - }, - "doc": "A key-value map to capture any other non-standardized properties for the glossary term", - "default": {} - }, - { - "name": "rawSchema", - "type": [ - "null", - "string" - ], - "doc": "Schema definition of the glossary term", - "default": null - } - ], - "Aspect": { - "name": "glossaryTermInfo" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status", - "com.linkedin.pegasus2avro.common.BrowsePaths", - { - "type": "record", - "name": "GlossaryRelatedTerms", - "namespace": "com.linkedin.pegasus2avro.glossary", - "doc": "Has A / Is A lineage information about a glossary Term reporting the lineage", - "fields": [ - { - "name": "isRelatedTerms", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "The relationship Is A with glossary term", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "glossaryTerm" - ], - "name": "IsA" - } - }, - "Searchable": { - "/*": { - "boostScore": 2.0, - "fieldName": "isRelatedTerms", - "fieldType": "URN" - } - } - }, - { - "name": "hasRelatedTerms", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "The relationship Has A with glossary term", - "default": null, - "Relationship": { - "/*": { - "entityTypes": [ - "glossaryTerm" - ], - "name": "HasA" - } - }, - "Searchable": { - "/*": { - "boostScore": 2.0, - "fieldName": "hasRelatedTerms", - "fieldType": "URN" - } - } - } - ], - "Aspect": { - "name": "glossaryRelatedTerms" - } - } - ] - }, - "doc": "The list of metadata aspects associated with the GlossaryTerm. Depending on the use case, this can either be all, or a selection, of supported aspects." 
- } - ], - "Entity": { - "keyAspect": "glossaryTermKey", - "name": "glossaryTerm" - } - }, - { - "type": "record", - "name": "GlossaryNodeSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific GlossaryNode entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryNodeUrn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "GlossaryNodeKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a GlossaryNode", - "fields": [ - { - "name": "name", - "type": "string", - "Searchable": { - "enableAutocomplete": true, - "fieldType": "TEXT_PARTIAL" - } - } - ], - "Aspect": { - "name": "glossaryNodeKey" - } - }, - { - "type": "record", - "name": "GlossaryNodeInfo", - "namespace": "com.linkedin.pegasus2avro.glossary", - "doc": "Properties associated with a GlossaryNode", - "fields": [ - { - "name": "definition", - "type": "string", - "doc": "Definition of business node", - "Searchable": {} - }, - { - "name": "parentNode", - "type": [ - "null", - "string" - ], - "doc": "Parent node of the glossary term", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.GlossaryNodeUrn" - } - } - ], - "Aspect": { - "name": "glossaryNodeInfo" - } - }, - "com.linkedin.pegasus2avro.common.Ownership", - "com.linkedin.pegasus2avro.common.Status" - ] - }, - "doc": "The list of metadata aspects associated with the GlossaryNode. Depending on the use case, this can either be all, or a selection, of supported aspects." - } - ], - "Entity": { - "keyAspect": "glossaryNodeKey", - "name": "glossaryNode" - } - }, - { - "type": "record", - "name": "DataHubPolicySnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for DataHub Access Policy data.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "DataHubPolicyKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a DataHub Policy", - "fields": [ - { - "name": "id", - "type": "string", - "doc": "A unique id for the DataHub access policy record. Generated on the server side at policy creation time." - } - ], - "Aspect": { - "name": "dataHubPolicyKey" - } - }, - { - "type": "record", - "name": "DataHubPolicyInfo", - "namespace": "com.linkedin.pegasus2avro.policy", - "doc": "Information about a DataHub (UI) access policy.", - "fields": [ - { - "name": "displayName", - "type": "string", - "doc": "Display name of the Policy" - }, - { - "name": "description", - "type": "string", - "doc": "Description of the Policy" - }, - { - "name": "type", - "type": "string", - "doc": "The type of policy" - }, - { - "name": "state", - "type": "string", - "doc": "The state of policy, ACTIVE or INACTIVE" - }, - { - "name": "resources", - "type": [ - "null", - { - "type": "record", - "name": "DataHubResourceFilter", - "doc": "Information used to filter DataHub resource.", - "fields": [ - { - "name": "type", - "type": [ - "null", - "string" - ], - "doc": "The type of resource that the policy applies to. 
This will most often be a data asset entity name, for\nexample 'dataset'. It is not strictly required because in the future we will want to support filtering a resource\nby domain, as well.", - "default": null - }, - { - "name": "resources", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "A specific set of resources to apply the policy to, e.g. asset urns", - "default": null - }, - { - "name": "allResources", - "type": "boolean", - "doc": "Whether the policy should be applied to all assets matching the filter.", - "default": false - } - ] - } - ], - "doc": "The resource that the policy applies to. Not required for some 'Platform' privileges.", - "default": null - }, - { - "name": "privileges", - "type": { - "type": "array", - "items": "string" - }, - "doc": "The privileges that the policy grants." - }, - { - "name": "actors", - "type": { - "type": "record", - "name": "DataHubActorFilter", - "doc": "Information used to filter DataHub actors.", - "fields": [ - { - "name": "users", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "A specific set of users to apply the policy to (disjunctive)", - "default": null - }, - { - "name": "groups", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": "A specific set of groups to apply the policy to (disjunctive)", - "default": null - }, - { - "name": "resourceOwners", - "type": "boolean", - "doc": "Whether the filter should return true for owners of a particular resource.\nOnly applies to policies of type 'Metadata', which have a resource associated with them.", - "default": false - }, - { - "name": "allUsers", - "type": "boolean", - "doc": "Whether the filter should apply to all users.", - "default": false - }, - { - "name": "allGroups", - "type": "boolean", - "doc": "Whether the filter should apply to all groups.", - "default": false - } - ] - }, - "doc": "The actors that the policy applies to." - }, - { - "name": "editable", - "type": "boolean", - "doc": "Whether the policy should be editable via the UI", - "default": true - } - ], - "Aspect": { - "name": "dataHubPolicyInfo" - } - } - ] - }, - "doc": "The list of metadata aspects associated with the DataHub access policy." - } - ], - "Entity": { - "keyAspect": "dataHubPolicyKey", - "name": "dataHubPolicy" - } - }, - { - "type": "record", - "name": "SchemaFieldSnapshot", - "namespace": "com.linkedin.pegasus2avro.metadata.snapshot", - "doc": "A metadata snapshot for a specific schema field entity.", - "fields": [ - { - "name": "urn", - "type": "string", - "doc": "URN for the entity the metadata snapshot is associated with.", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "aspects", - "type": { - "type": "array", - "items": [ - { - "type": "record", - "name": "SchemaFieldKey", - "namespace": "com.linkedin.pegasus2avro.metadata.key", - "doc": "Key for a SchemaField", - "fields": [ - { - "name": "parent", - "type": "string", - "doc": "Parent associated with the schema field", - "Searchable": { - "fieldType": "URN" - }, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "fieldPath", - "type": "string", - "doc": "fieldPath identifying the schema field", - "Searchable": { - "fieldType": "KEYWORD" - } - } - ], - "Aspect": { - "name": "schemaFieldKey" - } - } - ] - }, - "doc": "The list of metadata aspects associated with the dataset. Depending on the use case, this can either be all, or a selection, of supported aspects." 
- } - ], - "Entity": { - "keyAspect": "schemaFieldKey", - "name": "schemaField" - } - } - ], - "doc": "Snapshot of the proposed metadata change. Include only the aspects affected by the change in the snapshot." - }, - { - "name": "proposedDelta", - "type": [ - "null" - ], - "doc": "Delta of the proposed metadata partial update.", - "default": null - }, - { - "name": "systemMetadata", - "type": [ - "null", - { - "type": "record", - "name": "SystemMetadata", - "doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead.", - "fields": [ - { - "name": "lastObserved", - "type": [ - "long", - "null" - ], - "doc": "The timestamp the metadata was observed at", - "default": 0 - }, - { - "name": "runId", - "type": [ - "string", - "null" - ], - "doc": "The run id that produced the metadata", - "default": "no-run-id-provided" - }, - { - "name": "properties", - "type": [ - "null", - { - "type": "map", - "values": "string" - } - ], - "doc": "Additional properties", - "default": null - } - ] - } - ], - "doc": "Metadata around how the snapshot was ingested", - "default": null - } - ] -} \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/metadata/schemas/MetadataChangeProposal.avsc b/metadata-ingestion/src/datahub/metadata/schemas/MetadataChangeProposal.avsc deleted file mode 100644 index f1bd5328cd..0000000000 --- a/metadata-ingestion/src/datahub/metadata/schemas/MetadataChangeProposal.avsc +++ /dev/null @@ -1,222 +0,0 @@ -{ - "type": "record", - "name": "MetadataChangeProposal", - "namespace": "com.linkedin.pegasus2avro.mxe", - "doc": "Kafka event for proposing a metadata change for an entity. A corresponding MetadataChangeLog is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeProposal will be emitted instead.", - "fields": [ - { - "name": "auditHeader", - "type": [ - "null", - { - "type": "record", - "name": "KafkaAuditHeader", - "namespace": "com.linkedin.events", - "doc": "This header records information about the context of an event as it is emitted into kafka and is intended to be used by the kafka audit application. For more information see go/kafkaauditheader", - "fields": [ - { - "name": "time", - "type": "long", - "doc": "The time at which the event was emitted into kafka.", - "compliance": [ - { - "policy": "EVENT_TIME" - } - ] - }, - { - "name": "server", - "type": "string", - "doc": "The fully qualified name of the host from which the event is being emitted.", - "compliance": "NONE" - }, - { - "name": "instance", - "type": [ - "null", - "string" - ], - "doc": "The instance on the server from which the event is being emitted. e.g. i001", - "default": null, - "compliance": "NONE" - }, - { - "name": "appName", - "type": "string", - "doc": "The name of the application from which the event is being emitted. see go/appname", - "compliance": "NONE" - }, - { - "name": "messageId", - "type": { - "type": "fixed", - "name": "UUID", - "size": 16 - }, - "doc": "A unique identifier for the message", - "compliance": "NONE" - }, - { - "name": "auditVersion", - "type": [ - "null", - "int" - ], - "doc": "The version that is being used for auditing. In version 0, the audit trail buckets events into 10 minute audit windows based on the EventHeader timestamp. 
In version 1, the audit trail buckets events as follows: if the schema has an outer KafkaAuditHeader, use the outer audit header timestamp for bucketing; else if the EventHeader has an inner KafkaAuditHeader use that inner audit header's timestamp for bucketing", - "default": null, - "compliance": "NONE" - }, - { - "name": "fabricUrn", - "type": [ - "null", - "string" - ], - "doc": "The fabricUrn of the host from which the event is being emitted. Fabric Urn in the format of urn:li:fabric:{fabric_name}. See go/fabric.", - "default": null, - "compliance": "NONE" - }, - { - "name": "clusterConnectionString", - "type": [ - "null", - "string" - ], - "doc": "This is a String that the client uses to establish some kind of connection with the Kafka cluster. The exact format of it depends on specific versions of clients and brokers. This information could potentially identify the fabric and cluster with which the client is producing to or consuming from.", - "default": null, - "compliance": "NONE" - } - ] - } - ], - "doc": "Kafka audit header. See go/kafkaauditheader for more info.", - "default": null - }, - { - "name": "entityType", - "type": "string", - "doc": "Type of the entity being written to" - }, - { - "name": "entityUrn", - "type": [ - "null", - "string" - ], - "doc": "Urn of the entity being written\n", - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "entityKeyAspect", - "type": [ - "null", - { - "type": "record", - "name": "GenericAspect", - "doc": "Generic record structure for serializing an Aspect\n", - "fields": [ - { - "name": "value", - "type": "bytes" - }, - { - "name": "contentType", - "type": "string" - } - ] - } - ], - "doc": "Key aspect of the entity being written", - "default": null - }, - { - "name": "changeType", - "type": { - "type": "enum", - "name": "ChangeType", - "namespace": "com.linkedin.pegasus2avro.events.metadata", - "doc": "Descriptor for a change action", - "symbols": [ - "UPSERT", - "CREATE", - "UPDATE", - "DELETE", - "PATCH" - ], - "symbolDocs": { - "CREATE": "NOT SUPPORTED YET\ninsert if not exists. otherwise fail", - "DELETE": "NOT SUPPORTED YET\ndelete action", - "PATCH": "NOT SUPPORTED YET\npatch the changes instead of full replace", - "UPDATE": "NOT SUPPORTED YET\nupdate if exists. otherwise fail", - "UPSERT": "insert if not exists. otherwise update" - } - }, - "doc": "Type of change being proposed" - }, - { - "name": "aspectName", - "type": [ - "null", - "string" - ], - "doc": "Aspect of the entity being written to\nNot filling this out implies that the writer wants to affect the entire entity\nNote: This is only valid for CREATE and DELETE operations.\n", - "default": null - }, - { - "name": "aspect", - "type": [ - "null", - "GenericAspect" - ], - "default": null - }, - { - "name": "systemMetadata", - "type": [ - "null", - { - "type": "record", - "name": "SystemMetadata", - "doc": "Kafka event for proposing a metadata change for an entity. 
A corresponding MetadataAuditEvent is emitted when the change is accepted and committed, otherwise a FailedMetadataChangeEvent will be emitted instead.", - "fields": [ - { - "name": "lastObserved", - "type": [ - "long", - "null" - ], - "doc": "The timestamp the metadata was observed at", - "default": 0 - }, - { - "name": "runId", - "type": [ - "string", - "null" - ], - "doc": "The run id that produced the metadata", - "default": "no-run-id-provided" - }, - { - "name": "properties", - "type": [ - "null", - { - "type": "map", - "values": "string" - } - ], - "doc": "Additional properties", - "default": null - } - ] - } - ], - "doc": "A string->string map of custom properties that one might want to attach to an event\n", - "default": null - } - ] -} \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/metadata/schemas/UsageAggregation.avsc b/metadata-ingestion/src/datahub/metadata/schemas/UsageAggregation.avsc deleted file mode 100644 index 4dea9af6f8..0000000000 --- a/metadata-ingestion/src/datahub/metadata/schemas/UsageAggregation.avsc +++ /dev/null @@ -1,147 +0,0 @@ -{ - "type": "record", - "name": "UsageAggregation", - "namespace": "com.linkedin.pegasus2avro.usage", - "doc": "Usage data for a given resource, rolled up into a bucket.", - "fields": [ - { - "name": "bucket", - "type": "long", - "doc": " Bucket start time in milliseconds " - }, - { - "name": "duration", - "type": { - "type": "enum", - "name": "WindowDuration", - "namespace": "com.linkedin.pegasus2avro.common", - "doc": "Enum to define the length of a bucket when doing aggregations", - "symbols": [ - "YEAR", - "MONTH", - "WEEK", - "DAY", - "HOUR" - ] - }, - "doc": " Bucket duration " - }, - { - "name": "resource", - "type": "string", - "doc": " Resource associated with these usage stats ", - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "metrics", - "type": { - "type": "record", - "name": "UsageAggregationMetrics", - "doc": "Metrics for usage data for a given resource and bucket. 
Not all fields\nmake sense for all buckets, so every field is optional.", - "fields": [ - { - "name": "uniqueUserCount", - "type": [ - "null", - "int" - ], - "doc": " Unique user count ", - "default": null - }, - { - "name": "users", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "UserUsageCounts", - "doc": " Records a single user's usage counts for a given resource ", - "fields": [ - { - "name": "user", - "type": [ - "null", - "string" - ], - "default": null, - "java": { - "class": "com.linkedin.pegasus2avro.common.urn.Urn" - } - }, - { - "name": "count", - "type": "int" - }, - { - "name": "userEmail", - "type": [ - "null", - "string" - ], - "doc": " If user_email is set, we attempt to resolve the user's urn upon ingest ", - "default": null - } - ] - } - } - ], - "doc": " Users within this bucket, with frequency counts ", - "default": null - }, - { - "name": "totalSqlQueries", - "type": [ - "null", - "int" - ], - "doc": " Total SQL query count ", - "default": null - }, - { - "name": "topSqlQueries", - "type": [ - "null", - { - "type": "array", - "items": "string" - } - ], - "doc": " Frequent SQL queries; mostly makes sense for datasets in SQL databases ", - "default": null - }, - { - "name": "fields", - "type": [ - "null", - { - "type": "array", - "items": { - "type": "record", - "name": "FieldUsageCounts", - "doc": " Records field-level usage counts for a given resource ", - "fields": [ - { - "name": "fieldName", - "type": "string" - }, - { - "name": "count", - "type": "int" - } - ] - } - } - ], - "doc": " Field-level usage stats ", - "default": null - } - ] - }, - "doc": " Metrics associated with this bucket " - } - ] -} \ No newline at end of file diff --git a/metadata-ingestion/src/datahub/metadata/schemas/__init__.py b/metadata-ingestion/src/datahub/metadata/schemas/__init__.py deleted file mode 100644 index e8ac4465ea..0000000000 --- a/metadata-ingestion/src/datahub/metadata/schemas/__init__.py +++ /dev/null @@ -1,325 +0,0 @@ -# flake8: noqa - -# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py -# Do not modify manually! 
- -# fmt: off - -import functools -import pathlib - -def _load_schema(schema_name: str) -> str: - return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text() - -@functools.lru_cache(maxsize=None) -def getMetadataChangeEventSchema() -> str: - return _load_schema("MetadataChangeEvent") - -@functools.lru_cache(maxsize=None) -def getMetadataChangeProposalSchema() -> str: - return _load_schema("MetadataChangeProposal") - -@functools.lru_cache(maxsize=None) -def getUsageAggregationSchema() -> str: - return _load_schema("UsageAggregation") - -@functools.lru_cache(maxsize=None) -def getChartInfoSchema() -> str: - return _load_schema("ChartInfo") - -@functools.lru_cache(maxsize=None) -def getChartQuerySchema() -> str: - return _load_schema("ChartQuery") - -@functools.lru_cache(maxsize=None) -def getEditableChartPropertiesSchema() -> str: - return _load_schema("EditableChartProperties") - -@functools.lru_cache(maxsize=None) -def getBrowsePathsSchema() -> str: - return _load_schema("BrowsePaths") - -@functools.lru_cache(maxsize=None) -def getCostSchema() -> str: - return _load_schema("Cost") - -@functools.lru_cache(maxsize=None) -def getDeprecationSchema() -> str: - return _load_schema("Deprecation") - -@functools.lru_cache(maxsize=None) -def getGlobalTagsSchema() -> str: - return _load_schema("GlobalTags") - -@functools.lru_cache(maxsize=None) -def getGlossaryTermsSchema() -> str: - return _load_schema("GlossaryTerms") - -@functools.lru_cache(maxsize=None) -def getInstitutionalMemorySchema() -> str: - return _load_schema("InstitutionalMemory") - -@functools.lru_cache(maxsize=None) -def getOwnershipSchema() -> str: - return _load_schema("Ownership") - -@functools.lru_cache(maxsize=None) -def getStatusSchema() -> str: - return _load_schema("Status") - -@functools.lru_cache(maxsize=None) -def getDashboardInfoSchema() -> str: - return _load_schema("DashboardInfo") - -@functools.lru_cache(maxsize=None) -def getEditableDashboardPropertiesSchema() -> str: - return _load_schema("EditableDashboardProperties") - -@functools.lru_cache(maxsize=None) -def getDataFlowInfoSchema() -> str: - return _load_schema("DataFlowInfo") - -@functools.lru_cache(maxsize=None) -def getDataJobInfoSchema() -> str: - return _load_schema("DataJobInfo") - -@functools.lru_cache(maxsize=None) -def getDataJobInputOutputSchema() -> str: - return _load_schema("DataJobInputOutput") - -@functools.lru_cache(maxsize=None) -def getEditableDataFlowPropertiesSchema() -> str: - return _load_schema("EditableDataFlowProperties") - -@functools.lru_cache(maxsize=None) -def getEditableDataJobPropertiesSchema() -> str: - return _load_schema("EditableDataJobProperties") - -@functools.lru_cache(maxsize=None) -def getDataPlatformInfoSchema() -> str: - return _load_schema("DataPlatformInfo") - -@functools.lru_cache(maxsize=None) -def getDataProcessInfoSchema() -> str: - return _load_schema("DataProcessInfo") - -@functools.lru_cache(maxsize=None) -def getDatasetDeprecationSchema() -> str: - return _load_schema("DatasetDeprecation") - -@functools.lru_cache(maxsize=None) -def getDatasetProfileSchema() -> str: - return _load_schema("DatasetProfile") - -@functools.lru_cache(maxsize=None) -def getDatasetPropertiesSchema() -> str: - return _load_schema("DatasetProperties") - -@functools.lru_cache(maxsize=None) -def getDatasetUpstreamLineageSchema() -> str: - return _load_schema("DatasetUpstreamLineage") - -@functools.lru_cache(maxsize=None) -def getDatasetUsageStatisticsSchema() -> str: - return _load_schema("DatasetUsageStatistics") - 
-@functools.lru_cache(maxsize=None) -def getEditableDatasetPropertiesSchema() -> str: - return _load_schema("EditableDatasetProperties") - -@functools.lru_cache(maxsize=None) -def getUpstreamLineageSchema() -> str: - return _load_schema("UpstreamLineage") - -@functools.lru_cache(maxsize=None) -def getGlossaryNodeInfoSchema() -> str: - return _load_schema("GlossaryNodeInfo") - -@functools.lru_cache(maxsize=None) -def getGlossaryRelatedTermsSchema() -> str: - return _load_schema("GlossaryRelatedTerms") - -@functools.lru_cache(maxsize=None) -def getGlossaryTermInfoSchema() -> str: - return _load_schema("GlossaryTermInfo") - -@functools.lru_cache(maxsize=None) -def getCorpGroupInfoSchema() -> str: - return _load_schema("CorpGroupInfo") - -@functools.lru_cache(maxsize=None) -def getCorpUserEditableInfoSchema() -> str: - return _load_schema("CorpUserEditableInfo") - -@functools.lru_cache(maxsize=None) -def getCorpUserInfoSchema() -> str: - return _load_schema("CorpUserInfo") - -@functools.lru_cache(maxsize=None) -def getGroupMembershipSchema() -> str: - return _load_schema("GroupMembership") - -@functools.lru_cache(maxsize=None) -def getChartKeySchema() -> str: - return _load_schema("ChartKey") - -@functools.lru_cache(maxsize=None) -def getCorpGroupKeySchema() -> str: - return _load_schema("CorpGroupKey") - -@functools.lru_cache(maxsize=None) -def getCorpUserKeySchema() -> str: - return _load_schema("CorpUserKey") - -@functools.lru_cache(maxsize=None) -def getDashboardKeySchema() -> str: - return _load_schema("DashboardKey") - -@functools.lru_cache(maxsize=None) -def getDataFlowKeySchema() -> str: - return _load_schema("DataFlowKey") - -@functools.lru_cache(maxsize=None) -def getDataHubPolicyKeySchema() -> str: - return _load_schema("DataHubPolicyKey") - -@functools.lru_cache(maxsize=None) -def getDataJobKeySchema() -> str: - return _load_schema("DataJobKey") - -@functools.lru_cache(maxsize=None) -def getDataPlatformKeySchema() -> str: - return _load_schema("DataPlatformKey") - -@functools.lru_cache(maxsize=None) -def getDataProcessKeySchema() -> str: - return _load_schema("DataProcessKey") - -@functools.lru_cache(maxsize=None) -def getDatasetKeySchema() -> str: - return _load_schema("DatasetKey") - -@functools.lru_cache(maxsize=None) -def getGlossaryNodeKeySchema() -> str: - return _load_schema("GlossaryNodeKey") - -@functools.lru_cache(maxsize=None) -def getGlossaryTermKeySchema() -> str: - return _load_schema("GlossaryTermKey") - -@functools.lru_cache(maxsize=None) -def getMLFeatureKeySchema() -> str: - return _load_schema("MLFeatureKey") - -@functools.lru_cache(maxsize=None) -def getMLFeatureTableKeySchema() -> str: - return _load_schema("MLFeatureTableKey") - -@functools.lru_cache(maxsize=None) -def getMLModelDeploymentKeySchema() -> str: - return _load_schema("MLModelDeploymentKey") - -@functools.lru_cache(maxsize=None) -def getMLModelGroupKeySchema() -> str: - return _load_schema("MLModelGroupKey") - -@functools.lru_cache(maxsize=None) -def getMLModelKeySchema() -> str: - return _load_schema("MLModelKey") - -@functools.lru_cache(maxsize=None) -def getMLPrimaryKeyKeySchema() -> str: - return _load_schema("MLPrimaryKeyKey") - -@functools.lru_cache(maxsize=None) -def getSchemaFieldKeySchema() -> str: - return _load_schema("SchemaFieldKey") - -@functools.lru_cache(maxsize=None) -def getTagKeySchema() -> str: - return _load_schema("TagKey") - -@functools.lru_cache(maxsize=None) -def getCaveatsAndRecommendationsSchema() -> str: - return _load_schema("CaveatsAndRecommendations") - 
-@functools.lru_cache(maxsize=None) -def getEthicalConsiderationsSchema() -> str: - return _load_schema("EthicalConsiderations") - -@functools.lru_cache(maxsize=None) -def getEvaluationDataSchema() -> str: - return _load_schema("EvaluationData") - -@functools.lru_cache(maxsize=None) -def getIntendedUseSchema() -> str: - return _load_schema("IntendedUse") - -@functools.lru_cache(maxsize=None) -def getMLFeaturePropertiesSchema() -> str: - return _load_schema("MLFeatureProperties") - -@functools.lru_cache(maxsize=None) -def getMLFeatureTablePropertiesSchema() -> str: - return _load_schema("MLFeatureTableProperties") - -@functools.lru_cache(maxsize=None) -def getMLHyperParamSchema() -> str: - return _load_schema("MLHyperParam") - -@functools.lru_cache(maxsize=None) -def getMLMetricSchema() -> str: - return _load_schema("MLMetric") - -@functools.lru_cache(maxsize=None) -def getMLModelDeploymentPropertiesSchema() -> str: - return _load_schema("MLModelDeploymentProperties") - -@functools.lru_cache(maxsize=None) -def getMLModelFactorPromptsSchema() -> str: - return _load_schema("MLModelFactorPrompts") - -@functools.lru_cache(maxsize=None) -def getMLModelGroupPropertiesSchema() -> str: - return _load_schema("MLModelGroupProperties") - -@functools.lru_cache(maxsize=None) -def getMLModelPropertiesSchema() -> str: - return _load_schema("MLModelProperties") - -@functools.lru_cache(maxsize=None) -def getMLPrimaryKeyPropertiesSchema() -> str: - return _load_schema("MLPrimaryKeyProperties") - -@functools.lru_cache(maxsize=None) -def getMetricsSchema() -> str: - return _load_schema("Metrics") - -@functools.lru_cache(maxsize=None) -def getQuantitativeAnalysesSchema() -> str: - return _load_schema("QuantitativeAnalyses") - -@functools.lru_cache(maxsize=None) -def getSourceCodeSchema() -> str: - return _load_schema("SourceCode") - -@functools.lru_cache(maxsize=None) -def getTrainingDataSchema() -> str: - return _load_schema("TrainingData") - -@functools.lru_cache(maxsize=None) -def getDataHubPolicyInfoSchema() -> str: - return _load_schema("DataHubPolicyInfo") - -@functools.lru_cache(maxsize=None) -def getEditableSchemaMetadataSchema() -> str: - return _load_schema("EditableSchemaMetadata") - -@functools.lru_cache(maxsize=None) -def getSchemaMetadataSchema() -> str: - return _load_schema("SchemaMetadata") - -@functools.lru_cache(maxsize=None) -def getTagPropertiesSchema() -> str: - return _load_schema("TagProperties") -# fmt: on