mirror of
https://github.com/datahub-project/datahub.git
synced 2025-08-18 06:06:55 +00:00
feat(model): fully populate the entity registry (#7818)
This commit is contained in:
parent
97ac8d93f8
commit
af566e1184
@ -1,3 +1,4 @@
|
|||||||
|
import collections
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@ -360,6 +361,15 @@ def generate(
|
|||||||
f'Entity key {entity.keyAspect} is used by {aspect["Aspect"]["keyForEntity"]} and {entity.name}'
|
f'Entity key {entity.keyAspect} is used by {aspect["Aspect"]["keyForEntity"]} and {entity.name}'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Also require that the aspect list is deduplicated.
|
||||||
|
duplicate_aspects = collections.Counter(entity.aspects) - collections.Counter(
|
||||||
|
set(entity.aspects)
|
||||||
|
)
|
||||||
|
if duplicate_aspects:
|
||||||
|
raise ValueError(
|
||||||
|
f"Entity {entity.name} has duplicate aspects: {duplicate_aspects}"
|
||||||
|
)
|
||||||
|
|
||||||
aspect["Aspect"]["keyForEntity"] = entity.name
|
aspect["Aspect"]["keyForEntity"] = entity.name
|
||||||
aspect["Aspect"]["entityCategory"] = entity.category
|
aspect["Aspect"]["entityCategory"] = entity.category
|
||||||
aspect["Aspect"]["entityAspects"] = entity.aspects
|
aspect["Aspect"]["entityAspects"] = entity.aspects
|
||||||
|
@ -1,15 +1,27 @@
|
|||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
import typing
|
||||||
|
from typing import List, Type
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
import typing_inspect
|
||||||
|
|
||||||
from datahub.metadata.schema_classes import (
|
from datahub.metadata.schema_classes import (
|
||||||
ASPECT_CLASSES,
|
ASPECT_CLASSES,
|
||||||
KEY_ASPECTS,
|
KEY_ASPECTS,
|
||||||
FineGrainedLineageClass,
|
FineGrainedLineageClass,
|
||||||
|
MetadataChangeEventClass,
|
||||||
OwnershipClass,
|
OwnershipClass,
|
||||||
TelemetryKeyClass,
|
TelemetryKeyClass,
|
||||||
UpstreamClass,
|
UpstreamClass,
|
||||||
_Aspect,
|
_Aspect,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_UPDATE_ENTITY_REGISTRY = os.getenv("UPDATE_ENTITY_REGISTRY", "false").lower() == "true"
|
||||||
|
ENTITY_REGISTRY_PATH = pathlib.Path(
|
||||||
|
"../metadata-models/src/main/resources/entity-registry.yml"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_class_filter() -> None:
|
def test_class_filter() -> None:
|
||||||
# The codegen should only generate classes for aspects and a few extra classes.
|
# The codegen should only generate classes for aspects and a few extra classes.
|
||||||
@ -67,3 +79,74 @@ def test_urn_annotation():
|
|||||||
assert FineGrainedLineageClass.RECORD_SCHEMA.fields_dict["upstreams"].get_prop(
|
assert FineGrainedLineageClass.RECORD_SCHEMA.fields_dict["upstreams"].get_prop(
|
||||||
"urn_is_array"
|
"urn_is_array"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _add_to_registry(entity: str, aspect: str) -> None:
    """Append ``aspect`` to ``entity``'s aspect list in the entity registry file.

    The YAML document at ``ENTITY_REGISTRY_PATH`` is loaded, the first entry
    under ``entities`` whose ``name`` equals ``entity`` gets ``aspect``
    appended to its ``aspects`` list, and the document is written back to
    the same path.

    Raises:
        ValueError: if no entry named ``entity`` exists in the registry.
    """
    # Imported lazily so ruamel.yaml is only needed when the registry is updated.
    from ruamel.yaml import YAML

    yaml = YAML()
    registry = yaml.load(ENTITY_REGISTRY_PATH)

    # Locate the first entity entry with a matching name, if any.
    target = next(
        (item for item in registry["entities"] if item["name"] == entity),
        None,
    )
    if target is None:
        raise ValueError(
            f'could not find entity "{entity}" in entity registry at {ENTITY_REGISTRY_PATH}'
        )
    target["aspects"].append(aspect)

    # A very large width keeps long lines from being wrapped on output, and the
    # explicit indent settings preserve the file's existing indentation.
    yaml.width = 2**20  # type: ignore[assignment]
    yaml.indent(mapping=2, sequence=4, offset=2)
    yaml.dump(registry, ENTITY_REGISTRY_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
def test_entity_registry_completeness():
    """Check that every snapshot aspect is listed in the entity registry.

    The snapshot classes can have aspects that the entity registry doesn't
    know about; this test ensures that we don't have any of those cases.
    When ``_UPDATE_ENTITY_REGISTRY`` is true, missing aspects are added via
    ``_add_to_registry`` instead of being reported as errors.
    """
    problems: List[str] = []

    # The snapshot classes are the members of the type hint on the
    # ``proposedSnapshot`` argument of MetadataChangeEventClass.__init__.
    mce_hints = typing.get_type_hints(MetadataChangeEventClass.__init__)
    snapshot_classes: List[Type] = typing_inspect.get_args(
        mce_hints["proposedSnapshot"]
    )

    # Snapshot class names are matched to entity types case-insensitively.
    entity_type_by_lowercase = {name.lower(): name for name in KEY_ASPECTS}

    for snapshot_cls in snapshot_classes:
        stripped_name: str = snapshot_cls.__name__.replace("SnapshotClass", "")
        entity_type = entity_type_by_lowercase[stripped_name.lower()]

        key_aspect = KEY_ASPECTS[entity_type]
        registered_names = set(key_aspect.get_aspect_info()["entityAspects"])

        # ``aspects`` is a generic whose first type argument is itself a
        # generic over the aspect classes; unwrap both levels.
        aspects_hint = typing.get_type_hints(snapshot_cls.__init__)["aspects"]
        element_hint = typing_inspect.get_args(aspects_hint)[0]
        snapshot_aspect_types: List[Type[_Aspect]] = typing_inspect.get_args(
            element_hint
        )

        for aspect_cls in snapshot_aspect_types:
            # The key aspect is skipped rather than checked against the list.
            if aspect_cls == key_aspect:
                continue

            aspect_name = aspect_cls.ASPECT_NAME
            if aspect_name in registered_names:
                continue

            if _UPDATE_ENTITY_REGISTRY:
                _add_to_registry(entity_type, aspect_name)
                continue

            message = f"entity {entity_type}: aspect {aspect_name} is missing from the entity registry"
            print(message)
            problems.append(message)

    assert (
        not problems
    ), f'To fix these errors, run "UPDATE_ENTITY_REGISTRY=true pytest {__file__}"'
|
||||||
|
@ -22,6 +22,18 @@ entities:
|
|||||||
- testResults
|
- testResults
|
||||||
- siblings
|
- siblings
|
||||||
- embed
|
- embed
|
||||||
|
- datasetProperties
|
||||||
|
- editableDatasetProperties
|
||||||
|
- datasetDeprecation
|
||||||
|
- datasetUpstreamLineage
|
||||||
|
- upstreamLineage
|
||||||
|
- institutionalMemory
|
||||||
|
- ownership
|
||||||
|
- editableSchemaMetadata
|
||||||
|
- globalTags
|
||||||
|
- glossaryTerms
|
||||||
|
- browsePaths
|
||||||
|
- dataPlatformInstance
|
||||||
- name: dataHubPolicy
|
- name: dataHubPolicy
|
||||||
doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
|
doc: DataHub Policies represent access policies granted to users or groups on metadata operations like edit, view etc.
|
||||||
category: internal
|
category: internal
|
||||||
@ -36,6 +48,16 @@ entities:
|
|||||||
- domains
|
- domains
|
||||||
- deprecation
|
- deprecation
|
||||||
- versionInfo
|
- versionInfo
|
||||||
|
- dataJobInfo
|
||||||
|
- dataJobInputOutput
|
||||||
|
- editableDataJobProperties
|
||||||
|
- ownership
|
||||||
|
- status
|
||||||
|
- globalTags
|
||||||
|
- browsePaths
|
||||||
|
- glossaryTerms
|
||||||
|
- institutionalMemory
|
||||||
|
- dataPlatformInstance
|
||||||
- name: dataFlow
|
- name: dataFlow
|
||||||
category: core
|
category: core
|
||||||
keyAspect: dataFlowKey
|
keyAspect: dataFlowKey
|
||||||
@ -43,6 +65,21 @@ entities:
|
|||||||
- domains
|
- domains
|
||||||
- deprecation
|
- deprecation
|
||||||
- versionInfo
|
- versionInfo
|
||||||
|
- dataFlowInfo
|
||||||
|
- editableDataFlowProperties
|
||||||
|
- ownership
|
||||||
|
- status
|
||||||
|
- globalTags
|
||||||
|
- browsePaths
|
||||||
|
- glossaryTerms
|
||||||
|
- institutionalMemory
|
||||||
|
- dataPlatformInstance
|
||||||
|
- name: dataProcess
|
||||||
|
keyAspect: dataProcessKey
|
||||||
|
aspects:
|
||||||
|
- dataProcessInfo
|
||||||
|
- ownership
|
||||||
|
- status
|
||||||
- name: dataProcessInstance
|
- name: dataProcessInstance
|
||||||
doc: DataProcessInstance represents an instance of a datajob/jobflow run
|
doc: DataProcessInstance represents an instance of a datajob/jobflow run
|
||||||
keyAspect: dataProcessInstanceKey
|
keyAspect: dataProcessInstanceKey
|
||||||
@ -55,12 +92,22 @@ entities:
|
|||||||
- name: chart
|
- name: chart
|
||||||
keyAspect: chartKey
|
keyAspect: chartKey
|
||||||
aspects:
|
aspects:
|
||||||
- domains
|
- chartInfo
|
||||||
- container
|
- editableChartProperties
|
||||||
- deprecation
|
- chartQuery
|
||||||
- inputFields
|
- inputFields
|
||||||
- chartUsageStatistics
|
- chartUsageStatistics
|
||||||
- embed
|
- embed
|
||||||
|
- browsePaths
|
||||||
|
- domains
|
||||||
|
- container
|
||||||
|
- deprecation
|
||||||
|
- ownership
|
||||||
|
- status
|
||||||
|
- institutionalMemory
|
||||||
|
- dataPlatformInstance
|
||||||
|
- globalTags
|
||||||
|
- glossaryTerms
|
||||||
- name: dashboard
|
- name: dashboard
|
||||||
keyAspect: dashboardKey
|
keyAspect: dashboardKey
|
||||||
aspects:
|
aspects:
|
||||||
@ -71,6 +118,15 @@ entities:
|
|||||||
- inputFields
|
- inputFields
|
||||||
- subTypes
|
- subTypes
|
||||||
- embed
|
- embed
|
||||||
|
- dashboardInfo
|
||||||
|
- editableDashboardProperties
|
||||||
|
- ownership
|
||||||
|
- status
|
||||||
|
- globalTags
|
||||||
|
- browsePaths
|
||||||
|
- glossaryTerms
|
||||||
|
- institutionalMemory
|
||||||
|
- dataPlatformInstance
|
||||||
- name: notebook
|
- name: notebook
|
||||||
doc: Notebook represents a combination of query, text, chart and etc. This is in BETA version
|
doc: Notebook represents a combination of query, text, chart and etc. This is in BETA version
|
||||||
keyAspect: notebookKey
|
keyAspect: notebookKey
|
||||||
@ -141,15 +197,19 @@ entities:
|
|||||||
- tagProperties
|
- tagProperties
|
||||||
- ownership
|
- ownership
|
||||||
- deprecation
|
- deprecation
|
||||||
|
- status
|
||||||
- name: glossaryTerm
|
- name: glossaryTerm
|
||||||
keyAspect: glossaryTermKey
|
keyAspect: glossaryTermKey
|
||||||
aspects:
|
aspects:
|
||||||
- glossaryTermInfo
|
- glossaryTermInfo
|
||||||
|
- glossaryRelatedTerms
|
||||||
- institutionalMemory
|
- institutionalMemory
|
||||||
- schemaMetadata
|
- schemaMetadata
|
||||||
- ownership
|
- ownership
|
||||||
- deprecation
|
- deprecation
|
||||||
- domains
|
- domains
|
||||||
|
- status
|
||||||
|
- browsePaths
|
||||||
- name: glossaryNode
|
- name: glossaryNode
|
||||||
keyAspect: glossaryNodeKey
|
keyAspect: glossaryNodeKey
|
||||||
aspects:
|
aspects:
|
||||||
@ -205,6 +265,24 @@ entities:
|
|||||||
- glossaryTerms
|
- glossaryTerms
|
||||||
- editableMlModelProperties
|
- editableMlModelProperties
|
||||||
- domains
|
- domains
|
||||||
|
- ownership
|
||||||
|
- mlModelProperties
|
||||||
|
- intendedUse
|
||||||
|
- mlModelFactorPrompts
|
||||||
|
- mlModelMetrics
|
||||||
|
- mlModelEvaluationData
|
||||||
|
- mlModelTrainingData
|
||||||
|
- mlModelQuantitativeAnalyses
|
||||||
|
- mlModelEthicalConsiderations
|
||||||
|
- mlModelCaveatsAndRecommendations
|
||||||
|
- institutionalMemory
|
||||||
|
- sourceCode
|
||||||
|
- status
|
||||||
|
- cost
|
||||||
|
- deprecation
|
||||||
|
- browsePaths
|
||||||
|
- globalTags
|
||||||
|
- dataPlatformInstance
|
||||||
- name: mlModelGroup
|
- name: mlModelGroup
|
||||||
category: core
|
category: core
|
||||||
keyAspect: mlModelGroupKey
|
keyAspect: mlModelGroupKey
|
||||||
@ -212,6 +290,23 @@ entities:
|
|||||||
- glossaryTerms
|
- glossaryTerms
|
||||||
- editableMlModelGroupProperties
|
- editableMlModelGroupProperties
|
||||||
- domains
|
- domains
|
||||||
|
- mlModelGroupProperties
|
||||||
|
- ownership
|
||||||
|
- status
|
||||||
|
- deprecation
|
||||||
|
- browsePaths
|
||||||
|
- globalTags
|
||||||
|
- dataPlatformInstance
|
||||||
|
- name: mlModelDeployment
|
||||||
|
category: core
|
||||||
|
keyAspect: mlModelDeploymentKey
|
||||||
|
aspects:
|
||||||
|
- mlModelDeploymentProperties
|
||||||
|
- ownership
|
||||||
|
- status
|
||||||
|
- deprecation
|
||||||
|
- globalTags
|
||||||
|
- dataPlatformInstance
|
||||||
- name: mlFeatureTable
|
- name: mlFeatureTable
|
||||||
category: core
|
category: core
|
||||||
keyAspect: mlFeatureTableKey
|
keyAspect: mlFeatureTableKey
|
||||||
@ -219,6 +314,14 @@ entities:
|
|||||||
- glossaryTerms
|
- glossaryTerms
|
||||||
- editableMlFeatureTableProperties
|
- editableMlFeatureTableProperties
|
||||||
- domains
|
- domains
|
||||||
|
- mlFeatureTableProperties
|
||||||
|
- ownership
|
||||||
|
- institutionalMemory
|
||||||
|
- status
|
||||||
|
- deprecation
|
||||||
|
- browsePaths
|
||||||
|
- globalTags
|
||||||
|
- dataPlatformInstance
|
||||||
- name: mlFeature
|
- name: mlFeature
|
||||||
category: core
|
category: core
|
||||||
keyAspect: mlFeatureKey
|
keyAspect: mlFeatureKey
|
||||||
@ -226,6 +329,14 @@ entities:
|
|||||||
- glossaryTerms
|
- glossaryTerms
|
||||||
- editableMlFeatureProperties
|
- editableMlFeatureProperties
|
||||||
- domains
|
- domains
|
||||||
|
- mlFeatureProperties
|
||||||
|
- ownership
|
||||||
|
- institutionalMemory
|
||||||
|
- status
|
||||||
|
- deprecation
|
||||||
|
- browsePaths
|
||||||
|
- globalTags
|
||||||
|
- dataPlatformInstance
|
||||||
- name: mlPrimaryKey
|
- name: mlPrimaryKey
|
||||||
category: core
|
category: core
|
||||||
keyAspect: mlPrimaryKeyKey
|
keyAspect: mlPrimaryKeyKey
|
||||||
@ -233,6 +344,13 @@ entities:
|
|||||||
- glossaryTerms
|
- glossaryTerms
|
||||||
- editableMlPrimaryKeyProperties
|
- editableMlPrimaryKeyProperties
|
||||||
- domains
|
- domains
|
||||||
|
- mlPrimaryKeyProperties
|
||||||
|
- ownership
|
||||||
|
- institutionalMemory
|
||||||
|
- status
|
||||||
|
- deprecation
|
||||||
|
- globalTags
|
||||||
|
- dataPlatformInstance
|
||||||
- name: telemetry
|
- name: telemetry
|
||||||
category: internal
|
category: internal
|
||||||
keyAspect: telemetryKey
|
keyAspect: telemetryKey
|
||||||
|
Loading…
x
Reference in New Issue
Block a user