mirror of
https://github.com/datahub-project/datahub.git
synced 2025-10-27 17:02:03 +00:00
feat(docs): add structured property for search field names in metadata model (#15097)
This commit is contained in:
parent
7372b9120d
commit
5a1569743a
2
.github/workflows/metadata-model.yml
vendored
2
.github/workflows/metadata-model.yml
vendored
@ -5,6 +5,8 @@ on:
|
|||||||
- master
|
- master
|
||||||
paths:
|
paths:
|
||||||
- "metadata-models/**"
|
- "metadata-models/**"
|
||||||
|
- "metadata-ingestion/scripts/modeldocgen.py"
|
||||||
|
- "metadata-ingestion/scripts/modeldocupload.sh"
|
||||||
release:
|
release:
|
||||||
types: [published]
|
types: [published]
|
||||||
|
|
||||||
|
|||||||
@ -39,9 +39,14 @@ from datahub.metadata.schema_classes import (
|
|||||||
SchemaFieldDataTypeClass,
|
SchemaFieldDataTypeClass,
|
||||||
SchemaMetadataClass,
|
SchemaMetadataClass,
|
||||||
StringTypeClass,
|
StringTypeClass,
|
||||||
|
StructuredPropertiesClass,
|
||||||
|
StructuredPropertyDefinitionClass,
|
||||||
|
StructuredPropertySettingsClass,
|
||||||
|
StructuredPropertyValueAssignmentClass,
|
||||||
SubTypesClass,
|
SubTypesClass,
|
||||||
TagAssociationClass,
|
TagAssociationClass,
|
||||||
)
|
)
|
||||||
|
from datahub.metadata.urns import StructuredPropertyUrn, Urn
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -888,6 +893,54 @@ def make_entity_docs(entity_display_name: str, graph: RelationshipGraph) -> str:
|
|||||||
raise Exception(f"Failed to find information for entity: {entity_name}")
|
raise Exception(f"Failed to find information for entity: {entity_name}")
|
||||||
|
|
||||||
|
|
||||||
|
def create_search_field_name_property() -> List[MetadataChangeProposalWrapper]:
|
||||||
|
"""
|
||||||
|
Create the structured property for documenting search field names.
|
||||||
|
|
||||||
|
This property is used to capture the actual field name used in the search index
|
||||||
|
when it differs from the field name in the schema (e.g., 'instance' field is
|
||||||
|
indexed as 'platformInstance').
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of MCPs for the property definition and settings
|
||||||
|
"""
|
||||||
|
property_id = "com.datahub.metadata.searchFieldName"
|
||||||
|
property_urn = str(
|
||||||
|
StructuredPropertyUrn.from_string(f"urn:li:structuredProperty:{property_id}")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create property definition
|
||||||
|
definition_mcp = MetadataChangeProposalWrapper(
|
||||||
|
entityUrn=property_urn,
|
||||||
|
aspect=StructuredPropertyDefinitionClass(
|
||||||
|
qualifiedName=property_id,
|
||||||
|
displayName="Search Field Name",
|
||||||
|
valueType=Urn.make_data_type_urn("string"),
|
||||||
|
description=(
|
||||||
|
"The field name used in the search index when it differs from the schema field name. "
|
||||||
|
"Use this field name when constructing search queries for this field."
|
||||||
|
),
|
||||||
|
entityTypes=[Urn.make_entity_type_urn("schemaField")],
|
||||||
|
cardinality="SINGLE",
|
||||||
|
immutable=False,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create property settings for display
|
||||||
|
settings_mcp = MetadataChangeProposalWrapper(
|
||||||
|
entityUrn=property_urn,
|
||||||
|
aspect=StructuredPropertySettingsClass(
|
||||||
|
isHidden=False,
|
||||||
|
showInSearchFilters=False,
|
||||||
|
showInAssetSummary=True,
|
||||||
|
showAsAssetBadge=False,
|
||||||
|
showInColumnsTable=True, # Show as a column in schema tables
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
return [definition_mcp, settings_mcp]
|
||||||
|
|
||||||
|
|
||||||
def generate_stitched_record(
|
def generate_stitched_record(
|
||||||
relnships_graph: RelationshipGraph,
|
relnships_graph: RelationshipGraph,
|
||||||
) -> Iterable[MetadataChangeProposalWrapper]:
|
) -> Iterable[MetadataChangeProposalWrapper]:
|
||||||
@ -897,6 +950,11 @@ def generate_stitched_record(
|
|||||||
final_path = re.sub(r"^\[version=2.0\]\.", "", final_path)
|
final_path = re.sub(r"^\[version=2.0\]\.", "", final_path)
|
||||||
return final_path
|
return final_path
|
||||||
|
|
||||||
|
# Track schema fields that need structured properties
|
||||||
|
schema_field_properties: Dict[
|
||||||
|
str, str
|
||||||
|
] = {} # schema_field_urn -> search_field_name
|
||||||
|
|
||||||
for entity_name, entity_def in entity_registry.items():
|
for entity_name, entity_def in entity_registry.items():
|
||||||
entity_display_name = entity_def.display_name
|
entity_display_name = entity_def.display_name
|
||||||
entity_fields = []
|
entity_fields = []
|
||||||
@ -981,6 +1039,28 @@ def generate_stitched_record(
|
|||||||
f_field.globalTags.tags.append(
|
f_field.globalTags.tags.append(
|
||||||
TagAssociationClass(tag="urn:li:tag:Searchable")
|
TagAssociationClass(tag="urn:li:tag:Searchable")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Check if search field name differs from actual field name
|
||||||
|
searchable_config = json_dict["Searchable"]
|
||||||
|
if (
|
||||||
|
isinstance(searchable_config, dict)
|
||||||
|
and "fieldName" in searchable_config
|
||||||
|
):
|
||||||
|
search_field_name = searchable_config["fieldName"]
|
||||||
|
# Extract the actual field name from the field path
|
||||||
|
# Field path format: "[version=2.0].[type=...].<fieldName>"
|
||||||
|
actual_field_name = strip_types(f_field.fieldPath).split(
|
||||||
|
"."
|
||||||
|
)[-1]
|
||||||
|
|
||||||
|
if search_field_name != actual_field_name:
|
||||||
|
# Track this for later - we'll emit a separate MCP for the schema field entity
|
||||||
|
schema_field_urn = make_schema_field_urn(
|
||||||
|
source_dataset_urn, f_field.fieldPath
|
||||||
|
)
|
||||||
|
schema_field_properties[schema_field_urn] = (
|
||||||
|
search_field_name
|
||||||
|
)
|
||||||
if "Relationship" in json_dict:
|
if "Relationship" in json_dict:
|
||||||
relationship_info = json_dict["Relationship"]
|
relationship_info = json_dict["Relationship"]
|
||||||
# detect if we have relationship specified at leaf level or thru path specs
|
# detect if we have relationship specified at leaf level or thru path specs
|
||||||
@ -1064,6 +1144,21 @@ def generate_stitched_record(
|
|||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Emit structured properties for schema fields
|
||||||
|
property_urn = "urn:li:structuredProperty:com.datahub.metadata.searchFieldName"
|
||||||
|
for schema_field_urn, search_field_name in schema_field_properties.items():
|
||||||
|
yield MetadataChangeProposalWrapper(
|
||||||
|
entityUrn=schema_field_urn,
|
||||||
|
aspect=StructuredPropertiesClass(
|
||||||
|
properties=[
|
||||||
|
StructuredPropertyValueAssignmentClass(
|
||||||
|
propertyUrn=property_urn,
|
||||||
|
values=[search_field_name],
|
||||||
|
)
|
||||||
|
]
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class EntityAspectName:
|
class EntityAspectName:
|
||||||
@ -1256,8 +1351,15 @@ def generate( # noqa: C901
|
|||||||
logger.error(f"Failed to generate lineage JSON: {e}")
|
logger.error(f"Failed to generate lineage JSON: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
# Create structured property for search field names first
|
||||||
|
logger.info("Creating structured property for search field names")
|
||||||
|
structured_property_mcps = create_search_field_name_property()
|
||||||
|
|
||||||
relationship_graph = RelationshipGraph()
|
relationship_graph = RelationshipGraph()
|
||||||
mcps = list(generate_stitched_record(relationship_graph))
|
entity_mcps = list(generate_stitched_record(relationship_graph))
|
||||||
|
|
||||||
|
# Combine MCPs with structured property first
|
||||||
|
mcps = structured_property_mcps + entity_mcps
|
||||||
|
|
||||||
shutil.rmtree(f"{generated_docs_dir}/entities", ignore_errors=True)
|
shutil.rmtree(f"{generated_docs_dir}/entities", ignore_errors=True)
|
||||||
entity_names = [(x, entity_registry[x]) for x in generated_documentation]
|
entity_names = [(x, entity_registry[x]) for x in generated_documentation]
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user