docs: hide pydantic_removed_field marked fields from documentation (#14829)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Sergio Gómez Villamor 2025-09-28 11:36:37 +02:00 committed by GitHub
parent c18b125a05
commit 900d7fe244
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 45 additions and 8 deletions

View File

@ -12,7 +12,7 @@ from typing import Dict, List, Optional
import click
from docgen_types import Platform, Plugin
from docs_config_table import gen_md_table_from_json_schema
from docs_config_table import gen_md_table_from_pydantic
from datahub.configuration.common import ConfigModel
from datahub.ingestion.api.decorators import (
@ -244,8 +244,8 @@ def create_plugin_from_capability_data(
source_config_class: ConfigModel = source_type.get_config_class()
plugin.config_json_schema = source_config_class.schema_json(indent=2)
plugin.config_md = gen_md_table_from_json_schema(
source_config_class.schema(), current_source=plugin_name
plugin.config_md = gen_md_table_from_pydantic(
source_config_class, current_source=plugin_name
)
# Write the config json schema to the out_dir.

View File

@ -1,7 +1,7 @@
import html
import json
import re
from typing import Any, ClassVar, Dict, Iterable, List, Optional, Type
from typing import Any, ClassVar, Dict, Iterable, List, Optional, Set, Type
from pydantic import BaseModel, Field
@ -345,12 +345,40 @@ def priority_value(path: str) -> str:
return "A"
def _get_removed_fields_from_model(model_class: Type[BaseModel]) -> set:
"""Extract fields marked as removed via pydantic_removed_field from a Pydantic model"""
removed_fields = set()
# Check pre-root validators for removal markers
if hasattr(model_class, "__pre_root_validators__"):
for validator in model_class.__pre_root_validators__:
removed_field = getattr(validator, "_doc_removed_field", None)
if removed_field is not None:
removed_fields.add(removed_field)
return removed_fields
def _is_removed_field(field_name: str, removed_fields: Optional[Set[str]]) -> bool:
"""Check if a field is marked as removed"""
return field_name in removed_fields if removed_fields else False
def should_hide_field(
schema_field: SchemaFieldClass, current_source: str, schema_dict: Dict[str, Any]
schema_field: SchemaFieldClass,
current_source: str,
schema_dict: Dict[str, Any],
removed_fields: Optional[Set[str]] = None,
) -> bool:
"""Check if field should be hidden for the current source"""
# Extract field name from the path
field_name = schema_field.fieldPath.split(".")[-1]
# Hide removed fields
if _is_removed_field(field_name, removed_fields):
return True
for ends_with in [
"pattern.[type=array].allow",
"pattern.[type=array].allow.[type=string].string",
@ -380,9 +408,12 @@ def should_hide_field(
def gen_md_table_from_json_schema(
schema_dict: Dict[str, Any], current_source: Optional[str] = None
schema_dict: Dict[str, Any],
current_source: Optional[str] = None,
removed_fields: Optional[Set[str]] = None,
) -> str:
# we don't want default field values to be injected into the description of the field
JsonSchemaTranslator._INJECT_DEFAULTS_INTO_DESCRIPTION = False
schema_fields = list(JsonSchemaTranslator.get_fields_from_schema(schema_dict))
result: List[str] = [FieldHeader().to_md_line()]
@ -390,7 +421,9 @@ def gen_md_table_from_json_schema(
field_tree = FieldTree(field=None)
for field in schema_fields:
row: FieldRow = FieldRow.from_schema_field(field)
if current_source and should_hide_field(field, current_source, schema_dict):
if current_source and should_hide_field(
field, current_source, schema_dict, removed_fields
):
continue
field_tree.add_field(row)
@ -408,7 +441,8 @@ def gen_md_table_from_json_schema(
def gen_md_table_from_pydantic(
    model: Type[BaseModel], current_source: Optional[str] = None
) -> str:
    """Render the Markdown config table for a Pydantic model.

    The fields the model marks as removed are looked up first and passed,
    together with the model's JSON schema, to
    ``gen_md_table_from_json_schema``.

    Args:
        model: Pydantic model class to document.
        current_source: Optional source name forwarded unchanged to
            ``gen_md_table_from_json_schema``.

    Returns:
        The string produced by ``gen_md_table_from_json_schema``.
    """
    # The lookup must run before the single return so that the removed-field
    # set actually reaches the table generator.
    removed_fields = _get_removed_fields_from_model(model)
    return gen_md_table_from_json_schema(model.schema(), current_source, removed_fields)
if __name__ == "__main__":

View File

@ -24,6 +24,9 @@ def pydantic_removed_field(
values.pop(field)
return values
# Mark the function as handling a removed field for doc generation
_validate_field_removal._doc_removed_field = field # type: ignore[attr-defined]
# Hack: Pydantic keeps its list of validators unique by each validator's __name__.
# https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264
# This hack ensures that multiple field removals do not overwrite each other.