Mirror of https://github.com/datahub-project/datahub.git, last synced 2025-06-27 05:03:31 +00:00.
feat(docs): add showing specific fields to docs of specific connectors (#13810)
This commit is contained in:
parent
b12b9aa919
commit
85b29c9361
@ -198,7 +198,7 @@ def load_plugin(plugin_name: str, out_dir: str) -> Plugin:
|
|||||||
source_config_class: ConfigModel = source_type.get_config_class()
|
source_config_class: ConfigModel = source_type.get_config_class()
|
||||||
|
|
||||||
plugin.config_json_schema = source_config_class.schema_json(indent=2)
|
plugin.config_json_schema = source_config_class.schema_json(indent=2)
|
||||||
plugin.config_md = gen_md_table_from_json_schema(source_config_class.schema())
|
plugin.config_md = gen_md_table_from_json_schema(source_config_class.schema(), current_source=plugin_name)
|
||||||
|
|
||||||
# Write the config json schema to the out_dir.
|
# Write the config json schema to the out_dir.
|
||||||
config_dir = pathlib.Path(out_dir) / "config_schemas"
|
config_dir = pathlib.Path(out_dir) / "config_schemas"
|
||||||
|
@ -343,7 +343,26 @@ def priority_value(path: str) -> str:
|
|||||||
return "A"
|
return "A"
|
||||||
|
|
||||||
|
|
||||||
def gen_md_table_from_json_schema(schema_dict: Dict[str, Any]) -> str:
|
def should_hide_field(
    schema_field, current_source: str, schema_dict: Dict[str, Any]
) -> bool:
    """Return True if ``schema_field`` should be hidden in docs for ``current_source``.

    A config field opts into per-source visibility by declaring
    ``schema_extra={"supported_sources": [...]}`` on its pydantic ``Field``.
    The field is hidden when ``current_source`` is not in that list
    (compared case-insensitively). Fields that declare no
    ``supported_sources`` are never hidden.
    """
    # Only the last path segment names the field itself
    # (fieldPath looks like "Parent.child.field_name").
    field_name = schema_field.fieldPath.split(".")[-1]

    # Search the JSON-schema definitions for a property with this name.
    # NOTE(review): matching by bare field name means a same-named field in a
    # different definition could shadow this one — acceptable for doc gen.
    for def_schema in schema_dict.get("definitions", {}).values():
        properties = def_schema.get("properties", {})
        if field_name in properties:
            supported_sources = (
                properties[field_name].get("schema_extra", {}).get("supported_sources")
            )
            if supported_sources and current_source:
                # Case-insensitive membership test against the allow-list.
                return current_source.lower() not in {
                    s.lower() for s in supported_sources
                }
    return False
|
|
||||||
|
def gen_md_table_from_json_schema(schema_dict: Dict[str, Any], current_source: Optional[str] = None) -> str:
|
||||||
# we don't want default field values to be injected into the description of the field
|
# we don't want default field values to be injected into the description of the field
|
||||||
JsonSchemaTranslator._INJECT_DEFAULTS_INTO_DESCRIPTION = False
|
JsonSchemaTranslator._INJECT_DEFAULTS_INTO_DESCRIPTION = False
|
||||||
schema_fields = list(JsonSchemaTranslator.get_fields_from_schema(schema_dict))
|
schema_fields = list(JsonSchemaTranslator.get_fields_from_schema(schema_dict))
|
||||||
@ -352,6 +371,8 @@ def gen_md_table_from_json_schema(schema_dict: Dict[str, Any]) -> str:
|
|||||||
field_tree = FieldTree(field=None)
|
field_tree = FieldTree(field=None)
|
||||||
for field in schema_fields:
|
for field in schema_fields:
|
||||||
row: FieldRow = FieldRow.from_schema_field(field)
|
row: FieldRow = FieldRow.from_schema_field(field)
|
||||||
|
if current_source and should_hide_field(field, current_source, schema_dict):
|
||||||
|
continue
|
||||||
field_tree.add_field(row)
|
field_tree.add_field(row)
|
||||||
|
|
||||||
field_tree.sort()
|
field_tree.sort()
|
||||||
@ -365,12 +386,13 @@ def gen_md_table_from_json_schema(schema_dict: Dict[str, Any]) -> str:
|
|||||||
return "".join(result)
|
return "".join(result)
|
||||||
|
|
||||||
|
|
||||||
def gen_md_table_from_pydantic(
    model: Type[BaseModel], current_source: Optional[str] = None
) -> str:
    """Render a markdown config table for a pydantic model.

    Delegates to ``gen_md_table_from_json_schema`` on the model's JSON
    schema; ``current_source``, when given, filters out fields that are
    not supported by that source.
    """
    return gen_md_table_from_json_schema(model.schema(), current_source=current_source)
||||||
if __name__ == "__main__":
    # Simple smoke test: render the Snowflake config table to stdout.
    from datahub.ingestion.source.snowflake.snowflake_config import SnowflakeV2Config

    # gen_md_table_from_pydantic already returns a str, so the previous
    # `"".join(...)` wrapper was a no-op (joining a string's characters
    # with an empty separator) and has been dropped.
    print(gen_md_table_from_pydantic(SnowflakeV2Config, current_source="snowflake"))
|
@ -125,6 +125,7 @@ class GEProfilingConfig(GEProfilingBaseConfig):
|
|||||||
description="Profile table only if it has been updated since these many number of days. "
|
description="Profile table only if it has been updated since these many number of days. "
|
||||||
"If set to `null`, no constraint of last modified time for tables to profile. "
|
"If set to `null`, no constraint of last modified time for tables to profile. "
|
||||||
"Supported only in `snowflake` and `BigQuery`.",
|
"Supported only in `snowflake` and `BigQuery`.",
|
||||||
|
schema_extra={"supported_sources": ["snowflake", "bigquery"]},
|
||||||
)
|
)
|
||||||
|
|
||||||
profile_table_size_limit: Optional[int] = Field(
|
profile_table_size_limit: Optional[int] = Field(
|
||||||
@ -132,6 +133,9 @@ class GEProfilingConfig(GEProfilingBaseConfig):
|
|||||||
description="Profile tables only if their size is less than specified GBs. If set to `null`, "
|
description="Profile tables only if their size is less than specified GBs. If set to `null`, "
|
||||||
"no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
|
"no limit on the size of tables to profile. Supported only in `Snowflake`, `BigQuery` and "
|
||||||
"`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
|
"`Databricks`. Supported for `Oracle` based on calculated size from gathered stats.",
|
||||||
|
schema_extra={
|
||||||
|
"supported_sources": ["snowflake", "bigquery", "unity-catalog", "oracle"]
|
||||||
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
profile_table_row_limit: Optional[int] = Field(
|
profile_table_row_limit: Optional[int] = Field(
|
||||||
@ -139,12 +143,14 @@ class GEProfilingConfig(GEProfilingBaseConfig):
|
|||||||
description="Profile tables only if their row count is less than specified count. "
|
description="Profile tables only if their row count is less than specified count. "
|
||||||
"If set to `null`, no limit on the row count of tables to profile. Supported only in "
|
"If set to `null`, no limit on the row count of tables to profile. Supported only in "
|
||||||
"`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
|
"`Snowflake`, `BigQuery`. Supported for `Oracle` based on gathered stats.",
|
||||||
|
schema_extra={"supported_sources": ["snowflake", "bigquery", "oracle"]},
|
||||||
)
|
)
|
||||||
|
|
||||||
# Trade accuracy for speed when counting rows, per the description below.
profile_table_row_count_estimate_only: bool = Field(
    default=False,
    description="Use an approximate query for row count. This will be much faster but slightly "
    "less accurate. Only supported for Postgres and MySQL. ",
    # Consumed by the docs generator: the field is shown only on the
    # doc pages of the listed sources.
    schema_extra={"supported_sources": ["postgres", "mysql"]},
)
|
||||||
|
|
||||||
# The query combiner enables us to combine multiple queries into a single query,
|
# The query combiner enables us to combine multiple queries into a single query,
|
||||||
@ -161,27 +167,32 @@ class GEProfilingConfig(GEProfilingBaseConfig):
|
|||||||
default=True,
|
default=True,
|
||||||
description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. "
|
description="Whether to profile partitioned tables. Only BigQuery and Aws Athena supports this. "
|
||||||
"If enabled, latest partition data is used for profiling.",
|
"If enabled, latest partition data is used for profiling.",
|
||||||
|
schema_extra={"supported_sources": ["athena", "bigquery"]},
|
||||||
)
|
)
|
||||||
# Optional pin to a single partition; None means "latest partition".
partition_datetime: Optional[datetime.datetime] = Field(
    default=None,
    description="If specified, profile only the partition which matches this datetime. "
    "If not specified, profile the latest partition. Only Bigquery supports this.",
    # Consumed by the docs generator: the field is shown only on the
    # doc pages of the listed sources.
    schema_extra={"supported_sources": ["bigquery"]},
)
|
||||||
# Whether column-level stats are computed on a sample rather than the
# full table (see description for source support).
use_sampling: bool = Field(
    default=True,
    description="Whether to profile column level stats on sample of table. Only BigQuery and Snowflake support this. "
    "If enabled, profiling is done on rows sampled from table. Sampling is not done for smaller tables. ",
    # Consumed by the docs generator: the field is shown only on the
    # doc pages of the listed sources.
    schema_extra={"supported_sources": ["bigquery", "snowflake"]},
)
|
|
||||||
# Row-sample size used when `use_sampling` is enabled.
sample_size: int = Field(
    default=10000,
    description="Number of rows to be sampled from table for column level profiling."
    "Applicable only if `use_sampling` is set to True.",
    # Consumed by the docs generator: the field is shown only on the
    # doc pages of the listed sources.
    schema_extra={"supported_sources": ["bigquery", "snowflake"]},
)
|
||||||
|
|
||||||
# External tables are skipped by default; see description for source support.
profile_external_tables: bool = Field(
    default=False,
    description="Whether to profile external tables. Only Snowflake and Redshift supports this.",
    # Consumed by the docs generator: the field is shown only on the
    # doc pages of the listed sources.
    schema_extra={"supported_sources": ["redshift", "snowflake"]},
)
|
||||||
|
|
||||||
tags_to_ignore_sampling: Optional[List[str]] = pydantic.Field(
|
tags_to_ignore_sampling: Optional[List[str]] = pydantic.Field(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user