Feat/update generate docs schema (#19287)

This commit is contained in:
tarunpandey23 2025-01-09 14:50:12 +05:30 committed by GitHub
parent 63aa484fec
commit 1d2774ac29
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -30,18 +30,17 @@ import os
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import List from typing import List
import re
import jsonschema2md import jsonschema2md
# Directory holding the JSON Schema sources (relative path).
SOURCES_ROOT = "openmetadata-spec/src/main/resources/json/schema"
# Versioned docs folder that receives the generated Markdown (relative path).
SINK_ROOT = "openmetadata-docs/content/v1.6.x"
# Kept as a plain string (with trailing slash) because it is used as a
# substring to strip from stringified paths.
SCHEMAS_ROOT = SINK_ROOT + "/main-concepts/metadata-standard/schemas/"

# Shared JSON Schema -> Markdown converter.
PARSER = jsonschema2md.Parser(
    examples_as_yaml=False,
    show_examples="all",
)

# Single timezone-aware timestamp captured at import time, so every file
# generated in one run carries the same value.
NOW = datetime.now(timezone.utc)
@ -66,7 +65,6 @@ def to_tile(string: str) -> str:
def write_md(new_file: Path, lines: List[str]) -> None:
    """
    Write ``lines`` to ``new_file``, creating missing parent directories.

    The entries are written verbatim and back to back: no newline is
    appended, so each entry must carry its own line terminator. An existing
    file at the same location is overwritten.
    """
    new_absolute = new_file.absolute()
    new_absolute.parent.mkdir(exist_ok=True, parents=True)
    # Pin the encoding so the generated docs do not depend on the platform's
    # locale default.
    with open(new_absolute, "w", encoding="utf-8") as f:
        f.writelines(lines)
@ -74,12 +72,9 @@ def write_md(new_file: Path, lines: List[str]) -> None:
def prepare_menu(new_file: Path, is_file: bool) -> None: def prepare_menu(new_file: Path, is_file: bool) -> None:
slug = generate_slug(new_file, is_file) slug = generate_slug(new_file, is_file)
category_root = "- category: Main Concepts / Metadata Standard / Schemas / " category_root = "- category: Main Concepts / Metadata Standard / Schemas / "
category_suffix = str(new_file.parent).replace(SCHEMAS_ROOT, "") category_suffix = str(new_file.parent).replace(SCHEMAS_ROOT, "")
title = [to_tile(new_file.stem)] if is_file else [] title = [to_tile(new_file.stem)] if is_file else []
category_suffix_list = ( category_suffix_list = (
list(map(lambda x: x.capitalize(), category_suffix.split("/"))) + title list(map(lambda x: x.capitalize(), category_suffix.split("/"))) + title
) )
@ -88,16 +83,41 @@ def prepare_menu(new_file: Path, is_file: bool) -> None:
print(f" url: {slug}") print(f" url: {slug}")
def generate_folder_content(directory: Path, slug: str) -> List[str]:
    """
    Generate default content for a folder (directory index).

    The result is a Markdown heading built from the directory name, an
    introductory sentence, and one bullet link per direct child of
    ``directory``. Files and sub-directories are rendered the same way: the
    link text is the title-cased stem and the target is
    ``{slug}/{lower-cased stem}``.
    """
    content = [
        f"# {to_tile(directory.stem)}\n\n",
        "This folder contains the following items:\n\n",
    ]
    # iterdir() yields entries in arbitrary order; sort them so regenerating
    # the docs produces a stable listing.
    content.extend(
        f"- [**{to_tile(item.stem)}**]({slug}/{item.stem.lower()})\n"
        for item in sorted(directory.iterdir())
    )
    return content
def generate_header(new_file: Path, is_file: bool) -> List[str]:
    """
    Build the front-matter lines placed at the top of a generated page.

    The block is delimited by ``---`` lines and holds a ``title`` (the stem
    of ``new_file``) and a ``slug`` (from ``generate_slug``), followed by one
    blank line.
    """
    sep = "---\n"
    title = f"title: {new_file.stem}\n"
    slug = f"slug: {generate_slug(new_file, is_file)}\n"
    return [sep, title, slug, sep, "\n"]
def generated_at() -> List[str]:
    """
    Return the footer line stating when the documentation was generated.

    The timestamp is the module-level ``NOW`` value, so it is identical for
    every call.
    """
    return [f"\n\nDocumentation file automatically generated at {NOW}.\n"]
# The ``\b`` after ``<a`` keeps the pattern from treating other tags that
# merely start with "a" (``<abbr>``, ``<article>``, ...) as anchors.
_A_TAG_PATTERN = re.compile(r"<a\b[^>]*>(.*?)</a>")


def remove_a_tags(lines: List[str]) -> List[str]:
    """
    Remove <a> tags from the given lines of Markdown content.

    Each ``<a ...>text</a>`` pair is replaced by its inner ``text`` (possibly
    empty). Matching is done line by line, so a tag whose opening and closing
    parts sit on different entries is left untouched. A new list is returned;
    the input list is not modified.
    """
    return [_A_TAG_PATTERN.sub(r"\1", line) for line in lines]
def main() -> None: def main() -> None:
""" """
@ -108,29 +128,30 @@ def main() -> None:
""" """
results = [(file, True) for file in Path(SOURCES_ROOT).rglob("*.json")] results = [(file, True) for file in Path(SOURCES_ROOT).rglob("*.json")]
directories = [Path(x[0]) for x in os.walk(SOURCES_ROOT)] directories = [Path(x[0]) for x in os.walk(SOURCES_ROOT)]
indexes = list((directory / "index.md", False) for directory in directories) indexes = list((directory / "index.md", False) for directory in directories)
all_elems = results + indexes all_elems = results + indexes
all_elems.sort() all_elems.sort()
for elem, is_file in all_elems: for elem, is_file in all_elems:
new_file = build_new_file(elem) new_file = build_new_file(elem)
if is_file: if is_file:
with open(elem.absolute()) as f: with open(elem.absolute()) as f:
md_lines = PARSER.parse_schema(json.load(f)) md_lines = PARSER.parse_schema(json.load(f))
md_lines = remove_a_tags(md_lines) # Remove <a> tags
else: else:
md_lines = [f"# {to_tile(elem.parent.stem)}"] md_lines = [f"# {to_tile(elem.parent.stem)}"]
all_lines = generate_header(new_file, is_file) + md_lines + generated_at() all_lines = generate_header(new_file, is_file) + md_lines + generated_at()
write_md(new_file, all_lines) write_md(new_file, all_lines)
prepare_menu(new_file, is_file) prepare_menu(new_file, is_file)
for elem, is_file in indexes:
new_file = build_new_file(elem)
slug = generate_slug(new_file, is_file=False)
md_lines = generate_folder_content(elem.parent, slug)
all_lines = generate_header(new_file, is_file=False) + md_lines + generated_at()
write_md(new_file, all_lines)
prepare_menu(new_file, is_file=False)
if __name__ == "__main__": if __name__ == "__main__":
main() main()