From 7ea39ad0b039fdefb54440f83df6d26dd66afbe8 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Mon, 9 Jun 2025 18:50:31 +0900 Subject: [PATCH] test build on sphinx-markdown-builder --- docs-website/sphinx/Makefile | 6 +- docs-website/sphinx/apidocs/sdk.rst | 9 + docs-website/sphinx/conf.py | 2 + .../sphinx/convert_sphinx_to_docusaurus.py | 89 +++------ docs-website/sphinx/requirements.txt | 1 + docs-website/src/styles/sphinx.scss | 171 +++++++----------- 6 files changed, 106 insertions(+), 172 deletions(-) create mode 100644 docs-website/sphinx/apidocs/sdk.rst diff --git a/docs-website/sphinx/Makefile b/docs-website/sphinx/Makefile index e8c419f991..14083acee2 100644 --- a/docs-website/sphinx/Makefile +++ b/docs-website/sphinx/Makefile @@ -26,10 +26,10 @@ $(VENV_SENTINEL): requirements.txt # Not using Python's http.server because it enables caching headers. serve: - serve -p 3001 _build/html/ + serve -p 3001 _build/markdown/ -md: html - $(VENV_DIR)/bin/python3 convert_sphinx_to_docusaurus.py +md: + @$(SPHINXBUILD) -M markdown "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) && $(VENV_DIR)/bin/python3 convert_sphinx_to_docusaurus.py # Route other targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). diff --git a/docs-website/sphinx/apidocs/sdk.rst b/docs-website/sphinx/apidocs/sdk.rst new file mode 100644 index 0000000000..579be1be2b --- /dev/null +++ b/docs-website/sphinx/apidocs/sdk.rst @@ -0,0 +1,9 @@ +DataHub SDK +=========== + +The DataHub SDK is a Python library for interacting with the DataHub platform. + +.. automodule:: datahub.sdk.search_client + +.. automodule:: datahub.sdk.lineage_client + diff --git a/docs-website/sphinx/conf.py b/docs-website/sphinx/conf.py index 49cd20d5ef..276fdb1904 100644 --- a/docs-website/sphinx/conf.py +++ b/docs-website/sphinx/conf.py @@ -26,8 +26,10 @@ extensions = [ "sphinx_autodoc_typehints", # This enables us to autogenerate docs for our CLI.
"sphinx_click", + "sphinx_markdown_builder", ] +markdown_anchor_sections = True napoleon_use_param = True # Move type hint info to function description instead of signature diff --git a/docs-website/sphinx/convert_sphinx_to_docusaurus.py b/docs-website/sphinx/convert_sphinx_to_docusaurus.py index 891e08d8c7..ef9e2c7cc6 100644 --- a/docs-website/sphinx/convert_sphinx_to_docusaurus.py +++ b/docs-website/sphinx/convert_sphinx_to_docusaurus.py @@ -1,78 +1,39 @@ import pathlib -import json -from bs4 import BeautifulSoup - SPHINX_ROOT_DIR = pathlib.Path(".") -SPHINX_BUILD_DIR = SPHINX_ROOT_DIR / pathlib.Path("_build/html/apidocs") +SPHINX_BUILD_DIR = SPHINX_ROOT_DIR / pathlib.Path("_build/markdown/apidocs") DOCS_OUTPUT_DIR = pathlib.Path("../docs/python-sdk") -def html_to_mdx(html: str) -> str: - # Because the HTML uses `class` and has `{}` in it, it isn't valid - # MDX. As such, we use React's dangerouslySetInnerHTML. - return f""" - -
- -""" - - -def bs4_to_mdx(soup: BeautifulSoup) -> str: - # TODO: Eventually we should do something smarter here to - # generate something that's closer to real Markdown. This would - # be helpful, for example, for enabling Docusaurus to generate - # a table of contents for the page. - return html_to_mdx(str(soup)) - - -def convert_html_to_md(html_file: pathlib.Path) -> str: - html = html_file.read_text() - soup = BeautifulSoup(html, "html.parser") - - body = soup.find("main").find("div", {"class": "bd-article-container"}) - article = body.find("article") - - # Remove all the "permalink to this heading" links. - for link in article.find_all("a", {"class": "headerlink"}): - link.decompose() - - # Remove the trailing " – " from arguments that are missing - # a description. - for item in article.select("dl.field-list dd p"): - # Note - that's U+2013, not a normal hyphen. - if str(item).endswith(" –

"): - parent = item.parent - # print("orig item", item) - new_item = BeautifulSoup(str(item)[:-7] + "

", "html.parser") - # print("new-item", str(new_item)) - parent.p.replace_with(new_item) - # print("item post replace", parent) - - # Extract title from the h1. - title_element = article.find("h1") - title = title_element.text - title_element.decompose() - - # TODO - generate nicer slugs for these pages - md_meta = f"""--- -title: {title} ----\n\n""" - - return md_meta + bs4_to_mdx(article) - - def main(): DOCS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True) - for doc in SPHINX_BUILD_DIR.glob("**/*.html"): - md = convert_html_to_md(doc) - - outfile = DOCS_OUTPUT_DIR / doc.relative_to(SPHINX_BUILD_DIR).with_suffix(".md") + for doc in SPHINX_BUILD_DIR.glob("**/*.md"): + outfile = DOCS_OUTPUT_DIR / doc.relative_to(SPHINX_BUILD_DIR) outfile.parent.mkdir(parents=True, exist_ok=True) - outfile.write_text(md) - print(f"Generated {outfile}") + with open(doc, "r") as f: + content = f.read() + + # Replace dangerous characters + replacements = [ + ("", "<\\id>"), + ("", "<\\type>"), + ("", "<\\id1>"), + ("", "<\\id2>"), + ("MDXContent.isMDXComponent = true", ""), + ] + for old, new in replacements: + content = content.replace(old, new) + + # Wrap the entire content with div (top and bottom) + final_content = f"
\n\n{content.strip()}\n\n
\n" + + with open(outfile, "w") as f: + f.write(final_content) + + print(f"✅ Generated {outfile}") if __name__ == "__main__": main() diff --git a/docs-website/sphinx/requirements.txt b/docs-website/sphinx/requirements.txt index d9e7eb197e..70eabcd0e1 100644 --- a/docs-website/sphinx/requirements.txt +++ b/docs-website/sphinx/requirements.txt @@ -5,6 +5,7 @@ sphinx-click==4.4.0 sphinx_autodoc_typehints==1.22 pydata-sphinx-theme==0.13.1 snowballstemmer>=2.2,<3 # Fixes https://github.com/sphinx-doc/sphinx/issues/13533 +sphinx-markdown-builder==0.6.8 # Because of https://github.com/pydata/pydata-sphinx-theme/issues/108 accessible-pygments diff --git a/docs-website/src/styles/sphinx.scss b/docs-website/src/styles/sphinx.scss index 022ba68afa..05235df13d 100644 --- a/docs-website/src/styles/sphinx.scss +++ b/docs-website/src/styles/sphinx.scss @@ -1,124 +1,85 @@ -// Styles for Sphinx Python SDK generated docs -$borderRadius: 5px; +.python-sdk { + font-size: 16px; + line-height: 1.6; + color: var(--ifm-font-color-base); -dl.py { - margin-bottom: calc(var(--ifm-spacing-vertical) * 2); - font-size: 14px; - border: 1px solid var(--ifm-hr-border-color); - border-radius: $borderRadius; + h1, h2, h3, h4 { + font-weight: 600; + margin-top: 2rem; + margin-bottom: 1rem; + } - code { - border: none; - background: none; + h3, h4 { + border-bottom: 1px solid var(--ifm-hr-border-color); + padding-bottom: 0.25rem; + } + + // Signature block + h3:has(code), h4:has(code) { + background-color: var(--ifm-code-background); + border-radius: 6px; + padding: 0.75rem 1rem 0.75rem 2rem; + font-family: var(--ifm-font-family-monospace); + font-size: 0.95rem; + font-weight: 500; + color: var(--ifm-font-color-base); } p { - margin-bottom: 0; + margin-bottom: 1rem; } - dl { - margin-bottom: var(--ifm-spacing-vertical); + code { + background-color: var(--ifm-code-background); + padding: 0.2em 0.4em; + border-radius: 4px; + font-size: 0.9em; } - // The parameter name: - em.sig-param > span:first-child 
{ - font-weight: bold; - } + ul { + margin-bottom: 1.5rem; - > dd:not(:empty) { - padding-bottom: var(--ifm-spacing-vertical); - } - - dt.sig { - box-sizing: border-box; - font-size: 0.9rem; - padding: var(--ifm-spacing-vertical); - border-radius: $borderRadius; - font-family: var(--ifm-font-family-monospace); - background-color: var(--ifm-background-surface-color); - } - - > dd { - &:not(:empty) { - padding-top: calc(var(--ifm-spacing-vertical) / 2); - margin-top: 0; - margin-left: var(--ifm-spacing-horizontal); - margin-right: var(--ifm-spacing-horizontal); - } - } - - // e.g. `class`, or `def` - em.property { - color: var(--ifm-font-color-base); - font-weight: bold; - } - - // e.g. `MyClass` - span.sig-name { - color: #2774b3; - font-weight: bold; - } - - // e.g classmethod - em.property { - color: #66b327; - } - - em.sig-param { - span.default_value { - color: #66b327; - } - } - - span.sig-return { - span.sig-return-typehint { - color: var(--ifm-font-color-base); - - pre { - color: var(--ifm-font-color-base); - } - } - } - - dl.field-list { - padding-top: calc(var(--ifm-spacing-vertical) / 2); - display: grid; - grid-template-columns: fit-content(30%) auto; - &:not(:first-child) { - border-top: 1px solid var(--ifm-hr-border-color); + &:has(li strong) { + background-color: var(--ifm-background-surface-color); + border-radius: 8px; + padding: 1rem 1rem 1rem 2rem; + box-shadow: 0 1px 3px rgba(0,0,0,0.06); } - dt { - margin-right: 0.5em; - } + li { + margin-bottom: 0.75rem; - dd { - font-family: var(--ifm-font-family-monospace); - } - - dt, - dd { - margin-left: 0; - padding-left: 0; - - &:not(:first-of-type) { - border-top: 1px solid var(--ifm-hr-border-color); - padding-top: var(--ifm-spacing-vertical); - } - &:not(:last-of-type) { - padding-bottom: var(--ifm-spacing-vertical); + strong { + display: inline-block; + color: #2774b3; + font-weight: 600; + min-width: 90px; } - ul { - list-style-type: none; - padding-left: 0; - li { - p { - margin: 0; - padding: 0; - 
} - } + code { + background: transparent; + color: #66b327; + padding: 0; } } + + // nested ul for params or return type formatting + ul { + margin-top: 0.5rem; + margin-bottom: 0.5rem; + padding-left: 1rem; + border-left: 2px dotted var(--ifm-color-emphasis-300); + } + } + + // Return type section + li:has(strong:contains("Return type")) { + font-style: italic; + color: var(--ifm-font-color-secondary); + } + + // Parameter type callouts like (Optional[str]) + em > code { + color: #c678dd; } }