From 7ea39ad0b039fdefb54440f83df6d26dd66afbe8 Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Mon, 9 Jun 2025 18:50:31 +0900
Subject: [PATCH] test build on sphinx-markdown-builder

---
 docs-website/sphinx/Makefile                  |   6 +-
 docs-website/sphinx/apidocs/sdk.rst           |   9 +
 docs-website/sphinx/conf.py                   |   2 +
 .../sphinx/convert_sphinx_to_docusaurus.py    |  89 +++------
 docs-website/sphinx/requirements.txt          |   1 +
 docs-website/src/styles/sphinx.scss           | 171 +++++++-----------
 6 files changed, 106 insertions(+), 172 deletions(-)
 create mode 100644 docs-website/sphinx/apidocs/sdk.rst
diff --git a/docs-website/sphinx/Makefile b/docs-website/sphinx/Makefile
index e8c419f991..14083acee2 100644
--- a/docs-website/sphinx/Makefile
+++ b/docs-website/sphinx/Makefile
@@ -26,10 +26,10 @@ $(VENV_SENTINEL): requirements.txt
 
 # Not using Python's http.server because it enables caching headers.
 serve:
-	serve -p 3001 _build/html/
+	serve -p 3001 _build/markdown/
 
-md: html
-	$(VENV_DIR)/bin/python3 convert_sphinx_to_docusaurus.py
+md: $(VENV_SENTINEL)  # the recipe runs tools from the venv, so make sure it has been created
+	@$(SPHINXBUILD) -M markdown "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) && $(VENV_DIR)/bin/python3 convert_sphinx_to_docusaurus.py
 
 # Route other targets to Sphinx using the new
 # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
diff --git a/docs-website/sphinx/apidocs/sdk.rst b/docs-website/sphinx/apidocs/sdk.rst
new file mode 100644
index 0000000000..579be1be2b
--- /dev/null
+++ b/docs-website/sphinx/apidocs/sdk.rst
@@ -0,0 +1,9 @@
+DataHub SDK
+===========
+
+The DataHub SDK is a Python library for interacting with the DataHub platform.
+
+.. automodule:: datahub.sdk.search_client
+
+.. automodule:: datahub.sdk.lineage_client
+
diff --git a/docs-website/sphinx/conf.py b/docs-website/sphinx/conf.py
index 49cd20d5ef..276fdb1904 100644
--- a/docs-website/sphinx/conf.py
+++ b/docs-website/sphinx/conf.py
@@ -26,8 +26,10 @@ extensions = [
     "sphinx_autodoc_typehints",
     # This enables us to autogenerate docs for our CLI.
     "sphinx_click",
+    "sphinx_markdown_builder",
 ]
 
+markdown_anchor_sections = True
 napoleon_use_param = True
 
 # Move type hint info to function description instead of signature
diff --git a/docs-website/sphinx/convert_sphinx_to_docusaurus.py b/docs-website/sphinx/convert_sphinx_to_docusaurus.py
index 891e08d8c7..ef9e2c7cc6 100644
--- a/docs-website/sphinx/convert_sphinx_to_docusaurus.py
+++ b/docs-website/sphinx/convert_sphinx_to_docusaurus.py
@@ -1,78 +1,39 @@
 import pathlib
-import json
-from bs4 import BeautifulSoup
-
 
 SPHINX_ROOT_DIR = pathlib.Path(".")
-SPHINX_BUILD_DIR = SPHINX_ROOT_DIR / pathlib.Path("_build/html/apidocs")
+SPHINX_BUILD_DIR = SPHINX_ROOT_DIR / pathlib.Path("_build/markdown/apidocs")
 DOCS_OUTPUT_DIR = pathlib.Path("../docs/python-sdk")
 
 
-def html_to_mdx(html: str) -> str:
-    # Because the HTML uses `class` and has `{}` in it, it isn't valid
-    # MDX. As such, we use React's dangerouslySetInnerHTML.
-    return f"""
-
-<div dangerouslySetInnerHTML={{{{__html: {json.dumps(html)}}}}}></div>
-
-"""
-
-
-def bs4_to_mdx(soup: BeautifulSoup) -> str:
-    # TODO: Eventually we should do something smarter here to
-    # generate something that's closer to real Markdown. This would
-    # be helpful, for example, for enabling Docusaurus to generate
-    # a table of contents for the page.
-    return html_to_mdx(str(soup))
-
-
-def convert_html_to_md(html_file: pathlib.Path) -> str:
-    html = html_file.read_text()
-    soup = BeautifulSoup(html, "html.parser")
-
-    body = soup.find("main").find("div", {"class": "bd-article-container"})
-    article = body.find("article")
-
-    # Remove all the "permalink to this heading" links.
-    for link in article.find_all("a", {"class": "headerlink"}):
-        link.decompose()
-
-    # Remove the trailing " – " from arguments that are missing
-    # a description.
-    for item in article.select("dl.field-list dd p"):
-        # Note - that's U+2013, not a normal hyphen.
-        if str(item).endswith(" – </p>"):
-            parent = item.parent
-            # print("orig item", item)
-            new_item = BeautifulSoup(str(item)[:-7] + "</p>", "html.parser")
-            # print("new-item", str(new_item))
-            parent.p.replace_with(new_item)
-            # print("item post replace", parent)
-
-    # Extract title from the h1.
-    title_element = article.find("h1")
-    title = title_element.text
-    title_element.decompose()
-
-    # TODO - generate nicer slugs for these pages
-    md_meta = f"""---
-title: {title}
----\n\n"""
-
-    return md_meta + bs4_to_mdx(article)
-
-
 def main():
+    """Copy Sphinx's Markdown output into the Docusaurus docs tree, escaped and wrapped for MDX."""
     DOCS_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 
-    for doc in SPHINX_BUILD_DIR.glob("**/*.html"):
-        md = convert_html_to_md(doc)
-
-        outfile = DOCS_OUTPUT_DIR / doc.relative_to(SPHINX_BUILD_DIR).with_suffix(".md")
+    for doc in SPHINX_BUILD_DIR.glob("**/*.md"):
+        outfile = DOCS_OUTPUT_DIR / doc.relative_to(SPHINX_BUILD_DIR)
         outfile.parent.mkdir(parents=True, exist_ok=True)
-        outfile.write_text(md)
 
-        print(f"Generated {outfile}")
+        # Pin UTF-8 so decoding does not depend on the platform's locale encoding.
+        content = doc.read_text(encoding="utf-8")
+
+        # Neutralize angle-bracket tokens that MDX would presumably parse as JSX/HTML tags.
+        replacements = [
+            ("<function ", "<\\function "),
+            ("<id>", "<\\id>"),
+            ("<type>", "<\\type>"),
+            ("<id1>", "<\\id1>"),
+            ("<id2>", "<\\id2>"),
+            ("MDXContent.isMDXComponent = true", ""),
+        ]
+        for old, new in replacements:
+            content = content.replace(old, new)
+
+        # Wrap the page in a `python-sdk` div so stylesheet rules can be scoped to it.
+        final_content = f"<div className=\"python-sdk\">\n\n{content.strip()}\n\n</div>\n"
+
+        outfile.write_text(final_content, encoding="utf-8")
+
+        print(f"✅ Generated {outfile}")
 
 if __name__ == "__main__":
     main()
diff --git a/docs-website/sphinx/requirements.txt b/docs-website/sphinx/requirements.txt
index d9e7eb197e..70eabcd0e1 100644
--- a/docs-website/sphinx/requirements.txt
+++ b/docs-website/sphinx/requirements.txt
@@ -5,6 +5,7 @@ sphinx-click==4.4.0
 sphinx_autodoc_typehints==1.22
 pydata-sphinx-theme==0.13.1
 snowballstemmer>=2.2,<3 # Fixes https://github.com/sphinx-doc/sphinx/issues/13533
+sphinx-markdown-builder==0.6.8
 
 # Because of https://github.com/pydata/pydata-sphinx-theme/issues/108
 accessible-pygments
diff --git a/docs-website/src/styles/sphinx.scss b/docs-website/src/styles/sphinx.scss
index 022ba68afa..05235df13d 100644
--- a/docs-website/src/styles/sphinx.scss
+++ b/docs-website/src/styles/sphinx.scss
@@ -1,124 +1,85 @@
-// Styles for Sphinx Python SDK generated docs
-$borderRadius: 5px;
+.python-sdk {
+  font-size: 16px;
+  line-height: 1.6;
+  color: var(--ifm-font-color-base);
 
-dl.py {
-  margin-bottom: calc(var(--ifm-spacing-vertical) * 2);
-  font-size: 14px;
-  border: 1px solid var(--ifm-hr-border-color);
-  border-radius: $borderRadius;
+  h1, h2, h3, h4 {
+    font-weight: 600;
+    margin-top: 2rem;
+    margin-bottom: 1rem;
+  }
 
-  code {
-    border: none;
-    background: none;
+  h3, h4 {
+    border-bottom: 1px solid var(--ifm-hr-border-color);
+    padding-bottom: 0.25rem;
+  }
+
+  // Signature block
+  h3:has(code), h4:has(code) {
+    background-color: var(--ifm-code-background);
+    border-radius: 6px;
+    padding: 0.75rem 1rem 0.75rem 2rem;
+    font-family: var(--ifm-font-family-monospace);
+    font-size: 0.95rem;
+    font-weight: 500;
+    color: var(--ifm-font-color-base);
   }
 
   p {
-    margin-bottom: 0;
+    margin-bottom: 1rem;
   }
 
-  dl {
-    margin-bottom: var(--ifm-spacing-vertical);
+  code {
+    background-color: var(--ifm-code-background);
+    padding: 0.2em 0.4em;
+    border-radius: 4px;
+    font-size: 0.9em;
   }
 
-  // The parameter name:
-  em.sig-param > span:first-child {
-    font-weight: bold;
-  }
+  ul {
+    margin-bottom: 1.5rem;
 
-  > dd:not(:empty) {
-    padding-bottom: var(--ifm-spacing-vertical);
-  }
-
-  dt.sig {
-    box-sizing: border-box;
-    font-size: 0.9rem;
-    padding: var(--ifm-spacing-vertical);
-    border-radius: $borderRadius;
-    font-family: var(--ifm-font-family-monospace);
-    background-color: var(--ifm-background-surface-color);
-  }
-
-  > dd {
-    &:not(:empty) {
-      padding-top: calc(var(--ifm-spacing-vertical) / 2);
-      margin-top: 0;
-      margin-left: var(--ifm-spacing-horizontal);
-      margin-right: var(--ifm-spacing-horizontal);
-    }
-  }
-
-  // e.g. `class`, or `def`
-  em.property {
-    color: var(--ifm-font-color-base);
-    font-weight: bold;
-  }
-
-  // e.g. `MyClass`
-  span.sig-name {
-    color: #2774b3;
-    font-weight: bold;
-  }
-
-  // e.g classmethod
-  em.property {
-    color: #66b327;
-  }
-
-  em.sig-param {
-    span.default_value {
-      color: #66b327;
-    }
-  }
-
-  span.sig-return {
-    span.sig-return-typehint {
-      color: var(--ifm-font-color-base);
-
-      pre {
-        color: var(--ifm-font-color-base);
-      }
-    }
-  }
-
-  dl.field-list {
-    padding-top: calc(var(--ifm-spacing-vertical) / 2);
-    display: grid;
-    grid-template-columns: fit-content(30%) auto;
-    &:not(:first-child) {
-      border-top: 1px solid var(--ifm-hr-border-color);
+    &:has(li strong) {
+      background-color: var(--ifm-background-surface-color);
+      border-radius: 8px;
+      padding: 1rem 1rem 1rem 2rem;
+      box-shadow: 0 1px 3px rgba(0,0,0,0.06);
     }
 
-    dt {
-      margin-right: 0.5em;
-    }
+    li {
+      margin-bottom: 0.75rem;
 
-    dd {
-      font-family: var(--ifm-font-family-monospace);
-    }
-
-    dt,
-    dd {
-      margin-left: 0;
-      padding-left: 0;
-
-      &:not(:first-of-type) {
-        border-top: 1px solid var(--ifm-hr-border-color);
-        padding-top: var(--ifm-spacing-vertical);
-      }
-      &:not(:last-of-type) {
-        padding-bottom: var(--ifm-spacing-vertical);
+      strong {
+        display: inline-block;
+        color: #2774b3;
+        font-weight: 600;
+        min-width: 90px;
       }
 
-      ul {
-        list-style-type: none;
-        padding-left: 0;
-        li {
-          p {
-            margin: 0;
-            padding: 0;
-          }
-        }
+      code {
+        background: transparent;
+        color: #66b327;
+        padding: 0;
       }
     }
+
+    // nested ul for params or return type formatting
+    ul {
+      margin-top: 0.5rem;
+      margin-bottom: 0.5rem;
+      padding-left: 1rem;
+      border-left: 2px dotted var(--ifm-color-emphasis-300);
+    }
+  }
+
+  // Return type section. NOTE(review): `:contains()` is not a standard CSS pseudo-class, so browsers presumably drop this whole rule — confirm, then target a class instead.
+  li:has(strong:contains("Return type")) {
+    font-style: italic;
+    color: var(--ifm-font-color-secondary);
+  }
+
+  // Parameter type callouts like (Optional[str])
+  em > code {
+    color: #c678dd;
   }
 }