Extend process_notebooks for testing (#1789)

* Extend process_notebooks for testing

* add command

* spelling and lint

* update docs

* Update contributing.md

* add shebang

* Update contributing.md

* lint
This commit is contained in:
Jack Gerrits 2024-02-29 15:47:30 -05:00 committed by GitHub
parent 4d0d486115
commit f6c9b13ac4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 342 additions and 65 deletions

View File

@ -52,7 +52,7 @@ jobs:
quarto render .
- name: Process notebooks
run: |
python process_notebooks.py
python process_notebooks.py render
- name: Test Build
run: |
if [ -e yarn.lock ]; then
@ -98,7 +98,7 @@ jobs:
quarto render .
- name: Process notebooks
run: |
python process_notebooks.py
python process_notebooks.py render
- name: Build website
run: |
if [ -e yarn.lock ]; then

View File

@ -3036,7 +3036,8 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"test_skip": "Requires interactive usage"
},
"nbformat": 4,
"nbformat_minor": 4

View File

@ -74,3 +74,37 @@ Learn more about configuring LLMs for agents [here](/docs/llm_configuration).
:::
````
``````
## Testing
Notebooks can be tested by running:
```sh
python website/process_notebooks.py test
```
This will automatically scan for all notebooks in the `notebook/` and `website/` directories.
To test a specific notebook pass its path:
```sh
python website/process_notebooks.py test notebook/agentchat_logging.ipynb
```
Options:
- `--timeout` - maximum time allowed for executing a single notebook
- `--exit-on-first-fail` - stop executing further notebooks after the first one fails
### Skip tests
If a notebook needs to be skipped then add to the notebook metadata:
```json
{
"...": "...",
"metadata": {
"test_skip": "REASON"
}
}
```
Note: Notebook metadata can be edited by opening the notebook in a text editor (or via "Open With..." -> "Text Editor" in VS Code).

View File

@ -33,8 +33,7 @@ Navigate to the `website` folder and run:
```console
pydoc-markdown
quarto render ./docs
python ./process_notebooks.py
python ./process_notebooks.py render
yarn start
```

View File

@ -28,11 +28,8 @@ fi
# Generate documentation using pydoc-markdown
pydoc-markdown
# Render the website using Quarto
quarto render ./docs
# Process notebooks using a Python script
python ./process_notebooks.py
python ./process_notebooks.py render
# Start the website using yarn
yarn start

View File

@ -175,6 +175,8 @@ Tests for the `autogen.agentchat.contrib` module may be skipped automatically if
required dependencies are not installed. Please consult the documentation for
each contrib module to see what dependencies are required.
See [here](https://github.com/microsoft/autogen/blob/main/notebook/contributing.md#testing) for how to run notebook tests.
#### Skip flags for tests
- `--skip-openai` for skipping tests that require access to OpenAI services.
@ -216,11 +218,11 @@ Then:
```console
npm install --global yarn # skip if you use the dev container we provided
pip install pydoc-markdown # skip if you use the dev container we provided
pip install pydoc-markdown pyyaml termcolor # skip if you use the dev container we provided
cd website
yarn install --frozen-lockfile --ignore-engines
pydoc-markdown
quarto render ./docs
python process_notebooks.py render
yarn start
```
@ -245,7 +247,7 @@ Once at the CLI in Docker run the following commands:
cd website
yarn install --frozen-lockfile --ignore-engines
pydoc-markdown
quarto render ./docs
python process_notebooks.py render
yarn start --host 0.0.0.0 --port 3000
```

View File

@ -1,11 +1,24 @@
#!/usr/bin/env python
from __future__ import annotations
import signal
import sys
from pathlib import Path
import subprocess
import argparse
import shutil
import json
import tempfile
import threading
import time
import typing
import concurrent.futures
import os
from typing import Optional, Tuple, Union
from dataclasses import dataclass
from multiprocessing import current_process
try:
import yaml
@ -13,6 +26,27 @@ except ImportError:
print("pyyaml not found.\n\nPlease install pyyaml:\n\tpip install pyyaml\n")
sys.exit(1)
# Optional test-time dependencies: the "render" subcommand works without
# nbclient/nbformat, but the "test" subcommand requires both.
try:
    import nbclient
    from nbclient.client import (
        CellExecutionError,
        CellTimeoutError,
        NotebookClient,
    )
except ImportError:
    # Only warn from the main process so pool workers don't repeat the message.
    if current_process().name == "MainProcess":
        print("nbclient not found.\n\nPlease install nbclient:\n\tpip install nbclient\n")
        print("test won't work without nbclient")

try:
    import nbformat
    from nbformat import NotebookNode
except ImportError:
    if current_process().name == "MainProcess":
        print("nbformat not found.\n\nPlease install nbformat:\n\tpip install nbformat\n")
        # Fixed: this message previously said "nbclient" (copy-paste error).
        print("test won't work without nbformat")
try:
from termcolor import colored
except ImportError:
@ -28,7 +62,7 @@ class Result:
self.stderr = stderr
def check_quarto_bin(quarto_bin: str = "quarto"):
def check_quarto_bin(quarto_bin: str = "quarto") -> None:
"""Check if quarto is installed."""
try:
subprocess.check_output([quarto_bin, "--version"])
@ -72,6 +106,17 @@ def extract_yaml_from_notebook(notebook: Path) -> typing.Optional[typing.Dict]:
def skip_reason_or_none_if_ok(notebook: Path) -> typing.Optional[str]:
"""Return a reason to skip the notebook, or None if it should not be skipped."""
if notebook.suffix != ".ipynb":
return "not a notebook"
if not notebook.exists():
return "file does not exist"
# Extra checks for notebooks in the notebook directory
if "notebook" not in notebook.parts:
return None
with open(notebook, "r", encoding="utf-8") as f:
content = f.read()
@ -121,56 +166,166 @@ def skip_reason_or_none_if_ok(notebook: Path) -> typing.Optional[str]:
return None
def process_notebook(src_notebook: Path, dest_dir: Path, quarto_bin: str, dry_run: bool) -> str:
def process_notebook(src_notebook: Path, website_dir: Path, notebook_dir: Path, quarto_bin: str, dry_run: bool) -> str:
"""Process a single notebook."""
reason_or_none = skip_reason_or_none_if_ok(src_notebook)
if reason_or_none:
return colored(f"Skipping {src_notebook.name}, reason: {reason_or_none}", "yellow")
target_mdx_file = dest_dir / f"{src_notebook.stem}.mdx"
intermediate_notebook = dest_dir / f"{src_notebook.stem}.ipynb"
in_notebook_dir = "notebook" in src_notebook.parts
# If the intermediate_notebook already exists, check if it is newer than the source file
if target_mdx_file.exists():
if target_mdx_file.stat().st_mtime > src_notebook.stat().st_mtime:
return colored(f"Skipping {src_notebook.name}, as target file is newer", "blue")
if in_notebook_dir:
relative_notebook = src_notebook.relative_to(notebook_dir)
dest_dir = notebooks_target_dir(website_directory=website_dir)
target_mdx_file = dest_dir / relative_notebook.with_suffix(".mdx")
intermediate_notebook = dest_dir / relative_notebook
if dry_run:
return colored(f"Would process {src_notebook.name}", "green")
# If the intermediate_notebook already exists, check if it is newer than the source file
if target_mdx_file.exists():
if target_mdx_file.stat().st_mtime > src_notebook.stat().st_mtime:
return colored(f"Skipping {src_notebook.name}, as target file is newer", "blue")
# Copy notebook to target dir
# The reason we copy the notebook is that quarto does not support rendering from a different directory
shutil.copy(src_notebook, intermediate_notebook)
if dry_run:
return colored(f"Would process {src_notebook.name}", "green")
# Check if another file has to be copied too
# Solely added for the purpose of agent_library_example.json
front_matter = extract_yaml_from_notebook(src_notebook)
# Should not be none at this point as we have already done the same checks as in extract_yaml_from_notebook
assert front_matter is not None, f"Front matter is None for {src_notebook.name}"
if "extra_files_to_copy" in front_matter:
for file in front_matter["extra_files_to_copy"]:
shutil.copy(src_notebook.parent / file, dest_dir / file)
# Copy notebook to target dir
# The reason we copy the notebook is that quarto does not support rendering from a different directory
shutil.copy(src_notebook, intermediate_notebook)
# Capture output
result = subprocess.run(
[quarto_bin, "render", intermediate_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
)
if result.returncode != 0:
return colored(f"Failed to render {intermediate_notebook}", "red") + f"\n{result.stderr}" + f"\n{result.stdout}"
# Check if another file has to be copied too
# Solely added for the purpose of agent_library_example.json
front_matter = extract_yaml_from_notebook(src_notebook)
# Should not be none at this point as we have already done the same checks as in extract_yaml_from_notebook
assert front_matter is not None, f"Front matter is None for {src_notebook.name}"
if "extra_files_to_copy" in front_matter:
for file in front_matter["extra_files_to_copy"]:
shutil.copy(src_notebook.parent / file, dest_dir / file)
# Unlink intermediate files
intermediate_notebook.unlink()
# Capture output
result = subprocess.run(
[quarto_bin, "render", intermediate_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
)
if result.returncode != 0:
return (
colored(f"Failed to render {intermediate_notebook}", "red")
+ f"\n{result.stderr}"
+ f"\n{result.stdout}"
)
if "extra_files_to_copy" in front_matter:
for file in front_matter["extra_files_to_copy"]:
(dest_dir / file).unlink()
# Unlink intermediate files
intermediate_notebook.unlink()
# Post process the file
post_process_mdx(target_mdx_file)
if "extra_files_to_copy" in front_matter:
for file in front_matter["extra_files_to_copy"]:
(dest_dir / file).unlink()
# Post process the file
post_process_mdx(target_mdx_file)
else:
target_mdx_file = src_notebook.with_suffix(".mdx")
# If the intermediate_notebook already exists, check if it is newer than the source file
if target_mdx_file.exists():
if target_mdx_file.stat().st_mtime > src_notebook.stat().st_mtime:
return colored(f"Skipping {src_notebook.name}, as target file is newer", "blue")
if dry_run:
return colored(f"Would process {src_notebook.name}", "green")
result = subprocess.run(
[quarto_bin, "render", src_notebook], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
)
if result.returncode != 0:
return colored(f"Failed to render {src_notebook}", "red") + f"\n{result.stderr}" + f"\n{result.stdout}"
return colored(f"Processed {src_notebook.name}", "green")
# Notebook execution based on nbmake: https://github.com/treebeardtech/nbmake
@dataclass
class NotebookError:
    # Name of the raised error (e.g. "CellExecutionError"'s ename), or "timeout".
    error_name: str
    # The error's message (evalue), if any.
    error_value: Optional[str]
    # Traceback lines joined into a single string; empty for timeouts.
    traceback: str
    # Source of the failing (or unfinished) code cell.
    cell_source: str
@dataclass
class NotebookSkip:
    # Human-readable reason taken from the notebook's "test_skip" metadata.
    reason: str


# nbformat version passed to nbformat.read when loading notebooks.
NB_VERSION = 4
def test_notebook(notebook_path: Path, timeout: int = 300) -> Tuple[Path, Optional[Union[NotebookError, NotebookSkip]]]:
    """Execute a notebook and report the outcome.

    Returns the notebook path paired with: None on success, a NotebookSkip
    when the notebook's metadata requests skipping, or a NotebookError on a
    cell failure or timeout.
    """
    nb = nbformat.read(str(notebook_path), NB_VERSION)

    # Per-notebook overrides via "execution" metadata.
    allow_errors = False
    if "execution" in nb.metadata:
        if "timeout" in nb.metadata.execution:
            timeout = nb.metadata.execution.timeout
        if "allow_errors" in nb.metadata.execution:
            allow_errors = nb.metadata.execution.allow_errors

    # "test_skip" metadata short-circuits execution entirely.
    if "test_skip" in nb.metadata:
        return notebook_path, NotebookSkip(reason=nb.metadata.test_skip)

    try:
        c = NotebookClient(
            nb,
            timeout=timeout,
            allow_errors=allow_errors,
            record_timing=True,  # needed so get_timeout_info can find the unfinished cell
        )
        # Silence debugger/tokenizer warnings emitted during execution.
        os.environ["PYDEVD_DISABLE_FILE_VALIDATION"] = "1"
        os.environ["TOKENIZERS_PARALLELISM"] = "false"
        # Execute in a temporary directory so notebooks don't pollute the repo.
        with tempfile.TemporaryDirectory() as tempdir:
            c.execute(cwd=tempdir)
    except CellExecutionError:
        error = get_error_info(nb)
        assert error is not None
        return notebook_path, error
    except CellTimeoutError:
        error = get_timeout_info(nb)
        assert error is not None
        return notebook_path, error

    return notebook_path, None
# Find the first code cell which did not complete.
def get_timeout_info(
    nb: NotebookNode,
) -> Optional[NotebookError]:
    """Return a NotebookError for the first unfinished code cell, else None.

    Relies on record_timing=True execution metadata: a code cell that never
    produced a "shell.execute_reply" entry did not finish before the timeout.
    """
    # Fixed: the index from enumerate() was never used; iterate cells directly.
    for cell in nb.cells:
        if cell.cell_type != "code":
            continue
        if "shell.execute_reply" not in cell.metadata.execution:
            return NotebookError(
                error_name="timeout",
                error_value="",
                traceback="",
                cell_source="".join(cell["source"]),
            )

    return None
def get_error_info(nb: NotebookNode) -> Optional[NotebookError]:
    """Return details of a failing code cell's error output, or None."""
    # NOTE(review): the original comment here said "get LAST error", but this
    # loop returns on the FIRST cell containing an error output — confirm
    # which behavior is intended.
    for cell in nb["cells"]:
        if cell["cell_type"] != "code":
            continue
        # An output counts as an error by output_type or by carrying "ename".
        errors = [output for output in cell["outputs"] if output["output_type"] == "error" or "ename" in output]
        if errors:
            traceback = "\n".join(errors[0].get("traceback", ""))
            return NotebookError(
                error_name=errors[0].get("ename", ""),
                error_value=errors[0].get("evalue", ""),
                traceback=traceback,
                cell_source="".join(cell["source"]),
            )
    return None
# rendered_notebook is the final mdx file
def post_process_mdx(rendered_mdx: Path) -> None:
notebook_name = f"{rendered_mdx.stem}.ipynb"
@ -234,9 +389,32 @@ def path(path_str: str) -> Path:
return Path(path_str)
def main():
def collect_notebooks(notebook_directory: Path, website_directory: Path) -> typing.List[Path]:
    """Gather candidate notebooks: the top-level ones in the notebook
    directory plus any found anywhere under the website's docs/ tree."""
    return [
        *notebook_directory.glob("*.ipynb"),
        *website_directory.glob("docs/**/*.ipynb"),
    ]
def start_thread_to_terminate_when_parent_process_dies(ppid: int):
    """Watchdog: SIGTERM this process once the process with pid *ppid* dies.

    Intended as a ProcessPoolExecutor initializer so worker processes do not
    outlive the main process (e.g. after Ctrl-C in the parent).
    """
    pid = os.getpid()

    def f() -> None:
        while True:
            try:
                # Signal 0 performs error checking only: raises OSError when
                # the target pid no longer exists.
                os.kill(ppid, 0)
            except OSError:
                os.kill(pid, signal.SIGTERM)
            time.sleep(1)

    # Daemon thread so it never blocks interpreter shutdown.
    thread = threading.Thread(target=f, daemon=True)
    thread.start()
def main() -> None:
script_dir = Path(__file__).parent.absolute()
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="subcommand")
parser.add_argument(
"--notebook-directory",
type=path,
@ -246,29 +424,95 @@ def main():
parser.add_argument(
"--website-directory", type=path, help="Root directory of docusarus website", default=script_dir
)
parser.add_argument("--quarto-bin", help="Path to quarto binary", default="quarto")
parser.add_argument("--dry-run", help="Don't render", action="store_true")
parser.add_argument("--workers", help="Number of workers to use", type=int, default=-1)
args = parser.parse_args()
render_parser = subparsers.add_parser("render")
render_parser.add_argument("--quarto-bin", help="Path to quarto binary", default="quarto")
render_parser.add_argument("--dry-run", help="Don't render", action="store_true")
render_parser.add_argument("notebooks", type=path, nargs="*", default=None)
test_parser = subparsers.add_parser("test")
test_parser.add_argument("--timeout", help="Timeout for each notebook", type=int, default=60)
test_parser.add_argument("--exit-on-first-fail", "-e", help="Exit after first test fail", action="store_true")
test_parser.add_argument("notebooks", type=path, nargs="*", default=None)
args = parser.parse_args()
if args.workers == -1:
args.workers = None
check_quarto_bin(args.quarto_bin)
if args.subcommand is None:
print("No subcommand specified")
sys.exit(1)
if not notebooks_target_dir(args.website_directory).exists():
notebooks_target_dir(args.website_directory).mkdir(parents=True)
if args.notebooks:
collected_notebooks = args.notebooks
else:
collected_notebooks = collect_notebooks(args.notebook_directory, args.website_directory)
with concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) as executor:
futures = [
executor.submit(
process_notebook, f, notebooks_target_dir(args.website_directory), args.quarto_bin, args.dry_run
)
for f in args.notebook_directory.glob("*.ipynb")
]
for future in concurrent.futures.as_completed(futures):
print(future.result())
filtered_notebooks = []
for notebook in collected_notebooks:
reason = skip_reason_or_none_if_ok(notebook)
if reason:
print(f"{colored('[Skip]', 'yellow')} {colored(notebook.name, 'blue')}: {reason}")
else:
filtered_notebooks.append(notebook)
print(f"Processing {len(filtered_notebooks)} notebook{'s' if len(filtered_notebooks) != 1 else ''}...")
if args.subcommand == "test":
failure = False
with concurrent.futures.ProcessPoolExecutor(
max_workers=args.workers,
initializer=start_thread_to_terminate_when_parent_process_dies,
initargs=(os.getpid(),),
) as executor:
futures = [executor.submit(test_notebook, f, args.timeout) for f in filtered_notebooks]
for future in concurrent.futures.as_completed(futures):
notebook, optional_error_or_skip = future.result()
if isinstance(optional_error_or_skip, NotebookError):
if optional_error_or_skip.error_name == "timeout":
print(
f"{colored('[Error]', 'red')} {colored(notebook.name, 'blue')}: {optional_error_or_skip.error_name}"
)
else:
print("-" * 80)
print(
f"{colored('[Error]', 'red')} {colored(notebook.name, 'blue')}: {optional_error_or_skip.error_name} - {optional_error_or_skip.error_value}"
)
print(optional_error_or_skip.traceback)
print("-" * 80)
if args.exit_on_first_fail:
sys.exit(1)
failure = True
elif isinstance(optional_error_or_skip, NotebookSkip):
print(
f"{colored('[Skip]', 'yellow')} {colored(notebook.name, 'blue')}: {optional_error_or_skip.reason}"
)
else:
print(f"{colored('[OK]', 'green')} {colored(notebook.name, 'blue')}")
if failure:
sys.exit(1)
elif args.subcommand == "render":
check_quarto_bin(args.quarto_bin)
if not notebooks_target_dir(args.website_directory).exists():
notebooks_target_dir(args.website_directory).mkdir(parents=True)
with concurrent.futures.ProcessPoolExecutor(max_workers=args.workers) as executor:
futures = [
executor.submit(
process_notebook, f, args.website_directory, args.notebook_directory, args.quarto_bin, args.dry_run
)
for f in filtered_notebooks
]
for future in concurrent.futures.as_completed(futures):
print(future.result())
else:
print("Unknown subcommand")
sys.exit(1)
if __name__ == "__main__":