import pytest
import os
import sys
import requests
import hashlib
import re
import math
from agentchat . test_assistant_agent import KEY_LOC # noqa: E402
# Live fixtures for the browser tests below: a known AutoGen blog post, a
# Wikipedia article, a raw-text README, an avatar image, and a paper PDF.
BLOG_POST_URL = "https://microsoft.github.io/autogen/blog/2023/04/21/LLM-tuning-math"
BLOG_POST_TITLE = "Does Model and Inference Parameter Matter in LLM Applications? - A Case Study for MATH | AutoGen"
BLOG_POST_STRING = "Large language models (LLMs) are powerful tools that can generate natural language texts for various applications, such as chatbots, summarization, translation, and more. GPT-4 is currently the state of the art LLM in the world. Is model selection irrelevant? What about inference parameters?"

WIKIPEDIA_URL = "https://en.wikipedia.org/wiki/Microsoft"
WIKIPEDIA_TITLE = "Microsoft - Wikipedia"
WIKIPEDIA_STRING = "Redmond"

PLAIN_TEXT_URL = "https://raw.githubusercontent.com/microsoft/autogen/main/README.md"
IMAGE_URL = "https://github.com/afourney.png"

PDF_URL = "https://arxiv.org/pdf/2308.08155.pdf"
PDF_STRING = "Figure 1: AutoGen enables diverse LLM-based applications using multi-agent conversations."

# Bing-search fixtures; the expected title/snippet are derived from the query.
BING_QUERY = "Microsoft"
BING_TITLE = f"{BING_QUERY} - Search"
BING_STRING = f"A Bing search for '{BING_QUERY}' found"
# Skip the whole module when the optional browser dependency stack is absent.
skip_all = False
try:
    from autogen.browser_utils import SimpleTextBrowser
except ImportError:
    skip_all = True

# Skip only the Bing tests when no API key is configured in the environment.
skip_bing = False
try:
    BING_API_KEY = os.environ["BING_API_KEY"]
except KeyError:
    skip_bing = True
def _rm_folder ( path ) :
""" Remove all the regular files in a folder, then deletes the folder. Assumes a flat file structure, with no subdirectories. """
for fname in os . listdir ( path ) :
fpath = os . path . join ( path , fname )
if os . path . isfile ( fpath ) :
os . unlink ( fpath )
os . rmdir ( path )
@pytest.mark.skipif(
    skip_all,
    reason="do not run if dependency is not installed",
)
def test_simple_text_browser():
    """End-to-end check of SimpleTextBrowser against live sites: page visits,
    viewport pagination, scrolling limits, Wikipedia/plain-text handling,
    image downloads (verified by MD5), and PDF text extraction.

    Requires network access; skipped when the browser dependency stack is
    not installed.
    """
    # Create a downloads folder (removing any leftover ones from prior tests)
    downloads_folder = os.path.join(KEY_LOC, "downloads")
    if os.path.isdir(downloads_folder):
        _rm_folder(downloads_folder)
    os.mkdir(downloads_folder)

    # Instantiate the browser
    user_agent = "python-requests/" + requests.__version__
    viewport_size = 1024
    browser = SimpleTextBrowser(
        downloads_folder=downloads_folder,
        viewport_size=viewport_size,
        request_kwargs={
            "headers": {"User-Agent": user_agent},
        },
    )

    # Test that we can visit a page and find what we expect there
    top_viewport = browser.visit_page(BLOG_POST_URL)
    assert browser.viewport == top_viewport
    assert browser.page_title.strip() == BLOG_POST_TITLE.strip()
    assert BLOG_POST_STRING in browser.page_content

    # Check if page splitting works
    approx_pages = math.ceil(len(browser.page_content) / viewport_size)  # May be fewer, since it aligns to word breaks
    assert len(browser.viewport_pages) <= approx_pages
    assert abs(len(browser.viewport_pages) - approx_pages) <= 1  # allow only a small deviation
    assert browser.viewport_pages[0][0] == 0
    assert browser.viewport_pages[-1][1] == len(browser.page_content)

    # Make sure we can reconstruct the full contents from the split pages
    buffer = ""
    for bounds in browser.viewport_pages:
        buffer += browser.page_content[bounds[0] : bounds[1]]
    assert buffer == browser.page_content

    # Test scrolling (scroll all the way to the bottom)
    for i in range(1, len(browser.viewport_pages)):
        browser.page_down()
        assert browser.viewport_current_page == i

    # Test scrolling beyond the limits (position must clamp to the last page)
    for i in range(0, 5):
        browser.page_down()
        assert browser.viewport_current_page == len(browser.viewport_pages) - 1

    # Test scrolling (scroll all the way back toward the top)
    for i in range(len(browser.viewport_pages) - 2, 0, -1):
        browser.page_up()
        assert browser.viewport_current_page == i

    # Test scrolling beyond the limits (position must clamp to the first page)
    for i in range(0, 5):
        browser.page_up()
        assert browser.viewport_current_page == 0

    # Test Wikipedia handling
    assert WIKIPEDIA_STRING in browser.visit_page(WIKIPEDIA_URL)
    assert WIKIPEDIA_TITLE.strip() == browser.page_title.strip()

    # Visit a plain-text file
    response = requests.get(PLAIN_TEXT_URL)
    response.raise_for_status()
    expected_results = response.text
    browser.visit_page(PLAIN_TEXT_URL)
    assert browser.page_content.strip() == expected_results.strip()

    # Directly download an image, and compute its md5
    response = requests.get(IMAGE_URL, stream=True)
    response.raise_for_status()
    expected_md5 = hashlib.md5(response.raw.read()).hexdigest()

    # Visit an image causing it to be downloaded by the SimpleTextBrowser, then compute its md5
    viewport = browser.visit_page(IMAGE_URL)
    m = re.search(r"Downloaded '(.*?)' to '(.*?)'", viewport)
    # Fail with a clear message (not AttributeError) if no download confirmation appeared
    assert m is not None, f"no download confirmation found in viewport: {viewport!r}"
    fetched_url = m.group(1)
    download_loc = m.group(2)
    assert fetched_url == IMAGE_URL

    # MD5s should match
    with open(download_loc, "rb") as fh:
        downloaded_md5 = hashlib.md5(fh.read()).hexdigest()
    assert expected_md5 == downloaded_md5

    # Fetch a PDF
    viewport = browser.visit_page(PDF_URL)
    assert PDF_STRING in viewport

    # Clean up
    _rm_folder(downloads_folder)
@pytest.mark.skipif(
    skip_bing,
    reason="do not run bing tests if key is missing",
)
def test_bing_search():
    """Verify the browser's ``bing:`` URI scheme performs a live Bing search
    whose results land on a single viewport page."""
    # The browser identifies itself the same way a plain `requests` call would.
    agent = "python-requests/" + requests.__version__
    browser = SimpleTextBrowser(
        bing_api_key=BING_API_KEY,
        viewport_size=1024,
        request_kwargs={"headers": {"User-Agent": agent}},
    )

    results_page = browser.visit_page("bing:" + BING_QUERY)
    assert BING_STRING in results_page
    assert browser.page_title == BING_TITLE
    # The whole result set should fit in exactly one viewport page.
    assert len(browser.viewport_pages) == 1
    assert browser.viewport_pages[0] == (0, len(browser.page_content))
# Allow running this module directly (outside pytest). NOTE(review): skip
# markers are not honored in this mode, so both tests run unconditionally.
if __name__ == "__main__":
    """Runs this file's tests from the command line."""
    test_simple_text_browser()
    test_bing_search()