#!/usr/bin/env python3 -m pytest

import os
import re
import sys

import pytest

from autogen import UserProxyAgent, config_list_from_json
from autogen.oai.openai_utils import filter_config

sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from conftest import MOCK_OPEN_AI_API_KEY, reason, skip_openai  # noqa: E402

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST  # noqa: E402

BLOG_POST_URL = "https://microsoft.github.io/autogen/blog/2023/04/21/LLM-tuning-math"
BLOG_POST_TITLE = "Does Model and Inference Parameter Matter in LLM Applications? - A Case Study for MATH | AutoGen"
BING_QUERY = "Microsoft"
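
# WebSurferAgent is an optional contrib module; importing it requires extra dependencies (e.g. markdownify),
# so skip every test in this file when the import fails.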
try:
    from autogen.agentchat.contrib.web_surfer import WebSurferAgent
except ImportError:
    skip_all = True
else:
    skip_all = False
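
# The Bing-backed search tests additionally need a real BING_API_KEY in the environment.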
try:
    BING_API_KEY = os.environ["BING_API_KEY"]
except KeyError:
    skip_bing = True
else:
    skip_bing = False

if not skip_openai:
    config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, file_location=KEY_LOC)


@pytest.mark.skipif(
    skip_all,
    reason="do not run if dependency is not installed",
)
def test_web_surfer() -> None:
    with pytest.MonkeyPatch.context() as mp:
        # we mock the API key so we can register functions (llm_config must be present for this to work)
        mp.setenv("OPENAI_API_KEY", MOCK_OPEN_AI_API_KEY)
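
        # viewport_size is roughly the number of characters shown per page; with 4096 the blog post below
        # should span several pages, which the scrolling assertions rely on.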
        page_size = 4096
        web_surfer = WebSurferAgent(
"web_surfer",
|
|
|
|
llm_config={"model": "gpt-4", "config_list": []},
|
|
|
|
browser_config={"viewport_size": page_size},
|
2024-02-10 06:09:08 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
# Sneak a peak at the function map, allowing us to call the functions for testing here
|
|
|
|
function_map = web_surfer._user_proxy._function_map
|
|
|
|
|
|
|
|
# Test some basic navigations
|
|
|
|
response = function_map["visit_page"](BLOG_POST_URL)
|
|
|
|
assert f"Address: {BLOG_POST_URL}".strip() in response
|
|
|
|
assert f"Title: {BLOG_POST_TITLE}".strip() in response

        # Test scrolling
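        # The browser reports "Viewport position: Showing page 1 of N"; capture N to learn how many pages there are.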
        m = re.search(r"\bViewport position: Showing page 1 of (\d+).", response)
        total_pages = int(m.group(1))  # type: ignore[union-attr]

        response = function_map["page_down"]()
        assert (
            f"Viewport position: Showing page 2 of {total_pages}." in response
        )  # Assumes the content is longer than one screen

        response = function_map["page_up"]()
        assert f"Viewport position: Showing page 1 of {total_pages}." in response

        # Try to scroll too far back up
        response = function_map["page_up"]()
        assert f"Viewport position: Showing page 1 of {total_pages}." in response

        # Try to scroll too far down
        for i in range(0, total_pages + 1):
            response = function_map["page_down"]()
        assert f"Viewport position: Showing page {total_pages} of {total_pages}." in response

        # Test Q&A and summarization -- we don't have a key so we expect it to fail (but it means the code path is correct)
        with pytest.raises(IndexError):
            response = function_map["read_page_and_answer"]("When was it founded?")

        with pytest.raises(IndexError):
            response = function_map["summarize_page"]()


@pytest.mark.skipif(
    skip_all or skip_openai,
    reason="dependency is not installed OR " + reason,
)
def test_web_surfer_oai() -> None:
    llm_config = {"config_list": config_list, "timeout": 180, "cache_seed": 42}

    # adding Azure name variations to the model list
    model = ["gpt-3.5-turbo-1106", "gpt-3.5-turbo-16k-0613", "gpt-3.5-turbo-16k"]
    model += [m.replace(".", "") for m in model]
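
    # Give the summarizer its own config, filtered down to the gpt-3.5 variants listed above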
    summarizer_llm_config = {
        "config_list": filter_config(config_list, dict(model=model)),  # type: ignore[no-untyped-call]
        "timeout": 180,
    }

    assert len(llm_config["config_list"]) > 0  # type: ignore[arg-type]
    assert len(summarizer_llm_config["config_list"]) > 0

    page_size = 4096
    web_surfer = WebSurferAgent(
        "web_surfer",
        llm_config=llm_config,
        summarizer_llm_config=summarizer_llm_config,
        browser_config={"viewport_size": page_size},
    )
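
    # With human_input_mode="NEVER" and is_termination_msg always True, each chat below is a single exchange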
    user_proxy = UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        code_execution_config=False,
        default_auto_reply="",
        is_termination_msg=lambda x: True,
    )

    # Make some requests that should test function calling
    user_proxy.initiate_chat(web_surfer, message="Please visit the page 'https://en.wikipedia.org/wiki/Microsoft'")

    user_proxy.initiate_chat(web_surfer, message="Please scroll down.")

    user_proxy.initiate_chat(web_surfer, message="Please scroll up.")

    user_proxy.initiate_chat(web_surfer, message="When was it founded?")

    user_proxy.initiate_chat(web_surfer, message="What's this page about?")


@pytest.mark.skipif(
    skip_bing,
    reason="do not run if bing api key is not available",
)
def test_web_surfer_bing() -> None:
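    # Only the browser's search functions are invoked directly here, so a mocked OpenAI key is enough;
    # the Bing API key, however, must be real.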
    page_size = 4096
    web_surfer = WebSurferAgent(
        "web_surfer",
        llm_config={
            "config_list": [
                {
                    "model": "gpt-3.5-turbo-16k",
                    "api_key": MOCK_OPEN_AI_API_KEY,
                }
            ]
        },
        browser_config={"viewport_size": page_size, "bing_api_key": BING_API_KEY},
    )

    # Sneak a peek at the function map, allowing us to call the functions for testing here
    function_map = web_surfer._user_proxy._function_map

    # Test informational queries
    response = function_map["informational_web_search"](BING_QUERY)
    assert f"Address: search: {BING_QUERY}" in response
    assert f"Title: {BING_QUERY} - Search" in response
    assert "Viewport position: Showing page 1 of 1." in response
    assert f"A Bing search for '{BING_QUERY}' found " in response

    # Test navigational queries
    response = function_map["navigational_web_search"](BING_QUERY + " Wikipedia")
    assert "Address: https://en.wikipedia.org/wiki/" in response


if __name__ == "__main__":
    """Runs this file's tests from the command line."""
    test_web_surfer()
    test_web_surfer_oai()
    test_web_surfer_bing()