2024-10-17 21:33:33 -07:00
import asyncio
import logging
import json
import os
import re
import nltk
from typing import Any , Dict , List , Tuple , Union
2024-12-03 17:00:44 -08:00
from autogen_core import AgentId , AgentProxy , TopicId
2024-12-04 16:23:20 -08:00
from autogen_core import SingleThreadedAgentRuntime
2024-12-06 01:59:28 -08:00
from autogen_core import EVENT_LOGGER_NAME
2024-12-03 17:00:44 -08:00
from autogen_core import DefaultSubscription , DefaultTopicId
2024-12-06 01:23:05 -08:00
from autogen_ext . code_executors . local import LocalCommandLineCodeExecutor
2024-12-09 13:00:08 -05:00
from autogen_core . models import (
2024-10-17 21:33:33 -07:00
ChatCompletionClient ,
UserMessage ,
SystemMessage ,
LLMMessage ,
)
from autogen_magentic_one . markdown_browser import MarkdownConverter , UnsupportedFormatException
from autogen_magentic_one . agents . coder import Coder , Executor
from autogen_magentic_one . agents . orchestrator import RoundRobinOrchestrator , LedgerOrchestrator
from autogen_magentic_one . messages import BroadcastMessage , OrchestrationEvent , RequestReplyMessage , ResetMessage , DeactivateMessage
from autogen_magentic_one . agents . multimodal_web_surfer import MultimodalWebSurfer
from autogen_magentic_one . agents . file_surfer import FileSurfer
2025-01-08 09:33:28 -05:00
from autogen_magentic_one . utils import LogHandler , message_content_to_str
2024-10-17 21:33:33 -07:00
import evaluation_harness
from evaluation_harness . env_config import (
ACCOUNTS ,
GITLAB ,
MAP ,
REDDIT ,
SHOPPING ,
SHOPPING_ADMIN ,
WIKIPEDIA ,
HOMEPAGE ,
SITE_URLS ,
LOGIN_PROMPTS ,
SITE_DESCRIPTIONS ,
url_to_sitename ,
)
# Placeholder tokens that appear in the task template files, mapped to the
# concrete site URLs imported from evaluation_harness.env_config. main()
# substitutes every key with its value before parsing the templates as JSON.
REPLACEMENTS = {
    "__REDDIT__": REDDIT,
    "__SHOPPING__": SHOPPING,
    "__SHOPPING_ADMIN__": SHOPPING_ADMIN,
    "__GITLAB__": GITLAB,
    "__WIKIPEDIA__": WIKIPEDIA,
    "__MAP__": MAP,
    "__HOMEPAGE__": HOMEPAGE,
}

# Import-time side effect: make sure the NLTK "punkt" resource is available.
# NOTE(review): presumably required by the evaluation_harness evaluators -- confirm.
nltk.download("punkt")
async def response_preparer(task: str, source: str, client: ChatCompletionClient, transcript: List[LLMMessage]) -> str:
    """Ask the model to condense a finished team conversation into a final answer.

    The prompt is assembled as a list of ``UserMessage`` objects: an
    introduction quoting ``task``, one message per ``transcript`` entry, and a
    closing instruction that asks for a ``FINAL ANSWER: ...`` line.

    Args:
        task: The original request text, quoted in the first and last prompts.
        source: Source label attached to the framing messages; also used as the
            fallback label for transcript entries that carry no ``source``.
        client: Chat-completion client used to generate the answer.
        transcript: Conversation history to replay to the model.

    Returns:
        The raw text content of the model response (the caller is responsible
        for extracting the part after ``FINAL ANSWER:``).

    Raises:
        AssertionError: If the model response content is not a string.
    """
    messages: List[LLMMessage] = [
        UserMessage(
            content=f"Earlier you were asked the following:\n\n{task}\n\nYour team then worked diligently to address that request. Here is a transcript of that conversation:",
            source=source,
        )
    ]

    # Copy the transcript into this context. Every entry is re-wrapped as a
    # UserMessage; entries whose message type has no `source` attribute fall
    # back to the caller-supplied label instead of raising AttributeError.
    messages.extend(
        UserMessage(
            content=message_content_to_str(message.content),
            source=getattr(message, "source", source),
        )
        for message in transcript
    )

    # Ask for the final answer. The template text must keep "FINAL ANSWER:"
    # (no space before the colon) so the caller's regex can find it.
    messages.append(
        UserMessage(
            content=f"""
Read the above conversation and output a FINAL ANSWER to the original request. The original request is repeated here for convenience:
{task}
To output the final answer, use the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
Your FINAL ANSWER should be as few words as possible.
If the original request was not a question, or you did not find a definitive answer, simply summarize the final state of the page or task as your FINAL ANSWER.""",
            source=source,
        )
    )

    response = await client.create(messages)
    # Also narrows the type of response.content for static checkers.
    assert isinstance(response.content, str), "expected a plain-text completion from the model"
    return response.content
def _expand_template(src_path: str, dst_path: str) -> str:
    """Read ``src_path``, substitute every REPLACEMENTS placeholder, write the result to ``dst_path`` and return it."""
    with open(src_path, "rt", encoding="utf-8") as fh:
        text = fh.read()
    for placeholder, url in REPLACEMENTS.items():
        text = text.replace(placeholder, url)
    with open(dst_path, "wt", encoding="utf-8") as fh:
        fh.write(text)
    return text


async def main() -> None:
    """Run one benchmark task end to end and print its answer and score.

    Steps, in order:
      1. Expand ``task_prompt.json.txt`` and ``full_task.json.txt`` into
         ``task_prompt.json`` / ``full_task.json`` using REPLACEMENTS.
      2. Build the agent runtime and a model client configured from the
         ``CHAT_COMPLETION_CLIENT_CONFIG`` environment variable (JSON).
      3. Log in to each site that needs it, using a round-robin team of the
         web surfer and a login assistant, then deactivate the login agents.
      4. Run the main team (coder, executor, file surfer, web surfer) under a
         LedgerOrchestrator on the task intent.
      5. Ask the model for a final answer, print it, run the evaluator from
         ``evaluation_harness`` and print the score.

    Raises:
        KeyError: If ``CHAT_COMPLETION_CLIENT_CONFIG`` is not set, or the task
            JSON lacks one of the keys read here (``start_url``, ``sites``,
            ``intent``).
    """
    # Expand the prompt and the full task
    task_config = json.loads(_expand_template("task_prompt.json.txt", "task_prompt.json"))
    if task_config["start_url"] == REDDIT:
        task_config["start_url"] = task_config["start_url"] + "/forums/all"

    # full_task.json is not parsed here; it is handed to the evaluator by path below.
    _expand_template("full_task.json.txt", "full_task.json")

    # Create the runtime.
    runtime = SingleThreadedAgentRuntime()

    # Create the model client from the JSON component config in the environment.
    client = ChatCompletionClient.load_component(json.loads(os.environ["CHAT_COMPLETION_CLIENT_CONFIG"]))

    # Login assistant
    await runtime.register(
        "LoginAssistant",
        lambda: Coder(
            model_client=client,
            system_messages=[
                SystemMessage(
                    content="""You are a general-purpose AI assistant and can handle many questions -- but you don't have access to a web browser. However, the user you are talking to does have a browser, and you can see the screen. Provide short direct instructions to them to take you where you need to go to answer the initial question posed to you.
Once the user has taken the final necessary action to complete the task, and you have fully addressed the initial request, reply with the word TERMINATE.""",
                )
            ],
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    login_assistant = AgentProxy(AgentId("LoginAssistant", "default"), runtime)

    # Web surfer
    await runtime.register(
        "WebSurfer",
        lambda: MultimodalWebSurfer(),  # Configuration is set later by init()
        subscriptions=lambda: [DefaultSubscription()],
    )
    web_surfer = AgentProxy(AgentId("WebSurfer", "default"), runtime)
    actual_surfer = await runtime.try_get_underlying_agent_instance(web_surfer.id, type=MultimodalWebSurfer)
    await actual_surfer.init(model_client=client, downloads_folder=os.getcwd(), browser_channel="chromium")

    # Round-robin orchestrator
    await runtime.register(
        "round_robin_orc",
        lambda: RoundRobinOrchestrator(agents=[web_surfer, login_assistant]),
        subscriptions=lambda: [DefaultSubscription()],
    )
    round_robin_orc = AgentProxy(AgentId("round_robin_orc", "default"), runtime)

    # Login to the necessary websites.
    # NOTE(review): nesting reconstructed from data flow -- each login run uses
    # `site` and needs its own start()/stop_when_idle() pair; confirm against upstream.
    for site in task_config["sites"]:
        if site in ["reddit", "gitlab", "shopping", "shopping_admin"]:
            actual_surfer.start_page = SITE_URLS[site]

            runtime.start()
            await runtime.publish_message(
                ResetMessage(),
                topic_id=DefaultTopicId(),
            )
            await runtime.publish_message(
                BroadcastMessage(content=UserMessage(content=LOGIN_PROMPTS[site], source="human")),
                topic_id=DefaultTopicId(),
            )
            await runtime.stop_when_idle()

    # Deactivate the login-related agents
    runtime.start()
    await runtime.send_message(DeactivateMessage(), login_assistant.id)
    await runtime.send_message(DeactivateMessage(), round_robin_orc.id)
    await runtime.stop_when_idle()

    # By this point, we should be logged in. Prepare for the main event
    await runtime.register(
        "Assistant",
        lambda: Coder(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    coder = AgentProxy(AgentId("Assistant", "default"), runtime)

    await runtime.register(
        "ComputerTerminal",
        lambda: Executor(executor=LocalCommandLineCodeExecutor(), confirm_execution="ACCEPT_ALL"),
        subscriptions=lambda: [DefaultSubscription()],
    )
    executor = AgentProxy(AgentId("ComputerTerminal", "default"), runtime)

    await runtime.register(
        "FileSurfer",
        lambda: FileSurfer(model_client=client),
        subscriptions=lambda: [DefaultSubscription()],
    )
    file_surfer = AgentProxy(AgentId("FileSurfer", "default"), runtime)

    await runtime.register(
        "orchestrator",
        lambda: LedgerOrchestrator(
            agents=[coder, executor, file_surfer, web_surfer],
            model_client=client,
            max_rounds=30,
            max_time=25 * 60,
        ),
        subscriptions=lambda: [DefaultSubscription()],
    )
    orchestrator = AgentProxy(AgentId("orchestrator", "default"), runtime)

    # The main event
    actual_surfer.start_page = task_config["start_url"]
    runtime.start()
    await runtime.send_message(ResetMessage(), web_surfer.id)

    # Provide some background about the pages
    site_description_prompt = ""
    sitename = url_to_sitename(task_config["start_url"])
    if sitename:
        site_description_prompt = ", " + SITE_DESCRIPTIONS[sitename]
    task = f"Your web browser is currently open to the website {task_config['start_url']}{site_description_prompt}. On this website, please complete the following task:\n\n{task_config['intent']}"

    await runtime.publish_message(
        BroadcastMessage(content=UserMessage(content=task.strip(), source="human")),
        topic_id=DefaultTopicId(),
    )
    await runtime.stop_when_idle()

    # Output the final answer
    actual_orchestrator = await runtime.try_get_underlying_agent_instance(orchestrator.id, type=LedgerOrchestrator)
    transcript: List[LLMMessage] = actual_orchestrator._chat_history  # type: ignore
    orc_metadata = await orchestrator.metadata
    source = orc_metadata["type"]
    final_answer = await response_preparer(
        task=task_config["intent"], source=source, client=client, transcript=transcript
    )

    # Keep only the text after the "FINAL ANSWER:" marker; if the marker is
    # missing, the whole model response is used unchanged.
    m = re.search(r"FINAL ANSWER:(.*)$", final_answer, re.DOTALL)
    if m:
        final_answer = m.group(1).strip()

    print('page.stop("' + final_answer + '")')
    print("MAIN TASK COMPLETE !#!#")

    ########## EVALUATION ##########
    # The evaluator inspects the surfer's live browser page through a CDP session.
    context = actual_surfer._context
    page = actual_surfer._page
    cdp_session = await context.new_cdp_session(page)
    config_file = "full_task.json"

    evaluator = evaluation_harness.evaluator_router(config_file)
    score = await evaluator(
        trajectory=evaluation_harness.make_answer_trajecotry(final_answer),
        config_file=config_file,
        page=page,
        client=cdp_session,
    )

    print("FINAL SCORE: " + str(score))
if __name__ == "__main__":
    # Send autogen_core's event log at INFO level to a single LogHandler.
    # Assigning `handlers` (rather than addHandler) replaces any handlers
    # already attached to this logger.
    logger = logging.getLogger(EVENT_LOGGER_NAME)
    logger.setLevel(logging.INFO)
    log_handler = LogHandler()
    logger.handlers = [log_handler]

    asyncio.run(main())