2024-08-04 08:28:19 +08:00
|
|
|
import assert from 'node:assert';
|
|
|
|
import type { WebPage } from '@/common/page';
|
2024-07-23 16:25:11 +08:00
|
|
|
import Insight, {
|
2024-08-04 08:28:19 +08:00
|
|
|
type AIElementParseResponse,
|
|
|
|
type DumpSubscriber,
|
|
|
|
type ExecutionDump,
|
|
|
|
type ExecutionRecorderItem,
|
|
|
|
type ExecutionTaskActionApply,
|
|
|
|
type ExecutionTaskApply,
|
|
|
|
type ExecutionTaskInsightLocateApply,
|
|
|
|
type ExecutionTaskInsightQueryApply,
|
|
|
|
type ExecutionTaskPlanningApply,
|
2024-07-23 16:25:11 +08:00
|
|
|
Executor,
|
2024-08-06 10:00:25 +08:00
|
|
|
type InsightAssertionResponse,
|
2024-08-04 08:28:19 +08:00
|
|
|
type InsightDump,
|
|
|
|
type InsightExtractParam,
|
2024-08-01 15:46:40 +08:00
|
|
|
plan,
|
2024-08-04 08:28:19 +08:00
|
|
|
type PlanningAction,
|
2024-08-06 10:00:25 +08:00
|
|
|
type PlanningActionParamAssert,
|
2024-08-04 08:28:19 +08:00
|
|
|
type PlanningActionParamHover,
|
|
|
|
type PlanningActionParamInputOrKeyPress,
|
|
|
|
type PlanningActionParamScroll,
|
2024-08-07 20:03:13 +08:00
|
|
|
type PlanningActionParamSleep,
|
2024-08-04 08:28:19 +08:00
|
|
|
type PlanningActionParamTap,
|
2024-07-23 16:25:11 +08:00
|
|
|
} from '@midscene/core';
|
|
|
|
import { base64Encoded } from '@midscene/core/image';
|
2024-08-04 08:28:19 +08:00
|
|
|
import { commonScreenshotParam, getTmpFile, sleep } from '@midscene/core/utils';
|
|
|
|
import type { ChatCompletionMessageParam } from 'openai/resources';
|
2024-07-28 08:49:57 +08:00
|
|
|
import type { KeyInput, Page as PuppeteerPage } from 'puppeteer';
|
2024-08-04 08:28:19 +08:00
|
|
|
import type { WebElementInfo } from '../web-element';
|
|
|
|
import { type AiTaskCache, TaskCache } from './task-cache';
|
|
|
|
import { type WebUIContext, parseContextFromWebPage } from './utils';
|
2024-07-23 16:25:11 +08:00
|
|
|
|
2024-07-28 08:49:57 +08:00
|
|
|
export class PageTaskExecutor {
|
|
|
|
/** Page that screenshots, mouse and keyboard operations are issued against. */
page: WebPage;

/** Insight instance whose context is obtained from `page` via `parseContextFromWebPage`. */
insight: Insight<WebElementInfo, WebUIContext>;

/** Dump of the most recent run; assigned by `action`, `query` and `assert`. */
executionDump?: ExecutionDump;

/** Cache read before, and written after, planning and locate steps. */
taskCache: TaskCache;

/**
 * @param page - page this executor operates on
 * @param opts - passed unchanged to the `TaskCache` constructor
 */
constructor(page: WebPage, opts: { cache: AiTaskCache }) {
  this.page = page;

  // The context is parsed from the page each time Insight invokes this function.
  const retrieveContext = async () => parseContextFromWebPage(page);
  this.insight = new Insight<WebElementInfo, WebUIContext>(retrieveContext);

  this.taskCache = new TaskCache(opts);
}
|
|
|
|
|
2024-07-25 10:47:02 +08:00
|
|
|
/**
 * Takes a screenshot of the page into a temporary file obtained from
 * `getTmpFile('jpeg')` and packages it as a recorder entry.
 *
 * @param timing - label stored on the entry (callers pass e.g. `after Action`)
 * @returns a recorder item of type `screenshot`; `ts` is read after the
 *   screenshot call has resolved
 */
private async recordScreenshot(timing: ExecutionRecorderItem['timing']) {
  const tmpPath = getTmpFile('jpeg');
  await this.page.screenshot({ ...commonScreenshotParam, path: tmpPath });

  const ts = Date.now();
  const screenshot = base64Encoded(tmpPath);
  const entry: ExecutionRecorderItem = {
    type: 'screenshot',
    ts,
    screenshot,
    timing,
  };
  return entry;
}
|
|
|
|
|
2024-08-04 08:28:19 +08:00
|
|
|
/**
 * Produces a copy of `taskApply` whose executor records screenshots around
 * the original executor.
 *
 * One screenshot is taken before the original executor runs. When the task
 * type is `Action`, a second one is taken one second after it resolves.
 *
 * @param taskApply - task whose executor should be wrapped
 * @returns a new task object; `taskApply` itself is not modified
 */
private wrapExecutorWithScreenshot(
  taskApply: ExecutionTaskApply,
): ExecutionTaskApply {
  return {
    ...taskApply,
    executor: async (param, context, ...rest) => {
      const screenshots: ExecutionRecorderItem[] = [];
      const { task: currentTask } = context;
      // Attach the list up front so it is already on the task if the executor throws.
      currentTask.recorder = screenshots;

      const beforeShot = await this.recordScreenshot(
        `before ${currentTask.type}`,
      );
      screenshots.push(beforeShot);

      const output = await taskApply.executor(param, context, ...rest);
      if (taskApply.type !== 'Action') {
        return output;
      }

      // Wait one second before capturing the state after the action.
      await sleep(1000);
      const afterShot = await this.recordScreenshot('after Action');
      screenshots.push(afterShot);
      return output;
    },
  };
}
|
|
|
|
|
|
|
|
/**
 * Converts planned actions into executable tasks, each wrapped with
 * screenshot recording via `wrapExecutorWithScreenshot`.
 *
 * Handled plan types: `Locate`, `Assert`, `Input`, `KeyboardPress`, `Tap`,
 * `Hover`, `Scroll` and `Sleep`.
 *
 * @param plans - planned actions to convert
 * @returns one wrapped task per plan, in the same order as `plans`
 * @throws Error when a plan has type `Error` or any other unhandled type
 */
private async convertPlanToExecutable(plans: PlanningAction[]) {
  const tasks: ExecutionTaskApply[] = plans
    .map((plan) => {
      if (plan.type === 'Locate') {
        const taskFind: ExecutionTaskInsightLocateApply = {
          type: 'Insight',
          subType: 'Locate',
          param: plan.param,
          executor: async (param) => {
            // Capture the dump Insight emits for this call so it can be logged.
            let insightDump: InsightDump | undefined;
            const dumpCollector: DumpSubscriber = (dump) => {
              insightDump = dump;
            };
            this.insight.onceDumpUpdatedFn = dumpCollector;

            const pageContext = await this.insight.contextRetrieverFn();
            const locateCache = this.taskCache.readCache(
              pageContext,
              'locate',
              param.prompt,
            );

            let locateResult: AIElementParseResponse | undefined;
            const callAI = this.insight.aiVendorFn;
            const element = await this.insight.locate(param.prompt, {
              // Answer from the cache when an entry exists; otherwise defer to
              // the AI vendor function. Either way remember the response.
              callAI: async (...message: any) => {
                if (locateCache) {
                  locateResult = locateCache;
                  return Promise.resolve(locateCache);
                }
                locateResult = await callAI(...message);
                assert(locateResult);
                return locateResult;
              },
            });

            assert(element, `Element not found: ${param.prompt}`);

            // Persist whichever response was used (cached or fresh).
            if (locateResult) {
              this.taskCache.saveCache({
                type: 'locate',
                pageContext: {
                  url: pageContext.url,
                  size: pageContext.size,
                },
                prompt: param.prompt,
                response: locateResult,
              });
            }

            return {
              output: {
                element,
              },
              log: {
                dump: insightDump,
              },
              cache: {
                // `locateResult` is also set after a fresh AI call, so only
                // the presence of a cache entry tells whether this was a hit.
                hit: Boolean(locateCache),
              },
            };
          },
        };
        return taskFind;
      }

      if (plan.type === 'Assert') {
        const assertPlan = plan as PlanningAction<PlanningActionParamAssert>;
        const taskAssert: ExecutionTaskApply = {
          type: 'Insight',
          subType: 'Assert',
          param: assertPlan.param,
          executor: async () => {
            let insightDump: InsightDump | undefined;
            const dumpCollector: DumpSubscriber = (dump) => {
              insightDump = dump;
            };
            this.insight.onceDumpUpdatedFn = dumpCollector;
            const assertion = await this.insight.assert(
              assertPlan.param.assertion,
            );

            return {
              output: assertion,
              log: {
                dump: insightDump,
              },
            };
          },
        };
        return taskAssert;
      }

      if (plan.type === 'Input') {
        const taskActionInput: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> =
          {
            type: 'Action',
            subType: 'Input',
            param: plan.param,
            executor: async (taskParam, { element }) => {
              // Click the element's center first when one was provided.
              if (element) {
                await this.page.mouse.click(
                  element.center[0],
                  element.center[1],
                );
              }
              assert(taskParam.value, 'No value to input');
              await this.page.keyboard.type(taskParam.value);
            },
          };
        return taskActionInput;
      }

      if (plan.type === 'KeyboardPress') {
        const taskActionKeyboardPress: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> =
          {
            type: 'Action',
            subType: 'KeyboardPress',
            param: plan.param,
            executor: async (taskParam) => {
              assert(taskParam.value, 'No key to press');
              await this.page.keyboard.press(taskParam.value as KeyInput);
            },
          };
        return taskActionKeyboardPress;
      }

      if (plan.type === 'Tap') {
        const taskActionTap: ExecutionTaskActionApply<PlanningActionParamTap> =
          {
            type: 'Action',
            subType: 'Tap',
            executor: async (param, { element }) => {
              assert(element, 'Element not found, cannot tap');
              await this.page.mouse.click(
                element.center[0],
                element.center[1],
              );
            },
          };
        return taskActionTap;
      }

      if (plan.type === 'Hover') {
        const taskActionHover: ExecutionTaskActionApply<PlanningActionParamHover> =
          {
            type: 'Action',
            subType: 'Hover',
            executor: async (param, { element }) => {
              // console.log('executor args', param, element);
              assert(element, 'Element not found, cannot hover');
              await this.page.mouse.move(
                element.center[0],
                element.center[1],
              );
            },
          };
        return taskActionHover;
      }

      if (plan.type === 'Scroll') {
        const taskActionScroll: ExecutionTaskActionApply<PlanningActionParamScroll> =
          {
            type: 'Action',
            subType: 'Scroll',
            param: plan.param,
            executor: async (taskParam) => {
              const scrollToEventName = taskParam.scrollType;
              // Viewport height is the step used by ScrollUp / ScrollDown.
              const innerHeight = await (this.page as PuppeteerPage).evaluate(
                () => window.innerHeight,
              );

              switch (scrollToEventName) {
                case 'ScrollUntilTop':
                  await this.page.mouse.wheel(0, -9999999);
                  break;
                case 'ScrollUntilBottom':
                  await this.page.mouse.wheel(0, 9999999);
                  break;
                case 'ScrollUp':
                  await this.page.mouse.wheel(0, -innerHeight);
                  break;
                case 'ScrollDown':
                  await this.page.mouse.wheel(0, innerHeight);
                  break;
                default:
                  // Unknown scroll types are logged and otherwise ignored.
                  console.error(
                    'Unknown scroll event type:',
                    scrollToEventName,
                  );
              }
            },
          };
        return taskActionScroll;
      }

      if (plan.type === 'Sleep') {
        const taskActionSleep: ExecutionTaskActionApply<PlanningActionParamSleep> =
          {
            type: 'Action',
            subType: 'Sleep',
            param: plan.param,
            executor: async (taskParam) => {
              assert(taskParam.timeMs, 'No time to sleep');
              await sleep(taskParam.timeMs);
            },
          };
        return taskActionSleep;
      }

      if (plan.type === 'Error') {
        throw new Error(`Got a task plan with type Error: ${plan.thought}`);
      }

      throw new Error(`Unknown or Unsupported task type: ${plan.type}`);
    })
    .map((task: ExecutionTaskApply) => {
      return this.wrapExecutorWithScreenshot(task);
    });

  return tasks;
}
|
|
|
|
|
2024-08-04 08:28:19 +08:00
|
|
|
/**
 * Plans the steps needed for `userPrompt` and then executes them.
 *
 * The plan is taken from the task cache when an entry exists, otherwise it is
 * requested through `plan`. `executionDump` is refreshed after each flush and
 * again before any error is rethrown.
 *
 * @param userPrompt - instruction to plan and execute
 * @throws Error wrapping the original failure (available as `cause`), including
 *   the case where the executor ends in the `error` status
 */
async action(
  userPrompt: string /* , actionInfo?: { actionType?: EventActions[number]['action'] } */,
) {
  const taskExecutor = new Executor(userPrompt);
  taskExecutor.description = userPrompt;

  // Filled in by the planning task once it has run.
  let plans: PlanningAction[] = [];

  const planningTask: ExecutionTaskPlanningApply = {
    type: 'Planning',
    param: { userPrompt },
    executor: async (param) => {
      const pageContext = await this.insight.contextRetrieverFn();
      const planCache = this.taskCache.readCache(
        pageContext,
        'plan',
        userPrompt,
      );

      const planResult: { plans: PlanningAction[] } = planCache
        ? planCache
        : await plan(param.userPrompt, { context: pageContext });

      const { plans: plannedSteps } = planResult;
      assert(plannedSteps.length > 0, 'No plans found');
      plans = plannedSteps;

      // The plan is written back to the cache whether it was cached or fresh.
      const { url, size } = pageContext;
      this.taskCache.saveCache({
        type: 'plan',
        pageContext: { url, size },
        prompt: userPrompt,
        response: planResult,
      });

      return {
        output: planResult,
        cache: { hit: Boolean(planCache) },
      };
    },
  };

  try {
    // Phase 1: run the planning task on its own.
    await taskExecutor.append(this.wrapExecutorWithScreenshot(planningTask));
    await taskExecutor.flush();
    this.executionDump = taskExecutor.dump();

    // Phase 2: queue and run the tasks derived from the plan.
    const stepTasks = await this.convertPlanToExecutable(plans);
    await taskExecutor.append(stepTasks);
    await taskExecutor.flush();
    this.executionDump = taskExecutor.dump();

    assert(
      taskExecutor.status !== 'error',
      `failed to execute tasks: ${taskExecutor.status}, msg: ${taskExecutor.errorMsg || ''}`,
    );
  } catch (e: any) {
    // Preserve the dump for inspection before surfacing the failure.
    this.executionDump = taskExecutor.dump();
    throw new Error(e.message, { cause: e });
  }
}
|
2024-07-25 10:47:02 +08:00
|
|
|
|
|
|
|
/**
 * Extracts data from the page according to `demand` using Insight.
 *
 * The JSON form of `demand` is used as the executor description.
 * `executionDump` is refreshed after the run and before any error is rethrown.
 *
 * @param demand - description of the data to extract
 * @returns whatever `insight.extract` resolved with; remains `undefined` if the
 *   task's executor never assigned it
 * @throws Error wrapping the original failure (available as `cause`)
 */
async query(demand: InsightExtractParam) {
  const description = JSON.stringify(demand);
  const taskExecutor = new Executor(description);
  taskExecutor.description = description;

  // Written by the task's executor, returned once the run has finished.
  let extracted: any;

  const queryTask: ExecutionTaskInsightQueryApply = {
    type: 'Insight',
    subType: 'Query',
    param: { dataDemand: demand },
    executor: async (param) => {
      let insightDump: InsightDump | undefined;
      const captureDump: DumpSubscriber = (dump) => {
        insightDump = dump;
      };
      this.insight.onceDumpUpdatedFn = captureDump;

      extracted = await this.insight.extract<any>(param.dataDemand);
      return {
        output: extracted,
        log: { dump: insightDump },
      };
    },
  };

  try {
    await taskExecutor.append(this.wrapExecutorWithScreenshot(queryTask));
    await taskExecutor.flush();
    this.executionDump = taskExecutor.dump();
  } catch (e: any) {
    // Preserve the dump for inspection before surfacing the failure.
    this.executionDump = taskExecutor.dump();
    throw new Error(e.message, { cause: e });
  }

  return extracted;
}
|
2024-08-06 10:00:25 +08:00
|
|
|
|
|
|
|
/**
 * Runs a single `Assert` plan for `assertion` and returns the executor's
 * flush result.
 *
 * @param assertion - statement to check
 * @returns the value resolved by `taskExecutor.flush()`, typed as the
 *   assertion response
 */
async assert(assertion: string): Promise<InsightAssertionResponse> {
  const description = assertion;
  const taskExecutor = new Executor(description);
  taskExecutor.description = description;

  const assertionPlan: PlanningAction<PlanningActionParamAssert> = {
    type: 'Assert',
    param: {
      assertion,
    },
  };
  const assertTasks = await this.convertPlanToExecutable([assertionPlan]);

  // convertPlanToExecutable already wraps each task with screenshot recording.
  // Wrapping again here would take a second "before" screenshot whose recorder
  // is immediately replaced by the inner wrapper, so the task is appended as-is.
  await taskExecutor.append(assertTasks[0]);
  const assertionResult: InsightAssertionResponse =
    await taskExecutor.flush();
  this.executionDump = taskExecutor.dump();

  return assertionResult;
}
|
2024-07-23 16:25:11 +08:00
|
|
|
}
|