2024-08-04 08:28:19 +08:00
|
|
|
import assert from 'node:assert';
|
|
|
|
import type { WebPage } from '@/common/page';
|
2024-11-25 16:05:01 +08:00
|
|
|
import type { PuppeteerWebPage } from '@/puppeteer';
|
2024-10-12 16:01:07 +08:00
|
|
|
import {
|
2024-10-31 18:18:31 +08:00
|
|
|
type AIElementIdResponse,
|
2024-08-04 08:28:19 +08:00
|
|
|
type DumpSubscriber,
|
|
|
|
type ExecutionRecorderItem,
|
|
|
|
type ExecutionTaskActionApply,
|
|
|
|
type ExecutionTaskApply,
|
|
|
|
type ExecutionTaskInsightLocateApply,
|
|
|
|
type ExecutionTaskInsightQueryApply,
|
|
|
|
type ExecutionTaskPlanningApply,
|
2024-12-16 15:04:21 +08:00
|
|
|
type ExecutionTaskProgressOptions,
|
2024-07-23 16:25:11 +08:00
|
|
|
Executor,
|
2024-11-05 11:49:21 +08:00
|
|
|
type Insight,
|
2024-08-06 10:00:25 +08:00
|
|
|
type InsightAssertionResponse,
|
2024-08-04 08:28:19 +08:00
|
|
|
type InsightDump,
|
|
|
|
type InsightExtractParam,
|
2024-12-08 20:12:17 +08:00
|
|
|
type PlanningAIResponse,
|
2024-08-04 08:28:19 +08:00
|
|
|
type PlanningAction,
|
2024-08-06 10:00:25 +08:00
|
|
|
type PlanningActionParamAssert,
|
2024-10-12 16:01:07 +08:00
|
|
|
type PlanningActionParamError,
|
2024-08-04 08:28:19 +08:00
|
|
|
type PlanningActionParamHover,
|
|
|
|
type PlanningActionParamInputOrKeyPress,
|
|
|
|
type PlanningActionParamScroll,
|
2024-08-07 20:03:13 +08:00
|
|
|
type PlanningActionParamSleep,
|
2024-08-04 08:28:19 +08:00
|
|
|
type PlanningActionParamTap,
|
2024-08-21 14:43:35 +08:00
|
|
|
type PlanningActionParamWaitFor,
|
2024-10-12 16:01:07 +08:00
|
|
|
plan,
|
2024-10-31 18:18:31 +08:00
|
|
|
transformElementPositionToId,
|
2024-07-23 16:25:11 +08:00
|
|
|
} from '@midscene/core';
|
2024-09-23 10:57:19 +08:00
|
|
|
import { sleep } from '@midscene/core/utils';
|
2024-09-05 20:05:19 +08:00
|
|
|
import type { KeyInput } from 'puppeteer';
|
|
|
|
import type { ElementInfo } from '../extractor';
|
2024-11-05 11:49:21 +08:00
|
|
|
import type { WebElementInfo } from '../web-element';
|
2024-09-06 17:19:35 +08:00
|
|
|
import { TaskCache } from './task-cache';
|
2024-11-05 11:49:21 +08:00
|
|
|
import type { WebUIContext } from './utils';
|
2024-07-23 16:25:11 +08:00
|
|
|
|
2024-08-10 07:57:15 +08:00
|
|
|
/**
 * Shape returned by the public entry points of `PageTaskExecutor`
 * (`action`, `query`, `assert`, `waitFor`).
 */
interface ExecutionResult<OutputType = any> {
  /** Value obtained from the executor flush, or `undefined` when there is none. */
  output: OutputType;

  /** The executor instance that ran the tasks of this call. */
  executor: Executor;
}
|
|
|
|
|
2024-07-28 08:49:57 +08:00
|
|
|
export class PageTaskExecutor {
|
|
|
|
page: WebPage;
|
2024-07-23 16:25:11 +08:00
|
|
|
|
2024-08-01 15:46:40 +08:00
|
|
|
insight: Insight<WebElementInfo, WebUIContext>;
|
2024-07-23 16:25:11 +08:00
|
|
|
|
2024-08-01 15:46:40 +08:00
|
|
|
taskCache: TaskCache;
|
|
|
|
|
2024-11-05 11:49:21 +08:00
|
|
|
/**
 * Binds the executor to a page and an insight instance and creates its task cache.
 *
 * @param page - page driver used for screenshots, keyboard, mouse and scrolling
 * @param insight - insight instance used to locate, assert and extract
 * @param opts - options; `cacheId` is forwarded to the `TaskCache` constructor
 */
constructor(
  page: WebPage,
  insight: Insight<WebElementInfo, WebUIContext>,
  opts: { cacheId: string | undefined },
) {
  this.page = page;
  this.insight = insight;

  // `opts` is read defensively, so an absent value yields an undefined cache id.
  const cacheId = opts?.cacheId;
  this.taskCache = new TaskCache({ cacheId });
}
|
|
|
|
|
2024-07-25 10:47:02 +08:00
|
|
|
/**
 * Takes a base64 screenshot of the page and wraps it in a recorder item.
 *
 * @param timing - label stored on the item (for example `before Action`)
 * @returns a `screenshot` recorder item whose `ts` is read after the capture resolves
 */
private async recordScreenshot(timing: ExecutionRecorderItem['timing']) {
  const screenshot = await this.page.screenshotBase64();
  const ts = Date.now();
  const recorded: ExecutionRecorderItem = {
    type: 'screenshot',
    ts,
    screenshot,
    timing,
  };
  return recorded;
}
|
|
|
|
|
2024-11-05 14:28:16 +08:00
|
|
|
/**
 * Returns a copy of `taskApply` whose executor records screenshots around the
 * original executor.
 *
 * The wrapper installs a fresh recorder on the running task, pushes a
 * `before <task type>` screenshot, runs the original executor, and, for
 * `Action` tasks, waits for the page to settle. When `appendAfterExecution`
 * is true an `after Action` screenshot is pushed as well.
 *
 * @param taskApply - task to wrap; it is not mutated
 * @param appendAfterExecution - also record a screenshot after the executor ran
 * @returns the wrapped task
 */
private prependExecutorWithScreenshot(
  taskApply: ExecutionTaskApply,
  appendAfterExecution = false,
): ExecutionTaskApply {
  // Sleeps 100ms, then waits for network idle when the page object offers
  // `waitUntilNetworkIdle`. Failures of that wait are ignored on purpose.
  const waitForNetworkToSettle = async (): Promise<void> => {
    await sleep(100);
    const puppeteerPage = this.page as PuppeteerWebPage;
    if (!puppeteerPage.waitUntilNetworkIdle) {
      return;
    }
    try {
      await puppeteerPage.waitUntilNetworkIdle({
        idleTime: 100,
        timeout: 800,
      });
    } catch (error) {
      // console.error('waitUntilNetworkIdle error', error);
    }
  };

  return {
    ...taskApply,
    executor: async (param, context, ...args) => {
      const screenshots: ExecutionRecorderItem[] = [];
      // Attach the recorder first so it is available even if a later step throws.
      context.task.recorder = screenshots;
      screenshots.push(
        await this.recordScreenshot(`before ${context.task.type}`),
      );

      const result = await taskApply.executor(param, context, ...args);

      if (taskApply.type === 'Action') {
        // Both run concurrently, so the pause lasts at least 200ms.
        await Promise.all([waitForNetworkToSettle(), sleep(200)]);
      }

      if (appendAfterExecution) {
        screenshots.push(await this.recordScreenshot('after Action'));
      }
      return result;
    },
  };
}
|
|
|
|
|
2024-09-06 17:19:35 +08:00
|
|
|
/**
 * Converts planning actions into executable tasks for an `Executor`.
 *
 * Supported plan types are `Locate`, `Assert`, `AssertWithoutThrow`, `Input`,
 * `KeyboardPress`, `Tap`, `Hover`, `Scroll`, `Sleep`, `Error` and
 * `FalsyConditionStatement`. A `Locate` plan whose id is `null` or the string
 * `'null'` is skipped. Every `Action` task is wrapped with
 * `prependExecutorWithScreenshot`; an `Action` task that is last in the batch
 * also records a screenshot after it ran.
 *
 * @param plans - planning actions, in execution order
 * @param cacheGroup - optional cache group read and written by `Locate` tasks
 * @returns an object whose `tasks` array holds the executable tasks
 * @throws Error when a plan has an unknown or unsupported `type`
 */
private async convertPlanToExecutable(
  plans: PlanningAction[],
  cacheGroup?: ReturnType<TaskCache['getCacheGroupByPrompt']>,
) {
  const tasks: ExecutionTaskApply[] = [];

  for (const planItem of plans) {
    if (planItem.type === 'Locate') {
      if (planItem.locate?.id === null || planItem.locate?.id === 'null') {
        // A Locate plan with an explicit null id is ignored.
        continue;
      }
      const taskFind: ExecutionTaskInsightLocateApply = {
        type: 'Insight',
        subType: 'Locate',
        param: planItem.locate || undefined,
        thought: planItem.thought,
        locate: planItem.locate,
        executor: async (param, taskContext) => {
          const { task } = taskContext;
          assert(param?.prompt || param?.id, 'No prompt or id to locate');

          let insightDump: InsightDump | undefined;
          const dumpCollector: DumpSubscriber = (dump) => {
            insightDump = dump;
          };
          this.insight.onceDumpUpdatedFn = dumpCollector;

          const shotTime = Date.now();
          const pageContext = await this.insight.contextRetrieverFn('locate');
          const recordItem: ExecutionRecorderItem = {
            type: 'screenshot',
            ts: shotTime,
            screenshot: pageContext.screenshotBase64,
            timing: 'before locate',
          };

          const locateCache = cacheGroup?.readCache(
            pageContext,
            'locate',
            param.prompt,
          );
          let locateResult: AIElementIdResponse | undefined;
          const callAI = this.insight.aiVendorFn;
          const element = await this.insight.locate(param.prompt, {
            quickAnswer: param?.id
              ? {
                  id: param.id,
                }
              : undefined,
            callAI: async (...message: any) => {
              if (locateCache) {
                // Answer from the cache instead of calling the AI vendor.
                locateResult = locateCache;
                return { content: locateCache };
              }
              // NOTE(review): a fresh AI answer is not stored in
              // `locateResult`, so the save below only runs on a cache hit.
              const { content: aiResult, usage } = await callAI(...message);
              return { content: aiResult, usage };
            },
          });

          if (locateResult) {
            cacheGroup?.saveCache({
              type: 'locate',
              pageContext: {
                url: pageContext.url,
                size: pageContext.size,
              },
              prompt: param.prompt,
              response: locateResult,
            });
          }

          if (!element) {
            // Keep the dump on the task so the failure can be inspected.
            task.log = {
              dump: insightDump,
            };
            throw new Error(`Element not found: ${param.prompt}`);
          }

          return {
            output: {
              element,
            },
            log: {
              dump: insightDump,
            },
            cache: {
              hit: Boolean(locateCache),
            },
            recorder: [recordItem],
          };
        },
      };
      tasks.push(taskFind);
    } else if (
      planItem.type === 'Assert' ||
      planItem.type === 'AssertWithoutThrow'
    ) {
      const assertPlan = planItem as PlanningAction<PlanningActionParamAssert>;
      const taskAssert: ExecutionTaskApply = {
        type: 'Insight',
        subType: 'Assert',
        param: assertPlan.param,
        thought: assertPlan.thought,
        locate: assertPlan.locate,
        executor: async (param, taskContext) => {
          const { task } = taskContext;
          let insightDump: InsightDump | undefined;
          const dumpCollector: DumpSubscriber = (dump) => {
            insightDump = dump;
          };
          this.insight.onceDumpUpdatedFn = dumpCollector;
          const assertion = await this.insight.assert(
            assertPlan.param.assertion,
          );

          if (!assertion.pass) {
            if (planItem.type === 'Assert') {
              // A plain Assert fails the task by throwing.
              task.output = assertion;
              task.log = {
                dump: insightDump,
              };
              throw new Error(
                assertion.thought || 'Assertion failed without reason',
              );
            }

            // AssertWithoutThrow only records the reason on the task.
            task.error = assertion.thought;
          }

          return {
            output: assertion,
            log: {
              dump: insightDump,
            },
          };
        },
      };
      tasks.push(taskAssert);
    } else if (planItem.type === 'Input') {
      const taskActionInput: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> =
        {
          type: 'Action',
          subType: 'Input',
          param: planItem.param,
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async (taskParam, { element }) => {
            if (element) {
              await this.page.clearInput(element as ElementInfo);

              // An empty value means "clear only".
              if (!taskParam || !taskParam.value) {
                return;
              }

              await this.page.keyboard.type(taskParam.value);
            }
          },
        };
      tasks.push(taskActionInput);
    } else if (planItem.type === 'KeyboardPress') {
      const taskActionKeyboardPress: ExecutionTaskActionApply<PlanningActionParamInputOrKeyPress> =
        {
          type: 'Action',
          subType: 'KeyboardPress',
          param: planItem.param,
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async (taskParam) => {
            assert(taskParam?.value, 'No key to press');
            await this.page.keyboard.press(taskParam.value as KeyInput);
          },
        };
      tasks.push(taskActionKeyboardPress);
    } else if (planItem.type === 'Tap') {
      const taskActionTap: ExecutionTaskActionApply<PlanningActionParamTap> =
        {
          type: 'Action',
          subType: 'Tap',
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async (param, { element }) => {
            assert(element, 'Element not found, cannot tap');
            await this.page.mouse.click(element.center[0], element.center[1]);
          },
        };
      tasks.push(taskActionTap);
    } else if (planItem.type === 'Hover') {
      const taskActionHover: ExecutionTaskActionApply<PlanningActionParamHover> =
        {
          type: 'Action',
          subType: 'Hover',
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async (param, { element }) => {
            assert(element, 'Element not found, cannot hover');
            await this.page.mouse.move(element.center[0], element.center[1]);
          },
        };
      tasks.push(taskActionHover);
    } else if (planItem.type === 'Scroll') {
      const taskActionScroll: ExecutionTaskActionApply<PlanningActionParamScroll> =
        {
          type: 'Action',
          subType: 'Scroll',
          param: planItem.param,
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async (taskParam, { element }) => {
            // Scroll from the located element's center when there is one.
            const startingPoint = element
              ? {
                  left: element.center[0],
                  top: element.center[1],
                }
              : undefined;
            // Read every field null-safely: a missing param is treated as a
            // single scroll down instead of raising a TypeError.
            const scrollToEventName = taskParam?.scrollType;
            const direction = taskParam?.direction;
            const distance = taskParam?.distance || undefined;

            if (scrollToEventName === 'untilTop') {
              await this.page.scrollUntilTop(startingPoint);
            } else if (scrollToEventName === 'untilBottom') {
              await this.page.scrollUntilBottom(startingPoint);
            } else if (scrollToEventName === 'untilRight') {
              await this.page.scrollUntilRight(startingPoint);
            } else if (scrollToEventName === 'untilLeft') {
              await this.page.scrollUntilLeft(startingPoint);
            } else if (scrollToEventName === 'once' || !scrollToEventName) {
              if (direction === 'down' || !direction) {
                await this.page.scrollDown(distance, startingPoint);
              } else if (direction === 'up') {
                await this.page.scrollUp(distance, startingPoint);
              } else if (direction === 'left') {
                await this.page.scrollLeft(distance, startingPoint);
              } else if (direction === 'right') {
                await this.page.scrollRight(distance, startingPoint);
              } else {
                throw new Error(`Unknown scroll direction: ${direction}`);
              }
              // until mouse event is done
              await sleep(500);
            } else {
              throw new Error(
                `Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
                  taskParam,
                )}`,
              );
            }
          },
        };
      tasks.push(taskActionScroll);
    } else if (planItem.type === 'Sleep') {
      const taskActionSleep: ExecutionTaskActionApply<PlanningActionParamSleep> =
        {
          type: 'Action',
          subType: 'Sleep',
          param: planItem.param,
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async (taskParam) => {
            // Falls back to 3000ms when no (or a zero) duration is given.
            await sleep(taskParam?.timeMs || 3000);
          },
        };
      tasks.push(taskActionSleep);
    } else if (planItem.type === 'Error') {
      const taskActionError: ExecutionTaskActionApply<PlanningActionParamError> =
        {
          type: 'Action',
          subType: 'Error',
          param: planItem.param,
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async () => {
            // The message may sit on the plan itself or inside its param
            // (error plans built by `appendErrorPlan` use `param.thought`).
            throw new Error(
              planItem?.thought ||
                planItem.param?.thought ||
                'error without thought',
            );
          },
        };
      tasks.push(taskActionError);
    } else if (planItem.type === 'FalsyConditionStatement') {
      const taskActionFalsyConditionStatement: ExecutionTaskActionApply<null> =
        {
          type: 'Action',
          subType: 'FalsyConditionStatement',
          param: null,
          thought: planItem.thought,
          locate: planItem.locate,
          executor: async () => {
            // Intentionally a no-op.
            // console.warn(`[warn]falsy condition: ${plan.thought}`);
          },
        };
      tasks.push(taskActionFalsyConditionStatement);
    } else {
      throw new Error(`Unknown or unsupported task type: ${planItem.type}`);
    }
  }

  const lastIndex = tasks.length - 1;
  const wrappedTasks = tasks.map(
    (task: ExecutionTaskApply, index: number) => {
      if (task.type === 'Action') {
        return this.prependExecutorWithScreenshot(task, index === lastIndex);
      }
      return task;
    },
  );

  return {
    tasks: wrappedTasks,
  };
}
|
|
|
|
|
2024-12-08 20:12:17 +08:00
|
|
|
/**
 * Builds the planning task for a prompt.
 *
 * When executed, the task retrieves the page context, reads a cached plan or
 * asks `plan` for a new one, expands the planned actions (a `Locate` action is
 * inserted before every action that carries a `locate`), saves the plan to the
 * cache and returns the expanded actions together with `furtherPlan`.
 *
 * @param userPrompt - instruction to plan for; also used as the cache prompt
 * @param cacheGroup - cache group used to read and save the plan
 * @param whatHaveDone - optional summary of earlier steps, forwarded to `plan`
 * @param originalPrompt - optional original instruction, forwarded to `plan`
 * @returns the planning task
 */
private planningTaskFromPrompt(
  userPrompt: string,
  cacheGroup: ReturnType<TaskCache['getCacheGroupByPrompt']>,
  whatHaveDone?: string,
  originalPrompt?: string,
) {
  // Action types that cannot run without a located element.
  const typesNeedingLocate = ['Tap', 'Hover', 'Input'];

  const planningTask: ExecutionTaskPlanningApply = {
    type: 'Planning',
    locate: null,
    param: {
      userPrompt,
      whatHaveDone,
      originalPrompt,
    },
    executor: async (param, executorContext) => {
      const capturedAt = Date.now();
      const pageContext = await this.insight.contextRetrieverFn('locate');
      const recordItem: ExecutionRecorderItem = {
        type: 'screenshot',
        ts: capturedAt,
        screenshot: pageContext.screenshotBase64,
        timing: 'before planning',
      };

      executorContext.task.recorder = [recordItem];
      (executorContext.task as any).pageContext = pageContext;

      const planCache = cacheGroup.readCache(pageContext, 'plan', userPrompt);
      const planResult: Awaited<ReturnType<typeof plan>> = planCache
        ? planCache
        : await plan(param.userPrompt, {
            context: pageContext,
            whatHaveDone: param.whatHaveDone,
            originalPrompt: param.originalPrompt,
          });

      const { actions, furtherPlan } = planResult;

      const finalActions: PlanningAction[] = [];
      for (const planned of actions) {
        if (planned.locate) {
          // The locate payload from planning is forwarded unchanged.
          finalActions.push({
            type: 'Locate',
            locate: planned.locate,
            param: null,
            thought: planned.locate.prompt,
          });
        } else if (typesNeedingLocate.includes(planned.type)) {
          // should include locate but get null: drop this and all later actions
          break;
        }
        finalActions.push(planned);
      }

      assert(finalActions.length > 0, 'No plans found');

      cacheGroup.saveCache({
        type: 'plan',
        pageContext: {
          url: pageContext.url,
          size: pageContext.size,
        },
        prompt: userPrompt,
        response: planResult,
      });

      return {
        output: {
          actions: finalActions,
          taskWillBeAccomplished: false,
          furtherPlan,
        },
        cache: {
          hit: Boolean(planCache),
        },
        pageContext,
        recorder: [recordItem],
      };
    },
  };

  return planningTask;
}
|
2024-07-23 16:25:11 +08:00
|
|
|
|
2024-12-16 15:04:21 +08:00
|
|
|
/**
 * Plans and executes the steps needed to fulfil `userPrompt`.
 *
 * Each round appends a planning task, flushes it, converts the planned
 * actions into executable tasks and flushes those. While the plan reports a
 * `furtherPlan.whatToDoNext`, a new planning round is started for it. Once the
 * replan count exceeds 5 an error task is appended and the call returns.
 *
 * @param userPrompt - instruction to carry out
 * @param options - optional progress hooks; `onTaskStart` is passed to the executor
 * @returns the output of the last flush and the executor. The executor may be
 *   in an error state, in which case the call returned early.
 */
async action(
  userPrompt: string,
  options?: ExecutionTaskProgressOptions,
): Promise<ExecutionResult> {
  const taskExecutor = new Executor(userPrompt, undefined, undefined, {
    onTaskStart: options?.onTaskStart,
  });

  const cacheGroup = this.taskCache.getCacheGroupByPrompt(userPrompt);
  const originalPrompt = userPrompt;
  let planningTask: ExecutionTaskPlanningApply | null =
    this.planningTaskFromPrompt(originalPrompt, cacheGroup);
  let result: any;
  let replanCount = 0;

  while (planningTask) {
    if (replanCount > 5) {
      const errorMsg =
        'Replanning too many times, please split the task into multiple steps';

      return this.appendErrorPlan(taskExecutor, errorMsg);
    }

    if (replanCount > 0) {
      // add a brief sleep to wait for the page to be ready
      await sleep(300);
    }

    // plan
    await taskExecutor.append(planningTask);
    const planResult: PlanningAIResponse = await taskExecutor.flush();
    if (taskExecutor.isInErrorState()) {
      return {
        output: planResult,
        executor: taskExecutor,
      };
    }

    const plans = planResult.actions;

    try {
      const executables = await this.convertPlanToExecutable(
        plans,
        cacheGroup,
      );
      // Awaited like every other append in this class, so a failure is
      // caught here instead of escaping as an unhandled rejection.
      await taskExecutor.append(executables.tasks);
    } catch (error) {
      return this.appendErrorPlan(
        taskExecutor,
        `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(
          plans,
        )}`,
      );
    }

    result = await taskExecutor.flush();
    if (taskExecutor.isInErrorState()) {
      return {
        output: result,
        executor: taskExecutor,
      };
    }

    if (planResult.furtherPlan?.whatToDoNext) {
      planningTask = this.planningTaskFromPrompt(
        planResult.furtherPlan.whatToDoNext,
        cacheGroup,
        planResult.furtherPlan.whatHaveDone,
        originalPrompt,
      );
      replanCount++;
    } else {
      // Nothing left to plan: leave the loop.
      planningTask = null;
    }
  }

  return {
    output: result,
    executor: taskExecutor,
  };
}
|
2024-07-25 10:47:02 +08:00
|
|
|
|
2025-01-10 10:55:41 +08:00
|
|
|
/**
 * Extracts data from the page according to `demand`.
 *
 * @param demand - extraction demand; a non-string demand is JSON-stringified
 *   to form the executor description
 * @param options - optional progress hooks; `onTaskStart` is passed to the executor
 * @returns the flushed output and the executor that ran the query
 */
async query(
  demand: InsightExtractParam,
  options?: ExecutionTaskProgressOptions,
): Promise<ExecutionResult> {
  let description: string;
  if (typeof demand === 'string') {
    description = demand;
  } else {
    description = JSON.stringify(demand);
  }

  const taskExecutor = new Executor(description, undefined, undefined, {
    onTaskStart: options?.onTaskStart,
  });

  const queryTask: ExecutionTaskInsightQueryApply = {
    type: 'Insight',
    subType: 'Query',
    locate: null,
    param: { dataDemand: demand },
    executor: async (param) => {
      // Capture the dump emitted by the insight during extraction.
      let insightDump: InsightDump | undefined;
      const dumpCollector: DumpSubscriber = (dump) => {
        insightDump = dump;
      };
      this.insight.onceDumpUpdatedFn = dumpCollector;

      const extracted = await this.insight.extract<any>(param.dataDemand);
      return {
        output: extracted,
        log: { dump: insightDump },
      };
    },
  };

  const wrappedQueryTask = this.prependExecutorWithScreenshot(queryTask);
  await taskExecutor.append(wrappedQueryTask);
  const output = await taskExecutor.flush();

  return { output, executor: taskExecutor };
}
|
2024-08-06 10:00:25 +08:00
|
|
|
|
2024-08-10 07:57:15 +08:00
|
|
|
/**
 * Runs a single assertion against the page.
 *
 * @param assertion - statement to verify
 * @param options - optional progress hooks; `onTaskStart` is passed to the executor
 * @returns the flushed assertion response and the executor that ran it
 */
async assert(
  assertion: string,
  options?: ExecutionTaskProgressOptions,
): Promise<ExecutionResult<InsightAssertionResponse>> {
  const taskExecutor = new Executor(
    `assert: ${assertion}`,
    undefined,
    undefined,
    {
      onTaskStart: options?.onTaskStart,
    },
  );

  const assertionPlan: PlanningAction<PlanningActionParamAssert> = {
    type: 'Assert',
    param: { assertion },
    locate: null,
  };

  const executable = await this.convertPlanToExecutable([assertionPlan]);
  const [assertTask] = executable.tasks;
  await taskExecutor.append(this.prependExecutorWithScreenshot(assertTask));

  const output: InsightAssertionResponse = await taskExecutor.flush();
  return { output, executor: taskExecutor };
}
|
2024-08-21 14:43:35 +08:00
|
|
|
|
2024-12-08 20:12:17 +08:00
|
|
|
/**
 * Appends an `Error` task carrying `errorMsg` to the executor and flushes it.
 *
 * The message is set both as the plan's `thought` and inside its `param`, so
 * it is available wherever the error task looks for it.
 *
 * @param taskExecutor - executor that receives the error task
 * @param errorMsg - message describing the failure
 * @returns a result with `output: undefined` and the same executor
 */
private async appendErrorPlan(taskExecutor: Executor, errorMsg: string) {
  const errorPlan: PlanningAction<PlanningActionParamError> = {
    type: 'Error',
    // Without a top-level thought the message would not reach a consumer
    // that reads `plan.thought`.
    thought: errorMsg,
    param: {
      thought: errorMsg,
    },
    locate: null,
  };

  const { tasks } = await this.convertPlanToExecutable([errorPlan]);
  await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));
  await taskExecutor.flush();

  return {
    output: undefined,
    executor: taskExecutor,
  };
}
|
|
|
|
|
2024-08-21 14:43:35 +08:00
|
|
|
/**
 * Polls an assertion until it passes or the timeout elapses.
 *
 * Each round runs a non-throwing assertion. If it fails and the round took
 * less than `checkIntervalMs`, a sleep task fills the remaining time before
 * the next round. When the timeout is reached an error task with the last
 * failure reason is appended.
 *
 * @param assertion - statement to wait for; must be non-empty
 * @param opt - `timeoutMs` and `checkIntervalMs`; both must be truthy
 * @returns a result with `output: undefined` and the executor
 * @throws AssertionError when `assertion`, `timeoutMs` or `checkIntervalMs` is missing
 */
async waitFor(
  assertion: string,
  opt: PlanningActionParamWaitFor,
): Promise<ExecutionResult<void>> {
  const taskExecutor = new Executor(`waitFor: ${assertion}`);
  const { timeoutMs, checkIntervalMs } = opt;

  assert(assertion, 'No assertion for waitFor');
  assert(timeoutMs, 'No timeoutMs for waitFor');
  assert(checkIntervalMs, 'No checkIntervalMs for waitFor');

  const deadline = Date.now() + timeoutMs;
  let errorThought = '';

  while (Date.now() < deadline) {
    const roundStartedAt = Date.now();

    const assertPlan: PlanningAction<PlanningActionParamAssert> = {
      type: 'AssertWithoutThrow',
      param: { assertion },
      locate: null,
    };
    const assertExecutable = await this.convertPlanToExecutable([assertPlan]);
    await taskExecutor.append(
      this.prependExecutorWithScreenshot(assertExecutable.tasks[0]),
    );
    const output: InsightAssertionResponse = await taskExecutor.flush();

    if (output?.pass) {
      return {
        output: undefined,
        executor: taskExecutor,
      };
    }

    // Remember the latest reason for the timeout message.
    errorThought = output?.thought || 'unknown error';

    const elapsed = Date.now() - roundStartedAt;
    if (elapsed < checkIntervalMs) {
      const sleepPlan: PlanningAction<PlanningActionParamSleep> = {
        type: 'Sleep',
        param: { timeMs: checkIntervalMs - elapsed },
        locate: null,
      };
      const sleepExecutable = await this.convertPlanToExecutable([sleepPlan]);
      await taskExecutor.append(
        this.prependExecutorWithScreenshot(sleepExecutable.tasks[0]),
      );
      await taskExecutor.flush();
    }
  }

  return this.appendErrorPlan(
    taskExecutor,
    `waitFor timeout: ${errorThought}`,
  );
}
|
2024-07-23 16:25:11 +08:00
|
|
|
}
|