mirror of
https://github.com/web-infra-dev/midscene.git
synced 2025-12-28 15:39:01 +00:00
feat: support the if-statement in planning prompt (#184)
This commit is contained in:
parent
6e54c153de
commit
523adab12f
@ -1,6 +1,6 @@
|
||||
# Customize Model Provider
|
||||
# Customize Model and Provider
|
||||
|
||||
Midscene uses the OpenAI SDK as the default AI service. You can customize the configuration using environment variables.
|
||||
Midscene uses the OpenAI SDK to call AI services. You can customize the configuration using environment variables.
|
||||
|
||||
These are the main configs, of which `OPENAI_API_KEY` is required.
|
||||
|
||||
@ -29,3 +29,10 @@ export MIDSCENE_OPENAI_INIT_CONFIG_JSON='{"baseURL":"....","defaultHeaders":{"ke
|
||||
# Optional: if you want to use a proxy. Midscene uses `socks-proxy-agent` under the hood.
|
||||
export MIDSCENE_OPENAI_SOCKS_PROXY="socks5://127.0.0.1:1080"
|
||||
```
|
||||
|
||||
Note:
|
||||
|
||||
- Always choose a model that supports vision input. Currently, the known supported models are:
|
||||
- OpenAI: `gpt-4o`
|
||||
- Aliyun: `qwen-vl-max-latest`
|
||||
- Please follow the terms of use of each model.
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
# 自定义模型服务
|
||||
# 自定义模型和服务商
|
||||
|
||||
Midscene 默认集成了 OpenAI SDK 调用 AI 服务,你也可以通过环境变量来自定义配置。
|
||||
Midscene 默认集成了 OpenAI SDK 调用 AI 服务,你可以通过环境变量来自定义配置。
|
||||
|
||||
主要配置项如下,其中 `OPENAI_API_KEY` 是必选项:
|
||||
|
||||
@ -29,3 +29,10 @@ export MIDSCENE_OPENAI_INIT_CONFIG_JSON='{"baseURL":"....","defaultHeaders":{"ke
|
||||
# 可选, 如果你想使用代理。Midscene 使用 `socks-proxy-agent` 作为底层库。
|
||||
export MIDSCENE_OPENAI_SOCKS_PROXY="socks5://127.0.0.1:1080"
|
||||
```
|
||||
|
||||
说明:
|
||||
|
||||
- 务必选择一个支持视觉输入的模型。目前我们已知支持的模型有:
|
||||
- OpenAI: `gpt-4o`
|
||||
- 阿里云: `qwen-vl-max-latest`
|
||||
- 请遵守各项模型的使用条款
|
||||
|
||||
@ -81,7 +81,7 @@ export default defineConfig({
|
||||
link: '/cache',
|
||||
},
|
||||
{
|
||||
text: 'Customize Model Provider',
|
||||
text: 'Customize Model and Provider',
|
||||
link: '/model-provider',
|
||||
},
|
||||
],
|
||||
@ -138,7 +138,7 @@ export default defineConfig({
|
||||
link: '/zh/cache',
|
||||
},
|
||||
{
|
||||
text: '自定义模型服务',
|
||||
text: '自定义模型和服务商',
|
||||
link: '/zh/model-provider',
|
||||
},
|
||||
],
|
||||
|
||||
@ -17,7 +17,6 @@ export async function plan(
|
||||
context: UIContext;
|
||||
callAI?: typeof callAiFn<PlanningAIResponse>;
|
||||
},
|
||||
useModel?: 'coze' | 'openAI',
|
||||
): Promise<PlanningAIResponse> {
|
||||
const { callAI, context } = opts || {};
|
||||
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
|
||||
@ -56,7 +55,7 @@ ${opts.whatHaveDone}
|
||||
pageDescription:\n
|
||||
${pageDescription}
|
||||
\n
|
||||
Here is what you need to do now:
|
||||
Here is the instruction:
|
||||
=====================================
|
||||
${userPrompt}
|
||||
=====================================
|
||||
@ -72,7 +71,6 @@ ${taskBackgroundContext}
|
||||
const { content, usage } = await call({
|
||||
msgs,
|
||||
AIActionType: AIActionType.PLAN,
|
||||
useModel,
|
||||
});
|
||||
|
||||
const planFromAI = content;
|
||||
|
||||
@ -5,15 +5,6 @@ import type {
|
||||
ChatCompletionSystemMessageParam,
|
||||
ChatCompletionUserMessageParam,
|
||||
} from 'openai/resources';
|
||||
import {
|
||||
COZE_AI_ACTION_BOT_ID,
|
||||
COZE_AI_ASSERT_BOT_ID,
|
||||
COZE_EXTRACT_INFO_BOT_ID,
|
||||
COZE_INSPECT_ELEMENT_BOT_ID,
|
||||
callCozeAi,
|
||||
preferCozeModel,
|
||||
transformOpenAiArgsToCoze,
|
||||
} from './coze';
|
||||
import { callToGetJSONObject, preferOpenAIModel } from './openai';
|
||||
|
||||
export type AIArgs = [
|
||||
@ -31,10 +22,9 @@ export enum AIActionType {
|
||||
export async function callAiFn<T>(options: {
|
||||
msgs: AIArgs;
|
||||
AIActionType: AIActionType;
|
||||
useModel?: 'openAI' | 'coze';
|
||||
}): Promise<{ content: T; usage?: AIUsageInfo }> {
|
||||
const { useModel, msgs, AIActionType: AIActionTypeValue } = options;
|
||||
if (preferOpenAIModel(useModel)) {
|
||||
const { msgs, AIActionType: AIActionTypeValue } = options;
|
||||
if (preferOpenAIModel('openAI')) {
|
||||
const { content, usage } = await callToGetJSONObject<T>(
|
||||
msgs,
|
||||
AIActionTypeValue,
|
||||
@ -42,29 +32,6 @@ export async function callAiFn<T>(options: {
|
||||
return { content, usage };
|
||||
}
|
||||
|
||||
// if (preferCozeModel(useModel)) {
|
||||
// let botId = '';
|
||||
// switch (AIActionTypeValue) {
|
||||
// case AIActionType.ASSERT:
|
||||
// botId = COZE_AI_ASSERT_BOT_ID;
|
||||
// break;
|
||||
// case AIActionType.EXTRACT_DATA:
|
||||
// botId = COZE_EXTRACT_INFO_BOT_ID;
|
||||
// break;
|
||||
// case AIActionType.INSPECT_ELEMENT:
|
||||
// botId = COZE_INSPECT_ELEMENT_BOT_ID;
|
||||
// break;
|
||||
// default:
|
||||
// botId = COZE_AI_ACTION_BOT_ID;
|
||||
// }
|
||||
// const cozeMsg = transformOpenAiArgsToCoze(msgs[1]);
|
||||
// const parseResult = await callCozeAi<T>({
|
||||
// ...cozeMsg,
|
||||
// botId,
|
||||
// });
|
||||
// return parseResult;
|
||||
// }
|
||||
|
||||
throw Error(
|
||||
'Cannot find OpenAI config. You should set it before using. https://midscenejs.com/model-provider.html',
|
||||
);
|
||||
|
||||
@ -67,8 +67,7 @@ export async function AiInspectElement<
|
||||
useModel?: 'coze' | 'openAI';
|
||||
quickAnswer?: AISingleElementResponse;
|
||||
}) {
|
||||
const { context, multi, targetElementDescription, callAI, useModel } =
|
||||
options;
|
||||
const { context, multi, targetElementDescription, callAI } = options;
|
||||
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
|
||||
const { description, elementById, elementByPosition } =
|
||||
await describeUserPage(context);
|
||||
@ -152,7 +151,6 @@ ${JSON.stringify({
|
||||
const res = await callAI({
|
||||
msgs,
|
||||
AIActionType: AIActionType.INSPECT_ELEMENT,
|
||||
useModel,
|
||||
});
|
||||
return {
|
||||
parseResult: transformElementPositionToId(res.content, context.content),
|
||||
@ -165,7 +163,6 @@ ${JSON.stringify({
|
||||
const inspectElement = await callAiFn<AIElementResponse>({
|
||||
msgs,
|
||||
AIActionType: AIActionType.INSPECT_ELEMENT,
|
||||
useModel,
|
||||
});
|
||||
|
||||
return {
|
||||
@ -231,7 +228,6 @@ DATA_DEMAND ends.
|
||||
|
||||
const result = await callAiFn<AISectionParseResponse<T>>({
|
||||
msgs,
|
||||
useModel,
|
||||
AIActionType: AIActionType.EXTRACT_DATA,
|
||||
});
|
||||
return {
|
||||
@ -285,7 +281,6 @@ export async function AiAssert<
|
||||
const { content: assertResult, usage } = await callAiFn<AIAssertionResponse>({
|
||||
msgs,
|
||||
AIActionType: AIActionType.ASSERT,
|
||||
useModel,
|
||||
});
|
||||
return {
|
||||
content: assertResult,
|
||||
|
||||
@ -118,13 +118,12 @@ export async function callToGetJSONObject<T>(
|
||||
// gpt-4o-2024-05-13 only supports json_object response format
|
||||
let responseFormat:
|
||||
| OpenAI.ChatCompletionCreateParams['response_format']
|
||||
| OpenAI.ResponseFormatJSONObject = {
|
||||
type: AIResponseFormat.JSON,
|
||||
};
|
||||
| OpenAI.ResponseFormatJSONObject
|
||||
| undefined;
|
||||
|
||||
const model = getModelName();
|
||||
|
||||
if (model === 'gpt-4o-2024-08-06') {
|
||||
if (model.includes('gpt-4o')) {
|
||||
switch (AIActionTypeValue) {
|
||||
case AIActionType.ASSERT:
|
||||
responseFormat = assertSchema;
|
||||
@ -140,10 +139,10 @@ export async function callToGetJSONObject<T>(
|
||||
responseFormat = planSchema;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (model.startsWith('gemini')) {
|
||||
responseFormat = { type: AIResponseFormat.TEXT };
|
||||
if (model === 'gpt-4o-2024-05-13') {
|
||||
responseFormat = { type: AIResponseFormat.JSON };
|
||||
}
|
||||
}
|
||||
|
||||
const safeJsonParse = (input: string) => {
|
||||
@ -162,7 +161,7 @@ export async function callToGetJSONObject<T>(
|
||||
try {
|
||||
return { content: JSON.parse(jsonContent), usage: response.usage };
|
||||
} catch {
|
||||
throw Error(`parse json error: ${response.content}`);
|
||||
throw Error(`failed to parse json response: ${response.content}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -34,14 +34,14 @@ You are a versatile professional in software UI automation. Your outstanding con
|
||||
|
||||
## Objective
|
||||
|
||||
- Decompose the task user asked into a series of actions
|
||||
- Decompose the instruction user asked into a series of actions
|
||||
- Locate the target element if possible
|
||||
- If the task cannot be accomplished, give a further plan.
|
||||
- If the instruction cannot be accomplished, give a further plan.
|
||||
|
||||
## Workflow
|
||||
|
||||
1. Receive the user's element description, screenshot, and instruction.
|
||||
2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / Error / Sleep). The "About the action" section below will give you more details.
|
||||
2. Decompose the user's task into a sequence of actions, and place it in the \`actions\` field. There are different types of actions (Tap / Hover / Input / KeyboardPress / Scroll / FalsyConditionStatement / Sleep). The "About the action" section below will give you more details.
|
||||
3. Precisely locate the target element if it's already shown in the screenshot, put the location info in the \`locate\` field of the action.
|
||||
4. If some target elements is not shown in the screenshot, consider the user's instruction is not feasible on this page. Follow the next steps.
|
||||
5. Consider whether the user's instruction will be accomplished after all the actions
|
||||
@ -52,7 +52,8 @@ You are a versatile professional in software UI automation. Your outstanding con
|
||||
|
||||
- All the actions you composed MUST be based on the page context information you get.
|
||||
- Trust the "What have been done" field about the task (if any), don't repeat actions in it.
|
||||
- If you cannot plan any actions at all, consider the page content is irrelevant to the task. Put the error message in the \`error\` field.
|
||||
- Respond only with valid JSON. Do not write an introduction or summary.
|
||||
- If you cannot plan any action at all (i.e. empty actions array), set reason in the \`error\` field.
|
||||
|
||||
## About the \`actions\` field
|
||||
|
||||
@ -77,10 +78,20 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
|
||||
* \`value\` is the final required input value based on the existing input. No matter what modifications are required, just provide the final value to replace the existing input value.
|
||||
- type: 'KeyboardPress', press a key
|
||||
* { param: { value: string } }
|
||||
- type: 'Scroll'
|
||||
* { param: { scrollType: 'scrollDownOneScreen' | 'scrollUpOneScreen' | 'scrollUntilBottom' | 'scrollUntilTop' } }
|
||||
- type: 'Error'
|
||||
* { param: { message: string } }
|
||||
- type: 'Scroll', scroll up or down.
|
||||
* {
|
||||
locate: LocateParam | null,
|
||||
param: {
|
||||
direction: 'down'(default) | 'up' | 'right' | 'left',
|
||||
scrollType: 'once' (default) | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft',
|
||||
distance: null | number
|
||||
}
|
||||
}
|
||||
* To scroll some specific element, put the element at the center of the region in the \`locate\` field. If it's a page scroll, put \`null\` in the \`locate\` field.
|
||||
* \`param\` is required in this action. If some fields are not specified, use direction \`down\`, \`once\` scroll type, and \`null\` distance.
|
||||
- type: 'FalsyConditionStatement'
|
||||
* { param: null }
|
||||
* use this action when the instruction is an "if" statement and the condition is falsy.
|
||||
- type: 'Sleep'
|
||||
* { param: { timeMs: number } }
|
||||
|
||||
@ -94,7 +105,8 @@ Each action has a \`type\` and corresponding \`param\`. To be detailed:
|
||||
|
||||
## Output JSON Format:
|
||||
|
||||
Please return the result in JSON format as follows:
|
||||
The JSON format is as follows:
|
||||
|
||||
{
|
||||
"actions": [
|
||||
{
|
||||
@ -152,6 +164,7 @@ By viewing the page screenshot and description, you should consider this and out
|
||||
"locate": null
|
||||
},
|
||||
],
|
||||
"error": null,
|
||||
"taskWillBeAccomplished": false,
|
||||
"furtherPlan": {
|
||||
"whatToDoNext": "find the 'English' option and click on it",
|
||||
@ -160,7 +173,39 @@ By viewing the page screenshot and description, you should consider this and out
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
## Example #2 : When task is accomplished, don't plan more actions
|
||||
|
||||
## Example #2 : Tolerate the error situation only when the instruction is an "if" statement
|
||||
|
||||
If the user says "If there is a popup, close it", you should consider this and output the JSON:
|
||||
|
||||
* By viewing the page screenshot and description, you cannot find the popup, so the condition is falsy.
|
||||
* The instruction itself is an "if" statement, it means the user can tolerate this situation, so you should leave a \`FalsyConditionStatement\` action.
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"actions": [{
|
||||
"thought": "There is no popup on the page",
|
||||
"type": "FalsyConditionStatement",
|
||||
"param": null
|
||||
}
|
||||
],
|
||||
"taskWillBeAccomplished": true,
|
||||
"furtherPlan": null
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
For contrast, if the user says "Close the popup" in this situation, you should consider this and output the JSON:
|
||||
|
||||
\`\`\`json
|
||||
{
|
||||
"actions": [],
|
||||
"error": "The instruction and page context are irrelevant, there is no popup on the page",
|
||||
"taskWillBeAccomplished": true,
|
||||
"furtherPlan": null
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
## Example #3 : When task is accomplished, don't plan more actions
|
||||
|
||||
When the user ask to "Wait 4s", you should consider this:
|
||||
|
||||
@ -234,7 +279,7 @@ export const planSchema: ResponseFormatJSONSchema = {
|
||||
param: {
|
||||
type: ['object', 'null'],
|
||||
description:
|
||||
'Parameter towards the task type, can be null only when the type field is Tap or Hover',
|
||||
'Parameter of the action, can be null ONLY when the type field is Tap or Hover',
|
||||
},
|
||||
locate: {
|
||||
type: ['object', 'null'],
|
||||
|
||||
@ -243,6 +243,7 @@ export interface PlanningAction<ParamType = any> {
|
||||
| 'KeyboardPress'
|
||||
| 'Scroll'
|
||||
| 'Error'
|
||||
| 'FalsyConditionStatement'
|
||||
| 'Assert'
|
||||
| 'AssertWithoutThrow'
|
||||
| 'Sleep';
|
||||
@ -269,11 +270,9 @@ export interface PlanningActionParamInputOrKeyPress {
|
||||
value: string;
|
||||
}
|
||||
export interface PlanningActionParamScroll {
|
||||
scrollType:
|
||||
| 'scrollUntilTop'
|
||||
| 'scrollUntilBottom'
|
||||
| 'scrollUpOneScreen'
|
||||
| 'scrollDownOneScreen';
|
||||
direction: 'down' | 'up' | 'right' | 'left';
|
||||
scrollType: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';
|
||||
distance: null | number;
|
||||
}
|
||||
|
||||
export interface PlanningActionParamAssert {
|
||||
|
||||
@ -1,12 +1,12 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`automation - planning openAI > basic run 1`] = `
|
||||
exports[`automation - planning > basic run 1`] = `
|
||||
{
|
||||
"timeMs": 3500,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`automation - planning openAI > basic run 2`] = `
|
||||
exports[`automation - planning > basic run 2`] = `
|
||||
{
|
||||
"value": "Enter",
|
||||
}
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import { plan } from '@/ai-model';
|
||||
/* eslint-disable max-lines-per-function */
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { modelList } from '../util';
|
||||
import { getPageDataOfTestName } from './test-suite/util';
|
||||
|
||||
vi.setConfig({
|
||||
@ -9,42 +8,113 @@ vi.setConfig({
|
||||
hookTimeout: 30 * 1000,
|
||||
});
|
||||
|
||||
modelList.forEach((model) => {
|
||||
describe(`automation - planning ${model}`, () => {
|
||||
it('basic run', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
describe('automation - planning', () => {
|
||||
it('basic run', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
|
||||
const { actions } = await plan(
|
||||
'type "Why is the earth a sphere?", wait 3.5s, hit Enter',
|
||||
{
|
||||
context,
|
||||
},
|
||||
model,
|
||||
);
|
||||
expect(actions.length).toBe(3);
|
||||
expect(actions[0].type).toBe('Input');
|
||||
expect(actions[1].type).toBe('Sleep');
|
||||
expect(actions[1].param).toMatchSnapshot();
|
||||
expect(actions[2].type).toBe('KeyboardPress');
|
||||
expect(actions[2].param).toMatchSnapshot();
|
||||
});
|
||||
const { actions } = await plan(
|
||||
'type "Why is the earth a sphere?", wait 3.5s, hit Enter',
|
||||
{
|
||||
context,
|
||||
},
|
||||
);
|
||||
|
||||
it('instructions of to-do mvc', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const instructions = [
|
||||
'在任务框 input 输入 今天学习 JS,按回车键',
|
||||
'在任务框 input 输入 明天学习 Rust,按回车键',
|
||||
'在任务框 input 输入后天学习 AI,按回车键',
|
||||
'将鼠标移动到任务列表中的第二项,点击第二项任务右边的删除按钮',
|
||||
'点击第二条任务左边的勾选按钮',
|
||||
'点击任务列表下面的 completed 状态按钮',
|
||||
];
|
||||
expect(actions.length).toBe(3);
|
||||
expect(actions[0].type).toBe('Input');
|
||||
expect(actions[1].type).toBe('Sleep');
|
||||
expect(actions[1].param).toMatchSnapshot();
|
||||
expect(actions[2].type).toBe('KeyboardPress');
|
||||
expect(actions[2].param).toMatchSnapshot();
|
||||
});
|
||||
|
||||
for (const instruction of instructions) {
|
||||
const { actions } = await plan(instruction, { context }, model);
|
||||
expect(actions).toBeTruthy();
|
||||
expect(actions[0].locate?.id).toBeTruthy();
|
||||
}
|
||||
});
|
||||
it('instructions of to-do mvc', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const instructions = [
|
||||
'在任务框 input 输入 今天学习 JS,按回车键',
|
||||
'在任务框 input 输入 明天学习 Rust,按回车键',
|
||||
'在任务框 input 输入后天学习 AI,按回车键',
|
||||
'将鼠标移动到任务列表中的第二项,点击第二项任务右边的删除按钮',
|
||||
'点击第二条任务左边的勾选按钮',
|
||||
'点击任务列表下面的 completed 状态按钮',
|
||||
];
|
||||
|
||||
for (const instruction of instructions) {
|
||||
const { actions } = await plan(instruction, { context });
|
||||
expect(actions).toBeTruthy();
|
||||
expect(actions[0].locate?.id).toBeTruthy();
|
||||
}
|
||||
});
|
||||
|
||||
it('scroll some element', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const { actions } = await plan(
|
||||
'Scroll left the status filters (with a button named "complete")',
|
||||
{
|
||||
context,
|
||||
},
|
||||
);
|
||||
|
||||
expect(actions).toBeTruthy();
|
||||
expect(actions[0].type).toBe('Scroll');
|
||||
expect(actions[0].locate).toBeTruthy();
|
||||
});
|
||||
|
||||
it('scroll page', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const { actions } = await plan(
|
||||
'Scroll down the page by 200px, scroll up the page by 100px, scroll right the second item of the task list by 300px',
|
||||
{ context },
|
||||
);
|
||||
expect(actions.length).toBe(3);
|
||||
expect(actions).toBeTruthy();
|
||||
expect(actions[0].type).toBe('Scroll');
|
||||
expect(actions[0].locate).toBeNull();
|
||||
expect(actions[0].param).toBeDefined();
|
||||
|
||||
expect(actions[2].locate).toBeTruthy();
|
||||
expect(actions[2].param).toBeDefined();
|
||||
});
|
||||
|
||||
it('throw error when instruction is not feasible', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
await expect(async () => {
|
||||
await plan('close Cookie Prompt', {
|
||||
context,
|
||||
});
|
||||
}).rejects.toThrow();
|
||||
});
|
||||
|
||||
// An "if" instruction whose condition turns out to be false must not fail the
// plan: the planner is expected to answer with exactly one
// FalsyConditionStatement action.
it('should not throw in an "if" statement', async () => {
  const { context } = await getPageDataOfTestName('todo');
  const { actions } = await plan('If there is a cookie prompt, close it', {
    context,
  });

  // Compare the length itself so a failure reports the actual action count
  // rather than just "expected false to be truthy".
  expect(actions.length).toBe(1);
  expect(actions[0].type).toBe('FalsyConditionStatement');
});
|
||||
|
||||
it('should give a further plan when something is not found', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const res = await plan(
|
||||
'click the input box, wait 300ms, click the close button of the cookie prompt',
|
||||
{ context },
|
||||
);
|
||||
// console.log(res);
|
||||
expect(res.furtherPlan).toBeTruthy();
|
||||
expect(res.furtherPlan?.whatToDoNext).toBeTruthy();
|
||||
expect(res.furtherPlan?.whatHaveDone).toBeTruthy();
|
||||
});
|
||||
|
||||
it('partial error', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const res = await plan(
|
||||
'click the input box, click the close button of the cookie prompt',
|
||||
{ context },
|
||||
);
|
||||
expect(res.furtherPlan).toBeTruthy();
|
||||
expect(res.furtherPlan?.whatToDoNext).toBeTruthy();
|
||||
expect(res.furtherPlan?.whatHaveDone).toBeTruthy();
|
||||
});
|
||||
});
|
||||
|
||||
@ -5,8 +5,8 @@ import { describe, expect, it, vi } from 'vitest';
|
||||
vi.setConfig({
|
||||
testTimeout: 20 * 1000,
|
||||
});
|
||||
describe('openai', () => {
|
||||
it('basic', async () => {
|
||||
describe('openai sdk connectivity', () => {
|
||||
it('connectivity', async () => {
|
||||
const result = await call([
|
||||
{
|
||||
role: 'system',
|
||||
@ -37,4 +37,27 @@ describe('openai', () => {
|
||||
);
|
||||
expect(result.content.answer).toBe(15);
|
||||
});
|
||||
|
||||
it('image input', async () => {
|
||||
const result = await call([
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'text',
|
||||
text: 'Describe this image in one sentence.',
|
||||
},
|
||||
{
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: 'https://portal.volccdn.com/obj/volcfe/bee_prod/biz_950/tos_38e6e81e1366482ed046045e72b0684d.png',
|
||||
detail: 'high',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
]);
|
||||
|
||||
expect(result.content.length).toBeGreaterThan(10);
|
||||
});
|
||||
});
|
||||
|
||||
@ -322,7 +322,10 @@ export const generateAnimationScripts = (
|
||||
});
|
||||
insightOnTop = true;
|
||||
}
|
||||
} else if (task.type === 'Action') {
|
||||
} else if (
|
||||
task.type === 'Action' &&
|
||||
task.subType !== 'FalsyConditionStatement'
|
||||
) {
|
||||
const title = typeStr(task);
|
||||
const subTitle = paramStr(task);
|
||||
scripts.push(pointerScript(mousePointer, title, subTitle));
|
||||
|
||||
@ -104,31 +104,63 @@ export class Page implements AbstractPage {
|
||||
}
|
||||
|
||||
// Scroll to top element
|
||||
async scrollUntilTop(): Promise<void> {
|
||||
async scrollUntilTop(distance?: number): Promise<void> {
|
||||
const { height } = await this.browser.getWindowSize();
|
||||
const scrollDistance = distance || height * 0.7;
|
||||
|
||||
await this.mouseWheel(0, height, 100);
|
||||
await this.mouseWheel(0, -scrollDistance, 100);
|
||||
}
|
||||
|
||||
// Scroll to bottom element
|
||||
async scrollUntilBottom(): Promise<void> {
|
||||
async scrollUntilBottom(distance?: number): Promise<void> {
|
||||
const { height } = await this.browser.getWindowSize();
|
||||
const scrollDistance = distance || height * 0.7;
|
||||
|
||||
await this.mouseWheel(0, -height, 100);
|
||||
await this.mouseWheel(0, scrollDistance, 100);
|
||||
}
|
||||
|
||||
/**
 * Issues a single leftward wheel gesture.
 *
 * @param distance - Horizontal distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the window width reported by the browser is used.
 */
async scrollUntilLeft(distance?: number): Promise<void> {
  const windowSize = await this.browser.getWindowSize();
  const deltaX = distance ? distance : windowSize.width * 0.7;
  // Negative X scrolls toward the left. The trailing 100 is forwarded to
  // mouseWheel unchanged (presumably a duration in ms — confirm).
  await this.mouseWheel(-deltaX, 0, 100);
}
|
||||
|
||||
/**
 * Issues a single rightward wheel gesture.
 *
 * @param distance - Horizontal distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the window width reported by the browser is used.
 */
async scrollUntilRight(distance?: number): Promise<void> {
  const windowSize = await this.browser.getWindowSize();
  const deltaX = distance ? distance : windowSize.width * 0.7;
  // Positive X scrolls toward the right. The trailing 100 is forwarded to
  // mouseWheel unchanged (presumably a duration in ms — confirm).
  await this.mouseWheel(deltaX, 0, 100);
}
|
||||
|
||||
// Scroll up one screen
|
||||
async scrollUpOneScreen(): Promise<void> {
|
||||
async scrollUp(distance?: number): Promise<void> {
|
||||
const { height } = await this.browser.getWindowSize();
|
||||
const scrollDistance = distance || height * 0.7;
|
||||
|
||||
await this.mouseWheel(0, height, 1000);
|
||||
await this.mouseWheel(0, -scrollDistance, 1000);
|
||||
}
|
||||
|
||||
// Scroll down one screen
|
||||
async scrollDownOneScreen(): Promise<void> {
|
||||
async scrollDown(distance?: number): Promise<void> {
|
||||
const { height } = await this.browser.getWindowSize();
|
||||
const scrollDistance = distance || height * 0.7;
|
||||
|
||||
await this.mouseWheel(0, -height, 1000);
|
||||
await this.mouseWheel(0, scrollDistance, 1000);
|
||||
}
|
||||
|
||||
/**
 * Scrolls left once.
 *
 * @param distance - Horizontal distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the window width reported by the browser is used.
 */
async scrollLeft(distance?: number): Promise<void> {
  const windowSize = await this.browser.getWindowSize();
  const deltaX = distance ? distance : windowSize.width * 0.7;
  // Negative X scrolls toward the left. The trailing 1000 is forwarded to
  // mouseWheel unchanged (presumably a duration in ms — confirm).
  await this.mouseWheel(-deltaX, 0, 1000);
}
|
||||
|
||||
/**
 * Scrolls right once.
 *
 * @param distance - Horizontal distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the window width reported by the browser is used.
 */
async scrollRight(distance?: number): Promise<void> {
  const windowSize = await this.browser.getWindowSize();
  const deltaX = distance ? distance : windowSize.width * 0.7;
  // Positive X scrolls toward the right. The trailing 1000 is forwarded to
  // mouseWheel unchanged (presumably a duration in ms — confirm).
  await this.mouseWheel(deltaX, 0, 1000);
}
|
||||
|
||||
private async keyboardType(text: string): Promise<void> {
|
||||
|
||||
@ -192,22 +192,36 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
|
||||
return this.mouse.wheel(0, 9999999);
|
||||
}
|
||||
|
||||
async scrollUpOneScreen() {
|
||||
await chrome.scripting.executeScript({
|
||||
target: { tabId: this.tabId, allFrames: true },
|
||||
func: () => {
|
||||
window.scrollBy(0, -window.innerHeight * 0.7);
|
||||
},
|
||||
});
|
||||
async scrollUntilLeft() {
|
||||
return this.mouse.wheel(-9999999, 0);
|
||||
}
|
||||
|
||||
async scrollDownOneScreen() {
|
||||
await chrome.scripting.executeScript({
|
||||
target: { tabId: this.tabId, allFrames: true },
|
||||
func: () => {
|
||||
window.scrollBy(0, window.innerHeight * 0.7);
|
||||
},
|
||||
});
|
||||
async scrollUntilRight() {
|
||||
return this.mouse.wheel(9999999, 0);
|
||||
}
|
||||
|
||||
/**
 * Scrolls up once via the page's mouse wheel.
 *
 * @param distance - Vertical distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the height returned by `this.size()` is used.
 * @returns Whatever `this.mouse.wheel` returns.
 */
async scrollUp(distance?: number) {
  const viewport = await this.size();
  const deltaY = distance ? distance : viewport.height * 0.7;
  // Negative Y moves the wheel upward.
  return this.mouse.wheel(0, -deltaY);
}
|
||||
|
||||
/**
 * Scrolls down once via the page's mouse wheel.
 *
 * @param distance - Vertical distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the height returned by `this.size()` is used.
 * @returns Whatever `this.mouse.wheel` returns.
 */
async scrollDown(distance?: number) {
  const viewport = await this.size();
  const deltaY = distance ? distance : viewport.height * 0.7;
  // Positive Y moves the wheel downward.
  return this.mouse.wheel(0, deltaY);
}
|
||||
|
||||
/**
 * Scrolls left once via the page's mouse wheel.
 *
 * @param distance - Horizontal distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the width returned by `this.size()` is used.
 * @returns Whatever `this.mouse.wheel` returns.
 */
async scrollLeft(distance?: number) {
  const viewport = await this.size();
  const deltaX = distance ? distance : viewport.width * 0.7;
  // Negative X moves the wheel toward the left.
  return this.mouse.wheel(-deltaX, 0);
}
|
||||
|
||||
/**
 * Scrolls right once via the page's mouse wheel.
 *
 * @param distance - Horizontal distance to scroll. When omitted (or otherwise
 *   falsy), 70% of the width returned by `this.size()` is used.
 * @returns Whatever `this.mouse.wheel` returns.
 */
async scrollRight(distance?: number) {
  const viewport = await this.size();
  const deltaX = distance ? distance : viewport.width * 0.7;
  // Positive X moves the wheel toward the right.
  return this.mouse.wheel(deltaX, 0);
}
|
||||
|
||||
async clearInput(element: ElementInfo) {
|
||||
@ -309,60 +323,3 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
|
||||
await this.detachDebugger();
|
||||
}
|
||||
}
|
||||
|
||||
// backup: some implementation by chrome extension API instead of CDP
|
||||
// async function getPageContentOfTab(tabId: number): Promise<{
|
||||
// context: ElementInfo[];
|
||||
// size: { width: number; height: number; dpr: number };
|
||||
// }> {
|
||||
// await chrome.scripting.executeScript({
|
||||
// target: {
|
||||
// tabId,
|
||||
// allFrames: true,
|
||||
// },
|
||||
// files: [scriptFileToRetrieve],
|
||||
// });
|
||||
|
||||
// // call and retrieve the result
|
||||
// const returnValue = await chrome.scripting.executeScript({
|
||||
// target: { tabId, allFrames: true },
|
||||
// func: () => {
|
||||
// return {
|
||||
// context: (
|
||||
// window as any
|
||||
// ).midscene_element_inspector.webExtractTextWithPosition(),
|
||||
// size: {
|
||||
// width: document.documentElement.clientWidth,
|
||||
// height: document.documentElement.clientHeight,
|
||||
// dpr: window.devicePixelRatio,
|
||||
// },
|
||||
// };
|
||||
// },
|
||||
// });
|
||||
|
||||
// console.log('returnValue', returnValue);
|
||||
// if (!returnValue[0].result) {
|
||||
// throw new Error(`Failed to get active page content of tabId: ${tabId}`);
|
||||
// }
|
||||
|
||||
// return returnValue[0].result;
|
||||
// }
|
||||
|
||||
// async function getSizeInfoOfTab(tabId: number): Promise<{
|
||||
// dpr: number;
|
||||
// width: number;
|
||||
// height: number;
|
||||
// }> {
|
||||
// const returnValue = await chrome.scripting.executeScript({
|
||||
// target: { tabId, allFrames: false },
|
||||
// func: () => {
|
||||
// return {
|
||||
// dpr: window.devicePixelRatio,
|
||||
// width: document.documentElement.clientWidth,
|
||||
// height: document.documentElement.clientHeight,
|
||||
// };
|
||||
// },
|
||||
// });
|
||||
// // console.log('returnValue of getScreenInfoOfTab', returnValue);
|
||||
// return returnValue[0].result!;
|
||||
// }
|
||||
|
||||
@ -313,27 +313,42 @@ export class PageTaskExecutor {
|
||||
param: plan.param,
|
||||
thought: plan.thought,
|
||||
locate: plan.locate,
|
||||
executor: async (taskParam) => {
|
||||
const scrollToEventName = taskParam.scrollType;
|
||||
|
||||
switch (scrollToEventName) {
|
||||
case 'scrollUntilTop':
|
||||
await this.page.scrollUntilTop();
|
||||
break;
|
||||
case 'scrollUntilBottom':
|
||||
await this.page.scrollUntilBottom();
|
||||
break;
|
||||
case 'scrollUpOneScreen':
|
||||
await this.page.scrollUpOneScreen();
|
||||
break;
|
||||
case 'scrollDownOneScreen':
|
||||
await this.page.scrollDownOneScreen();
|
||||
break;
|
||||
default:
|
||||
console.error(
|
||||
'Unknown scroll event type:',
|
||||
scrollToEventName,
|
||||
executor: async (taskParam, { element }) => {
|
||||
if (element) {
|
||||
await this.page.mouse.move(
|
||||
element.center[0],
|
||||
element.center[1],
|
||||
);
|
||||
}
|
||||
const scrollToEventName = taskParam?.scrollType;
|
||||
if (scrollToEventName === 'untilTop') {
|
||||
await this.page.scrollUntilTop();
|
||||
} else if (scrollToEventName === 'untilBottom') {
|
||||
await this.page.scrollUntilBottom();
|
||||
} else if (scrollToEventName === 'untilRight') {
|
||||
await this.page.scrollUntilRight();
|
||||
} else if (scrollToEventName === 'untilLeft') {
|
||||
await this.page.scrollUntilLeft();
|
||||
} else if (scrollToEventName === 'once') {
|
||||
if (taskParam.direction === 'down') {
|
||||
await this.page.scrollDown(taskParam.distance || undefined);
|
||||
} else if (taskParam.direction === 'up') {
|
||||
await this.page.scrollUp(taskParam.distance || undefined);
|
||||
} else if (taskParam.direction === 'left') {
|
||||
await this.page.scrollLeft(taskParam.distance || undefined);
|
||||
} else if (taskParam.direction === 'right') {
|
||||
await this.page.scrollRight(taskParam.distance || undefined);
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unknown scroll direction: ${taskParam.direction}`,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
throw new Error(
|
||||
`Unknown scroll event type: ${scrollToEventName}, taskParam: ${JSON.stringify(
|
||||
taskParam,
|
||||
)}`,
|
||||
);
|
||||
}
|
||||
},
|
||||
};
|
||||
@ -364,6 +379,19 @@ export class PageTaskExecutor {
|
||||
},
|
||||
};
|
||||
tasks.push(taskActionError);
|
||||
} else if (plan.type === 'FalsyConditionStatement') {
|
||||
const taskActionFalsyConditionStatement: ExecutionTaskActionApply<null> =
|
||||
{
|
||||
type: 'Action',
|
||||
subType: 'FalsyConditionStatement',
|
||||
param: null,
|
||||
thought: plan.thought,
|
||||
locate: plan.locate,
|
||||
executor: async () => {
|
||||
// console.warn(`[warn]falsy condition: ${plan.thought}`);
|
||||
},
|
||||
};
|
||||
tasks.push(taskActionFalsyConditionStatement);
|
||||
} else {
|
||||
throw new Error(`Unknown or unsupported task type: ${plan.type}`);
|
||||
}
|
||||
@ -512,6 +540,11 @@ export class PageTaskExecutor {
|
||||
return this.appendErrorPlan(taskExecutor, errorMsg);
|
||||
}
|
||||
|
||||
if (replanCount > 0) {
|
||||
// add a brief sleep to wait for the page to be ready
|
||||
await sleep(300);
|
||||
}
|
||||
|
||||
// plan
|
||||
await taskExecutor.append(planningTask);
|
||||
const planResult: PlanningAIResponse = await taskExecutor.flush();
|
||||
@ -524,26 +557,6 @@ export class PageTaskExecutor {
|
||||
|
||||
const plans = planResult.actions;
|
||||
|
||||
// check if their is nothing but a locate will null task
|
||||
// const validPlans = plans.filter((plan: PlanningAction) => {
|
||||
// if (plan.type === 'Locate' && !plan.param?.id) {
|
||||
// return false;
|
||||
// }
|
||||
// return plan.type !== 'Plan';
|
||||
// });
|
||||
// if (validPlans.length === 0) {
|
||||
// if (replanCount === 0) {
|
||||
// return this.appendErrorPlan(
|
||||
// taskExecutor,
|
||||
// `No valid plans found, cannot proceed: ${userPrompt}`,
|
||||
// );
|
||||
// }
|
||||
// return this.appendErrorPlan(
|
||||
// taskExecutor,
|
||||
// `Cannot proceed after several steps, please check the report: ${userPrompt}`,
|
||||
// );
|
||||
// }
|
||||
|
||||
let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;
|
||||
try {
|
||||
executables = await this.convertPlanToExecutable(plans, cacheGroup);
|
||||
|
||||
@ -1,10 +1,12 @@
|
||||
import type {
|
||||
ExecutionTask,
|
||||
ExecutionTaskAction,
|
||||
ExecutionTaskActionApply,
|
||||
ExecutionTaskInsightAssertion,
|
||||
ExecutionTaskInsightLocate,
|
||||
ExecutionTaskInsightQuery,
|
||||
ExecutionTaskPlanning,
|
||||
PlanningActionParamScroll,
|
||||
} from '@midscene/core';
|
||||
|
||||
export function typeStr(task: ExecutionTask) {
|
||||
@ -27,8 +29,23 @@ export function paramStr(task: ExecutionTask) {
|
||||
|
||||
if (task.type === 'Action') {
|
||||
const sleepMs = (task as ExecutionTaskAction)?.param?.timeMs;
|
||||
const scrollType = (
|
||||
task as ExecutionTask<ExecutionTaskActionApply<PlanningActionParamScroll>>
|
||||
)?.param?.scrollType;
|
||||
if (sleepMs) {
|
||||
value = `${sleepMs}ms`;
|
||||
} else if (scrollType) {
|
||||
const scrollDirection = (
|
||||
task as ExecutionTask<
|
||||
ExecutionTaskActionApply<PlanningActionParamScroll>
|
||||
>
|
||||
)?.param?.direction;
|
||||
const scrollDistance = (
|
||||
task as ExecutionTask<
|
||||
ExecutionTaskActionApply<PlanningActionParamScroll>
|
||||
>
|
||||
)?.param?.distance;
|
||||
value = `${scrollDirection}, ${scrollType}, ${scrollDistance || 'distance-not-set'}`;
|
||||
} else {
|
||||
value =
|
||||
(task as ExecutionTaskAction)?.param?.value ||
|
||||
|
||||
@ -35,8 +35,12 @@ export abstract class AbstractPage {
|
||||
|
||||
abstract scrollUntilTop(): Promise<void>;
|
||||
abstract scrollUntilBottom(): Promise<void>;
|
||||
abstract scrollUpOneScreen(): Promise<void>;
|
||||
abstract scrollDownOneScreen(): Promise<void>;
|
||||
abstract scrollUntilLeft(): Promise<void>;
|
||||
abstract scrollUntilRight(): Promise<void>;
|
||||
abstract scrollUp(distance?: number): Promise<void>;
|
||||
abstract scrollDown(distance?: number): Promise<void>;
|
||||
abstract scrollLeft(distance?: number): Promise<void>;
|
||||
abstract scrollRight(distance?: number): Promise<void>;
|
||||
|
||||
abstract _forceUsePageContext?(): Promise<WebUIContext>;
|
||||
|
||||
|
||||
@ -47,12 +47,28 @@ export default class StaticPage implements AbstractPage {
|
||||
return ThrowNotImplemented('scrollUntilBottom');
|
||||
}
|
||||
|
||||
async scrollUpOneScreen() {
|
||||
return ThrowNotImplemented('scrollUpOneScreen');
|
||||
async scrollUntilLeft() {
|
||||
return ThrowNotImplemented('scrollUntilLeft');
|
||||
}
|
||||
|
||||
async scrollDownOneScreen() {
|
||||
return ThrowNotImplemented('scrollDownOneScreen');
|
||||
async scrollUntilRight() {
|
||||
return ThrowNotImplemented('scrollUntilRight');
|
||||
}
|
||||
|
||||
/**
 * Scrolling up is not supported here: the call is handed straight to
 * `ThrowNotImplemented('scrollUp')` and `distance` is ignored.
 */
async scrollUp(distance?: number) {
  return ThrowNotImplemented('scrollUp');
}
|
||||
|
||||
/**
 * Scrolling down is not supported here: the call is handed straight to
 * `ThrowNotImplemented('scrollDown')` and `distance` is ignored.
 */
async scrollDown(distance?: number) {
  return ThrowNotImplemented('scrollDown');
}
|
||||
|
||||
/**
 * Scrolling left is not supported here: the call is handed straight to
 * `ThrowNotImplemented('scrollLeft')` and `distance` is ignored.
 */
async scrollLeft(distance?: number) {
  return ThrowNotImplemented('scrollLeft');
}
|
||||
|
||||
/**
 * Scrolling right is not supported here: the call is handed straight to
 * `ThrowNotImplemented('scrollRight')` and `distance` is ignored.
 */
async scrollRight(distance?: number) {
  return ThrowNotImplemented('scrollRight');
}
|
||||
|
||||
async clearInput() {
|
||||
|
||||
@ -136,19 +136,38 @@ export class Page<
|
||||
/**
 * Scroll toward the top of the page by issuing a single wheel event with a
 * very large negative vertical delta.
 *
 * @returns The promise returned by `this.mouse.wheel`.
 */
scrollUntilTop(): Promise<void> {
  const hugeDelta = 9999999;
  return this.mouse.wheel(0, -hugeDelta);
}
|
||||
|
||||
/**
 * Scroll toward the bottom of the page by issuing a single wheel event with a
 * very large positive vertical delta.
 *
 * @returns The promise returned by `this.mouse.wheel`.
 */
scrollUntilBottom(): Promise<void> {
  const hugeDelta = 9999999;
  return this.mouse.wheel(0, hugeDelta);
}
|
||||
|
||||
async scrollUpOneScreen(): Promise<void> {
|
||||
const innerHeight = await this.evaluate(() => window.innerHeight);
|
||||
const distance = innerHeight * 0.7;
|
||||
await this.mouse.wheel(0, -distance);
|
||||
scrollUntilLeft(): Promise<void> {
|
||||
return this.mouse.wheel(-9999999, 0);
|
||||
}
|
||||
async scrollDownOneScreen(): Promise<void> {
|
||||
|
||||
scrollUntilRight(): Promise<void> {
|
||||
return this.mouse.wheel(9999999, 0);
|
||||
}
|
||||
|
||||
async scrollUp(distance?: number): Promise<void> {
|
||||
const innerHeight = await this.evaluate(() => window.innerHeight);
|
||||
const distance = innerHeight * 0.7;
|
||||
await this.mouse.wheel(0, distance);
|
||||
const scrollDistance = distance || innerHeight * 0.7;
|
||||
await this.mouse.wheel(0, -scrollDistance);
|
||||
}
|
||||
/**
 * Scroll down with a single wheel event.
 *
 * @param distance - Vertical wheel delta to use. When omitted (or falsy, e.g.
 *   `0`), 70% of `window.innerHeight` is used instead.
 */
async scrollDown(distance?: number): Promise<void> {
  // Query the page for its viewport height only when no explicit distance
  // was supplied, so an explicit scroll costs no extra evaluate round trip.
  const scrollDistance =
    distance || (await this.evaluate(() => window.innerHeight)) * 0.7;
  await this.mouse.wheel(0, scrollDistance);
}
|
||||
/**
 * Scroll left with a single wheel event.
 *
 * @param distance - Horizontal wheel delta to use (negated before being sent).
 *   When omitted (or falsy, e.g. `0`), 70% of `window.innerWidth` is used
 *   instead.
 */
async scrollLeft(distance?: number): Promise<void> {
  // Query the page for its viewport width only when no explicit distance
  // was supplied, so an explicit scroll costs no extra evaluate round trip.
  const scrollDistance =
    distance || (await this.evaluate(() => window.innerWidth)) * 0.7;
  await this.mouse.wheel(-scrollDistance, 0);
}
|
||||
/**
 * Scroll right with a single wheel event.
 *
 * @param distance - Horizontal wheel delta to use. When omitted (or falsy,
 *   e.g. `0`), 70% of `window.innerWidth` is used instead.
 */
async scrollRight(distance?: number): Promise<void> {
  // Query the page for its viewport width only when no explicit distance
  // was supplied, so an explicit scroll costs no extra evaluate round trip.
  const scrollDistance =
    distance || (await this.evaluate(() => window.innerWidth)) * 0.7;
  await this.mouse.wheel(scrollDistance, 0);
}
|
||||
|
||||
async destroy(): Promise<void> {
|
||||
|
||||
36
packages/web-integration/tests/ai/web/puppeteer/scroll.html
Normal file
36
packages/web-integration/tests/ai/web/puppeteer/scroll.html
Normal file
@ -0,0 +1,36 @@
|
||||
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Scroll Page Demo</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      margin: 20px;
    }

    /* Fixed-size containers whose content overflows along one axis */
    .scroll-box {
      width: 300px;
      background-color: #ccc;
    }

    .scroll-box-vertical {
      height: 300px;
      overflow-y: scroll;
    }

    .scroll-box-horizontal {
      height: 100px;
      overflow-x: scroll;
      white-space: nowrap;
    }

    /* 100x100 tiles; only the background colour differs per tile */
    .tile {
      width: 100px;
      height: 100px;
    }

    .scroll-box-horizontal .tile {
      display: inline-block;
    }
  </style>
</head>

<body>
  <!-- Five stacked tiles (500px) inside a 300px-tall box -->
  <h1>Vertical Scroll</h1>
  <div class="scroll-box scroll-box-vertical">
    <div class="tile" style="background-color: #aaa;">Vertical 1</div>
    <div class="tile" style="background-color: #bbb;">Vertical 2</div>
    <div class="tile" style="background-color: #ccc;">Vertical 3</div>
    <div class="tile" style="background-color: #ddd;">Vertical 4</div>
    <div class="tile" style="background-color: #eee;">Vertical 5</div>
  </div>

  <!-- Five inline tiles on one unwrapped row inside a 300px-wide box -->
  <h1>Horizontal Scroll</h1>
  <div class="scroll-box scroll-box-horizontal">
    <div class="tile" style="background-color: #aaa;">Horizontal 1</div>
    <div class="tile" style="background-color: #bbb;">Horizontal 2</div>
    <div class="tile" style="background-color: #ccc;">Horizontal 3</div>
    <div class="tile" style="background-color: #ddd;">Horizontal 4</div>
    <div class="tile" style="background-color: #eee;">Horizontal 5</div>
  </div>
</body>

</html>
|
||||
@ -1,3 +1,4 @@
|
||||
import path from 'node:path';
|
||||
import { PuppeteerAgent } from '@/puppeteer';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { launchPage } from './utils';
|
||||
@ -84,7 +85,7 @@ describe(
|
||||
await reset();
|
||||
});
|
||||
|
||||
it('Search', async () => {
|
||||
it('search engine', async () => {
|
||||
const { originPage, reset } = await launchPage('https://www.baidu.com/');
|
||||
const mid = new PuppeteerAgent(originPage);
|
||||
await mid.aiAction(
|
||||
@ -95,6 +96,31 @@ describe(
|
||||
|
||||
await reset();
|
||||
});
|
||||
|
||||
// Drives the local scroll.html fixture: asks the agent to scroll inside the
// vertical and horizontal containers, then asserts which tiles are visible.
it('scroll', async () => {
  const htmlPath = path.join(__dirname, 'scroll.html');
  const { originPage, reset } = await launchPage(`file://${htmlPath}`);
  // const { originPage, reset } = await launchPage('https://news.baidu.com/');
  try {
    const mid = new PuppeteerAgent(originPage);
    await mid.aiAction(
      'find the "Vertical 2" element, scroll down 200px, find the "Horizontal 2" element, scroll right 100px',
    );
    await mid.aiAssert(
      'the "Horizontal 2", "Horizontal 4" and "Vertical 5" elements are visible',
    );
  } finally {
    // Run the cleanup returned by launchPage even when the action or the
    // assertion throws, so a failing test does not skip it.
    await reset();
  }
});
|
||||
|
||||
// Skipped scratch test for trying prompts by hand against a live page.
it.skip('Playground', async () => {
  const { originPage, reset } = await launchPage('https://www.baidu.com/');
  try {
    const mid = new PuppeteerAgent(originPage);
    // await mid.aiAction('Close the cookie prompt');
    await mid.aiAction(
      'Type "AI 101" in search box, hit Enter, wait 2s. If there is a cookie prompt, close it',
    );
  } finally {
    // Run the cleanup returned by launchPage even when the action throws.
    await reset();
  }
});
|
||||
},
|
||||
{
|
||||
timeout: 180 * 1000,
|
||||
|
||||
@ -118,7 +118,7 @@ describe(
|
||||
height: 200,
|
||||
},
|
||||
});
|
||||
await page.scrollDownOneScreen();
|
||||
await page.scrollDown();
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
await generateExtractData(
|
||||
page,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user