chore: fix e2e test (#316)
* chore: fix e2e test * chore: ignore todo test * chore: fix e2e test * chore: upgrade gpt api version * chore: fix e2e test * chore: fix e2e test * chore: fix direction * chore: fix direction * chore: split test * chore: split test * chore: modify input info * fix: scroll param in ci * fix: lint * fix: ai test * fix: ai test --------- Co-authored-by: yutao <yutao.tao@bytedance.com>
4
.github/workflows/ai.yml
vendored
@ -124,8 +124,10 @@ jobs:
|
||||
if-no-files-found: ignore
|
||||
|
||||
- name: Check if script failed
|
||||
if: steps.test-ai.outcome == 'failure'
|
||||
if: steps.test-ai.outcome == 'failure' || steps.e2e-tests.outcome == 'failure' || steps.e2e-tests-cache.outcome == 'failure' || steps.e2e-tests-report.outcome == 'failure'
|
||||
run: exit 1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@ -34,8 +34,9 @@
|
||||
"computer": "TEST_COMPUTER=true npm run test:ai -- tests/ai/evaluate/computer.test.ts",
|
||||
"evaluate": "npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
|
||||
"evaluate:assertion": "npm run test:ai -- tests/ai/evaluate/assertion.test.ts",
|
||||
"prompt": "npm run test:ai -- tests/ai/parse-action.test.ts",
|
||||
"evaluate:plan": "npm run test:ai -- tests/ai/evaluate/plan/planning.test.ts",
|
||||
"evaluate:update": "UPDATE_AI_DATA=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
|
||||
"prompt": "npm run test:ai -- tests/ai/parse-action.test.ts",
|
||||
"prepublishOnly": "npm run build"
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
@ -9,8 +9,8 @@ import {
|
||||
repeatFile,
|
||||
} from './test-suite/util';
|
||||
import 'dotenv/config';
|
||||
import { repeatTime } from '../util';
|
||||
|
||||
const repeatTime = 2;
|
||||
const testSources = [
|
||||
// 'todo',
|
||||
'online_order',
|
||||
|
||||
@ -14,8 +14,8 @@ import {
|
||||
runTestCases,
|
||||
} from './test-suite/util';
|
||||
import 'dotenv/config';
|
||||
import { repeatTime } from '../util';
|
||||
|
||||
const repeatTime = 2;
|
||||
const relocateAfterPlanning = false;
|
||||
const failCaseThreshold = process.env.CI ? 1 : 0;
|
||||
const testSources = [
|
||||
|
||||
@ -0,0 +1,89 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`automation - planning input > input value 1`] = `
|
||||
[
|
||||
{
|
||||
"locate": {
|
||||
"id": "fbc2d002",
|
||||
"prompt": "the input field with placeholder 'What needs to be done?'",
|
||||
},
|
||||
"param": {
|
||||
"value": "learning english",
|
||||
},
|
||||
"thought": undefined,
|
||||
"type": "Input",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`automation - planning input > input value 2`] = `
|
||||
[
|
||||
{
|
||||
"locate": {
|
||||
"id": "fbc2d002",
|
||||
"prompt": "the input field labeled 'What needs to be done?'",
|
||||
},
|
||||
"param": {
|
||||
"value": "learning english",
|
||||
},
|
||||
"thought": undefined,
|
||||
"type": "Input",
|
||||
},
|
||||
{
|
||||
"locate": null,
|
||||
"param": {
|
||||
"value": "Enter",
|
||||
},
|
||||
"thought": undefined,
|
||||
"type": "KeyboardPress",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`automation - planning input > input value Add, delete, correct and check 1`] = `
|
||||
[
|
||||
{
|
||||
"locate": {
|
||||
"id": "fbc2d002",
|
||||
"prompt": "the task input box with the content 'Learn English'",
|
||||
},
|
||||
"param": {
|
||||
"value": "Learn English tomorrow",
|
||||
},
|
||||
"thought": undefined,
|
||||
"type": "Input",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`automation - planning input > input value Add, delete, correct and check 2`] = `
|
||||
[
|
||||
{
|
||||
"locate": {
|
||||
"id": "fbc2d002",
|
||||
"prompt": "the input box containing 'Learn English'",
|
||||
},
|
||||
"param": {
|
||||
"value": "Learn Skiing",
|
||||
},
|
||||
"thought": undefined,
|
||||
"type": "Input",
|
||||
},
|
||||
]
|
||||
`;
|
||||
|
||||
exports[`automation - planning input > input value Add, delete, correct and check 3`] = `
|
||||
[
|
||||
{
|
||||
"locate": {
|
||||
"id": "fbc2d002",
|
||||
"prompt": "the task input box containing 'Learn English'",
|
||||
},
|
||||
"param": {
|
||||
"value": "Learn",
|
||||
},
|
||||
"thought": undefined,
|
||||
"type": "Input",
|
||||
},
|
||||
]
|
||||
`;
|
||||
@ -0,0 +1,13 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`automation - planning > basic run 1`] = `
|
||||
{
|
||||
"timeMs": 3500,
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`automation - planning > basic run 2`] = `
|
||||
{
|
||||
"value": "Enter",
|
||||
}
|
||||
`;
|
||||
@ -1,7 +1,7 @@
|
||||
import { plan } from '@/ai-model';
|
||||
/* eslint-disable max-lines-per-function */
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { makePlanResultStable } from '../../util';
|
||||
import { makePlanResultStable, repeatTime } from '../../util';
|
||||
import { getPageDataOfTestName, repeat } from './../test-suite/util';
|
||||
|
||||
vi.setConfig({
|
||||
@ -10,7 +10,7 @@ vi.setConfig({
|
||||
});
|
||||
|
||||
describe('automation - planning input', () => {
|
||||
repeat(5, () =>
|
||||
repeat(repeatTime, () =>
|
||||
it('input value', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const instructions = [
|
||||
@ -26,7 +26,7 @@ describe('automation - planning input', () => {
|
||||
}),
|
||||
);
|
||||
|
||||
repeat(5, () =>
|
||||
repeat(repeatTime, () =>
|
||||
it('input value Add, delete, correct and check', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo-input-with-value');
|
||||
const instructions = [
|
||||
|
||||
@ -48,7 +48,7 @@ describe('automation - planning', () => {
|
||||
it('scroll some element', async () => {
|
||||
const { context } = await getPageDataOfTestName('todo');
|
||||
const { actions } = await plan(
|
||||
'Scroll left the status filters (with a button named "complete")',
|
||||
'Scroll left the status filters (with a button named "completed")',
|
||||
{
|
||||
context,
|
||||
},
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`assert openAI > online order 1`] = `
|
||||
exports[`extract > online order 1`] = `
|
||||
{
|
||||
"data": [
|
||||
{
|
||||
@ -17,7 +17,7 @@ exports[`assert openAI > online order 1`] = `
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`assert openAI > todo 1`] = `
|
||||
exports[`extract > todo 1`] = `
|
||||
{
|
||||
"data": [
|
||||
"Learn Python",
|
||||
@ -29,7 +29,7 @@ exports[`assert openAI > todo 1`] = `
|
||||
}
|
||||
`;
|
||||
|
||||
exports[`assert openAI > todo obj 1`] = `
|
||||
exports[`extract > todo obj 1`] = `
|
||||
{
|
||||
"data": [
|
||||
{
|
||||
|
||||
@ -1,58 +1,61 @@
|
||||
import path from 'node:path';
|
||||
import { vlmPlanning } from '@/ai-model/ui-tars-planning';
|
||||
import { savePositionImg } from '@midscene/shared/img';
|
||||
import { expect, test } from 'vitest';
|
||||
import { expect, it, test } from 'vitest';
|
||||
import { getPageTestData } from '../evaluate/test-suite/util';
|
||||
|
||||
test('inspect with quick answer', async () => {
|
||||
const { context } = await getPageTestData(
|
||||
path.join(__dirname, '../evaluate/test-data/todo'),
|
||||
);
|
||||
const isUiTars = process.env.MIDSCENE_USE_VLM_UI_TARS === '1';
|
||||
|
||||
const { width, height } = context.size;
|
||||
const startTime = Date.now();
|
||||
const { realActions } = await vlmPlanning({
|
||||
userInstruction: '删除第二条任务',
|
||||
conversationHistory: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: context.originalScreenshotBase64,
|
||||
test.skipIf(!isUiTars)('only run in ui-tars', () => {
|
||||
it('plan to target', async () => {
|
||||
const { context } = await getPageTestData(
|
||||
path.join(__dirname, '../evaluate/test-data/todo'),
|
||||
);
|
||||
|
||||
const { width, height } = context.size;
|
||||
const startTime = Date.now();
|
||||
const { realActions } = await vlmPlanning({
|
||||
userInstruction: '删除第二条任务',
|
||||
conversationHistory: [
|
||||
{
|
||||
role: 'user',
|
||||
content: [
|
||||
{
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: context.originalScreenshotBase64,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
],
|
||||
},
|
||||
],
|
||||
size: {
|
||||
width,
|
||||
height,
|
||||
},
|
||||
],
|
||||
size: {
|
||||
width,
|
||||
height,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
const endTime = Date.now();
|
||||
const cost = (endTime - startTime) / 1000;
|
||||
const start_box =
|
||||
'start_box' in realActions[0].action_inputs
|
||||
? realActions[0].action_inputs.start_box
|
||||
: '[]';
|
||||
const box = JSON.parse(start_box);
|
||||
console.log('plan to target content:', {
|
||||
box,
|
||||
size: {
|
||||
width,
|
||||
height,
|
||||
},
|
||||
cost: `${cost}s`,
|
||||
const endTime = Date.now();
|
||||
const cost = (endTime - startTime) / 1000;
|
||||
const start_box =
|
||||
'start_box' in realActions[0].action_inputs
|
||||
? realActions[0].action_inputs.start_box
|
||||
: '[]';
|
||||
const box = JSON.parse(start_box);
|
||||
console.log('plan to target content:', {
|
||||
box,
|
||||
size: {
|
||||
width,
|
||||
height,
|
||||
},
|
||||
cost: `${cost}s`,
|
||||
});
|
||||
// expect(box).toEqual([0.397, 0.218, 0.397, 0.218]);
|
||||
expect(true).toBe(true);
|
||||
await savePositionImg({
|
||||
inputImgBase64: context.originalScreenshotBase64,
|
||||
rect: { x: box[0] * width, y: box[1] * height },
|
||||
outputPath: path.join(__dirname, 'output.png'),
|
||||
});
|
||||
});
|
||||
// expect(box).toEqual([0.397, 0.218, 0.397, 0.218]);
|
||||
expect(true).toBe(true);
|
||||
await savePositionImg({
|
||||
inputImgBase64: context.originalScreenshotBase64,
|
||||
rect: { x: box[0] * width, y: box[1] * height },
|
||||
outputPath: path.join(__dirname, 'output.png'),
|
||||
});
|
||||
// expect(cost).toBeLessThan(100);
|
||||
});
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import type { PlanningAction } from '@/types';
|
||||
|
||||
export const repeatTime = 1;
|
||||
export function makePlanResultStable(plans: PlanningAction[]) {
|
||||
return plans.map((plan) => {
|
||||
// Removing thinking makes the results stable for snapshot testing
|
||||
@ -7,9 +8,11 @@ export function makePlanResultStable(plans: PlanningAction[]) {
|
||||
if (plan.param?.prompt) {
|
||||
plan.param.prompt = '';
|
||||
}
|
||||
if (plan.quickAnswer) {
|
||||
plan.quickAnswer.reason = '';
|
||||
plan.quickAnswer.text = '';
|
||||
if ('quickAnswer' in plan && plan.quickAnswer) {
|
||||
plan.quickAnswer = {
|
||||
reason: '',
|
||||
text: '',
|
||||
};
|
||||
}
|
||||
return plan;
|
||||
});
|
||||
|
||||
@ -107,6 +107,7 @@
|
||||
"test": "vitest --run",
|
||||
"test:u": "vitest --run -u",
|
||||
"test:ai": "AI_TEST_TYPE=web npm run test",
|
||||
"test:ai:bridge": "BRIDGE_MODE=true npm run test --inspect packages/web-integration/tests/ai/bridge/agent.test.ts",
|
||||
"test:ai:cache": "MIDSCENE_CACHE=true AI_TEST_TYPE=web npm run test",
|
||||
"test:ai:all": "npm run test:ai:web && npm run test:ai:native",
|
||||
"test:ai:native": "MIDSCENE_CACHE=true AI_TEST_TYPE=native npm run test",
|
||||
|
||||
@ -349,9 +349,13 @@ export class PageTaskExecutor {
|
||||
} else if (scrollToEventName === 'untilLeft') {
|
||||
await this.page.scrollUntilLeft(startingPoint);
|
||||
} else if (scrollToEventName === 'once' || !scrollToEventName) {
|
||||
if (taskParam.direction === 'down' || !taskParam.direction) {
|
||||
if (
|
||||
taskParam?.direction === 'down' ||
|
||||
!taskParam ||
|
||||
!taskParam.direction
|
||||
) {
|
||||
await this.page.scrollDown(
|
||||
taskParam.distance || undefined,
|
||||
taskParam?.distance || undefined,
|
||||
startingPoint,
|
||||
);
|
||||
} else if (taskParam.direction === 'up') {
|
||||
|
||||
@ -8,7 +8,8 @@ vi.setConfig({
|
||||
testTimeout: 60 * 1000,
|
||||
});
|
||||
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
|
||||
describe.skipIf(process.env.CI)(
|
||||
|
||||
describe.skipIf(!process.env.BRIDGE_MODE)(
|
||||
'fully functional agent in server(cli) side',
|
||||
() => {
|
||||
it('basic', async () => {
|
||||
|
||||
@ -13,11 +13,7 @@ test('ai todo', async ({ ai, aiQuery }) => {
|
||||
}
|
||||
|
||||
await ai('Enter "Happy Birthday" in the task box');
|
||||
await ai('Enter "Learn" in the task box');
|
||||
|
||||
await ai(
|
||||
'Add "JS today" to base on the existing content(important) of the task box, then press enter',
|
||||
);
|
||||
await ai('Enter "Learn JS today"in the task box, then press Enter to create');
|
||||
|
||||
await ai(
|
||||
'Enter "Learn Rust tomorrow" in the task box, then press Enter to create',
|
||||
@ -27,6 +23,7 @@ test('ai todo', async ({ ai, aiQuery }) => {
|
||||
);
|
||||
|
||||
const allTaskList = await aiQuery<string[]>('string[], tasks in the list');
|
||||
console.log('allTaskList', allTaskList);
|
||||
expect(allTaskList.length).toBe(3);
|
||||
expect(allTaskList).toContain('Learn JS today');
|
||||
expect(allTaskList).toContain('Learn Rust tomorrow');
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import path from 'node:path';
|
||||
import { PuppeteerAgent } from '@/puppeteer';
|
||||
import { sleep } from '@midscene/core/utils';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { launchPage } from './utils';
|
||||
|
||||
@ -67,7 +68,7 @@ describe(
|
||||
const mid = new PuppeteerAgent(originPage);
|
||||
|
||||
// await mid.aiAction('If pop-ups are displayed click seven days out alert');
|
||||
|
||||
await sleep(8000);
|
||||
await mid.aiAction(
|
||||
'Click the password input in the demo section on page, type "abc"',
|
||||
);
|
||||
|
||||
|
Before Width: | Height: | Size: 638 KiB After Width: | Height: | Size: 641 KiB |
|
Before Width: | Height: | Size: 824 KiB After Width: | Height: | Size: 807 KiB |
|
Before Width: | Height: | Size: 95 KiB After Width: | Height: | Size: 88 KiB |
|
Before Width: | Height: | Size: 130 KiB After Width: | Height: | Size: 119 KiB |