midscene/packages/core/tests/unit-test/llm-planning.test.ts
yuyutaotao b261ed7f2a
feat(web): use xpath and yaml as cache (#711)
* feat(web-integration): use xpath for cache instead of id

* feat(web-integration): enhance TaskCache to support xpaths for cache matching and add new test cases

* feat(web-integration): add debug log for unknown page types in TaskCache

* feat(web-integration): update caching logic and cache hit conditions for Plan and Locate tasks

* chore(core): update debug log

* feat(web-integration): update rspress.config and enhance TaskCache structure with new properties

* feat(web-integration): recalculate id when hit cache

* fix(web-integration): update mock implementation in task-cache test to use evaluate method

* feat(web-integration): enhance element caching by adding XPath support and improving cache hit logic

* chore(core): lint

* feat(web-integration): improve XPath handling in web-extractor

* test(web-integration): fix tests

* feat(core, web-integration): add attributes to LocateResultElement and enhance element handling

* fix(core): lint

* feat(web-integration): add midsceneVersion to TaskCache and update cache validation logic

* fix(core): test

* fix(web-integration): update cache validation logic to prevent reading outdated midscene cache files

* feat(web-integration): enhance TaskCache to track used cache items and improve cache retrieval logic

* fix(core): xpath logic (#710)

* feat(core): reuse context for locate

* feat(core): build yamlFlow from aiAction

* feat(core): refine task-cache

* feat(core): update cache

* feat(core): refine task-cache

* feat(core): refine task-cache

* feat(core): remove unused checkElementExistsByXPath

* feat(core): use yaml file as cache

* chore(core): fix lint

* chore(core): print warning for previous cache

* refactor(core): remove quickAnswer references and improve element matching logic

* fix(core): update import path for buildYamlFlowFromPlans

* chore(web-integration): update output image and skip task error test

* fix(web-integration): update test snapshots to handle beta versions

* fix(web-integration): adjust test snapshots for version consistency

* fix(web-integration): track original cache length and adjust matching logic in tests

* fix(web-integration): update test URLs to reflect new target site and enable previously skipped test

* chore(core): update cache docs

* fix(core): test

* feat(core): try to match element from plan

* fix(web-integration): cache id stable when retry in playwright

* fix(web-integration): typo

* style(web-integration): lint

* fix(web-integration): stable cacheid in tests

* fix(web-integration): cache id

---------

Co-authored-by: quanruzhuoxiu <quanruzhuoxiu@gmail.com>
2025-05-16 17:16:56 +08:00

128 lines
3.5 KiB
TypeScript

import { fillBboxParam } from '@/ai-model/common';
import { buildYamlFlowFromPlans } from '@/ai-model/common';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
describe('llm planning - qwen', () => {
  // Values of the model-selection env vars captured before each test so that
  // afterEach can put the environment back exactly as it was.
  let originalMidsceneUseQwenVl: string | undefined;
  let originalMidsceneUseDoubaoVl: string | undefined;

  /**
   * Restores an environment variable to a previously captured value.
   *
   * Assigning `undefined` to a `process.env` key stores the string
   * "undefined" (Node coerces env values to strings), so a variable that was
   * originally unset has to be deleted rather than assigned.
   *
   * @param name - Name of the environment variable to restore.
   * @param value - Captured value, or `undefined` if the variable was unset.
   */
  const restoreEnv = (name: string, value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = value;
    }
  };

  beforeEach(() => {
    originalMidsceneUseQwenVl = process.env.MIDSCENE_USE_QWEN_VL;
    originalMidsceneUseDoubaoVl = process.env.MIDSCENE_USE_DOUBAO_VISION;
    // Run every test in this suite with the Qwen flag on and the Doubao flag off.
    process.env.MIDSCENE_USE_QWEN_VL = 'true';
    process.env.MIDSCENE_USE_DOUBAO_VISION = 'false';
  });

  afterEach(() => {
    restoreEnv('MIDSCENE_USE_QWEN_VL', originalMidsceneUseQwenVl);
    restoreEnv('MIDSCENE_USE_DOUBAO_VISION', originalMidsceneUseDoubaoVl);
  });

  it('fill locate param', () => {
    const locate = {
      id: 'test',
      prompt: 'test',
      bbox_2d: [100, 100, 200, 200] as [number, number, number, number],
    };

    const filledLocate = fillBboxParam(locate, 1000, 1000);

    // The bbox_2d key is expected to be replaced by a bbox key.
    expect(filledLocate).toEqual({
      id: 'test',
      prompt: 'test',
      bbox: [100, 100, 200, 200],
    });
  });

  it('fill locate param with incomplete bbox', () => {
    const locate = {
      id: 'test',
      prompt: 'test',
      // Deliberately malformed: only two of the four coordinates are supplied,
      // so the tuple type has to be forced with a double assertion.
      bbox_2d: [100, 100] as unknown as [number, number, number, number],
    };

    const filledLocate = fillBboxParam(locate, 1000, 1000);

    // The two-value input is expected to come back as a full four-value bbox.
    expect(filledLocate).toEqual({
      id: 'test',
      prompt: 'test',
      bbox: [100, 100, 120, 120],
    });
  });
});
describe('llm planning - doubao', () => {
  // Values of the model-selection env vars captured before each test so that
  // afterEach can put the environment back exactly as it was.
  let originalMidsceneUseDoubaoVl: string | undefined;
  let originalMidsceneUseQwenVl: string | undefined;

  /**
   * Restores an environment variable to a previously captured value.
   *
   * Assigning `undefined` to a `process.env` key stores the string
   * "undefined" (Node coerces env values to strings), so a variable that was
   * originally unset has to be deleted rather than assigned.
   *
   * @param name - Name of the environment variable to restore.
   * @param value - Captured value, or `undefined` if the variable was unset.
   */
  const restoreEnv = (name: string, value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = value;
    }
  };

  beforeEach(() => {
    originalMidsceneUseDoubaoVl = process.env.MIDSCENE_USE_DOUBAO_VISION;
    originalMidsceneUseQwenVl = process.env.MIDSCENE_USE_QWEN_VL;
    // Run every test in this suite with the Doubao flag on and the Qwen flag off.
    process.env.MIDSCENE_USE_DOUBAO_VISION = 'true';
    process.env.MIDSCENE_USE_QWEN_VL = 'false';
  });

  afterEach(() => {
    restoreEnv('MIDSCENE_USE_DOUBAO_VISION', originalMidsceneUseDoubaoVl);
    restoreEnv('MIDSCENE_USE_QWEN_VL', originalMidsceneUseQwenVl);
  });

  it('fill locate param', () => {
    const locate = {
      id: 'test',
      prompt: 'test',
      bbox_2d: [923, 123, 123, 123] as [number, number, number, number],
    };

    const filledLocate = fillBboxParam(locate, 1000, 1000);

    // With a 1000x1000 size the coordinates are expected to come back unchanged
    // under the bbox key.
    expect(filledLocate).toEqual({
      id: 'test',
      prompt: 'test',
      bbox: [923, 123, 123, 123],
    });
  });
});
describe('llm planning - build yaml flow', () => {
  it('build yaml flow', () => {
    // Fixture: four planned actions (Input, Hover, Tap, Scroll). The first three
    // locate their target by bbox, the last one by id. Typed from the function's
    // own parameter so the literals are checked the same way as an inline argument.
    const plannedActions: Parameters<typeof buildYamlFlowFromPlans>[0] = [
      {
        type: 'Input',
        locate: {
          bbox: [512, 127, 1068, 198],
          prompt: 'The input box for adding a new todo',
        },
        param: {
          value: 'hello',
        },
      },
      {
        type: 'Hover',
        locate: {
          bbox: [521, 273, 692, 294],
          prompt: "The second item 'Learn Rust' in the task list",
        },
        param: null,
      },
      {
        type: 'Tap',
        locate: {
          bbox: [512, 127, 1068, 197],
          prompt: "The input box labeled 'What needs to be done?'",
        },
        param: null,
      },
      {
        locate: {
          id: 'button',
          prompt: 'some button',
        },
        param: {
          direction: 'down',
          distance: 500,
          scrollType: 'once',
        },
        thought: 'Scroll down the page by 500px to view more content.',
        type: 'Scroll',
      },
    ];

    const yamlFlow = buildYamlFlowFromPlans(plannedActions);

    // The generated flow is pinned by the stored snapshot for this test.
    expect(yamlFlow).toMatchSnapshot();
  });
});