midscene/packages/core/tests/unit-test/llm-planning.test.ts

import { fillBboxParam } from '@/ai-model/common';
import { buildYamlFlowFromPlans } from '@/ai-model/common';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';

/**
 * Exercises `fillBboxParam` with MIDSCENE_USE_QWEN_VL set to 'true' and
 * MIDSCENE_USE_DOUBAO_VISION set to 'false'.
 */
describe('llm planning - qwen', () => {
  let originalMidsceneUseQwenVl: string | undefined;
  let originalMidsceneUseDoubaoVl: string | undefined;

  /**
   * Restores an environment variable to the value captured before the test.
   *
   * Assigning `undefined` to a `process.env` key stores the string
   * 'undefined' rather than clearing it, so a previously-unset variable has
   * to be deleted instead of reassigned.
   */
  const restoreEnv = (key: string, value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  };

  beforeEach(() => {
    // Remember the ambient values so afterEach can put them back.
    originalMidsceneUseQwenVl = process.env.MIDSCENE_USE_QWEN_VL;
    originalMidsceneUseDoubaoVl = process.env.MIDSCENE_USE_DOUBAO_VISION;
    process.env.MIDSCENE_USE_QWEN_VL = 'true';
    process.env.MIDSCENE_USE_DOUBAO_VISION = 'false';
  });

  afterEach(() => {
    restoreEnv('MIDSCENE_USE_QWEN_VL', originalMidsceneUseQwenVl);
    restoreEnv('MIDSCENE_USE_DOUBAO_VISION', originalMidsceneUseDoubaoVl);
  });

  it('fill locate param', () => {
    const locate = {
      id: 'test',
      prompt: 'test',
      bbox_2d: [100, 100, 200, 200] as [number, number, number, number],
    };

    // A complete four-value bbox_2d is expected to come back under `bbox`
    // with the same coordinates for a 1000x1000 target.
    const filledLocate = fillBboxParam(locate, 1000, 1000);
    expect(filledLocate).toEqual({
      id: 'test',
      prompt: 'test',
      bbox: [100, 100, 200, 200],
    });
  });

  // Titled differently from the case above so a failure report identifies
  // which of the two scenarios broke.
  it('fill locate param - incomplete bbox', () => {
    const locate = {
      id: 'test',
      prompt: 'test',
      // Deliberately malformed: only two coordinates are supplied, so the
      // double cast is needed to get past the tuple type.
      bbox_2d: [100, 100] as unknown as [number, number, number, number],
    };

    // The expected result is a four-value bbox whose far corner is 20 units
    // beyond the supplied point on both axes.
    const filledLocate = fillBboxParam(locate, 1000, 1000);
    expect(filledLocate).toEqual({
      id: 'test',
      prompt: 'test',
      bbox: [100, 100, 120, 120],
    });
  });
});

/**
 * Exercises `fillBboxParam` with MIDSCENE_USE_DOUBAO_VISION set to 'true' and
 * MIDSCENE_USE_QWEN_VL set to 'false'.
 */
describe('llm planning - doubao', () => {
  let originalMidsceneUseDoubaoVl: string | undefined;
  let originalMidsceneUseQwenVl: string | undefined;

  /**
   * Restores an environment variable to the value captured before the test.
   *
   * Assigning `undefined` to a `process.env` key stores the string
   * 'undefined' rather than clearing it, so a previously-unset variable has
   * to be deleted instead of reassigned.
   */
  const restoreEnv = (key: string, value: string | undefined): void => {
    if (value === undefined) {
      delete process.env[key];
    } else {
      process.env[key] = value;
    }
  };

  beforeEach(() => {
    // Remember the ambient values so afterEach can put them back.
    originalMidsceneUseDoubaoVl = process.env.MIDSCENE_USE_DOUBAO_VISION;
    originalMidsceneUseQwenVl = process.env.MIDSCENE_USE_QWEN_VL;
    process.env.MIDSCENE_USE_DOUBAO_VISION = 'true';
    process.env.MIDSCENE_USE_QWEN_VL = 'false';
  });

  afterEach(() => {
    restoreEnv('MIDSCENE_USE_DOUBAO_VISION', originalMidsceneUseDoubaoVl);
    restoreEnv('MIDSCENE_USE_QWEN_VL', originalMidsceneUseQwenVl);
  });

  it('fill locate param', () => {
    const locate = {
      id: 'test',
      prompt: 'test',
      bbox_2d: [923, 123, 123, 123] as [number, number, number, number],
    };

    // The four supplied values are expected to come back unchanged under
    // `bbox` for a 1000x1000 target.
    const filledLocate = fillBboxParam(locate, 1000, 1000);
    expect(filledLocate).toEqual({
      id: 'test',
      prompt: 'test',
      bbox: [923, 123, 123, 123],
    });
  });
});

/**
 * Snapshot test for `buildYamlFlowFromPlans`: feeds it a fixed list of four
 * planned actions (Input, Hover, Tap, Scroll) and compares the returned flow
 * with the stored snapshot.
 */
describe('llm planning - build yaml flow', () => {
  it('build yaml flow', () => {
    const flow = buildYamlFlowFromPlans([
      // Input action: located by bbox, carries the text to type in `param`.
      {
        type: 'Input',
        locate: {
          bbox: [512, 127, 1068, 198],
          prompt: 'The input box for adding a new todo',
        },
        param: {
          value: 'hello',
        },
      },
      // Hover action: located by bbox, no parameters.
      {
        type: 'Hover',
        locate: {
          bbox: [521, 273, 692, 294],
          prompt: "The second item 'Learn Rust' in the task list",
        },
        param: null,
      },
      // Tap action: located by bbox, no parameters.
      {
        type: 'Tap',
        locate: {
          bbox: [512, 127, 1068, 197],
          prompt: "The input box labeled 'What needs to be done?'",
        },
        param: null,
      },
      // Scroll action: the only entry located by `id` instead of `bbox`, and
      // the only one that includes a `thought`.
      {
        locate: {
          id: 'button',
          prompt: 'some button',
        },
        param: {
          direction: 'down',
          distance: 500,
          scrollType: 'once',
        },
        thought: 'Scroll down the page by 500px to view more content.',
        type: 'Scroll',
      },
    ]);
    // The expected output lives in the snapshot file, not in this test.
    expect(flow).toMatchSnapshot();
  });
});
fix(core): id conflict in quick answer (#679) * fix(core): id conflict in quick answer * fix(core): lint * docs(core): update docs for qwen * fix(core): test case * refactor(core): refactor elementById args * refactor(core): refactor elementById args --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-04-30 16:51:31 +08:00			`import { fillBboxParam } from '@/ai-model/common';`
feat(web): use xpath and yaml as cache (#711) * feat(web-integration): use xpath for cache instead of id * feat(web-integration): enhance TaskCache to support xpaths for cache matching and add new test cases * feat(web-integration): add debug log for unknown page types in TaskCache * feat(web-integration): update caching logic and cache hit conditions for Plan and Locate tasks * chore(core): update debug log * feat(web-integration): update rspress.config and enhance TaskCache structure with new properties * feat(web-integration): recalculate id when hit cache * fix(web-integration): update mock implementation in task-cache test to use evaluate method * feat(web-integration): enhance element caching by adding XPath support and improving cache hit logic * chore(core): lint * feat(web-integration): improve XPath handling in web-extractor * test(web-integration): fix tests * feat(core, web-integration): add attributes to LocateResultElement and enhance element handling * fix(core): lint * feat(web-integration): add midsceneVersion to TaskCache and update cache validation logic * fix(core): test * fix(web-integration): update cache validation logic to prevent reading outdated midscene cache files * feat(web-integration): enhance TaskCache to track used cache items and improve cache retrieval logic * fix(core): xpath logic (#710) * feat(core): resue context for locate * feat(core): build yamlFlow from aiAction * feat(core): refine task-cache * feat(core): update cache * feat(core): refine task-cache * feat(core): refine task-cache * feat(core): remove unused checkElementExistsByXPath * feat(core): use yaml file as cache * chore(core): fix lint * chore(core): print warning for previous cache * refactor(core): remove quickAnswer references and improve element matching logic * fix(core): update import path for buildYamlFlowFromPlans * chore(web-integration): update output image and skip task error test * fix(web-integration): update test snapshots to handle beta versions * 
fix(web-integration): adjust test snapshots for version consistency * fix(web-integration): track original cache length and adjust matching logic in tests * fix(web-integration): update test URLs to reflect new target site and enable previously skipped test * chore(core): update cache docs * fix(core): test * feat(core): try to match element from plan * fix(web-integration): cache id stable when retry in palywright * fix(web-integration): typo * style(web-integration): lint * fix(web-integration): stable cacheid in tests * fix(web-integration): cache id --------- Co-authored-by: quanruzhuoxiu <quanruzhuoxiu@gmail.com> 2025-05-16 17:16:56 +08:00			`import { buildYamlFlowFromPlans } from '@/ai-model/common';`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`import { afterEach, beforeEach, describe, expect, it } from 'vitest';`

			`describe('llm planning - qwen', () => {`
			`let originalMidsceneUseQwenVl: string \| undefined;`
			`let originalMidsceneUseDoubaoVl: string \| undefined;`
			`beforeEach(() => {`
			`originalMidsceneUseQwenVl = process.env.MIDSCENE_USE_QWEN_VL;`
			`originalMidsceneUseDoubaoVl = process.env.MIDSCENE_USE_DOUBAO_VISION;`
			`process.env.MIDSCENE_USE_QWEN_VL = 'true';`
			`process.env.MIDSCENE_USE_DOUBAO_VISION = 'false';`
			`});`

			`afterEach(() => {`
			`process.env.MIDSCENE_USE_QWEN_VL = originalMidsceneUseQwenVl;`
			`process.env.MIDSCENE_USE_DOUBAO_VISION = originalMidsceneUseDoubaoVl;`
			`});`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00
			`it('fill locate param', () => {`
			`const locate = {`
			`id: 'test',`
			`prompt: 'test',`
			`bbox_2d: [100, 100, 200, 200] as [number, number, number, number],`
			`};`

fix(core): id conflict in quick answer (#679) * fix(core): id conflict in quick answer * fix(core): lint * docs(core): update docs for qwen * fix(core): test case * refactor(core): refactor elementById args * refactor(core): refactor elementById args --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-04-30 16:51:31 +08:00			`const filledLocate = fillBboxParam(locate, 1000, 1000);`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`expect(filledLocate).toEqual({`
			`id: 'test',`
			`prompt: 'test',`
			`bbox: [100, 100, 200, 200],`
			`});`
			`});`

			`it('fill locate param', () => {`
			`const locate = {`
			`id: 'test',`
			`prompt: 'test',`
			`bbox_2d: [100, 100] as unknown as [number, number, number, number],`
			`};`

fix(core): id conflict in quick answer (#679) * fix(core): id conflict in quick answer * fix(core): lint * docs(core): update docs for qwen * fix(core): test case * refactor(core): refactor elementById args * refactor(core): refactor elementById args --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-04-30 16:51:31 +08:00			`const filledLocate = fillBboxParam(locate, 1000, 1000);`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`expect(filledLocate).toEqual({`
			`id: 'test',`
			`prompt: 'test',`
			`bbox: [100, 100, 120, 120],`
			`});`
			`});`
			`});`

			`describe('llm planning - doubao', () => {`
			`let originalMidsceneUseDoubaoVl: string \| undefined;`
			`let originalMidsceneUseQwenVl: string \| undefined;`
			`beforeEach(() => {`
			`originalMidsceneUseDoubaoVl = process.env.MIDSCENE_USE_DOUBAO_VISION;`
			`originalMidsceneUseQwenVl = process.env.MIDSCENE_USE_QWEN_VL;`
			`process.env.MIDSCENE_USE_DOUBAO_VISION = 'true';`
			`process.env.MIDSCENE_USE_QWEN_VL = 'false';`
			`});`

			`afterEach(() => {`
			`process.env.MIDSCENE_USE_DOUBAO_VISION = originalMidsceneUseDoubaoVl;`
			`process.env.MIDSCENE_USE_QWEN_VL = originalMidsceneUseQwenVl;`
			`});`

			`it('fill locate param', () => {`
			`const locate = {`
			`id: 'test',`
			`prompt: 'test',`
			`bbox_2d: [923, 123, 123, 123] as [number, number, number, number],`
			`};`

fix(core): id conflict in quick answer (#679) * fix(core): id conflict in quick answer * fix(core): lint * docs(core): update docs for qwen * fix(core): test case * refactor(core): refactor elementById args * refactor(core): refactor elementById args --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-04-30 16:51:31 +08:00			`const filledLocate = fillBboxParam(locate, 1000, 1000);`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`expect(filledLocate).toEqual({`
			`id: 'test',`
			`prompt: 'test',`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`bbox: [923, 123, 123, 123],`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`});`
			`});`
			`});`
feat(web): use xpath and yaml as cache (#711) * feat(web-integration): use xpath for cache instead of id * feat(web-integration): enhance TaskCache to support xpaths for cache matching and add new test cases * feat(web-integration): add debug log for unknown page types in TaskCache * feat(web-integration): update caching logic and cache hit conditions for Plan and Locate tasks * chore(core): update debug log * feat(web-integration): update rspress.config and enhance TaskCache structure with new properties * feat(web-integration): recalculate id when hit cache * fix(web-integration): update mock implementation in task-cache test to use evaluate method * feat(web-integration): enhance element caching by adding XPath support and improving cache hit logic * chore(core): lint * feat(web-integration): improve XPath handling in web-extractor * test(web-integration): fix tests * feat(core, web-integration): add attributes to LocateResultElement and enhance element handling * fix(core): lint * feat(web-integration): add midsceneVersion to TaskCache and update cache validation logic * fix(core): test * fix(web-integration): update cache validation logic to prevent reading outdated midscene cache files * feat(web-integration): enhance TaskCache to track used cache items and improve cache retrieval logic * fix(core): xpath logic (#710) * feat(core): resue context for locate * feat(core): build yamlFlow from aiAction * feat(core): refine task-cache * feat(core): update cache * feat(core): refine task-cache * feat(core): refine task-cache * feat(core): remove unused checkElementExistsByXPath * feat(core): use yaml file as cache * chore(core): fix lint * chore(core): print warning for previous cache * refactor(core): remove quickAnswer references and improve element matching logic * fix(core): update import path for buildYamlFlowFromPlans * chore(web-integration): update output image and skip task error test * fix(web-integration): update test snapshots to handle beta versions * 
fix(web-integration): adjust test snapshots for version consistency * fix(web-integration): track original cache length and adjust matching logic in tests * fix(web-integration): update test URLs to reflect new target site and enable previously skipped test * chore(core): update cache docs * fix(core): test * feat(core): try to match element from plan * fix(web-integration): cache id stable when retry in palywright * fix(web-integration): typo * style(web-integration): lint * fix(web-integration): stable cacheid in tests * fix(web-integration): cache id --------- Co-authored-by: quanruzhuoxiu <quanruzhuoxiu@gmail.com> 2025-05-16 17:16:56 +08:00
			`describe('llm planning - build yaml flow', () => {`
			`it('build yaml flow', () => {`
			`const flow = buildYamlFlowFromPlans([`
			`{`
			`type: 'Input',`
			`locate: {`
			`bbox: [512, 127, 1068, 198],`
			`prompt: 'The input box for adding a new todo',`
			`},`
			`param: {`
			`value: 'hello',`
			`},`
			`},`
			`{`
			`type: 'Hover',`
			`locate: {`
			`bbox: [521, 273, 692, 294],`
			`prompt: "The second item 'Learn Rust' in the task list",`
			`},`
			`param: null,`
			`},`
			`{`
			`type: 'Tap',`
			`locate: {`
			`bbox: [512, 127, 1068, 197],`
			`prompt: "The input box labeled 'What needs to be done?'",`
			`},`
			`param: null,`
			`},`
			`{`
			`locate: {`
			`id: 'button',`
			`prompt: 'some button',`
			`},`
			`param: {`
			`direction: 'down',`
			`distance: 500,`
			`scrollType: 'once',`
			`},`
			`thought: 'Scroll down the page by 500px to view more content.',`
			`type: 'Scroll',`
			`},`
			`]);`
			`expect(flow).toMatchSnapshot();`
			`});`
			`});`