midscene/packages/evaluation/tests/llm-locator.test.ts

import { writeFileSync } from 'node:fs';
import Insight, {
  type Rect,
  MIDSCENE_MODEL_NAME,
  getAIConfig,
} from '@midscene/core';
import { vlLocateMode } from '@midscene/core/env';
import { sleep } from '@midscene/core/utils';
import { saveBase64Image } from '@midscene/shared/img';
import dotenv from 'dotenv';
import { afterEach, expect, test } from 'vitest';
import { TestResultCollector } from '../src/test-analyzer';
import { annotateRects, buildContext, getCases } from './util';

// Load the local .env file. `override` lets values from .env replace
// variables that are already set in the process environment; `debug` turns
// on dotenv's own diagnostic logging.
dotenv.config({ override: true, debug: true });

/**
 * Names of the evaluation sources. Each entry is passed to `getCases` and
 * `buildContext`, and gets its own `"<source>: locate element"` test.
 */
const testSources: string[] = [
  'antd-carousel',
  'todo',
  'online_order',
  'online_order_list',
  'taobao',
  'aweme-login',
  'aweme-play',
];

// Label for the locate strategy active in this run, as reported by
// vlLocateMode().
const positionModeTag = vlLocateMode() ? 'by_coordinates' : 'by_element';

// Configured model name; any falsy value (including an empty string) is
// reported as 'unspecified'.
const modelName = getAIConfig(MIDSCENE_MODEL_NAME) || 'unspecified';

// Single collector shared by every test in this file.
const resultCollector = new TestResultCollector(positionModeTag, modelName);

// Threshold handed to resultCollector.analyze(): 3 when running in CI
// without VL locate mode, 0 in every other configuration.
const failCaseThreshold = process.env.CI && !vlLocateMode() ? 3 : 0;

// When MIDSCENE_EVALUATION_EXPECT_VL is set, assert at module load that VL
// locate mode is actually enabled, before any test is registered.
const mustRunInVlMode = Boolean(process.env.MIDSCENE_EVALUATION_EXPECT_VL);
if (mustRunInVlMode) {
  expect(vlLocateMode()).toBeTruthy();
}

// After every test, print the collector's summary of the results gathered
// so far.
afterEach(async function printCollectedSummary() {
  await resultCollector.printSummary();
});

// Pause inserted at the end of each source's test.
// NOTE(review): presumably gives the model service some breathing room
// between sources — confirm the intent.
const pauseAfterSourceMs = 3 * 1000;

// Per-test timeout passed to vitest as the third argument of `test`.
const locateTestTimeoutMs = 360 * 1000;

/**
 * Registers one "<source>: locate element" test per entry in `testSources`.
 *
 * For every case of the source's 'inspect' data the test:
 *   1. builds a fresh context and runs `insight.locate` with the case prompt,
 *   2. when UPDATE_ANSWER_DATA is set, writes the located rect/element back
 *      into the case file and refreshes the annotated screenshot,
 *   3. records the result and the time spent locating in `resultCollector`.
 *
 * Once all cases ran, the collector analyzes the source against
 * `failCaseThreshold`.
 */
for (const source of testSources) {
  test(
    `${source}: locate element`,
    async () => {
      const { path: aiDataPath, content: cases } = await getCases(
        source,
        'inspect',
      );

      // Rects located so far in this run; only filled when answers are
      // being updated.
      const annotations: Array<{
        indexId: number;
        rect: Rect;
      }> = [];

      for (const [index, testCase] of cases.testCases.entries()) {
        const context = await buildContext(source);

        const prompt = testCase.prompt;
        const startTime = Date.now();

        const insight = new Insight(context);

        const result = await insight.locate({
          prompt,
          deepThink: testCase.deepThink,
        });
        // Stop the clock as soon as locate() resolves so that the answer-file
        // write and the screenshot annotation below do not inflate the
        // recorded cost.
        const locateCostMs = Date.now() - startTime;
        const { element, rect } = result;

        if (process.env.UPDATE_ANSWER_DATA) {
          if (rect) {
            // Annotation ids are 1-based.
            const indexId = index + 1;
            testCase.response_rect = rect;
            testCase.annotation_index_id = indexId;
            annotations.push({
              indexId,
              rect,
            });
          }

          if (element) {
            testCase.response_element = {
              id: element.id,
              indexId: element.indexId,
            };
          }

          // Persist the updated cases after every case, not only at the end.
          writeFileSync(aiDataPath, JSON.stringify(cases, null, 2));
        }

        if (annotations.length > 0) {
          // Re-render the annotated screenshot with every rect collected so
          // far; the same output file is overwritten on each iteration.
          const markedImage = await annotateRects(
            context.screenshotBase64,
            annotations.map((item) => item.rect),
          );
          await saveBase64Image({
            base64Data: markedImage,
            outputPath: `${aiDataPath}-coordinates-annotated.png`,
          });
        }

        resultCollector.addResult(source, testCase, result, locateCostMs);
      }

      await resultCollector.analyze(source, failCaseThreshold);
      await sleep(pauseAfterSourceMs);
    },
    locateTestTimeoutMs,
  );
}
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`import { writeFileSync } from 'node:fs';`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`import Insight, {`
			`type Rect,`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`MIDSCENE_MODEL_NAME,`
			`getAIConfig,`
			`} from '@midscene/core';`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`import { vlLocateMode } from '@midscene/core/env';`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`import { sleep } from '@midscene/core/utils';`
			`import { saveBase64Image } from '@midscene/shared/img';`
			`import dotenv from 'dotenv';`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`import { afterEach, expect, test } from 'vitest';`
refactor: switch bundle type to bundleless (#437) 2025-03-07 17:20:18 +08:00			`import { TestResultCollector } from '../src/test-analyzer';`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`import { annotateRects, buildContext, getCases } from './util';`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00
			`dotenv.config({`
			`debug: true,`
			`override: true,`
			`});`

			`const testSources = [`
			`'antd-carousel',`
			`'todo',`
			`'online_order',`
			`'online_order_list',`
			`'taobao',`
			`'aweme-login',`
			`'aweme-play',`
			`];`

feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`const positionModeTag = vlLocateMode() ? 'by_coordinates' : 'by_element';`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`const resultCollector = new TestResultCollector(`
			`positionModeTag,`
			`getAIConfig(MIDSCENE_MODEL_NAME) \|\| 'unspecified',`
			`);`

fix: ci of qwen model (#410) 2025-02-21 16:15:51 +08:00			`let failCaseThreshold = 0;`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`if (process.env.CI && !vlLocateMode()) {`
fix: ci of qwen model (#410) 2025-02-21 16:15:51 +08:00			`failCaseThreshold = 3;`
			`}`

feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`if (process.env.MIDSCENE_EVALUATION_EXPECT_VL) {`
			`expect(vlLocateMode()).toBeTruthy();`
			`}`

feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`afterEach(async () => {`
			`await resultCollector.printSummary();`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`});`

			`testSources.forEach((source) => {`
			`test(`
			`${source}: locate element`,
			`async () => {`
			`const { path: aiDataPath, content: cases } = await getCases(`
			`source,`
			`'inspect',`
			`);`

			`const annotations: Array<{`
			`indexId: number;`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`rect: Rect;`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`}> = [];`
			`for (const [index, testCase] of cases.testCases.entries()) {`
			`const context = await buildContext(source);`

			`const prompt = testCase.prompt;`
			`const startTime = Date.now();`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00
			`const insight = new Insight(context);`

			`const result = await insight.locate({`
			`prompt,`
fix(llm): coords offset in vl locator (#545) 2025-04-08 17:56:15 +08:00			`deepThink: testCase.deepThink,`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`});`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`const { element, rect } = result;`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00
			`if (process.env.UPDATE_ANSWER_DATA) {`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`// const { elementById } = context;`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`if (rect) {`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`const indexId = index + 1;`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`testCase.response_rect = rect;`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`testCase.annotation_index_id = indexId;`
			`annotations.push({`
			`indexId,`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`rect,`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`});`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00
			`// // biome-ignore lint/performance/noDelete: <explanation>`
			`// delete (testCase as any).response_bbox;`
			`// // biome-ignore lint/performance/noDelete: <explanation>`
			`// delete (testCase as any).response;`
			`}`

			`if (element) {`
			`testCase.response_element = {`
			`id: element.id,`
			`indexId: element.indexId,`
			`};`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`}`

			`// write testCase to file`
			`writeFileSync(aiDataPath, JSON.stringify(cases, null, 2));`
			`}`
			`if (annotations.length > 0) {`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`const markedImage = await annotateRects(`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`context.screenshotBase64,`
feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`annotations.map((item) => item.rect),`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`);`
			`await saveBase64Image({`
			`base64Data: markedImage,`
			outputPath: `${aiDataPath}-coordinates-annotated.png`,
			`});`
			`}`

			`resultCollector.addResult(`
			`source,`
			`testCase,`
			`result,`
			`Date.now() - startTime,`
			`);`
			`}`

feat: enable search area for locate (#473) * feat: enable search area for locate * fix: update evaluation * fix: build error * fix: ci * fix: locator * feat: show searchArea in report * chore: add yaml support for aiTap * feat: update status tip * fix: #473 (#484) * chore: optimize unit test list --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-24 09:50:27 +08:00			`await resultCollector.analyze(source, failCaseThreshold);`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`await sleep(3 * 1000);`
			`},`
fix: print warning for oversized input (#414) 2025-02-24 16:53:06 +08:00			`360 * 1000,`
feat: locate by coord (#383) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-02-21 09:56:09 +08:00			`);`
			`});`