// midscene/packages/evaluation/tests/llm-section-locator.test.ts

import { writeFileSync } from 'node:fs';
import { MIDSCENE_MODEL_NAME, type Rect, getAIConfig } from '@midscene/core';
import { AiLocateSection } from '@midscene/core/ai-model';
import { sleep } from '@midscene/core/utils';
import { vlLocateMode } from '@midscene/shared/env';
import { saveBase64Image } from '@midscene/shared/img';
import dotenv from 'dotenv';
import { afterAll, expect, test } from 'vitest';
import { TestResultCollector } from '../src/test-analyzer';
import { annotateRects, buildContext, getCases } from './util';

// Load environment variables from the local .env file before the model
// configuration below is read.
dotenv.config({
  // Let values from the file replace variables already present in the process.
  override: true,
  // Ask dotenv to log diagnostic output while loading.
  debug: true,
});

// Names of the evaluation sources; one vitest case is registered per entry.
const testSources: string[] = ['antd-tooltip'];

// Label taken from the MIDSCENE_MODEL_NAME config entry; any falsy value
// (unset or empty) is reported as 'unspecified'.
const evaluatedModelName = getAIConfig(MIDSCENE_MODEL_NAME) || 'unspecified';

// Shared collector that accumulates the results of every 'section-locator' case.
const resultCollector = new TestResultCollector(
  'section-locator',
  evaluatedModelName,
);

// Failure threshold handed to `resultCollector.analyze`: 3 when running in CI
// without VL locate mode, 0 in every other situation.
const failCaseThreshold = process.env.CI && !vlLocateMode() ? 3 : 0;

// Once every test in this file has finished, print the collected summary.
afterAll(async function printSummaryAfterAll() {
  await resultCollector.printSummary();
});

/**
 * Registers one vitest case per entry of `testSources`.
 *
 * Each case loads the 'section-locator' cases for the source, runs
 * `AiLocateSection` for every test case prompt, records the result together
 * with the time the model call took, and finally asks the collector to
 * analyze the source against `failCaseThreshold`.
 *
 * When the `UPDATE_ANSWER_DATA` environment variable is set, the returned
 * rects are written back into the case file and an annotated screenshot is
 * saved next to it.
 */
for (const source of testSources) {
  test(
    `${source}: locate section`,
    async () => {
      const { path: aiDataPath, content: cases } = await getCases(
        source,
        'section-locator',
      );

      // Rects gathered so far in this run; only filled in UPDATE_ANSWER_DATA mode.
      const annotations: Array<{
        indexId: number;
        rect: Rect;
      }> = [];

      for (const [index, testCase] of cases.testCases.entries()) {
        const context = await buildContext(source);
        const prompt = testCase.prompt;

        const startTime = Date.now();
        const result = await AiLocateSection({
          context,
          sectionDescription: prompt,
        });
        // Take the elapsed time right after the model call so that the file
        // write and image annotation below do not inflate the recorded cost.
        const timeCost = Date.now() - startTime;

        if (process.env.UPDATE_ANSWER_DATA) {
          const { rect } = result;

          if (rect) {
            // Annotation ids are 1-based.
            const indexId = index + 1;
            testCase.response_rect = rect;
            testCase.annotation_index_id = indexId;
            annotations.push({
              indexId,
              rect,
            });
          }

          // Persist after every case so partial progress survives a later failure.
          writeFileSync(aiDataPath, JSON.stringify(cases, null, 2));
        }

        if (annotations.length > 0) {
          // Redraw all rects collected so far and overwrite the annotated image.
          const markedImage = await annotateRects(
            context.screenshotBase64,
            annotations.map((item) => item.rect),
            prompt,
          );
          await saveBase64Image({
            base64Data: markedImage,
            outputPath: `${aiDataPath}-coordinates-annotated.png`,
          });
        }

        resultCollector.addResult(source, testCase, result, timeCost);
      }

      await resultCollector.printSummary();
      await resultCollector.analyze(source, failCaseThreshold);
      // NOTE(review): the reason for this 3s pause is not visible here —
      // presumably it spaces out model requests between sources; confirm.
      await sleep(3 * 1000);
    },
    // Per-test timeout: 360 seconds.
    360 * 1000,
  );
}
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`import { writeFileSync } from 'node:fs';`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`import { MIDSCENE_MODEL_NAME, type Rect, getAIConfig } from '@midscene/core';`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`import { AiLocateSection } from '@midscene/core/ai-model';`
			`import { sleep } from '@midscene/core/utils';`
feat(core): allow custom midscene_run dir (#631) * feat(core): support custom midscene_run dir * feat(report): add search functionality to PlaywrightCaseSelector component * refactor(shared): simplify base directory resolution and remove unused environment variable * feat(shared): integrate shared environment variables across multiple packages * refactor(shared): update base directory resolution to use dynamic midscene_run directory * fix(puppeteer): increase screenshot timeout from 3s to 10s for improved reliability 2025-04-24 22:54:52 +08:00			`import { vlLocateMode } from '@midscene/shared/env';`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`import { saveBase64Image } from '@midscene/shared/img';`
			`import dotenv from 'dotenv';`
			`import { afterAll, expect, test } from 'vitest';`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`import { TestResultCollector } from '../src/test-analyzer';`
			`import { annotateRects, buildContext, getCases } from './util';`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00
			`dotenv.config({`
			`debug: true,`
			`override: true,`
			`});`

			`const testSources = ['antd-tooltip'];`

			`const resultCollector = new TestResultCollector(`
			`'section-locator',`
			`getAIConfig(MIDSCENE_MODEL_NAME) \|\| 'unspecified',`
			`);`

			`let failCaseThreshold = 0;`
			`if (process.env.CI && !vlLocateMode()) {`
			`failCaseThreshold = 3;`
			`}`

			`afterAll(async () => {`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`await resultCollector.printSummary();`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`});`

			`testSources.forEach((source) => {`
			`test(`
			`${source}: locate section`,
			`async () => {`
			`const { path: aiDataPath, content: cases } = await getCases(`
			`source,`
			`'section-locator',`
			`);`

			`const annotations: Array<{`
			`indexId: number;`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`rect: Rect;`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`}> = [];`
			`for (const [index, testCase] of cases.testCases.entries()) {`
			`const context = await buildContext(source);`
			`const prompt = testCase.prompt;`
			`const startTime = Date.now();`
			`const result = await AiLocateSection({`
			`context,`
			`sectionDescription: prompt,`
			`});`

			`if (process.env.UPDATE_ANSWER_DATA) {`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`const { rect } = result;`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`if (rect) {`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`const indexId = index + 1;`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`testCase.response_rect = rect;`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`testCase.annotation_index_id = indexId;`
			`annotations.push({`
			`indexId,`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`rect,`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`});`
			`}`

			`// write testCase to file`
			`writeFileSync(aiDataPath, JSON.stringify(cases, null, 2));`
			`}`
			`if (annotations.length > 0) {`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`const markedImage = await annotateRects(`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`context.screenshotBase64,`
feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`annotations.map((item) => item.rect),`
feat(evaluation): add screenspot v2 evaluation (#737) * feat(evaluation): add screenspot v2 evaluation * style(evaluation): format files array in package.json 2025-05-20 15:52:03 +08:00			`prompt,`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`);`
			`await saveBase64Image({`
			`base64Data: markedImage,`
			outputPath: `${aiDataPath}-coordinates-annotated.png`,
			`});`
			`}`

			`resultCollector.addResult(`
			`source,`
			`testCase,`
			`result,`
			`Date.now() - startTime,`
			`);`
			`}`

feat: update deepThink interface (#517) * feat: update deepThink interface * fix: CI * chore: update error message * feat: update search area config * fix: deepthink ui --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-04-02 19:26:56 +08:00			`await resultCollector.printSummary();`
			`await resultCollector.analyze(source, failCaseThreshold);`
feat: optimize locator (#456) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-03-17 19:19:54 +08:00			`await sleep(3 * 1000);`
			`},`
			`360 * 1000,`
			`);`
			`});`