// midscene/packages/evaluation/tests/assertion.test.ts

import { readFileSync } from 'node:fs';
import path from 'node:path';
import { describe } from 'node:test';
import { AiAssert } from '@midscene/core';
import { buildContext } from '@midscene/core/evaluation';
import { afterAll, expect, test } from 'vitest';
import { type InspectAiTestCase, repeatFile } from './util';

import 'dotenv/config';
import dotenv from 'dotenv';

// `import 'dotenv/config'` above has already loaded .env once. Load it again
// with `override` so values from .env replace variables that are already set
// in the process environment, and with `debug` so dotenv logs diagnostics
// about what it loads.
dotenv.config({
  override: true,
  debug: true,
});

// Base names (without `.json`) of the case files under
// ../page-cases/assertion/ that this suite evaluates.
const testSources = ['online_order'];

/**
 * Evaluation suite for `AiAssert`.
 *
 * For every source in `testSources`, the matching case file under
 * ../page-cases/assertion/ is read and one test is registered per entry in
 * its `testCases` array. Each test builds a context from the page data named
 * by `testDataPath`, runs the case's `prompt` through `AiAssert`, and fails
 * unless the returned `content.pass` equals the case's `expected` value.
 *
 * A per-source summary (pass/fail counts, pass rate, mean duration) is
 * accumulated while the tests run and printed with `console.table` afterwards.
 */
describe('ai inspect element', () => {
  // NOTE(review): `describe` is imported from 'node:test' at the top of this
  // file while `test`/`afterAll`/`expect` come from 'vitest'. This looks like
  // an auto-import slip — confirm and import `describe` from 'vitest' instead.

  // One row per (source, repeat) pair; filled in as the cases finish.
  const testResult: {
    path: string;
    result: {
      // Percentage (0-100) of finished cases that passed.
      score: number;
      // Mean wall-clock time per finished case, formatted as "<n>ms".
      averageTime: string;
      successCount: number;
      failCount: number;
    };
  }[] = [];

  afterAll(async () => {
    console.table(
      testResult.map((r) => ({
        path: r.path,
        ...r.result,
      })),
    );
  });

  repeatFile(testSources, 1, (source, repeatIndex) => {
    const aiDataPath = path.join(
      __dirname,
      `../page-cases/assertion/${source}.json`,
    );
    const aiData = JSON.parse(
      readFileSync(aiDataPath, 'utf-8'),
    ) as InspectAiTestCase;

    // Register the summary row up front so it is present in the final table
    // even when none of its cases end up running.
    const summary = {
      path: `${source}-${repeatIndex}`,
      result: {
        score: 0,
        averageTime: '0ms',
        successCount: 0,
        failCount: 0,
      },
    };
    testResult.push(summary);
    let totalTimeMs = 0;

    // Folds the outcome of one finished case into this source's summary row.
    const recordOutcome = (passed: boolean, elapsedMs: number): void => {
      const stats = summary.result;
      if (passed) {
        stats.successCount += 1;
      } else {
        stats.failCount += 1;
      }
      totalTimeMs += elapsedMs;
      // `finished` is at least 1 here, so the divisions below are safe.
      const finished = stats.successCount + stats.failCount;
      stats.score = Math.round((stats.successCount / finished) * 100);
      stats.averageTime = `${Math.round(totalTimeMs / finished)}ms`;
    };

    for (const testCase of aiData.testCases) {
      const { prompt, expected } = testCase;
      test(
        `${source}-${repeatIndex}: assertion-${prompt.slice(0, 30)}...`,
        async () => {
          const startedAt = Date.now();
          let passed = false;
          try {
            const { context } = await buildContext(
              path.join(__dirname, '../page-data/', aiData.testDataPath),
            );

            const result = await AiAssert({
              assertion: prompt,
              context,
            });

            // The model must return an explicit boolean verdict.
            expect(typeof result?.content?.pass).toBe('boolean');
            if (result?.content?.pass !== expected) {
              throw new Error(
                `assertion failed: ${prompt} expected: ${expected}, actual: ${result?.content?.pass}, thought: ${result?.content?.thought}`,
              );
            }

            passed = true;
            console.log('assertion passed, thought:', result?.content?.thought);
          } finally {
            // Runs on success and on any thrown error, so failed cases are
            // counted as well. The measured time includes context building.
            recordOutcome(passed, Date.now() - startedAt);
          }
        },
        // Per-test timeout in milliseconds (3 minutes). Passed as a number
        // because newer vitest versions deprecate an options object as the
        // last argument.
        3 * 60 * 1000,
      );
    }
  });
});
feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00			`import { readFileSync } from 'node:fs';`
			`import path from 'node:path';`
			`import { describe } from 'node:test';`
feat: use different color for annotations (#366) 2025-02-10 16:36:12 +08:00			`import { AiAssert } from '@midscene/core';`
			`import { buildContext } from '@midscene/core/evaluation';`
feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00			`import { afterAll, expect, test } from 'vitest';`
feat: use different color for annotations (#366) 2025-02-10 16:36:12 +08:00			`import { type InspectAiTestCase, repeatFile } from './util';`

feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00			`import 'dotenv/config';`
feat(web-extract): extract web content as a tree (#337) * feat: extract web content as a tree * chore: update test data * chore: update test data * feat: update answer of evaluation * chore: update test cases * chore: remove focusing on cases * fix: ci * fix: put rect in html tree * fix: CI * fix: AI test * fix: lint * fix: CI * fix: static-page compatibility * fix: CI * fix: map by markerId * fix: llm planning prompt * chore: update hash length * chore: ignore writing dump file * fix: lint * fix: ci snapshot * chore: snapshot tree in web extractor * chore: export tree utils in core * chore: export tree utils in core * fix: CI * fix: update test case and evaluation * chore: remove unused file * refactor(extract): modify dependencies (#358) * refactor(extract): modify dependencies * chore: modify files config * chore: add indexId as key for map --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-02-07 14:55:52 +08:00			`import dotenv from 'dotenv';`

			`dotenv.config({`
			`debug: true,`
			`override: true,`
			`});`
feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00
feat: use different color for annotations (#366) 2025-02-10 16:36:12 +08:00			`const testSources = ['online_order'];`
feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00
			`describe('ai inspect element', () => {`
			`const testResult: {`
			`path: string;`
			`result: {`
			`score: number;`
			`averageTime: string;`
			`successCount: number;`
			`failCount: number;`
			`};`
			`}[] = [];`

			`afterAll(async () => {`
			`console.table(`
			`testResult.map((r) => {`
			`return {`
			`path: r.path,`
			`...r.result,`
			`};`
			`}),`
			`);`
			`});`
feat(web-extract): extract web content as a tree (#337) * feat: extract web content as a tree * chore: update test data * chore: update test data * feat: update answer of evaluation * chore: update test cases * chore: remove focusing on cases * fix: ci * fix: put rect in html tree * fix: CI * fix: AI test * fix: lint * fix: CI * fix: static-page compatibility * fix: CI * fix: map by markerId * fix: llm planning prompt * chore: update hash length * chore: ignore writing dump file * fix: lint * fix: ci snapshot * chore: snapshot tree in web extractor * chore: export tree utils in core * chore: export tree utils in core * fix: CI * fix: update test case and evaluation * chore: remove unused file * refactor(extract): modify dependencies (#358) * refactor(extract): modify dependencies * chore: modify files config * chore: add indexId as key for map --------- Co-authored-by: Zhou Xiao <zhouxiao.shaw@bytedance.com> 2025-02-07 14:55:52 +08:00			`repeatFile(testSources, 1, (source, repeatIndex) => {`
feat: use different color for annotations (#366) 2025-02-10 16:36:12 +08:00			`const aiDataPath = path.join(`
			`__dirname,`
			`../page-cases/assertion/${source}.json`,
			`);`
feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00			`const aiData = JSON.parse(`
			`readFileSync(aiDataPath, 'utf-8'),`
			`) as InspectAiTestCase;`

			`aiData.testCases.forEach((testCase, index) => {`
			`const prompt = testCase.prompt;`
			`test(`
			`${source}-${repeatIndex}: assertion-${prompt.slice(0, 30)}...`,
			`async () => {`
feat: use different color for annotations (#366) 2025-02-10 16:36:12 +08:00			`const { context } = await buildContext(`
			`path.join(__dirname, '../page-data/', aiData.testDataPath),`
feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00			`);`

			`const { prompt, expected } = testCase;`
			`const result = await AiAssert({`
			`assertion: prompt,`
			`context,`
			`});`

			`expect(typeof result?.content?.pass).toBe('boolean');`
			`if (result?.content?.pass !== expected) {`
			`throw new Error(`
			`assertion failed: ${prompt} expected: ${expected}, actual: ${result?.content?.pass}, thought: ${result?.content?.thought}`,
			`);`
			`}`

			`console.log('assertion passed, thought:', result?.content?.thought);`
			`},`
			`{`
fix(prompt): resolve the llm-planning format error (#341) 2025-01-30 14:14:14 +08:00			`timeout: 3 * 60 * 1000,`
feat(ai-model): remove dom info in assertion to make it reliable (#284) --------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com> 2025-01-16 14:37:35 +08:00			`},`
			`);`
			`});`
			`});`
			`});`