midscene/packages/evaluation/tests/assertion.test.ts
yuyutaotao 2c5ea87131
fix: ai test (#460)
* fix: ai test

* fix: ci test

* fix: evaluation test

* fix: test

* fix: test

* fix: ai test

* fix: ai test
2025-03-12 13:49:50 +08:00

70 lines
1.7 KiB
TypeScript

import { readFileSync } from 'node:fs';
import path from 'node:path';
import { describe } from 'node:test';
import { AiAssert } from '@midscene/core';
import { afterAll, expect, test } from 'vitest';
import { buildContext, getCases } from './util';
import 'dotenv/config';
import dotenv from 'dotenv';
dotenv.config({
debug: true,
override: true,
});
const testSources = ['online_order', 'online_order_list'];
describe('ai assertion', () => {
const testResult: {
path: string;
result: {
score: number;
averageTime: string;
successCount: number;
failCount: number;
};
}[] = [];
afterAll(async () => {
console.table(
testResult.map((r) => {
return {
path: r.path,
...r.result,
};
}),
);
});
for (const source of testSources) {
const { path: aiDataPath, content: cases } = getCases(source, 'assertion');
cases.testCases.forEach((testCase, index) => {
const prompt = testCase.prompt;
console.log('prompt', prompt);
test(
`${source}: assertion-${prompt.slice(0, 30)}...`,
async () => {
const context = await buildContext(source);
const { prompt, expected } = testCase;
const result = await AiAssert({
assertion: prompt,
context,
});
console.log('assertion result', result);
expect(typeof result?.content?.pass).toBe('boolean');
if (result?.content?.pass !== expected) {
throw new Error(
`assertion failed: ${prompt} expected: ${expected}, actual: ${result?.content?.pass}, thought: ${result?.content?.thought}`,
);
}
},
3 * 60 * 1000,
);
});
}
});