mirror of
https://github.com/web-infra-dev/midscene.git
synced 2025-07-10 02:21:10 +00:00

* feat(web-integration): support unstableLogContent api for agent * fix(web-integration): use sync method * docs(core): update API doc * fix(web-integration): playwright * docs(site): update --------- Co-authored-by: yutao <yutao.tao@bytedance.com>
275 lines
7.4 KiB
TypeScript
275 lines
7.4 KiB
TypeScript
import { mkdirSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
|
|
import path, { resolve, join } from 'node:path';
|
|
import Insight, {
|
|
type Rect,
|
|
MIDSCENE_MODEL_NAME,
|
|
getAIConfig,
|
|
} from '@midscene/core';
|
|
import { sleep } from '@midscene/core/utils';
|
|
import { vlLocateMode } from '@midscene/shared/env';
|
|
import { imageInfoOfBase64, saveBase64Image } from '@midscene/shared/img';
|
|
import dotenv from 'dotenv';
|
|
import sharp from 'sharp';
|
|
import { afterEach, expect, test } from 'vitest';
|
|
import { TestResultCollector } from '../src/test-analyzer';
|
|
import { annotateRects } from './util';
|
|
|
|
// Configuration
// Load variables from the local .env file before any code below reads
// process.env. `override: true` lets values from .env replace variables that
// are already set in the environment; `debug: true` enables dotenv's own
// diagnostic logging while the file is loaded.
dotenv.config({
  debug: true,
  override: true,
});
|
|
|
|
/**
 * Converts a raw concurrency setting into a usable chunk size.
 *
 * @param raw - The raw setting, typically an environment variable value.
 * @param fallback - Value used when `raw` is missing or unusable.
 * @returns The parsed positive integer, or `fallback` when `raw` is empty,
 *   not numeric, zero, or negative.
 */
function parseConcurrency(raw: string | undefined, fallback = 5): number {
  if (!raw) {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  // A NaN, zero, or negative step would make a chunked `i += step` loop either
  // process nothing or never terminate, so reject those values here.
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Number of samples processed in parallel per chunk. Read from
 * SCREENSPOT_CONCURRENCY; defaults to 5 when unset or invalid.
 */
const CONCURRENCY = parseConcurrency(process.env.SCREENSPOT_CONCURRENCY);
|
|
|
|
// Types
/** One entry of the `samples` array read from samples.json. */
interface Sample {
  /** Identifier of the sample; used in log messages and output file names. */
  _id: { $oid: string };
  /** Image path, resolved against the screenspot-v2 data directory. */
  filepath: string;
  /** Text passed to the locator as its prompt. */
  instruction: string;
  action_detection: {
    /**
     * Ground-truth box as four numbers. When scoring, elements 0 and 2 are
     * multiplied by the image width and elements 1 and 3 by the image height
     * to form the `left`, `top`, `width`, and `height` of a pixel rectangle.
     */
    bounding_box: [number, number, number, number];
  };
}
|
|
|
|
/** Outcome of evaluating a single sample. */
interface ProcessResult {
  /** `true` when the sample counts as a pass; `false` for a miss or an error. */
  success: boolean;
}
|
|
|
|
/** Running totals accumulated while samples are processed. */
interface TestStats {
  /** Number of samples whose result was successful. */
  correctCount: number;
  /** Number of samples whose result was unsuccessful. */
  failCount: number;
  /** Number of samples handled so far, successful or not. */
  totalProcessed: number;
}
|
|
|
|
// Setup functions
/**
 * Prepares the output directories for a run and loads the sample list.
 *
 * Any existing result directory is deleted, then the `success`, `failed`,
 * `error`, and `error/logs` sub-directories are recreated. The samples are
 * read from `samples.json` in the screenspot-v2 data directory.
 *
 * @returns The data directory, the result directory, and the parsed samples.
 * @throws Error when `samples.json` does not contain a `samples` array.
 */
function setupTestEnvironment() {
  const screenspotV2Path = resolve(__dirname, '../page-data/screenspot-v2');
  const outputTestResultPath = resolve(
    screenspotV2Path,
    'screenspot-v2-test-result',
  );
  const samplesPath = resolve(screenspotV2Path, 'samples.json');

  // Start from an empty result directory so output from a previous run cannot
  // be mistaken for output of this one.
  rmSync(outputTestResultPath, { recursive: true, force: true });
  // `recursive: true` creates missing parents, so `error/logs` also creates
  // `error`.
  for (const segments of [['success'], ['failed'], ['error', 'logs']]) {
    mkdirSync(join(outputTestResultPath, ...segments), { recursive: true });
  }

  const parsed: unknown = JSON.parse(readFileSync(samplesPath, 'utf-8'));
  const samples = (parsed as { samples?: unknown } | null)?.samples;
  if (!Array.isArray(samples)) {
    throw new Error(
      `Expected a "samples" array in ${samplesPath}, got ${typeof samples}`,
    );
  }

  return {
    screenspotV2Path,
    outputTestResultPath,
    // Only the array shape is checked; individual entries are trusted to
    // match Sample.
    samples: samples as Sample[],
  };
}
|
|
|
|
// Image processing functions
/**
 * Reads an image with sharp, re-encodes it as PNG, and returns it as base64
 * together with the size information reported by `imageInfoOfBase64`.
 *
 * @param filepath - Path of the image file to load.
 * @returns The PNG bytes as a base64 string (no `data:` prefix) and the size
 *   object resolved by `imageInfoOfBase64`.
 */
async function loadAndProcessImage(filepath: string): Promise<{
  imageBase64: string;
  // Derived from the helper instead of `any` so callers keep type checking.
  size: Awaited<ReturnType<typeof imageInfoOfBase64>>;
}> {
  const pngBuffer = await sharp(filepath).png().toBuffer();
  const imageBase64 = pngBuffer.toString('base64');
  const size = await imageInfoOfBase64(imageBase64);
  return { imageBase64, size };
}
|
|
|
|
// Error handling functions
/**
 * Serializes an error log entry as pretty-printed JSON.
 * Falls back to the string form of `error` when the value cannot be
 * serialized (for example a circular structure or a BigInt).
 */
function serializeErrorLog(errorLog: { error: unknown }): string {
  try {
    return JSON.stringify(errorLog, null, 2);
  } catch {
    return JSON.stringify(
      { ...errorLog, error: String(errorLog.error) },
      null,
      2,
    );
  }
}

/**
 * Records a sample that threw during processing and reports it as a failure.
 *
 * Logs the error to the console, writes a JSON log to
 * `error/logs/screenspot-v2-<id>-error.log`, and, when an image was already
 * loaded, saves it to `error/screenspot-v2-<id>-error.png`.
 *
 * Writing these artifacts is best-effort: a failure while writing them is
 * logged and does not reject, so one bad sample cannot abort the whole run.
 *
 * @param error - The value that was thrown.
 * @param sample - The sample being processed when the error occurred.
 * @param imageBase64 - Base64 image data, or an empty string if none was loaded.
 * @param outputTestResultPath - Root directory for result artifacts.
 * @returns Always `{ success: false }`.
 */
async function handleError(
  error: unknown,
  sample: Sample,
  imageBase64: string,
  outputTestResultPath: string,
): Promise<ProcessResult> {
  const sampleId = sample._id.$oid;
  console.error(`Error processing sample ${sampleId}:`, error);
  console.error('sample.filepath', sample.filepath);

  const errorDir = join(outputTestResultPath, 'error');
  const errorLog = {
    timestamp: new Date().toISOString(),
    sampleId,
    filepath: sample.filepath,
    // Error instances do not serialize their own fields with JSON.stringify,
    // so copy the useful ones explicitly.
    error:
      error instanceof Error
        ? { message: error.message, stack: error.stack, name: error.name }
        : error,
  };

  try {
    writeFileSync(
      join(errorDir, 'logs', `screenspot-v2-${sampleId}-error.log`),
      serializeErrorLog(errorLog),
    );

    if (imageBase64) {
      await saveBase64Image({
        base64Data: imageBase64,
        outputPath: join(errorDir, `screenspot-v2-${sampleId}-error.png`),
      });
    }
  } catch (artifactError) {
    console.error(
      `Failed to write error artifacts for sample ${sampleId}:`,
      artifactError,
    );
  }

  return { success: false };
}
|
|
|
|
// Sample processing functions
/**
 * Runs the locator on one sample and scores it against the ground truth.
 *
 * The image is loaded, wrapped in a minimal context (screenshot only, empty
 * content and tree), and passed to `insight.locate` with the sample's
 * instruction as the prompt. The sample passes when the located element's
 * rectangle lies inside the ground-truth rectangle. An annotated image is
 * saved under `success` or `failed` accordingly.
 *
 * @param sample - The sample to evaluate.
 * @param screenspotV2Path - Directory that `sample.filepath` is resolved against.
 * @param outputTestResultPath - Root directory for result artifacts.
 * @returns `{ success: true }` on a match; `{ success: false }` when the image
 *   size is invalid, nothing was located, or the rectangle does not match.
 *   Thrown errors are passed to `handleError` and its result is returned.
 */
async function processSample(
  sample: Sample,
  screenspotV2Path: string,
  outputTestResultPath: string,
): Promise<ProcessResult> {
  // Declared outside the try block so the catch handler can still save the
  // image when a later step throws.
  let imageBase64 = '';
  try {
    const imagePath = resolve(screenspotV2Path, sample.filepath);
    const loaded = await loadAndProcessImage(imagePath);
    imageBase64 = loaded.imageBase64;
    const { size } = loaded;

    if (!size || !size.width || !size.height) {
      console.error(`Invalid image size for sample ${sample._id.$oid}:`, size);
      return { success: false };
    }

    const mockContext = {
      screenshotBase64: `data:image/png;base64,${imageBase64}`,
      content: [],
      tree: {
        node: null,
        children: [],
      },
      size,
      url: '',
      timestamp: Date.now(),
    };

    const prompt = sample.instruction;
    const insight = new Insight(mockContext);
    const { element, rect } = await insight.locate({ prompt });

    if (!element || !rect) {
      return { success: false };
    }

    // Scale the ground-truth box by the image dimensions to get pixels.
    const [gtLeft, gtTop, gtWidth, gtHeight] =
      sample.action_detection.bounding_box;
    const gtRect: Rect = {
      left: gtLeft * size.width,
      top: gtTop * size.height,
      width: gtWidth * size.width,
      height: gtHeight * size.height,
    };

    // `rect` is only drawn on the annotated image; scoring uses element.rect.
    const markedImage = await annotateRects(
      mockContext.screenshotBase64,
      [gtRect, element.rect, rect],
      prompt,
    );

    const isMatch = isRectInside(element.rect, gtRect);
    const resultPath = join(
      outputTestResultPath,
      isMatch ? 'success' : 'failed',
      `screenspot-v2-${sample._id.$oid}-annotated.png`,
    );

    await saveBase64Image({
      base64Data: markedImage,
      outputPath: resultPath,
    });

    return { success: isMatch };
  } catch (error) {
    return handleError(error, sample, imageBase64, outputTestResultPath);
  }
}
|
|
|
|
// Progress reporting functions
/**
 * Returns a new stats object with one more processed sample recorded.
 * The input object is not modified.
 *
 * @param stats - Totals before this sample.
 * @param result - Outcome of the sample just processed.
 * @returns Totals with `totalProcessed` incremented, and either
 *   `correctCount` or `failCount` incremented depending on `result.success`.
 */
function updateStats(stats: TestStats, result: ProcessResult): TestStats {
  const passed = result.success ? 1 : 0;
  return {
    ...stats,
    correctCount: stats.correctCount + passed,
    failCount: stats.failCount + (1 - passed),
    totalProcessed: stats.totalProcessed + 1,
  };
}
|
|
|
|
/**
 * Logs a one-line progress summary: processed/total, percentage, and the
 * pass and fail counts.
 *
 * @param stats - Totals accumulated so far.
 * @param totalSamples - Total number of samples in the run.
 */
function printProgress(stats: TestStats, totalSamples: number): void {
  // Avoid printing "NaN%" when there are no samples at all.
  const percent =
    totalSamples > 0 ? (stats.totalProcessed / totalSamples) * 100 : 0;
  console.log(
    `Progress: ${stats.totalProcessed}/${totalSamples} (${percent.toFixed(1)}%) - Pass: ${stats.correctCount}, Fail: ${stats.failCount}`,
  );
}
|
|
|
|
/**
 * Logs the final summary of the run and returns the accuracy.
 *
 * @param stats - Final totals.
 * @param totalSamples - Total number of samples in the run.
 * @returns `correctCount / totalSamples`, or 0 when there are no samples
 *   (instead of NaN).
 */
function printFinalResults(stats: TestStats, totalSamples: number): number {
  const accuracy = totalSamples > 0 ? stats.correctCount / totalSamples : 0;
  console.log(
    [
      'ScreenSpot-v2 Final Results:',
      `Total Samples: ${totalSamples}`,
      `Passed: ${stats.correctCount}`,
      `Failed: ${stats.failCount}`,
      `Accuracy: ${accuracy.toFixed(4)}`,
      `Concurrency: ${CONCURRENCY}`,
    ].join('\n'),
  );
  return accuracy;
}
|
|
|
|
// Main test
// Opt-in benchmark: registered as a real test only when SCREENSPOT_V2 is set,
// otherwise registered as skipped.
(process.env.SCREENSPOT_V2 ? test : test.skip)(
  'ScreenSpot-v2: evaluate mobile UI element locator',
  async () => {
    const { screenspotV2Path, outputTestResultPath, samples } =
      setupTestEnvironment();
    let stats: TestStats = {
      correctCount: 0,
      failCount: 0,
      totalProcessed: 0,
    };

    // Process samples in chunks of CONCURRENCY: every sample in a chunk runs
    // in parallel, and the next chunk starts once the whole chunk has settled.
    for (let start = 0; start < samples.length; start += CONCURRENCY) {
      const chunk = samples.slice(start, start + CONCURRENCY);
      const results = await Promise.all(
        chunk.map((sample) =>
          processSample(sample, screenspotV2Path, outputTestResultPath),
        ),
      );

      for (const result of results) {
        stats = updateStats(stats, result);
      }

      printProgress(stats, samples.length);
    }

    const accuracy = printFinalResults(stats, samples.length);
    expect(accuracy).toBeGreaterThan(0.5);
  },
  // Test timeout: 12 hours, in milliseconds.
  12 * 60 * 60 * 1000,
);
|
|
|
|
/**
 * Checks whether `rect1` lies entirely within `rect2`.
 * Edges are inclusive: a rectangle that touches the boundary of `rect2`, or
 * is identical to it, counts as inside.
 *
 * @param rect1 - The candidate inner rectangle.
 * @param rect2 - The containing rectangle.
 * @returns `true` when all four edges of `rect1` are within `rect2`.
 */
function isRectInside(rect1: Rect, rect2: Rect): boolean {
  const right1 = rect1.left + rect1.width;
  const bottom1 = rect1.top + rect1.height;
  const right2 = rect2.left + rect2.width;
  const bottom2 = rect2.top + rect2.height;

  return (
    rect1.left >= rect2.left &&
    rect1.top >= rect2.top &&
    right1 <= right2 &&
    bottom1 <= bottom2
  );
}
|