mirror of
https://github.com/web-infra-dev/midscene.git
synced 2025-12-26 14:38:57 +00:00
fix(core): keep context in log file when locate fails (#597)
This commit is contained in:
parent
10efa40c8b
commit
138864e6b1
@ -51,7 +51,6 @@ export function fillLocateParam(
|
||||
locate: PlanningLocateParam,
|
||||
width: number,
|
||||
height: number,
|
||||
errorMsg?: string,
|
||||
) {
|
||||
// The Qwen model sometimes hallucinates the field name and returns `bbox_2d` instead of `bbox`.
|
||||
if ((locate as any).bbox_2d && !locate?.bbox) {
|
||||
@ -61,7 +60,7 @@ export function fillLocateParam(
|
||||
}
|
||||
|
||||
if (locate?.bbox) {
|
||||
locate.bbox = adaptBbox(locate.bbox, width, height, errorMsg);
|
||||
locate.bbox = adaptBbox(locate.bbox, width, height);
|
||||
}
|
||||
|
||||
return locate;
|
||||
@ -69,12 +68,9 @@ export function fillLocateParam(
|
||||
|
||||
export function adaptQwenBbox(
|
||||
bbox: number[],
|
||||
errorMsg?: string,
|
||||
): [number, number, number, number] {
|
||||
if (bbox.length < 2) {
|
||||
const msg =
|
||||
errorMsg ||
|
||||
`invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;
|
||||
const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;
|
||||
throw new Error(msg);
|
||||
}
|
||||
|
||||
@ -95,7 +91,6 @@ export function adaptDoubaoBbox(
|
||||
bbox: number[] | string,
|
||||
width: number,
|
||||
height: number,
|
||||
errorMsg?: string,
|
||||
): [number, number, number, number] {
|
||||
assert(
|
||||
width > 0 && height > 0,
|
||||
@ -158,9 +153,7 @@ export function adaptDoubaoBbox(
|
||||
];
|
||||
}
|
||||
|
||||
const msg =
|
||||
errorMsg ||
|
||||
`invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;
|
||||
const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;
|
||||
throw new Error(msg);
|
||||
}
|
||||
|
||||
@ -168,13 +161,12 @@ export function adaptBbox(
|
||||
bbox: number[],
|
||||
width: number,
|
||||
height: number,
|
||||
errorMsg?: string,
|
||||
): [number, number, number, number] {
|
||||
if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {
|
||||
return adaptDoubaoBbox(bbox, width, height, errorMsg);
|
||||
return adaptDoubaoBbox(bbox, width, height);
|
||||
}
|
||||
|
||||
return adaptQwenBbox(bbox, errorMsg);
|
||||
return adaptQwenBbox(bbox);
|
||||
}
|
||||
|
||||
export function adaptBboxToRect(
|
||||
@ -183,18 +175,9 @@ export function adaptBboxToRect(
|
||||
height: number,
|
||||
offsetX = 0,
|
||||
offsetY = 0,
|
||||
errorMsg?: string,
|
||||
): Rect {
|
||||
debugInspectUtils(
|
||||
'adaptBboxToRect',
|
||||
bbox,
|
||||
width,
|
||||
height,
|
||||
offsetX,
|
||||
offsetY,
|
||||
errorMsg || '',
|
||||
);
|
||||
const [left, top, right, bottom] = adaptBbox(bbox, width, height, errorMsg);
|
||||
debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);
|
||||
const [left, top, right, bottom] = adaptBbox(bbox, width, height);
|
||||
return {
|
||||
left: left + offsetX,
|
||||
top: top + offsetY,
|
||||
|
||||
@ -202,38 +202,45 @@ export async function AiLocateElement<
|
||||
'elements' in res.content ? res.content.elements : [];
|
||||
let errors: AIElementLocatorResponse['errors'] | undefined =
|
||||
'errors' in res.content ? res.content.errors : [];
|
||||
if ('bbox' in res.content && Array.isArray(res.content.bbox)) {
|
||||
const errorMsg = res.content.errors?.length
|
||||
? `Failed to parse bbox: ${res.content.errors?.join(',')}`
|
||||
: '';
|
||||
try {
|
||||
if ('bbox' in res.content && Array.isArray(res.content.bbox)) {
|
||||
resRect = adaptBboxToRect(
|
||||
res.content.bbox,
|
||||
options.searchConfig?.rect?.width || context.size.width,
|
||||
options.searchConfig?.rect?.height || context.size.height,
|
||||
options.searchConfig?.rect?.left,
|
||||
options.searchConfig?.rect?.top,
|
||||
);
|
||||
debugInspect('resRect', resRect);
|
||||
|
||||
resRect = adaptBboxToRect(
|
||||
res.content.bbox,
|
||||
options.searchConfig?.rect?.width || context.size.width,
|
||||
options.searchConfig?.rect?.height || context.size.height,
|
||||
options.searchConfig?.rect?.left,
|
||||
options.searchConfig?.rect?.top,
|
||||
errorMsg,
|
||||
);
|
||||
debugInspect('resRect', resRect);
|
||||
const rectCenter = {
|
||||
x: resRect.left + resRect.width / 2,
|
||||
y: resRect.top + resRect.height / 2,
|
||||
};
|
||||
let element = elementByPositionWithElementInfo(context.tree, rectCenter);
|
||||
|
||||
const rectCenter = {
|
||||
x: resRect.left + resRect.width / 2,
|
||||
y: resRect.top + resRect.height / 2,
|
||||
};
|
||||
let element = elementByPositionWithElementInfo(context.tree, rectCenter);
|
||||
const distanceToCenter = element
|
||||
? distance({ x: element.center[0], y: element.center[1] }, rectCenter)
|
||||
: 0;
|
||||
|
||||
const distanceToCenter = element
|
||||
? distance({ x: element.center[0], y: element.center[1] }, rectCenter)
|
||||
: 0;
|
||||
if (!element || distanceToCenter > distanceThreshold) {
|
||||
element = insertElementByPosition(rectCenter);
|
||||
}
|
||||
|
||||
if (!element || distanceToCenter > distanceThreshold) {
|
||||
element = insertElementByPosition(rectCenter);
|
||||
if (element) {
|
||||
matchedElements = [element];
|
||||
errors = [];
|
||||
}
|
||||
}
|
||||
|
||||
if (element) {
|
||||
matchedElements = [element];
|
||||
errors = [];
|
||||
} catch (e) {
|
||||
const msg =
|
||||
e instanceof Error
|
||||
? `Failed to parse bbox: ${e.message}`
|
||||
: 'unknown error in locate';
|
||||
if (!errors || errors?.length === 0) {
|
||||
errors = [msg];
|
||||
} else {
|
||||
errors.push(`(${msg})`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -86,12 +86,22 @@ export async function plan(
|
||||
if (vlLocateMode()) {
|
||||
actions.forEach((action) => {
|
||||
if (action.locate) {
|
||||
action.locate = fillLocateParam(
|
||||
action.locate,
|
||||
size.width,
|
||||
size.height,
|
||||
planFromAI.error,
|
||||
);
|
||||
try {
|
||||
action.locate = fillLocateParam(
|
||||
action.locate,
|
||||
size.width,
|
||||
size.height,
|
||||
);
|
||||
} catch (e) {
|
||||
throw new Error(
|
||||
`Failed to fill locate param: ${planFromAI.error} (${
|
||||
e instanceof Error ? e.message : 'unknown error'
|
||||
})`,
|
||||
{
|
||||
cause: e,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
});
|
||||
// In Qwen-VL, a reported error is a genuine failure. In GPT-4o, an error may only mean that more actions are needed.
|
||||
|
||||
@ -138,7 +138,7 @@ export default class Insight<
|
||||
quickAnswer: opt?.quickAnswer,
|
||||
searchConfig: searchAreaResponse,
|
||||
});
|
||||
// const parseResult = await this.aiVendorFn<AIElementParseResponse>(msgs);
|
||||
|
||||
const timeCost = Date.now() - startTime;
|
||||
const taskInfo: InsightTaskInfo = {
|
||||
...(this.taskInfo ? this.taskInfo : {}),
|
||||
@ -153,7 +153,7 @@ export default class Insight<
|
||||
|
||||
let errorLog: string | undefined;
|
||||
if (parseResult.errors?.length) {
|
||||
errorLog = `locate - AI response error: \n${parseResult.errors.join('\n')}`;
|
||||
errorLog = `AI model failed to locate: \n${parseResult.errors.join('\n')}`;
|
||||
}
|
||||
|
||||
const dumpData: PartialInsightDumpFromSDK = {
|
||||
|
||||
@ -160,6 +160,12 @@ export class PageTaskExecutor {
|
||||
const dumpCollector: DumpSubscriber = (dump) => {
|
||||
insightDump = dump;
|
||||
usage = dump?.taskInfo?.usage;
|
||||
|
||||
task.log = {
|
||||
dump: insightDump,
|
||||
};
|
||||
|
||||
task.usage = usage;
|
||||
};
|
||||
this.insight.onceDumpUpdatedFn = dumpCollector;
|
||||
const shotTime = Date.now();
|
||||
@ -170,6 +176,7 @@ export class PageTaskExecutor {
|
||||
screenshot: pageContext.screenshotBase64,
|
||||
timing: 'before locate',
|
||||
};
|
||||
task.recorder = [recordItem];
|
||||
|
||||
const cachePrompt = param.prompt;
|
||||
const locateCache = cacheGroup?.matchCache(
|
||||
@ -218,9 +225,6 @@ export class PageTaskExecutor {
|
||||
});
|
||||
}
|
||||
if (!element) {
|
||||
task.log = {
|
||||
dump: insightDump,
|
||||
};
|
||||
throw new Error(`Element not found: ${param.prompt}`);
|
||||
}
|
||||
|
||||
@ -229,15 +233,10 @@ export class PageTaskExecutor {
|
||||
element,
|
||||
},
|
||||
pageContext,
|
||||
log: {
|
||||
dump: insightDump,
|
||||
},
|
||||
cache: {
|
||||
hit: cacheHitFlag,
|
||||
},
|
||||
recorder: [recordItem],
|
||||
aiCost,
|
||||
usage,
|
||||
};
|
||||
},
|
||||
};
|
||||
@ -826,7 +825,6 @@ export class PageTaskExecutor {
|
||||
logList.push(planResult.log);
|
||||
}
|
||||
|
||||
// console.log('planningResult is', planResult);
|
||||
if (!planResult.more_actions_needed_by_instruction) {
|
||||
planningTask = null;
|
||||
break;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user