mirror of
https://github.com/web-infra-dev/midscene.git
synced 2025-12-26 14:38:57 +00:00
fix(report): reduce context size in report file (#626)
* fix(core): reduce context size in report file
* chore(core): fix lint
* chore(core): resolve conflict

---------

Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com>
This commit is contained in:
parent
ce7929bbbc
commit
ecefd8b0fa
@ -5,7 +5,7 @@ import { pluginLess } from '@rsbuild/plugin-less';
|
||||
import { pluginNodePolyfill } from '@rsbuild/plugin-node-polyfill';
|
||||
import { pluginReact } from '@rsbuild/plugin-react';
|
||||
|
||||
const testDataPath = path.join(__dirname, 'test-data', 'online-order.json');
|
||||
const testDataPath = path.join(__dirname, 'test-data', 'swag-lab.json');
|
||||
const testData = JSON.parse(fs.readFileSync(testDataPath, 'utf-8'));
|
||||
|
||||
const copyReportTemplate = () => ({
|
||||
|
||||
@ -33,12 +33,12 @@ const VIEW_TYPE_JSON = 'json';
|
||||
const DetailPanel = (): JSX.Element => {
|
||||
const insightDump = useExecutionDump((store) => store.insightDump);
|
||||
const dumpId = useExecutionDump((store) => store._insightDumpLoadId);
|
||||
const blackboardViewAvailable = Boolean(insightDump);
|
||||
const activeExecution = useExecutionDump((store) => store.activeExecution);
|
||||
const activeExecutionId = useExecutionDump(
|
||||
(store) => store._executionDumpLoadId,
|
||||
);
|
||||
const activeTask = useExecutionDump((store) => store.activeTask);
|
||||
const blackboardViewAvailable = Boolean(activeTask?.pageContext);
|
||||
const [preferredViewType, setViewType] = useState(VIEW_TYPE_REPLAY);
|
||||
const animationScripts = useExecutionDump(
|
||||
(store) => store.activeExecutionAnimation,
|
||||
@ -89,7 +89,7 @@ const DetailPanel = (): JSX.Element => {
|
||||
if (blackboardViewAvailable) {
|
||||
content = (
|
||||
<Blackboard
|
||||
uiContext={insightDump!.context}
|
||||
uiContext={activeTask.pageContext}
|
||||
highlightElements={insightDump!.matchedElement}
|
||||
highlightRect={insightDump!.taskInfo?.searchArea}
|
||||
key={`${dumpId}`}
|
||||
|
||||
1
apps/report/test-data/swag-lab.json
Normal file
1
apps/report/test-data/swag-lab.json
Normal file
File diff suppressed because one or more lines are too long
@ -1,4 +1,10 @@
|
||||
import type { AIUsageInfo, Rect, Size } from '@/types';
|
||||
import type {
|
||||
AIUsageInfo,
|
||||
BaseElement,
|
||||
ElementTreeNode,
|
||||
Rect,
|
||||
Size,
|
||||
} from '@/types';
|
||||
import { assert } from '@midscene/shared/utils';
|
||||
|
||||
import type {
|
||||
@ -13,6 +19,9 @@ import {
|
||||
|
||||
import { vlLocateMode } from '@/env';
|
||||
import type { PlanningLocateParam } from '@/types';
|
||||
import { NodeType } from '@midscene/shared/constants';
|
||||
import { treeToList } from '@midscene/shared/extractor';
|
||||
import { compositeElementInfoImg } from '@midscene/shared/img';
|
||||
import { getDebug } from '@midscene/shared/logger';
|
||||
|
||||
export type AIArgs = [
|
||||
@ -261,3 +270,26 @@ export function expandSearchArea(rect: Rect, screenSize: Size) {
|
||||
);
|
||||
return rect;
|
||||
}
|
||||
|
||||
/**
 * Build the image payload for a model call from a raw screenshot.
 *
 * The element tree is flattened with `treeToList`, every element whose
 * `attributes.nodeType` equals `NodeType.TEXT` is dropped, and the remaining
 * elements are handed to `compositeElementInfoImg` together with the
 * screenshot and the page size.
 *
 * @param screenshotBase64 - screenshot passed through as `inputImgBase64`.
 * @param tree - element tree to flatten.
 * @param size - forwarded unchanged to `compositeElementInfoImg`.
 * @returns whatever `compositeElementInfoImg` resolves to
 *   (NOTE(review): presumably the screenshot with element markers drawn on
 *   it, as base64 — confirm against `@midscene/shared/img`).
 */
export async function markupImageForLLM(
  screenshotBase64: string,
  tree: ElementTreeNode<BaseElement>,
  size: Size,
) {
  const flattenedElements = treeToList(tree);

  // Text nodes are excluded from the elements passed on for compositing.
  const nonTextElements = flattenedElements!.filter(
    (element) => element.attributes.nodeType !== NodeType.TEXT,
  );

  return await compositeElementInfoImg({
    inputImgBase64: screenshotBase64,
    elementsPositionInfo: nonTextElements as any,
    size,
  });
}
|
||||
|
||||
@ -32,6 +32,7 @@ import {
|
||||
adaptBboxToRect,
|
||||
callAiFn,
|
||||
expandSearchArea,
|
||||
markupImageForLLM,
|
||||
mergeRects,
|
||||
} from './common';
|
||||
import { systemPromptToAssert } from './prompt/assertion';
|
||||
@ -128,7 +129,7 @@ export async function AiLocateElement<
|
||||
usage?: AIUsageInfo;
|
||||
}> {
|
||||
const { context, targetElementDescription, callAI } = options;
|
||||
const { screenshotBase64, screenshotBase64WithElementMarker } = context;
|
||||
const { screenshotBase64 } = context;
|
||||
const { description, elementById, insertElementByPosition, size } =
|
||||
await describeUserPage(context);
|
||||
// meet quick answer
|
||||
@ -153,7 +154,7 @@ export async function AiLocateElement<
|
||||
});
|
||||
const systemPrompt = systemPromptToLocateElement(vlLocateMode());
|
||||
|
||||
let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
|
||||
let imagePayload = screenshotBase64;
|
||||
|
||||
if (options.searchConfig) {
|
||||
assert(
|
||||
@ -166,8 +167,14 @@ export async function AiLocateElement<
|
||||
);
|
||||
|
||||
imagePayload = options.searchConfig.imageBase64;
|
||||
} else if (getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL)) {
|
||||
} else if (vlLocateMode() === 'qwen-vl') {
|
||||
imagePayload = await paddingToMatchBlockByBase64(imagePayload);
|
||||
} else if (!vlLocateMode()) {
|
||||
imagePayload = await markupImageForLLM(
|
||||
screenshotBase64,
|
||||
context.tree,
|
||||
context.size,
|
||||
);
|
||||
}
|
||||
|
||||
const msgs: AIArgs = [
|
||||
|
||||
@ -7,6 +7,7 @@ import {
|
||||
type AIArgs,
|
||||
callAiFn,
|
||||
fillLocateParam,
|
||||
markupImageForLLM,
|
||||
warnGPT4oSizeLimit,
|
||||
} from './common';
|
||||
import {
|
||||
@ -27,7 +28,7 @@ export async function plan(
|
||||
},
|
||||
): Promise<PlanningAIResponse> {
|
||||
const { callAI, context } = opts || {};
|
||||
const { screenshotBase64, screenshotBase64WithElementMarker, size } = context;
|
||||
const { screenshotBase64, size } = context;
|
||||
const { description: pageDescription } = await describeUserPage(context);
|
||||
|
||||
const systemPrompt = await systemPromptToTaskPlanning({
|
||||
@ -46,9 +47,15 @@ export async function plan(
|
||||
taskBackgroundContext: taskBackgroundContextText,
|
||||
});
|
||||
|
||||
let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
|
||||
let imagePayload = screenshotBase64;
|
||||
if (vlLocateMode() === 'qwen-vl') {
|
||||
imagePayload = await paddingToMatchBlockByBase64(imagePayload);
|
||||
} else if (!vlLocateMode()) {
|
||||
imagePayload = await markupImageForLLM(
|
||||
screenshotBase64,
|
||||
context.tree,
|
||||
context.size,
|
||||
);
|
||||
}
|
||||
|
||||
warnGPT4oSizeLimit(size);
|
||||
|
||||
@ -340,7 +340,7 @@ export async function callToGetJSONObject<T>(
|
||||
|
||||
const model = getModelName();
|
||||
|
||||
if (model.includes('gpt-4o') || model.includes('gpt-4.1')) {
|
||||
if (model.includes('gpt-4')) {
|
||||
switch (AIActionTypeValue) {
|
||||
case AIActionType.ASSERT:
|
||||
responseFormat = assertSchema;
|
||||
|
||||
@ -158,7 +158,6 @@ export default class Insight<
|
||||
|
||||
const dumpData: PartialInsightDumpFromSDK = {
|
||||
type: 'locate',
|
||||
context,
|
||||
userQuery: {
|
||||
element: queryPrompt,
|
||||
},
|
||||
@ -256,7 +255,6 @@ export default class Insight<
|
||||
|
||||
const dumpData: PartialInsightDumpFromSDK = {
|
||||
type: 'extract',
|
||||
context,
|
||||
userQuery: {
|
||||
dataDemand,
|
||||
},
|
||||
@ -314,7 +312,6 @@ export default class Insight<
|
||||
const { thought, pass } = assertResult.content;
|
||||
const dumpData: PartialInsightDumpFromSDK = {
|
||||
type: 'assert',
|
||||
context,
|
||||
userQuery: {
|
||||
assertion,
|
||||
},
|
||||
|
||||
@ -119,8 +119,6 @@ export interface AIAssertionResponse {
|
||||
export abstract class UIContext<ElementType extends BaseElement = BaseElement> {
|
||||
abstract screenshotBase64: string;
|
||||
|
||||
abstract screenshotBase64WithElementMarker?: string;
|
||||
|
||||
// @deprecated('use tree instead')
|
||||
abstract content: ElementType[];
|
||||
|
||||
@ -193,7 +191,6 @@ export interface ReportDumpWithAttributes {
|
||||
export interface InsightDump extends DumpMeta {
|
||||
type: 'locate' | 'extract' | 'assert';
|
||||
logId: string;
|
||||
context: UIContext;
|
||||
userQuery: {
|
||||
element?: string;
|
||||
dataDemand?: InsightExtractParam;
|
||||
|
||||
@ -146,6 +146,14 @@ export function writeDumpReport(
|
||||
return null;
|
||||
}
|
||||
writeFileSync(reportPath, reportContent);
|
||||
if (process.env.MIDSCENE_DEBUG_LOG_JSON) {
|
||||
writeFileSync(
|
||||
`${reportPath}.json`,
|
||||
typeof dumpData === 'string'
|
||||
? dumpData
|
||||
: JSON.stringify(dumpData, null, 2),
|
||||
);
|
||||
}
|
||||
|
||||
return reportPath;
|
||||
}
|
||||
|
||||
@ -1,27 +1,28 @@
|
||||
import assert from 'node:assert';
|
||||
import type Jimp from 'jimp';
|
||||
import type { NodeType } from '../constants';
|
||||
import type { Rect } from '../types';
|
||||
import type { BaseElement } from '../types';
|
||||
import getJimp from './get-jimp';
|
||||
import { bufferFromBase64, imageInfoOfBase64 } from './index';
|
||||
|
||||
// Define picture path
|
||||
type ElementType = {
|
||||
locator?: string;
|
||||
rect: Rect;
|
||||
center?: [number, number];
|
||||
id?: string;
|
||||
indexId: number;
|
||||
attributes?: {
|
||||
nodeType: NodeType;
|
||||
[key: string]: string;
|
||||
};
|
||||
};
|
||||
|
||||
let cachedFont: any = null;
|
||||
|
||||
/**
 * Load the font used when drawing text with Jimp.
 *
 * First tries the `FONT_SANS_16_WHITE` font exposed by the Jimp instance
 * obtained from `getJimp()`. If that load throws, a warning is logged and the
 * font is loaded from a CDN-hosted `.fnt` file instead. A failure of the
 * fallback load is not caught here and propagates to the caller.
 *
 * @returns the value resolved by `Jimp.loadFont`.
 */
const loadFonts = async () => {
  const jimp = await getJimp();
  const onlineFontUrl =
    'https://cdn.jsdelivr.net/npm/jimp-compact@0.16.1-2/fonts/open-sans/open-sans-16-white/open-sans-16-white.fnt';

  try {
    // `await` inside the try so a rejected load reaches the catch below.
    return await jimp.loadFont(jimp.FONT_SANS_16_WHITE);
  } catch (error) {
    console.warn('Error loading font, will try to load online fonts', error);
    return await jimp.loadFont(onlineFontUrl);
  }
};
|
||||
|
||||
const createSvgOverlay = async (
|
||||
elements: Array<ElementType>,
|
||||
elements: Array<BaseElement>,
|
||||
imageWidth: number,
|
||||
imageHeight: number,
|
||||
boxPadding = 5,
|
||||
@ -78,7 +79,11 @@ const createSvgOverlay = async (
|
||||
);
|
||||
|
||||
// Calculate text position
|
||||
const textWidth = element.indexId.toString().length * 8;
|
||||
const indexId = element.indexId;
|
||||
if (typeof indexId !== 'number') {
|
||||
continue;
|
||||
}
|
||||
const textWidth = indexId.toString().length * 8;
|
||||
const textHeight = 12;
|
||||
const rectWidth = textWidth + 5;
|
||||
const rectHeight = textHeight + 4;
|
||||
@ -164,7 +169,7 @@ const createSvgOverlay = async (
|
||||
);
|
||||
// Draw text (simplified, as Jimp doesn't have built-in text drawing)
|
||||
try {
|
||||
cachedFont = cachedFont || (await Jimp.loadFont(Jimp.FONT_SANS_16_WHITE));
|
||||
cachedFont = cachedFont || (await loadFonts());
|
||||
} catch (error) {
|
||||
console.error('Error loading font', error);
|
||||
}
|
||||
@ -173,7 +178,7 @@ const createSvgOverlay = async (
|
||||
rectX,
|
||||
rectY,
|
||||
{
|
||||
text: element.indexId.toString(),
|
||||
text: indexId.toString(),
|
||||
alignmentX: Jimp.HORIZONTAL_ALIGN_CENTER,
|
||||
alignmentY: Jimp.VERTICAL_ALIGN_MIDDLE,
|
||||
},
|
||||
@ -187,7 +192,7 @@ const createSvgOverlay = async (
|
||||
|
||||
export const compositeElementInfoImg = async (options: {
|
||||
inputImgBase64: string;
|
||||
elementsPositionInfo: Array<ElementType>;
|
||||
elementsPositionInfo: Array<BaseElement>;
|
||||
size?: { width: number; height: number };
|
||||
annotationPadding?: number;
|
||||
}) => {
|
||||
@ -255,8 +260,8 @@ export const compositeElementInfoImg = async (options: {
|
||||
|
||||
export const processImageElementInfo = async (options: {
|
||||
inputImgBase64: string;
|
||||
elementsPositionInfo: Array<ElementType>;
|
||||
elementsPositionInfoWithoutText: Array<ElementType>;
|
||||
elementsPositionInfo: Array<BaseElement>;
|
||||
elementsPositionInfoWithoutText: Array<BaseElement>;
|
||||
}) => {
|
||||
// Get the size of the original image
|
||||
const base64Image = options.inputImgBase64.split(';base64,').pop();
|
||||
|
||||
@ -69,7 +69,7 @@ export const Blackboard = (props: {
|
||||
const highlightRect = props.highlightRect;
|
||||
|
||||
const context = props.uiContext!;
|
||||
const { size, screenshotBase64, screenshotBase64WithElementMarker } = context;
|
||||
const { size, screenshotBase64 } = context;
|
||||
|
||||
const screenWidth = size.width;
|
||||
const screenHeight = size.height;
|
||||
@ -88,8 +88,6 @@ export const Blackboard = (props: {
|
||||
const { markerVisible, setMarkerVisible, elementsVisible, setTextsVisible } =
|
||||
useBlackboardPreference();
|
||||
|
||||
const ifMarkerAvailable = !!screenshotBase64WithElementMarker;
|
||||
|
||||
useEffect(() => {
|
||||
Promise.resolve(
|
||||
(async () => {
|
||||
@ -147,25 +145,6 @@ export const Blackboard = (props: {
|
||||
backgroundSprite.width = screenWidth;
|
||||
backgroundSprite.height = screenHeight;
|
||||
app.stage.addChildAt(backgroundSprite, 0);
|
||||
|
||||
if (ifMarkerAvailable) {
|
||||
const markerImg = new Image();
|
||||
markerImg.onload = () => {
|
||||
const markerTexture = PIXI.Texture.from(markerImg);
|
||||
const markerSprite = new PIXI.Sprite(markerTexture);
|
||||
markerSprite.x = 0;
|
||||
markerSprite.y = 0;
|
||||
markerSprite.width = screenWidth;
|
||||
markerSprite.height = screenHeight;
|
||||
app.stage.addChildAt(markerSprite, 1);
|
||||
pixiBgRef.current = markerSprite;
|
||||
markerSprite.visible = markerVisible;
|
||||
};
|
||||
markerImg.onerror = (e) => {
|
||||
console.error('load marker failed', e);
|
||||
};
|
||||
markerImg.src = screenshotBase64WithElementMarker;
|
||||
}
|
||||
};
|
||||
img.onerror = (e) => {
|
||||
console.error('load screenshot failed', e);
|
||||
@ -268,13 +247,6 @@ export const Blackboard = (props: {
|
||||
style={{ display: props.hideController ? 'none' : 'block' }}
|
||||
>
|
||||
<div className="overlay-control">
|
||||
<Checkbox
|
||||
checked={markerVisible}
|
||||
onChange={onSetMarkerVisible}
|
||||
disabled={!ifMarkerAvailable}
|
||||
>
|
||||
Marker
|
||||
</Checkbox>
|
||||
<Checkbox checked={elementsVisible} onChange={onSetElementsVisible}>
|
||||
Elements
|
||||
</Checkbox>
|
||||
|
||||
@ -619,7 +619,7 @@ export function Player(props?: {
|
||||
currentImg.current = item.img;
|
||||
await repaintImage();
|
||||
|
||||
const elements = item.insightDump.context.content;
|
||||
const elements = item.context?.content || [];
|
||||
const highlightElements = item.insightDump.matchedElement;
|
||||
await insightElementsAnimation(
|
||||
elements,
|
||||
|
||||
@ -6,12 +6,12 @@ import { paramStr, typeStr } from '@midscene/web/ui-utils';
|
||||
import type {
|
||||
ExecutionDump,
|
||||
ExecutionTask,
|
||||
ExecutionTaskApply,
|
||||
ExecutionTaskInsightLocate,
|
||||
ExecutionTaskPlanning,
|
||||
GroupedActionDump,
|
||||
InsightDump,
|
||||
Rect,
|
||||
UIContext,
|
||||
} from '@midscene/core';
|
||||
|
||||
export interface CameraState {
|
||||
@ -39,6 +39,7 @@ export interface AnimationScript {
|
||||
img?: string;
|
||||
camera?: TargetCameraState;
|
||||
insightDump?: InsightDump;
|
||||
context?: UIContext;
|
||||
duration: number;
|
||||
insightCameraDuration?: number;
|
||||
title?: string;
|
||||
@ -280,7 +281,7 @@ export const generateAnimationScripts = (
|
||||
});
|
||||
initSubTitle = paramStr(task);
|
||||
}
|
||||
} else if (task.type === 'Insight') {
|
||||
} else if (task.type === 'Insight' && task.subType === 'Locate') {
|
||||
const insightTask = task as ExecutionTaskInsightLocate;
|
||||
const resultElement = insightTask.output?.element;
|
||||
const title = typeStr(task);
|
||||
@ -292,18 +293,16 @@ export const generateAnimationScripts = (
|
||||
pointerTop: resultElement.center[1],
|
||||
};
|
||||
}
|
||||
if (insightTask.log?.dump) {
|
||||
const context = insightTask.pageContext;
|
||||
if (insightTask.log?.dump && context?.screenshotBase64) {
|
||||
const insightDump = insightTask.log.dump;
|
||||
if (!insightDump?.context?.screenshotBase64) {
|
||||
throw new Error('insight dump is required');
|
||||
}
|
||||
const insightContentLength = insightDump.context.content.length;
|
||||
const insightContentLength = context.content.length;
|
||||
|
||||
if (insightDump.context.screenshotBase64WithElementMarker) {
|
||||
if (context.screenshotBase64) {
|
||||
// show the original screenshot first
|
||||
scripts.push({
|
||||
type: 'img',
|
||||
img: insightDump.context.screenshotBase64,
|
||||
img: context.screenshotBase64,
|
||||
duration: stillAfterInsightDuration,
|
||||
title,
|
||||
subTitle,
|
||||
@ -324,9 +323,8 @@ export const generateAnimationScripts = (
|
||||
|
||||
scripts.push({
|
||||
type: 'insight',
|
||||
img:
|
||||
insightDump.context.screenshotBase64WithElementMarker ||
|
||||
insightDump.context.screenshotBase64,
|
||||
img: context.screenshotBase64,
|
||||
context: context,
|
||||
insightDump: insightDump,
|
||||
camera: cameraState,
|
||||
duration:
|
||||
@ -435,7 +433,7 @@ export const generateAnimationScripts = (
|
||||
});
|
||||
}
|
||||
|
||||
// console.log('replayscripts');
|
||||
// console.log('replay scripts');
|
||||
// console.log(scripts, tasksIncluded);
|
||||
|
||||
return scripts;
|
||||
|
||||
@ -4,17 +4,11 @@ import type {
|
||||
PlaywrightParserOpt,
|
||||
UIContext,
|
||||
} from '@midscene/core';
|
||||
import {
|
||||
MIDSCENE_REPORT_TAG_NAME,
|
||||
MIDSCENE_USE_VLM_UI_TARS,
|
||||
getAIConfig,
|
||||
getAIConfigInBoolean,
|
||||
} from '@midscene/core/env';
|
||||
import { MIDSCENE_REPORT_TAG_NAME, getAIConfig } from '@midscene/core/env';
|
||||
import { uploadTestInfoToServer } from '@midscene/core/utils';
|
||||
import { NodeType } from '@midscene/shared/constants';
|
||||
import type { ElementInfo } from '@midscene/shared/extractor';
|
||||
import { traverseTree, treeToList } from '@midscene/shared/extractor';
|
||||
import { compositeElementInfoImg, resizeImgBase64 } from '@midscene/shared/img';
|
||||
import { resizeImgBase64 } from '@midscene/shared/img';
|
||||
import { assert, logMsg, uuid } from '@midscene/shared/utils';
|
||||
import dayjs from 'dayjs';
|
||||
import { WebElementInfo } from '../web-element';
|
||||
@ -58,19 +52,9 @@ export async function parseContextFromWebPage(
|
||||
});
|
||||
});
|
||||
|
||||
const elementsInfo = treeToList(webTree);
|
||||
|
||||
assert(screenshotBase64!, 'screenshotBase64 is required');
|
||||
|
||||
const elementsPositionInfoWithoutText = elementsInfo!.filter(
|
||||
(elementInfo) => {
|
||||
if (elementInfo.attributes.nodeType === NodeType.TEXT) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
},
|
||||
);
|
||||
|
||||
const elementsInfo = treeToList(webTree);
|
||||
const size = await page.size();
|
||||
|
||||
if (size.dpr && size.dpr > 1) {
|
||||
@ -82,25 +66,11 @@ export async function parseContextFromWebPage(
|
||||
// console.timeEnd('resizeImgBase64');
|
||||
}
|
||||
|
||||
let screenshotBase64WithElementMarker = screenshotBase64;
|
||||
if (!getAIConfigInBoolean(MIDSCENE_USE_VLM_UI_TARS)) {
|
||||
if (_opt?.ignoreMarker) {
|
||||
screenshotBase64WithElementMarker = screenshotBase64;
|
||||
} else {
|
||||
screenshotBase64WithElementMarker = await compositeElementInfoImg({
|
||||
inputImgBase64: screenshotBase64,
|
||||
elementsPositionInfo: elementsPositionInfoWithoutText,
|
||||
size,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
content: elementsInfo!,
|
||||
tree: webTree,
|
||||
size,
|
||||
screenshotBase64: screenshotBase64!,
|
||||
screenshotBase64WithElementMarker: screenshotBase64WithElementMarker,
|
||||
url,
|
||||
};
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user