fix: fix the Player style when the locator fails (#129)

* fix: fix the Player style when the locator fails

* fix: add marker image into UIContext
This commit is contained in:
yuyutaotao 2024-10-17 10:44:30 +08:00 committed by GitHub
parent 9f44eedfd9
commit ee521e31e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 98 additions and 43 deletions

View File

@ -39,7 +39,7 @@ export async function AiInspectElement<
}) { }) {
const { context, multi, targetElementDescription, callAI, useModel } = const { context, multi, targetElementDescription, callAI, useModel } =
options; options;
const { screenshotBase64 } = context; const { screenshotBase64, screenshotBase64WithElementMarker } = context;
const { description, elementById } = await describeUserPage(context); const { description, elementById } = await describeUserPage(context);
// meet quick answer // meet quick answer
@ -61,7 +61,7 @@ export async function AiInspectElement<
{ {
type: 'image_url', type: 'image_url',
image_url: { image_url: {
url: screenshotBase64, url: screenshotBase64WithElementMarker || screenshotBase64,
}, },
}, },
{ {

View File

@ -42,16 +42,16 @@ Remember:
If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query. If the planned tasks are sequential and tasks may appear only after the execution of previous tasks, this is considered normal. Thoughts, prompts, and error messages should all be in the same language as the user query.
## Objective 2 (sub objective): Give a quick answer to the action with type "Locate" you just planned ## Objective 2 (sub objective, only for action with type "Locate"): Give a quick answer to the action with type "Locate" you just planned, append a \`quickAnswer\` field after the \`param\` field
Review the action you just planned. If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON: If the action type is 'Locate', provide a quick answer: Does any element meet the description in the prompt? If so, answer with the following format, as the \`quickAnswer\` field in the output JSON:
{ {
"reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button", "reason": "Reason for finding element 4: It is located in the upper right corner, is an image type, and according to the screenshot, it is a shopping cart icon button",
"text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
"id": "wefew2222few2" // id of this element, replace with actual value in practice "id": "wefew2222few2" // id of this element, replace with actual value in practice
} }
If the action type is not 'Locate', or there is no element meets the description in the prompt (usually because it will show up after some interaction), the answer should be null. If there is no element meets the description in the prompt (usually because it will show up later after some interaction), the \`quickAnswer\` field should be null.
## Output JSON Format: ## Output JSON Format:
@ -65,7 +65,7 @@ Please return the result in JSON format as follows:
"param": { "param": {
"prompt": "The search bar" "prompt": "The search bar"
}, },
"quickAnswer": { // since the first action is Locate, so we need to give a quick answer "quickAnswer": { // since this action type is 'Locate', and we can find the element, so we need to give a quick answer
"reason": "Reason for finding element 4: It is located in the upper right corner, is an input type, and according to the screenshot, it is a search bar", "reason": "Reason for finding element 4: It is located in the upper right corner, is an input type, and according to the screenshot, it is a search bar",
"text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty "text": "PLACEHOLDER", // Replace PLACEHOLDER with the text of elementInfo, if none, leave empty
"id": "wefew2222few2" // ID of this element, replace with actual value in practice "id": "wefew2222few2" // ID of this element, replace with actual value in practice
@ -76,6 +76,14 @@ Please return the result in JSON format as follows:
"type": "Tap", // Type of action, like 'Tap' 'Hover' ... "type": "Tap", // Type of action, like 'Tap' 'Hover' ...
"param": any, // Parameter towards the task type "param": any, // Parameter towards the task type
}, },
{
"thought": "Reasons for generating this task, and why this task is feasible on this page",
"type": "Locate", // Type of action, like 'Tap' 'Hover' ...
"param": {
"prompt": "The search bar"
},
"quickAnswer": null,
},
// ... more actions // ... more actions
], ],
error?: string, // Overall error messages. If there is any error occurs during the task planning (i.e. error in previous 'actions' array), conclude the errors again, put error messages here, error?: string, // Overall error messages. If there is any error occurs during the task planning (i.e. error in previous 'actions' array), conclude the errors again, put error messages here,
@ -111,7 +119,8 @@ export const planSchema: ResponseFormatJSONSchema = {
}, },
param: { param: {
type: ['object', 'null'], type: ['object', 'null'],
description: 'Parameter towards the task type, can be null', description:
'Parameter towards the task type, can be null only when the type field is Tap or Hover',
}, },
quickAnswer: { quickAnswer: {
type: ['object', 'null'], type: ['object', 'null'],

View File

@ -81,6 +81,8 @@ export interface AIAssertionResponse {
export abstract class UIContext<ElementType extends BaseElement = BaseElement> { export abstract class UIContext<ElementType extends BaseElement = BaseElement> {
abstract screenshotBase64: string; abstract screenshotBase64: string;
abstract screenshotBase64WithElementMarker?: string;
abstract content: ElementType[]; abstract content: ElementType[];
abstract size: Size; abstract size: Size;

View File

@ -57,7 +57,7 @@ export function writeDumpReport(
const attributesArr = Object.keys(attributes || {}).map((key) => { const attributesArr = Object.keys(attributes || {}).map((key) => {
return `${key}="${encodeURIComponent(attributes![key])}"`; return `${key}="${encodeURIComponent(attributes![key])}"`;
}); });
return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(' ')}>${dumpString}</script>`; return `<script type="midscene_web_dump" type="application/json" ${attributesArr.join(' ')}>\n${dumpString}\n</script>`;
}); });
reportContent = tpl.replace('{{dump}}', dumps.join('\n')); reportContent = tpl.replace('{{dump}}', dumps.join('\n'));
} }

View File

@ -72,7 +72,7 @@ function build() {
const resultWithDemo = tplReplacer(html, { const resultWithDemo = tplReplacer(html, {
css: `<style>\n${css}\n</style>\n`, css: `<style>\n${css}\n</style>\n`,
js: `<script>\n${js}\n</script>`, js: `<script>\n${js}\n</script>`,
dump: `<script type="midscene_web_dump" type="application/json">${demoData}</script>`, dump: `<script type="midscene_web_dump" type="application/json">\n${demoData}\n</script>`,
}); });
writeFileSync(outputDemoHTML, resultWithDemo); writeFileSync(outputDemoHTML, resultWithDemo);
console.log(`HTML file generated successfully: ${outputDemoHTML}`); console.log(`HTML file generated successfully: ${outputDemoHTML}`);

View File

@ -11,8 +11,6 @@ import { useBlackboardPreference, useInsightDump } from './store';
const itemFillAlpha = 0.4; const itemFillAlpha = 0.4;
const highlightAlpha = 0.4; const highlightAlpha = 0.4;
const bgOnAlpha = 1;
const bgOffAlpha = 0.3;
const noop = () => { const noop = () => {
// noop // noop
}; };
@ -70,7 +68,7 @@ const BlackBoard = (): JSX.Element => {
const highlightIds = highlightElements.map((e) => e.id); const highlightIds = highlightElements.map((e) => e.id);
const { context } = dump!; const { context } = dump!;
const { size, screenshotBase64 } = context; const { size, screenshotBase64, screenshotBase64WithElementMarker } = context;
const screenWidth = size.width; const screenWidth = size.width;
const screenHeight = size.height; const screenHeight = size.height;
@ -84,9 +82,11 @@ const BlackBoard = (): JSX.Element => {
// key overlays // key overlays
const pixiBgRef = useRef<PIXI.Sprite>(); const pixiBgRef = useRef<PIXI.Sprite>();
const { bgVisible, setBgVisible, elementsVisible, setTextsVisible } = const { markerVisible, setMarkerVisible, elementsVisible, setTextsVisible } =
useBlackboardPreference(); useBlackboardPreference();
const ifMarkerAvailable = !!screenshotBase64WithElementMarker;
useEffect(() => { useEffect(() => {
Promise.resolve( Promise.resolve(
(async () => { (async () => {
@ -139,14 +139,28 @@ const BlackBoard = (): JSX.Element => {
img.onload = () => { img.onload = () => {
if (!app.stage) return; if (!app.stage) return;
const screenshotTexture = PIXI.Texture.from(img); const screenshotTexture = PIXI.Texture.from(img);
const screenshotSprite = new PIXI.Sprite(screenshotTexture); const backgroundSprite = new PIXI.Sprite(screenshotTexture);
screenshotSprite.x = 0; backgroundSprite.x = 0;
screenshotSprite.y = 0; backgroundSprite.y = 0;
screenshotSprite.width = screenWidth; backgroundSprite.width = screenWidth;
screenshotSprite.height = screenHeight; backgroundSprite.height = screenHeight;
app.stage.addChildAt(screenshotSprite, 0); app.stage.addChildAt(backgroundSprite, 0);
pixiBgRef.current = screenshotSprite;
screenshotSprite.alpha = bgVisible ? bgOnAlpha : bgOffAlpha; if (ifMarkerAvailable) {
const markerImg = new Image();
markerImg.src = screenshotBase64WithElementMarker;
markerImg.onload = () => {
const markerTexture = PIXI.Texture.from(markerImg);
const markerSprite = new PIXI.Sprite(markerTexture);
markerSprite.x = 0;
markerSprite.y = 0;
markerSprite.width = screenWidth;
markerSprite.height = screenHeight;
app.stage.addChildAt(markerSprite, 1);
pixiBgRef.current = markerSprite;
markerSprite.visible = markerVisible;
};
}
}; };
}, [app.stage, appInitialed]); }, [app.stage, appInitialed]);
@ -156,7 +170,7 @@ const BlackBoard = (): JSX.Element => {
highlightContainer.removeChildren(); highlightContainer.removeChildren();
elementMarkContainer.removeChildren(); elementMarkContainer.removeChildren();
// element mark // element rects
context.content.forEach((element) => { context.content.forEach((element) => {
const { rect, content, id } = element; const { rect, content, id } = element;
const ifHighlight = highlightIds.includes(id); const ifHighlight = highlightIds.includes(id);
@ -198,10 +212,10 @@ const BlackBoard = (): JSX.Element => {
// elementsVisible, // elementsVisible,
]); ]);
const onSetBg: CheckboxProps['onChange'] = (e) => { const onSetMarkerVisible: CheckboxProps['onChange'] = (e) => {
setBgVisible(e.target.checked); setMarkerVisible(e.target.checked);
if (pixiBgRef.current) { if (pixiBgRef.current) {
pixiBgRef.current.alpha = e.target.checked ? bgOnAlpha : bgOffAlpha; pixiBgRef.current.visible = e.target.checked;
} }
}; };
@ -238,8 +252,12 @@ const BlackBoard = (): JSX.Element => {
/> />
<div className="blackboard-filter"> <div className="blackboard-filter">
<div className="overlay-control"> <div className="overlay-control">
<Checkbox checked={bgVisible} onChange={onSetBg}> <Checkbox
Screenshot checked={markerVisible}
onChange={onSetMarkerVisible}
disabled={!ifMarkerAvailable}
>
Marker
</Checkbox> </Checkbox>
<Checkbox checked={elementsVisible} onChange={onSetElementsVisible}> <Checkbox checked={elementsVisible} onChange={onSetElementsVisible}>
Elements Elements

View File

@ -9,7 +9,7 @@
width: fit-content; width: fit-content;
max-width: 100%; max-width: 100%;
max-height: 100%; max-height: 100%;
padding: @player-spacing; padding: @player-spacing 0;
padding-bottom: 0; padding-bottom: 0;
background: #434443DD; background: #434443DD;
box-sizing: border-box; box-sizing: border-box;
@ -27,7 +27,7 @@
align-items: center; align-items: center;
justify-content: center; justify-content: center;
overflow: hidden; overflow: hidden;
padding: 0 @player-spacing;
canvas { canvas {
max-width: 100%; max-width: 100%;
max-height: 100%; max-height: 100%;
@ -65,6 +65,7 @@
display: flex; display: flex;
flex-direction: row; flex-direction: row;
flex-shrink: 0; flex-shrink: 0;
padding: 0 @player-spacing;
.status-icon { .status-icon {
transition: .2s; transition: .2s;

View File

@ -545,7 +545,8 @@ const Player = (): JSX.Element => {
return acc + item.duration + (item.insightCameraDuration || 0); return acc + item.duration + (item.insightCameraDuration || 0);
}, 0); }, 0);
const progressUpdateInterval = 300; // progress bar
const progressUpdateInterval = 200;
const startTime = performance.now(); const startTime = performance.now();
setAnimationProgress(0); setAnimationProgress(0);
const updateProgress = () => { const updateProgress = () => {
@ -553,8 +554,11 @@ const Player = (): JSX.Element => {
(performance.now() - startTime) / totalDuration, (performance.now() - startTime) / totalDuration,
1, 1,
); );
setAnimationProgress(progress); setAnimationProgress(progress);
return timeout(updateProgress, progressUpdateInterval); if (progress < 1) {
return timeout(updateProgress, progressUpdateInterval);
}
}; };
frame(updateProgress); frame(updateProgress);

View File

@ -40,6 +40,7 @@ export interface AnimationScript {
} }
const stillDuration = 1200; const stillDuration = 1200;
const stillAfterInsightDuration = 300;
const locateDuration = 800; const locateDuration = 800;
const actionDuration = 1000; const actionDuration = 1000;
const clearInsightDuration = 200; const clearInsightDuration = 200;
@ -185,9 +186,23 @@ export const generateAnimationScripts = (
throw new Error('insight dump is required'); throw new Error('insight dump is required');
} }
const insightContentLength = insightDump.context.content.length; const insightContentLength = insightDump.context.content.length;
if (insightDump.context.screenshotBase64WithElementMarker) {
// show the original screenshot first
scripts.push({
type: 'img',
img: insightDump.context.screenshotBase64,
duration: stillAfterInsightDuration,
title,
subTitle,
});
}
scripts.push({ scripts.push({
type: 'insight', type: 'insight',
img: insightDump.context.screenshotBase64, img:
insightDump.context.screenshotBase64WithElementMarker ||
insightDump.context.screenshotBase64,
insightDump: insightDump, insightDump: insightDump,
camera: camera:
currentCameraState === fullPageCameraState || !insightCameraState currentCameraState === fullPageCameraState || !insightCameraState
@ -202,7 +217,7 @@ export const generateAnimationScripts = (
scripts.push({ scripts.push({
type: 'sleep', type: 'sleep',
duration: 800, duration: stillAfterInsightDuration,
title, title,
subTitle, subTitle,
}); });

View File

@ -13,15 +13,15 @@ import { generateAnimationScripts } from './replay-scripts';
const { create } = Z; const { create } = Z;
export const useBlackboardPreference = create<{ export const useBlackboardPreference = create<{
bgVisible: boolean; markerVisible: boolean;
elementsVisible: boolean; elementsVisible: boolean;
setBgVisible: (visible: boolean) => void; setMarkerVisible: (visible: boolean) => void;
setTextsVisible: (visible: boolean) => void; setTextsVisible: (visible: boolean) => void;
}>((set) => ({ }>((set) => ({
bgVisible: true, markerVisible: true,
elementsVisible: true, elementsVisible: true,
setBgVisible: (visible: boolean) => { setMarkerVisible: (visible: boolean) => {
set({ bgVisible: visible }); set({ markerVisible: visible });
}, },
setTextsVisible: (visible: boolean) => { setTextsVisible: (visible: boolean) => {
set({ elementsVisible: visible }); set({ elementsVisible: visible });
@ -126,8 +126,10 @@ export const useExecutionDump = create<{
execution.tasks.forEach((task) => { execution.tasks.forEach((task) => {
if (task.type === 'Insight') { if (task.type === 'Insight') {
const insightTask = task as ExecutionTaskInsightLocate; const insightTask = task as ExecutionTaskInsightLocate;
width = insightTask.log?.dump?.context?.size?.width || 1920; if (insightTask.log?.dump?.context?.size?.width) {
height = insightTask.log?.dump?.context?.size?.height || 1080; width = insightTask.log?.dump?.context?.size?.width;
height = insightTask.log?.dump?.context?.size?.height;
}
} }
}); });
}); });

View File

@ -44,7 +44,7 @@ export async function parseContextFromWebPage(
const size = await imageInfoOfBase64(screenshotBase64); const size = await imageInfoOfBase64(screenshotBase64);
// composite element infos to screenshot // composite element infos to screenshot
const screenshotBase64WithElementInfos = await compositeElementInfoImg({ const screenshotBase64WithElementMarker = await compositeElementInfoImg({
inputImgBase64: screenshotBase64.split(';base64,').pop() as string, inputImgBase64: screenshotBase64.split(';base64,').pop() as string,
elementsPositionInfo: elementsPositionInfoWithoutText, elementsPositionInfo: elementsPositionInfoWithoutText,
}); });
@ -52,7 +52,8 @@ export async function parseContextFromWebPage(
return { return {
content: elementsInfo, content: elementsInfo,
size, size,
screenshotBase64: `data:image/png;base64,${screenshotBase64WithElementInfos}`, screenshotBase64,
screenshotBase64WithElementMarker: `data:image/png;base64,${screenshotBase64WithElementMarker}`,
url, url,
}; };
} }

View File

@ -157,6 +157,7 @@ export function extractTextWithPosition(initNode: Document): ElementInfo[] {
nodeType = NodeType.BUTTON; nodeType = NodeType.BUTTON;
break; break;
case 'SEARCHINPUT': case 'SEARCHINPUT':
case 'TEXTINPUT':
case 'INPUT': case 'INPUT':
nodeType = NodeType.FORM_ITEM; nodeType = NodeType.FORM_ITEM;
break; break;

View File

@ -58,9 +58,11 @@ describe(
); );
const mid = new PuppeteerAgent(originPage); const mid = new PuppeteerAgent(originPage);
await mid.aiAction('Click the password input on page');
await mid.aiAction('scroll down two screen'); await mid.aiAction('scroll down two screen');
const widgets = await mid.aiQuery( await mid.aiQuery(
'find all inputs in the page, return the field name in string[]', 'find all inputs in the page, return the field name in string[]',
); );