fix cache bugs(scroll instant, text node info, cache hit condition) (#732)

* fix(web-integration): cache hit only when element.id exists, and scroll to the element instantly

* fix(web-integration): use parent xpath for text node

* fix(web-integration): only scroll into view when element is not completely visible

* fix(shared): distinguish text nodes

* test(web-integration): getElementInfoByXpath

* test(web-integration): rename desc

* test(web-integration): fix

* test(web-integration): snapshot attributes only

* test(web-integration): fix test
This commit is contained in:
Leyang 2025-05-20 13:19:32 +08:00 committed by GitHub
parent 89fd77b96a
commit fb2580616c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 170 additions and 12 deletions

View File

@ -1,5 +1,6 @@
import type { ElementInfo } from '.';
import { getNodeFromCacheList } from './util';
import { getRect, isElementPartiallyInViewport } from './util';
import { collectElementInfo } from './web-extractor';
const getElementIndex = (element: Element): number => {
@ -30,6 +31,21 @@ const findFirstAncestorWithId = (element: Element): Element | null => {
return null;
};
/**
 * Compute the 1-based position of a node among its preceding text-node
 * siblings: the result is 1 plus the number of earlier siblings whose
 * nodeType is TEXT_NODE. Non-text siblings are walked over but not counted.
 */
const getTextNodeIndex = (textNode: Node): number => {
  let position = 1;
  // Walk backwards through the sibling chain until it runs out.
  for (
    let sibling = textNode.previousSibling;
    sibling;
    sibling = sibling.previousSibling
  ) {
    if (sibling.nodeType === Node.TEXT_NODE) {
      position += 1;
    }
  }
  return position;
};
const getElementXPath = (element: Node): string => {
// deal with text node
if (element.nodeType === Node.TEXT_NODE) {
@ -37,7 +53,8 @@ const getElementXPath = (element: Node): string => {
const parentNode = element.parentNode;
if (parentNode && parentNode.nodeType === Node.ELEMENT_NODE) {
const parentXPath = getElementXPath(parentNode);
return `${parentXPath}/text()`;
const textIndex = getTextNodeIndex(element);
return `${parentXPath}/text()[${textIndex}]`;
}
return '';
}
@ -144,7 +161,13 @@ export function getElementInfoByXpath(xpath: string): ElementInfo | null {
}
if (node instanceof HTMLElement) {
node.scrollIntoView({ behavior: 'smooth', block: 'center' });
// only when the element is not completely in the viewport, call scrollIntoView
const rect = getRect(node, 1, window);
const isVisible = isElementPartiallyInViewport(rect, window, document, 1);
if (!isVisible) {
node.scrollIntoView({ behavior: 'instant', block: 'center' });
}
}
return collectElementInfo(

View File

@ -58,10 +58,11 @@ export function setDataForNode(
return selector;
}
function isElementPartiallyInViewport(
export function isElementPartiallyInViewport(
rect: ReturnType<typeof getRect>,
currentWindow: typeof window,
currentDocument: typeof document,
visibleAreaRatio: number = 2 / 3,
) {
const elementHeight = rect.height;
const elementWidth = rect.width;
@ -90,7 +91,7 @@ function isElementPartiallyInViewport(
const visibleArea = overlapRect.width * overlapRect.height;
const totalArea = elementHeight * elementWidth;
// return visibleArea > 30 * 30 || visibleArea / totalArea >= 2 / 3;
return visibleArea / totalArea >= 2 / 3;
return visibleArea / totalArea >= visibleAreaRatio;
}
export function getPseudoElementContent(

View File

@ -239,14 +239,17 @@ export class PageTaskExecutor {
const element = await this.page.evaluateJavaScript?.(
`${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('${xpaths[0]}')`,
);
elementFromCache = element;
debug('cache hit, prompt: %s', cachePrompt);
cacheHitFlag = true;
debug(
'found a new new element with same xpath, xpath: %s, id: %s',
xpaths[0],
element?.id,
);
if (element?.id) {
elementFromCache = element;
debug('cache hit, prompt: %s', cachePrompt);
cacheHitFlag = true;
debug(
'found a new new element with same xpath, xpath: %s, id: %s',
xpaths[0],
element?.id,
);
}
}
} catch (error) {
debug('get element info by xpath error: ', error);

View File

@ -3115,3 +3115,79 @@ exports[`extractor > basic 2`] = `
"node": null,
}
`;
exports[`extractor > getElementInfoByXpath by evaluateJavaScript 1`] = `
{
"attributes": {
"htmlTagName": "<span>",
"nodeType": "TEXT Node",
},
"center": [
556,
46,
],
"content": "中文",
"id": "emaam",
"indexId": 0,
"locator": "",
"nodeHashId": "emaam",
"nodeType": "TEXT Node",
"rect": {
"height": 18,
"left": 540,
"top": 37,
"width": 32,
"zoom": 1,
},
"zoom": 1,
}
`;
exports[`extractor > getElementInfoByXpath from button node by evaluateJavaScript 1`] = `
{
"aria-label": "Search",
"class": ".btn",
"htmlTagName": "<body>",
"nodeType": "BUTTON Node",
"tabindex": "0",
"type": "submit",
}
`;
exports[`extractor > getElementInfoByXpath from div node by evaluateJavaScript 1`] = `
{
"attributes": {
"aria-label": "Search",
"class": ".btn",
"htmlTagName": "<body>",
"nodeType": "BUTTON Node",
"tabindex": "0",
"type": "submit",
},
"center": [
283,
1160,
],
"content": " ",
"id": "kohcf",
"indexId": 0,
"locator": "[_midscene_retrieve_task_id='kohcf']",
"nodeHashId": "kohcf",
"nodeType": "BUTTON Node",
"rect": {
"height": 210,
"left": 73,
"top": 1055,
"width": 420,
"zoom": 1,
},
"zoom": 1,
}
`;
exports[`extractor > getElementInfoByXpath from text node by evaluateJavaScript 1`] = `
{
"htmlTagName": "<span>",
"nodeType": "TEXT Node",
}
`;

Binary file not shown.

Before

Width:  |  Height:  |  Size: 624 KiB

After

Width:  |  Height:  |  Size: 598 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 845 KiB

After

Width:  |  Height:  |  Size: 768 KiB

View File

@ -2,7 +2,9 @@ import { join } from 'node:path';
import { parseContextFromWebPage } from '@/common/utils';
import StaticPage from '@/playground/static-page';
import type { WebElementInfo } from '@/web-element';
import { sleep } from '@midscene/core/utils';
import { traverseTree } from '@midscene/shared/extractor';
import { getElementInfosScriptContent } from '@midscene/shared/fs';
import {
compositeElementInfoImg,
imageInfoOfBase64,
@ -189,6 +191,59 @@ describe(
const context = await parseContextFromWebPage(page);
expect(context).toBe(fakeContext);
});
// Resolves a text-node XPath (ending in text()[1]) through the injected
// inspector script and checks the content, node type and attributes returned.
it('getElementInfoByXpath from text node by evaluateJavaScript', async () => {
  const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, {
    viewport: {
      width: 1080,
      height: 3000,
      deviceScaleFactor: 1,
    },
  });

  // Run reset() even when an assertion throws; presumably it tears down the
  // launched page, which would otherwise leak into later tests.
  try {
    const elementInfosScriptContent = getElementInfosScriptContent();
    const element = await page.evaluateJavaScript?.(
      `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('/html/body/div[2]/div/div/ul/li[1]/span/text()[1]')`,
    );
    expect(element.content).toBe('English');
    expect(element.nodeType).toBe('TEXT Node');
    expect(element.attributes).toMatchSnapshot();
  } finally {
    await reset();
  }
});
// Resolves the XPath of a <button> through the injected inspector script and
// checks the reported node type and attributes.
it('getElementInfoByXpath from button node by evaluateJavaScript', async () => {
  const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, {
    viewport: {
      width: 1080,
      height: 3000,
      deviceScaleFactor: 1,
    },
  });

  // Run reset() even when an assertion throws; presumably it tears down the
  // launched page, which would otherwise leak into later tests.
  try {
    const elementInfosScriptContent = getElementInfosScriptContent();
    const element = await page.evaluateJavaScript?.(
      `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('/html/body/button')`,
    );
    expect(element.nodeType).toBe('BUTTON Node');
    expect(element.attributes).toMatchSnapshot();
  } finally {
    await reset();
  }
});
// Resolves the XPath of a plain <div> (not a form/button/image/text/container
// node) and expects the inspector to return null for it.
it('getElementInfoByXpath from non form/button/image/text/container node by evaluateJavaScript', async () => {
  const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, {
    viewport: {
      width: 1080,
      height: 3000,
      deviceScaleFactor: 1,
    },
  });

  // Run reset() even when the assertion throws; presumably it tears down the
  // launched page, which would otherwise leak into later tests.
  try {
    const elementInfosScriptContent = getElementInfosScriptContent();
    const element = await page.evaluateJavaScript?.(
      `${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('/html/body/div[3]/div')`,
    );
    expect(element).toBe(null);
  } finally {
    await reset();
  }
});
},
{
timeout: 90 * 1000,