fix(web-extract): fix extractor failure when the document has no <body/> (#76)

This commit is contained in:
yuyutaotao 2024-08-28 19:21:32 +08:00 committed by GitHub
parent e071adfa70
commit f8fdf60570
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 296 additions and 164 deletions

View File

@ -98,7 +98,8 @@ export class PageTaskExecutor {
type: 'Insight',
subType: 'Locate',
param: plan.param,
executor: async (param) => {
executor: async (param, taskContext) => {
const { task } = taskContext;
let insightDump: InsightDump | undefined;
const dumpCollector: DumpSubscriber = (dump) => {
insightDump = dump;
@ -124,7 +125,6 @@ export class PageTaskExecutor {
},
});
assert(element, `Element not found: ${param.prompt}`);
if (locateResult) {
this.taskCache.saveCache({
type: 'locate',
@ -136,6 +136,13 @@ export class PageTaskExecutor {
response: locateResult,
});
}
if (!element) {
task.log = {
dump: insightDump,
};
throw new Error(`Element not found: ${param.prompt}`);
}
return {
output: {
element,
@ -296,8 +303,7 @@ export class PageTaskExecutor {
subType: 'Sleep',
param: plan.param,
executor: async (taskParam) => {
assert(taskParam.timeMs, 'No time to sleep');
await sleep(taskParam.timeMs);
await sleep(taskParam.timeMs || 3000);
},
};
return taskActionSleep;

View File

@ -2,3 +2,4 @@ import { extractTextWithPosition } from '.';
// Debug entry script: run the extractor against the current document body and
// print the results so they can be inspected in the browser console.
// First pass: second argument `true`, result logged as a live object.
console.log(extractTextWithPosition(document.body, true));
// Second pass: second argument `false`, result logged as a JSON string.
console.log(JSON.stringify(extractTextWithPosition(document.body, false)));
// Expose the extractor on `window` so it can be invoked manually from the page.
(window as any).extractTextWithPosition = extractTextWithPosition;

View File

@ -20,3 +20,12 @@ export function isImgElement(node: Node): node is HTMLImageElement {
/**
 * Tells whether `node` is a text node, judged by its node name.
 *
 * NOTE(review): the match is on the `#text` node name, while the declared
 * predicate narrows to `HTMLTextAreaElement` — confirm callers do not rely on
 * textarea-only members after this guard.
 *
 * @param node - DOM node to inspect.
 * @returns `true` when the lower-cased node name equals `#text`.
 */
export function isTextElement(node: Node): node is HTMLTextAreaElement {
  const lowerCasedName = node.nodeName.toLowerCase();
  return lowerCasedName === '#text';
}
/**
 * Tells whether `node` is an HTML element that carries at least one of the
 * ARIA attributes `aria-label`, `aria-controls` or `aria-labelledby`.
 *
 * @param node - DOM node to inspect.
 * @returns `true` only for `HTMLElement` instances having one of those attributes.
 */
export function isWidgetElement(node: Node): node is HTMLElement {
  // Anything that is not an HTMLElement can never qualify.
  if (!(node instanceof HTMLElement)) {
    return false;
  }
  const element = node;
  // Checked in this order; the first attribute present settles the answer.
  const ariaMarkers = ['aria-label', 'aria-controls', 'aria-labelledby'];
  return ariaMarkers.some((attributeName) =>
    element.hasAttribute(attributeName),
  );
}

View File

@ -1,15 +1,16 @@
import { NodeType, TEXT_SIZE_THRESHOLD } from '@midscene/shared/constants';
import { NodeType } from '@midscene/shared/constants';
import {
isButtonElement,
isFormElement,
isImgElement,
isTextElement,
isWidgetElement,
} from './dom-util';
import {
generateHash,
getNodeAttributes,
getPseudoElementContent,
logger,
midsceneGenerateHash,
setDataForNode,
setDebugMode,
visibleRect,
@ -36,7 +37,7 @@ export interface ElementInfo {
center: [number, number];
}
const container: HTMLElement = document.body;
const container: HTMLElement = document.body || document;
function generateId(numberId: number) {
// const letters = 'ABCDEFGHIJKLMNPRSTUVXYZ';
@ -89,7 +90,7 @@ export function extractTextWithPosition(
if (isFormElement(node)) {
const attributes = getNodeAttributes(node);
const nodeHashId = generateHash(attributes.placeholder, rect);
const nodeHashId = midsceneGenerateHash(attributes.placeholder, rect);
const selector = setDataForNode(node, nodeHashId);
let valueContent =
attributes.value || attributes.placeholder || node.textContent || '';
@ -130,7 +131,7 @@ export function extractTextWithPosition(
const attributes = getNodeAttributes(node);
const pseudo = getPseudoElementContent(node);
const content = node.innerText || pseudo.before || pseudo.after || '';
const nodeHashId = generateHash(content, rect);
const nodeHashId = midsceneGenerateHash(content, rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
@ -155,7 +156,7 @@ export function extractTextWithPosition(
if (isImgElement(node)) {
const attributes = getNodeAttributes(node);
const nodeHashId = generateHash('', rect);
const nodeHashId = midsceneGenerateHash('', rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
@ -188,7 +189,7 @@ export function extractTextWithPosition(
if (!text.trim() && attributeKeys.length === 0) {
return;
}
const nodeHashId = generateHash(text, rect);
const nodeHashId = midsceneGenerateHash(text, rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
@ -212,6 +213,31 @@ export function extractTextWithPosition(
return;
}
if (isWidgetElement(node)) {
const attributes = getNodeAttributes(node);
const nodeHashId = midsceneGenerateHash('', rect);
const selector = setDataForNode(node, nodeHashId);
elementInfoArray.push({
id: nodeHashId,
indexId: generateId(nodeIndex++),
nodeHashId,
nodeType: NodeType.FORM_ITEM,
locator: selector,
attributes: {
...attributes,
nodeType: NodeType.FORM_ITEM,
},
content: '',
rect,
center: [
Math.round(rect.left + rect.width / 2),
Math.round(rect.top + rect.height / 2),
],
htmlNode: debugMode ? node : null,
});
return true;
}
return true;
}

View File

@ -154,13 +154,17 @@ export function visibleRect(
if (parentStyle.overflow === 'hidden') {
const parentRect = parent.getBoundingClientRect();
const tolerance = 10;
if (
rect.top < parentRect.top - tolerance &&
rect.left < parentRect.left - tolerance &&
rect.bottom > parentRect.bottom + tolerance &&
rect.right > parentRect.right + tolerance
rect.right < parentRect.left - tolerance ||
rect.left > parentRect.right + tolerance ||
rect.bottom < parentRect.top - tolerance ||
rect.top > parentRect.bottom + tolerance
) {
logger('Element is clipped by an ancestor', parent, rect, parentRect);
logger(el, 'element is partially or totally hidden by an ancestor', {
rect,
parentRect,
});
return false;
}
}
@ -168,8 +172,8 @@ export function visibleRect(
}
return {
left: Math.round(rect.left - scrollLeft),
top: Math.round(rect.top - scrollTop),
left: rect.left,
top: rect.top,
width: Math.round(rect.width),
height: Math.round(rect.height),
};
@ -232,7 +236,7 @@ export function getNodeAttributes(
return Object.fromEntries(attributesList);
}
export function generateHash(content: string, rect: any): string {
export function midsceneGenerateHash(content: string, rect: any): string {
// Combine the input into a string
const combined = JSON.stringify({ content, rect });
// Generate the SHA-256 hash value
@ -242,4 +246,5 @@ export function generateHash(content: string, rect: any): string {
return hashHex.slice(0, 10);
}
(window as any).generateHash = generateHash;
(window as any).midsceneGenerateHash = midsceneGenerateHash;
(window as any).midsceneVisibleRect = visibleRect;

View File

@ -372,5 +372,42 @@ exports[`extractor > basic 1`] = `
},
"content": "",
},
{
"attributes": {
"nodeType": "TEXT Node",
},
"content": "content AAA",
},
{
"attributes": {
"aria-label": "Click me",
"class": ".widget",
"nodeType": "FORM_ITEM Node",
"role": "button",
},
"content": "",
},
{
"attributes": {
"nodeType": "TEXT Node",
},
"content": "Click me",
},
{
"attributes": {
"aria-controls": "semi-select-5yxiyng",
"class": ".widget",
"nodeType": "FORM_ITEM Node",
},
"content": "",
},
{
"attributes": {
"aria-labelledby": "eval_object.object_type-label",
"class": ".widget",
"nodeType": "FORM_ITEM Node",
},
"content": "",
},
]
`;

View File

@ -35,6 +35,30 @@ describe(
await reset();
});
// Extraction test on a scrolled page: opens the fixture in a 1080x200
// viewport, scrolls to y=400, then generates extract data into
// fixtures/extractor/scroll.
it('scroll', async () => {
  const { page, reset } = await launchPage(`file://${pagePath}`, {
    viewport: {
      width: 1080,
      height: 200,
    },
  });
  try {
    await page.evaluate(() => {
      window.scrollTo(0, 400);
    });
    // Pause for one second after scrolling before extracting.
    await new Promise((resolve) => setTimeout(resolve, 1000));
    await generateExtractData(
      page,
      path.join(__dirname, 'fixtures/extractor/scroll'),
      {
        disableInputImage: false,
        disableOutputImage: false,
        disableOutputWithoutTextImg: true,
        disableResizeOutputImg: true,
        disableSnapshot: true,
      },
    );
  } finally {
    // Call reset() as the preceding test does, and do it even when extraction
    // throws. Presumably it tears down the launched page — confirm against
    // launchPage.
    await reset();
  }
});
it('profile ', async () => {
const { page, reset } = await launchPage('https://webinfra.org/about');
await new Promise((resolve) => setTimeout(resolve, 1000));

View File

@ -2,170 +2,194 @@
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sample HTML Page</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
table {
border-collapse: collapse;
width: 50%;
margin-top: 20px;
}
th,
td {
border: 1px solid #000;
padding: 8px;
text-align: center;
}
th {
background-color: #f2f2f2;
}
</style>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Sample HTML Page</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
table {
border-collapse: collapse;
width: 50%;
margin-top: 20px;
}
th,
td {
border: 1px solid #000;
padding: 8px;
text-align: center;
}
th {
background-color: #f2f2f2;
}
</style>
</head>
<body>
<h1>Data Record</h1>
<h2>1970-01-01 19:25:01</h2>
<h2>User Name: Stella</h2>
<table>
<thead>
<tr>
<th>ID</th>
<th>Field 2</th>
<th>Field 3</th>
<th>Field 4</th>
<th>Field 5</th>
</tr>
</thead>
<tbody>
<tr>
<td>30S</td>
<td>Kace Cervantes</td>
<td>Aylin Sawyer</td>
<td>Jefferson Kirby</td>
<td>Skyla Jefferson</td>
</tr>
<tr>
<td>70U</td>
<td>Florence Davenport</td>
<td>Dariel Acevedo</td>
<td>Ashlynn Delacruz</td>
<td>Memphis Leal</td>
</tr>
<tr>
<td>3AY</td>
<td>Crystal Newman</td>
<td>Anderson Brown</td>
<td>Charlotte Griffith</td>
<td>Franklin Everett</td>
</tr>
<tr>
<td>YPG</td>
<td>Kori Payne</td>
<td>Edward Blevins</td>
<td>Aila Gill</td>
<td>Matthias Reed</td>
</tr>
<tr>
<td>ZEN</td>
<td>Magnolia Duke</td>
<td>Kalel Glover</td>
<td>Alessia Barton</td>
<td>Cassius Peck</td>
</tr>
</tbody>
</table>
<div>
<h3>Form</h3>
<label for="name">Name:</label>
<input id="J_input" placeholder="Hello World This is Placeholder" />
<button>Click Me</button>
<label>Shape</label>
<input placeholder="You shouldn't see this placeholder." value="Rectangle" />
<pre id="J_keyRecord"></pre>
<textarea placeholder="this_is_a_textarea"></textarea>
<img src="https://lf3-static.bytednsdoc.com/obj/eden-cn/vhaeh7vhabf/midscene.png" width="2" height="2"
alt="small_img" />
</div>
<!-- Global popup to test invisible -->
<div style="position: absolute; top: 0px; left: 500px; width: 100%;">
<h1>Data Record</h1>
<h2>1970-01-01 19:25:01</h2>
<h2>User Name: Stella</h2>
<table>
<thead>
<tr>
<th>ID</th>
<th>Field 2</th>
<th>Field 3</th>
<th>Field 4</th>
<th>Field 5</th>
</tr>
</thead>
<tbody>
<tr>
<td>30S</td>
<td>Kace Cervantes</td>
<td>Aylin Sawyer</td>
<td>Jefferson Kirby</td>
<td>Skyla Jefferson</td>
</tr>
<tr>
<td>70U</td>
<td>Florence Davenport</td>
<td>Dariel Acevedo</td>
<td>Ashlynn Delacruz</td>
<td>Memphis Leal</td>
</tr>
<tr>
<td>3AY</td>
<td>Crystal Newman</td>
<td>Anderson Brown</td>
<td>Charlotte Griffith</td>
<td>Franklin Everett</td>
</tr>
<tr>
<td>YPG</td>
<td>Kori Payne</td>
<td>Edward Blevins</td>
<td>Aila Gill</td>
<td>Matthias Reed</td>
</tr>
<tr>
<td>ZEN</td>
<td>Magnolia Duke</td>
<td>Kalel Glover</td>
<td>Alessia Barton</td>
<td>Cassius Peck</td>
</tr>
</tbody>
</table>
<div>
<div style="left: 59px; top: 46px;">
<ul >
<li><span>English</span></li>
<li><span>中文</span></li>
<li><span>Tiếng Việt</span>
</li>
</ul>
</div>
</div>
</div>
<h3>Form</h3>
<label for="name">Name:</label>
<input id="J_input" placeholder="Hello World This is Placeholder" />
<button>Click Me</button>
<label>Shape</label>
<input placeholder="You shouldn't see this placeholder." value="Rectangle" />
<pre id="J_keyRecord"></pre>
<textarea placeholder="this_is_a_textarea"></textarea>
<img src="https://lf3-static.bytednsdoc.com/obj/eden-cn/vhaeh7vhabf/midscene.png" width="2" height="2"
alt="small_img" />
</div>
<!-- form -->
<form action="/submit-form" method="post">
<label for="options">Choose an option:</label>
<select id="options" name="options">
<!-- Global popup to test invisible -->
<div style="position: absolute; top: 0px; left: 500px; width: 100%;">
<div>
<div style="left: 59px; top: 46px;">
<ul>
<li><span>English</span></li>
<li><span>中文</span></li>
<li><span>Tiếng Việt</span>
</li>
</ul>
</div>
</div>
</div>
<!-- form -->
<form action="/submit-form" method="post">
<label for="options">Choose an option:</label>
<select id="options" name="options">
<option value="option1-value">Option 1</option>
<option value="option2-value">Option 2</option>
</select>
</form>
</form>
<!-- a label with sibiling input -->
<div id="gh-ac-box2">
<label for="gh-ac" class="gh-ar-hdn">输入搜索关键词</label>
<input type="text" class="gh-tb ui-autocomplete-input" aria-autocomplete="list" aria-expanded="false" size="50"
maxlength="300" aria-label="搜索任何物品" placeholder="搜索任何物品" id="gh-ac" name="_nkw" autocapitalize="off"
autocorrect="off" spellcheck="false" autocomplete="off" aria-haspopup="true" role="combobox" aria-owns="ui-id-1">
<input style="display:none">
</div>
<!-- a label with nested input -->
<label
class="life-core-input-inner__wrapper life-core-input-inner__wrapper-border life-core-input-inner__wrapper-size-md life-core-input-inner__wrapper-add-suffix">
<!-- a label with sibiling input -->
<div id="gh-ac-box2">
<label for="gh-ac" class="gh-ar-hdn">输入搜索关键词</label>
<input type="text" class="gh-tb ui-autocomplete-input" aria-autocomplete="list" aria-expanded="false" size="50"
maxlength="300" aria-label="搜索任何物品" placeholder="搜索任何物品" id="gh-ac" name="_nkw" autocapitalize="off" autocorrect="off"
spellcheck="false" autocomplete="off" aria-haspopup="true" role="combobox" aria-owns="ui-id-1">
<input style="display:none">
</div>
<!-- a label with nested input -->
<label
class="life-core-input-inner__wrapper life-core-input-inner__wrapper-border life-core-input-inner__wrapper-size-md life-core-input-inner__wrapper-add-suffix">
<input placeholder="验证码" tabindex="0" type="text" class="life-core-input life-core-input-size-md" value="">
</label>
<!-- a checkbox drawn by div -->
<style>
.life-core-check-wrapper {
display: none;
}
.life-core-checkbox-icon {
display: inline-block;
width: 16px;
height: 16px;
border: 1px solid #333;
}
</style>
<span class="life-core-popper-trigger life-core-popper-trigger-focus life-core-tooltip">
<!-- a checkbox drawn by div -->
<style>
.life-core-check-wrapper {
display: none;
}
.life-core-checkbox-icon {
display: inline-block;
width: 16px;
height: 16px;
border: 1px solid #333;
}
</style>
<span class="life-core-popper-trigger life-core-popper-trigger-focus life-core-tooltip">
<label
class="life-core-checkbox life-core-checkbox-md src-pages-Login-components-LoginCard-index-module__checkbox--Npo2V--212e2">
<input class="life-core-check-wrapper" type="checkbox">
<span class="life-core-checkbox-icon">
</span>
<span class="life-core-checkbox-label life-core-checkbox-label-no">
<span class="life-core-checkbox-label life-core-checkbox-label-no">
</span>
</label>
</span>
</span>
<script>
var input = document.getElementById('J_input');
var keyRecord = document.getElementById('J_keyRecord');
input.addEventListener('keyup', function (e) {
keyRecord.innerHTML += '\n' + e.key;
});
</script>
<!-- item hidden by css overflow: hidden -->
<style>
.item-hidden-poc {
width: 100%;
height: 100px;
overflow: hidden;
}
</style>
<div class="item-hidden-poc">
<div style="height: 120px">content AAA</div>
<div style="height: 120px">content BBBB</div>
</div>
<!-- widget wrapper defined by aria -->
<style>
.widget {
width: 100px;
height: 100px;
background-color: #ccc;
margin-top: 10px;
}
</style>
<div class="widget" role="button" aria-label="Click me">Click me</div>
<div class="widget" aria-controls="semi-select-5yxiyng"></div>
<div class="widget" aria-labelledby="eval_object.object_type-label"></div>
<script>
var input = document.getElementById('J_input');
var keyRecord = document.getElementById('J_keyRecord');
input.addEventListener('keyup', function (e) {
keyRecord.innerHTML += '\n' + e.key;
});
</script>
</body>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 96 KiB

After

Width:  |  Height:  |  Size: 115 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 111 KiB

After

Width:  |  Height:  |  Size: 131 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB