Leyang fb2580616c
fix cache bugs(scroll instant, text node info, cache hit condition) (#732)
* fix(web-integration): cache hit when element.id exist and scroll element instantly

* fix(web-integration): use parent xpath for text node

* fix(web-integration): only scroll in to view when element is not completly visible

* fix(shared): distinct text node

* test(web-integration): getElementInfoByXpath

* test(web-integration): rename desc

* test(web-integration): fix

* test(web-integration): snapshot attributes only

* test(web-integration): fix test
2025-05-20 13:19:32 +08:00

252 lines
7.6 KiB
TypeScript

import { join } from 'node:path';
import { parseContextFromWebPage } from '@/common/utils';
import StaticPage from '@/playground/static-page';
import type { WebElementInfo } from '@/web-element';
import { sleep } from '@midscene/core/utils';
import { traverseTree } from '@midscene/shared/extractor';
import { getElementInfosScriptContent } from '@midscene/shared/fs';
import {
compositeElementInfoImg,
imageInfoOfBase64,
saveBase64Image,
} from '@midscene/shared/img';
import { createServer } from 'http-server';
import { beforeAll, describe, expect, it } from 'vitest';
import { launchPage } from '../ai/web/puppeteer/utils';
const pageDir = join(__dirname, './fixtures/web-extractor');
const pagePath = join(pageDir, 'index.html');
describe(
'extractor',
() => {
const port = 8082;
beforeAll(async () => {
const localServer = await new Promise((resolve, reject) => {
const server = createServer({
root: pageDir,
});
server.listen(port, '127.0.0.1', () => {
resolve(server);
});
});
return () => {
(localServer as any).server.close();
};
});
it('basic', async () => {
const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, {
viewport: {
width: 1080,
height: 3000,
deviceScaleFactor: 1,
},
});
const { content, tree, screenshotBase64 } =
await parseContextFromWebPage(page);
const markedImg = await compositeElementInfoImg({
inputImgBase64: await page.screenshotBase64(),
elementsPositionInfo: content,
});
await saveBase64Image({
base64Data: screenshotBase64,
outputPath: join(pageDir, 'input.png'),
});
await saveBase64Image({
base64Data: markedImg,
outputPath: join(pageDir, 'output.png'),
});
const list = content.map((item) => {
return {
content: item.content,
attributes: item.attributes,
};
});
expect(list).toMatchSnapshot();
const simplifiedTree = traverseTree(tree!, (node) => {
return {
content: node.content,
indexId: node.indexId,
attributes: node.attributes,
} as any;
});
expect(simplifiedTree).toMatchSnapshot();
await reset();
});
it.skip('keep same id after resize', async () => {
const { page, reset } = await launchPage(
`file://${pagePath}?resize-after-3s=1`,
{
viewport: {
width: 1080,
height: 2000,
},
},
);
const filterTargetElement = (items: WebElementInfo[]) => {
return items.find((item) => item.attributes?.id === 'J_resize');
};
const { content } = await parseContextFromWebPage(page);
const item = filterTargetElement(content);
expect(item).toBeDefined();
// check all the ids are different
const ids = content.map((item) => item.id);
const uniqueIds = new Set(ids);
expect(uniqueIds.size).toBe(ids.length);
await new Promise((resolve) => setTimeout(resolve, 3000 + 1000));
const { content: content2 } = await parseContextFromWebPage(page);
const item2 = filterTargetElement(content2);
expect(item2).toBeDefined();
expect(item2?.id).toBe(item?.id);
await reset();
});
it('check screenshot size - 1x', async () => {
const { page, reset } = await launchPage(`file://${pagePath}`, {
viewport: {
width: 1080,
height: 2000,
deviceScaleFactor: 1,
},
});
const shotBase64 = await page.screenshotBase64();
const info = await imageInfoOfBase64(shotBase64);
expect(info.height).toBe(2000);
expect(info.width).toBe(1080);
await reset();
});
it('check screenshot size - 2x', async () => {
const { page, reset } = await launchPage(`file://${pagePath}`, {
viewport: {
width: 1080,
height: 2000,
deviceScaleFactor: 2,
},
});
const shotBase64 = await page.screenshotBase64();
const info = await imageInfoOfBase64(shotBase64);
expect(info.width).toBe(2160);
expect(info.height).toBe(4000);
await reset();
});
// it('scroll', async () => {
// const { page, reset } = await launchPage(`file://${pagePath}`, {
// viewport: {
// width: 1080,
// height: 200,
// deviceScaleFactor: 1,
// },
// });
// await page.scrollDown();
// await new Promise((resolve) => setTimeout(resolve, 1000));
// await generateExtractData(
// page,
// path.join(__dirname, 'fixtures/web-extractor/scroll'),
// {
// disableInputImage: false,
// disableOutputImage: false,
// disableOutputWithoutTextImg: true,
// disableResizeOutputImg: true,
// disableSnapshot: true,
// },
// );
// await reset();
// });
it('profiling', async () => {
const { page, reset } = await launchPage('https://www.bytedance.com');
await new Promise((resolve) => setTimeout(resolve, 1000));
console.time('total - parseContextFromWebPage');
await parseContextFromWebPage(page);
console.timeEnd('total - parseContextFromWebPage');
await reset();
});
it('static page with fixed context', async () => {
const fakeContext = {
foo: 'bar',
};
const page = new StaticPage(fakeContext as any);
const context = await parseContextFromWebPage(page);
expect(context).toBe(fakeContext);
});
it('getElementInfoByXpath from text node by evaluateJavaScript', async () => {
const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, {
viewport: {
width: 1080,
height: 3000,
deviceScaleFactor: 1,
},
});
const elementInfosScriptContent = getElementInfosScriptContent();
const element = await page.evaluateJavaScript?.(
`${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('/html/body/div[2]/div/div/ul/li[1]/span/text()[1]')`,
);
expect(element.content).toBe('English');
expect(element.nodeType).toBe('TEXT Node');
expect(element.attributes).toMatchSnapshot();
await reset();
});
it('getElementInfoByXpath from button node by evaluateJavaScript', async () => {
const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, {
viewport: {
width: 1080,
height: 3000,
deviceScaleFactor: 1,
},
});
const elementInfosScriptContent = getElementInfosScriptContent();
const element = await page.evaluateJavaScript?.(
`${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('/html/body/button')`,
);
expect(element.nodeType).toBe('BUTTON Node');
expect(element.attributes).toMatchSnapshot();
await reset();
});
it('getElementInfoByXpath from non form/button/image/text/container node by evaluateJavaScript', async () => {
const { page, reset } = await launchPage(`http://127.0.0.1:${port}`, {
viewport: {
width: 1080,
height: 3000,
deviceScaleFactor: 1,
},
});
const elementInfosScriptContent = getElementInfosScriptContent();
const element = await page.evaluateJavaScript?.(
`${elementInfosScriptContent}midscene_element_inspector.getElementInfoByXpath('/html/body/div[3]/div')`,
);
expect(element).toBe(null);
await reset();
});
},
{
timeout: 90 * 1000,
},
);