mirror of
https://github.com/web-infra-dev/midscene.git
synced 2025-12-29 08:00:09 +00:00
feat(browser-event): support drag event (#321)
This commit is contained in:
parent
08a159db03
commit
839dc6c799
@ -8,7 +8,14 @@ import {
|
||||
} from './prompt/ui-tars-planning';
|
||||
import { call } from './service-caller';
|
||||
|
||||
type ActionType = 'click' | 'type' | 'hotkey' | 'finished' | 'scroll' | 'wait';
|
||||
/**
 * Union of the action type name literals used in this module.
 * `'drag'` and `'wait'`, for example, also appear as the `action_type`
 * discriminant of the `DragAction` and `WaitAction` interfaces in this file.
 */
type ActionType =
|
||||
| 'click'
|
||||
| 'drag'
|
||||
| 'type'
|
||||
| 'hotkey'
|
||||
| 'finished'
|
||||
| 'scroll'
|
||||
| 'wait';
|
||||
|
||||
function capitalize(str: string) {
|
||||
return str.charAt(0).toUpperCase() + str.slice(1);
|
||||
@ -60,6 +67,18 @@ export async function vlmPlanning(options: {
|
||||
},
|
||||
param: action.thought || '',
|
||||
});
|
||||
} else if (action.action_type === 'drag') {
|
||||
const startPoint = getPoint(action.action_inputs.start_box, size);
|
||||
const endPoint = getPoint(action.action_inputs.end_box, size);
|
||||
transformActions.push({
|
||||
type: 'Drag',
|
||||
param: {
|
||||
start_box: { x: startPoint[0], y: startPoint[1] },
|
||||
end_box: { x: endPoint[0], y: endPoint[1] },
|
||||
},
|
||||
locate: null,
|
||||
thought: action.thought || '',
|
||||
});
|
||||
} else if (action.action_type === 'type') {
|
||||
transformActions.push({
|
||||
type: 'Input',
|
||||
@ -140,6 +159,14 @@ interface ClickAction extends BaseAction {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Action whose `action_type` discriminant is `'drag'`.
 *
 * `vlmPlanning` passes `start_box` and `end_box` (together with `size`) to
 * `getPoint` and uses elements `[0]` and `[1]` of each result as the x / y of
 * the drag start and end points.
 */
interface DragAction extends BaseAction {
|
||||
action_type: 'drag';
|
||||
action_inputs: {
|
||||
start_box: string; // JSON string of [x, y] coordinates
|
||||
end_box: string; // JSON string of [x, y] coordinates
|
||||
};
|
||||
}
|
||||
|
||||
interface WaitAction extends BaseAction {
|
||||
action_type: 'wait';
|
||||
action_inputs: {
|
||||
@ -175,6 +202,7 @@ interface FinishedAction extends BaseAction {
|
||||
|
||||
export type Action =
|
||||
| ClickAction
|
||||
| DragAction
|
||||
| TypeAction
|
||||
| HotkeyAction
|
||||
| ScrollAction
|
||||
|
||||
@ -221,6 +221,7 @@ export interface PlanningAction<ParamType = any> {
|
||||
type:
|
||||
| 'Locate'
|
||||
| 'Tap'
|
||||
| 'Drag'
|
||||
| 'Hover'
|
||||
| 'Input'
|
||||
| 'KeyboardPress'
|
||||
|
||||
@ -107,7 +107,8 @@
|
||||
"test": "vitest --run",
|
||||
"test:u": "vitest --run -u",
|
||||
"test:ai": "AI_TEST_TYPE=web npm run test",
|
||||
"test:ai:bridge": "BRIDGE_MODE=true npm run test --inspect packages/web-integration/tests/ai/bridge/agent.test.ts",
|
||||
"test:ai:temp": "AI_TEST_TYPE=web vitest --run tests/ai/bridge/temp.test.ts",
|
||||
"test:ai:bridge": "BRIDGE_MODE=true npm run test --inspect tests/ai/bridge/agent.test.ts",
|
||||
"test:ai:cache": "MIDSCENE_CACHE=true AI_TEST_TYPE=web npm run test",
|
||||
"test:ai:all": "npm run test:ai:web && npm run test:ai:native",
|
||||
"test:ai:native": "MIDSCENE_CACHE=true AI_TEST_TYPE=native npm run test",
|
||||
|
||||
@ -63,6 +63,8 @@ export class Page implements AbstractPage {
|
||||
wheel: (deltaX: number, deltaY: number) =>
|
||||
this.mouseWheel(deltaX, deltaY),
|
||||
move: (x: number, y: number) => this.mouseMove(x, y),
|
||||
drag: (from: { x: number; y: number }, to: { x: number; y: number }) =>
|
||||
this.mouseDrag(from, to),
|
||||
};
|
||||
}
|
||||
|
||||
@ -249,6 +251,25 @@ export class Page implements AbstractPage {
|
||||
]);
|
||||
}
|
||||
|
||||
/**
 * Drags with the mouse pointer from one coordinate to another.
 *
 * Sends one pointer action sequence through `this.browser.performActions`:
 * move to `from`, press button 0, move to `to` with `duration: 500`, then
 * release button 0.
 *
 * @param from - Coordinate at which the button is pressed.
 * @param to - Coordinate at which the button is released.
 */
private async mouseDrag(
  from: { x: number; y: number },
  to: { x: number; y: number },
): Promise<void> {
  const { x: startX, y: startY } = from;
  const { x: endX, y: endY } = to;

  await this.browser.performActions([
    {
      type: 'pointer',
      id: 'mouse',
      parameters: { pointerType: 'mouse' },
      actions: [
        // Jump to the start point without any travel time.
        { type: 'pointerMove', duration: 0, x: startX, y: startY },
        { type: 'pointerDown', button: 0 },
        // Travel to the end point while the button is held.
        { type: 'pointerMove', duration: 500, x: endX, y: endY },
        { type: 'pointerUp', button: 0 },
      ],
    },
  ]);
}
|
||||
|
||||
private async mouseWheel(
|
||||
deltaX: number,
|
||||
deltaY: number,
|
||||
|
||||
@ -63,6 +63,7 @@ export const getBridgePageInCliSide = (): ChromeExtensionPageCliSide => {
|
||||
click: bridgeCaller(MouseEvent.Click),
|
||||
wheel: bridgeCaller(MouseEvent.Wheel),
|
||||
move: bridgeCaller(MouseEvent.Move),
|
||||
drag: bridgeCaller(MouseEvent.Drag),
|
||||
};
|
||||
return mouse;
|
||||
}
|
||||
|
||||
@ -26,6 +26,7 @@ export enum MouseEvent {
|
||||
Click = 'mouse.click',
|
||||
Wheel = 'mouse.wheel',
|
||||
Move = 'mouse.move',
|
||||
Drag = 'mouse.drag',
|
||||
}
|
||||
|
||||
export enum KeyboardEvent {
|
||||
|
||||
@ -55,6 +55,9 @@ export class ChromeExtensionPageBrowserSide extends ChromeExtensionProxyPage {
|
||||
|
||||
if (method.startsWith(MouseEvent.PREFIX)) {
|
||||
const actionName = method.split('.')[1] as keyof MouseAction;
|
||||
if (actionName === 'drag') {
|
||||
return this.mouse[actionName].apply(this.mouse, args as any);
|
||||
}
|
||||
return this.mouse[actionName].apply(this.mouse, args as any);
|
||||
}
|
||||
|
||||
|
||||
@ -425,6 +425,27 @@ export default class ChromeExtensionProxyPage implements AbstractPage {
|
||||
y,
|
||||
});
|
||||
},
|
||||
/**
 * Drags with the left mouse button from `from` to `to`.
 *
 * Order of operations: move to `from`, dispatch `mousePressed` there,
 * move to `to`, dispatch `mouseReleased` there. Both button events go
 * through `sendCommandToDebugger('Input.dispatchMouseEvent', ...)`.
 */
drag: async (
  from: { x: number; y: number },
  to: { x: number; y: number },
) => {
  // Press and release differ only in event type and position.
  const dispatchLeftButton = async (
    type: 'mousePressed' | 'mouseReleased',
    point: { x: number; y: number },
  ) => {
    await this.sendCommandToDebugger('Input.dispatchMouseEvent', {
      type,
      x: point.x,
      y: point.y,
      button: 'left',
      clickCount: 1,
    });
  };

  await this.mouse.move(from.x, from.y);
  await dispatchLeftButton('mousePressed', from);
  await this.mouse.move(to.x, to.y);
  await dispatchLeftButton('mouseReleased', to);
},
|
||||
};
|
||||
|
||||
keyboard = {
|
||||
|
||||
@ -311,6 +311,25 @@ export class PageTaskExecutor {
|
||||
},
|
||||
};
|
||||
tasks.push(taskActionTap);
|
||||
} else if (plan.type === 'Drag') {
|
||||
const taskActionDrag: ExecutionTaskActionApply<{
|
||||
start_box: { x: number; y: number };
|
||||
end_box: { x: number; y: number };
|
||||
}> = {
|
||||
type: 'Action',
|
||||
subType: 'Drag',
|
||||
param: plan.param,
|
||||
thought: plan.thought,
|
||||
locate: plan.locate,
|
||||
executor: async (taskParam) => {
|
||||
assert(
|
||||
taskParam?.start_box && taskParam?.end_box,
|
||||
'No start_box or end_box to drag',
|
||||
);
|
||||
await this.page.mouse.drag(taskParam.start_box, taskParam.end_box);
|
||||
},
|
||||
};
|
||||
tasks.push(taskActionDrag);
|
||||
} else if (plan.type === 'Hover') {
|
||||
const taskActionHover: ExecutionTaskActionApply<PlanningActionParamHover> =
|
||||
{
|
||||
|
||||
@ -13,6 +13,10 @@ export interface MouseAction {
|
||||
) => Promise<void>;
|
||||
wheel: (deltaX: number, deltaY: number) => Promise<void>;
|
||||
move: (x: number, y: number) => Promise<void>;
|
||||
drag: (
|
||||
from: { x: number; y: number },
|
||||
to: { x: number; y: number },
|
||||
) => Promise<void>;
|
||||
}
|
||||
|
||||
export interface KeyboardAction {
|
||||
@ -36,6 +40,10 @@ export abstract class AbstractPage {
|
||||
) => {},
|
||||
wheel: async (deltaX: number, deltaY: number) => {},
|
||||
move: async (x: number, y: number) => {},
|
||||
drag: async (
|
||||
from: { x: number; y: number },
|
||||
to: { x: number; y: number },
|
||||
) => {},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@ -80,6 +80,7 @@ export default class StaticPage implements AbstractPage {
|
||||
click: ThrowNotImplemented.bind(null, 'mouse.click'),
|
||||
wheel: ThrowNotImplemented.bind(null, 'mouse.wheel'),
|
||||
move: ThrowNotImplemented.bind(null, 'mouse.move'),
|
||||
drag: ThrowNotImplemented.bind(null, 'mouse.drag'),
|
||||
};
|
||||
|
||||
keyboard = {
|
||||
|
||||
@ -96,6 +96,32 @@ export class Page<
|
||||
},
|
||||
move: async (x: number, y: number) =>
|
||||
this.underlyingPage.mouse.move(x, y),
|
||||
/**
 * Drags the mouse from `from` to `to` on the underlying page.
 *
 * - `pageType === 'puppeteer'`: delegates to the page's own `mouse.drag`
 *   and discards its result.
 * - `pageType === 'playwright'`: emulates the drag with
 *   move -> down -> move -> up.
 *
 * Any other `pageType` value results in no action.
 */
drag: async (
  from: { x: number; y: number },
  to: { x: number; y: number },
) => {
  if (this.pageType === 'puppeteer') {
    await (this.underlyingPage as PuppeteerPage).mouse.drag(
      { x: from.x, y: from.y },
      { x: to.x, y: to.y },
    );
  } else if (this.pageType === 'playwright') {
    // Playwright doesn't have a drag method, so we need to simulate it
    const playwrightMouse = (this.underlyingPage as PlaywrightPage).mouse;
    await playwrightMouse.move(from.x, from.y);
    await playwrightMouse.down();
    // Emit intermediate mousemove events while the button is held: a single
    // move event is not enough to reliably trigger drag handling in every
    // browser (Playwright recommends at least two moves).
    await playwrightMouse.move(to.x, to.y, { steps: 5 });
    await playwrightMouse.up();
  }
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
19
packages/web-integration/tests/ai/bridge/temp.test.ts
Normal file
19
packages/web-integration/tests/ai/bridge/temp.test.ts
Normal file
@ -0,0 +1,19 @@
|
||||
import {
|
||||
AgentOverChromeBridge,
|
||||
getBridgePageInCliSide,
|
||||
} from '@/bridge-mode/agent-cli-side';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
|
||||
// AI-driven bridge runs are slow; give each test up to 260 seconds.
vi.setConfig({
  testTimeout: 260 * 1000,
});

// Only runs when BRIDGE_MODE is set in the environment.
describe.skipIf(!process.env.BRIDGE_MODE)('drag event', () => {
  it('agent in cli side, current tab', async () => {
    const agent = new AgentOverChromeBridge();
    try {
      await agent.connectCurrentTab();
      await agent.ai('Finish dragging the slider');
    } finally {
      // Always tear the agent down, even when connecting or the AI step
      // fails, so a failed run does not leave the bridge agent alive.
      await agent.destroy();
    }
  });
});
|
||||
Loading…
x
Reference in New Issue
Block a user