fix: coord offset of qwen model (#407)
--------- Co-authored-by: zhouxiao.shaw <zhouxiao.shaw@bytedance.com>
@ -3,7 +3,7 @@
|
||||
"testCases": [
|
||||
{
|
||||
"prompt": "'最简单的用法'下方有五个 icon,左侧第一个 icon",
|
||||
"response_bbox": [486, 871, 509, 883],
|
||||
"response_bbox": [487, 860, 509, 873],
|
||||
"response": [
|
||||
{
|
||||
"id": "nkpld",
|
||||
@ -14,7 +14,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "'最简单的用法'下方有五个 icon,左侧第二个 icon",
|
||||
"response_bbox": [519, 872, 536, 884],
|
||||
"response_bbox": [519, 860, 537, 874],
|
||||
"response": [
|
||||
{
|
||||
"id": "hdbbh",
|
||||
@ -25,7 +25,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "'最简单的用法'下方有五个 icon,左侧第三个 icon",
|
||||
"response_bbox": [552, 871, 569, 883],
|
||||
"response_bbox": [549, 861, 570, 873],
|
||||
"response": [
|
||||
{
|
||||
"id": "ncono",
|
||||
@ -36,7 +36,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "'最简单的用法'下方有五个 icon,左侧第四个 icon",
|
||||
"response_bbox": [583, 872, 596, 884],
|
||||
"response_bbox": [583, 861, 597, 874],
|
||||
"response": [
|
||||
{
|
||||
"id": "jkeam",
|
||||
@ -47,7 +47,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "'最简单的用法'下方有五个 icon,最右侧的 icon",
|
||||
"response_bbox": [617, 873, 629, 884],
|
||||
"response_bbox": [615, 862, 630, 874],
|
||||
"response": [
|
||||
{
|
||||
"id": "nnkcf",
|
||||
@ -58,7 +58,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "全屏幕右上角、版本号右侧有三个 icon ,查找左侧第一个",
|
||||
"response_bbox": [1269, 24, 1290, 38],
|
||||
"response_bbox": [1270, 23, 1290, 40],
|
||||
"response": [
|
||||
{
|
||||
"id": "dinoj",
|
||||
@ -69,7 +69,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "全屏幕右上角有三个 icon ,查找左侧第二个",
|
||||
"response_bbox": [1314, 20, 1335, 40],
|
||||
"response_bbox": [1315, 20, 1334, 39],
|
||||
"response": [
|
||||
{
|
||||
"id": "nfpha",
|
||||
@ -80,7 +80,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "屏幕右上角有三个 icon ,左侧第三个",
|
||||
"response_bbox": [1356, 20, 1378, 42],
|
||||
"response_bbox": [1356, 20, 1378, 40],
|
||||
"response": [
|
||||
{
|
||||
"id": "hmbld",
|
||||
@ -91,7 +91,7 @@
|
||||
},
|
||||
{
|
||||
"prompt": "在‘代码演示’右侧有三个 icon 按钮中,查找最中间的按钮",
|
||||
"response_bbox": [1185, 503, 1204, 517],
|
||||
"response_bbox": [1184, 497, 1203, 516],
|
||||
"response": [
|
||||
{
|
||||
"id": "pkafb",
|
||||
|
||||
|
Before Width: | Height: | Size: 360 KiB After Width: | Height: | Size: 358 KiB |
@ -10,7 +10,7 @@
|
||||
"indexId": 7
|
||||
}
|
||||
],
|
||||
"response_bbox": [726, 248, 798, 268],
|
||||
"response_bbox": [721, 245, 793, 263],
|
||||
"annotation_index_id": 1
|
||||
},
|
||||
{
|
||||
@ -22,7 +22,7 @@
|
||||
"indexId": 5
|
||||
}
|
||||
],
|
||||
"response_bbox": [489, 247, 560, 267],
|
||||
"response_bbox": [486, 245, 557, 263],
|
||||
"annotation_index_id": 2
|
||||
},
|
||||
{
|
||||
@ -34,7 +34,7 @@
|
||||
"indexId": 10
|
||||
}
|
||||
],
|
||||
"response_bbox": [495, 348, 792, 396],
|
||||
"response_bbox": [492, 341, 786, 390],
|
||||
"annotation_index_id": 3
|
||||
},
|
||||
{
|
||||
@ -46,7 +46,7 @@
|
||||
"indexId": 12
|
||||
}
|
||||
],
|
||||
"response_bbox": [496, 425, 793, 475],
|
||||
"response_bbox": [492, 416, 787, 468],
|
||||
"annotation_index_id": 4
|
||||
},
|
||||
{
|
||||
@ -58,7 +58,7 @@
|
||||
"indexId": 11
|
||||
}
|
||||
],
|
||||
"response_bbox": [701, 439, 772, 458],
|
||||
"response_bbox": [697, 435, 768, 452],
|
||||
"annotation_index_id": 5
|
||||
},
|
||||
{
|
||||
@ -70,7 +70,7 @@
|
||||
"indexId": 18
|
||||
}
|
||||
],
|
||||
"response_bbox": [495, 568, 792, 613],
|
||||
"response_bbox": [492, 557, 786, 603],
|
||||
"annotation_index_id": 6
|
||||
},
|
||||
{
|
||||
@ -82,7 +82,7 @@
|
||||
"indexId": 4
|
||||
}
|
||||
],
|
||||
"response_bbox": [849, 120, 867, 135],
|
||||
"response_bbox": [843, 120, 862, 135],
|
||||
"annotation_index_id": 7
|
||||
}
|
||||
]
|
||||
|
||||
|
Before Width: | Height: | Size: 194 KiB After Width: | Height: | Size: 192 KiB |
@ -9,7 +9,7 @@
|
||||
"indexId": 4
|
||||
}
|
||||
],
|
||||
"response_bbox": [16, 782, 30, 795],
|
||||
"response_bbox": [16, 770, 32, 784],
|
||||
"annotation_index_id": 1
|
||||
},
|
||||
{
|
||||
@ -20,7 +20,7 @@
|
||||
"indexId": 23
|
||||
}
|
||||
],
|
||||
"response_bbox": [1209, 354, 1238, 376],
|
||||
"response_bbox": [1203, 354, 1228, 371],
|
||||
"annotation_index_id": 2
|
||||
},
|
||||
{
|
||||
@ -31,7 +31,7 @@
|
||||
"indexId": 25
|
||||
}
|
||||
],
|
||||
"response_bbox": [1209, 435, 1240, 456],
|
||||
"response_bbox": [1204, 425, 1236, 447],
|
||||
"annotation_index_id": 3
|
||||
},
|
||||
{
|
||||
@ -42,7 +42,7 @@
|
||||
"indexId": -1
|
||||
}
|
||||
],
|
||||
"response_bbox": [1213, 509, 1240, 528],
|
||||
"response_bbox": [1204, 500, 1230, 520],
|
||||
"annotation_index_id": 4
|
||||
},
|
||||
{
|
||||
@ -53,7 +53,7 @@
|
||||
"indexId": -1
|
||||
}
|
||||
],
|
||||
"response_bbox": [1209, 584, 1237, 602],
|
||||
"response_bbox": [1205, 648, 1237, 665],
|
||||
"annotation_index_id": 5
|
||||
},
|
||||
{
|
||||
@ -64,7 +64,7 @@
|
||||
"indexId": -1
|
||||
}
|
||||
],
|
||||
"response_bbox": [1207, 783, 1226, 800],
|
||||
"response_bbox": [1203, 768, 1224, 785],
|
||||
"annotation_index_id": 6
|
||||
}
|
||||
]
|
||||
|
||||
|
Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 130 KiB |
@ -10,7 +10,7 @@
|
||||
"indexId": 0
|
||||
}
|
||||
],
|
||||
"response_bbox": [16, 20, 40, 35],
|
||||
"response_bbox": [16, 20, 43, 37],
|
||||
"annotation_index_id": 1
|
||||
},
|
||||
{
|
||||
@ -22,7 +22,7 @@
|
||||
"indexId": 1
|
||||
}
|
||||
],
|
||||
"response_bbox": [56, 14, 123, 40],
|
||||
"response_bbox": [57, 14, 126, 43],
|
||||
"annotation_index_id": 2
|
||||
},
|
||||
{
|
||||
@ -34,7 +34,7 @@
|
||||
"indexId": 3
|
||||
}
|
||||
],
|
||||
"response_bbox": [345, 18, 365, 39],
|
||||
"response_bbox": [352, 18, 374, 36],
|
||||
"annotation_index_id": 3
|
||||
},
|
||||
{
|
||||
@ -46,7 +46,7 @@
|
||||
"indexId": 20
|
||||
}
|
||||
],
|
||||
"response_bbox": [187, 716, 223, 735],
|
||||
"response_bbox": [189, 723, 228, 740],
|
||||
"annotation_index_id": 4
|
||||
},
|
||||
{
|
||||
@ -58,7 +58,7 @@
|
||||
"indexId": 26
|
||||
}
|
||||
],
|
||||
"response_bbox": [296, 854, 379, 871],
|
||||
"response_bbox": [301, 862, 389, 884],
|
||||
"annotation_index_id": 5
|
||||
},
|
||||
{
|
||||
@ -70,7 +70,7 @@
|
||||
"indexId": 27
|
||||
}
|
||||
],
|
||||
"response_bbox": [362, 810, 385, 840],
|
||||
"response_bbox": [368, 824, 401, 847],
|
||||
"annotation_index_id": 6
|
||||
}
|
||||
]
|
||||
|
||||
|
Before Width: | Height: | Size: 376 KiB After Width: | Height: | Size: 373 KiB |
@ -10,7 +10,7 @@
|
||||
"indexId": 20
|
||||
}
|
||||
],
|
||||
"response_bbox": [184, 276, 250, 293],
|
||||
"response_bbox": [186, 279, 245, 293],
|
||||
"annotation_index_id": 1
|
||||
},
|
||||
{
|
||||
@ -22,7 +22,7 @@
|
||||
"indexId": 39
|
||||
}
|
||||
],
|
||||
"response_bbox": [326, 789, 379, 809],
|
||||
"response_bbox": [324, 801, 387, 816],
|
||||
"annotation_index_id": 2
|
||||
},
|
||||
{
|
||||
@ -34,7 +34,7 @@
|
||||
"indexId": 38
|
||||
}
|
||||
],
|
||||
"response_bbox": [186, 795, 223, 809],
|
||||
"response_bbox": [190, 804, 230, 825],
|
||||
"annotation_index_id": 3
|
||||
},
|
||||
{
|
||||
@ -46,7 +46,7 @@
|
||||
"indexId": 9
|
||||
}
|
||||
],
|
||||
"response_bbox": [0, 375, 68, 401],
|
||||
"response_bbox": [0, 378, 45, 401],
|
||||
"annotation_index_id": 4
|
||||
}
|
||||
]
|
||||
|
||||
|
Before Width: | Height: | Size: 290 KiB After Width: | Height: | Size: 289 KiB |
@ -10,7 +10,7 @@
|
||||
"indexId": 27
|
||||
}
|
||||
],
|
||||
"response_bbox": [321, 54, 857, 96],
|
||||
"response_bbox": [318, 52, 854, 96],
|
||||
"annotation_index_id": 1
|
||||
},
|
||||
{
|
||||
@ -22,7 +22,7 @@
|
||||
"indexId": 26
|
||||
}
|
||||
],
|
||||
"response_bbox": [780, 56, 859, 94],
|
||||
"response_bbox": [780, 56, 849, 92],
|
||||
"annotation_index_id": 2
|
||||
},
|
||||
{
|
||||
@ -34,7 +34,7 @@
|
||||
"indexId": 99
|
||||
}
|
||||
],
|
||||
"response_bbox": [138, 436, 170, 450],
|
||||
"response_bbox": [92, 427, 125, 443],
|
||||
"annotation_index_id": 3
|
||||
},
|
||||
{
|
||||
@ -46,7 +46,7 @@
|
||||
"indexId": 190
|
||||
}
|
||||
],
|
||||
"response_bbox": [1065, 392, 1094, 408],
|
||||
"response_bbox": [1062, 385, 1090, 407],
|
||||
"annotation_index_id": 4
|
||||
},
|
||||
{
|
||||
@ -58,7 +58,7 @@
|
||||
"indexId": 212
|
||||
}
|
||||
],
|
||||
"response_bbox": [1253, 306, 1287, 495],
|
||||
"response_bbox": [1240, 356, 1279, 408],
|
||||
"annotation_index_id": 5
|
||||
}
|
||||
]
|
||||
|
||||
|
Before Width: | Height: | Size: 1.0 MiB After Width: | Height: | Size: 1.0 MiB |
@ -10,7 +10,7 @@
|
||||
"indexId": 18
|
||||
}
|
||||
],
|
||||
"response_bbox": [516, 128, 1074, 198],
|
||||
"response_bbox": [512, 126, 1068, 197],
|
||||
"annotation_index_id": 1
|
||||
},
|
||||
{
|
||||
@ -22,7 +22,7 @@
|
||||
"indexId": 18
|
||||
}
|
||||
],
|
||||
"response_bbox": [516, 128, 1074, 198],
|
||||
"response_bbox": [512, 126, 1068, 197],
|
||||
"annotation_index_id": 2
|
||||
},
|
||||
{
|
||||
@ -34,7 +34,7 @@
|
||||
"indexId": 24
|
||||
}
|
||||
],
|
||||
"response_bbox": [578, 279, 694, 298],
|
||||
"response_bbox": [574, 276, 693, 294],
|
||||
"annotation_index_id": 3
|
||||
},
|
||||
{
|
||||
@ -46,7 +46,7 @@
|
||||
"indexId": 25
|
||||
}
|
||||
],
|
||||
"response_bbox": [1034, 282, 1050, 296],
|
||||
"response_bbox": [1027, 278, 1045, 293],
|
||||
"annotation_index_id": 4
|
||||
},
|
||||
{
|
||||
@ -58,7 +58,7 @@
|
||||
"indexId": 26
|
||||
}
|
||||
],
|
||||
"response_bbox": [524, 336, 560, 361],
|
||||
"response_bbox": [521, 334, 557, 360],
|
||||
"annotation_index_id": 5
|
||||
},
|
||||
{
|
||||
@ -70,7 +70,7 @@
|
||||
"indexId": 30
|
||||
}
|
||||
],
|
||||
"response_bbox": [806, 394, 877, 405],
|
||||
"response_bbox": [801, 390, 872, 405],
|
||||
"annotation_index_id": 6
|
||||
}
|
||||
]
|
||||
|
||||
|
Before Width: | Height: | Size: 307 KiB After Width: | Height: | Size: 305 KiB |
@ -241,6 +241,7 @@ export async function annotatePoints(
|
||||
indexId: item.indexId,
|
||||
};
|
||||
}),
|
||||
annotationPadding: 0,
|
||||
});
|
||||
return markedImage;
|
||||
}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import assert from 'node:assert';
|
||||
import { MIDSCENE_USE_QWEN_VL, getAIConfigInBoolean } from '@/env';
|
||||
import type {
|
||||
AIAssertionResponse,
|
||||
AIElementIdResponse,
|
||||
@ -13,6 +14,7 @@ import type {
|
||||
Size,
|
||||
UIContext,
|
||||
} from '@/types';
|
||||
import { paddingToMatchBlock } from '@midscene/shared/img';
|
||||
import type {
|
||||
ChatCompletionSystemMessageParam,
|
||||
ChatCompletionUserMessageParam,
|
||||
@ -248,6 +250,12 @@ export async function AiInspectElement<
|
||||
});
|
||||
const systemPrompt = systemPromptToLocateElement();
|
||||
|
||||
let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
|
||||
|
||||
if (getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL)) {
|
||||
imagePayload = await paddingToMatchBlock(imagePayload);
|
||||
}
|
||||
|
||||
const msgs: AIArgs = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{
|
||||
@ -256,7 +264,7 @@ export async function AiInspectElement<
|
||||
{
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: screenshotBase64WithElementMarker || screenshotBase64,
|
||||
url: imagePayload,
|
||||
detail: 'high',
|
||||
},
|
||||
},
|
||||
|
||||
@ -5,6 +5,7 @@ import type {
|
||||
PlanningLocateParam,
|
||||
UIContext,
|
||||
} from '@/types';
|
||||
import { paddingToMatchBlock } from '@midscene/shared/img';
|
||||
import { AIActionType, type AIArgs, callAiFn } from './common';
|
||||
import {
|
||||
automationUserPrompt,
|
||||
@ -12,7 +13,6 @@ import {
|
||||
systemPromptToTaskPlanning,
|
||||
} from './prompt/llm-planning';
|
||||
import { describeUserPage } from './prompt/util';
|
||||
|
||||
// transform the param of locate from qwen mode
|
||||
export function fillLocateParam(locate: PlanningLocateParam) {
|
||||
if (locate?.bbox_2d && !locate?.bbox) {
|
||||
@ -60,6 +60,11 @@ export async function plan(
|
||||
taskBackgroundContext: taskBackgroundContextText,
|
||||
});
|
||||
|
||||
let imagePayload = screenshotBase64WithElementMarker || screenshotBase64;
|
||||
if (getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL)) {
|
||||
imagePayload = await paddingToMatchBlock(imagePayload);
|
||||
}
|
||||
|
||||
const msgs: AIArgs = [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{
|
||||
@ -68,7 +73,7 @@ export async function plan(
|
||||
{
|
||||
type: 'image_url',
|
||||
image_url: {
|
||||
url: screenshotBase64WithElementMarker || screenshotBase64,
|
||||
url: imagePayload,
|
||||
detail: 'high',
|
||||
},
|
||||
},
|
||||
|
||||
@ -24,6 +24,7 @@ const createSvgOverlay = async (
|
||||
elements: Array<ElementType>,
|
||||
imageWidth: number,
|
||||
imageHeight: number,
|
||||
boxPadding = 5,
|
||||
): Promise<Jimp> => {
|
||||
const Jimp = await getJimp();
|
||||
const image = new Jimp(imageWidth, imageHeight, 0x00000000);
|
||||
@ -37,7 +38,6 @@ const createSvgOverlay = async (
|
||||
{ rect: 0x500073ff, text: 0xffffffff }, // purple, white
|
||||
];
|
||||
|
||||
const boxPadding = 5;
|
||||
for (let index = 0; index < elements.length; index++) {
|
||||
const element = elements[index];
|
||||
const color = colors[index % colors.length];
|
||||
@ -183,6 +183,7 @@ export const compositeElementInfoImg = async (options: {
|
||||
inputImgBase64: string;
|
||||
elementsPositionInfo: Array<ElementType>;
|
||||
size?: { width: number; height: number };
|
||||
annotationPadding?: number;
|
||||
}) => {
|
||||
assert(options.inputImgBase64, 'inputImgBase64 is required');
|
||||
let width = 0;
|
||||
@ -224,6 +225,7 @@ export const compositeElementInfoImg = async (options: {
|
||||
elementsPositionInfo,
|
||||
width,
|
||||
height,
|
||||
options.annotationPadding,
|
||||
);
|
||||
const svgImage = await Jimp.read(svgOverlay);
|
||||
const compositeImage = await image.composite(svgImage, 0, 0, {
|
||||
|
||||
@ -3,7 +3,6 @@ export {
|
||||
imageInfoOfBase64,
|
||||
bufferFromBase64,
|
||||
base64Encoded,
|
||||
base64ToPngFormat,
|
||||
} from './info';
|
||||
export {
|
||||
trimImage,
|
||||
@ -12,6 +11,7 @@ export {
|
||||
transformImgPathToBase64,
|
||||
zoomForGPT4o,
|
||||
saveBase64Image,
|
||||
paddingToMatchBlock,
|
||||
} from './transform';
|
||||
export { processImageElementInfo, compositeElementInfoImg } from './box-select';
|
||||
export { drawBoxOnImage, savePositionImg } from './draw-box';
|
||||
|
||||
@ -88,7 +88,3 @@ export function base64Encoded(image: string, withHeader = true) {
|
||||
}
|
||||
throw new Error('unsupported image type');
|
||||
}
|
||||
|
||||
/**
 * Prefixes a raw base64 payload with the PNG data-URL header.
 *
 * @param base64 - Base64-encoded image data without any `data:` header.
 * @returns A string of the form `data:image/png;base64,<payload>`.
 */
export function base64ToPngFormat(base64: string) {
  return `data:image/png;base64,${base64}`;
}
|
||||
|
||||
@ -82,6 +82,15 @@ export async function resizeImg(
|
||||
return resizedBuffer;
|
||||
}
|
||||
|
||||
/**
 * Decodes the payload of a base64 data URL into a Buffer.
 *
 * The input must contain the `;base64,` marker exactly once
 * (e.g. `data:image/png;base64,<payload>`); everything after the marker is
 * decoded as base64.
 *
 * @param base64 - Base64 data URL including its header.
 * @returns A Buffer holding the decoded bytes.
 * @throws Error with message 'Invalid base64 data' when the marker is missing
 *   or appears more than once.
 */
export async function bufferFromBase64(base64: string) {
  const splitFlag = ';base64,';
  const dataSplitted = base64.split(splitFlag);
  // Exactly two parts are expected: the header and the payload.
  if (dataSplitted.length !== 2) {
    throw new Error('Invalid base64 data');
  }
  return Buffer.from(dataSplitted[1], 'base64');
}
|
||||
|
||||
export async function resizeImgBase64(
|
||||
inputBase64: string,
|
||||
newSize: {
|
||||
@ -182,91 +191,28 @@ export async function trimImage(image: string | Buffer): Promise<{
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Aligns an image's coordinate system based on trimming information
|
||||
*
|
||||
* This function takes an image and a center rectangle as input. It first extracts the center
|
||||
* rectangle from the image using Jimp and converts it to a buffer. Then, it calls
|
||||
* the trimImage function to obtain the trimming information of the buffer image. If there is no
|
||||
* trimming information, the original center rectangle is returned. If there is trimming information,
|
||||
* a new rectangle is created based on the trimming information, with its top-left corner
|
||||
* positioned at the negative offset of the trimming from the original center rectangle's top-left
|
||||
* corner, and its width and height set to the trimmed image's dimensions.
|
||||
*
|
||||
* @param image The image file path or buffer to be processed
|
||||
* @param center The center rectangle of the image, which is used to extract and align
|
||||
* @returns A Promise that resolves to a rectangle object representing the aligned coordinates
|
||||
* @throws Error if there is an error during image processing
|
||||
*/
|
||||
// export async function alignCoordByTrim(
|
||||
// image: string | Buffer,
|
||||
// centerRect: Rect,
|
||||
// ): Promise<Rect> {
|
||||
// const isBuffer = Buffer.isBuffer(image);
|
||||
// let jimpImage;
|
||||
// if (isBuffer) {
|
||||
// jimpImage = await Jimp.read(image);
|
||||
// } else {
|
||||
// jimpImage = await Jimp.read(image);
|
||||
// }
|
||||
/**
 * Builds a base64 data URL by prefixing a raw payload with a `data:` header.
 *
 * @param base64 - Base64-encoded data without any `data:` header.
 * @param mimeType - MIME type placed in the header (default `image/png`).
 * @returns A string of the form `data:<mimeType>;base64,<payload>`.
 */
export function prependBase64Header(base64: string, mimeType = 'image/png') {
  return `data:${mimeType};base64,${base64}`;
}
|
||||
|
||||
// const { width, height } = jimpImage.bitmap;
|
||||
// if (width <= 3 || height <= 3) {
|
||||
// return centerRect;
|
||||
// }
|
||||
// const zeroSize: Rect = {
|
||||
// left: 0,
|
||||
// top: 0,
|
||||
// width: -1,
|
||||
// height: -1,
|
||||
// };
|
||||
// const finalCenterRect: Rect = { ...centerRect };
|
||||
// if (centerRect.left > width || centerRect.top > height) {
|
||||
// return zeroSize;
|
||||
// }
|
||||
/**
 * Pads an image so that both its width and height are multiples of `blockSize`.
 *
 * The source image is composited at the top-left corner (0, 0) of a larger
 * canvas, so pixel coordinates of the source stay unchanged in the result;
 * the added area lies to the right of and below the source image.
 *
 * @param imageBase64 - Image as a base64 data URL; it is decoded with
 *   `bufferFromBase64`, so it must carry the `;base64,` header marker.
 * @param blockSize - Edge length of one block in pixels (default 28).
 * @returns The input string unchanged when both dimensions are already
 *   aligned; otherwise the padded image encoded via Jimp's `getBase64Async`
 *   with `Jimp.MIME_JPEG` (i.e. re-encoded as JPEG whatever the input format).
 * @throws Error when `blockSize` is not a finite, positive number.
 */
export async function paddingToMatchBlock(imageBase64: string, blockSize = 28) {
  // A zero, negative or non-finite block size would yield NaN/invalid canvas
  // dimensions below, so reject it up front with a clear message.
  if (!Number.isFinite(blockSize) || blockSize <= 0) {
    throw new Error(`blockSize must be a positive number, got ${blockSize}`);
  }

  const Jimp = await getJimp();
  const imageBuffer = await bufferFromBase64(imageBase64);
  const image = await Jimp.read(imageBuffer);
  const { width, height } = image.bitmap;

  // Round each dimension up to the next multiple of blockSize.
  const targetWidth = Math.ceil(width / blockSize) * blockSize;
  const targetHeight = Math.ceil(height / blockSize) * blockSize;

  // Already aligned: skip re-encoding and hand back the input untouched.
  if (targetWidth === width && targetHeight === height) {
    return imageBase64;
  }

  // New canvas filled with 0xffffffff (presumably opaque white in Jimp's RGBA hex).
  const paddedImage = new Jimp(targetWidth, targetHeight, 0xffffffff);

  // Composite the original image onto the new canvas
  paddedImage.composite(image, 0, 0);

  const base64 = await paddedImage.getBase64Async(Jimp.MIME_JPEG);
  return base64;
}
|
||||
|
||||
@ -0,0 +1,35 @@
|
||||
// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
|
||||
|
||||
exports[`utils > should be able to describe tree 1`] = `
|
||||
"<container id="1" markerId="19" left="0" top="0" width="100" height="100">
|
||||
Legend had it that the Whispering Woods held an ancient secret, one that connected the world of man and magic, of reality and dream. Each leaf, every ...
|
||||
<text id="2" markerId="999" ariaLabel="image description, it could be a long text, very loooooooooooooooooooooooooooooooooooooooooong" left="0" top="0" width="100" height="100">
|
||||
world
|
||||
</text>
|
||||
<img id="3" markerId="20" ariaLabel="image description" storyContent="Legend had it that the Whispering Woods held an ancient secret, one that connected the world of man and magic, of reality and dream. Each leaf, every ..." left="0" top="0" width="100" height="100">
|
||||
world 2345
|
||||
<img id="3" markerId="20" ariaLabel="image description" storyContent="Legend had it that the Whispering Woods held an ancient secret, one that connected the world of man and magic, of reality and dream. Each leaf, every ..." left="0" top="0" width="100" height="100">
|
||||
</img>
|
||||
<>
|
||||
<img id="3222" markerId="20" ariaLabel="image description" storyContent="Legend had it that the Whispering Woods held an ancient secret, one that connected the world of man and magic, of reality and dream. Each leaf, every ..." left="0" top="0" width="100" height="100">
|
||||
world 2345
|
||||
</img>
|
||||
</>
|
||||
</img>
|
||||
</container>"
|
||||
`;
|
||||
|
||||
exports[`utils > should be able to describe tree, filterNonTextContent = true 1`] = `
|
||||
"<container id="1" markerId="19" left="0" top="0" width="100" height="100">
|
||||
Legend had it that t...
|
||||
<text id="2" markerId="999" ariaLabel="image description, i..." left="0" top="0" width="100" height="100">
|
||||
world
|
||||
</text>
|
||||
<img id="3" markerId="20" ariaLabel="image description" storyContent="Legend had it that t..." left="0" top="0" width="100" height="100">
|
||||
world 2345
|
||||
<img id="3222" markerId="20" ariaLabel="image description" storyContent="Legend had it that t..." left="0" top="0" width="100" height="100">
|
||||
world 2345
|
||||
</img>
|
||||
</img>
|
||||
</container>"
|
||||
`;
|
||||
@ -15274,6 +15274,7 @@ exports[`image utils > imageInfo 1`] = `
|
||||
"getPixelColour": [Function],
|
||||
"setPixelColour": [Function],
|
||||
"writeAsync": [Function],
|
||||
Symbol(shapeMode): false,
|
||||
Symbol(kCapture): false,
|
||||
},
|
||||
"width": 68,
|
||||
@ -15287,3 +15288,7 @@ exports[`image utils > jimp + imageInfo 2`] = `905`;
|
||||
exports[`image utils > jpeg + base64 + imageInfo 1`] = `400`;
|
||||
|
||||
exports[`image utils > jpeg + base64 + imageInfo 2`] = `905`;
|
||||
|
||||
exports[`image utils > paddingToMatchBlock 1`] = `420`;
|
||||
|
||||
exports[`image utils > paddingToMatchBlock 2`] = `924`;
|
||||
|
||||
@ -1,4 +1,6 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
base64Encoded,
|
||||
imageInfo,
|
||||
@ -7,6 +9,7 @@ import {
|
||||
resizeImgBase64,
|
||||
} from '@/img';
|
||||
import getJimp from '@/img/get-jimp';
|
||||
import { paddingToMatchBlock, saveBase64Image } from 'src/img/transform';
|
||||
import { getFixture } from 'tests/utils';
|
||||
import { describe, expect, it } from 'vitest';
|
||||
|
||||
@ -70,6 +73,23 @@ describe('image utils', () => {
|
||||
expect(buffer).toBeDefined();
|
||||
});
|
||||
|
||||
it('paddingToMatchBlock', async () => {
|
||||
const image = getFixture('heytea.jpeg');
|
||||
const base64 = base64Encoded(image);
|
||||
const paddedBase64 = await paddingToMatchBlock(base64);
|
||||
|
||||
const resultInfo = await imageInfoOfBase64(paddedBase64);
|
||||
expect(resultInfo.width).toMatchSnapshot();
|
||||
expect(resultInfo.height).toMatchSnapshot();
|
||||
|
||||
const tmpFile = join(tmpdir(), 'heytea-padded.jpeg');
|
||||
await saveBase64Image({
|
||||
base64Data: paddedBase64,
|
||||
outputPath: tmpFile,
|
||||
});
|
||||
console.log('tmpFile', tmpFile);
|
||||
});
|
||||
|
||||
// it(
|
||||
// 'profile',
|
||||
// async () => {
|
||||
@ -89,69 +109,3 @@ describe('image utils', () => {
|
||||
// 10 * 1000,
|
||||
// );
|
||||
});
|
||||
|
||||
// it('align a sub-image', async () => {
|
||||
// const file = getFixture('long-text.png');
|
||||
// const rect = await alignCoordByTrim(file, {
|
||||
// left: 140,
|
||||
// top: 50,
|
||||
// width: 200,
|
||||
// height: 80,
|
||||
// });
|
||||
// expect(rect).toMatchSnapshot();
|
||||
// });
|
||||
|
||||
// it('align a tiny sub-image', async () => {
|
||||
// const file = getFixture('2x2.jpeg');
|
||||
// const rect = await alignCoordByTrim(file, {
|
||||
// left: 140,
|
||||
// top: 50,
|
||||
// width: 200,
|
||||
// height: 80,
|
||||
// });
|
||||
// expect(rect).toMatchSnapshot();
|
||||
// });
|
||||
|
||||
// it('align a table style sub-image', async () => {
|
||||
// const file = getFixture('table.png');
|
||||
// const rect = await alignCoordByTrim(file, {
|
||||
// left: 140,
|
||||
// top: 50,
|
||||
// width: 200,
|
||||
// height: 80,
|
||||
// });
|
||||
// expect(rect).toMatchSnapshot();
|
||||
// });
|
||||
|
||||
// it('illegal center rect, refuse to align', async () => {
|
||||
// const file = getFixture('long-text.png'); // 2862x250
|
||||
// const rect = await alignCoordByTrim(file, {
|
||||
// left: 3000,
|
||||
// top: 3000,
|
||||
// width: 200,
|
||||
// height: 200,
|
||||
// });
|
||||
// expect(rect).toMatchSnapshot();
|
||||
// });
|
||||
|
||||
// it('align a sub-image with negative coord', async () => {
|
||||
// const file = getFixture('long-text.png'); // 2862x250
|
||||
// const rect = await alignCoordByTrim(file, {
|
||||
// left: -100,
|
||||
// top: -100,
|
||||
// width: 200,
|
||||
// height: 200,
|
||||
// });
|
||||
// expect(rect).toMatchSnapshot();
|
||||
// });
|
||||
|
||||
// it('align an oversized sub-image', async () => {
|
||||
// const file = getFixture('long-text.png'); // 2862x250
|
||||
// const rect = await alignCoordByTrim(file, {
|
||||
// left: 2860,
|
||||
// top: 200,
|
||||
// width: 200,
|
||||
// height: 200,
|
||||
// });
|
||||
// expect(rect).toMatchSnapshot();
|
||||
// });
|
||||
|
||||