#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

import math
import os
from copy import deepcopy

import cv2
import numpy as np
import onnxruntime as ort
from huggingface_hub import snapshot_download

from api.utils.file_utils import get_project_base_directory
from .operators import *
from rag.settings import cron_logger


class Recognizer(object):
    def __init__(self, label_list, task_name, model_dir=None):
        """
        If you have trouble downloading HuggingFace models, -_^ this might help!!

        For Linux:
        export HF_ENDPOINT=https://hf-mirror.com

        For Windows:
        Good luck
        ^_-

        """
        if not model_dir:
            model_dir = os.path.join(
                get_project_base_directory(),
                "rag/res/deepdoc")
            model_file_path = os.path.join(model_dir, task_name + ".onnx")
            if not os.path.exists(model_file_path):
                # The bundled model is missing; pull it from HuggingFace.
                model_dir = snapshot_download(repo_id="InfiniFlow/deepdoc",
                                              local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
                                              local_dir_use_symlinks=False)
                model_file_path = os.path.join(model_dir, task_name + ".onnx")
        else:
            model_file_path = os.path.join(model_dir, task_name + ".onnx")

        if not os.path.exists(model_file_path):
            raise ValueError("Model file not found: {}".format(model_file_path))
        # Note: the GPU branch is deliberately disabled by the `False and`.
        if False and ort.get_device() == "GPU":
            options = ort.SessionOptions()
            options.enable_cpu_mem_arena = False
            self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=['CUDAExecutionProvider'])
        else:
            self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
        self.input_names = [node.name for node in self.ort_sess.get_inputs()]
        self.output_names = [node.name for node in self.ort_sess.get_outputs()]
        # Expected input height/width (dims 2 and 3 of the NCHW input tensor).
        self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
        self.label_list = label_list
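
    # A minimal usage sketch (the label set and task name below are made up
    # for illustration; the real ones come from deepdoc's recognizer callers):
    #
    #     labels = ["text", "title", "figure", "table"]
    #     det = Recognizer(labels, "layout")          # loads .../layout.onnx
    #     boxes_per_page = det([cv2.imread("page.png")], thr=0.5)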

    @staticmethod
    def sort_Y_firstly(arr, threshold):
        # Sort boxes by y (top) first, then by x (x0).
        arr = sorted(arr, key=lambda r: (r["top"], r["x0"]))
        # Bubble pass: boxes whose tops differ by less than `threshold` are
        # treated as lying on the same line and re-ordered by x0.
        for i in range(len(arr) - 1):
            for j in range(i, -1, -1):
                if abs(arr[j + 1]["top"] - arr[j]["top"]) < threshold \
                        and arr[j + 1]["x0"] < arr[j]["x0"]:
                    tmp = deepcopy(arr[j])
                    arr[j] = deepcopy(arr[j + 1])
                    arr[j + 1] = deepcopy(tmp)
        return arr
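
    # For example (hypothetical boxes), with threshold=5 the two boxes below
    # count as one line and come back ordered by x0:
    #
    #     Recognizer.sort_Y_firstly(
    #         [{"top": 10, "x0": 50}, {"top": 12, "x0": 8}], 5)
    #     # -> [{"top": 12, "x0": 8}, {"top": 10, "x0": 50}]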

    @staticmethod
    def sort_X_firstly(arr, threshold, copy=True):
        # Sort boxes by x (x0) first, then by y (top).
        arr = sorted(arr, key=lambda r: (r["x0"], r["top"]))
        # Bubble pass: boxes whose x0 differ by less than `threshold` are
        # treated as the same column and re-ordered by top.
        for i in range(len(arr) - 1):
            for j in range(i, -1, -1):
                if abs(arr[j + 1]["x0"] - arr[j]["x0"]) < threshold \
                        and arr[j + 1]["top"] < arr[j]["top"]:
                    tmp = deepcopy(arr[j]) if copy else arr[j]
                    arr[j] = deepcopy(arr[j + 1]) if copy else arr[j + 1]
                    arr[j + 1] = deepcopy(tmp) if copy else tmp
        return arr

    @staticmethod
    def sort_C_firstly(arr, thr=0):
        # Sort by column ("C") first, then by y (top) within a column.
        arr = Recognizer.sort_X_firstly(arr, thr)
        for i in range(len(arr) - 1):
            for j in range(i, -1, -1):
                if "C" not in arr[j] or "C" not in arr[j + 1]:
                    continue
                if arr[j + 1]["C"] < arr[j]["C"] \
                        or (
                        arr[j + 1]["C"] == arr[j]["C"]
                        and arr[j + 1]["top"] < arr[j]["top"]
                ):
                    tmp = arr[j]
                    arr[j] = arr[j + 1]
                    arr[j + 1] = tmp
        return arr

    @staticmethod
    def sort_R_firstly(arr, thr=0):
        # Sort by row ("R") first, then by x (x0) within a row.
        arr = Recognizer.sort_Y_firstly(arr, thr)
        for i in range(len(arr) - 1):
            for j in range(i, -1, -1):
                if "R" not in arr[j] or "R" not in arr[j + 1]:
                    continue
                if arr[j + 1]["R"] < arr[j]["R"] \
                        or (
                        arr[j + 1]["R"] == arr[j]["R"]
                        and arr[j + 1]["x0"] < arr[j]["x0"]
                ):
                    tmp = arr[j]
                    arr[j] = arr[j + 1]
                    arr[j + 1] = tmp
        return arr

    @staticmethod
    def overlapped_area(a, b, ratio=True):
        # Area of the intersection of `a` and `b`; if `ratio` is True, return
        # it as a fraction of `a`'s own area.
        tp, btm, x0, x1 = a["top"], a["bottom"], a["x0"], a["x1"]
        if b["x0"] > x1 or b["x1"] < x0:
            return 0
        if b["bottom"] < tp or b["top"] > btm:
            return 0
        x0_ = max(b["x0"], x0)
        x1_ = min(b["x1"], x1)
        assert x0_ <= x1_, "Bad horizontal overlap! T:{},B:{},X0:{},X1:{} ==> {}".format(
            tp, btm, x0, x1, b)
        tp_ = max(b["top"], tp)
        btm_ = min(b["bottom"], btm)
        assert tp_ <= btm_, "Bad vertical overlap! T:{},B:{},X0:{},X1:{} => {}".format(
            tp, btm, x0, x1, b)
        ov = (btm_ - tp_) * (x1_ - x0_) if x1 - x0 != 0 and btm - tp != 0 else 0
        if ov > 0 and ratio:
            ov /= (x1 - x0) * (btm - tp)
        return ov
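
    # Note the asymmetry (a sketch with made-up boxes): when a small `a` sits
    # fully inside a large `b`, the ratio is 1.0 one way but tiny the other:
    #
    #     a = {"top": 0, "bottom": 1, "x0": 0, "x1": 1}
    #     b = {"top": 0, "bottom": 10, "x0": 0, "x1": 10}
    #     Recognizer.overlapped_area(a, b)  # -> 1.0
    #     Recognizer.overlapped_area(b, a)  # -> 0.01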

    @staticmethod
    def layouts_cleanup(boxes, layouts, far=2, thr=0.7):
        def notOverlapped(a, b):
            return any([a["x1"] < b["x0"],
                        a["x0"] > b["x1"],
                        a["bottom"] < b["top"],
                        a["top"] > b["bottom"]])

        # Deduplicate layout regions: when two nearby layouts of the same type
        # overlap heavily, keep the one with the higher score, or failing
        # that, the one covering more text-box area.
        i = 0
        while i + 1 < len(layouts):
            j = i + 1
            while j < min(i + far, len(layouts)) \
                    and (layouts[i].get("type", "") != layouts[j].get("type", "")
                         or notOverlapped(layouts[i], layouts[j])):
                j += 1
            if j >= min(i + far, len(layouts)):
                i += 1
                continue
            if Recognizer.overlapped_area(layouts[i], layouts[j]) < thr \
                    and Recognizer.overlapped_area(layouts[j], layouts[i]) < thr:
                i += 1
                continue

            if layouts[i].get("score") and layouts[j].get("score"):
                if layouts[i]["score"] > layouts[j]["score"]:
                    layouts.pop(j)
                else:
                    layouts.pop(i)
                continue

            area_i, area_i_1 = 0, 0
            for b in boxes:
                if not notOverlapped(b, layouts[i]):
                    area_i += Recognizer.overlapped_area(b, layouts[i], False)
                if not notOverlapped(b, layouts[j]):
                    area_i_1 += Recognizer.overlapped_area(b, layouts[j], False)

            if area_i > area_i_1:
                layouts.pop(j)
            else:
                layouts.pop(i)

        return layouts

    def create_inputs(self, imgs, im_info):
        """Generate inputs for different model types.
        Args:
            imgs (list(numpy)): list of images (np.ndarray)
            im_info (list(dict)): list of image info
        Returns:
            inputs (dict): input of model
        """
        inputs = {}

        im_shape = []
        scale_factor = []
        if len(imgs) == 1:
            inputs['image'] = np.array((imgs[0],)).astype('float32')
            inputs['im_shape'] = np.array(
                (im_info[0]['im_shape'],)).astype('float32')
            inputs['scale_factor'] = np.array(
                (im_info[0]['scale_factor'],)).astype('float32')
            return inputs

        for e in im_info:
            im_shape.append(np.array((e['im_shape'],)).astype('float32'))
            scale_factor.append(np.array((e['scale_factor'],)).astype('float32'))

        inputs['im_shape'] = np.concatenate(im_shape, axis=0)
        inputs['scale_factor'] = np.concatenate(scale_factor, axis=0)

        # Pad every image to the largest height/width in the batch so they
        # can be stacked into a single tensor.
        imgs_shape = [[e.shape[1], e.shape[2]] for e in imgs]
        max_shape_h = max([e[0] for e in imgs_shape])
        max_shape_w = max([e[1] for e in imgs_shape])
        padding_imgs = []
        for img in imgs:
            im_c, im_h, im_w = img.shape[:]
            padding_im = np.zeros(
                (im_c, max_shape_h, max_shape_w), dtype=np.float32)
            padding_im[:, :im_h, :im_w] = img
            padding_imgs.append(padding_im)
        inputs['image'] = np.stack(padding_imgs, axis=0)
        return inputs
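
    # Shape sketch (illustrative numbers): stacking two CHW images of shapes
    # (3, 600, 800) and (3, 640, 760) yields an 'image' tensor of shape
    # (2, 3, 640, 800), each image zero-padded on the bottom/right.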

    @staticmethod
    def find_overlapped(box, boxes_sorted_by_y, naive=False):
        if not boxes_sorted_by_y:
            return
        bxs = boxes_sorted_by_y
        s, e, ii = 0, len(bxs), 0
        # Binary search for some box whose vertical span intersects `box`.
        while s < e and not naive:
            ii = (e + s) // 2
            pv = bxs[ii]
            if box["bottom"] < pv["top"]:
                e = ii
                continue
            if box["top"] > pv["bottom"]:
                s = ii + 1
                continue
            break
        # Tighten the scan window: drop leading boxes strictly above `box`
        # and trailing boxes strictly below it.
        while s < ii and box["top"] > bxs[s]["bottom"]:
            s += 1
        while e - 1 > ii and box["bottom"] < bxs[e - 1]["top"]:
            e -= 1

        # Among the remaining candidates, return the index of the box with
        # the largest overlap ratio.
        max_overlapped_i, max_overlapped = None, 0
        for i in range(s, e):
            ov = Recognizer.overlapped_area(bxs[i], box)
            if ov <= max_overlapped:
                continue
            max_overlapped_i = i
            max_overlapped = ov

        return max_overlapped_i
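
    # The binary search assumes `boxes_sorted_by_y` is ordered by top (e.g.
    # as produced by sort_Y_firstly above); pass naive=True to skip it and
    # scan the candidates linearly instead.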

    @staticmethod
    def find_horizontally_tightest_fit(box, boxes):
        if not boxes:
            return
        # Pick the box whose horizontal extent best matches `box`: smallest
        # of the left-edge gap, the right-edge gap and the center offset.
        min_dis, min_i = 1000000, None
        for i, b in enumerate(boxes):
            if box.get("layoutno", "0") != b.get("layoutno", "0"):
                continue
            dis = min(abs(box["x0"] - b["x0"]), abs(box["x1"] - b["x1"]),
                      abs(box["x0"] + box["x1"] - b["x1"] - b["x0"]) / 2)
            if dis < min_dis:
                min_i = i
                min_dis = dis
        return min_i

    @staticmethod
    def find_overlapped_with_threashold(box, boxes, thr=0.3):
        if not boxes:
            return
        max_overlapped_i, max_overlapped, _max_overlapped = None, thr, 0
        s, e = 0, len(boxes)
        for i in range(s, e):
            ov = Recognizer.overlapped_area(box, boxes[i])
            _ov = Recognizer.overlapped_area(boxes[i], box)
            # Lexicographic comparison: prefer a larger overlap ratio of
            # `box`, breaking ties by the reverse ratio; `thr` is the bar a
            # candidate must clear.
            if (ov, _ov) < (max_overlapped, _max_overlapped):
                continue
            max_overlapped_i = i
            max_overlapped = ov
            _max_overlapped = _ov

        return max_overlapped_i

    def preprocess(self, image_list):
        inputs = []
        if "scale_factor" in self.input_names:
            # Models that take `scale_factor` as an input: build the
            # preprocessing pipeline from the op configs below and run the
            # `preprocess` function from .operators.
            preprocess_ops = []
            for op_info in [
                {'interp': 2, 'keep_ratio': False, 'target_size': [800, 608], 'type': 'LinearResize'},
                {'is_scale': True, 'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225], 'type': 'StandardizeImage'},
                {'type': 'Permute'},
                {'stride': 32, 'type': 'PadStride'}
            ]:
                new_op_info = op_info.copy()
                op_type = new_op_info.pop('type')
                preprocess_ops.append(eval(op_type)(**new_op_info))

            for im_path in image_list:
                im, im_info = preprocess(im_path, preprocess_ops)
                inputs.append({"image": np.array((im,)).astype('float32'),
                               "scale_factor": np.array((im_info["scale_factor"],)).astype('float32')})
        else:
            # Fixed-shape models: resize to the model's expected input size
            # and normalize to [0, 1].
            hh, ww = self.input_shape
            for img in image_list:
                h, w = img.shape[:2]
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = cv2.resize(np.array(img).astype('float32'), (ww, hh))
                # Scale input pixel values to 0 to 1
                img /= 255.0
                img = img.transpose(2, 0, 1)
                img = img[np.newaxis, :, :, :].astype(np.float32)
                inputs.append({self.input_names[0]: img, "scale_factor": [w / ww, h / hh]})
        return inputs

    def postprocess(self, boxes, inputs, thr):
        if "scale_factor" in self.input_names:
            # This branch's detector already emits rows of
            # [class_id, score, x0, y0, x1, y1].
            bb = []
            for b in boxes:
                clsid, bbox, score = int(b[0]), b[2:], b[1]
                if score < thr:
                    continue
                if clsid >= len(self.label_list):
                    cron_logger.warning("bad category id: {}".format(clsid))
                    continue
                bb.append({
                    "type": self.label_list[clsid].lower(),
                    "bbox": [float(t) for t in bbox.tolist()],
                    "score": float(score)
                })
            return bb

        def xywh2xyxy(x):
            # [x, y, w, h] to [x1, y1, x2, y2]
            y = np.copy(x)
            y[:, 0] = x[:, 0] - x[:, 2] / 2
            y[:, 1] = x[:, 1] - x[:, 3] / 2
            y[:, 2] = x[:, 0] + x[:, 2] / 2
            y[:, 3] = x[:, 1] + x[:, 3] / 2
            return y

        def compute_iou(box, boxes):
            # Compute xmin, ymin, xmax, ymax of the intersections
            xmin = np.maximum(box[0], boxes[:, 0])
            ymin = np.maximum(box[1], boxes[:, 1])
            xmax = np.minimum(box[2], boxes[:, 2])
            ymax = np.minimum(box[3], boxes[:, 3])

            # Compute intersection area
            intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

            # Compute union area
            box_area = (box[2] - box[0]) * (box[3] - box[1])
            boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
            union_area = box_area + boxes_area - intersection_area

            # Compute IoU
            return intersection_area / union_area

        def iou_filter(boxes, scores, iou_threshold):
            # Greedy non-maximum suppression.
            sorted_indices = np.argsort(scores)[::-1]

            keep_boxes = []
            while sorted_indices.size > 0:
                # Pick the highest-scoring remaining box
                box_id = sorted_indices[0]
                keep_boxes.append(box_id)

                # Compute IoU of the picked box with the rest
                ious = compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

                # Remove boxes with IoU over the threshold
                keep_indices = np.where(ious < iou_threshold)[0]
                sorted_indices = sorted_indices[keep_indices + 1]

            return keep_boxes

        boxes = np.squeeze(boxes).T
        # Filter out object confidence scores below threshold
        scores = np.max(boxes[:, 4:], axis=1)
        boxes = boxes[scores > thr, :]
        scores = scores[scores > thr]
        if len(boxes) == 0:
            return []

        # Get the class with the highest confidence
        class_ids = np.argmax(boxes[:, 4:], axis=1)
        boxes = boxes[:, :4]
        # Map the boxes back to the original image scale.
        scale = np.array([inputs["scale_factor"][0], inputs["scale_factor"][1],
                          inputs["scale_factor"][0], inputs["scale_factor"][1]])
        boxes = np.multiply(boxes, scale, dtype=np.float32)
        boxes = xywh2xyxy(boxes)

        # Apply NMS separately per class.
        unique_class_ids = np.unique(class_ids)
        indices = []
        for class_id in unique_class_ids:
            class_indices = np.where(class_ids == class_id)[0]
            class_boxes = boxes[class_indices, :]
            class_scores = scores[class_indices]
            class_keep_boxes = iou_filter(class_boxes, class_scores, 0.2)
            indices.extend(class_indices[class_keep_boxes])

        return [{
            "type": self.label_list[class_ids[i]].lower(),
            "bbox": [float(t) for t in boxes[i].tolist()],
            "score": float(scores[i])
        } for i in indices]
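
    # Decoding sketch for the branch above (shapes inferred from the code):
    # a raw output of shape (1, 4 + num_classes, N) is squeezed and
    # transposed to (N, 4 + num_classes); columns 0-3 are cx, cy, w, h and
    # the remaining columns are per-class scores.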

    def __call__(self, image_list, thr=0.7, batch_size=16):
        res = []
        imgs = []
        for i in range(len(image_list)):
            if not isinstance(image_list[i], np.ndarray):
                imgs.append(np.array(image_list[i]))
            else:
                imgs.append(image_list[i])

        # Run inference in batches of `batch_size` images.
        batch_loop_cnt = math.ceil(float(len(imgs)) / batch_size)
        for i in range(batch_loop_cnt):
            start_index = i * batch_size
            end_index = min((i + 1) * batch_size, len(imgs))
            batch_image_list = imgs[start_index:end_index]
            inputs = self.preprocess(batch_image_list)
            for ins in inputs:
                bb = self.postprocess(self.ort_sess.run(None, {k: v for k, v in ins.items() if k in self.input_names})[0], ins, thr)
                res.append(bb)

        # seeit.save_results(image_list, res, self.label_list, threshold=thr)

        return res