mirror of
				https://github.com/infiniflow/ragflow.git
				synced 2025-10-30 17:29:40 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			188 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			188 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #  Licensed under the Apache License, Version 2.0 (the "License");
 | |
| #  you may not use this file except in compliance with the License.
 | |
| #  You may obtain a copy of the License at
 | |
| #
 | |
| #      http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| #  Unless required by applicable law or agreed to in writing, software
 | |
| #  distributed under the License is distributed on an "AS IS" BASIS,
 | |
| #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| #  See the License for the specific language governing permissions and
 | |
| #  limitations under the License.
 | |
| #
 | |
| import os, sys
 | |
| sys.path.insert(
 | |
|     0,
 | |
|     os.path.abspath(
 | |
|         os.path.join(
 | |
|             os.path.dirname(
 | |
|                 os.path.abspath(__file__)),
 | |
|             '../../')))
 | |
| 
 | |
| from deepdoc.vision.seeit import draw_box
 | |
| from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
 | |
| from api.utils.file_utils import get_project_base_directory
 | |
| import argparse
 | |
| import re
 | |
| import numpy as np
 | |
| 
 | |
| 
 | |
def main(args):
    """Run layout or table-structure recognition and save the results.

    For every input image the detections are drawn onto the image, which is
    saved to the matching output path.  In "tsr" mode the recognized table is
    additionally rendered as HTML and written to ``<output path>.html``.

    Args:
        args: Parsed command-line namespace.  This function reads
            ``args.mode`` ("layout" or "tsr", case-insensitive) and
            ``args.threshold`` (anything ``float()`` accepts); the namespace
            is also handed to ``init_in_out`` to resolve inputs and outputs.

    Raises:
        ValueError: If ``args.mode`` is neither "layout" nor "tsr".
    """
    images, outputs = init_in_out(args)

    # Normalize once instead of re-parsing on every use.
    mode = args.mode.lower()
    threshold = float(args.threshold)

    ocr = None
    if mode == "layout":
        labels = LayoutRecognizer.labels
        detr = Recognizer(
            labels,
            "layout",
            os.path.join(get_project_base_directory(), "rag/res/deepdoc/"),
        )
    elif mode == "tsr":
        labels = TableStructureRecognizer.labels
        detr = TableStructureRecognizer()
        ocr = OCR()
    else:
        # Fail with a clear message rather than a NameError on `detr` below.
        raise ValueError(
            "Unsupported mode %r; expected 'layout' or 'tsr'" % args.mode)

    layouts = detr(images, threshold)
    for i, lyt in enumerate(layouts):
        if mode == "tsr":
            html = get_table_html(images[i], lyt, ocr)
            # OCR text is frequently non-ASCII; pin the encoding so the write
            # does not depend on the platform's locale encoding.
            with open(outputs[i] + ".html", "w", encoding="utf-8") as f:
                f.write(html)
            # Convert table components to the type/bbox/score dicts that are
            # passed to draw_box.
            lyt = [{
                "type": t["label"],
                "bbox": [t["x0"], t["top"], t["x1"], t["bottom"]],
                "score": t["score"]
            } for t in lyt]
        img = draw_box(images[i], lyt, labels, threshold)
        img.save(outputs[i], quality=95)
        print("save result to: " + outputs[i])
 | |
| 
 | |
| 
 | |
def get_table_html(img, tb_cpns, ocr):
    """Build a standalone HTML page for a table detected in ``img``.

    OCR text boxes are annotated in place with the row ("R*"), header ("H*"),
    column ("C*") and spanning-cell ("SP") component they fall into, then
    passed to ``TableStructureRecognizer.construct_table`` whose HTML output
    is embedded in a styled page.

    Args:
        img: The table image; converted with ``np.array`` before OCR.
        tb_cpns: Re-iterable collection of table-component dicts.  The keys
            read here are "label" and "x0"; the cleaned components are also
            read for "top", "bottom" and "x1".
        ocr: Callable returning ``(box, text)`` pairs for an image array.
            ``box`` is indexed as ``box[0]``, ``box[1]`` and ``box[-1]``
            (presumably corner points -- confirm against the OCR class) and
            ``text[0]`` is used as the recognized string.

    Returns:
        str: A complete HTML document containing the reconstructed table.
    """
    boxes = ocr(np.array(img))

    # Sort tolerance is a third of the mean box height.  Guard the empty case:
    # np.mean([]) would emit a RuntimeWarning and produce NaN.
    heights = [b[-1][1] - b[0][1] for b, _ in boxes]
    y_tolerance = np.mean(heights) / 3 if heights else 0

    boxes = Recognizer.sort_Y_firstly(
        [{"x0": b[0][0], "x1": b[1][0],
          "top": b[0][1], "text": t[0],
          "bottom": b[-1][1],
          "layout_type": "table",
          "page_number": 0}
         # Skip degenerate boxes whose corners are inverted.
         for b, t in boxes if b[0][0] <= b[1][0] and b[0][1] <= b[-1][1]],
        y_tolerance
    )

    def gather(kwd, fzy=10, ption=0.6):
        """Select components whose label matches ``kwd``, cleaned and sorted."""
        eles = Recognizer.sort_Y_firstly(
            [r for r in tb_cpns if re.match(kwd, r["label"])], fzy)
        eles = Recognizer.layouts_cleanup(boxes, eles, 5, ption)
        return Recognizer.sort_Y_firstly(eles, 0)

    headers = gather(r".*header$")
    rows = gather(r".* (row|header)")
    spans = gather(r".*spanning")
    # Columns are ordered left-to-right rather than top-to-bottom.
    clmns = sorted([r for r in tb_cpns if re.match(
        r"table column$", r["label"])], key=lambda x: x["x0"])
    clmns = Recognizer.layouts_cleanup(boxes, clmns, 5, 0.5)

    for b in boxes:
        ii = Recognizer.find_overlapped_with_threashold(b, rows, thr=0.3)
        if ii is not None:
            b["R"] = ii
            b["R_top"] = rows[ii]["top"]
            b["R_bott"] = rows[ii]["bottom"]

        ii = Recognizer.find_overlapped_with_threashold(b, headers, thr=0.3)
        if ii is not None:
            b["H_top"] = headers[ii]["top"]
            b["H_bott"] = headers[ii]["bottom"]
            b["H_left"] = headers[ii]["x0"]
            b["H_right"] = headers[ii]["x1"]
            b["H"] = ii

        ii = Recognizer.find_horizontally_tightest_fit(b, clmns)
        if ii is not None:
            b["C"] = ii
            b["C_left"] = clmns[ii]["x0"]
            b["C_right"] = clmns[ii]["x1"]

        # Checked last on purpose: a spanning-cell match overwrites the H_*
        # extents set by a header match above.
        ii = Recognizer.find_overlapped_with_threashold(b, spans, thr=0.3)
        if ii is not None:
            b["H_top"] = spans[ii]["top"]
            b["H_bott"] = spans[ii]["bottom"]
            b["H_left"] = spans[ii]["x0"]
            b["H_right"] = spans[ii]["x1"]
            b["SP"] = ii

    # "%%" is a literal percent sign; the single "%s" receives the table HTML.
    html = """
    <html>
    <head>
    <style>
    ._table_1nkzy_11 {
      margin: auto;
      width: 70%%;
      padding: 10px;
    }
    ._table_1nkzy_11 p {
      margin-bottom: 50px;
      border: 1px solid #e1e1e1;
    }

    caption {
      color: #6ac1ca;
      font-size: 20px;
      height: 50px;
      line-height: 50px;
      font-weight: 600;
      margin-bottom: 10px;
    }

    ._table_1nkzy_11 table {
      width: 100%%;
      border-collapse: collapse;
    }

    th {
      color: #fff;
      background-color: #6ac1ca;
    }

    td:hover {
      background: #c1e8e8;
    }

    tr:nth-child(even) {
      background-color: #f2f2f2;
    }

    ._table_1nkzy_11 th,
    ._table_1nkzy_11 td {
      text-align: center;
      border: 1px solid #ddd;
      padding: 8px;
    }
    </style>
    </head>
    <body>
    %s
    </body>
    </html>
""" % TableStructureRecognizer.construct_table(boxes, html=True)
    return html
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the recognizer.
    parser = argparse.ArgumentParser()
    parser.add_argument('--inputs',
                        help="Directory where to store images or PDFs, or a file path to a single image or PDF",
                        required=True)
    parser.add_argument('--output_dir', help="Directory where to store the output images. Default: './layouts_outputs'",
                        default="./layouts_outputs")
    # type=float lets argparse reject a malformed threshold with a usage
    # error instead of a traceback raised later inside main().
    parser.add_argument(
        '--threshold',
        help="A threshold to filter out detections. Default: 0.5",
        type=float,
        default=0.5)
    parser.add_argument('--mode', help="Task mode: layout recognition or table structure recognition", choices=["layout", "tsr"],
                        default="layout")
    args = parser.parse_args()
    main(args)
 | 
