mirror of
				https://github.com/PaddlePaddle/PaddleOCR.git
				synced 2025-11-04 11:49:14 +00:00 
			
		
		
		
	merge paddlestructure whl to paddleocr whl
This commit is contained in:
		
							parent
							
								
									bcfea0c389
								
							
						
					
					
						commit
						4030fd1e82
					
				@ -5,4 +5,5 @@ recursive-include ppocr/utils *.txt utility.py logging.py network.py
 | 
				
			|||||||
recursive-include ppocr/data *.py
 | 
					recursive-include ppocr/data *.py
 | 
				
			||||||
recursive-include ppocr/postprocess *.py
 | 
					recursive-include ppocr/postprocess *.py
 | 
				
			||||||
recursive-include tools/infer *.py
 | 
					recursive-include tools/infer *.py
 | 
				
			||||||
recursive-include ppocr/utils/e2e_utils *.py
 | 
					recursive-include ppocr/utils/e2e_utils *.py
 | 
				
			||||||
 | 
					recursive-include ppstructure *.py
 | 
				
			||||||
@ -11,7 +11,6 @@
 | 
				
			|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
# See the License for the specific language governing permissions and
 | 
					# See the License for the specific language governing permissions and
 | 
				
			||||||
# limitations under the License.
 | 
					# limitations under the License.
 | 
				
			||||||
 | 
					from .paddleocr import *
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__all__ = ['PaddleOCR', 'draw_ocr']
 | 
					__all__ = ['PaddleOCR', 'PPStructure', 'draw_ocr', 'draw_structure_result', 'save_structure_res']
 | 
				
			||||||
from .paddleocr import PaddleOCR
 | 
					 | 
				
			||||||
from .tools.infer.utility import draw_ocr
 | 
					 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										116
									
								
								paddleocr.py
									
									
									
									
									
								
							
							
						
						
									
										116
									
								
								paddleocr.py
									
									
									
									
									
								
							@ -29,16 +29,19 @@ from ppocr.utils.logging import get_logger
 | 
				
			|||||||
logger = get_logger()
 | 
					logger = get_logger()
 | 
				
			||||||
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
 | 
					from ppocr.utils.utility import check_and_read_gif, get_image_file_list
 | 
				
			||||||
from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
 | 
					from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
 | 
				
			||||||
from tools.infer.utility import draw_ocr, init_args, str2bool
 | 
					from tools.infer.utility import draw_ocr, str2bool
 | 
				
			||||||
 | 
					from ppstructure.utility import init_args, draw_structure_result
 | 
				
			||||||
 | 
					from ppstructure.predict_system import OCRSystem, save_structure_res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
__all__ = ['PaddleOCR']
 | 
					__all__ = ['PaddleOCR','PPStructure','draw_ocr','draw_structure_result','save_structure_res']
 | 
				
			||||||
 | 
					
 | 
				
			||||||
model_urls = {
 | 
					model_urls = {
 | 
				
			||||||
    'det': {
 | 
					    'det': {
 | 
				
			||||||
        'ch':
 | 
					        'ch':
 | 
				
			||||||
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
 | 
					            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
 | 
				
			||||||
        'en':
 | 
					        'en':
 | 
				
			||||||
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar'
 | 
					            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
 | 
				
			||||||
 | 
					        'structure': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar'
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
    'rec': {
 | 
					    'rec': {
 | 
				
			||||||
        'ch': {
 | 
					        'ch': {
 | 
				
			||||||
@ -110,10 +113,17 @@ model_urls = {
 | 
				
			|||||||
            'url':
 | 
					            'url':
 | 
				
			||||||
                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
 | 
					                'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
 | 
				
			||||||
            'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
 | 
					            'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        'structure': {
 | 
				
			||||||
 | 
					            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
 | 
				
			||||||
 | 
					            'dict_path': 'ppocr/utils/dict/table_dict.txt'
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    },
 | 
					    },
 | 
				
			||||||
    'cls':
 | 
					    'cls': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
 | 
				
			||||||
        'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
 | 
					    'table': {
 | 
				
			||||||
 | 
					        'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
 | 
				
			||||||
 | 
					        'dict_path': 'ppocr/utils/dict/table_structure_dict.txt'
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SUPPORT_DET_MODEL = ['DB']
 | 
					SUPPORT_DET_MODEL = ['DB']
 | 
				
			||||||
@ -129,9 +139,10 @@ def parse_args(mMain=True):
 | 
				
			|||||||
    parser.add_argument("--lang", type=str, default='ch')
 | 
					    parser.add_argument("--lang", type=str, default='ch')
 | 
				
			||||||
    parser.add_argument("--det", type=str2bool, default=True)
 | 
					    parser.add_argument("--det", type=str2bool, default=True)
 | 
				
			||||||
    parser.add_argument("--rec", type=str2bool, default=True)
 | 
					    parser.add_argument("--rec", type=str2bool, default=True)
 | 
				
			||||||
 | 
					    parser.add_argument("--type", type=str, default='ocr')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for action in parser._actions:
 | 
					    for action in parser._actions:
 | 
				
			||||||
        if action.dest == 'rec_char_dict_path':
 | 
					        if action.dest in ['rec_char_dict_path', 'table_char_dict_path']:
 | 
				
			||||||
            action.default = None
 | 
					            action.default = None
 | 
				
			||||||
    if mMain:
 | 
					    if mMain:
 | 
				
			||||||
        return parser.parse_args()
 | 
					        return parser.parse_args()
 | 
				
			||||||
@ -193,13 +204,13 @@ class PaddleOCR(predict_system.TextSystem):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        # init model dir
 | 
					        # init model dir
 | 
				
			||||||
        params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
 | 
					        params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
 | 
				
			||||||
                                                              os.path.join(BASE_DIR, VERSION, 'det', det_lang),
 | 
					                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
 | 
				
			||||||
                                                              model_urls['det'][det_lang])
 | 
					                                                              model_urls['det'][det_lang])
 | 
				
			||||||
        params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
 | 
					        params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
 | 
				
			||||||
                                                              os.path.join(BASE_DIR, VERSION, 'rec', lang),
 | 
					                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
 | 
				
			||||||
                                                              model_urls['rec'][lang]['url'])
 | 
					                                                              model_urls['rec'][lang]['url'])
 | 
				
			||||||
        params.cls_model_dir, cls_url = confirm_model_dir_url(params.cls_model_dir,
 | 
					        params.cls_model_dir, cls_url = confirm_model_dir_url(params.cls_model_dir,
 | 
				
			||||||
                                                              os.path.join(BASE_DIR, VERSION, 'cls'),
 | 
					                                                              os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
 | 
				
			||||||
                                                              model_urls['cls'])
 | 
					                                                              model_urls['cls'])
 | 
				
			||||||
        # download model
 | 
					        # download model
 | 
				
			||||||
        maybe_download(params.det_model_dir, det_url)
 | 
					        maybe_download(params.det_model_dir, det_url)
 | 
				
			||||||
@ -272,6 +283,65 @@ class PaddleOCR(predict_system.TextSystem):
 | 
				
			|||||||
            return rec_res
 | 
					            return rec_res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class PPStructure(OCRSystem):
 | 
				
			||||||
 | 
					    def __init__(self, **kwargs):
 | 
				
			||||||
 | 
					        params = parse_args(mMain=False)
 | 
				
			||||||
 | 
					        params.__dict__.update(**kwargs)
 | 
				
			||||||
 | 
					        if not params.show_log:
 | 
				
			||||||
 | 
					            logger.setLevel(logging.INFO)
 | 
				
			||||||
 | 
					        params.use_angle_cls = False
 | 
				
			||||||
 | 
					        # init model dir
 | 
				
			||||||
 | 
					        params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
 | 
				
			||||||
 | 
					                                                              os.path.join(BASE_DIR, VERSION, 'structure', 'det'),
 | 
				
			||||||
 | 
					                                                              model_urls['det']['structure'])
 | 
				
			||||||
 | 
					        params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
 | 
				
			||||||
 | 
					                                                              os.path.join(BASE_DIR, VERSION, 'structure', 'rec'),
 | 
				
			||||||
 | 
					                                                              model_urls['rec']['structure']['url'])
 | 
				
			||||||
 | 
					        params.table_model_dir, table_url = confirm_model_dir_url(params.table_model_dir,
 | 
				
			||||||
 | 
					                                                                  os.path.join(BASE_DIR, VERSION, 'structure', 'table'),
 | 
				
			||||||
 | 
					                                                                  model_urls['table']['url'])
 | 
				
			||||||
 | 
					        # download model
 | 
				
			||||||
 | 
					        maybe_download(params.det_model_dir, det_url)
 | 
				
			||||||
 | 
					        maybe_download(params.rec_model_dir, rec_url)
 | 
				
			||||||
 | 
					        maybe_download(params.table_model_dir, table_url)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if params.rec_char_dict_path is None:
 | 
				
			||||||
 | 
					            params.rec_char_type = 'EN'
 | 
				
			||||||
 | 
					            if os.path.exists(str(Path(__file__).parent / model_urls['rec']['structure']['dict_path'])):
 | 
				
			||||||
 | 
					                params.rec_char_dict_path = str(Path(__file__).parent / model_urls['rec']['structure']['dict_path'])
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                params.rec_char_dict_path = str(Path(__file__).parent.parent / model_urls['rec']['structure']['dict_path'])
 | 
				
			||||||
 | 
					        if params.table_char_dict_path is None:
 | 
				
			||||||
 | 
					            if os.path.exists(str(Path(__file__).parent / model_urls['table']['dict_path'])):
 | 
				
			||||||
 | 
					                params.table_char_dict_path = str(Path(__file__).parent / model_urls['table']['dict_path'])
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                params.table_char_dict_path = str(Path(__file__).parent.parent / model_urls['table']['dict_path'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        print(params)
 | 
				
			||||||
 | 
					        super().__init__(params)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __call__(self, img):
 | 
				
			||||||
 | 
					        if isinstance(img, str):
 | 
				
			||||||
 | 
					            # download net image
 | 
				
			||||||
 | 
					            if img.startswith('http'):
 | 
				
			||||||
 | 
					                download_with_progressbar(img, 'tmp.jpg')
 | 
				
			||||||
 | 
					                img = 'tmp.jpg'
 | 
				
			||||||
 | 
					            image_file = img
 | 
				
			||||||
 | 
					            img, flag = check_and_read_gif(image_file)
 | 
				
			||||||
 | 
					            if not flag:
 | 
				
			||||||
 | 
					                with open(image_file, 'rb') as f:
 | 
				
			||||||
 | 
					                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
 | 
				
			||||||
 | 
					                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
 | 
				
			||||||
 | 
					            if img is None:
 | 
				
			||||||
 | 
					                logger.error("error in loading image:{}".format(image_file))
 | 
				
			||||||
 | 
					                return None
 | 
				
			||||||
 | 
					        if isinstance(img, np.ndarray) and len(img.shape) == 2:
 | 
				
			||||||
 | 
					            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        res = super().__call__(img)
 | 
				
			||||||
 | 
					        return res
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def main():
 | 
					def main():
 | 
				
			||||||
    # for cmd
 | 
					    # for cmd
 | 
				
			||||||
    args = parse_args(mMain=True)
 | 
					    args = parse_args(mMain=True)
 | 
				
			||||||
@ -284,14 +354,26 @@ def main():
 | 
				
			|||||||
    if len(image_file_list) == 0:
 | 
					    if len(image_file_list) == 0:
 | 
				
			||||||
        logger.error('no images find in {}'.format(args.image_dir))
 | 
					        logger.error('no images find in {}'.format(args.image_dir))
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					    if args.type=='ocr':
 | 
				
			||||||
 | 
					        engine = PaddleOCR(**(args.__dict__))
 | 
				
			||||||
 | 
					    elif args.type=='structure':
 | 
				
			||||||
 | 
					        engine = PPStructure(**(args.__dict__))
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        raise NotImplementedError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    ocr_engine = PaddleOCR(**(args.__dict__))
 | 
					 | 
				
			||||||
    for img_path in image_file_list:
 | 
					    for img_path in image_file_list:
 | 
				
			||||||
 | 
					        img_name = os.path.basename(img_path).split('.')[0]
 | 
				
			||||||
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
 | 
					        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
 | 
				
			||||||
        result = ocr_engine.ocr(img_path,
 | 
					        if args.type == 'ocr':
 | 
				
			||||||
                                det=args.det,
 | 
					            result = engine.ocr(img_path,
 | 
				
			||||||
                                rec=args.rec,
 | 
					                                    det=args.det,
 | 
				
			||||||
                                cls=args.use_angle_cls)
 | 
					                                    rec=args.rec,
 | 
				
			||||||
        if result is not None:
 | 
					                                    cls=args.use_angle_cls)
 | 
				
			||||||
            for line in result:
 | 
					            if result is not None:
 | 
				
			||||||
                logger.info(line)
 | 
					                for line in result:
 | 
				
			||||||
 | 
					                    logger.info(line)
 | 
				
			||||||
 | 
					        elif args.type == 'structure':
 | 
				
			||||||
 | 
					            result = engine(img_path)
 | 
				
			||||||
 | 
					            for item in result:
 | 
				
			||||||
 | 
					                logger.info(item['res'])
 | 
				
			||||||
 | 
					            save_structure_res(result, args.output, img_name)
 | 
				
			||||||
@ -1,9 +0,0 @@
 | 
				
			|||||||
include LICENSE
 | 
					 | 
				
			||||||
include README.md
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
recursive-include ppocr/utils *.txt utility.py logging.py network.py
 | 
					 | 
				
			||||||
recursive-include ppocr/data *.py
 | 
					 | 
				
			||||||
recursive-include ppocr/postprocess *.py
 | 
					 | 
				
			||||||
recursive-include tools/infer *.py
 | 
					 | 
				
			||||||
recursive-include ppstructure *.py
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@ -11,7 +11,3 @@
 | 
				
			|||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
				
			||||||
# See the License for the specific language governing permissions and
 | 
					# See the License for the specific language governing permissions and
 | 
				
			||||||
# limitations under the License.
 | 
					# limitations under the License.
 | 
				
			||||||
 | 
					 | 
				
			||||||
from .paddlestructure import PaddleStructure, draw_result, save_res
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__all__ = ['PaddleStructure', 'draw_result', 'save_res']
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,147 +0,0 @@
 | 
				
			|||||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
					 | 
				
			||||||
# you may not use this file except in compliance with the License.
 | 
					 | 
				
			||||||
# You may obtain a copy of the License at
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# Unless required by applicable law or agreed to in writing, software
 | 
					 | 
				
			||||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
					 | 
				
			||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					 | 
				
			||||||
# See the License for the specific language governing permissions and
 | 
					 | 
				
			||||||
# limitations under the License.
 | 
					 | 
				
			||||||
import logging
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import sys
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__dir__ = os.path.dirname(__file__)
 | 
					 | 
				
			||||||
sys.path.append(__dir__)
 | 
					 | 
				
			||||||
sys.path.append(os.path.join(__dir__, '..'))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
import cv2
 | 
					 | 
				
			||||||
import numpy as np
 | 
					 | 
				
			||||||
from pathlib import Path
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from ppocr.utils.logging import get_logger
 | 
					 | 
				
			||||||
from ppstructure.predict_system import OCRSystem, save_res
 | 
					 | 
				
			||||||
from ppstructure.utility import init_args, draw_result
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
logger = get_logger()
 | 
					 | 
				
			||||||
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
 | 
					 | 
				
			||||||
from ppocr.utils.network import maybe_download, download_with_progressbar, confirm_model_dir_url, is_link
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
__all__ = ['PaddleStructure', 'draw_result', 'save_res']
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERSION = '2.1'
 | 
					 | 
				
			||||||
BASE_DIR = os.path.expanduser("~/.paddlestructure/")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
model_urls = {
 | 
					 | 
				
			||||||
    'det': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar',
 | 
					 | 
				
			||||||
    'rec': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
 | 
					 | 
				
			||||||
    'table': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar'
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def parse_args(mMain=True):
 | 
					 | 
				
			||||||
    import argparse
 | 
					 | 
				
			||||||
    parser = init_args()
 | 
					 | 
				
			||||||
    parser.add_help = mMain
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    for action in parser._actions:
 | 
					 | 
				
			||||||
        if action.dest in ['rec_char_dict_path', 'table_char_dict_path']:
 | 
					 | 
				
			||||||
            action.default = None
 | 
					 | 
				
			||||||
    if mMain:
 | 
					 | 
				
			||||||
        return parser.parse_args()
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        inference_args_dict = {}
 | 
					 | 
				
			||||||
        for action in parser._actions:
 | 
					 | 
				
			||||||
            inference_args_dict[action.dest] = action.default
 | 
					 | 
				
			||||||
        return argparse.Namespace(**inference_args_dict)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class PaddleStructure(OCRSystem):
 | 
					 | 
				
			||||||
    def __init__(self, **kwargs):
 | 
					 | 
				
			||||||
        params = parse_args(mMain=False)
 | 
					 | 
				
			||||||
        params.__dict__.update(**kwargs)
 | 
					 | 
				
			||||||
        if not params.show_log:
 | 
					 | 
				
			||||||
            logger.setLevel(logging.INFO)
 | 
					 | 
				
			||||||
        params.use_angle_cls = False
 | 
					 | 
				
			||||||
        # init model dir
 | 
					 | 
				
			||||||
        params.det_model_dir, det_url = confirm_model_dir_url(params.det_model_dir,
 | 
					 | 
				
			||||||
                                                              os.path.join(BASE_DIR, VERSION, 'det'),
 | 
					 | 
				
			||||||
                                                              model_urls['det'])
 | 
					 | 
				
			||||||
        params.rec_model_dir, rec_url = confirm_model_dir_url(params.rec_model_dir,
 | 
					 | 
				
			||||||
                                                              os.path.join(BASE_DIR, VERSION, 'rec'),
 | 
					 | 
				
			||||||
                                                              model_urls['rec'])
 | 
					 | 
				
			||||||
        params.table_model_dir, table_url = confirm_model_dir_url(params.table_model_dir,
 | 
					 | 
				
			||||||
                                                                          os.path.join(BASE_DIR, VERSION, 'table'),
 | 
					 | 
				
			||||||
                                                                          model_urls['table'])
 | 
					 | 
				
			||||||
        # download model
 | 
					 | 
				
			||||||
        maybe_download(params.det_model_dir, det_url)
 | 
					 | 
				
			||||||
        maybe_download(params.rec_model_dir, rec_url)
 | 
					 | 
				
			||||||
        maybe_download(params.table_model_dir, table_url)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if params.rec_char_dict_path is None:
 | 
					 | 
				
			||||||
            params.rec_char_type = 'EN'
 | 
					 | 
				
			||||||
            if os.path.exists(str(Path(__file__).parent / 'ppocr/utils/dict/table_dict.txt')):
 | 
					 | 
				
			||||||
                params.rec_char_dict_path = str(Path(__file__).parent / 'ppocr/utils/dict/table_dict.txt')
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                params.rec_char_dict_path = str(Path(__file__).parent.parent / 'ppocr/utils/dict/table_dict.txt')
 | 
					 | 
				
			||||||
        if params.table_char_dict_path is None:
 | 
					 | 
				
			||||||
            if os.path.exists(str(Path(__file__).parent / 'ppocr/utils/dict/table_structure_dict.txt')):
 | 
					 | 
				
			||||||
                params.table_char_dict_path = str(
 | 
					 | 
				
			||||||
                    Path(__file__).parent / 'ppocr/utils/dict/table_structure_dict.txt')
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                params.table_char_dict_path = str(
 | 
					 | 
				
			||||||
                    Path(__file__).parent.parent / 'ppocr/utils/dict/table_structure_dict.txt')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        print(params)
 | 
					 | 
				
			||||||
        super().__init__(params)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __call__(self, img):
 | 
					 | 
				
			||||||
        if isinstance(img, str):
 | 
					 | 
				
			||||||
            # download net image
 | 
					 | 
				
			||||||
            if img.startswith('http'):
 | 
					 | 
				
			||||||
                download_with_progressbar(img, 'tmp.jpg')
 | 
					 | 
				
			||||||
                img = 'tmp.jpg'
 | 
					 | 
				
			||||||
            image_file = img
 | 
					 | 
				
			||||||
            img, flag = check_and_read_gif(image_file)
 | 
					 | 
				
			||||||
            if not flag:
 | 
					 | 
				
			||||||
                with open(image_file, 'rb') as f:
 | 
					 | 
				
			||||||
                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
 | 
					 | 
				
			||||||
                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
 | 
					 | 
				
			||||||
            if img is None:
 | 
					 | 
				
			||||||
                logger.error("error in loading image:{}".format(image_file))
 | 
					 | 
				
			||||||
                return None
 | 
					 | 
				
			||||||
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
 | 
					 | 
				
			||||||
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        res = super().__call__(img)
 | 
					 | 
				
			||||||
        return res
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def main():
 | 
					 | 
				
			||||||
    # for cmd
 | 
					 | 
				
			||||||
    args = parse_args(mMain=True)
 | 
					 | 
				
			||||||
    image_dir = args.image_dir
 | 
					 | 
				
			||||||
    save_folder = args.output
 | 
					 | 
				
			||||||
    if image_dir.startswith('http'):
 | 
					 | 
				
			||||||
        download_with_progressbar(image_dir, 'tmp.jpg')
 | 
					 | 
				
			||||||
        image_file_list = ['tmp.jpg']
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        image_file_list = get_image_file_list(args.image_dir)
 | 
					 | 
				
			||||||
    if len(image_file_list) == 0:
 | 
					 | 
				
			||||||
        logger.error('no images find in {}'.format(args.image_dir))
 | 
					 | 
				
			||||||
        return
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    structure_engine = PaddleStructure(**(args.__dict__))
 | 
					 | 
				
			||||||
    for img_path in image_file_list:
 | 
					 | 
				
			||||||
        img_name = os.path.basename(img_path).split('.')[0]
 | 
					 | 
				
			||||||
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
 | 
					 | 
				
			||||||
        result = structure_engine(img_path)
 | 
					 | 
				
			||||||
        for item in result:
 | 
					 | 
				
			||||||
            logger.info(item['res'])
 | 
					 | 
				
			||||||
        save_res(result, save_folder, img_name)
 | 
					 | 
				
			||||||
        logger.info('result save to {}'.format(os.path.join(save_folder, img_name)))
 | 
					 | 
				
			||||||
@ -26,19 +26,18 @@ import numpy as np
 | 
				
			|||||||
import time
 | 
					import time
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import layoutparser as lp
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 | 
					from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 | 
				
			||||||
from ppocr.utils.logging import get_logger
 | 
					from ppocr.utils.logging import get_logger
 | 
				
			||||||
from tools.infer.predict_system import TextSystem
 | 
					from tools.infer.predict_system import TextSystem
 | 
				
			||||||
from ppstructure.table.predict_table import TableSystem, to_excel
 | 
					from ppstructure.table.predict_table import TableSystem, to_excel
 | 
				
			||||||
from ppstructure.utility import parse_args, draw_result
 | 
					from ppstructure.utility import parse_args, draw_structure_result
 | 
				
			||||||
 | 
					
 | 
				
			||||||
logger = get_logger()
 | 
					logger = get_logger()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class OCRSystem(object):
 | 
					class OCRSystem(object):
 | 
				
			||||||
    def __init__(self, args):
 | 
					    def __init__(self, args):
 | 
				
			||||||
 | 
					        import layoutparser as lp
 | 
				
			||||||
        args.det_limit_type = 'resize_long'
 | 
					        args.det_limit_type = 'resize_long'
 | 
				
			||||||
        args.drop_score = 0
 | 
					        args.drop_score = 0
 | 
				
			||||||
        if not args.show_log:
 | 
					        if not args.show_log:
 | 
				
			||||||
@ -80,7 +79,7 @@ class OCRSystem(object):
 | 
				
			|||||||
        return res_list
 | 
					        return res_list
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def save_res(res, save_folder, img_name):
 | 
					def save_structure_res(res, save_folder, img_name):
 | 
				
			||||||
    excel_save_folder = os.path.join(save_folder, img_name)
 | 
					    excel_save_folder = os.path.join(save_folder, img_name)
 | 
				
			||||||
    os.makedirs(excel_save_folder, exist_ok=True)
 | 
					    os.makedirs(excel_save_folder, exist_ok=True)
 | 
				
			||||||
    # save res
 | 
					    # save res
 | 
				
			||||||
@ -115,8 +114,8 @@ def main(args):
 | 
				
			|||||||
            continue
 | 
					            continue
 | 
				
			||||||
        starttime = time.time()
 | 
					        starttime = time.time()
 | 
				
			||||||
        res = structure_sys(img)
 | 
					        res = structure_sys(img)
 | 
				
			||||||
        save_res(res, save_folder, img_name)
 | 
					        save_structure_res(res, save_folder, img_name)
 | 
				
			||||||
        draw_img = draw_result(img, res, args.vis_font_path)
 | 
					        draw_img = draw_structure_result(img, res, args.vis_font_path)
 | 
				
			||||||
        cv2.imwrite(os.path.join(save_folder, img_name, 'show.jpg'), draw_img)
 | 
					        cv2.imwrite(os.path.join(save_folder, img_name, 'show.jpg'), draw_img)
 | 
				
			||||||
        logger.info('result save to {}'.format(os.path.join(save_folder, img_name)))
 | 
					        logger.info('result save to {}'.format(os.path.join(save_folder, img_name)))
 | 
				
			||||||
        elapse = time.time() - starttime
 | 
					        elapse = time.time() - starttime
 | 
				
			||||||
 | 
				
			|||||||
@ -1,70 +0,0 @@
 | 
				
			|||||||
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# Licensed under the Apache License, Version 2.0 (the "License");
 | 
					 | 
				
			||||||
# you may not use this file except in compliance with the License.
 | 
					 | 
				
			||||||
# You may obtain a copy of the License at
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
#     http://www.apache.org/licenses/LICENSE-2.0
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# Unless required by applicable law or agreed to in writing, software
 | 
					 | 
				
			||||||
# distributed under the License is distributed on an "AS IS" BASIS,
 | 
					 | 
				
			||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					 | 
				
			||||||
# See the License for the specific language governing permissions and
 | 
					 | 
				
			||||||
# limitations under the License.
 | 
					 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from setuptools import setup
 | 
					 | 
				
			||||||
from io import open
 | 
					 | 
				
			||||||
import shutil
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
with open('../requirements.txt', encoding="utf-8-sig") as f:
 | 
					 | 
				
			||||||
    requirements = f.readlines()
 | 
					 | 
				
			||||||
    requirements.append('tqdm')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def readme():
 | 
					 | 
				
			||||||
    with open('README_ch.md', encoding="utf-8-sig") as f:
 | 
					 | 
				
			||||||
        README = f.read()
 | 
					 | 
				
			||||||
    return README
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
shutil.copytree('./table', './ppstructure/table')
 | 
					 | 
				
			||||||
shutil.copyfile('./predict_system.py', './ppstructure/predict_system.py')
 | 
					 | 
				
			||||||
shutil.copyfile('./utility.py', './ppstructure/utility.py')
 | 
					 | 
				
			||||||
shutil.copytree('../ppocr', './ppocr')
 | 
					 | 
				
			||||||
shutil.copytree('../tools', './tools')
 | 
					 | 
				
			||||||
shutil.copyfile('../LICENSE', './LICENSE')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
setup(
 | 
					 | 
				
			||||||
    name='paddlestructure',
 | 
					 | 
				
			||||||
    packages=['paddlestructure'],
 | 
					 | 
				
			||||||
    package_dir={'paddlestructure': ''},
 | 
					 | 
				
			||||||
    include_package_data=True,
 | 
					 | 
				
			||||||
    entry_points={"console_scripts": ["paddlestructure= paddlestructure.paddlestructure:main"]},
 | 
					 | 
				
			||||||
    version='1.0',
 | 
					 | 
				
			||||||
    install_requires=requirements,
 | 
					 | 
				
			||||||
    license='Apache License 2.0',
 | 
					 | 
				
			||||||
    description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
 | 
					 | 
				
			||||||
    long_description=readme(),
 | 
					 | 
				
			||||||
    long_description_content_type='text/markdown',
 | 
					 | 
				
			||||||
    url='https://github.com/PaddlePaddle/PaddleOCR',
 | 
					 | 
				
			||||||
    download_url='https://github.com/PaddlePaddle/PaddleOCR.git',
 | 
					 | 
				
			||||||
    keywords=[
 | 
					 | 
				
			||||||
        'ocr textdetection textrecognition paddleocr crnn east star-net rosetta ocrlite db chineseocr chinesetextdetection chinesetextrecognition'
 | 
					 | 
				
			||||||
    ],
 | 
					 | 
				
			||||||
    classifiers=[
 | 
					 | 
				
			||||||
        'Intended Audience :: Developers', 'Operating System :: OS Independent',
 | 
					 | 
				
			||||||
        'Natural Language :: Chinese (Simplified)',
 | 
					 | 
				
			||||||
        'Programming Language :: Python :: 3',
 | 
					 | 
				
			||||||
        'Programming Language :: Python :: 3.2',
 | 
					 | 
				
			||||||
        'Programming Language :: Python :: 3.3',
 | 
					 | 
				
			||||||
        'Programming Language :: Python :: 3.4',
 | 
					 | 
				
			||||||
        'Programming Language :: Python :: 3.5',
 | 
					 | 
				
			||||||
        'Programming Language :: Python :: 3.6',
 | 
					 | 
				
			||||||
        'Programming Language :: Python :: 3.7', 'Topic :: Utilities'
 | 
					 | 
				
			||||||
    ], )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
shutil.rmtree('ppocr')
 | 
					 | 
				
			||||||
shutil.rmtree('tools')
 | 
					 | 
				
			||||||
shutil.rmtree('ppstructure')
 | 
					 | 
				
			||||||
os.remove('LICENSE')
 | 
					 | 
				
			||||||
@ -36,7 +36,7 @@ def parse_args():
 | 
				
			|||||||
    return parser.parse_args()
 | 
					    return parser.parse_args()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def draw_result(image, result, font_path):
 | 
					def draw_structure_result(image, result, font_path):
 | 
				
			||||||
    if isinstance(image, np.ndarray):
 | 
					    if isinstance(image, np.ndarray):
 | 
				
			||||||
        image = Image.fromarray(image)
 | 
					        image = Image.fromarray(image)
 | 
				
			||||||
    boxes, txts, scores = [], [], []
 | 
					    boxes, txts, scores = [], [], []
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										3
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								setup.py
									
									
									
									
									
								
							@ -14,6 +14,7 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from setuptools import setup
 | 
					from setuptools import setup
 | 
				
			||||||
from io import open
 | 
					from io import open
 | 
				
			||||||
 | 
					from paddleocr import VERSION
 | 
				
			||||||
 | 
					
 | 
				
			||||||
with open('requirements.txt', encoding="utf-8-sig") as f:
 | 
					with open('requirements.txt', encoding="utf-8-sig") as f:
 | 
				
			||||||
    requirements = f.readlines()
 | 
					    requirements = f.readlines()
 | 
				
			||||||
@ -32,7 +33,7 @@ setup(
 | 
				
			|||||||
    package_dir={'paddleocr': ''},
 | 
					    package_dir={'paddleocr': ''},
 | 
				
			||||||
    include_package_data=True,
 | 
					    include_package_data=True,
 | 
				
			||||||
    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
 | 
					    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
 | 
				
			||||||
    version='2.0.6',
 | 
					    version=VERSION,
 | 
				
			||||||
    install_requires=requirements,
 | 
					    install_requires=requirements,
 | 
				
			||||||
    license='Apache License 2.0',
 | 
					    license='Apache License 2.0',
 | 
				
			||||||
    description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
 | 
					    description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user