| 
									
										
										
										
											2024-12-12 14:17:24 +08:00
										 |  |  | import sys | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | from pathlib import Path | 
					
						
							|  |  |  | from typing import Any | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import paddle | 
					
						
							|  |  |  | import pytest | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | current_dir = os.path.dirname(os.path.abspath(__file__)) | 
					
						
							|  |  |  | sys.path.append(os.path.abspath(os.path.join(current_dir, ".."))) | 
					
						
							|  |  |  | from ppocr.modeling.backbones.rec_donut_swin import DonutSwinModel, DonutSwinModelOutput | 
					
						
							| 
									
										
										
										
											2025-05-12 21:55:26 +08:00
										 |  |  | from ppocr.modeling.backbones.rec_pphgnetv2 import PPHGNetV2_B4_Formula | 
					
						
							| 
									
										
										
										
											2024-12-23 13:14:33 +08:00
										 |  |  | from ppocr.modeling.backbones.rec_vary_vit import Vary_VIT_B_Formula | 
					
						
							| 
									
										
										
										
											2024-12-12 14:17:24 +08:00
										 |  |  | from ppocr.modeling.heads.rec_unimernet_head import UniMERNetHead | 
					
						
							| 
									
										
										
										
											2024-12-23 13:14:33 +08:00
										 |  |  | from ppocr.modeling.heads.rec_ppformulanet_head import PPFormulaNet_Head | 
					
						
							| 
									
										
										
										
											2024-12-12 14:17:24 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.fixture | 
					
						
							|  |  |  | def sample_image(): | 
					
						
							|  |  |  |     return paddle.randn([1, 1, 192, 672]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-23 13:14:33 +08:00
										 |  |  | @pytest.fixture | 
					
						
							|  |  |  | def sample_image_ppformulanet_s(): | 
					
						
							|  |  |  |     return paddle.randn([1, 1, 384, 384]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.fixture | 
					
						
							|  |  |  | def sample_image_ppformulanet_l(): | 
					
						
							|  |  |  |     return paddle.randn([1, 1, 768, 768]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-12 14:17:24 +08:00
										 |  |  | @pytest.fixture | 
					
						
							|  |  |  | def encoder_feat(): | 
					
						
							|  |  |  |     encoded_feat = paddle.randn([1, 126, 1024]) | 
					
						
							|  |  |  |     return DonutSwinModelOutput( | 
					
						
							|  |  |  |         last_hidden_state=encoded_feat, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-23 13:14:33 +08:00
										 |  |  | @pytest.fixture | 
					
						
							|  |  |  | def encoder_feat_ppformulanet_s(): | 
					
						
							|  |  |  |     encoded_feat = paddle.randn([1, 144, 2048]) | 
					
						
							|  |  |  |     return DonutSwinModelOutput( | 
					
						
							|  |  |  |         last_hidden_state=encoded_feat, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @pytest.fixture | 
					
						
							|  |  |  | def encoder_feat_ppformulanet_l(): | 
					
						
							|  |  |  |     encoded_feat = paddle.randn([1, 144, 1024]) | 
					
						
							|  |  |  |     return DonutSwinModelOutput( | 
					
						
							|  |  |  |         last_hidden_state=encoded_feat, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-12-12 14:17:24 +08:00
										 |  |  | def test_unimernet_backbone(sample_image): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Test UniMERNet backbone. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         sample_image: sample image to be processed. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     backbone = DonutSwinModel( | 
					
						
							|  |  |  |         hidden_size=1024, | 
					
						
							|  |  |  |         num_layers=4, | 
					
						
							|  |  |  |         num_heads=[4, 8, 16, 32], | 
					
						
							|  |  |  |         add_pooling_layer=True, | 
					
						
							|  |  |  |         use_mask_token=False, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     backbone.eval() | 
					
						
							|  |  |  |     with paddle.no_grad(): | 
					
						
							|  |  |  |         result = backbone(sample_image) | 
					
						
							|  |  |  |         encoder_feat = result[0] | 
					
						
							|  |  |  |         assert encoder_feat.shape == [1, 126, 1024] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_unimernet_head(encoder_feat): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Test UniMERNet head. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         encoder_feat: encoder feature from unimernet backbone. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     head = UniMERNetHead( | 
					
						
							|  |  |  |         max_new_tokens=5, | 
					
						
							|  |  |  |         decoder_start_token_id=0, | 
					
						
							|  |  |  |         temperature=0.2, | 
					
						
							|  |  |  |         do_sample=False, | 
					
						
							|  |  |  |         top_p=0.95, | 
					
						
							|  |  |  |         encoder_hidden_size=1024, | 
					
						
							|  |  |  |         is_export=False, | 
					
						
							|  |  |  |         length_aware=True, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     head.eval() | 
					
						
							|  |  |  |     with paddle.no_grad(): | 
					
						
							|  |  |  |         result = head(encoder_feat) | 
					
						
							|  |  |  |         assert result.shape == [1, 6] | 
					
						
							| 
									
										
										
										
											2024-12-23 13:14:33 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_ppformulanet_s_backbone(sample_image_ppformulanet_s): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Test PP-FormulaNet-S backbone. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         sample_image_ppformulanet_s: sample image to be processed. | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2025-05-12 21:55:26 +08:00
										 |  |  |     backbone = PPHGNetV2_B4_Formula( | 
					
						
							| 
									
										
										
										
											2024-12-23 13:14:33 +08:00
										 |  |  |         class_num=1024, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     backbone.eval() | 
					
						
							|  |  |  |     with paddle.no_grad(): | 
					
						
							|  |  |  |         result = backbone(sample_image_ppformulanet_s) | 
					
						
							|  |  |  |         encoder_feat = result[0] | 
					
						
							|  |  |  |         assert encoder_feat.shape == [1, 144, 2048] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_ppformulanet_s_head(encoder_feat_ppformulanet_s): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Test PP-FormulaNet-S head. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         encoder_feat_ppformulanet_s: encoder feature from PP-FormulaNet-S backbone. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     head = PPFormulaNet_Head( | 
					
						
							|  |  |  |         max_new_tokens=6, | 
					
						
							|  |  |  |         decoder_start_token_id=0, | 
					
						
							|  |  |  |         decoder_ffn_dim=1536, | 
					
						
							|  |  |  |         decoder_hidden_size=384, | 
					
						
							|  |  |  |         decoder_layers=2, | 
					
						
							|  |  |  |         temperature=0.2, | 
					
						
							|  |  |  |         do_sample=False, | 
					
						
							|  |  |  |         top_p=0.95, | 
					
						
							|  |  |  |         encoder_hidden_size=2048, | 
					
						
							|  |  |  |         is_export=False, | 
					
						
							|  |  |  |         length_aware=True, | 
					
						
							|  |  |  |         use_parallel=True, | 
					
						
							|  |  |  |         parallel_step=3, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     head.eval() | 
					
						
							|  |  |  |     with paddle.no_grad(): | 
					
						
							|  |  |  |         result = head(encoder_feat_ppformulanet_s) | 
					
						
							|  |  |  |         assert result.shape == [1, 9] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_ppformulanet_l_backbone(sample_image_ppformulanet_l): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Test PP-FormulaNet-L backbone. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         sample_image_ppformulanet_l: sample image to be processed. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     backbone = Vary_VIT_B_Formula( | 
					
						
							|  |  |  |         image_size=768, | 
					
						
							|  |  |  |         encoder_embed_dim=768, | 
					
						
							|  |  |  |         encoder_depth=12, | 
					
						
							|  |  |  |         encoder_num_heads=12, | 
					
						
							|  |  |  |         encoder_global_attn_indexes=[2, 5, 8, 11], | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     backbone.eval() | 
					
						
							|  |  |  |     with paddle.no_grad(): | 
					
						
							|  |  |  |         result = backbone(sample_image_ppformulanet_l) | 
					
						
							|  |  |  |         encoder_feat = result[0] | 
					
						
							|  |  |  |         assert encoder_feat.shape == [1, 144, 1024] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_ppformulanet_l_head(encoder_feat_ppformulanet_l): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Test PP-FormulaNet-L head. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Args: | 
					
						
							|  |  |  |         encoder_feat_ppformulanet_l: encoder feature from PP-FormulaNet-L Head. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     head = PPFormulaNet_Head( | 
					
						
							|  |  |  |         max_new_tokens=6, | 
					
						
							|  |  |  |         decoder_start_token_id=0, | 
					
						
							|  |  |  |         decoder_ffn_dim=2048, | 
					
						
							|  |  |  |         decoder_hidden_size=512, | 
					
						
							|  |  |  |         decoder_layers=8, | 
					
						
							|  |  |  |         temperature=0.2, | 
					
						
							|  |  |  |         do_sample=False, | 
					
						
							|  |  |  |         top_p=0.95, | 
					
						
							|  |  |  |         encoder_hidden_size=1024, | 
					
						
							|  |  |  |         is_export=False, | 
					
						
							|  |  |  |         length_aware=False, | 
					
						
							|  |  |  |         use_parallel=False, | 
					
						
							|  |  |  |         parallel_step=0, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     head.eval() | 
					
						
							|  |  |  |     with paddle.no_grad(): | 
					
						
							|  |  |  |         result = head(encoder_feat_ppformulanet_l) | 
					
						
							|  |  |  |         assert result.shape == [1, 7] |