From 2b3b3554c05ae615ed7eb051c2ac7c6bb8bc985d Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Wed, 17 Apr 2024 10:54:59 +0800 Subject: [PATCH] use tensor.shape bug not paddle.shape(tensor) (#11919) * use tensor.shape bug not paddle.shape(tensor) * refine * refine --- ppocr/modeling/backbones/rec_nrtr_mtb.py | 2 +- ppocr/modeling/backbones/rec_svtrnet.py | 2 +- ppocr/modeling/backbones/rec_vit.py | 4 ++-- ppocr/modeling/backbones/rec_vit_parseq.py | 6 +++--- ppocr/modeling/heads/rec_abinet_head.py | 2 +- ppocr/modeling/heads/rec_aster_head.py | 4 ++-- ppocr/modeling/heads/rec_att_head.py | 2 +- ppocr/modeling/heads/rec_cppd_head.py | 2 +- ppocr/modeling/heads/rec_multi_head.py | 2 +- ppocr/modeling/heads/rec_nrtr_head.py | 18 +++++++++--------- ppocr/modeling/heads/rec_sar_head.py | 8 ++++---- ppocr/modeling/heads/rec_satrn_head.py | 2 +- ppocr/modeling/heads/rec_spin_att_head.py | 2 +- ppocr/modeling/heads/sr_rensnet_transformer.py | 4 ++-- ppocr/modeling/heads/table_master_head.py | 4 ++-- ppocr/modeling/necks/csp_pan.py | 2 +- ppocr/modeling/necks/sast_fpn.py | 2 +- ppocr/modeling/transforms/tbsrn.py | 4 ++-- .../transforms/tps_spatial_transformer.py | 2 +- 19 files changed, 37 insertions(+), 37 deletions(-) diff --git a/ppocr/modeling/backbones/rec_nrtr_mtb.py b/ppocr/modeling/backbones/rec_nrtr_mtb.py index 22e02a6371..9315a9456d 100644 --- a/ppocr/modeling/backbones/rec_nrtr_mtb.py +++ b/ppocr/modeling/backbones/rec_nrtr_mtb.py @@ -42,7 +42,7 @@ class MTB(nn.Layer): if self.cnn_num == 2: # (b, w, h, c) x = paddle.transpose(x, [0, 3, 2, 1]) - x_shape = paddle.shape(x) + x_shape = x.shape x = paddle.reshape( x, [x_shape[0], x_shape[1], x_shape[2] * x_shape[3]]) return x diff --git a/ppocr/modeling/backbones/rec_svtrnet.py b/ppocr/modeling/backbones/rec_svtrnet.py index ea865a2da1..daddfeac43 100644 --- a/ppocr/modeling/backbones/rec_svtrnet.py +++ b/ppocr/modeling/backbones/rec_svtrnet.py @@ -33,7 +33,7 @@ def drop_path(x, drop_prob=0., training=False): if drop_prob == 0. or not training: return x keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype) - shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) + shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) random_tensor = paddle.floor(random_tensor) # binarize output = x.divide(keep_prob) * random_tensor diff --git a/ppocr/modeling/backbones/rec_vit.py b/ppocr/modeling/backbones/rec_vit.py index b7a55539da..ff31377478 100644 --- a/ppocr/modeling/backbones/rec_vit.py +++ b/ppocr/modeling/backbones/rec_vit.py @@ -33,7 +33,7 @@ def drop_path(x, drop_prob=0., training=False): if drop_prob == 0. or not training: return x keep_prob = paddle.to_tensor(1 - drop_prob) - shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) + shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype) random_tensor = paddle.floor(random_tensor) # binarize output = x.divide(keep_prob) * random_tensor @@ -243,7 +243,7 @@ class ViT(nn.Layer): def forward(self, x): x = self.patch_embed(x).flatten(2).transpose((0, 2, 1)) - x = x + self.pos_embed[:, 1:, :] #[:, :paddle.shape(x)[1], :] + x = x + self.pos_embed[:, 1:, :] #[:, :x.shape[1], :] x = self.pos_drop(x) for blk in self.blocks1: x = blk(x) diff --git a/ppocr/modeling/backbones/rec_vit_parseq.py b/ppocr/modeling/backbones/rec_vit_parseq.py index 403d122cb7..2bb7592a7c 100644 --- a/ppocr/modeling/backbones/rec_vit_parseq.py +++ b/ppocr/modeling/backbones/rec_vit_parseq.py @@ -43,7 +43,7 @@ def drop_path(x, drop_prob=0., training=False): if drop_prob == 0. or not training: return x keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype) - shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1) + shape = (x.shape[0], ) + (1, ) * (x.ndim - 1) random_tensor = keep_prob + paddle.rand(shape).astype(x.dtype) random_tensor = paddle.floor(random_tensor) # binarize output = x.divide(keep_prob) * random_tensor @@ -113,7 +113,7 @@ class Attention(nn.Layer): self.proj_drop = nn.Dropout(proj_drop) def forward(self, x): - # B= paddle.shape(x)[0] + # B= x.shape[0] N, C = x.shape[1:] qkv = self.qkv(x).reshape((-1, N, 3, self.num_heads, C // self.num_heads)).transpose((2, 0, 3, 1, 4)) @@ -280,7 +280,7 @@ class VisionTransformer(nn.Layer): ones_(m.weight) def forward_features(self, x): - B = paddle.shape(x)[0] + B = x.shape[0] x = self.patch_embed(x) x = x + self.pos_embed x = self.pos_drop(x) diff --git a/ppocr/modeling/heads/rec_abinet_head.py b/ppocr/modeling/heads/rec_abinet_head.py index e101e21f56..a95f2f1164 100644 --- a/ppocr/modeling/heads/rec_abinet_head.py +++ b/ppocr/modeling/heads/rec_abinet_head.py @@ -285,7 +285,7 @@ def _get_mask(length, max_length): Unmasked positions are filled with float(0.0). """ length = length.unsqueeze(-1) - B = paddle.shape(length)[0] + B = length.shape[0] grid = paddle.arange(0, max_length).unsqueeze(0).tile([B, 1]) zero_mask = paddle.zeros([B, max_length], dtype='float32') inf_mask = paddle.full([B, max_length], '-inf', dtype='float32') diff --git a/ppocr/modeling/heads/rec_aster_head.py b/ppocr/modeling/heads/rec_aster_head.py index c95e8fd31f..4e36e26ef3 100644 --- a/ppocr/modeling/heads/rec_aster_head.py +++ b/ppocr/modeling/heads/rec_aster_head.py @@ -81,7 +81,7 @@ class Embedding(nn.Layer): self.embed_dim) # Embed encoder output to a word-embedding like def forward(self, x): - x = paddle.reshape(x, [paddle.shape(x)[0], -1]) + x = paddle.reshape(x, [x.shape[0], -1]) x = self.eEmbed(x) return x @@ -105,7 +105,7 @@ class AttentionRecognitionHead(nn.Layer): def forward(self, x, embed): x, targets, lengths = x - batch_size = paddle.shape(x)[0] + batch_size = x.shape[0] # Decoder state = self.decoder.get_initial_state(embed) outputs = [] diff --git a/ppocr/modeling/heads/rec_att_head.py b/ppocr/modeling/heads/rec_att_head.py index 6349ee0c2c..7bd5bcc039 100644 --- a/ppocr/modeling/heads/rec_att_head.py +++ b/ppocr/modeling/heads/rec_att_head.py @@ -38,7 +38,7 @@ class AttentionHead(nn.Layer): return input_ont_hot def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = paddle.shape(inputs)[0] + batch_size = inputs.shape[0] num_steps = batch_max_length hidden = paddle.zeros((batch_size, self.hidden_size)) diff --git a/ppocr/modeling/heads/rec_cppd_head.py b/ppocr/modeling/heads/rec_cppd_head.py index 9ff785a36d..dc3ba4e12a 100644 --- a/ppocr/modeling/heads/rec_cppd_head.py +++ b/ppocr/modeling/heads/rec_cppd_head.py @@ -294,7 +294,7 @@ class CPPDHead(nn.Layer): char_node_embed = self.char_node_embed( paddle.arange(self.out_channels)).unsqueeze(0) char_node_embed = paddle.tile(char_node_embed, [bs, 1, 1]) - counting_char_num = paddle.shape(char_node_embed)[1] + counting_char_num = char_node_embed.shape[1] pos_node_embed = self.pos_node_embed(paddle.arange( self.max_len)).unsqueeze(0) + self.char_pos_embed pos_node_embed = paddle.tile(pos_node_embed, [bs, 1, 1]) diff --git a/ppocr/modeling/heads/rec_multi_head.py b/ppocr/modeling/heads/rec_multi_head.py index ae557e7aa0..bca76511e0 100644 --- a/ppocr/modeling/heads/rec_multi_head.py +++ b/ppocr/modeling/heads/rec_multi_head.py @@ -50,7 +50,7 @@ class AddPos(nn.Layer): trunc_normal_(self.dec_pos_embed) def forward(self,x): - x = x + self.dec_pos_embed[:, :paddle.shape(x)[1], :] + x = x + self.dec_pos_embed[:, :x.shape[1], :] return x diff --git a/ppocr/modeling/heads/rec_nrtr_head.py b/ppocr/modeling/heads/rec_nrtr_head.py index cff6aeb071..46de11428e 100644 --- a/ppocr/modeling/heads/rec_nrtr_head.py +++ b/ppocr/modeling/heads/rec_nrtr_head.py @@ -150,7 +150,7 @@ class Transformer(nn.Layer): def forward_test(self, src): - bs = paddle.shape(src)[0] + bs = src.shape[0] if self.encoder is not None: src = self.positional_encoding(src) for encoder_layer in self.encoder: @@ -164,7 +164,7 @@ class Transformer(nn.Layer): dec_seq_embed = self.embedding(dec_seq) dec_seq_embed = self.positional_encoding(dec_seq_embed) tgt_mask = self.generate_square_subsequent_mask( - paddle.shape(dec_seq_embed)[1]) + dec_seq_embed.shape[1]) tgt = dec_seq_embed for decoder_layer in self.decoder: tgt = decoder_layer(tgt, memory, self_mask=tgt_mask) @@ -175,7 +175,7 @@ class Transformer(nn.Layer): if paddle.equal_all( preds_idx, paddle.full( - paddle.shape(preds_idx), 3, dtype='int64')): + preds_idx.shape, 3, dtype='int64')): break preds_prob = paddle.max(word_prob, axis=-1) dec_seq = paddle.concat( @@ -198,7 +198,7 @@ class Transformer(nn.Layer): n_prev_active_inst, n_bm): """ Collect tensor parts associated to active instances. """ - beamed_tensor_shape = paddle.shape(beamed_tensor) + beamed_tensor_shape = beamed_tensor.shape n_curr_active_inst = len(curr_active_inst_idx) new_shape = (n_curr_active_inst * n_bm, beamed_tensor_shape[1], beamed_tensor_shape[2]) @@ -243,7 +243,7 @@ class Transformer(nn.Layer): dec_seq = self.embedding(dec_seq) dec_seq = self.positional_encoding(dec_seq) tgt_mask = self.generate_square_subsequent_mask( - paddle.shape(dec_seq)[1]) + dec_seq.shape[1]) tgt = dec_seq for decoder_layer in self.decoder: tgt = decoder_layer(tgt, enc_output, self_mask=tgt_mask) @@ -294,7 +294,7 @@ class Transformer(nn.Layer): src_enc = images n_bm = self.beam_size - src_shape = paddle.shape(src_enc) + src_shape = src_enc.shape inst_dec_beams = [Beam(n_bm) for _ in range(1)] active_inst_idx_list = list(range(1)) # Repeat data for beam search @@ -500,7 +500,7 @@ class PositionalEncoding(nn.Layer): >>> output = pos_encoder(x) """ x = x.transpose([1, 0, 2]) - x = x + self.pe[:paddle.shape(x)[0], :] + x = x + self.pe[:x.shape[0], :] return self.dropout(x).transpose([1, 0, 2]) @@ -552,13 +552,13 @@ class PositionalEncoding_2d(nn.Layer): Examples: >>> output = pos_encoder(x) """ - w_pe = self.pe[:paddle.shape(x)[-1], :] + w_pe = self.pe[:x.shape[-1], :] w1 = self.linear1(self.avg_pool_1(x).squeeze()).unsqueeze(0) w_pe = w_pe * w1 w_pe = paddle.transpose(w_pe, [1, 2, 0]) w_pe = paddle.unsqueeze(w_pe, 2) - h_pe = self.pe[:paddle.shape(x).shape[-2], :] + h_pe = self.pe[:x.shape.shape[-2], :] w2 = self.linear2(self.avg_pool_2(x).squeeze()).unsqueeze(0) h_pe = h_pe * w2 h_pe = paddle.transpose(h_pe, [1, 2, 0]) diff --git a/ppocr/modeling/heads/rec_sar_head.py b/ppocr/modeling/heads/rec_sar_head.py index 11addb7023..b301787adb 100644 --- a/ppocr/modeling/heads/rec_sar_head.py +++ b/ppocr/modeling/heads/rec_sar_head.py @@ -83,7 +83,7 @@ class SAREncoder(nn.Layer): def forward(self, feat, img_metas=None): if img_metas is not None: - assert len(img_metas[0]) == paddle.shape(feat)[0] + assert len(img_metas[0]) == feat.shape[0] valid_ratios = None if img_metas is not None and self.mask: @@ -99,7 +99,7 @@ class SAREncoder(nn.Layer): if valid_ratios is not None: valid_hf = [] T = paddle.shape(holistic_feat)[1] - for i in range(paddle.shape(valid_ratios)[0]): + for i in range(valid_ratios.shape[0]): valid_step = paddle.minimum( T, paddle.ceil(valid_ratios[i] * T).astype(T.dtype)) - 1 valid_hf.append(holistic_feat[i, valid_step, :]) @@ -253,7 +253,7 @@ class ParallelSARDecoder(BaseDecoder): if valid_ratios is not None: # cal mask of attention weight - for i in range(paddle.shape(valid_ratios)[0]): + for i in range(valid_ratios.shape[0]): valid_width = paddle.minimum( w, paddle.ceil(valid_ratios[i] * w).astype("int32")) if valid_width < w: @@ -292,7 +292,7 @@ class ParallelSARDecoder(BaseDecoder): img_metas: [label, valid_ratio] ''' if img_metas is not None: - assert paddle.shape(img_metas[0])[0] == paddle.shape(feat)[0] + assert img_metas[0].shape[0] == feat.shape[0] valid_ratios = None if img_metas is not None and self.mask: diff --git a/ppocr/modeling/heads/rec_satrn_head.py b/ppocr/modeling/heads/rec_satrn_head.py index 263c3b1bb3..ce7ae05450 100644 --- a/ppocr/modeling/heads/rec_satrn_head.py +++ b/ppocr/modeling/heads/rec_satrn_head.py @@ -283,7 +283,7 @@ class SATRNEncoder(nn.Layer): Tensor: A tensor of shape :math:`(N, T, D_m)`. """ if valid_ratios is None: - bs = paddle.shape(feat)[0] + bs = feat.shape[0] valid_ratios = paddle.full((bs, 1), 1., dtype=paddle.float32) feat = self.position_enc(feat) diff --git a/ppocr/modeling/heads/rec_spin_att_head.py b/ppocr/modeling/heads/rec_spin_att_head.py index 86e35e4339..d4d364aad9 100644 --- a/ppocr/modeling/heads/rec_spin_att_head.py +++ b/ppocr/modeling/heads/rec_spin_att_head.py @@ -42,7 +42,7 @@ class SPINAttentionHead(nn.Layer): return input_ont_hot def forward(self, inputs, targets=None, batch_max_length=25): - batch_size = paddle.shape(inputs)[0] + batch_size = inputs.shape[0] num_steps = batch_max_length + 1 # +1 for [sos] at end of sentence hidden = (paddle.zeros((batch_size, self.hidden_size)), diff --git a/ppocr/modeling/heads/sr_rensnet_transformer.py b/ppocr/modeling/heads/sr_rensnet_transformer.py index df0d0c9299..8f2705678e 100644 --- a/ppocr/modeling/heads/sr_rensnet_transformer.py +++ b/ppocr/modeling/heads/sr_rensnet_transformer.py @@ -78,7 +78,7 @@ class MultiHeadedAttention(nn.Layer): def forward(self, query, key, value, mask=None, attention_map=None): if mask is not None: mask = mask.unsqueeze(1) - nbatches = paddle.shape(query)[0] + nbatches = query.shape[0] query, key, value = \ [paddle.transpose(l(x).reshape([nbatches, -1, self.h, self.d_k]), [0,2,1,3]) @@ -230,7 +230,7 @@ class PositionalEncoding(nn.Layer): self.register_buffer('pe', pe) def forward(self, x): - x = x + self.pe[:, :paddle.shape(x)[1]] + x = x + self.pe[:, :x.shape[1]] return self.dropout(x) diff --git a/ppocr/modeling/heads/table_master_head.py b/ppocr/modeling/heads/table_master_head.py index 7d37e36960..8ae7d52c45 100644 --- a/ppocr/modeling/heads/table_master_head.py +++ b/ppocr/modeling/heads/table_master_head.py @@ -71,7 +71,7 @@ class TableMasterHead(nn.Layer): """ trg_pad_mask = (tgt != self.PAD).unsqueeze(1).unsqueeze(3) - tgt_len = paddle.shape(tgt)[1] + tgt_len = tgt.shape[1] trg_sub_mask = paddle.tril( paddle.ones( ([tgt_len, tgt_len]), dtype=paddle.float32)) @@ -279,5 +279,5 @@ class PositionalEncoding(nn.Layer): self.register_buffer('pe', pe) def forward(self, feat, **kwargs): - feat = feat + self.pe[:, :paddle.shape(feat)[1]] # pe 1*5000*512 + feat = feat + self.pe[:, :feat.shape[1]] # pe 1*5000*512 return self.dropout(feat) diff --git a/ppocr/modeling/necks/csp_pan.py b/ppocr/modeling/necks/csp_pan.py index f4f8547f7d..1819619bfb 100755 --- a/ppocr/modeling/necks/csp_pan.py +++ b/ppocr/modeling/necks/csp_pan.py @@ -305,7 +305,7 @@ class CSPPAN(nn.Layer): feat_heigh = inner_outs[0] feat_low = inputs[idx - 1] upsample_feat = F.upsample( - feat_heigh, size=paddle.shape(feat_low)[2:4], mode="nearest") + feat_heigh, size=feat_low.shape[2:4], mode="nearest") inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx]( paddle.concat([upsample_feat, feat_low], 1)) diff --git a/ppocr/modeling/necks/sast_fpn.py b/ppocr/modeling/necks/sast_fpn.py index 9b6024590a..d106179708 100644 --- a/ppocr/modeling/necks/sast_fpn.py +++ b/ppocr/modeling/necks/sast_fpn.py @@ -222,7 +222,7 @@ class Cross_Attention(nn.Layer): return f_weight def forward(self, f_common): - f_shape = paddle.shape(f_common) + f_shape = f_common.shape # print('f_shape: ', f_shape) f_theta = self.theta_conv(f_common) diff --git a/ppocr/modeling/transforms/tbsrn.py b/ppocr/modeling/transforms/tbsrn.py index e3e77bd36a..a53d0d277e 100644 --- a/ppocr/modeling/transforms/tbsrn.py +++ b/ppocr/modeling/transforms/tbsrn.py @@ -80,7 +80,7 @@ class FeatureEnhancer(nn.Layer): global_info: (batch, embedding_size, 1, 1) conv_feature: (batch, channel, H, W) ''' - batch = paddle.shape(conv_feature)[0] + batch = conv_feature.shape[0] position2d = positionalencoding2d( 64, 16, 64).cast('float32').unsqueeze(0).reshape([1, 64, 1024]) position2d = position2d.tile([batch, 1, 1]) @@ -276,7 +276,7 @@ class RecurrentResidualBlock(nn.Layer): residual = self.conv2(residual) residual = self.bn2(residual) - size = paddle.shape(residual) + size = residual.shape residual = residual.reshape([size[0], size[1], -1]) residual = self.feature_enhancer(residual) residual = residual.reshape([size[0], size[1], size[2], size[3]]) diff --git a/ppocr/modeling/transforms/tps_spatial_transformer.py b/ppocr/modeling/transforms/tps_spatial_transformer.py index 35b1d8bf71..4c905ee0cd 100644 --- a/ppocr/modeling/transforms/tps_spatial_transformer.py +++ b/ppocr/modeling/transforms/tps_spatial_transformer.py @@ -152,7 +152,7 @@ class TPSSpatialTransformer(nn.Layer): assert source_control_points.ndimension() == 3 assert source_control_points.shape[1] == self.num_control_points assert source_control_points.shape[2] == 2 - batch_size = paddle.shape(source_control_points)[0] + batch_size = source_control_points.shape[0] padding_matrix = paddle.expand( self.padding_matrix, shape=[batch_size, 3, 2])