This commit is contained in:
co63oc 2025-03-04 14:20:13 +08:00 committed by GitHub
parent de12ece0aa
commit 78ec762aac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 44 additions and 44 deletions

View File

@ -10,7 +10,7 @@
#define PADDLE_WITH_CUDA #define PADDLE_WITH_CUDA
#define CHECK_INPUT_SAME(x1, x2) \ #define CHECK_INPUT_SAME(x1, x2) \
PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.") PD_CHECK(x1.place() == x2.place(), "input must be same place.")
#define CHECK_INPUT_CPU(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.") #define CHECK_INPUT_CPU(x) PD_CHECK(x.is_cpu(), #x " must be a CPU Tensor.")
template <typename T> struct PreCalc { template <typename T> struct PreCalc {

View File

@ -1026,14 +1026,14 @@ class CTCDKDLoss(nn.Layer):
pred_student = F.softmax(logits_student / self.temperature, axis=-1) pred_student = F.softmax(logits_student / self.temperature, axis=-1)
pred_teacher = F.softmax(logits_teacher / self.temperature, axis=-1) pred_teacher = F.softmax(logits_teacher / self.temperature, axis=-1)
# differents with dkd # differences with dkd
pred_student = paddle.mean(pred_student, axis=1) pred_student = paddle.mean(pred_student, axis=1)
pred_teacher = paddle.mean(pred_teacher, axis=1) pred_teacher = paddle.mean(pred_teacher, axis=1)
pred_student = self._cat_mask(pred_student, gt_mask, other_mask) pred_student = self._cat_mask(pred_student, gt_mask, other_mask)
pred_teacher = self._cat_mask(pred_teacher, gt_mask, other_mask) pred_teacher = self._cat_mask(pred_teacher, gt_mask, other_mask)
# differents with dkd # differences with dkd
tckd_loss = self.kl_loss(pred_student, pred_teacher) tckd_loss = self.kl_loss(pred_student, pred_teacher)
gt_mask_ex = paddle.expand_as(gt_mask.unsqueeze(axis=1), logits_teacher) gt_mask_ex = paddle.expand_as(gt_mask.unsqueeze(axis=1), logits_teacher)
@ -1043,11 +1043,11 @@ class CTCDKDLoss(nn.Layer):
pred_student_part2 = F.softmax( pred_student_part2 = F.softmax(
logits_student / self.temperature - 1000.0 * gt_mask_ex, axis=-1 logits_student / self.temperature - 1000.0 * gt_mask_ex, axis=-1
) )
# differents with dkd # differences with dkd
pred_teacher_part2 = paddle.mean(pred_teacher_part2, axis=1) pred_teacher_part2 = paddle.mean(pred_teacher_part2, axis=1)
pred_student_part2 = paddle.mean(pred_student_part2, axis=1) pred_student_part2 = paddle.mean(pred_student_part2, axis=1)
# differents with dkd # differences with dkd
nckd_loss = self.kl_loss(pred_student_part2, pred_teacher_part2) nckd_loss = self.kl_loss(pred_student_part2, pred_teacher_part2)
loss = self.alpha * tckd_loss + self.beta * nckd_loss loss = self.alpha * tckd_loss + self.beta * nckd_loss
return loss return loss

View File

@ -36,7 +36,7 @@ class BaseModel(nn.Layer):
model_type = config["model_type"] model_type = config["model_type"]
# build transform, # build transform,
# for rec, transform can be TPS,None # for rec, transform can be TPS,None
# for det and cls, transform shoule to be None, # for det and cls, transform should be None,
# if you make model differently, you can use transform in det and cls # if you make model differently, you can use transform in det and cls
if "Transform" not in config or config["Transform"] is None: if "Transform" not in config or config["Transform"] is None:
self.use_transform = False self.use_transform = False

View File

@ -228,7 +228,7 @@ class MbConvBlock(nn.Layer):
x = F.sigmoid(x_squeezed) * x x = F.sigmoid(x_squeezed) * x
x = self._bn2(self._project_conv(x)) x = self._bn2(self._project_conv(x))
# skip conntection and drop connect # skip connection and drop connect
if self.id_skip and self._block_args.stride == 1 and self.inp == self.final_oup: if self.id_skip and self._block_args.stride == 1 and self.inp == self.final_oup:
if drop_connect_rate: if drop_connect_rate:
x = self._drop_connect(x, p=drop_connect_rate, training=self.training) x = self._drop_connect(x, p=drop_connect_rate, training=self.training)

View File

@ -521,7 +521,7 @@ class TheseusLayer(nn.Layer):
return_patterns = [stages_pattern[i] for i in return_stages] return_patterns = [stages_pattern[i] for i in return_stages]
if return_patterns: if return_patterns:
# call update_res function after the __init__ of the object has completed execution, that is, the contructing of layer or model has been completed. # call update_res function after the __init__ of the object has completed execution, that is, the constructing of layer or model has been completed.
def update_res_hook(layer, input): def update_res_hook(layer, input):
self.update_res(return_patterns) self.update_res(return_patterns)
@ -680,7 +680,7 @@ class TheseusLayer(nn.Layer):
res = self.upgrade_sublayer(layer_name, stop_grad) res = self.upgrade_sublayer(layer_name, stop_grad)
if len(res) == 0: if len(res) == 0:
msg = "Failed to stop the gradient befor the layer named '{layer_name}'" msg = "Failed to stop the gradient before the layer named '{layer_name}'"
return False return False
return True return True
@ -734,12 +734,12 @@ def save_sub_res_hook(layer, input, output):
def set_identity( def set_identity(
parent_layer: nn.Layer, layer_name: str, layer_index_list: str = None parent_layer: nn.Layer, layer_name: str, layer_index_list: str = None
) -> bool: ) -> bool:
"""set the layer specified by layer_name and layer_index_list to Indentity. """set the layer specified by layer_name and layer_index_list to Identity.
Args: Args:
parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index_list. parent_layer (nn.Layer): The parent layer of target layer specified by layer_name and layer_index_list.
layer_name (str): The name of target layer to be set to Indentity. layer_name (str): The name of target layer to be set to Identity.
layer_index_list (str, optional): The index of target layer to be set to Indentity in parent_layer. Defaults to None. layer_index_list (str, optional): The index of target layer to be set to Identity in parent_layer. Defaults to None.
Returns: Returns:
bool: True if successfully, False otherwise. bool: True if successfully, False otherwise.
@ -775,7 +775,7 @@ def parse_pattern_str(
"""parse the string type pattern. """parse the string type pattern.
Args: Args:
pattern (str): The pattern to discribe layer. pattern (str): The pattern to describe layer.
parent_layer (nn.Layer): The root layer relative to the pattern. parent_layer (nn.Layer): The root layer relative to the pattern.
Returns: Returns:
@ -806,7 +806,7 @@ def parse_pattern_str(
target_layer = getattr(parent_layer, target_layer_name, None) target_layer = getattr(parent_layer, target_layer_name, None)
if target_layer is None: if target_layer is None:
msg = f"Not found layer named('{target_layer_name}') specifed in pattern('{pattern}')." msg = f"Not found layer named('{target_layer_name}') specified in pattern('{pattern}')."
return None return None
if target_layer_index_list: if target_layer_index_list:
@ -814,7 +814,7 @@ def parse_pattern_str(
if int(target_layer_index) < 0 or int(target_layer_index) >= len( if int(target_layer_index) < 0 or int(target_layer_index) >= len(
target_layer target_layer
): ):
msg = f"Not found layer by index('{target_layer_index}') specifed in pattern('{pattern}'). The index should < {len(target_layer)} and > 0." msg = f"Not found layer by index('{target_layer_index}') specified in pattern('{pattern}'). The index should < {len(target_layer)} and > 0."
return None return None
target_layer = target_layer[target_layer_index] target_layer = target_layer[target_layer_index]

View File

@ -78,10 +78,10 @@ class BCNLanguage(nn.Layer):
embed = self.token_encoder(embed) # (B, N, C) embed = self.token_encoder(embed) # (B, N, C)
padding_mask = _get_mask(lengths, self.max_length) padding_mask = _get_mask(lengths, self.max_length)
zeros = paddle.zeros_like(embed) # (B, N, C) zeros = paddle.zeros_like(embed) # (B, N, C)
qeury = self.pos_encoder(zeros) query = self.pos_encoder(zeros)
for decoder_layer in self.decoder: for decoder_layer in self.decoder:
qeury = decoder_layer(qeury, embed, cross_mask=padding_mask) query = decoder_layer(query, embed, cross_mask=padding_mask)
output = qeury # (B, N, C) output = query # (B, N, C)
logits = self.cls(output) # (B, N, C) logits = self.cls(output) # (B, N, C)
@ -246,7 +246,7 @@ class ABINetHead(nn.Layer):
lengths = align_lengths lengths = align_lengths
lengths = paddle.clip( lengths = paddle.clip(
lengths, 2, self.max_length lengths, 2, self.max_length
) # TODO:move to langauge model ) # TODO:move to language model
l_feature, l_logits = self.language(tokens, lengths) l_feature, l_logits = self.language(tokens, lengths)
# alignment # alignment

View File

@ -216,7 +216,7 @@ class AttentionRecognitionHead(nn.Layer):
) )
state = paddle.index_select(state, index=predecessors.squeeze(), axis=1) state = paddle.index_select(state, index=predecessors.squeeze(), axis=1)
# Update sequence socres and erase scores for <eos> symbol so that they aren't expanded # Update sequence scores and erase scores for <eos> symbol so that they aren't expanded
stored_scores.append(sequence_scores.clone()) stored_scores.append(sequence_scores.clone())
y_prev = paddle.reshape(y_prev, shape=[-1, 1]) y_prev = paddle.reshape(y_prev, shape=[-1, 1])
eos_prev = paddle.full_like(y_prev, fill_value=eos) eos_prev = paddle.full_like(y_prev, fill_value=eos)

View File

@ -23,7 +23,7 @@ from paddle.nn.initializer import XavierNormal as xavier_normal_
class Transformer(nn.Layer): class Transformer(nn.Layer):
"""A transformer model. User is able to modify the attributes as needed. The architechture """A transformer model. User is able to modify the attributes as needed. The architecture
is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer, is based on the paper "Attention Is All You Need". Ashish Vaswani, Noam Shazeer,
Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and
Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information Illia Polosukhin. 2017. Attention is all you need. In Advances in Neural Information

View File

@ -386,7 +386,7 @@ class ParseQHead(nn.Layer):
) )
logits = self.head(tgt_out) logits = self.head(tgt_out)
# transfer to probility # transfer to probability
logits = F.softmax(logits, axis=-1) logits = F.softmax(logits, axis=-1)
final_output = {"predict": logits} final_output = {"predict": logits}

View File

@ -990,7 +990,7 @@ class PPFormulaNet_Head(UniMERNetHead):
if isinstance(decoder_start_token_id, list): if isinstance(decoder_start_token_id, list):
if len(decoder_start_token_id) != batch_size: if len(decoder_start_token_id) != batch_size:
raise ValueError( raise ValueError(
f"`decoder_start_token_id` expcted to have length {batch_size} but got {len(decoder_start_token_id)}" f"`decoder_start_token_id` expected to have length {batch_size} but got {len(decoder_start_token_id)}"
) )
decoder_input_ids_start = paddle.to_tensor( decoder_input_ids_start = paddle.to_tensor(
decoder_start_token_id, decoder_start_token_id,

View File

@ -183,7 +183,7 @@ class GSRM(nn.Layer):
# ===== GSRM Semantic reasoning block ===== # ===== GSRM Semantic reasoning block =====
""" """
This module is achieved through bi-transformers, This module is achieved through bi-transformers,
ngram_feature1 is the froward one, ngram_fetaure2 is the backward one ngram_feature1 is the forward one, ngram_feature2 is the backward one
""" """
pad_idx = self.char_num pad_idx = self.char_num

View File

@ -2175,7 +2175,7 @@ class UniMERNetHead(nn.Layer):
if isinstance(decoder_start_token_id, list): if isinstance(decoder_start_token_id, list):
if len(decoder_start_token_id) != batch_size: if len(decoder_start_token_id) != batch_size:
raise ValueError( raise ValueError(
f"`decoder_start_token_id` expcted to have length {batch_size} but got {len(decoder_start_token_id)}" f"`decoder_start_token_id` expected to have length {batch_size} but got {len(decoder_start_token_id)}"
) )
decoder_input_ids_start = paddle.to_tensor( decoder_input_ids_start = paddle.to_tensor(
decoder_start_token_id, decoder_start_token_id,

View File

@ -218,7 +218,7 @@ class Transformer_Encoder(nn.Layer):
self.layer_norm = nn.LayerNorm(d_model, epsilon=1e-6) self.layer_norm = nn.LayerNorm(d_model, epsilon=1e-6)
def forward(self, enc_output, src_mask, return_attns=False): def forward(self, enc_output, src_mask, return_attns=False):
enc_output = self.dropout(self.position_enc(enc_output)) # position embeding enc_output = self.dropout(self.position_enc(enc_output)) # position embedding
for enc_layer in self.layer_stack: for enc_layer in self.layer_stack:
enc_output = enc_layer(enc_output, slf_attn_mask=src_mask) enc_output = enc_layer(enc_output, slf_attn_mask=src_mask)
enc_output = self.layer_norm(enc_output) enc_output = self.layer_norm(enc_output)

View File

@ -154,7 +154,7 @@ class TableMasterHead(nn.Layer):
class DecoderLayer(nn.Layer): class DecoderLayer(nn.Layer):
""" """
Decoder is made of self attention, srouce attention and feed forward. Decoder is made of self attention, source attention and feed forward.
""" """
def __init__(self, headers, d_model, dropout, d_ff): def __init__(self, headers, d_model, dropout, d_ff):

View File

@ -115,7 +115,7 @@ class TPSSpatialTransformer(nn.Layer):
# compute inverse matrix # compute inverse matrix
inverse_kernel = paddle.inverse(forward_kernel) inverse_kernel = paddle.inverse(forward_kernel)
# create target cordinate matrix # create target coordinate matrix
HW = self.target_height * self.target_width HW = self.target_height * self.target_width
target_coordinate = list( target_coordinate = list(
itertools.product(range(self.target_height), range(self.target_width)) itertools.product(range(self.target_height), range(self.target_width))

View File

@ -61,7 +61,7 @@ class SASTPostProcess(object):
""" """
Transfer vertical point_pairs into poly point in clockwise. Transfer vertical point_pairs into poly point in clockwise.
""" """
# constract poly # construct poly
point_num = len(point_pair_list) * 2 point_num = len(point_pair_list) * 2
point_list = [0] * point_num point_list = [0] * point_num
for idx, point_pair in enumerate(point_pair_list): for idx, point_pair in enumerate(point_pair_list):

View File

@ -464,12 +464,12 @@ class APP_Image2Doc(QWidget):
# Must set image path list and language before start # Must set image path list and language before start
self.output_dir = os.path.join( self.output_dir = os.path.join(
os.path.dirname(self.imagePaths[0]), "output" os.path.dirname(self.imagePaths[0]), "output"
) # output_dir shold be same as imagepath ) # output_dir should be same as imagepath
self._thread.setOutputDir(self.output_dir) self._thread.setOutputDir(self.output_dir)
self._thread.setImagePath(self.imagePaths) self._thread.setImagePath(self.imagePaths)
self._thread.setLang(lang) self._thread.setLang(lang)
self._thread.setPDFParser(pdfParser) self._thread.setPDFParser(pdfParser)
# disenble buttons # disable buttons
self.openFileButton.setEnabled(False) self.openFileButton.setEnabled(False)
self.startCNButton.setEnabled(False) self.startCNButton.setEnabled(False)
self.startENButton.setEnabled(False) self.startENButton.setEnabled(False)

View File

@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
conver table label to html convert table label to html
""" """
import json import json
@ -84,7 +84,7 @@ def convert(origin_gt_path, save_path):
for img_name, gt in tqdm(data_dict.items()): for img_name, gt in tqdm(data_dict.items()):
html = gen_html(gt) html = gen_html(gt)
save_pred_txt(img_name, html, save_path) save_pred_txt(img_name, html, save_path)
print("conver finish") print("convert finish")
def parse_args(): def parse_args():

View File

@ -112,7 +112,7 @@ class TEDS(object):
def __init__(self, structure_only=False, n_jobs=1, ignore_nodes=None): def __init__(self, structure_only=False, n_jobs=1, ignore_nodes=None):
assert isinstance(n_jobs, int) and ( assert isinstance(n_jobs, int) and (
n_jobs >= 1 n_jobs >= 1
), "n_jobs must be an integer greather than 1" ), "n_jobs must be an integer greater than 1"
self.structure_only = structure_only self.structure_only = structure_only
self.n_jobs = n_jobs self.n_jobs = n_jobs
self.ignore_nodes = ignore_nodes self.ignore_nodes = ignore_nodes

View File

@ -64,7 +64,7 @@ class DecodeImage(object):
class NormalizeImage(object): class NormalizeImage(object):
"""normalize image such as substract mean, divide std""" """normalize image such as subtract mean, divide std"""
def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs): def __init__(self, scale=None, mean=None, std=None, order="chw", **kwargs):
if isinstance(scale, str): if isinstance(scale, str):

View File

@ -24,7 +24,7 @@ def create_metric(
mode(str): mode, train/valid mode(str): mode, train/valid
Returns: Returns:
fetchs(dict): dict of measures fetches(dict): dict of measures
""" """
# if architecture["name"] == "GoogLeNet": # if architecture["name"] == "GoogLeNet":
# assert len(out) == 3, "GoogLeNet should have 3 outputs" # assert len(out) == 3, "GoogLeNet should have 3 outputs"
@ -35,10 +35,10 @@ def create_metric(
# out = out[1] # out = out[1]
softmax_out = F.softmax(out) softmax_out = F.softmax(out)
fetchs = OrderedDict() fetches = OrderedDict()
# set top1 to fetchs # set top1 to fetches
top1 = paddle.metric.accuracy(softmax_out, label=label, k=1) top1 = paddle.metric.accuracy(softmax_out, label=label, k=1)
# set topk to fetchs # set topk to fetches
k = min(topk, classes_num) k = min(topk, classes_num)
topk = paddle.metric.accuracy(softmax_out, label=label, k=k) topk = paddle.metric.accuracy(softmax_out, label=label, k=k)
@ -53,8 +53,8 @@ def create_metric(
/ paddle.distributed.get_world_size() / paddle.distributed.get_world_size()
) )
fetchs["top1"] = top1 fetches["top1"] = top1
topk_name = "top{}".format(k) topk_name = "top{}".format(k)
fetchs[topk_name] = topk fetches[topk_name] = topk
return fetchs return fetches

View File

@ -282,7 +282,7 @@ def create_predictor(args, mode, logger):
workspace_size=1 << 30, workspace_size=1 << 30,
precision_mode=precision, precision_mode=precision,
max_batch_size=args.max_batch_size, max_batch_size=args.max_batch_size,
min_subgraph_size=args.min_subgraph_size, # skip the minmum trt subgraph min_subgraph_size=args.min_subgraph_size, # skip the minimum trt subgraph
use_calib_mode=False, use_calib_mode=False,
) )

View File

@ -71,7 +71,7 @@ class NaiveSyncBatchNorm(nn.BatchNorm2D):
mean, meansqr = paddle.split(vec, [C, C]) mean, meansqr = paddle.split(vec, [C, C])
momentum = ( momentum = (
1 - self._momentum 1 - self._momentum
) # NOTE: paddle has reverse momentum defination ) # NOTE: paddle has reverse momentum definition
else: else:
if B == 0: if B == 0:
vec = paddle.zeros([2 * C + 1], dtype=mean.dtype) vec = paddle.zeros([2 * C + 1], dtype=mean.dtype)