fix NLP zero division error (#1009)

* fix NLP zero division error

* set predictions to None

* set predictions to None

* set predictions to None

* refactor

* refactor

---------

Co-authored-by: Li Jiang <lijiang1@microsoft.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Li Jiang <bnujli@gmail.com>
Susan Xueqing Liu 2023-05-03 01:50:28 -04:00 committed by GitHub
parent 31864d2d77
commit 00c30a398e
2 changed files with 4 additions and 2 deletions

@@ -1135,7 +1135,7 @@ class TransformersEstimator(BaseEstimator):
             predictions = new_trainer.predict(test_dataset).predictions
         except ZeroDivisionError:
             logger.warning("Zero division error appeared in HuggingFace Transformers.")
-            predictions = np.array([-0.05] * len(test_dataset))
+            predictions = None
         return predictions
 
     def score(self, X_val: DataFrame, y_val: Series, **kwargs):
@@ -1171,7 +1171,7 @@ class TransformersEstimator(BaseEstimator):
             predictions = new_trainer.predict(test_dataset, **kwargs).predictions
         except ZeroDivisionError:
             logger.warning("Zero division error appeared in HuggingFace Transformers.")
-            predictions = np.array([0] * len(test_dataset))
+            predictions = None
         post_y_pred, _ = postprocess_prediction_and_true(
             task=self._task,
             y_pred=predictions,

@@ -311,6 +311,8 @@ def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):
 def postprocess_prediction_and_true(task, y_pred, tokenizer, hf_args, y_true=None, X=None):
     # postprocess the matrix prediction y_pred and ground truth y_true into user readable format, e.g., for summarization, decode into text
+    if y_pred is None:
+        return np.array([0.0] * len(X)), y_true
     if task == SEQCLASSIFICATION:
         return np.argmax(y_pred, axis=1), y_true
     elif task == SEQREGRESSION:
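
For context, the sketch below mirrors the pattern this commit introduces, in a self-contained form. It is illustrative only: `predict_with_fallback`, `postprocess`, and the `SEQCLASSIFICATION` constant are stand-ins, not FLAML's actual API.

```python
import numpy as np

# Stand-in task constant for illustration; FLAML defines its own task names.
SEQCLASSIFICATION = "seq-classification"


def predict_with_fallback(trainer, test_dataset):
    # Mirrors the patched estimator: if HuggingFace Transformers raises a
    # ZeroDivisionError during prediction, return None instead of a dummy array.
    try:
        return trainer.predict(test_dataset).predictions
    except ZeroDivisionError:
        return None


def postprocess(task, y_pred, y_true=None, X=None):
    # Mirrors postprocess_prediction_and_true: the None sentinel is turned into
    # an all-zero prediction vector of the right length, so downstream metric
    # code still receives a well-formed array.
    if y_pred is None:
        return np.array([0.0] * len(X)), y_true
    if task == SEQCLASSIFICATION:
        return np.argmax(y_pred, axis=1), y_true
    return y_pred.reshape((len(y_pred),)), y_true
```

The design choice is to keep the error handling in one place: the estimator signals failure with None, and only the postprocessing step decides what a failed prediction should look like to the caller, instead of each call site inventing its own dummy array.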