From 00c30a398e6bdcb4a7790adfafdb295d13f390b5 Mon Sep 17 00:00:00 2001
From: Susan Xueqing Liu
Date: Wed, 3 May 2023 01:50:28 -0400
Subject: [PATCH] fix NLP zero division error (#1009)

* fix NLP zero division error

* set predictions to None

* set predictions to None

* set predictions to None

* refactor

* refactor

---------

Co-authored-by: Li Jiang
Co-authored-by: Chi Wang
Co-authored-by: Li Jiang
---
 flaml/automl/model.py                 | 4 ++--
 flaml/automl/nlp/huggingface/utils.py | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/flaml/automl/model.py b/flaml/automl/model.py
index a04f90442..c32a89f35 100644
--- a/flaml/automl/model.py
+++ b/flaml/automl/model.py
@@ -1135,7 +1135,7 @@ class TransformersEstimator(BaseEstimator):
             predictions = new_trainer.predict(test_dataset).predictions
         except ZeroDivisionError:
             logger.warning("Zero division error appeared in HuggingFace Transformers.")
-            predictions = np.array([-0.05] * len(test_dataset))
+            predictions = None
         return predictions

     def score(self, X_val: DataFrame, y_val: Series, **kwargs):
@@ -1171,7 +1171,7 @@ class TransformersEstimator(BaseEstimator):
             predictions = new_trainer.predict(test_dataset, **kwargs).predictions
         except ZeroDivisionError:
             logger.warning("Zero division error appeared in HuggingFace Transformers.")
-            predictions = np.array([0] * len(test_dataset))
+            predictions = None
         post_y_pred, _ = postprocess_prediction_and_true(
             task=self._task,
             y_pred=predictions,
diff --git a/flaml/automl/nlp/huggingface/utils.py b/flaml/automl/nlp/huggingface/utils.py
index 88a555578..9f07be368 100644
--- a/flaml/automl/nlp/huggingface/utils.py
+++ b/flaml/automl/nlp/huggingface/utils.py
@@ -311,6 +311,8 @@ def tokenize_swag(this_row, tokenizer, hf_args=None, return_column_name=False):

 def postprocess_prediction_and_true(task, y_pred, tokenizer, hf_args, y_true=None, X=None):
     # postprocess the matrix prediction y_pred and ground truth y_true into user readable format, e.g., for summarization, decode into text
+    if y_pred is None:
+        return np.array([0.0] * len(X)), y_true
     if task == SEQCLASSIFICATION:
         return np.argmax(y_pred, axis=1), y_true
     elif task == SEQREGRESSION:
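
The sketch below is not part of the patch; it only illustrates the behavior the patch introduces. When `Trainer.predict` raises `ZeroDivisionError`, `predictions` is now set to `None`, and `postprocess_prediction_and_true` returns a zero placeholder vector of length `len(X)` instead of crashing. The constant `SEQCLASSIFICATION`, the simplified function body, and the `X_test` data are assumptions made for this standalone example.

```python
import numpy as np

SEQCLASSIFICATION = "seq-classification"  # stand-in for the constant used in flaml

def postprocess_prediction_and_true(task, y_pred, tokenizer=None, hf_args=None, y_true=None, X=None):
    # Simplified version of the patched branch: a failed predict() now hands over
    # y_pred=None, and the postprocessor substitutes one 0.0 score per test row.
    if y_pred is None:
        return np.array([0.0] * len(X)), y_true
    if task == SEQCLASSIFICATION:
        return np.argmax(y_pred, axis=1), y_true
    raise NotImplementedError(task)

# Hypothetical test rows; a ZeroDivisionError in Transformers would leave y_pred as None.
X_test = ["sentence one", "sentence two", "sentence three"]
post_y_pred, _ = postprocess_prediction_and_true(SEQCLASSIFICATION, None, X=X_test)
print(post_y_pred)  # [0. 0. 0.] -- placeholder predictions, length matches len(X_test)
```

This keeps the placeholder logic in one place (the postprocessor) rather than constructing dummy arrays separately in each `except` block of `TransformersEstimator`, which is what the replaced `np.array([-0.05] * len(test_dataset))` and `np.array([0] * len(test_dataset))` lines did before.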