adding TODOs for NLP module, so students can implement other tasks more easily (#321)

* fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * adding TODOs for NLP module
2025-11-13 00:24:23 +00:00 · 2021-12-03 12:45:16 -05:00 · 2021-12-03 12:45:16 -05:00 · fb59bb9928
commit fb59bb9928
parent c57954fbbd
9 changed files with 214 additions and 63 deletions
--- a/flaml/data.py
+++ b/flaml/data.py
@ -12,6 +12,7 @@ from .training_log import training_log_reader
 from datetime import datetime
 from typing import Dict, Union, List
 # TODO: if your task is not specified in here, define your task as an all-capitalized word
 SEQCLASSIFICATION = "seq-classification"
 CLASSIFICATION = ("binary", "multi", "classification", SEQCLASSIFICATION)
 SEQREGRESSION = "seq-regression"
@ -20,10 +21,16 @@ TS_FORECAST = "ts_forecast"
 TS_TIMESTAMP_COL = "ds"
 TS_VALUE_COL = "y"
 FORECAST = "forecast"
 SUMMARIZATION = "summarization"
 NLG_TASKS = (SUMMARIZATION,)
 NLU_TASKS = (
    SEQREGRESSION,
    SEQCLASSIFICATION,
 )
 def _is_nlp_task(task):
-    if task in [SEQCLASSIFICATION, SEQREGRESSION]:
+    if task in NLU_TASKS + NLG_TASKS:
        return True
    else:
        return False
--- a/flaml/model.py
+++ b/flaml/model.py
@ -23,6 +23,8 @@ from .data import (
    TS_FORECAST,
    TS_TIMESTAMP_COL,
    TS_VALUE_COL,
    SEQCLASSIFICATION,
    SEQREGRESSION,
 )
 import pandas as pd
@ -303,8 +305,8 @@ class TransformersEstimator(BaseEstimator):
        return train_df
    @classmethod
-    def search_space(cls, **params):
+    def search_space(cls, data_size, task, **params):
-        return {
+        search_space_dict = {
            "learning_rate": {
                "domain": tune.loguniform(lower=1e-6, upper=1e-3),
                "init_value": 1e-5,
@ -331,6 +333,14 @@ class TransformersEstimator(BaseEstimator):
            "seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42},
            "global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize},
        }
        #   TODO: if self._task == SUMMARIZATION, uncomment the code below, SET the search space for
        #    "num_beams" in search_space_dict using
        #    search_space_dict["num_beams"] = {...}
        # if task in NLG_TASKS:
        #     search_space_dict["num_beams"] = {"domain": tune.choice(...)}
        return search_space_dict
    def _init_hpo_args(self, automl_fit_kwargs: dict = None):
        from .nlp.utils import HPOArgs
@ -356,7 +366,15 @@ class TransformersEstimator(BaseEstimator):
    def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
        from transformers import EarlyStoppingCallback
        from transformers.trainer_utils import set_seed
-        from transformers import AutoTokenizer, TrainingArguments
+        from transformers import AutoTokenizer
        #   TODO: if self._task == SUMMARIZATION, uncomment the code below (add indentation before
        #         from transformers import TrainingArguments)
        # if self._task in NLG_TASKS:
        #     from transformers import Seq2SeqTrainingArguments as TrainingArguments
        # else:
        from transformers import TrainingArguments
        import transformers
        from datasets import Dataset
        from .nlp.utils import (
@ -367,6 +385,13 @@ class TransformersEstimator(BaseEstimator):
            get_trial_fold_name,
            date_str,
        )
        # TODO: if self._task == QUESTIONANSWERING, uncomment the code below (add indentation before
        #  from .nlp.huggingface.trainer import TrainerForAuto)
        # if self._task in NLG_TASKS:
        #     from .nlp.huggingface.trainer import Seq2SeqTrainerForAuto as TrainerForAuto
        # else:
        from .nlp.huggingface.trainer import TrainerForAuto
        this_params = self.params
@ -414,6 +439,13 @@ class TransformersEstimator(BaseEstimator):
        X_train = self._preprocess(X_train, self._task, **kwargs)
        train_dataset = Dataset.from_pandas(self._join(X_train, y_train))
        # TODO: set a breakpoint here, observe the resulting train_dataset,
        #  compare it with the output of the tokenized results in your transformer example
        #  for example, if your task is MULTIPLECHOICE, you need to compare train_dataset with
        #  the output of https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
        #  make sure they are the same
        if X_val is not None:
            X_val = self._preprocess(X_val, self._task, **kwargs)
            eval_dataset = Dataset.from_pandas(self._join(X_val, y_val))
@ -528,6 +560,7 @@ class TransformersEstimator(BaseEstimator):
                logger.warning("checkpoint {} not found".format(ckpt_location))
    def cleanup(self):
        super().cleanup()
        if hasattr(self, "_ckpt_remains"):
            for each_ckpt in self._ckpt_remains:
                self._delete_one_ckpt(each_ckpt)
@ -558,7 +591,6 @@ class TransformersEstimator(BaseEstimator):
    def _compute_metrics_by_dataset_name(self, eval_pred):
        from .ml import sklearn_metric_loss_score
        from .data import SEQREGRESSION
        import datasets
        from .nlp.utils import load_default_huggingface_metric_for_task
@ -638,7 +670,13 @@ class TransformersEstimator(BaseEstimator):
        self._model = TrainerForAuto(model=best_model, args=training_args)
        predictions = self._model.predict(test_dataset)
-        return np.argmax(predictions.predictions, axis=1)
+        if self._task == SEQCLASSIFICATION:
            return np.argmax(predictions.predictions, axis=1)
        elif self._task == SEQREGRESSION:
            return predictions.predictions
        # TODO: elif self._task == your task, return the corresponding prediction
        #  e.g., if your task == QUESTIONANSWERING, you need to return the answer instead
        #  of the index
    def config2params(cls, config: dict) -> dict:
        params = config.copy()
--- a/flaml/nlp/huggingface/trainer.py
+++ b/flaml/nlp/huggingface/trainer.py
@ -2,12 +2,19 @@ import os
 try:
    from transformers import Trainer as TFTrainer
    from transformers import Seq2SeqTrainer
 except ImportError:
    TFTrainer = object
 class TrainerForAuto(TFTrainer):
-    def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
+    def evaluate(
        self,
        eval_dataset=None,
        ignore_keys=None,
        metric_key_prefix="eval",
        is_seq2seq=False,
    ):
        """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
        from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
@ -15,8 +22,21 @@ class TrainerForAuto(TFTrainer):
            self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
        )
        eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
        # TODO: if your task is seq2seq (i.e., SUMMARIZATION), uncomment the code below (add indentation before metrics = eval_dataset...
        # if is_seq2seq:
        #     metrics = eval_dataset and super().evaluate(
        #         eval_dataset,
        #         ignore_keys,
        #         metric_key_prefix,
        #         num_beams=self.args.num_beams,
        #     )
        # else:
        metrics = eval_dataset and super().evaluate(
-            eval_dataset, ignore_keys, metric_key_prefix
+            eval_dataset,
            ignore_keys,
            metric_key_prefix,
        )
        if metrics:
            for key in list(metrics.keys()):
@ -29,3 +49,27 @@ class TrainerForAuto(TFTrainer):
        else:
            self.ckpt_to_global_step = {ckpt_dir: self.state.global_step}
            self.ckpt_to_metric = {ckpt_dir: metrics} if metrics else {}
 # TODO: if your task is SUMMARIZATION, you need a different
 #  class Seq2SeqTrainerForAuto, uncomment the code below
 #  Note: I have implemented it here,
 #  but I don't know whether it's correct, you need to debug
 #  Seq2SeqTrainerForAuto to make sure it's correct
 # class Seq2SeqTrainerForAuto(Seq2SeqTrainer, TrainerForAuto):
 #     def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
 #         """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
 #         super(TrainerForAuto).evaluate(
 #             eval_dataset, ignore_keys, metric_key_prefix, is_seq2seq=True
 #         )
 # TODO: if your task is QUESTIONANSWERING, uncomment the code below
 #  by adapting the code in https://github.com/huggingface/transformers/blob/master/examples/pytorch/question-answering/trainer_qa.py#L28
 # class QATrainerForAuto(TrainerForAuto):
 #     pass
 # TODO: if your task is QUESTIONANSWERING, do the post processing here
--- a/flaml/nlp/utils.py
+++ b/flaml/nlp/utils.py
@ -10,6 +10,14 @@ def load_default_huggingface_metric_for_task(task):
        return "accuracy", "max"
    elif task == SEQREGRESSION:
        return "rmse", "max"
    # TODO: elif task == your task, return the default metric name for your task,
    #  e.g., if task == MULTIPLECHOICE, return "accuracy"
    #  notice this metric name has to be in ['accuracy', 'bertscore', 'bleu', 'bleurt',
    #  'cer', 'chrf', 'code_eval', 'comet', 'competition_math', 'coval', 'cuad',
    #  'f1', 'gleu', 'glue', 'google_bleu', 'indic_glue', 'matthews_correlation',
    #  'meteor', 'pearsonr', 'precision', 'recall', 'rouge', 'sacrebleu', 'sari',
    #  'seqeval', 'spearmanr', 'squad', 'squad_v2', 'super_glue', 'ter', 'wer',
    #  'wiki_split', 'xnli']
 global tokenized_column_names
@ -20,6 +28,11 @@ def tokenize_text(X, task, custom_hpo_task):
    if task in (SEQCLASSIFICATION, SEQREGRESSION):
        return tokenize_text_seqclassification(X, custom_hpo_task)
    # TODO: elif task == your task, return the tokenized result
    #  for example, if your task == MULTIPLE CHOICE, you should
    #  create a function named tokenize_text_multiplechoice(X, custom_hpo_args)
    #  and what it does is the same as preprocess_function at
    #  https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
 def tokenize_text_seqclassification(X, custom_hpo_args):
@ -79,6 +92,8 @@ def get_num_labels(task, y_train):
        return 1
    elif task == SEQCLASSIFICATION:
        return len(set(y_train))
    else:
        return None
 def _clean_value(value: Any) -> str:
@ -155,25 +170,43 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
    def get_this_model():
        from transformers import AutoModelForSequenceClassification
-        return AutoModelForSequenceClassification.from_pretrained(
+        if task in (SEQCLASSIFICATION, SEQREGRESSION):
-            checkpoint_path, config=model_config
+            return AutoModelForSequenceClassification.from_pretrained(
-        )
+                checkpoint_path, config=model_config
            )
        # TODO: elif task == your task, fill in the line in your transformers example
        #  that loads the model, e.g., if task == MULTIPLE CHOICE, according to
        #  https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L298
        #  you can return AutoModelForMultipleChoice.from_pretrained(checkpoint_path, config=model_config)
    def is_pretrained_model_in_classification_head_list(model_type):
        return model_type in MODEL_CLASSIFICATION_HEAD_MAPPING
    def _set_model_config(checkpoint_path):
-        if per_model_config and len(per_model_config) > 0:
+        if task in (SEQCLASSIFICATION, SEQREGRESSION):
-            model_config = AutoConfig.from_pretrained(
+            if per_model_config and len(per_model_config) > 0:
-                checkpoint_path,
+                model_config = AutoConfig.from_pretrained(
-                num_labels=model_config_num_labels,
+                    checkpoint_path,
-                **per_model_config,
+                    num_labels=model_config_num_labels,
-            )
+                    **per_model_config,
-        else:
+                )
-            model_config = AutoConfig.from_pretrained(
+            else:
-                checkpoint_path, num_labels=model_config_num_labels
+                model_config = AutoConfig.from_pretrained(
-            )
+                    checkpoint_path, num_labels=model_config_num_labels
-        return model_config
+                )
            return model_config
        # TODO: elif task == your task, uncomment the code below:
        # else:
        #     if per_model_config and len(per_model_config) > 0:
        #         model_config = AutoConfig.from_pretrained(
        #             checkpoint_path,
        #             **per_model_config,
        #         )
        #     else:
        #         model_config = AutoConfig.from_pretrained(
        #             checkpoint_path
        #         )
        #     return model_config
    if task == SEQCLASSIFICATION:
        num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels
@ -199,8 +232,9 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
            this_model = get_this_model()
        this_model.resize_token_embeddings(this_vocab_size)
        return this_model
-    elif task == SEQREGRESSION:
+    else:
-        model_config_num_labels = 1
+        if task == SEQREGRESSION:
            model_config_num_labels = 1
        model_config = _set_model_config(checkpoint_path)
        this_model = get_this_model()
        return this_model
--- a/test/nlp/test_autohf.py
+++ b/test/nlp/test_autohf.py
@ -1,22 +1,25 @@
-import os
+import sys
 import pytest
-@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
+@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
 def test_hf_data():
    from flaml import AutoML
-
+    import requests
    from datasets import load_dataset
-    train_dataset = (
+    try:
-        load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
+        train_dataset = (
-    )
+            load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
-    dev_dataset = (
+        )
-        load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
+        dev_dataset = (
-    )
+            load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
-    test_dataset = (
+        )
-        load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
+        test_dataset = (
-    )
+            load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
        )
    except requests.exceptions.ConnectionError:
        return
    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"
@ -75,12 +78,15 @@ def test_hf_data():
 def _test_custom_data():
    from flaml import AutoML
-
+    import requests
    import pandas as pd
-    train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
+    try:
-    dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
+        train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
-    test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
+        dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
        test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
    except requests.exceptions.ConnectionError:
        pass
    custom_sent_keys = ["#1 String", "#2 String"]
    label_key = "Quality"
--- a/test/nlp/test_autohf_classificationhead.py
+++ b/test/nlp/test_autohf_classificationhead.py
@ -1,10 +1,17 @@
 def test_classification_head():
    from flaml import AutoML
-
+    import requests
    from datasets import load_dataset
-    train_dataset = load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
+    try:
-    dev_dataset = load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
+        train_dataset = (
            load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
        )
        dev_dataset = (
            load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
        )
    except requests.exceptions.ConnectionError:
        return
    custom_sent_keys = ["text"]
    label_key = "label"
--- a/test/nlp/test_autohf_cv.py
+++ b/test/nlp/test_autohf_cv.py
@ -1,16 +1,19 @@
-import os
+import sys
 import pytest
-@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
+@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
 def test_cv():
    from flaml import AutoML
-
+    import requests
    from datasets import load_dataset
-    train_dataset = (
+    try:
-        load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
+        train_dataset = (
-    )
+            load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
        )
    except requests.exceptions.ConnectionError:
        return
    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"
--- a/test/nlp/test_autohf_maxiter1.py
+++ b/test/nlp/test_autohf_maxiter1.py
@ -1,15 +1,20 @@
-import os
+import sys
 import pytest
-@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
+@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
 def test_max_iter_1():
    from flaml import AutoML
-
+    import requests
    from datasets import load_dataset
-    train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
+    try:
-    dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
+        train_dataset = (
            load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
        )
        dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
    except requests.exceptions.ConnectionError:
        return
    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"
--- a/test/nlp/test_autohf_regression.py
+++ b/test/nlp/test_autohf_regression.py
@ -1,23 +1,26 @@
-import os
+import sys
 import pytest
-@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
+@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
 def test_regression():
    try:
        import ray
    except ImportError:
        return
    from flaml import AutoML
-
+    import requests
    from datasets import load_dataset
-    train_dataset = (
+    try:
-        load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
+        train_dataset = (
-    )
+            load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
-    dev_dataset = (
+        )
-        load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
+        dev_dataset = (
-    )
+            load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
        )
    except requests.exceptions.ConnectionError:
        return
    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"
@ -50,3 +53,7 @@ def test_regression():
    automl.fit(
        X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
    )
 if __name__ == "__main__":
    test_regression()