adding TODOs for the NLP module so students can implement other tasks more easily (#321)

* fixing the ray pickle bug, skipping tests on macOS due to a bug, completing the code for seq-regression

* catching ConnectionError

* adding TODOs for the NLP module
Xueqing Liu 2021-12-03 12:45:16 -05:00 committed by GitHub
parent c57954fbbd
commit fb59bb9928
9 changed files with 214 additions and 63 deletions

View File

@@ -12,6 +12,7 @@ from .training_log import training_log_reader
from datetime import datetime
from typing import Dict, Union, List
# TODO: if your task is not listed here, define it as an all-capitalized constant
SEQCLASSIFICATION = "seq-classification"
CLASSIFICATION = ("binary", "multi", "classification", SEQCLASSIFICATION)
SEQREGRESSION = "seq-regression"
@@ -20,10 +21,16 @@ TS_FORECAST = "ts_forecast"
TS_TIMESTAMP_COL = "ds"
TS_VALUE_COL = "y"
FORECAST = "forecast"
SUMMARIZATION = "summarization"
NLG_TASKS = (SUMMARIZATION,)
NLU_TASKS = (
SEQREGRESSION,
SEQCLASSIFICATION,
)
def _is_nlp_task(task):
if task in [SEQCLASSIFICATION, SEQREGRESSION]:
if task in NLU_TASKS + NLG_TASKS:
return True
else:
return False
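# For example, a student adding a hypothetical MULTIPLECHOICE task could define
# the constant and register it as follows (a sketch; the name and string value
# are illustrative, not part of this commit):
# MULTIPLECHOICE = "multichoice-classification"
# NLU_TASKS = (
#     SEQREGRESSION,
#     SEQCLASSIFICATION,
#     MULTIPLECHOICE,
# )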

View File

@@ -23,6 +23,8 @@ from .data import (
TS_FORECAST,
TS_TIMESTAMP_COL,
TS_VALUE_COL,
SEQCLASSIFICATION,
SEQREGRESSION,
)
import pandas as pd
@@ -303,8 +305,8 @@ class TransformersEstimator(BaseEstimator):
return train_df
@classmethod
def search_space(cls, **params):
return {
def search_space(cls, data_size, task, **params):
search_space_dict = {
"learning_rate": {
"domain": tune.loguniform(lower=1e-6, upper=1e-3),
"init_value": 1e-5,
@@ -331,6 +333,14 @@ class TransformersEstimator(BaseEstimator):
"seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42},
"global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize},
}
# TODO: if self._task == SUMMARIZATION, uncomment the code below and set the
# search space for "num_beams" in search_space_dict via
# search_space_dict["num_beams"] = {...}
# if task in NLG_TASKS:
# search_space_dict["num_beams"] = {"domain": tune.choice(...)}
return search_space_dict
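# For reference, the uncommented "num_beams" entry could look like the sketch
# below; the candidate beam counts are illustrative assumptions, not tuned defaults:
# if task in NLG_TASKS:
#     search_space_dict["num_beams"] = {
#         "domain": tune.choice([1, 2, 4, 8]),
#         "init_value": 4,
#     }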
def _init_hpo_args(self, automl_fit_kwargs: dict = None):
from .nlp.utils import HPOArgs
@@ -356,7 +366,15 @@ class TransformersEstimator(BaseEstimator):
def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
from transformers import EarlyStoppingCallback
from transformers.trainer_utils import set_seed
from transformers import AutoTokenizer, TrainingArguments
from transformers import AutoTokenizer
# TODO: if self._task == SUMMARIZATION, uncomment the code below (and indent the
# existing "from transformers import TrainingArguments" under the else branch)
# if self._task in NLG_TASKS:
# from transformers import Seq2SeqTrainingArguments as TrainingArguments
# else:
from transformers import TrainingArguments
import transformers
from datasets import Dataset
from .nlp.utils import (
@@ -367,6 +385,13 @@ class TransformersEstimator(BaseEstimator):
get_trial_fold_name,
date_str,
)
# TODO: if self._task == SUMMARIZATION, uncomment the code below (and indent the
# existing "from .nlp.huggingface.trainer import TrainerForAuto" under the else branch)
# if self._task in NLG_TASKS:
# from .nlp.huggingface.trainer import Seq2SeqTrainerForAuto as TrainerForAuto
# else:
from .nlp.huggingface.trainer import TrainerForAuto
this_params = self.params
@@ -414,6 +439,13 @@ class TransformersEstimator(BaseEstimator):
X_train = self._preprocess(X_train, self._task, **kwargs)
train_dataset = Dataset.from_pandas(self._join(X_train, y_train))
# TODO: set a breakpoint here, observe the resulting train_dataset, and
# compare it with the tokenized output of your transformers example;
# for example, if your task is MULTIPLECHOICE, compare train_dataset with
# the output of https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
# and make sure they are the same
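# One way to do that comparison (a debugging sketch, not committed code) is to
# print the tokenized columns and the first example and check them against the
# linked run_swag.py output:
# print(train_dataset.features)  # e.g., input_ids, attention_mask, label
# print(train_dataset[0])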
if X_val is not None:
X_val = self._preprocess(X_val, self._task, **kwargs)
eval_dataset = Dataset.from_pandas(self._join(X_val, y_val))
@@ -528,6 +560,7 @@ class TransformersEstimator(BaseEstimator):
logger.warning("checkpoint {} not found".format(ckpt_location))
def cleanup(self):
super().cleanup()
if hasattr(self, "_ckpt_remains"):
for each_ckpt in self._ckpt_remains:
self._delete_one_ckpt(each_ckpt)
@@ -558,7 +591,6 @@ class TransformersEstimator(BaseEstimator):
def _compute_metrics_by_dataset_name(self, eval_pred):
from .ml import sklearn_metric_loss_score
from .data import SEQREGRESSION
import datasets
from .nlp.utils import load_default_huggingface_metric_for_task
@@ -638,7 +670,13 @@ class TransformersEstimator(BaseEstimator):
self._model = TrainerForAuto(model=best_model, args=training_args)
predictions = self._model.predict(test_dataset)
return np.argmax(predictions.predictions, axis=1)
if self._task == SEQCLASSIFICATION:
return np.argmax(predictions.predictions, axis=1)
elif self._task == SEQREGRESSION:
return predictions.predictions
# TODO: elif self._task == your task, return the corresponding prediction
# e.g., if your task == QUESTIONANSWERING, you need to return the answer instead
# of the index
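# A sketch of such a branch for an NLG task like SUMMARIZATION; it assumes a
# tokenizer handle (here self._tokenizer, hypothetical) was saved during fit():
# elif self._task in NLG_TASKS:
#     return self._tokenizer.batch_decode(
#         predictions.predictions, skip_special_tokens=True
#     )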
def config2params(cls, config: dict) -> dict:
params = config.copy()

View File

@@ -2,12 +2,19 @@ import os
try:
from transformers import Trainer as TFTrainer
from transformers import Seq2SeqTrainer
except ImportError:
TFTrainer = object
class TrainerForAuto(TFTrainer):
def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
def evaluate(
self,
eval_dataset=None,
ignore_keys=None,
metric_key_prefix="eval",
is_seq2seq=False,
):
"""Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
@@ -15,8 +22,21 @@ class TrainerForAuto(TFTrainer):
self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
)
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
# TODO: if your task is seq2seq (i.e., SUMMARIZATION), uncomment the code below
# (and indent the existing "metrics = eval_dataset and ..." call under the else branch)
# if is_seq2seq:
# metrics = eval_dataset and super().evaluate(
# eval_dataset,
# ignore_keys,
# metric_key_prefix,
# num_beams=self.args.num_beams,
# )
# else:
metrics = eval_dataset and super().evaluate(
eval_dataset, ignore_keys, metric_key_prefix
eval_dataset,
ignore_keys,
metric_key_prefix,
)
if metrics:
for key in list(metrics.keys()):
@@ -29,3 +49,27 @@ class TrainerForAuto(TFTrainer):
else:
self.ckpt_to_global_step = {ckpt_dir: self.state.global_step}
self.ckpt_to_metric = {ckpt_dir: metrics} if metrics else {}
# TODO: if your task is SUMMARIZATION, you need a different
# class Seq2SeqTrainerForAuto; uncomment the draft below.
# Note: this draft has not been verified, so you need to debug
# Seq2SeqTrainerForAuto to make sure it is correct. Listing TrainerForAuto
# before Seq2SeqTrainer makes super().evaluate() reach TrainerForAuto first,
# whose is_seq2seq branch then falls through to Seq2SeqTrainer.evaluate
# with num_beams.
# class Seq2SeqTrainerForAuto(TrainerForAuto, Seq2SeqTrainer):
#     def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
#         """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
#         super().evaluate(
#             eval_dataset, ignore_keys, metric_key_prefix, is_seq2seq=True
#         )
# TODO: if your task is QUESTIONANSWERING, uncomment and complete the class below
# by adapting the code in https://github.com/huggingface/transformers/blob/master/examples/pytorch/question-answering/trainer_qa.py#L28
# class QATrainerForAuto(TrainerForAuto):
# pass
# TODO: if your task is QUESTIONANSWERING, do the post-processing here
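# A minimal skeleton for that adaptation (hypothetical; eval_examples and
# post_process_function are borrowed from the linked trainer_qa.py and must
# be supplied by the caller):
# class QATrainerForAuto(TrainerForAuto):
#     def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs):
#         super().__init__(*args, **kwargs)
#         self.eval_examples = eval_examples
#         self.post_process_function = post_process_function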

View File

@@ -10,6 +10,14 @@ def load_default_huggingface_metric_for_task(task):
return "accuracy", "max"
elif task == SEQREGRESSION:
return "rmse", "max"
# TODO: elif task == your task, return the default metric name for your task,
# e.g., if task == MULTIPLECHOICE, return "accuracy"
# note that the metric name has to be one of ['accuracy', 'bertscore', 'bleu', 'bleurt',
# 'cer', 'chrf', 'code_eval', 'comet', 'competition_math', 'coval', 'cuad',
# 'f1', 'gleu', 'glue', 'google_bleu', 'indic_glue', 'matthews_correlation',
# 'meteor', 'pearsonr', 'precision', 'recall', 'rouge', 'sacrebleu', 'sari',
# 'seqeval', 'spearmanr', 'squad', 'squad_v2', 'super_glue', 'ter', 'wer',
# 'wiki_split', 'xnli']
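# For instance, a concrete branch for a hypothetical MULTIPLECHOICE task:
# elif task == MULTIPLECHOICE:
#     return "accuracy", "max"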
global tokenized_column_names
@@ -20,6 +28,11 @@ def tokenize_text(X, task, custom_hpo_task):
if task in (SEQCLASSIFICATION, SEQREGRESSION):
return tokenize_text_seqclassification(X, custom_hpo_task)
# TODO: elif task == your task, return the tokenized result;
# for example, if your task == MULTIPLECHOICE, you should
# create a function named tokenize_text_multiplechoice(X, custom_hpo_args)
# that does the same thing as preprocess_function at
# https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
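# The corresponding dispatch branch could then look like (MULTIPLECHOICE and
# the helper are hypothetical):
# elif task == MULTIPLECHOICE:
#     return tokenize_text_multiplechoice(X, custom_hpo_task)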
def tokenize_text_seqclassification(X, custom_hpo_args):
@@ -79,6 +92,8 @@ def get_num_labels(task, y_train):
return 1
elif task == SEQCLASSIFICATION:
return len(set(y_train))
else:
return None
def _clean_value(value: Any) -> str:
@@ -155,25 +170,43 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
def get_this_model():
from transformers import AutoModelForSequenceClassification
return AutoModelForSequenceClassification.from_pretrained(
checkpoint_path, config=model_config
)
if task in (SEQCLASSIFICATION, SEQREGRESSION):
return AutoModelForSequenceClassification.from_pretrained(
checkpoint_path, config=model_config
)
# TODO: elif task == your task, fill in the line from your transformers example
# that loads the model; e.g., if task == MULTIPLECHOICE, then according to
# https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L298
# you can return AutoModelForMultipleChoice.from_pretrained(checkpoint_path, config=model_config)
def is_pretrained_model_in_classification_head_list(model_type):
return model_type in MODEL_CLASSIFICATION_HEAD_MAPPING
def _set_model_config(checkpoint_path):
if per_model_config and len(per_model_config) > 0:
model_config = AutoConfig.from_pretrained(
checkpoint_path,
num_labels=model_config_num_labels,
**per_model_config,
)
else:
model_config = AutoConfig.from_pretrained(
checkpoint_path, num_labels=model_config_num_labels
)
return model_config
if task in (SEQCLASSIFICATION, SEQREGRESSION):
if per_model_config and len(per_model_config) > 0:
model_config = AutoConfig.from_pretrained(
checkpoint_path,
num_labels=model_config_num_labels,
**per_model_config,
)
else:
model_config = AutoConfig.from_pretrained(
checkpoint_path, num_labels=model_config_num_labels
)
return model_config
# TODO: elif task == your task, uncomment the code below:
# else:
# if per_model_config and len(per_model_config) > 0:
# model_config = AutoConfig.from_pretrained(
# checkpoint_path,
# **per_model_config,
# )
# else:
# model_config = AutoConfig.from_pretrained(
# checkpoint_path
# )
# return model_config
if task == SEQCLASSIFICATION:
num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels
@@ -199,8 +232,9 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
this_model = get_this_model()
this_model.resize_token_embeddings(this_vocab_size)
return this_model
elif task == SEQREGRESSION:
model_config_num_labels = 1
else:
if task == SEQREGRESSION:
model_config_num_labels = 1
model_config = _set_model_config(checkpoint_path)
this_model = get_this_model()
return this_model
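# Usage sketch (hypothetical arguments): load a two-class classification head
# from a pretrained checkpoint:
# model = load_model("bert-base-uncased", SEQCLASSIFICATION, num_labels=2)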

View File

@@ -1,22 +1,25 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_hf_data():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
dev_dataset = (
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
)
test_dataset = (
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
)
try:
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
dev_dataset = (
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
)
test_dataset = (
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"
@@ -75,12 +78,15 @@ def test_hf_data():
def _test_custom_data():
from flaml import AutoML
import requests
import pandas as pd
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
try:
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["#1 String", "#2 String"]
label_key = "Quality"

View File

@@ -1,10 +1,17 @@
def test_classification_head():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
dev_dataset = load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
try:
train_dataset = (
load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
)
dev_dataset = (
load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["text"]
label_key = "label"

View File

@@ -1,16 +1,19 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_cv():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
try:
train_dataset = (
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"

View File

@@ -1,15 +1,20 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_max_iter_1():
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
try:
train_dataset = (
load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
)
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"

View File

@@ -1,23 +1,26 @@
import os
import sys
import pytest
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_regression():
try:
import ray
except ImportError:
return
from flaml import AutoML
import requests
from datasets import load_dataset
train_dataset = (
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
)
dev_dataset = (
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
)
try:
train_dataset = (
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
)
dev_dataset = (
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
)
except requests.exceptions.ConnectionError:
return
custom_sent_keys = ["sentence1", "sentence2"]
label_key = "label"
@@ -50,3 +53,7 @@ def test_regression():
automl.fit(
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
)
if __name__ == "__main__":
test_regression()