mirror of
https://github.com/microsoft/autogen.git
synced 2025-11-03 11:20:35 +00:00
adding TODOs for NLP module, so students can implement other tasks easier (#321)
* fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * adding TODOs for NLP module
This commit is contained in:
parent
c57954fbbd
commit
fb59bb9928
@ -12,6 +12,7 @@ from .training_log import training_log_reader
|
||||
from datetime import datetime
|
||||
from typing import Dict, Union, List
|
||||
|
||||
# TODO: if your task is not specified in here, define your task as an all-capitalized word
|
||||
SEQCLASSIFICATION = "seq-classification"
|
||||
CLASSIFICATION = ("binary", "multi", "classification", SEQCLASSIFICATION)
|
||||
SEQREGRESSION = "seq-regression"
|
||||
@ -20,10 +21,16 @@ TS_FORECAST = "ts_forecast"
|
||||
TS_TIMESTAMP_COL = "ds"
|
||||
TS_VALUE_COL = "y"
|
||||
FORECAST = "forecast"
|
||||
SUMMARIZATION = "summarization"
|
||||
NLG_TASKS = (SUMMARIZATION,)
|
||||
NLU_TASKS = (
|
||||
SEQREGRESSION,
|
||||
SEQCLASSIFICATION,
|
||||
)
|
||||
|
||||
|
||||
def _is_nlp_task(task):
    """Return True when ``task`` is one of the known NLP tasks.

    A task counts as NLP when it appears in either ``NLU_TASKS`` or
    ``NLG_TASKS``; any other value yields False.
    """
    # The older check against [SEQCLASSIFICATION, SEQREGRESSION] is superseded:
    # both constants are members of NLU_TASKS, so the combined tuple covers them.
    return task in NLU_TASKS + NLG_TASKS
|
||||
|
||||
@ -23,6 +23,8 @@ from .data import (
|
||||
TS_FORECAST,
|
||||
TS_TIMESTAMP_COL,
|
||||
TS_VALUE_COL,
|
||||
SEQCLASSIFICATION,
|
||||
SEQREGRESSION,
|
||||
)
|
||||
|
||||
import pandas as pd
|
||||
@ -303,8 +305,8 @@ class TransformersEstimator(BaseEstimator):
|
||||
return train_df
|
||||
|
||||
@classmethod
|
||||
def search_space(cls, **params):
|
||||
return {
|
||||
def search_space(cls, data_size, task, **params):
|
||||
search_space_dict = {
|
||||
"learning_rate": {
|
||||
"domain": tune.loguniform(lower=1e-6, upper=1e-3),
|
||||
"init_value": 1e-5,
|
||||
@ -331,6 +333,14 @@ class TransformersEstimator(BaseEstimator):
|
||||
"seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42},
|
||||
"global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize},
|
||||
}
|
||||
# TODO: if task == SUMMARIZATION, uncomment the code below and set the search space for
|
||||
# "num_beams" in search_space_dict using
|
||||
# search_space_dict["num_beams"] = {...}
|
||||
|
||||
# if task in NLG_TASKS:
|
||||
# search_space_dict["num_beams"] = {"domain": tune.choice(...)}
|
||||
|
||||
return search_space_dict
|
||||
|
||||
def _init_hpo_args(self, automl_fit_kwargs: dict = None):
|
||||
from .nlp.utils import HPOArgs
|
||||
@ -356,7 +366,15 @@ class TransformersEstimator(BaseEstimator):
|
||||
def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
|
||||
from transformers import EarlyStoppingCallback
|
||||
from transformers.trainer_utils import set_seed
|
||||
from transformers import AutoTokenizer, TrainingArguments
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
# TODO: if self._task == SUMMARIZATION, uncomment the code below (add indentation before
|
||||
# from transformers import TrainingArguments)
|
||||
# if self._task in NLG_TASKS:
|
||||
# from transformers import Seq2SeqTrainingArguments as TrainingArguments
|
||||
# else:
|
||||
from transformers import TrainingArguments
|
||||
|
||||
import transformers
|
||||
from datasets import Dataset
|
||||
from .nlp.utils import (
|
||||
@ -367,6 +385,13 @@ class TransformersEstimator(BaseEstimator):
|
||||
get_trial_fold_name,
|
||||
date_str,
|
||||
)
|
||||
|
||||
# TODO: if self._task is in NLG_TASKS (e.g., SUMMARIZATION), uncomment the code below (add indentation before
|
||||
# from .nlp.huggingface.trainer import TrainerForAuto)
|
||||
|
||||
# if self._task in NLG_TASKS:
|
||||
# from .nlp.huggingface.trainer import Seq2SeqTrainerForAuto as TrainerForAuto
|
||||
# else:
|
||||
from .nlp.huggingface.trainer import TrainerForAuto
|
||||
|
||||
this_params = self.params
|
||||
@ -414,6 +439,13 @@ class TransformersEstimator(BaseEstimator):
|
||||
|
||||
X_train = self._preprocess(X_train, self._task, **kwargs)
|
||||
train_dataset = Dataset.from_pandas(self._join(X_train, y_train))
|
||||
|
||||
# TODO: set a breakpoint here, observe the resulting train_dataset,
|
||||
# compare it with the output of the tokenized results in your transformer example
|
||||
# for example, if your task is MULTIPLECHOICE, you need to compare train_dataset with
|
||||
# the output of https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
|
||||
# make sure they are the same
|
||||
|
||||
if X_val is not None:
|
||||
X_val = self._preprocess(X_val, self._task, **kwargs)
|
||||
eval_dataset = Dataset.from_pandas(self._join(X_val, y_val))
|
||||
@ -528,6 +560,7 @@ class TransformersEstimator(BaseEstimator):
|
||||
logger.warning("checkpoint {} not found".format(ckpt_location))
|
||||
|
||||
def cleanup(self):
|
||||
super().cleanup()
|
||||
if hasattr(self, "_ckpt_remains"):
|
||||
for each_ckpt in self._ckpt_remains:
|
||||
self._delete_one_ckpt(each_ckpt)
|
||||
@ -558,7 +591,6 @@ class TransformersEstimator(BaseEstimator):
|
||||
|
||||
def _compute_metrics_by_dataset_name(self, eval_pred):
|
||||
from .ml import sklearn_metric_loss_score
|
||||
from .data import SEQREGRESSION
|
||||
import datasets
|
||||
from .nlp.utils import load_default_huggingface_metric_for_task
|
||||
|
||||
@ -638,7 +670,13 @@ class TransformersEstimator(BaseEstimator):
|
||||
self._model = TrainerForAuto(model=best_model, args=training_args)
|
||||
predictions = self._model.predict(test_dataset)
|
||||
|
||||
return np.argmax(predictions.predictions, axis=1)
|
||||
if self._task == SEQCLASSIFICATION:
|
||||
return np.argmax(predictions.predictions, axis=1)
|
||||
elif self._task == SEQREGRESSION:
|
||||
return predictions.predictions
|
||||
# TODO: elif self._task == your task, return the corresponding prediction
|
||||
# e.g., if your task == QUESTIONANSWERING, you need to return the answer instead
|
||||
# of the index
|
||||
|
||||
def config2params(cls, config: dict) -> dict:
|
||||
params = config.copy()
|
||||
|
||||
@ -2,12 +2,19 @@ import os
|
||||
|
||||
try:
|
||||
from transformers import Trainer as TFTrainer
|
||||
from transformers import Seq2SeqTrainer
|
||||
except ImportError:
|
||||
TFTrainer = object
|
||||
|
||||
|
||||
class TrainerForAuto(TFTrainer):
|
||||
def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
|
||||
def evaluate(
|
||||
self,
|
||||
eval_dataset=None,
|
||||
ignore_keys=None,
|
||||
metric_key_prefix="eval",
|
||||
is_seq2seq=False,
|
||||
):
|
||||
"""Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
|
||||
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
|
||||
|
||||
@ -15,8 +22,21 @@ class TrainerForAuto(TFTrainer):
|
||||
self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
|
||||
)
|
||||
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
|
||||
|
||||
# TODO: if your task is seq2seq (i.e., SUMMARIZATION), uncomment the code below (add indentation before metrics = eval_dataset...)
|
||||
|
||||
# if is_seq2seq:
|
||||
# metrics = eval_dataset and super().evaluate(
|
||||
# eval_dataset,
|
||||
# ignore_keys,
|
||||
# metric_key_prefix,
|
||||
# num_beams=self.args.num_beams,
|
||||
# )
|
||||
# else:
|
||||
metrics = eval_dataset and super().evaluate(
|
||||
eval_dataset, ignore_keys, metric_key_prefix
|
||||
eval_dataset,
|
||||
ignore_keys,
|
||||
metric_key_prefix,
|
||||
)
|
||||
if metrics:
|
||||
for key in list(metrics.keys()):
|
||||
@ -29,3 +49,27 @@ class TrainerForAuto(TFTrainer):
|
||||
else:
|
||||
self.ckpt_to_global_step = {ckpt_dir: self.state.global_step}
|
||||
self.ckpt_to_metric = {ckpt_dir: metrics} if metrics else {}
|
||||
|
||||
|
||||
# TODO: if your task is SUMMARIZATION, you need a different
|
||||
# class Seq2SeqTrainerForAuto, uncomment the code below
|
||||
# Note: I have implemented it here,
|
||||
# but I don't know whether it's correct, you need to debug
|
||||
# Seq2SeqTrainerForAuto to make sure it's correct
|
||||
|
||||
|
||||
# class Seq2SeqTrainerForAuto(Seq2SeqTrainer, TrainerForAuto):
|
||||
# def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
|
||||
# """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
|
||||
# super(TrainerForAuto).evaluate(
|
||||
# eval_dataset, ignore_keys, metric_key_prefix, is_seq2seq=True
|
||||
# )
|
||||
|
||||
|
||||
# TODO: if your task is QUESTIONANSWERING, uncomment the code below
|
||||
# by adapting the code in https://github.com/huggingface/transformers/blob/master/examples/pytorch/question-answering/trainer_qa.py#L28
|
||||
|
||||
|
||||
# class QATrainerForAuto(TrainerForAuto):
|
||||
# pass
|
||||
# TODO: if your task is QUESTIONANSWERING, do the post processing here
|
||||
|
||||
@ -10,6 +10,14 @@ def load_default_huggingface_metric_for_task(task):
|
||||
return "accuracy", "max"
|
||||
elif task == SEQREGRESSION:
|
||||
return "rmse", "max"
|
||||
# TODO: elif task == your task, return the default metric name for your task,
|
||||
# e.g., if task == MULTIPLECHOICE, return "accuracy"
|
||||
# notice this metric name has to be in ['accuracy', 'bertscore', 'bleu', 'bleurt',
|
||||
# 'cer', 'chrf', 'code_eval', 'comet', 'competition_math', 'coval', 'cuad',
|
||||
# 'f1', 'gleu', 'glue', 'google_bleu', 'indic_glue', 'matthews_correlation',
|
||||
# 'meteor', 'pearsonr', 'precision', 'recall', 'rouge', 'sacrebleu', 'sari',
|
||||
# 'seqeval', 'spearmanr', 'squad', 'squad_v2', 'super_glue', 'ter', 'wer',
|
||||
# 'wiki_split', 'xnli']
|
||||
|
||||
|
||||
global tokenized_column_names
|
||||
@ -20,6 +28,11 @@ def tokenize_text(X, task, custom_hpo_task):
|
||||
|
||||
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
||||
return tokenize_text_seqclassification(X, custom_hpo_task)
|
||||
# TODO: elif task == your task, return the tokenized result
|
||||
# for example, if your task == MULTIPLE CHOICE, you should
|
||||
# create a function named tokenize_text_multiplechoice(X, custom_hpo_args)
|
||||
# and what it does is the same as preprocess_function at
|
||||
# https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
|
||||
|
||||
|
||||
def tokenize_text_seqclassification(X, custom_hpo_args):
|
||||
@ -79,6 +92,8 @@ def get_num_labels(task, y_train):
|
||||
return 1
|
||||
elif task == SEQCLASSIFICATION:
|
||||
return len(set(y_train))
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def _clean_value(value: Any) -> str:
|
||||
@ -155,25 +170,43 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
|
||||
def get_this_model():
|
||||
from transformers import AutoModelForSequenceClassification
|
||||
|
||||
return AutoModelForSequenceClassification.from_pretrained(
|
||||
checkpoint_path, config=model_config
|
||||
)
|
||||
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
||||
return AutoModelForSequenceClassification.from_pretrained(
|
||||
checkpoint_path, config=model_config
|
||||
)
|
||||
# TODO: elif task == your task, fill in the line in your transformers example
|
||||
# that loads the model, e.g., if task == MULTIPLE CHOICE, according to
|
||||
# https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L298
|
||||
# you can return AutoModelForMultipleChoice.from_pretrained(checkpoint_path, config=model_config)
|
||||
|
||||
def is_pretrained_model_in_classification_head_list(model_type):
|
||||
return model_type in MODEL_CLASSIFICATION_HEAD_MAPPING
|
||||
|
||||
def _set_model_config(checkpoint_path):
|
||||
if per_model_config and len(per_model_config) > 0:
|
||||
model_config = AutoConfig.from_pretrained(
|
||||
checkpoint_path,
|
||||
num_labels=model_config_num_labels,
|
||||
**per_model_config,
|
||||
)
|
||||
else:
|
||||
model_config = AutoConfig.from_pretrained(
|
||||
checkpoint_path, num_labels=model_config_num_labels
|
||||
)
|
||||
return model_config
|
||||
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
||||
if per_model_config and len(per_model_config) > 0:
|
||||
model_config = AutoConfig.from_pretrained(
|
||||
checkpoint_path,
|
||||
num_labels=model_config_num_labels,
|
||||
**per_model_config,
|
||||
)
|
||||
else:
|
||||
model_config = AutoConfig.from_pretrained(
|
||||
checkpoint_path, num_labels=model_config_num_labels
|
||||
)
|
||||
return model_config
|
||||
# TODO: elif task == your task, uncomment the code below:
|
||||
# else:
|
||||
# if per_model_config and len(per_model_config) > 0:
|
||||
# model_config = AutoConfig.from_pretrained(
|
||||
# checkpoint_path,
|
||||
# **per_model_config,
|
||||
# )
|
||||
# else:
|
||||
# model_config = AutoConfig.from_pretrained(
|
||||
# checkpoint_path
|
||||
# )
|
||||
# return model_config
|
||||
|
||||
if task == SEQCLASSIFICATION:
|
||||
num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels
|
||||
@ -199,8 +232,9 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
|
||||
this_model = get_this_model()
|
||||
this_model.resize_token_embeddings(this_vocab_size)
|
||||
return this_model
|
||||
elif task == SEQREGRESSION:
|
||||
model_config_num_labels = 1
|
||||
else:
|
||||
if task == SEQREGRESSION:
|
||||
model_config_num_labels = 1
|
||||
model_config = _set_model_config(checkpoint_path)
|
||||
this_model = get_this_model()
|
||||
return this_model
|
||||
|
||||
@ -1,22 +1,25 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||
def test_hf_data():
|
||||
from flaml import AutoML
|
||||
|
||||
import requests
|
||||
from datasets import load_dataset
|
||||
|
||||
train_dataset = (
|
||||
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
dev_dataset = (
|
||||
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
test_dataset = (
|
||||
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
try:
|
||||
train_dataset = (
|
||||
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
dev_dataset = (
|
||||
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
test_dataset = (
|
||||
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
except requests.exceptions.ConnectionError:
|
||||
return
|
||||
|
||||
custom_sent_keys = ["sentence1", "sentence2"]
|
||||
label_key = "label"
|
||||
@ -75,12 +78,15 @@ def test_hf_data():
|
||||
|
||||
def _test_custom_data():
|
||||
from flaml import AutoML
|
||||
|
||||
import requests
|
||||
import pandas as pd
|
||||
|
||||
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
|
||||
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
|
||||
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
|
||||
try:
|
||||
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
|
||||
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
|
||||
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
|
||||
except requests.exceptions.ConnectionError:
|
||||
pass
|
||||
|
||||
custom_sent_keys = ["#1 String", "#2 String"]
|
||||
label_key = "Quality"
|
||||
|
||||
@ -1,10 +1,17 @@
|
||||
def test_classification_head():
|
||||
from flaml import AutoML
|
||||
|
||||
import requests
|
||||
from datasets import load_dataset
|
||||
|
||||
train_dataset = load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
|
||||
dev_dataset = load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
|
||||
try:
|
||||
train_dataset = (
|
||||
load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
|
||||
)
|
||||
dev_dataset = (
|
||||
load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
|
||||
)
|
||||
except requests.exceptions.ConnectionError:
|
||||
return
|
||||
|
||||
custom_sent_keys = ["text"]
|
||||
label_key = "label"
|
||||
|
||||
@ -1,16 +1,19 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||
def test_cv():
|
||||
from flaml import AutoML
|
||||
|
||||
import requests
|
||||
from datasets import load_dataset
|
||||
|
||||
train_dataset = (
|
||||
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
try:
|
||||
train_dataset = (
|
||||
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
||||
)
|
||||
except requests.exceptions.ConnectionError:
|
||||
return
|
||||
|
||||
custom_sent_keys = ["sentence1", "sentence2"]
|
||||
label_key = "label"
|
||||
|
||||
@ -1,15 +1,20 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||
def test_max_iter_1():
|
||||
from flaml import AutoML
|
||||
|
||||
import requests
|
||||
from datasets import load_dataset
|
||||
|
||||
train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
||||
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
||||
try:
|
||||
train_dataset = (
|
||||
load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
||||
)
|
||||
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
||||
except requests.exceptions.ConnectionError:
|
||||
return
|
||||
|
||||
custom_sent_keys = ["sentence1", "sentence2"]
|
||||
label_key = "label"
|
||||
|
||||
@ -1,23 +1,26 @@
|
||||
import os
|
||||
import sys
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
||||
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||
def test_regression():
|
||||
try:
|
||||
import ray
|
||||
except ImportError:
|
||||
return
|
||||
from flaml import AutoML
|
||||
|
||||
import requests
|
||||
from datasets import load_dataset
|
||||
|
||||
train_dataset = (
|
||||
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
|
||||
)
|
||||
dev_dataset = (
|
||||
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
|
||||
)
|
||||
try:
|
||||
train_dataset = (
|
||||
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
|
||||
)
|
||||
dev_dataset = (
|
||||
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
|
||||
)
|
||||
except requests.exceptions.ConnectionError:
|
||||
return
|
||||
|
||||
custom_sent_keys = ["sentence1", "sentence2"]
|
||||
label_key = "label"
|
||||
@ -50,3 +53,7 @@ def test_regression():
|
||||
automl.fit(
|
||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_regression()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user