mirror of
https://github.com/microsoft/autogen.git
synced 2025-11-13 00:24:23 +00:00
adding TODOs for NLP module, so students can implement other tasks more easily (#321)
* fixing ray pickle bug, skipping macosx bug, completing code for seqregression * catching connectionerror * adding TODOs for NLP module
This commit is contained in:
parent
c57954fbbd
commit
fb59bb9928
@ -12,6 +12,7 @@ from .training_log import training_log_reader
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Dict, Union, List
|
from typing import Dict, Union, List
|
||||||
|
|
||||||
|
# TODO: if your task is not specified in here, define your task as an all-capitalized word
|
||||||
SEQCLASSIFICATION = "seq-classification"
|
SEQCLASSIFICATION = "seq-classification"
|
||||||
CLASSIFICATION = ("binary", "multi", "classification", SEQCLASSIFICATION)
|
CLASSIFICATION = ("binary", "multi", "classification", SEQCLASSIFICATION)
|
||||||
SEQREGRESSION = "seq-regression"
|
SEQREGRESSION = "seq-regression"
|
||||||
@ -20,10 +21,16 @@ TS_FORECAST = "ts_forecast"
|
|||||||
TS_TIMESTAMP_COL = "ds"
|
TS_TIMESTAMP_COL = "ds"
|
||||||
TS_VALUE_COL = "y"
|
TS_VALUE_COL = "y"
|
||||||
FORECAST = "forecast"
|
FORECAST = "forecast"
|
||||||
|
SUMMARIZATION = "summarization"
|
||||||
|
NLG_TASKS = (SUMMARIZATION,)
|
||||||
|
NLU_TASKS = (
|
||||||
|
SEQREGRESSION,
|
||||||
|
SEQCLASSIFICATION,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _is_nlp_task(task):
|
def _is_nlp_task(task):
|
||||||
if task in [SEQCLASSIFICATION, SEQREGRESSION]:
|
if task in NLU_TASKS + NLG_TASKS:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|||||||
@ -23,6 +23,8 @@ from .data import (
|
|||||||
TS_FORECAST,
|
TS_FORECAST,
|
||||||
TS_TIMESTAMP_COL,
|
TS_TIMESTAMP_COL,
|
||||||
TS_VALUE_COL,
|
TS_VALUE_COL,
|
||||||
|
SEQCLASSIFICATION,
|
||||||
|
SEQREGRESSION,
|
||||||
)
|
)
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
@ -303,8 +305,8 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
return train_df
|
return train_df
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def search_space(cls, **params):
|
def search_space(cls, data_size, task, **params):
|
||||||
return {
|
search_space_dict = {
|
||||||
"learning_rate": {
|
"learning_rate": {
|
||||||
"domain": tune.loguniform(lower=1e-6, upper=1e-3),
|
"domain": tune.loguniform(lower=1e-6, upper=1e-3),
|
||||||
"init_value": 1e-5,
|
"init_value": 1e-5,
|
||||||
@ -331,6 +333,14 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
"seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42},
|
"seed": {"domain": tune.choice(list(range(40, 45))), "init_value": 42},
|
||||||
"global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize},
|
"global_max_steps": {"domain": sys.maxsize, "init_value": sys.maxsize},
|
||||||
}
|
}
|
||||||
|
# TODO: if self._task == SUMMARIZATION, uncomment the code below, SET the search space for
|
||||||
|
# "num_beams" in search_space_dict using
|
||||||
|
# search_space_dict["num_beams"] = {...}
|
||||||
|
|
||||||
|
# if task in NLG_TASKS:
|
||||||
|
# search_space_dict["num_beams"] = {"domain": tune.choice(...)}
|
||||||
|
|
||||||
|
return search_space_dict
|
||||||
|
|
||||||
def _init_hpo_args(self, automl_fit_kwargs: dict = None):
|
def _init_hpo_args(self, automl_fit_kwargs: dict = None):
|
||||||
from .nlp.utils import HPOArgs
|
from .nlp.utils import HPOArgs
|
||||||
@ -356,7 +366,15 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
|
def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
|
||||||
from transformers import EarlyStoppingCallback
|
from transformers import EarlyStoppingCallback
|
||||||
from transformers.trainer_utils import set_seed
|
from transformers.trainer_utils import set_seed
|
||||||
from transformers import AutoTokenizer, TrainingArguments
|
from transformers import AutoTokenizer
|
||||||
|
|
||||||
|
# TODO: if self._task == SUMMARIZATION, uncomment the code below (add indentation before
|
||||||
|
# from transformers import TrainingArguments)
|
||||||
|
# if self._task in NLG_TASKS:
|
||||||
|
# from transformers import Seq2SeqTrainingArguments as TrainingArguments
|
||||||
|
# else:
|
||||||
|
from transformers import TrainingArguments
|
||||||
|
|
||||||
import transformers
|
import transformers
|
||||||
from datasets import Dataset
|
from datasets import Dataset
|
||||||
from .nlp.utils import (
|
from .nlp.utils import (
|
||||||
@ -367,6 +385,13 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
get_trial_fold_name,
|
get_trial_fold_name,
|
||||||
date_str,
|
date_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# TODO: if self._task == SUMMARIZATION, uncomment the code below (add indentation before
|
||||||
|
# from .nlp.huggingface.trainer import TrainerForAuto)
|
||||||
|
|
||||||
|
# if self._task in NLG_TASKS:
|
||||||
|
# from .nlp.huggingface.trainer import Seq2SeqTrainerForAuto as TrainerForAuto
|
||||||
|
# else:
|
||||||
from .nlp.huggingface.trainer import TrainerForAuto
|
from .nlp.huggingface.trainer import TrainerForAuto
|
||||||
|
|
||||||
this_params = self.params
|
this_params = self.params
|
||||||
@ -414,6 +439,13 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
|
|
||||||
X_train = self._preprocess(X_train, self._task, **kwargs)
|
X_train = self._preprocess(X_train, self._task, **kwargs)
|
||||||
train_dataset = Dataset.from_pandas(self._join(X_train, y_train))
|
train_dataset = Dataset.from_pandas(self._join(X_train, y_train))
|
||||||
|
|
||||||
|
# TODO: set a breakpoint here, observe the resulting train_dataset,
|
||||||
|
# compare it with the output of the tokenized results in your transformer example
|
||||||
|
# for example, if your task is MULTIPLECHOICE, you need to compare train_dataset with
|
||||||
|
# the output of https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
|
||||||
|
# make sure they are the same
|
||||||
|
|
||||||
if X_val is not None:
|
if X_val is not None:
|
||||||
X_val = self._preprocess(X_val, self._task, **kwargs)
|
X_val = self._preprocess(X_val, self._task, **kwargs)
|
||||||
eval_dataset = Dataset.from_pandas(self._join(X_val, y_val))
|
eval_dataset = Dataset.from_pandas(self._join(X_val, y_val))
|
||||||
@ -528,6 +560,7 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
logger.warning("checkpoint {} not found".format(ckpt_location))
|
logger.warning("checkpoint {} not found".format(ckpt_location))
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
|
super().cleanup()
|
||||||
if hasattr(self, "_ckpt_remains"):
|
if hasattr(self, "_ckpt_remains"):
|
||||||
for each_ckpt in self._ckpt_remains:
|
for each_ckpt in self._ckpt_remains:
|
||||||
self._delete_one_ckpt(each_ckpt)
|
self._delete_one_ckpt(each_ckpt)
|
||||||
@ -558,7 +591,6 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
|
|
||||||
def _compute_metrics_by_dataset_name(self, eval_pred):
|
def _compute_metrics_by_dataset_name(self, eval_pred):
|
||||||
from .ml import sklearn_metric_loss_score
|
from .ml import sklearn_metric_loss_score
|
||||||
from .data import SEQREGRESSION
|
|
||||||
import datasets
|
import datasets
|
||||||
from .nlp.utils import load_default_huggingface_metric_for_task
|
from .nlp.utils import load_default_huggingface_metric_for_task
|
||||||
|
|
||||||
@ -638,7 +670,13 @@ class TransformersEstimator(BaseEstimator):
|
|||||||
self._model = TrainerForAuto(model=best_model, args=training_args)
|
self._model = TrainerForAuto(model=best_model, args=training_args)
|
||||||
predictions = self._model.predict(test_dataset)
|
predictions = self._model.predict(test_dataset)
|
||||||
|
|
||||||
return np.argmax(predictions.predictions, axis=1)
|
if self._task == SEQCLASSIFICATION:
|
||||||
|
return np.argmax(predictions.predictions, axis=1)
|
||||||
|
elif self._task == SEQREGRESSION:
|
||||||
|
return predictions.predictions
|
||||||
|
# TODO: elif self._task == your task, return the corresponding prediction
|
||||||
|
# e.g., if your task == QUESTIONANSWERING, you need to return the answer instead
|
||||||
|
# of the index
|
||||||
|
|
||||||
def config2params(cls, config: dict) -> dict:
|
def config2params(cls, config: dict) -> dict:
|
||||||
params = config.copy()
|
params = config.copy()
|
||||||
|
|||||||
@ -2,12 +2,19 @@ import os
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from transformers import Trainer as TFTrainer
|
from transformers import Trainer as TFTrainer
|
||||||
|
from transformers import Seq2SeqTrainer
|
||||||
except ImportError:
|
except ImportError:
|
||||||
TFTrainer = object
|
TFTrainer = object
|
||||||
|
|
||||||
|
|
||||||
class TrainerForAuto(TFTrainer):
|
class TrainerForAuto(TFTrainer):
|
||||||
def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
|
def evaluate(
|
||||||
|
self,
|
||||||
|
eval_dataset=None,
|
||||||
|
ignore_keys=None,
|
||||||
|
metric_key_prefix="eval",
|
||||||
|
is_seq2seq=False,
|
||||||
|
):
|
||||||
"""Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
|
"""Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
|
||||||
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
|
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
|
||||||
|
|
||||||
@ -15,8 +22,21 @@ class TrainerForAuto(TFTrainer):
|
|||||||
self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
|
self.args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
|
||||||
)
|
)
|
||||||
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
|
eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset
|
||||||
|
|
||||||
|
# TODO: if your task is seq2seq (i.e., SUMMARIZATION), uncomment the code below (add indentation before metrics = eval_dataset...
|
||||||
|
|
||||||
|
# if is_seq2seq:
|
||||||
|
# metrics = eval_dataset and super().evaluate(
|
||||||
|
# eval_dataset,
|
||||||
|
# ignore_keys,
|
||||||
|
# metric_key_prefix,
|
||||||
|
# num_beams=self.args.num_beams,
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
metrics = eval_dataset and super().evaluate(
|
metrics = eval_dataset and super().evaluate(
|
||||||
eval_dataset, ignore_keys, metric_key_prefix
|
eval_dataset,
|
||||||
|
ignore_keys,
|
||||||
|
metric_key_prefix,
|
||||||
)
|
)
|
||||||
if metrics:
|
if metrics:
|
||||||
for key in list(metrics.keys()):
|
for key in list(metrics.keys()):
|
||||||
@ -29,3 +49,27 @@ class TrainerForAuto(TFTrainer):
|
|||||||
else:
|
else:
|
||||||
self.ckpt_to_global_step = {ckpt_dir: self.state.global_step}
|
self.ckpt_to_global_step = {ckpt_dir: self.state.global_step}
|
||||||
self.ckpt_to_metric = {ckpt_dir: metrics} if metrics else {}
|
self.ckpt_to_metric = {ckpt_dir: metrics} if metrics else {}
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: if your task is SUMMARIZATION, you need a different
|
||||||
|
# class Seq2SeqTrainerForAuto, uncomment the code below
|
||||||
|
# Note: I have implemented it here,
|
||||||
|
# but I don't know whether it's correct, you need to debug
|
||||||
|
# Seq2SeqTrainerForAuto to make sure it's correct
|
||||||
|
|
||||||
|
|
||||||
|
# class Seq2SeqTrainerForAuto(Seq2SeqTrainer, TrainerForAuto):
|
||||||
|
# def evaluate(self, eval_dataset=None, ignore_keys=None, metric_key_prefix="eval"):
|
||||||
|
# """Overriding transformers.Trainer.evaluate by saving metrics and checkpoint path"""
|
||||||
|
# super(TrainerForAuto).evaluate(
|
||||||
|
# eval_dataset, ignore_keys, metric_key_prefix, is_seq2seq=True
|
||||||
|
# )
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: if your task is QUESTIONANSWERING, uncomment the code below
|
||||||
|
# by adapting the code in https://github.com/huggingface/transformers/blob/master/examples/pytorch/question-answering/trainer_qa.py#L28
|
||||||
|
|
||||||
|
|
||||||
|
# class QATrainerForAuto(TrainerForAuto):
|
||||||
|
# pass
|
||||||
|
# TODO: if your task is QUESTIONANSWERING, do the post processing here
|
||||||
|
|||||||
@ -10,6 +10,14 @@ def load_default_huggingface_metric_for_task(task):
|
|||||||
return "accuracy", "max"
|
return "accuracy", "max"
|
||||||
elif task == SEQREGRESSION:
|
elif task == SEQREGRESSION:
|
||||||
return "rmse", "max"
|
return "rmse", "max"
|
||||||
|
# TODO: elif task == your task, return the default metric name for your task,
|
||||||
|
# e.g., if task == MULTIPLECHOICE, return "accuracy"
|
||||||
|
# notice this metric name has to be in ['accuracy', 'bertscore', 'bleu', 'bleurt',
|
||||||
|
# 'cer', 'chrf', 'code_eval', 'comet', 'competition_math', 'coval', 'cuad',
|
||||||
|
# 'f1', 'gleu', 'glue', 'google_bleu', 'indic_glue', 'matthews_correlation',
|
||||||
|
# 'meteor', 'pearsonr', 'precision', 'recall', 'rouge', 'sacrebleu', 'sari',
|
||||||
|
# 'seqeval', 'spearmanr', 'squad', 'squad_v2', 'super_glue', 'ter', 'wer',
|
||||||
|
# 'wiki_split', 'xnli']
|
||||||
|
|
||||||
|
|
||||||
global tokenized_column_names
|
global tokenized_column_names
|
||||||
@ -20,6 +28,11 @@ def tokenize_text(X, task, custom_hpo_task):
|
|||||||
|
|
||||||
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
||||||
return tokenize_text_seqclassification(X, custom_hpo_task)
|
return tokenize_text_seqclassification(X, custom_hpo_task)
|
||||||
|
# TODO: elif task == your task, return the tokenized result
|
||||||
|
# for example, if your task == MULTIPLE CHOICE, you should
|
||||||
|
# create a function named tokenize_text_multiplechoice(X, custom_hpo_args)
|
||||||
|
# and what it does is the same as preprocess_function at
|
||||||
|
# https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L329
|
||||||
|
|
||||||
|
|
||||||
def tokenize_text_seqclassification(X, custom_hpo_args):
|
def tokenize_text_seqclassification(X, custom_hpo_args):
|
||||||
@ -79,6 +92,8 @@ def get_num_labels(task, y_train):
|
|||||||
return 1
|
return 1
|
||||||
elif task == SEQCLASSIFICATION:
|
elif task == SEQCLASSIFICATION:
|
||||||
return len(set(y_train))
|
return len(set(y_train))
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def _clean_value(value: Any) -> str:
|
def _clean_value(value: Any) -> str:
|
||||||
@ -155,25 +170,43 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
|
|||||||
def get_this_model():
|
def get_this_model():
|
||||||
from transformers import AutoModelForSequenceClassification
|
from transformers import AutoModelForSequenceClassification
|
||||||
|
|
||||||
return AutoModelForSequenceClassification.from_pretrained(
|
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
||||||
checkpoint_path, config=model_config
|
return AutoModelForSequenceClassification.from_pretrained(
|
||||||
)
|
checkpoint_path, config=model_config
|
||||||
|
)
|
||||||
|
# TODO: elif task == your task, fill in the line in your transformers example
|
||||||
|
# that loads the model, e.g., if task == MULTIPLE CHOICE, according to
|
||||||
|
# https://github.com/huggingface/transformers/blob/master/examples/pytorch/multiple-choice/run_swag.py#L298
|
||||||
|
# you can return AutoModelForMultipleChoice.from_pretrained(checkpoint_path, config=model_config)
|
||||||
|
|
||||||
def is_pretrained_model_in_classification_head_list(model_type):
|
def is_pretrained_model_in_classification_head_list(model_type):
|
||||||
return model_type in MODEL_CLASSIFICATION_HEAD_MAPPING
|
return model_type in MODEL_CLASSIFICATION_HEAD_MAPPING
|
||||||
|
|
||||||
def _set_model_config(checkpoint_path):
|
def _set_model_config(checkpoint_path):
|
||||||
if per_model_config and len(per_model_config) > 0:
|
if task in (SEQCLASSIFICATION, SEQREGRESSION):
|
||||||
model_config = AutoConfig.from_pretrained(
|
if per_model_config and len(per_model_config) > 0:
|
||||||
checkpoint_path,
|
model_config = AutoConfig.from_pretrained(
|
||||||
num_labels=model_config_num_labels,
|
checkpoint_path,
|
||||||
**per_model_config,
|
num_labels=model_config_num_labels,
|
||||||
)
|
**per_model_config,
|
||||||
else:
|
)
|
||||||
model_config = AutoConfig.from_pretrained(
|
else:
|
||||||
checkpoint_path, num_labels=model_config_num_labels
|
model_config = AutoConfig.from_pretrained(
|
||||||
)
|
checkpoint_path, num_labels=model_config_num_labels
|
||||||
return model_config
|
)
|
||||||
|
return model_config
|
||||||
|
# TODO: elif task == your task, uncomment the code below:
|
||||||
|
# else:
|
||||||
|
# if per_model_config and len(per_model_config) > 0:
|
||||||
|
# model_config = AutoConfig.from_pretrained(
|
||||||
|
# checkpoint_path,
|
||||||
|
# **per_model_config,
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
|
# model_config = AutoConfig.from_pretrained(
|
||||||
|
# checkpoint_path
|
||||||
|
# )
|
||||||
|
# return model_config
|
||||||
|
|
||||||
if task == SEQCLASSIFICATION:
|
if task == SEQCLASSIFICATION:
|
||||||
num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels
|
num_labels_old = AutoConfig.from_pretrained(checkpoint_path).num_labels
|
||||||
@ -199,8 +232,9 @@ def load_model(checkpoint_path, task, num_labels, per_model_config=None):
|
|||||||
this_model = get_this_model()
|
this_model = get_this_model()
|
||||||
this_model.resize_token_embeddings(this_vocab_size)
|
this_model.resize_token_embeddings(this_vocab_size)
|
||||||
return this_model
|
return this_model
|
||||||
elif task == SEQREGRESSION:
|
else:
|
||||||
model_config_num_labels = 1
|
if task == SEQREGRESSION:
|
||||||
|
model_config_num_labels = 1
|
||||||
model_config = _set_model_config(checkpoint_path)
|
model_config = _set_model_config(checkpoint_path)
|
||||||
this_model = get_this_model()
|
this_model = get_this_model()
|
||||||
return this_model
|
return this_model
|
||||||
|
|||||||
@ -1,22 +1,25 @@
|
|||||||
import os
|
import sys
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||||
def test_hf_data():
|
def test_hf_data():
|
||||||
from flaml import AutoML
|
from flaml import AutoML
|
||||||
|
import requests
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
train_dataset = (
|
try:
|
||||||
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
train_dataset = (
|
||||||
)
|
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
||||||
dev_dataset = (
|
)
|
||||||
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
|
dev_dataset = (
|
||||||
)
|
load_dataset("glue", "mrpc", split="train[1%:2%]").to_pandas().iloc[0:4]
|
||||||
test_dataset = (
|
)
|
||||||
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
|
test_dataset = (
|
||||||
)
|
load_dataset("glue", "mrpc", split="test[1%:2%]").to_pandas().iloc[0:4]
|
||||||
|
)
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return
|
||||||
|
|
||||||
custom_sent_keys = ["sentence1", "sentence2"]
|
custom_sent_keys = ["sentence1", "sentence2"]
|
||||||
label_key = "label"
|
label_key = "label"
|
||||||
@ -75,12 +78,15 @@ def test_hf_data():
|
|||||||
|
|
||||||
def _test_custom_data():
|
def _test_custom_data():
|
||||||
from flaml import AutoML
|
from flaml import AutoML
|
||||||
|
import requests
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
|
try:
|
||||||
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
|
train_dataset = pd.read_csv("data/input/train.tsv", delimiter="\t", quoting=3)
|
||||||
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
|
dev_dataset = pd.read_csv("data/input/dev.tsv", delimiter="\t", quoting=3)
|
||||||
|
test_dataset = pd.read_csv("data/input/test.tsv", delimiter="\t", quoting=3)
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
pass
|
||||||
|
|
||||||
custom_sent_keys = ["#1 String", "#2 String"]
|
custom_sent_keys = ["#1 String", "#2 String"]
|
||||||
label_key = "Quality"
|
label_key = "Quality"
|
||||||
|
|||||||
@ -1,10 +1,17 @@
|
|||||||
def test_classification_head():
|
def test_classification_head():
|
||||||
from flaml import AutoML
|
from flaml import AutoML
|
||||||
|
import requests
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
train_dataset = load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
|
try:
|
||||||
dev_dataset = load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
|
train_dataset = (
|
||||||
|
load_dataset("emotion", split="train[:1%]").to_pandas().iloc[0:10]
|
||||||
|
)
|
||||||
|
dev_dataset = (
|
||||||
|
load_dataset("emotion", split="train[1%:2%]").to_pandas().iloc[0:10]
|
||||||
|
)
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return
|
||||||
|
|
||||||
custom_sent_keys = ["text"]
|
custom_sent_keys = ["text"]
|
||||||
label_key = "label"
|
label_key = "label"
|
||||||
|
|||||||
@ -1,16 +1,19 @@
|
|||||||
import os
|
import sys
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||||
def test_cv():
|
def test_cv():
|
||||||
from flaml import AutoML
|
from flaml import AutoML
|
||||||
|
import requests
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
train_dataset = (
|
try:
|
||||||
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
train_dataset = (
|
||||||
)
|
load_dataset("glue", "mrpc", split="train[:1%]").to_pandas().iloc[0:4]
|
||||||
|
)
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return
|
||||||
|
|
||||||
custom_sent_keys = ["sentence1", "sentence2"]
|
custom_sent_keys = ["sentence1", "sentence2"]
|
||||||
label_key = "label"
|
label_key = "label"
|
||||||
|
|||||||
@ -1,15 +1,20 @@
|
|||||||
import os
|
import sys
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||||
def test_max_iter_1():
|
def test_max_iter_1():
|
||||||
from flaml import AutoML
|
from flaml import AutoML
|
||||||
|
import requests
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
train_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
try:
|
||||||
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
train_dataset = (
|
||||||
|
load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
||||||
|
)
|
||||||
|
dev_dataset = load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return
|
||||||
|
|
||||||
custom_sent_keys = ["sentence1", "sentence2"]
|
custom_sent_keys = ["sentence1", "sentence2"]
|
||||||
label_key = "label"
|
label_key = "label"
|
||||||
|
|||||||
@ -1,23 +1,26 @@
|
|||||||
import os
|
import sys
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(os.name == "posix", reason="do not run on mac os")
|
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
|
||||||
def test_regression():
|
def test_regression():
|
||||||
try:
|
try:
|
||||||
import ray
|
import ray
|
||||||
except ImportError:
|
except ImportError:
|
||||||
return
|
return
|
||||||
from flaml import AutoML
|
from flaml import AutoML
|
||||||
|
import requests
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
train_dataset = (
|
try:
|
||||||
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
|
train_dataset = (
|
||||||
)
|
load_dataset("glue", "stsb", split="train[:1%]").to_pandas().iloc[:20]
|
||||||
dev_dataset = (
|
)
|
||||||
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
|
dev_dataset = (
|
||||||
)
|
load_dataset("glue", "stsb", split="train[1%:2%]").to_pandas().iloc[:20]
|
||||||
|
)
|
||||||
|
except requests.exceptions.ConnectionError:
|
||||||
|
return
|
||||||
|
|
||||||
custom_sent_keys = ["sentence1", "sentence2"]
|
custom_sent_keys = ["sentence1", "sentence2"]
|
||||||
label_key = "label"
|
label_key = "label"
|
||||||
@ -50,3 +53,7 @@ def test_regression():
|
|||||||
automl.fit(
|
automl.fit(
|
||||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_regression()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user