Mirror of https://github.com/microsoft/autogen.git (synced 2025-11-14 17:13:29 +00:00)

Commit 612668e8ed (parent cd9740f022)

serialize TransformerEstimator (#381)

* serialize TransformerEstimator
* check has_attr
* custom metric needs trainer
* skip test on mac
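
The net effect of this commit is that a fitted AutoML object containing a TransformersEstimator can be pickled and reloaded for prediction. A minimal usage sketch, mirroring the test added below in test_hf_data(); the data preparation and automl_settings are assumed to be set up as in that test:

import pickle
from flaml import AutoML

automl = AutoML()
# automl.fit(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings)

# Serialize the fitted object; after this commit the estimator no longer
# holds a live Trainer, so the dump succeeds.
with open("automl.pkl", "wb") as f:
    pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)

# Reload and predict without refitting.
with open("automl.pkl", "rb") as f:
    automl = pickle.load(f)
# predictions = automl.predict(X_test)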
@@ -321,45 +321,46 @@ class AutoMLState:
             if self.time_budget is None
             else self.time_budget - self.time_from_start
         )
-        # if self.resources_per_trial.get("gpu", 0) > 0:
+        if (
+            hasattr(self, "resources_per_trial")
+            and self.resources_per_trial.get("gpu", 0) > 0
+        ):

-        #     def _trainable_function_wrapper(config: dict):
+            def _trainable_function_wrapper(config: dict):

-        #         return_estimator, train_time = train_estimator(
-        #             X_train=sampled_X_train,
-        #             y_train=sampled_y_train,
-        #             config_dic=config,
-        #             task=self.task,
-        #             estimator_name=estimator,
-        #             n_jobs=self.n_jobs,
-        #             estimator_class=self.learner_classes.get(estimator),
-        #             budget=budget,
-        #             fit_kwargs=self.fit_kwargs,
-        #         )
-        #         return {"estimator": return_estimator, "train_time": train_time}
+                return_estimator, train_time = train_estimator(
+                    X_train=sampled_X_train,
+                    y_train=sampled_y_train,
+                    config_dic=config,
+                    task=self.task,
+                    estimator_name=estimator,
+                    n_jobs=self.n_jobs,
+                    estimator_class=self.learner_classes.get(estimator),
+                    budget=budget,
+                    fit_kwargs=self.fit_kwargs,
+                )
+                return {"estimator": return_estimator, "train_time": train_time}

-        #     if estimator not in self.learner_classes:
-        #         self.learner_classes[estimator] = get_estimator_class(
-        #             self.task, estimator
-        #         )
+            if estimator not in self.learner_classes:
+                self.learner_classes[estimator] = get_estimator_class(
+                    self.task, estimator
+                )

-        #     analysis = tune.run(
-        #         _trainable_function_wrapper,
-        #         config=config_w_resource,
-        #         metric="train_time",
-        #         mode="min",
-        #         resources_per_trial=self.resources_per_trial,
-        #         num_samples=1,
-        #         use_ray=True,
-        #     )
-        #     result = list(analysis.results.values())[0]
-        #     estimator, train_time = result["estimator"], result["train_time"]
-        # else:
+            analysis = tune.run(
+                _trainable_function_wrapper,
+                config=config_w_resource,
+                metric="train_time",
+                mode="min",
+                resources_per_trial=self.resources_per_trial,
+                num_samples=1,
+                use_ray=True,
+            )
+            result = list(analysis.results.values())[0]
+            estimator, train_time = result["estimator"], result["train_time"]
+        else:
             if _is_nlp_task(self.task):
                 use_ray = self.fit_kwargs.get("use_ray")
                 self.fit_kwargs["use_ray"] = False
-                # TODO: limit number of GPUs
             estimator, train_time = train_estimator(
                 X_train=sampled_X_train,
                 y_train=sampled_y_train,
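
The "check has_attr" item from the commit message refers to the guard above: resources_per_trial may not be set on every AutoMLState instance, so the attribute is checked before reading its "gpu" entry. A minimal, self-contained sketch of the same pattern; dispatch and _State are hypothetical stand-ins, not flaml APIs, and the returned strings only mark which branch of the diff would be taken:

class _State:
    pass


def dispatch(state):
    # same guard as in the diff: only use the ray path when the attribute
    # exists and requests at least one GPU
    if (
        hasattr(state, "resources_per_trial")
        and state.resources_per_trial.get("gpu", 0) > 0
    ):
        return "tune.run(...) path"
    return "plain train_estimator(...) path"


s = _State()
print(dispatch(s))                    # attribute missing -> plain path
s.resources_per_trial = {"gpu": 1}
print(dispatch(s))                    # GPU requested -> ray path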
@@ -384,6 +384,16 @@ class TransformersEstimator(BaseEstimator):
         else:
             return X, None

+    def _model_init(self, num_labels, per_model_config):
+        from .nlp.utils import load_model
+
+        return load_model(
+            checkpoint_path=self.custom_hpo_args.model_path,
+            task=self._task,
+            num_labels=num_labels,
+            per_model_config=per_model_config,
+        )
+
     def fit(self, X_train: DataFrame, y_train: Series, budget=None, **kwargs):
         from transformers import EarlyStoppingCallback
         from transformers.trainer_utils import set_seed
@@ -548,17 +558,9 @@ class TransformersEstimator(BaseEstimator):
             **training_args_config,
         )

-        def _model_init():
-            return load_model(
-                checkpoint_path=self.custom_hpo_args.model_path,
-                task=self._task,
-                num_labels=num_labels,
-                per_model_config=per_model_config,
-            )
-
-        self._model = TrainerForAuto(
+        self._trainer = TrainerForAuto(
             args=training_args,
-            model_init=_model_init,
+            model_init=partial(self._model_init, num_labels, per_model_config),
             train_dataset=train_dataset,
             eval_dataset=eval_dataset,
             tokenizer=tokenizer,
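
A quick sketch of the functools.partial refactoring above, using a simplified stand-in class (load_model is replaced by a dict so the snippet runs standalone): the closure that captured num_labels and per_model_config inside fit() is replaced by a bound method whose arguments are fixed with partial, giving the Trainer an equivalent zero-argument model_init callable:

from functools import partial


class EstimatorSketch:
    # stands in for TransformersEstimator._model_init / load_model(...)
    def _model_init(self, num_labels, per_model_config):
        return {"num_labels": num_labels, "per_model_config": per_model_config}


est = EstimatorSketch()
model_init = partial(est._model_init, 2, {"dropout": 0.1})
print(model_init())  # {'num_labels': 2, 'per_model_config': {'dropout': 0.1}}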
@@ -572,20 +574,27 @@ class TransformersEstimator(BaseEstimator):
             callbacks=[EarlyStoppingCallbackForAuto],
         )

-        setattr(self._model, "_use_ray", self.use_ray)
+        setattr(self._trainer, "_use_ray", self.use_ray)
         if self._task in NLG_TASKS:
-            setattr(self._model, "_is_seq2seq", True)
-        self._model.train()
+            setattr(self._trainer, "_is_seq2seq", True)
+        self._trainer.train()

-        self.params[self.ITER_HP] = self._model.state.global_step
-        self._checkpoint_path = self._select_checkpoint(self._model)
+        self.params[self.ITER_HP] = self._trainer.state.global_step
+        self._checkpoint_path = self._select_checkpoint(self._trainer)

         self._kwargs = kwargs
         self._num_labels = num_labels
         self._per_model_config = per_model_config
         self._training_args_config = training_args_config

-        self._ckpt_remains = list(self._model.ckpt_to_metric.keys())
+        self._ckpt_remains = list(self._trainer.ckpt_to_metric.keys())
+        self._model = load_model(
+            checkpoint_path=self._checkpoint_path,
+            task=self._task,
+            num_labels=self._num_labels,
+            per_model_config=self._per_model_config,
+        )
+        self._trainer = None

     def _delete_one_ckpt(self, ckpt_location):
         if self.use_ray is False:
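
The block above is the core of the serialization fix: at the end of fit(), the best checkpoint is loaded back into self._model (a plain transformers model) and the Trainer is dropped, so the estimator holds only picklable state. A rough, self-contained illustration of why discarding an object with live resources matters for pickling; the classes below are stand-ins chosen for the demonstration (an open file handle as the unpicklable member), not the real TrainerForAuto, which fails to pickle for its own reasons:

import pickle


class TrainerStandIn:
    def __init__(self):
        # an open file handle is one example of state that cannot be pickled
        self.log = open("train.log", "w")


class EstimatorStandIn:
    pass


est = EstimatorStandIn()
est._model = {"weights": [0.1, 0.2]}  # picklable surrogate for the loaded model
est._trainer = TrainerStandIn()

try:
    pickle.dumps(est)
except TypeError as err:
    print("with trainer:", err)       # cannot pickle '_io.TextIOWrapper' object

est._trainer = None
print("without trainer:", len(pickle.dumps(est)), "bytes")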
@@ -667,19 +676,12 @@ class TransformersEstimator(BaseEstimator):

     def _init_model_for_predict(self, X_test):
         from datasets import Dataset
-        from .nlp.utils import load_model
         from transformers import AutoTokenizer
         from .nlp.huggingface.trainer import TrainerForAuto
         from .nlp.huggingface.data_collator import DataCollatorForPredict

         X_test, _ = self._preprocess(X_test, **self._kwargs)
         test_dataset = Dataset.from_pandas(X_test)
-        best_model = load_model(
-            checkpoint_path=self._checkpoint_path,
-            task=self._task,
-            num_labels=self._num_labels,
-            per_model_config=self._per_model_config,
-        )
         training_args = self._TrainingArguments(
             per_device_eval_batch_size=1,
             output_dir=self.custom_hpo_args.output_dir,
@@ -688,8 +690,8 @@ class TransformersEstimator(BaseEstimator):
         tokenizer = AutoTokenizer.from_pretrained(
             self.custom_hpo_args.model_path, use_fast=True
         )
-        self._model = TrainerForAuto(
-            model=best_model,
+        self._trainer = TrainerForAuto(
+            model=self._model,
             args=training_args,
             data_collator=DataCollatorForPredict(
                 tokenizer=tokenizer,
@@ -706,20 +708,21 @@ class TransformersEstimator(BaseEstimator):
         ), "predict_proba() only for classification tasks."

         test_dataset, _ = self._init_model_for_predict(X_test)
-        predictions = self._model.predict(test_dataset)
+        predictions = self._trainer.predict(test_dataset)
+        self._trainer = None
         return predictions.predictions

     def predict(self, X_test):
         test_dataset, training_args = self._init_model_for_predict(X_test)
         if self._task not in NLG_TASKS:
-            predictions = self._model.predict(test_dataset)
+            predictions = self._trainer.predict(test_dataset)
         else:
-            predictions = self._model.predict(
+            predictions = self._trainer.predict(
                 test_dataset,
                 max_length=training_args.generation_max_length,
                 num_beams=training_args.generation_num_beams,
             )
+        self._trainer = None
         if self._task == SEQCLASSIFICATION:
             return np.argmax(predictions.predictions, axis=1)
         elif self._task == SEQREGRESSION:
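
With this change, each call to predict() or predict_proba() builds a throwaway TrainerForAuto around the stored model and releases it before returning, so the estimator stays picklable between calls. A short usage sketch, assuming an automl object and X_test prepared as in the test below:

preds = automl.predict(X_test)                             # Trainer is created and dropped internally
preds_again = automl.predict(["test test", "test test"])   # works repeatedly; no Trainer is retained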
@@ -1,5 +1,7 @@
 import sys
 import pytest
+import pickle
+import shutil


 @pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
@@ -53,6 +55,7 @@ def test_hf_data():
     automl.fit(
         X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
     )

     automl = AutoML()
     automl.retrain_from_log(
         X_train=X_train,
@@ -61,7 +64,11 @@ def test_hf_data():
         record_id=0,
         **automl_settings
     )
+    with open("automl.pkl", "wb") as f:
+        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
+    with open("automl.pkl", "rb") as f:
+        automl = pickle.load(f)
+    shutil.rmtree("test/data/output/")
     automl.predict(X_test)
     automl.predict(["test test", "test test"])
     automl.predict(
@@ -18,6 +18,12 @@ def custom_metric(
     from datasets import Dataset
     from flaml.model import TransformersEstimator

+    if estimator._trainer is None:
+        estimator._init_model_for_predict(X_test)
+        trainer = estimator._trainer
+        estimator._trainer = None
+    else:
+        trainer = estimator._trainer
     if y_test is not None:
         X_test, _ = estimator._preprocess(X_test)
         eval_dataset = Dataset.from_pandas(TransformersEstimator._join(X_test, y_test))
@@ -25,14 +31,11 @@ def custom_metric(
         X_test, _ = estimator._preprocess(X_test)
         eval_dataset = Dataset.from_pandas(X_test)

-    trainer = estimator._model
-
     trainer_compute_metrics_cache = trainer.compute_metrics
     trainer.compute_metrics = None

     metrics = trainer.evaluate(eval_dataset)
     trainer.compute_metrics = trainer_compute_metrics_cache

     return metrics["eval_loss"], metrics
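
The "custom metric needs trainer" item from the commit message is handled in the two hunks above: a user-defined metric can no longer read estimator._model as a Trainer, so it rebuilds one on demand and drops it again. A condensed sketch of that access pattern; _get_trainer is a hypothetical helper mirroring the added lines, and estimator stands for a fitted TransformersEstimator:

def _get_trainer(estimator, X_test):
    # hypothetical helper, same guarded access as added in the diff above
    if estimator._trainer is None:
        estimator._init_model_for_predict(X_test)  # builds a TrainerForAuto around estimator._model
        trainer = estimator._trainer
        estimator._trainer = None                  # keep the estimator picklable
    else:
        trainer = estimator._trainer
    return trainer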
@@ -1,8 +1,8 @@
-import os
+import sys
 import pytest


-@pytest.mark.skipif(os.name == "darwin", reason="do not run on mac os")
+@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
 def test_mcc():
     from flaml import AutoML

@@ -1,8 +1,8 @@
-import os
+import sys
 import pytest


-@pytest.mark.skipif(os.name == "darwin", reason="do not run on mac os")
+@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
 def test_summarization():
     from flaml import AutoML
     from pandas import DataFrame
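
The "skip test on mac" item is the two hunks above: the old skip condition compared os.name, which is "posix" on macOS, against "darwin", so it never matched and the tests were never skipped; sys.platform is the value that actually reads "darwin" there. A quick check:

import os
import sys

# On a Mac this prints "posix darwin": only sys.platform matches "darwin",
# so the corrected skipif condition really skips the test on macOS.
print(os.name, sys.platform)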