2021-12-03 12:45:16 -05:00
|
|
|
import sys
|
2021-11-23 14:26:39 -05:00
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
2021-12-24 16:23:09 -05:00
|
|
|
def custom_metric(
    X_test,
    y_test,
    estimator,
    labels,
    X_train,
    y_train,
    weight_test=None,
    weight_train=None,
    config=None,
    groups_test=None,
    groups_train=None,
):
    """Evaluate ``estimator`` on the test split through its HuggingFace trainer.

    Only ``X_test``, ``y_test`` and ``estimator`` are read. The remaining
    parameters are accepted but unused; they are presumably required by the
    custom-metric calling convention of the caller (verify against FLAML).

    Args:
        X_test: Test features; passed to ``estimator._preprocess``.
        y_test: Test labels, or ``None`` to evaluate on features only.
        estimator: Object exposing ``_trainer``, ``_metric``, ``_task``,
            ``_preprocess`` and ``_init_model_for_predict``.

    Returns:
        A tuple ``(metrics["eval_val_loss"], metrics)`` where ``metrics`` is
        whatever ``trainer.evaluate`` returned.
    """
    from datasets import Dataset
    from flaml.model import TransformersEstimator
    from flaml.nlp.utils import load_default_huggingface_metric_for_task

    if estimator._trainer is None:
        # No trainer attached yet: have the estimator build one (presumably
        # stored on ``_trainer`` by ``_init_model_for_predict``), borrow it,
        # and reset the attribute so the estimator is left as it was found.
        estimator._init_model_for_predict(X_test)
        trainer = estimator._trainer
        estimator._trainer = None
    else:
        trainer = estimator._trainer

    # Preprocessing is needed whether or not labels are available.
    X_test, _ = estimator._preprocess(X_test)
    if y_test is not None:
        eval_frame = TransformersEstimator._join(X_test, y_test)
    else:
        eval_frame = X_test
    eval_dataset = Dataset.from_pandas(eval_frame)

    # Temporarily swap in the task's default HuggingFace metric for the
    # evaluation, and always restore the previous one -- even if evaluation
    # raises -- so the estimator is never left with the swapped metric.
    estimator_metric_cache = estimator._metric
    estimator._metric = load_default_huggingface_metric_for_task(estimator._task)
    try:
        metrics = trainer.evaluate(eval_dataset)
    finally:
        estimator._metric = estimator_metric_cache

    return metrics["eval_val_loss"], metrics
|
2021-12-23 18:44:53 -05:00
|
|
|
|
|
|
|
|
2021-12-03 12:45:16 -05:00
|
|
|
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_custom_metric():
    """Run AutoML twice on a four-row MRPC sample using ``custom_metric``.

    The first fit uses ``max_iter=1``; the second raises it to 3. If the
    dataset download fails with a connection error the test returns early.
    """
    from flaml import AutoML
    import requests
    from datasets import load_dataset

    def _load_sample():
        # First four rows of the GLUE/MRPC train split as a DataFrame.
        return load_dataset("glue", "mrpc", split="train").to_pandas().iloc[0:4]

    try:
        train_frame = _load_sample()
        # The validation sample is drawn from the same train split.
        dev_frame = _load_sample()
    except requests.exceptions.ConnectionError:
        return

    sentence_columns = ["sentence1", "sentence2"]
    target_column = "label"

    fit_data = {
        "X_train": train_frame[sentence_columns],
        "y_train": train_frame[target_column],
        "X_val": dev_frame[sentence_columns],
        "y_val": dev_frame[target_column],
    }

    automl = AutoML()

    # testing when max_iter=1 and do retrain only without hpo
    fit_settings = {
        "gpu_per_trial": 0,
        "max_iter": 1,
        "time_budget": 5,
        "task": "seq-classification",
        "metric": custom_metric,
        "log_file_name": "seqclass.log",
        "custom_hpo_args": {
            "model_path": "google/electra-small-discriminator",
            "output_dir": "data/output/",
            "ckpt_per_epoch": 5,
            "fp16": False,
        },
    }
    automl.fit(**fit_data, **fit_settings)

    # testing calling custom metric in TransformersEstimator._compute_metrics_by_dataset_name
    fit_settings["max_iter"] = 3
    automl.fit(**fit_data, **fit_settings)

    del automl
|
2021-12-24 16:23:09 -05:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Allow running this test directly as a script, without pytest.
    test_custom_metric()
|