2021-12-03 12:45:16 -05:00
|
|
|
import sys
|
2021-11-23 14:26:39 -05:00
|
|
|
import pytest
|
2022-04-28 14:06:29 -04:00
|
|
|
from utils import get_toy_data_seqclassification, get_automl_settings
|
2022-10-12 20:04:42 -04:00
|
|
|
import os
|
|
|
|
import shutil
|
2021-11-23 14:26:39 -05:00
|
|
|
|
|
|
|
|
2021-12-24 16:23:09 -05:00
|
|
|
def custom_metric(
    X_test,
    y_test,
    estimator,
    labels,
    X_train,
    y_train,
    weight_test=None,
    weight_train=None,
    config=None,
    groups_test=None,
    groups_train=None,
):
    """Score ``estimator`` on ``X_test`` by running its trainer's ``evaluate``.

    This callable is supplied as the ``metric`` setting to ``AutoML.fit`` and
    ``AutoML.score`` elsewhere in this file.

    Args:
        X_test: Evaluation features; tokenized via ``estimator._tokenize_text``.
        y_test: Evaluation labels. NOTE(review): this argument is overwritten
            by the second value returned from ``estimator._tokenize_text`` and
            is never read as passed in -- confirm that is intended.
        estimator: Object providing ``_trainer``, ``_init_model_for_predict``,
            ``_tokenize_text`` and ``_metric``.
        labels, X_train, y_train, weight_test, weight_train, config,
        groups_test, groups_train: Accepted but not used by this function.

    Returns:
        A 2-tuple ``(value, metrics)`` where ``value`` is the
        ``"eval_automl_metric"`` entry popped from the dict returned by
        ``trainer.evaluate`` and ``metrics`` is that dict without the entry.

    Raises:
        KeyError: If the evaluation result has no ``"eval_automl_metric"`` key.
    """
    from datasets import Dataset

    if estimator._trainer is None:
        trainer = estimator._init_model_for_predict()
        # Keep the estimator without a cached trainer, as it was on entry
        # (presumably _init_model_for_predict may assign one -- verify).
        estimator._trainer = None
    else:
        trainer = estimator._trainer

    X_test, y_test = estimator._tokenize_text(X_test)

    if y_test is not None:
        eval_dataset = Dataset.from_pandas(X_test.join(y_test))
    else:
        eval_dataset = Dataset.from_pandas(X_test)

    # Temporarily switch the estimator's metric for the evaluation call and
    # always restore it, even when evaluate() raises, so a failed evaluation
    # cannot leave the estimator configured with the wrong metric.
    estimator_metric_backup = estimator._metric
    estimator._metric = "rmse"
    try:
        metrics = trainer.evaluate(eval_dataset)
    finally:
        estimator._metric = estimator_metric_backup

    return metrics.pop("eval_automl_metric"), metrics
|
2021-12-23 18:44:53 -05:00
|
|
|
|
|
|
|
|
2021-12-03 12:45:16 -05:00
|
|
|
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_custom_metric():
    """Fit, score and pickle an AutoML run that uses ``custom_metric``.

    The test returns early (counting as a pass) when ``ray`` cannot be
    imported or when the first ``fit`` call raises
    ``requests.exceptions.HTTPError``. It is skipped entirely on macOS.
    """
    from flaml import AutoML
    import requests

    # The fifth value (the test split) is unpacked but not used below.
    X_train, y_train, X_val, y_val, _unused_X_test = get_toy_data_seqclassification()
    automl = AutoML()

    try:
        import ray

        if not ray.is_initialized():
            ray.init()
    except ImportError:
        return

    settings = get_automl_settings()
    settings.update(
        {
            "metric": custom_metric,
            "use_ray": {"local_dir": "data/output/"},
        }
    )

    # Both fit calls share the same data arguments; only the settings differ.
    fit_data = {
        "X_train": X_train,
        "y_train": y_train,
        "X_val": X_val,
        "y_val": y_val,
    }

    try:
        automl.fit(**fit_data, **settings)
    except requests.exceptions.HTTPError:
        return

    # Second run: exercises the custom metric being called from
    # TransformersEstimator._compute_metrics_by_dataset_name.
    settings["max_iter"] = 3
    automl.fit(**fit_data, **settings)

    automl.score(X_val, y_val, metric=custom_metric)
    automl.pickle("automl.pkl")

    del automl

    output_dir = "test/data/output/"
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
|
|
|
|
|
2021-12-24 16:23:09 -05:00
|
|
|
|
|
|
|
# Allow running this test directly as a script, without the pytest runner.
if __name__ == "__main__":
    test_custom_metric()
|