autogen/test/nlp/test_default.py
Chi Wang 595af7a04f
install editable package in codespace (#826)
* install editable package in codespace

* fix test error in test_forecast

* fix test error in test_space

* openml version

* break tests; pre-commit

* skip on py10+win32

* install mlflow in test

* install mlflow in [test]

* skip test in windows

* import

* handle PermissionError

* skip test in windows

* skip test in windows

* skip test in windows

* skip test in windows

* remove ts_forecast_panel from doc
2022-11-27 14:22:54 -05:00

196 lines
6.2 KiB
Python

from utils import get_toy_data_seqclassification, get_automl_settings
import sys
from flaml.default import portfolio
import os
import shutil
import pytest
def pop_args(fit_kwargs):
fit_kwargs.pop("max_iter", None)
fit_kwargs.pop("use_ray", None)
fit_kwargs.pop("estimator_list", None)
fit_kwargs.pop("time_budget", None)
fit_kwargs.pop("log_file_name", None)
def test_build_portfolio(path="./test/nlp/default", strategy="greedy"):
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures.csv --task seq-classification --estimator transformer_ms --strategy {strategy}".split()
portfolio.main()
@pytest.mark.skipif(sys.platform == "win32", reason="do not run on windows")
def test_starting_point_not_in_search_space():
from flaml import AutoML
"""
test starting_points located outside of the search space, and custom_hp is not set
"""
this_estimator_name = "transformer"
X_train, y_train, X_val, y_val, _ = get_toy_data_seqclassification()
automl = AutoML()
automl_settings = get_automl_settings(estimator_name=this_estimator_name)
automl_settings["starting_points"] = {
this_estimator_name: [{"learning_rate": 2e-3}]
}
automl.fit(X_train, y_train, **automl_settings)
assert (
automl._search_states[this_estimator_name].init_config[0]["learning_rate"]
!= 2e-3
)
"""
test starting_points located outside of the search space, and custom_hp is set
"""
from flaml import tune
X_train, y_train, X_val, y_val, _ = get_toy_data_seqclassification()
this_estimator_name = "transformer_ms"
automl = AutoML()
automl_settings = get_automl_settings(estimator_name=this_estimator_name)
automl_settings["custom_hp"] = {
this_estimator_name: {
"model_path": {
"domain": "albert-base-v2",
},
"learning_rate": {
"domain": tune.choice([1e-4, 1e-5]),
},
"per_device_train_batch_size": {
"domain": 2,
},
}
}
automl_settings["starting_points"] = "data:test/nlp/default/"
automl.fit(X_train, y_train, **automl_settings)
assert len(automl._search_states[this_estimator_name].init_config[0]) == len(
automl._search_states[this_estimator_name]._search_space_domain
) - len(automl_settings["custom_hp"][this_estimator_name]), (
"The search space is updated with the custom_hp on {} hyperparameters of "
"the specified estimator without an initial value. Thus a valid init config "
"should only contain the cardinality of the search space minus {}".format(
len(automl_settings["custom_hp"][this_estimator_name]),
len(automl_settings["custom_hp"][this_estimator_name]),
)
)
assert (
automl._search_states[this_estimator_name].search_space["model_path"]
== "albert-base-v2"
)
if os.path.exists("test/data/output/"):
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
@pytest.mark.skipif(sys.platform == "win32", reason="do not run on windows")
def test_points_to_evaluate():
from flaml import AutoML
X_train, y_train, X_val, y_val, _ = get_toy_data_seqclassification()
automl = AutoML()
automl_settings = get_automl_settings(estimator_name="transformer_ms")
automl_settings["starting_points"] = "data:test/nlp/default/"
automl_settings["custom_hp"] = {
"transformer_ms": {
"model_path": {"domain": "google/electra-small-discriminator"}
}
}
automl.fit(X_train, y_train, **automl_settings)
if os.path.exists("test/data/output/"):
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
# TODO: implement _test_zero_shot_model
@pytest.mark.skipif(sys.platform == "win32", reason="do not run on windows")
def test_zero_shot_nomodel():
from flaml.default import preprocess_and_suggest_hyperparams
estimator_name = "transformer_ms"
location = "test/nlp/default"
X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()
automl_settings = get_automl_settings(estimator_name)
(
hyperparams,
estimator_class,
X_train,
y_train,
_,
_,
) = preprocess_and_suggest_hyperparams(
"seq-classification", X_train, y_train, estimator_name, location=location
)
model = estimator_class(
**hyperparams
) # estimator_class is TransformersEstimatorModelSelection
fit_kwargs = automl_settings.pop("fit_kwargs_by_estimator", {}).get(estimator_name)
fit_kwargs.update(automl_settings)
pop_args(fit_kwargs)
model.fit(X_train, y_train, **fit_kwargs)
if os.path.exists("test/data/output/"):
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")
def test_build_error_portfolio(path="./test/nlp/default", strategy="greedy"):
import os
os.remove("./test/nlp/default/transformer_ms/seq-classification.json")
sys.argv = f"portfolio.py --output {path} --input {path} --metafeatures {path}/all/metafeatures_err.csv --task seq-classification --estimator transformer_ms --strategy {strategy}".split()
portfolio.main()
from flaml.default import preprocess_and_suggest_hyperparams
estimator_name = "transformer_ms"
location = "test/nlp/default"
X_train, y_train, X_val, y_val, X_test = get_toy_data_seqclassification()
try:
(
hyperparams,
estimator_class,
X_train,
y_train,
_,
_,
) = preprocess_and_suggest_hyperparams(
"seq-classification", X_train, y_train, estimator_name, location=location
)
except ValueError:
print("Feature not implemented")
import os
import shutil
if os.path.exists("test/data/output/"):
try:
shutil.rmtree("test/data/output/")
except PermissionError:
print("PermissionError when deleting test/data/output/")