2021-12-03 12:45:16 -05:00
|
|
|
import sys
|
2021-11-18 09:39:45 -08:00
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
2021-12-03 12:45:16 -05:00
|
|
|
@pytest.mark.skipif(sys.platform == "darwin", reason="do not run on mac os")
def test_regression():
    """Smoke-test ``AutoML`` on a tiny sentence-pair regression task with ray.

    Builds a four-row training frame and a four-row validation frame, fits
    ``AutoML`` with ``task="seq-regression"`` and ``use_ray=True``, then calls
    ``predict`` on the validation inputs. Nothing is asserted about the
    predictions: the test passes if fitting and predicting complete without
    raising.

    The test is reported as skipped (rather than silently passing) when
    ``ray`` is not installed. Note that when this function is called outside
    of pytest, the skip surfaces as pytest's skip exception.
    """
    try:
        import ray
    except ImportError:
        # A bare ``return`` here would make pytest report the test as passed
        # even though nothing was exercised.
        pytest.skip("ray is not installed")

    from flaml import AutoML
    import pandas as pd

    train_data = {
        "sentence1": [
            "A plane is taking off.",
            "A man is playing a large flute.",
            "A man is spreading shreded cheese on a pizza.",
            "Three men are playing chess.",
        ],
        "sentence2": [
            "An air plane is taking off.",
            "A man is playing a flute.",
            "A man is spreading shredded cheese on an uncooked pizza.",
            "Two men are playing chess.",
        ],
        "label": [5.0, 3.799999952316284, 3.799999952316284, 2.5999999046325684],
        "idx": [0, 1, 2, 3],
    }
    train_dataset = pd.DataFrame(train_data)

    dev_data = {
        "sentence1": [
            "A man is playing the cello.",
            "Some men are fighting.",
            "A man is smoking.",
            "The man is playing the piano.",
        ],
        "sentence2": [
            "A man seated is playing the cello.",
            "Two men are fighting.",
            "A man is skating.",
            "The man is playing the guitar.",
        ],
        "label": [4.25, 4.25, 0.5, 1.600000023841858],
        "idx": [4, 5, 6, 7],
    }
    dev_dataset = pd.DataFrame(dev_data)

    # Only the two sentence columns are model inputs; "idx" is left out.
    custom_sent_keys = ["sentence1", "sentence2"]
    label_key = "label"

    X_train = train_dataset[custom_sent_keys]
    y_train = train_dataset[label_key]

    X_val = dev_dataset[custom_sent_keys]
    y_val = dev_dataset[label_key]

    automl = AutoML()

    # Deliberately tiny search (CPU only, two iterations, one training epoch
    # as the starting point) so the test stays cheap.
    automl_settings = {
        "gpu_per_trial": 0,
        "max_iter": 2,
        "time_budget": 5,
        "task": "seq-regression",
        "metric": "pearsonr",
        "starting_points": {"transformer": {"num_train_epochs": 1}},
        "use_ray": True,
    }

    automl_settings["custom_hpo_args"] = {
        "model_path": "google/electra-small-discriminator",
        "output_dir": "test/data/output/",
        "ckpt_per_epoch": 1,
        "fp16": False,
    }

    # Start from a fresh ray runtime in case earlier code left one running.
    ray.shutdown()
    ray.init()
    try:
        automl.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings,
        )
        automl.predict(X_val)
    finally:
        # Always tear down the runtime started above, even if fit/predict
        # raised, so it does not outlive this test.
        ray.shutdown()
|
2021-12-03 12:45:16 -05:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the regression test directly, without going
    # through the pytest runner.
    test_regression()
|