mirror of
https://github.com/microsoft/autogen.git
synced 2025-07-31 12:52:18 +00:00

* adding automl.score * fixing the metric name in train_with_config * adding pickle after score * fixing a bug in automl.pickle
219 lines
6.3 KiB
Python
219 lines
6.3 KiB
Python
from flaml import AutoML
|
|
import pandas as pd
|
|
from sklearn.datasets import fetch_california_housing, fetch_openml
|
|
|
|
|
|
class TestScore:
|
|
def test_forecast(self, budget=5):
|
|
import pickle
|
|
|
|
# using dataframe
|
|
import statsmodels.api as sm
|
|
|
|
data = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
|
|
data = (
|
|
data.fillna(data.bfill())
|
|
.to_frame()
|
|
.reset_index()
|
|
.rename(columns={"index": "ds", "co2": "y"})
|
|
)
|
|
num_samples = data.shape[0]
|
|
time_horizon = 12
|
|
split_idx = num_samples - time_horizon
|
|
X_test = data[split_idx:]["ds"]
|
|
y_test = data[split_idx:]["y"]
|
|
|
|
df = data[:split_idx]
|
|
automl = AutoML()
|
|
settings = {
|
|
"time_budget": budget, # total running time in seconds
|
|
"metric": "mape", # primary metric
|
|
"task": "ts_forecast", # task type
|
|
"log_file_name": "test/CO2_forecast.log", # flaml log file
|
|
"eval_method": "holdout",
|
|
"label": "y",
|
|
}
|
|
"""The main flaml automl API"""
|
|
try:
|
|
import prophet
|
|
|
|
automl.fit(
|
|
dataframe=df,
|
|
estimator_list=["prophet", "arima", "sarimax"],
|
|
**settings,
|
|
period=time_horizon,
|
|
)
|
|
automl.score(X_test, y_test)
|
|
automl.pickle("automl.pkl")
|
|
with open("automl.pkl", "rb") as f:
|
|
pickle.load(f)
|
|
except ImportError:
|
|
print("not using prophet due to ImportError")
|
|
automl.fit(
|
|
dataframe=df,
|
|
**settings,
|
|
estimator_list=["arima", "sarimax"],
|
|
period=time_horizon,
|
|
)
|
|
automl.score(X_test, y_test)
|
|
automl.pickle("automl.pkl")
|
|
with open("automl.pkl", "rb") as f:
|
|
pickle.load(f)
|
|
|
|
def test_classification(self):
|
|
X = pd.DataFrame(
|
|
{
|
|
"f1": [1, -2, 3, -4, 5, -6, -7, 8, -9, -10, -11, -12, -13, -14],
|
|
"f2": [
|
|
3.0,
|
|
16.0,
|
|
10.0,
|
|
12.0,
|
|
3.0,
|
|
14.0,
|
|
11.0,
|
|
12.0,
|
|
5.0,
|
|
14.0,
|
|
20.0,
|
|
16.0,
|
|
15.0,
|
|
11.0,
|
|
],
|
|
"f3": [
|
|
"a",
|
|
"b",
|
|
"a",
|
|
"c",
|
|
"c",
|
|
"b",
|
|
"b",
|
|
"b",
|
|
"b",
|
|
"a",
|
|
"b",
|
|
1.0,
|
|
1.0,
|
|
"a",
|
|
],
|
|
"f4": [
|
|
True,
|
|
True,
|
|
False,
|
|
True,
|
|
True,
|
|
False,
|
|
False,
|
|
False,
|
|
True,
|
|
True,
|
|
False,
|
|
False,
|
|
True,
|
|
True,
|
|
],
|
|
}
|
|
)
|
|
y = pd.Series([0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
|
|
|
|
automl = AutoML()
|
|
for each_estimator in [
|
|
"catboost",
|
|
"lrl2",
|
|
"lrl1",
|
|
"rf",
|
|
"lgbm",
|
|
"extra_tree",
|
|
"kneighbor",
|
|
"xgboost",
|
|
]:
|
|
automl_settings = {
|
|
"time_budget": 6,
|
|
"task": "classification",
|
|
"n_jobs": 1,
|
|
"estimator_list": [each_estimator],
|
|
"metric": "accuracy",
|
|
"log_training_metric": True,
|
|
}
|
|
automl.score(X, y) # for covering the case no estimator is trained
|
|
|
|
automl.fit(X, y, **automl_settings)
|
|
automl.score(X, y)
|
|
automl.score(X, y, **{"metric": "accuracy"})
|
|
|
|
automl.pickle("automl.pkl")
|
|
|
|
def test_regression(self):
|
|
automl_experiment = AutoML()
|
|
|
|
X_train, y_train = fetch_california_housing(return_X_y=True)
|
|
n = int(len(y_train) * 9 // 10)
|
|
|
|
for each_estimator in [
|
|
"lgbm",
|
|
"xgboost",
|
|
"rf",
|
|
"extra_tree",
|
|
"catboost",
|
|
"kneighbor",
|
|
]:
|
|
automl_settings = {
|
|
"time_budget": 2,
|
|
"task": "regression",
|
|
"log_file_name": "test/california.log",
|
|
"log_training_metric": True,
|
|
"estimator_list": [each_estimator],
|
|
"n_jobs": 1,
|
|
"model_history": True,
|
|
}
|
|
automl_experiment.fit(
|
|
X_train=X_train[:n],
|
|
y_train=y_train[:n],
|
|
X_val=X_train[n:],
|
|
y_val=y_train[n:],
|
|
**automl_settings,
|
|
)
|
|
|
|
automl_experiment.score(X_train[n:], y_train[n:], **{"metric": "mse"})
|
|
automl_experiment.pickle("automl.pkl")
|
|
|
|
def test_rank(self):
|
|
from sklearn.externals._arff import ArffException
|
|
|
|
dataset = "credit-g"
|
|
|
|
try:
|
|
X, y = fetch_openml(name=dataset, return_X_y=True)
|
|
y = y.cat.codes
|
|
except (ArffException, ValueError):
|
|
from sklearn.datasets import load_wine
|
|
|
|
X, y = load_wine(return_X_y=True)
|
|
|
|
import numpy as np
|
|
|
|
automl = AutoML()
|
|
n = 500
|
|
|
|
for each_estimator in ["lgbm", "xgboost"]:
|
|
automl_settings = {
|
|
"time_budget": 2,
|
|
"task": "rank",
|
|
"log_file_name": "test/{}.log".format(dataset),
|
|
"model_history": True,
|
|
"groups": np.array([0] * 200 + [1] * 200 + [2] * 100), # group labels
|
|
"learner_selector": "roundrobin",
|
|
"estimator_list": [each_estimator],
|
|
}
|
|
automl.fit(X[:n], y[:n], **automl_settings)
|
|
try:
|
|
automl.score(X[n:], y[n:])
|
|
automl.pickle("automl.pkl")
|
|
except NotImplementedError:
|
|
pass
|
|
|
|
|
|
if __name__ == "__main__":
|
|
test = TestScore()
|
|
test.test_forecast()
|