2021-09-01 16:25:04 -07:00
|
|
|
import numpy as np
|
|
|
|
from flaml import AutoML
|
|
|
|
|
|
|
|
|
|
|
|
def test_forecast_automl(budget=5):
    """Exercise the FLAML ``forecast`` task on the statsmodels CO2 dataset.

    The CO2 series is resampled with ``resample("MS").mean()``, missing
    values are filled from a back-filled copy, and the last 12 rows are
    held out for prediction. The model is fitted twice: first from a
    dataframe, then from separate ``X_train`` / ``y_train`` columns. If an
    ``ImportError`` is raised (the first attempt imports ``prophet``
    explicitly), the fit is retried with only the ``arima`` and ``sarimax``
    estimators.

    Side effects: writes ``automl.pkl`` in the current directory and the
    log file ``test/CO2_forecast.log``.

    Args:
        budget: Forwarded as ``time_budget`` to ``AutoML.fit`` and to
            ``get_output_from_log`` (seconds, per the settings comment).
    """
    import pickle

    # using dataframe
    import statsmodels.api as sm

    monthly_co2 = sm.datasets.co2.load_pandas().data["co2"].resample("MS").mean()
    filled_co2 = monthly_co2.fillna(monthly_co2.bfill())
    data = (
        filled_co2.to_frame()
        .reset_index()
        .rename(columns={"index": "ds", "co2": "y"})
    )

    # Hold out the last `time_horizon` rows; everything before is training data.
    time_horizon = 12
    split_idx = data.shape[0] - time_horizon
    train_df = data[:split_idx]
    holdout_df = data[split_idx:]
    X_test, y_test = holdout_df["ds"], holdout_df["y"]

    settings = dict(
        time_budget=budget,  # total running time in seconds
        metric="mape",  # primary metric
        task="forecast",  # task type
        log_file_name="test/CO2_forecast.log",  # flaml log file
        eval_method="holdout",
        label=("ds", "y"),
    )
    # Estimators used when the default list cannot be used (ImportError).
    fallback_estimators = ("arima", "sarimax")

    # The main flaml automl API
    automl = AutoML()
    try:
        # Imported only to trigger the ImportError fallback when missing.
        import prophet  # noqa: F401

        automl.fit(dataframe=train_df, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            dataframe=train_df,
            **settings,
            estimator_list=list(fallback_estimators),
            period=time_horizon,
        )

    # retrieve best config and best learner
    print("Best ML leaner:", automl.best_estimator)
    print("Best hyperparmeter config:", automl.best_config)
    print(f"Best mape on validation data: {automl.best_loss}")
    print(f"Training duration of best run: {automl.best_config_train_time}s")
    print(automl.model.estimator)

    # pickle and save the automl object
    with open("automl.pkl", "wb") as pickle_file:
        pickle.dump(automl, pickle_file, pickle.HIGHEST_PROTOCOL)

    # compute predictions of testing dataset
    y_pred = automl.predict(X_test)
    print("Predicted labels", y_pred)
    print("True labels", y_test)

    # compute different metric values on testing dataset
    from flaml.ml import sklearn_metric_loss_score

    test_mape = sklearn_metric_loss_score("mape", y_pred, y_test)
    print("mape", "=", test_mape)

    from flaml.data import get_output_from_log

    # The log yields five histories; only the config history is printed here.
    (
        _time_history,
        _best_valid_loss_history,
        _valid_loss_history,
        config_history,
        _metric_history,
    ) = get_output_from_log(filename=settings["log_file_name"], time_budget=budget)
    for logged_config in config_history:
        print(logged_config)
    print(automl.prune_attr)
    print(automl.max_resource)
    print(automl.min_resource)

    # Second pass: same data, supplied as separate X_train / y_train columns.
    X_train, y_train = train_df["ds"], train_df["y"]
    automl = AutoML()
    try:
        automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon)
    except ImportError:
        print("not using prophet due to ImportError")
        automl.fit(
            X_train=X_train,
            y_train=y_train,
            **settings,
            estimator_list=list(fallback_estimators),
            period=time_horizon,
        )
|
2021-08-23 16:26:46 -04:00
|
|
|
|
2021-09-01 16:25:04 -07:00
|
|
|
|
|
|
|
def test_numpy():
    """Exercise the FLAML ``forecast`` task with plain numpy inputs.

    Builds a monthly ``datetime64[M]`` range from 2014-01 up to (not
    including) 2021-01 and 72 random target values. The first attempt
    imports ``prophet`` and fits on the first 60 timestamps; a
    ``ValueError`` there is reported as expected. If an ``ImportError``
    is raised instead, a fresh ``AutoML`` is fitted on the first 72
    timestamps using only ``arima`` and ``sarimax``.
    """
    timestamps = np.arange("2014-01", "2021-01", dtype="datetime64[M]")
    values = np.random.random(size=72)

    # Keyword arguments shared by both fit attempts.
    shared_fit_kwargs = dict(
        y_train=values,  # value for each timestamp
        period=12,  # time horizon to forecast, e.g., 12 months
        task="forecast",
        log_file_name="test/forecast.log",
    )

    automl = AutoML()
    try:
        # Imported only to trigger the ImportError fallback when missing.
        import prophet  # noqa: F401

        # NOTE(review): 60 timestamps are paired with 72 values here;
        # presumably this mismatch is what raises the ValueError reported
        # below as expected -- confirm against AutoML.fit.
        automl.fit(
            X_train=timestamps[:60],  # a single column of timestamp
            time_budget=3,  # time budget in seconds
            **shared_fit_kwargs,
        )
        print(automl.predict(timestamps[60:]))
        print(automl.predict(12))
    except ValueError:
        print("ValueError for prophet is raised as expected.")
    except ImportError:
        print("not using prophet due to ImportError")
        automl = AutoML()
        automl.fit(
            X_train=timestamps[:72],  # a single column of timestamp
            time_budget=1,  # time budget in seconds
            estimator_list=["arima", "sarimax"],
            **shared_fit_kwargs,
        )
        print(automl.predict(timestamps[72:]))
        # an alternative way to specify predict steps for arima/sarimax
        print(automl.predict(12))
|
2021-08-23 16:26:46 -04:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Direct execution runs the CO2 forecast test with budget=60
    # instead of the function's default of 5.
    test_forecast_automl(budget=60)
|