fixing auto metric bug (#387)

Xueqing Liu 2022-01-07 19:25:58 -05:00 committed by GitHub
parent d4273669e6
commit c54c1246c6
3 changed files with 70 additions and 35 deletions


@@ -73,7 +73,9 @@ class SearchState:
             self.total_time_used - self.time_best_found,
         )
 
-    def __init__(self, learner_class, data_size, task, starting_point=None, period=None):
+    def __init__(
+        self, learner_class, data_size, task, starting_point=None, period=None
+    ):
         self.init_eci = learner_class.cost_relative2lgbm()
         self._search_space_domain = {}
         self.init_config = {}
@@ -83,7 +85,9 @@ class SearchState:
         self.ls_ever_converged = False
         self.learner_class = learner_class
         if task == TS_FORECAST:
-            search_space = learner_class.search_space(data_size=data_size, task=task, pred_horizon=period)
+            search_space = learner_class.search_space(
+                data_size=data_size, task=task, pred_horizon=period
+            )
         else:
             search_space = learner_class.search_space(data_size=data_size, task=task)
         for name, space in search_space.items():
@@ -820,7 +824,11 @@ class AutoML(BaseEstimator):
             dataframe[dataframe.columns[0]].dtype.name == "datetime64[ns]"
         ), f"For '{TS_FORECAST}' task, the first column must contain timestamp values."
         if y_train_all is not None:
-            y_df = pd.DataFrame(y_train_all) if isinstance(y_train_all, pd.Series) else pd.DataFrame(y_train_all, columns=['labels'])
+            y_df = (
+                pd.DataFrame(y_train_all)
+                if isinstance(y_train_all, pd.Series)
+                else pd.DataFrame(y_train_all, columns=["labels"])
+            )
             dataframe = dataframe.join(y_df)
         duplicates = dataframe.duplicated()
         if any(duplicates):
@@ -881,7 +889,9 @@ class AutoML(BaseEstimator):
             self._nrow, self._ndim = X_train_all.shape
             if self._state.task == TS_FORECAST:
                 X_train_all = pd.DataFrame(X_train_all)
-                X_train_all, y_train_all = self._validate_ts_data(X_train_all, y_train_all)
+                X_train_all, y_train_all = self._validate_ts_data(
+                    X_train_all, y_train_all
+                )
             X, y = X_train_all, y_train_all
         elif dataframe is not None and label is not None:
             assert isinstance(


@@ -1790,14 +1790,15 @@ class SARIMAX(ARIMA):
 
 
 class TS_SKLearn_Regressor(SKLearnEstimator):
-    """ The class for tuning SKLearn Regressors for time-series forecasting, using hcrystalball"""
+    """The class for tuning SKLearn Regressors for time-series forecasting, using hcrystalball"""
 
     base_class = SKLearnEstimator
 
     @classmethod
     def search_space(cls, data_size, pred_horizon, **params):
         space = cls.base_class.search_space(data_size, **params)
-        space.update({
+        space.update(
+            {
                 "optimize_for_horizon": {
                     "domain": tune.choice([True, False]),
                     "init_value": False,
@@ -1807,7 +1808,8 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
                     "domain": tune.randint(lower=1, upper=data_size[0] - pred_horizon),
                     "init_value": 3,
                 },
-        })
+            }
+        )
         return space
 
     def __init__(self, task=TS_FORECAST, **params):
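For readers unfamiliar with the dict being re-wrapped in the two hunks above: FLAML estimators describe each tunable hyperparameter as a mapping with a "domain" (a flaml.tune sampling spec) and an optional "init_value". Below is a minimal standalone sketch of that shape, mirroring the "optimize_for_horizon" and "lags" entries; the fixed upper bound is a placeholder, not the data-size-dependent expression used in the real code.

```python
from flaml import tune

# Illustrative fragment only: FLAML's {"domain", "init_value"} search-space format.
# The real "lags" upper bound depends on data_size and pred_horizon; 32 is a placeholder.
space = {
    "optimize_for_horizon": {
        "domain": tune.choice([True, False]),
        "init_value": False,
    },
    "lags": {
        "domain": tune.randint(lower=1, upper=32),
        "init_value": 3,
    },
}
```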
@@ -1841,13 +1843,23 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
             # Direct Multi-step Forecast Strategy - fit a seperate model for each horizon
             model_list = []
             for i in range(1, kwargs["period"] + 1):
-                X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, i)
+                (
+                    X_fit,
+                    y_fit,
+                ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
+                    X_train, y_train, i
+                )
                 self.hcrystaball_model.model.set_params(**estimator.params)
                 model = self.hcrystaball_model.model.fit(X_fit, y_fit)
                 model_list.append(model)
             self._model = model_list
         else:
-            X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, kwargs["period"])
+            (
+                X_fit,
+                y_fit,
+            ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
+                X_train, y_train, kwargs["period"]
+            )
             self.hcrystaball_model.model.set_params(**estimator.params)
             model = self.hcrystaball_model.model.fit(X_fit, y_fit)
             self._model = model
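The comment in the hunk above names the direct multi-step strategy: when optimize_for_horizon is enabled, a separate model is fit for each forecast step. The toy, self-contained scikit-learn sketch below illustrates only that strategy; it is not FLAML's or hcrystalball's implementation.

```python
import numpy as np
from sklearn.linear_model import LinearRegression


def fit_direct_multistep(y, n_lags=3, horizon=4):
    """Fit one regressor per forecast step h = 1..horizon (toy illustration)."""
    models = []
    for h in range(1, horizon + 1):
        X, targets = [], []
        for i in range(n_lags, len(y) - h + 1):
            X.append(y[i - n_lags:i])     # window of the last n_lags values
            targets.append(y[i + h - 1])  # value h steps after the window
        models.append(LinearRegression().fit(np.asarray(X), np.asarray(targets)))
    return models


def predict_direct_multistep(models, y, n_lags=3):
    window = np.asarray(y[-n_lags:]).reshape(1, -1)
    # models[h - 1] predicts the value h steps beyond the end of the series
    return [m.predict(window)[0] for m in models]


series = [float(v) for v in range(20)]  # toy linear series
models = fit_direct_multistep(series, n_lags=3, horizon=4)
print(predict_direct_multistep(models, series))  # approx [20.0, 21.0, 22.0, 23.0]
```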
@@ -1863,18 +1875,30 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
             X_test = self.transform_X(X_test)
             X_test = self._preprocess(X_test)
             if isinstance(self._model, list):
-                assert (
-                    len(self._model) == len(X_test)
+                assert len(self._model) == len(
+                    X_test
                 ), "Model is optimized for horizon, length of X_test must be equal to `period`."
                 preds = []
                 for i in range(1, len(self._model) + 1):
-                    X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test.iloc[:i, :])
+                    (
+                        X_pred,
+                        _,
+                    ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
+                        X_test.iloc[:i, :]
+                    )
                     preds.append(self._model[i - 1].predict(X_pred)[-1])
-                forecast = pd.DataFrame(data=np.asarray(preds).reshape(-1, 1),
+                forecast = pd.DataFrame(
+                    data=np.asarray(preds).reshape(-1, 1),
                     columns=[self.hcrystaball_model.name],
-                    index=X_test.index)
+                    index=X_test.index,
+                )
             else:
-                X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test)
+                (
+                    X_pred,
+                    _,
+                ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
+                    X_test
+                )
                 forecast = self._model.predict(X_pred)
             return forecast
         else:
@@ -1885,35 +1909,36 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
 
 
 class LGBM_TS_Regressor(TS_SKLearn_Regressor):
-    """ The class for tuning LGBM Regressor for time-series forecasting"""
+    """The class for tuning LGBM Regressor for time-series forecasting"""
 
     base_class = LGBMEstimator
 
 
 class XGBoost_TS_Regressor(TS_SKLearn_Regressor):
-    """ The class for tuning XGBoost Regressor for time-series forecasting"""
+    """The class for tuning XGBoost Regressor for time-series forecasting"""
 
     base_class = XGBoostSklearnEstimator
 
 
 # catboost regressor is invalid because it has a `name` parameter, making it incompatible with hcrystalball
 # class CatBoost_TS_Regressor(TS_Regressor):
 #     base_class = CatBoostEstimator
 
 
 class RF_TS_Regressor(TS_SKLearn_Regressor):
-    """ The class for tuning Random Forest Regressor for time-series forecasting"""
+    """The class for tuning Random Forest Regressor for time-series forecasting"""
 
     base_class = RandomForestEstimator
 
 
 class ExtraTrees_TS_Regressor(TS_SKLearn_Regressor):
-    """ The class for tuning Extra Trees Regressor for time-series forecasting"""
+    """The class for tuning Extra Trees Regressor for time-series forecasting"""
 
     base_class = ExtraTreesEstimator
 
 
 class XGBoostLimitDepth_TS_Regressor(TS_SKLearn_Regressor):
-    """ The class for tuning XGBoost Regressor with unlimited depth for time-series forecasting"""
+    """The class for tuning XGBoost Regressor with unlimited depth for time-series forecasting"""
 
     base_class = XGBoostLimitDepthEstimator


@@ -16,15 +16,15 @@ from ..data import (
 
 def load_default_huggingface_metric_for_task(task):
     if task == SEQCLASSIFICATION:
-        return "accuracy", "max"
+        return "accuracy"
     elif task == SEQREGRESSION:
-        return "rmse", "max"
+        return "rmse"
     elif task == SUMMARIZATION:
-        return "rouge", "max"
+        return "rouge"
     elif task == MULTICHOICECLASSIFICATION:
-        return "accuracy", "max"
+        return "accuracy"
     elif task == TOKENCLASSIFICATION:
-        return "seqeval", "max"
+        return "seqeval"
 
 
 global tokenized_column_names
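This last hunk appears to be the substantive part of the metric fix named in the commit title: load_default_huggingface_metric_for_task now returns only the default metric name rather than a (metric, "max") pair (note the old tuple even paired "rmse" with "max"). A minimal standalone sketch of the post-fix contract follows; the task constants are stand-in values, not necessarily the strings defined in flaml's data module.

```python
# Stand-in task constants; the real values are imported from flaml's data module.
SEQCLASSIFICATION = "seq-classification"
SEQREGRESSION = "seq-regression"
SUMMARIZATION = "summarization"
MULTICHOICECLASSIFICATION = "multichoice-classification"
TOKENCLASSIFICATION = "token-classification"


def load_default_huggingface_metric_for_task(task):
    # Post-fix behavior: a single metric name string; the optimization mode
    # ("max"/"min") is no longer bundled into the return value.
    return {
        SEQCLASSIFICATION: "accuracy",
        SEQREGRESSION: "rmse",
        SUMMARIZATION: "rouge",
        MULTICHOICECLASSIFICATION: "accuracy",
        TOKENCLASSIFICATION: "seqeval",
    }.get(task)


assert load_default_huggingface_metric_for_task(SEQCLASSIFICATION) == "accuracy"
assert load_default_huggingface_metric_for_task(SUMMARIZATION) == "rouge"
```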