mirror of
https://github.com/microsoft/autogen.git
synced 2025-09-08 07:46:24 +00:00
fixing auto metric bug (#387)
This commit is contained in:
parent
d4273669e6
commit
c54c1246c6
@ -73,7 +73,9 @@ class SearchState:
|
|||||||
self.total_time_used - self.time_best_found,
|
self.total_time_used - self.time_best_found,
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, learner_class, data_size, task, starting_point=None, period=None):
|
def __init__(
|
||||||
|
self, learner_class, data_size, task, starting_point=None, period=None
|
||||||
|
):
|
||||||
self.init_eci = learner_class.cost_relative2lgbm()
|
self.init_eci = learner_class.cost_relative2lgbm()
|
||||||
self._search_space_domain = {}
|
self._search_space_domain = {}
|
||||||
self.init_config = {}
|
self.init_config = {}
|
||||||
@ -83,7 +85,9 @@ class SearchState:
|
|||||||
self.ls_ever_converged = False
|
self.ls_ever_converged = False
|
||||||
self.learner_class = learner_class
|
self.learner_class = learner_class
|
||||||
if task == TS_FORECAST:
|
if task == TS_FORECAST:
|
||||||
search_space = learner_class.search_space(data_size=data_size, task=task, pred_horizon=period)
|
search_space = learner_class.search_space(
|
||||||
|
data_size=data_size, task=task, pred_horizon=period
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
search_space = learner_class.search_space(data_size=data_size, task=task)
|
search_space = learner_class.search_space(data_size=data_size, task=task)
|
||||||
for name, space in search_space.items():
|
for name, space in search_space.items():
|
||||||
@ -820,7 +824,11 @@ class AutoML(BaseEstimator):
|
|||||||
dataframe[dataframe.columns[0]].dtype.name == "datetime64[ns]"
|
dataframe[dataframe.columns[0]].dtype.name == "datetime64[ns]"
|
||||||
), f"For '{TS_FORECAST}' task, the first column must contain timestamp values."
|
), f"For '{TS_FORECAST}' task, the first column must contain timestamp values."
|
||||||
if y_train_all is not None:
|
if y_train_all is not None:
|
||||||
y_df = pd.DataFrame(y_train_all) if isinstance(y_train_all, pd.Series) else pd.DataFrame(y_train_all, columns=['labels'])
|
y_df = (
|
||||||
|
pd.DataFrame(y_train_all)
|
||||||
|
if isinstance(y_train_all, pd.Series)
|
||||||
|
else pd.DataFrame(y_train_all, columns=["labels"])
|
||||||
|
)
|
||||||
dataframe = dataframe.join(y_df)
|
dataframe = dataframe.join(y_df)
|
||||||
duplicates = dataframe.duplicated()
|
duplicates = dataframe.duplicated()
|
||||||
if any(duplicates):
|
if any(duplicates):
|
||||||
@ -881,7 +889,9 @@ class AutoML(BaseEstimator):
|
|||||||
self._nrow, self._ndim = X_train_all.shape
|
self._nrow, self._ndim = X_train_all.shape
|
||||||
if self._state.task == TS_FORECAST:
|
if self._state.task == TS_FORECAST:
|
||||||
X_train_all = pd.DataFrame(X_train_all)
|
X_train_all = pd.DataFrame(X_train_all)
|
||||||
X_train_all, y_train_all = self._validate_ts_data(X_train_all, y_train_all)
|
X_train_all, y_train_all = self._validate_ts_data(
|
||||||
|
X_train_all, y_train_all
|
||||||
|
)
|
||||||
X, y = X_train_all, y_train_all
|
X, y = X_train_all, y_train_all
|
||||||
elif dataframe is not None and label is not None:
|
elif dataframe is not None and label is not None:
|
||||||
assert isinstance(
|
assert isinstance(
|
||||||
|
@ -1790,24 +1790,26 @@ class SARIMAX(ARIMA):
|
|||||||
|
|
||||||
|
|
||||||
class TS_SKLearn_Regressor(SKLearnEstimator):
|
class TS_SKLearn_Regressor(SKLearnEstimator):
|
||||||
""" The class for tuning SKLearn Regressors for time-series forecasting, using hcrystalball"""
|
"""The class for tuning SKLearn Regressors for time-series forecasting, using hcrystalball"""
|
||||||
|
|
||||||
base_class = SKLearnEstimator
|
base_class = SKLearnEstimator
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def search_space(cls, data_size, pred_horizon, **params):
|
def search_space(cls, data_size, pred_horizon, **params):
|
||||||
space = cls.base_class.search_space(data_size, **params)
|
space = cls.base_class.search_space(data_size, **params)
|
||||||
space.update({
|
space.update(
|
||||||
"optimize_for_horizon": {
|
{
|
||||||
"domain": tune.choice([True, False]),
|
"optimize_for_horizon": {
|
||||||
"init_value": False,
|
"domain": tune.choice([True, False]),
|
||||||
"low_cost_init_value": False,
|
"init_value": False,
|
||||||
},
|
"low_cost_init_value": False,
|
||||||
"lags": {
|
},
|
||||||
"domain": tune.randint(lower=1, upper=data_size[0] - pred_horizon),
|
"lags": {
|
||||||
"init_value": 3,
|
"domain": tune.randint(lower=1, upper=data_size[0] - pred_horizon),
|
||||||
},
|
"init_value": 3,
|
||||||
})
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
return space
|
return space
|
||||||
|
|
||||||
def __init__(self, task=TS_FORECAST, **params):
|
def __init__(self, task=TS_FORECAST, **params):
|
||||||
@ -1841,13 +1843,23 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
|||||||
# Direct Multi-step Forecast Strategy - fit a seperate model for each horizon
|
# Direct Multi-step Forecast Strategy - fit a seperate model for each horizon
|
||||||
model_list = []
|
model_list = []
|
||||||
for i in range(1, kwargs["period"] + 1):
|
for i in range(1, kwargs["period"] + 1):
|
||||||
X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, i)
|
(
|
||||||
|
X_fit,
|
||||||
|
y_fit,
|
||||||
|
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||||
|
X_train, y_train, i
|
||||||
|
)
|
||||||
self.hcrystaball_model.model.set_params(**estimator.params)
|
self.hcrystaball_model.model.set_params(**estimator.params)
|
||||||
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
|
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
|
||||||
model_list.append(model)
|
model_list.append(model)
|
||||||
self._model = model_list
|
self._model = model_list
|
||||||
else:
|
else:
|
||||||
X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, kwargs["period"])
|
(
|
||||||
|
X_fit,
|
||||||
|
y_fit,
|
||||||
|
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||||
|
X_train, y_train, kwargs["period"]
|
||||||
|
)
|
||||||
self.hcrystaball_model.model.set_params(**estimator.params)
|
self.hcrystaball_model.model.set_params(**estimator.params)
|
||||||
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
|
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
|
||||||
self._model = model
|
self._model = model
|
||||||
@ -1863,18 +1875,30 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
|||||||
X_test = self.transform_X(X_test)
|
X_test = self.transform_X(X_test)
|
||||||
X_test = self._preprocess(X_test)
|
X_test = self._preprocess(X_test)
|
||||||
if isinstance(self._model, list):
|
if isinstance(self._model, list):
|
||||||
assert (
|
assert len(self._model) == len(
|
||||||
len(self._model) == len(X_test)
|
X_test
|
||||||
), "Model is optimized for horizon, length of X_test must be equal to `period`."
|
), "Model is optimized for horizon, length of X_test must be equal to `period`."
|
||||||
preds = []
|
preds = []
|
||||||
for i in range(1, len(self._model) + 1):
|
for i in range(1, len(self._model) + 1):
|
||||||
X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test.iloc[:i, :])
|
(
|
||||||
|
X_pred,
|
||||||
|
_,
|
||||||
|
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||||
|
X_test.iloc[:i, :]
|
||||||
|
)
|
||||||
preds.append(self._model[i - 1].predict(X_pred)[-1])
|
preds.append(self._model[i - 1].predict(X_pred)[-1])
|
||||||
forecast = pd.DataFrame(data=np.asarray(preds).reshape(-1, 1),
|
forecast = pd.DataFrame(
|
||||||
columns=[self.hcrystaball_model.name],
|
data=np.asarray(preds).reshape(-1, 1),
|
||||||
index=X_test.index)
|
columns=[self.hcrystaball_model.name],
|
||||||
|
index=X_test.index,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test)
|
(
|
||||||
|
X_pred,
|
||||||
|
_,
|
||||||
|
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||||
|
X_test
|
||||||
|
)
|
||||||
forecast = self._model.predict(X_pred)
|
forecast = self._model.predict(X_pred)
|
||||||
return forecast
|
return forecast
|
||||||
else:
|
else:
|
||||||
@ -1885,35 +1909,36 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
|||||||
|
|
||||||
|
|
||||||
class LGBM_TS_Regressor(TS_SKLearn_Regressor):
|
class LGBM_TS_Regressor(TS_SKLearn_Regressor):
|
||||||
""" The class for tuning LGBM Regressor for time-series forecasting"""
|
"""The class for tuning LGBM Regressor for time-series forecasting"""
|
||||||
|
|
||||||
base_class = LGBMEstimator
|
base_class = LGBMEstimator
|
||||||
|
|
||||||
|
|
||||||
class XGBoost_TS_Regressor(TS_SKLearn_Regressor):
|
class XGBoost_TS_Regressor(TS_SKLearn_Regressor):
|
||||||
""" The class for tuning XGBoost Regressor for time-series forecasting"""
|
"""The class for tuning XGBoost Regressor for time-series forecasting"""
|
||||||
|
|
||||||
base_class = XGBoostSklearnEstimator
|
base_class = XGBoostSklearnEstimator
|
||||||
|
|
||||||
|
|
||||||
# catboost regressor is invalid because it has a `name` parameter, making it incompatible with hcrystalball
|
# catboost regressor is invalid because it has a `name` parameter, making it incompatible with hcrystalball
|
||||||
# class CatBoost_TS_Regressor(TS_Regressor):
|
# class CatBoost_TS_Regressor(TS_Regressor):
|
||||||
# base_class = CatBoostEstimator
|
# base_class = CatBoostEstimator
|
||||||
|
|
||||||
|
|
||||||
class RF_TS_Regressor(TS_SKLearn_Regressor):
|
class RF_TS_Regressor(TS_SKLearn_Regressor):
|
||||||
""" The class for tuning Random Forest Regressor for time-series forecasting"""
|
"""The class for tuning Random Forest Regressor for time-series forecasting"""
|
||||||
|
|
||||||
base_class = RandomForestEstimator
|
base_class = RandomForestEstimator
|
||||||
|
|
||||||
|
|
||||||
class ExtraTrees_TS_Regressor(TS_SKLearn_Regressor):
|
class ExtraTrees_TS_Regressor(TS_SKLearn_Regressor):
|
||||||
""" The class for tuning Extra Trees Regressor for time-series forecasting"""
|
"""The class for tuning Extra Trees Regressor for time-series forecasting"""
|
||||||
|
|
||||||
base_class = ExtraTreesEstimator
|
base_class = ExtraTreesEstimator
|
||||||
|
|
||||||
|
|
||||||
class XGBoostLimitDepth_TS_Regressor(TS_SKLearn_Regressor):
|
class XGBoostLimitDepth_TS_Regressor(TS_SKLearn_Regressor):
|
||||||
""" The class for tuning XGBoost Regressor with unlimited depth for time-series forecasting"""
|
"""The class for tuning XGBoost Regressor with unlimited depth for time-series forecasting"""
|
||||||
|
|
||||||
base_class = XGBoostLimitDepthEstimator
|
base_class = XGBoostLimitDepthEstimator
|
||||||
|
|
||||||
|
@ -16,15 +16,15 @@ from ..data import (
|
|||||||
def load_default_huggingface_metric_for_task(task):
|
def load_default_huggingface_metric_for_task(task):
|
||||||
|
|
||||||
if task == SEQCLASSIFICATION:
|
if task == SEQCLASSIFICATION:
|
||||||
return "accuracy", "max"
|
return "accuracy"
|
||||||
elif task == SEQREGRESSION:
|
elif task == SEQREGRESSION:
|
||||||
return "rmse", "max"
|
return "rmse"
|
||||||
elif task == SUMMARIZATION:
|
elif task == SUMMARIZATION:
|
||||||
return "rouge", "max"
|
return "rouge"
|
||||||
elif task == MULTICHOICECLASSIFICATION:
|
elif task == MULTICHOICECLASSIFICATION:
|
||||||
return "accuracy", "max"
|
return "accuracy"
|
||||||
elif task == TOKENCLASSIFICATION:
|
elif task == TOKENCLASSIFICATION:
|
||||||
return "seqeval", "max"
|
return "seqeval"
|
||||||
|
|
||||||
|
|
||||||
global tokenized_column_names
|
global tokenized_column_names
|
||||||
|
Loading…
x
Reference in New Issue
Block a user