mirror of
https://github.com/microsoft/autogen.git
synced 2025-09-08 07:46:24 +00:00
fixing auto metric bug (#387)
This commit is contained in:
parent
d4273669e6
commit
c54c1246c6
@ -73,7 +73,9 @@ class SearchState:
|
||||
self.total_time_used - self.time_best_found,
|
||||
)
|
||||
|
||||
def __init__(self, learner_class, data_size, task, starting_point=None, period=None):
|
||||
def __init__(
|
||||
self, learner_class, data_size, task, starting_point=None, period=None
|
||||
):
|
||||
self.init_eci = learner_class.cost_relative2lgbm()
|
||||
self._search_space_domain = {}
|
||||
self.init_config = {}
|
||||
@ -83,7 +85,9 @@ class SearchState:
|
||||
self.ls_ever_converged = False
|
||||
self.learner_class = learner_class
|
||||
if task == TS_FORECAST:
|
||||
search_space = learner_class.search_space(data_size=data_size, task=task, pred_horizon=period)
|
||||
search_space = learner_class.search_space(
|
||||
data_size=data_size, task=task, pred_horizon=period
|
||||
)
|
||||
else:
|
||||
search_space = learner_class.search_space(data_size=data_size, task=task)
|
||||
for name, space in search_space.items():
|
||||
@ -820,7 +824,11 @@ class AutoML(BaseEstimator):
|
||||
dataframe[dataframe.columns[0]].dtype.name == "datetime64[ns]"
|
||||
), f"For '{TS_FORECAST}' task, the first column must contain timestamp values."
|
||||
if y_train_all is not None:
|
||||
y_df = pd.DataFrame(y_train_all) if isinstance(y_train_all, pd.Series) else pd.DataFrame(y_train_all, columns=['labels'])
|
||||
y_df = (
|
||||
pd.DataFrame(y_train_all)
|
||||
if isinstance(y_train_all, pd.Series)
|
||||
else pd.DataFrame(y_train_all, columns=["labels"])
|
||||
)
|
||||
dataframe = dataframe.join(y_df)
|
||||
duplicates = dataframe.duplicated()
|
||||
if any(duplicates):
|
||||
@ -881,7 +889,9 @@ class AutoML(BaseEstimator):
|
||||
self._nrow, self._ndim = X_train_all.shape
|
||||
if self._state.task == TS_FORECAST:
|
||||
X_train_all = pd.DataFrame(X_train_all)
|
||||
X_train_all, y_train_all = self._validate_ts_data(X_train_all, y_train_all)
|
||||
X_train_all, y_train_all = self._validate_ts_data(
|
||||
X_train_all, y_train_all
|
||||
)
|
||||
X, y = X_train_all, y_train_all
|
||||
elif dataframe is not None and label is not None:
|
||||
assert isinstance(
|
||||
|
@ -1797,7 +1797,8 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
||||
@classmethod
|
||||
def search_space(cls, data_size, pred_horizon, **params):
|
||||
space = cls.base_class.search_space(data_size, **params)
|
||||
space.update({
|
||||
space.update(
|
||||
{
|
||||
"optimize_for_horizon": {
|
||||
"domain": tune.choice([True, False]),
|
||||
"init_value": False,
|
||||
@ -1807,7 +1808,8 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
||||
"domain": tune.randint(lower=1, upper=data_size[0] - pred_horizon),
|
||||
"init_value": 3,
|
||||
},
|
||||
})
|
||||
}
|
||||
)
|
||||
return space
|
||||
|
||||
def __init__(self, task=TS_FORECAST, **params):
|
||||
@ -1841,13 +1843,23 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
||||
# Direct Multi-step Forecast Strategy - fit a separate model for each horizon
|
||||
model_list = []
|
||||
for i in range(1, kwargs["period"] + 1):
|
||||
X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, i)
|
||||
(
|
||||
X_fit,
|
||||
y_fit,
|
||||
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||
X_train, y_train, i
|
||||
)
|
||||
self.hcrystaball_model.model.set_params(**estimator.params)
|
||||
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
|
||||
model_list.append(model)
|
||||
self._model = model_list
|
||||
else:
|
||||
X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, kwargs["period"])
|
||||
(
|
||||
X_fit,
|
||||
y_fit,
|
||||
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||
X_train, y_train, kwargs["period"]
|
||||
)
|
||||
self.hcrystaball_model.model.set_params(**estimator.params)
|
||||
model = self.hcrystaball_model.model.fit(X_fit, y_fit)
|
||||
self._model = model
|
||||
@ -1863,18 +1875,30 @@ class TS_SKLearn_Regressor(SKLearnEstimator):
|
||||
X_test = self.transform_X(X_test)
|
||||
X_test = self._preprocess(X_test)
|
||||
if isinstance(self._model, list):
|
||||
assert (
|
||||
len(self._model) == len(X_test)
|
||||
assert len(self._model) == len(
|
||||
X_test
|
||||
), "Model is optimized for horizon, length of X_test must be equal to `period`."
|
||||
preds = []
|
||||
for i in range(1, len(self._model) + 1):
|
||||
X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test.iloc[:i, :])
|
||||
(
|
||||
X_pred,
|
||||
_,
|
||||
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||
X_test.iloc[:i, :]
|
||||
)
|
||||
preds.append(self._model[i - 1].predict(X_pred)[-1])
|
||||
forecast = pd.DataFrame(data=np.asarray(preds).reshape(-1, 1),
|
||||
forecast = pd.DataFrame(
|
||||
data=np.asarray(preds).reshape(-1, 1),
|
||||
columns=[self.hcrystaball_model.name],
|
||||
index=X_test.index)
|
||||
index=X_test.index,
|
||||
)
|
||||
else:
|
||||
X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test)
|
||||
(
|
||||
X_pred,
|
||||
_,
|
||||
) = self.hcrystaball_model._transform_data_to_tsmodel_input_format(
|
||||
X_test
|
||||
)
|
||||
forecast = self._model.predict(X_pred)
|
||||
return forecast
|
||||
else:
|
||||
@ -1895,6 +1919,7 @@ class XGBoost_TS_Regressor(TS_SKLearn_Regressor):
|
||||
|
||||
base_class = XGBoostSklearnEstimator
|
||||
|
||||
|
||||
# catboost regressor is invalid because it has a `name` parameter, making it incompatible with hcrystalball
|
||||
# class CatBoost_TS_Regressor(TS_Regressor):
|
||||
# base_class = CatBoostEstimator
|
||||
|
@ -16,15 +16,15 @@ from ..data import (
|
||||
def load_default_huggingface_metric_for_task(task):
|
||||
|
||||
if task == SEQCLASSIFICATION:
|
||||
return "accuracy", "max"
|
||||
return "accuracy"
|
||||
elif task == SEQREGRESSION:
|
||||
return "rmse", "max"
|
||||
return "rmse"
|
||||
elif task == SUMMARIZATION:
|
||||
return "rouge", "max"
|
||||
return "rouge"
|
||||
elif task == MULTICHOICECLASSIFICATION:
|
||||
return "accuracy", "max"
|
||||
return "accuracy"
|
||||
elif task == TOKENCLASSIFICATION:
|
||||
return "seqeval", "max"
|
||||
return "seqeval"
|
||||
|
||||
|
||||
global tokenized_column_names
|
||||
|
Loading…
x
Reference in New Issue
Block a user