diff --git a/flaml/automl.py b/flaml/automl.py index 9a668d2e6..7c2d05edd 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -73,7 +73,9 @@ class SearchState: self.total_time_used - self.time_best_found, ) - def __init__(self, learner_class, data_size, task, starting_point=None, period=None): + def __init__( + self, learner_class, data_size, task, starting_point=None, period=None + ): self.init_eci = learner_class.cost_relative2lgbm() self._search_space_domain = {} self.init_config = {} @@ -83,7 +85,9 @@ class SearchState: self.ls_ever_converged = False self.learner_class = learner_class if task == TS_FORECAST: - search_space = learner_class.search_space(data_size=data_size, task=task, pred_horizon=period) + search_space = learner_class.search_space( + data_size=data_size, task=task, pred_horizon=period + ) else: search_space = learner_class.search_space(data_size=data_size, task=task) for name, space in search_space.items(): @@ -820,7 +824,11 @@ class AutoML(BaseEstimator): dataframe[dataframe.columns[0]].dtype.name == "datetime64[ns]" ), f"For '{TS_FORECAST}' task, the first column must contain timestamp values." if y_train_all is not None: - y_df = pd.DataFrame(y_train_all) if isinstance(y_train_all, pd.Series) else pd.DataFrame(y_train_all, columns=['labels']) + y_df = ( + pd.DataFrame(y_train_all) + if isinstance(y_train_all, pd.Series) + else pd.DataFrame(y_train_all, columns=["labels"]) + ) dataframe = dataframe.join(y_df) duplicates = dataframe.duplicated() if any(duplicates): @@ -881,7 +889,9 @@ class AutoML(BaseEstimator): self._nrow, self._ndim = X_train_all.shape if self._state.task == TS_FORECAST: X_train_all = pd.DataFrame(X_train_all) - X_train_all, y_train_all = self._validate_ts_data(X_train_all, y_train_all) + X_train_all, y_train_all = self._validate_ts_data( + X_train_all, y_train_all + ) X, y = X_train_all, y_train_all elif dataframe is not None and label is not None: assert isinstance( diff --git a/flaml/model.py b/flaml/model.py index 8736db77f..24d3ba278 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -1790,24 +1790,26 @@ class SARIMAX(ARIMA): class TS_SKLearn_Regressor(SKLearnEstimator): - """ The class for tuning SKLearn Regressors for time-series forecasting, using hcrystalball""" + """The class for tuning SKLearn Regressors for time-series forecasting, using hcrystalball""" base_class = SKLearnEstimator @classmethod def search_space(cls, data_size, pred_horizon, **params): space = cls.base_class.search_space(data_size, **params) - space.update({ - "optimize_for_horizon": { - "domain": tune.choice([True, False]), - "init_value": False, - "low_cost_init_value": False, - }, - "lags": { - "domain": tune.randint(lower=1, upper=data_size[0] - pred_horizon), - "init_value": 3, - }, - }) + space.update( + { + "optimize_for_horizon": { + "domain": tune.choice([True, False]), + "init_value": False, + "low_cost_init_value": False, + }, + "lags": { + "domain": tune.randint(lower=1, upper=data_size[0] - pred_horizon), + "init_value": 3, + }, + } + ) return space def __init__(self, task=TS_FORECAST, **params): @@ -1841,13 +1843,23 @@ class TS_SKLearn_Regressor(SKLearnEstimator): # Direct Multi-step Forecast Strategy - fit a seperate model for each horizon model_list = [] for i in range(1, kwargs["period"] + 1): - X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, i) + ( + X_fit, + y_fit, + ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format( + X_train, y_train, i + ) self.hcrystaball_model.model.set_params(**estimator.params) model = self.hcrystaball_model.model.fit(X_fit, y_fit) model_list.append(model) self._model = model_list else: - X_fit, y_fit = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_train, y_train, kwargs["period"]) + ( + X_fit, + y_fit, + ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format( + X_train, y_train, kwargs["period"] + ) self.hcrystaball_model.model.set_params(**estimator.params) model = self.hcrystaball_model.model.fit(X_fit, y_fit) self._model = model @@ -1863,18 +1875,30 @@ class TS_SKLearn_Regressor(SKLearnEstimator): X_test = self.transform_X(X_test) X_test = self._preprocess(X_test) if isinstance(self._model, list): - assert ( - len(self._model) == len(X_test) + assert len(self._model) == len( + X_test ), "Model is optimized for horizon, length of X_test must be equal to `period`." preds = [] for i in range(1, len(self._model) + 1): - X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test.iloc[:i, :]) + ( + X_pred, + _, + ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format( + X_test.iloc[:i, :] + ) preds.append(self._model[i - 1].predict(X_pred)[-1]) - forecast = pd.DataFrame(data=np.asarray(preds).reshape(-1, 1), - columns=[self.hcrystaball_model.name], - index=X_test.index) + forecast = pd.DataFrame( + data=np.asarray(preds).reshape(-1, 1), + columns=[self.hcrystaball_model.name], + index=X_test.index, + ) else: - X_pred, _ = self.hcrystaball_model._transform_data_to_tsmodel_input_format(X_test) + ( + X_pred, + _, + ) = self.hcrystaball_model._transform_data_to_tsmodel_input_format( + X_test + ) forecast = self._model.predict(X_pred) return forecast else: @@ -1885,35 +1909,36 @@ class TS_SKLearn_Regressor(SKLearnEstimator): class LGBM_TS_Regressor(TS_SKLearn_Regressor): - """ The class for tuning LGBM Regressor for time-series forecasting""" + """The class for tuning LGBM Regressor for time-series forecasting""" base_class = LGBMEstimator class XGBoost_TS_Regressor(TS_SKLearn_Regressor): - """ The class for tuning XGBoost Regressor for time-series forecasting""" + """The class for tuning XGBoost Regressor for time-series forecasting""" base_class = XGBoostSklearnEstimator + # catboost regressor is invalid because it has a `name` parameter, making it incompatible with hcrystalball # class CatBoost_TS_Regressor(TS_Regressor): # base_class = CatBoostEstimator class RF_TS_Regressor(TS_SKLearn_Regressor): - """ The class for tuning Random Forest Regressor for time-series forecasting""" + """The class for tuning Random Forest Regressor for time-series forecasting""" base_class = RandomForestEstimator class ExtraTrees_TS_Regressor(TS_SKLearn_Regressor): - """ The class for tuning Extra Trees Regressor for time-series forecasting""" + """The class for tuning Extra Trees Regressor for time-series forecasting""" base_class = ExtraTreesEstimator class XGBoostLimitDepth_TS_Regressor(TS_SKLearn_Regressor): - """ The class for tuning XGBoost Regressor with unlimited depth for time-series forecasting""" + """The class for tuning XGBoost Regressor with unlimited depth for time-series forecasting""" base_class = XGBoostLimitDepthEstimator diff --git a/flaml/nlp/utils.py b/flaml/nlp/utils.py index 43b08c425..c714c054a 100644 --- a/flaml/nlp/utils.py +++ b/flaml/nlp/utils.py @@ -16,15 +16,15 @@ from ..data import ( def load_default_huggingface_metric_for_task(task): if task == SEQCLASSIFICATION: - return "accuracy", "max" + return "accuracy" elif task == SEQREGRESSION: - return "rmse", "max" + return "rmse" elif task == SUMMARIZATION: - return "rouge", "max" + return "rouge" elif task == MULTICHOICECLASSIFICATION: - return "accuracy", "max" + return "accuracy" elif task == TOKENCLASSIFICATION: - return "seqeval", "max" + return "seqeval" global tokenized_column_names