Mirror of https://github.com/microsoft/autogen.git (synced 2025-10-20 12:29:37 +00:00)
Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']
* update setup.py
* add TimeSeriesSplit to 'regression' and 'classification' task
* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance
* variable name
* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows
* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>
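For context, a minimal end-to-end sketch of the new 'forecast' task, adapted from test/test_forecast.py added in this commit (the CO2 dataset, 60-second budget, and 12-month horizon are simply the values the test uses):

```python
import statsmodels.api as sm
from flaml import AutoML

# Monthly CO2 series reshaped into the two required columns: 'ds' (timestamp) and 'y' (value).
data = sm.datasets.co2.load_pandas().data['co2'].resample('MS').mean()
data = data.fillna(data.bfill()).to_frame().reset_index()
data = data.rename(columns={'index': 'ds', 'co2': 'y'})

time_horizon = 12                        # forecast the last 12 months
train = data[:-time_horizon]
X_test = data[-time_horizon:]['ds'].to_frame()

automl = AutoML()
automl.fit(dataframe=train, task='forecast', metric='mape',
           eval_method='holdout', split_type='time',
           time_budget=60, period=time_horizon, freq='M')
y_pred = automl.predict(X_test)          # one prediction per future timestamp
```

If prophet is not installed (for example on Windows, as noted in the commit message), the test falls back to estimator_list=['arima', 'sarimax'].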
This commit is contained in: parent 6270353458, commit 3d0a3d26a2
2 .github/workflows/python-package.yml (vendored)
@@ -41,7 +41,7 @@ jobs:
- name: If linux or mac, install ray
if: (matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest') && matrix.python-version != '3.9'
run: |
pip install -e .[ray]
pip install -e .[ray,forecast]
pip install 'tensorboardX<=2.2'
- name: Lint with flake8
run: |
160 flaml/automl.py
@@ -10,7 +10,7 @@ from functools import partial
import numpy as np
from scipy.sparse import issparse
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold, \
RepeatedKFold, GroupKFold
RepeatedKFold, GroupKFold, TimeSeriesSplit
from sklearn.utils import shuffle
import pandas as pd

@@ -25,6 +25,7 @@ from . import tune
from .training_log import training_log_reader, training_log_writer

import logging

logger = logging.getLogger(__name__)
logger_formatter = logging.Formatter(
'[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
@@ -360,11 +361,15 @@ class AutoML:
return self._trained_estimator.classes_.tolist()
return None

def predict(self, X_test):
def predict(self, X_test, freq=None):
'''Predict label from features.

Args:
X_test: A numpy array of featurized instances, shape n * m.
X_test: A numpy array of featurized instances, shape n * m,
or a pandas dataframe with one column with timestamp values
for 'forecasting' task.
freq: str or pandas offset, default=None | The frequency of the
time-series.

Returns:
A numpy array of shape n * 1 - - each element is a predicted class
@@ -375,8 +380,14 @@ class AutoML:
"No estimator is trained. Please run fit with enough budget.")
return None
X_test = self._preprocess(X_test)
y_pred = self._trained_estimator.predict(X_test)
if y_pred.ndim > 1:
if self._state.task == 'forecast':
X_test_df = pd.DataFrame(X_test)
X_test_col = list(X_test.columns)[0]
X_test_df = X_test_df.rename(columns={X_test_col: 'ds'})
y_pred = self._trained_estimator.predict(X_test_df, freq=freq)
else:
y_pred = self._trained_estimator.predict(X_test)
if y_pred.ndim > 1 and isinstance(y_pred, np.ndarray):
y_pred = y_pred.flatten()
if self._label_transformer:
return self._label_transformer.inverse_transform(pd.Series(
@@ -408,6 +419,25 @@ class AutoML:

def _validate_data(self, X_train_all, y_train_all, dataframe, label,
X_val=None, y_val=None):
if self._state.task == 'forecast':
if dataframe is not None and label is not None:
dataframe = dataframe.copy()
dataframe = dataframe.rename(columns={label[0]: 'ds', label[1]: 'y'})
elif dataframe is not None:
if ('ds' not in dataframe) or ('y' not in dataframe):
raise ValueError(
'For forecasting task, Dataframe must have columns "ds" and "y" '
'with the dates and values respectively.'
)
elif (X_train_all is not None) and (y_train_all is not None):
dataframe = pd.DataFrame(X_train_all)
time_col = list(dataframe.columns)[0]
dataframe = dataframe.rename(columns={time_col: 'ds'})
dataframe['y'] = pd.Series(y_train_all)
X_train_all = None
y_train_all = None
label = 'y'

if X_train_all is not None and y_train_all is not None:
if not (isinstance(X_train_all, np.ndarray) or issparse(X_train_all)
or isinstance(X_train_all, pd.DataFrame)):
@@ -440,7 +470,7 @@ class AutoML:
else:
raise ValueError(
"either X_train+y_train or dataframe+label are required")
if issparse(X_train_all):
if issparse(X_train_all) or self._state.task == 'forecast':
self._transformer = self._label_transformer = False
self._X_train_all, self._y_train_all = X, y
else:
@@ -482,7 +512,8 @@ class AutoML:
def _prepare_data(self,
eval_method,
split_ratio,
n_splits):
n_splits,
period=None):
X_val, y_val = self._state.X_val, self._state.y_val
if issparse(X_val):
X_val = X_val.tocsr()
@@ -490,8 +521,9 @@ class AutoML:
self._X_train_all, self._y_train_all
if issparse(X_train_all):
X_train_all = X_train_all.tocsr()
if self._state.task != 'regression' and self._state.fit_kwargs.get(
'sample_weight') is None:
if (self._state.task == 'binary:logistic' or self._state.task == 'multi:softmax') \
and self._state.fit_kwargs.get('sample_weight') is None \
and self._split_type != 'time':
# logger.info(f"label {pd.unique(y_train_all)}")
label_set, counts = np.unique(y_train_all, return_counts=True)
# augment rare classes
@@ -518,19 +550,21 @@ class AutoML:
count += rare_count
logger.info(
f"class {label} augmented from {rare_count} to {count}")
if 'sample_weight' in self._state.fit_kwargs:
X_train_all, y_train_all, self._state.fit_kwargs[
'sample_weight'] = shuffle(
SHUFFLE_SPLIT_TYPES = ['uniform', 'stratified']
if self._split_type in SHUFFLE_SPLIT_TYPES:
if 'sample_weight' in self._state.fit_kwargs:
X_train_all, y_train_all, self._state.fit_kwargs[
'sample_weight'] = shuffle(
X_train_all, y_train_all,
self._state.fit_kwargs['sample_weight'],
random_state=RANDOM_SEED)
elif hasattr(self._state, 'groups') and self._state.groups is not None:
X_train_all, y_train_all, self._state.groups = shuffle(
X_train_all, y_train_all, self._state.groups,
random_state=RANDOM_SEED)
else:
X_train_all, y_train_all = shuffle(
X_train_all, y_train_all, random_state=RANDOM_SEED)
elif hasattr(self._state, 'groups') and self._state.groups is not None:
X_train_all, y_train_all, self._state.groups = shuffle(
X_train_all, y_train_all, self._state.groups,
random_state=RANDOM_SEED)
else:
X_train_all, y_train_all = shuffle(
X_train_all, y_train_all, random_state=RANDOM_SEED)
if self._df:
X_train_all.reset_index(drop=True, inplace=True)
if isinstance(y_train_all, pd.Series):
@@ -539,7 +573,31 @@ class AutoML:
X_train, y_train = X_train_all, y_train_all
if X_val is None:
# if eval_method = holdout, make holdout data
if self._state.task != 'regression' and eval_method == 'holdout':
if eval_method == 'holdout' and self._split_type == 'time':
if 'period' in self._state.fit_kwargs:
num_samples = X_train_all.shape[0]
split_idx = num_samples - self._state.fit_kwargs.get('period')
X_train = X_train_all[:split_idx]
y_train = y_train_all[:split_idx]
X_val = X_train_all[split_idx:]
y_val = y_train_all[split_idx:]
else:
if 'sample_weight' in self._state.fit_kwargs:
X_train, X_val, y_train, y_val, self._state.fit_kwargs[
'sample_weight'], self._state.weight_val = \
train_test_split(
X_train_all,
y_train_all,
self._state.fit_kwargs['sample_weight'],
test_size=split_ratio,
shuffle=False)
else:
X_train, X_val, y_train, y_val = train_test_split(
X_train_all,
y_train_all,
test_size=split_ratio,
shuffle=False)
elif self._state.task != 'regression' and eval_method == 'holdout':
# for classification, make sure the labels are complete in both
# training and validation data
label_set, first = np.unique(y_train_all, return_index=True)
@@ -624,6 +682,13 @@ class AutoML:
f"requires input data with at least {n_splits*2} examples.")
self._state.kf = RepeatedStratifiedKFold(
n_splits=n_splits, n_repeats=1, random_state=RANDOM_SEED)
elif self._split_type == "time":
logger.info("Using TimeSeriesSplit")
if self._state.task == 'forecast':
self._state.kf = TimeSeriesSplit(
n_splits=n_splits, test_size=self._state.fit_kwargs.get('period'))
else:
self._state.kf = TimeSeriesSplit(n_splits=n_splits)
else:
logger.info("Using RepeatedKFold")
self._state.kf = RepeatedKFold(
@@ -762,10 +827,15 @@ class AutoML:
if self._state.task == 'classification':
self._state.task = get_classification_objective(
len(np.unique(self._y_train_all)))
assert split_type in ["stratified", "uniform"]
assert split_type in ["stratified", "uniform", "time"]
self._split_type = split_type
else:
self._split_type = "uniform"
elif self._state.task == 'regression':
if split_type in ["uniform", "time"]:
self._split_type = split_type
else:
self._split_type = "uniform"
elif self._state.task == 'forecast':
self._split_type = "time"
if record_id >= 0:
eval_method = 'cv'
elif eval_method == 'auto':
@@ -1011,15 +1081,22 @@ class AutoML:
Args:
X_train: A numpy array or a pandas dataframe of training data in
shape (n, m)
For 'forecast' task, X_train should be timestamp
y_train: A numpy array or a pandas series of labels in shape (n,)
For 'forecast' task, y_train should be value
dataframe: A dataframe of training data including label column
label: A str of the label column name
For 'forecast' task, dataframe must be specified and should
have two columns: timestamp and value
label: A str of the label column name for 'classification' or
'regression' task or a tuple of strings for timestamp and
value columns for 'forecasting' task
Note: If X_train and y_train are provided,
dataframe and label are ignored;
If not, dataframe and label must be provided.
metric: A string of the metric name or a function,
e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2'
'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mape', 'mae', 'mse', 'r2'
for 'forecast' task, use 'mape'
if passing a customized metric function, the function needs to
have the follwing signature:

@@ -1034,7 +1111,7 @@ class AutoML:
which returns a float number as the minimization objective,
and a tuple of floats or a dictionary as the metrics to log
task: A string of the task type, e.g.,
'classification', 'regression'
'classification', 'regression', 'forecast'
n_jobs: An integer of the number of threads for training
log_file_name: A string of the log file name
estimator_list: A list of strings for estimator names, or 'auto'
@@ -1085,7 +1162,8 @@ class AutoML:
hyperparamter configurations for the corresponding estimators.
seed: int or None, default=None | The random seed for np.random.
**fit_kwargs: Other key word arguments to pass to fit() function of
the searched learners, such as sample_weight.
the searched learners, such as sample_weight. Include period as
a key word argument for 'forecast' task.
'''
self._start_time_flag = time.time()
self._state.task = task
@@ -1093,6 +1171,7 @@ class AutoML:
self._state.fit_kwargs = fit_kwargs
self._state.weight_val = sample_weight_val
self._state.groups = groups

self._validate_data(X_train, y_train, dataframe, label, X_val, y_val)
self._search_states = {} # key: estimator name; value: SearchState
self._random = np.random.RandomState(RANDOM_SEED)
@@ -1106,10 +1185,19 @@ class AutoML:
if self._state.task == 'classification':
self._state.task = get_classification_objective(
len(np.unique(self._y_train_all)))
assert split_type in ["stratified", "uniform"]
assert split_type in ["stratified", "uniform", "time"]
self._split_type = split_type
else:
self._split_type = "uniform"
elif self._state.task == 'regression':
if split_type in ["uniform", "time"]:
self._split_type = split_type
else:
self._split_type = "uniform"
elif self._state.task == 'forecast':
if split_type is not None and split_type != 'time':
raise ValueError("split_type must be 'time' when task is 'forecast'. ")
self._split_type = "time"
if self._state.task == 'forecast' and self._state.fit_kwargs.get('period') is None:
raise TypeError("missing 1 required argument for 'forecast' task: 'period'. ")
if eval_method == 'auto' or self._state.X_val is not None:
eval_method = self._decide_eval_method(time_budget)
self._state.eval_method = eval_method
@@ -1122,7 +1210,11 @@ class AutoML:

self._retrain_full = retrain_full and (
eval_method == 'holdout' and self._state.X_val is None)
self._prepare_data(eval_method, split_ratio, n_splits)
if self._state.task != 'forecast':
self._prepare_data(eval_method, split_ratio, n_splits)
else:
self._prepare_data(eval_method, split_ratio, n_splits,
period=self._state.fit_kwargs.get('period'))
self._sample = sample and eval_method != 'cv' and (
MIN_SAMPLE_TRAIN * SAMPLE_MULTIPLY_FACTOR < self._state.data_size)
if 'auto' == metric:
@@ -1130,6 +1222,8 @@ class AutoML:
metric = 'roc_auc'
elif 'multi' in self._state.task:
metric = 'log_loss'
elif self._state.task == 'forecast':
metric = 'mape'
else:
metric = 'r2'
self._state.metric = metric
@@ -1146,6 +1240,8 @@ class AutoML:
estimator_list = ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree']
if 'regression' != self._state.task:
estimator_list += ['lrl1']
if self._state.task == 'forecast':
estimator_list = ['fbprophet', 'arima', 'sarimax']
for estimator_name in estimator_list:
if estimator_name not in self._state.learner_classes:
self.add_learner(
@@ -1237,7 +1333,7 @@ class AutoML:
elif 'bs' == self._hpo_method:
from flaml import BlendSearch as SearchAlgo
elif 'cfocat' == self._hpo_method:
from flaml import CFOCat as SearchAlgo
from flaml.searcher.cfo_cat import CFOCat as SearchAlgo
else:
raise NotImplementedError(
f"hpo_method={self._hpo_method} is not recognized. "
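Besides a ready-made 'ds'/'y' dataframe, the _validate_data and fit() docstring changes above accept forecast input as X_train/y_train or as a dataframe plus a (timestamp column, value column) tuple label. A minimal sketch of both forms; the toy series and the column names 'month'/'co2_level' are illustrative, not part of the commit:

```python
import numpy as np
import pandas as pd
from flaml import AutoML

# Toy monthly series standing in for real data.
ts = pd.date_range('2000-01-01', periods=60, freq='MS')
vals = pd.Series(np.sin(np.arange(60) / 6.0) + 10)

# Form 1: X_train holds the timestamps and y_train the values;
# _validate_data renames the single column to 'ds' and attaches 'y'.
automl = AutoML()
automl.fit(X_train=pd.DataFrame({'month': ts}), y_train=vals,
           task='forecast', metric='mape', eval_method='holdout',
           split_type='time', time_budget=15, period=12, freq='M')

# Form 2: a dataframe plus a (timestamp_column, value_column) tuple as label;
# the tuple is mapped onto 'ds'/'y' by _validate_data.
df = pd.DataFrame({'month': ts, 'co2_level': vals})
automl2 = AutoML()
automl2.fit(dataframe=df, label=('month', 'co2_level'),
            task='forecast', metric='mape', eval_method='holdout',
            split_type='time', time_budget=15, period=12, freq='M')
```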
@@ -120,7 +120,7 @@ def get_output_from_log(filename, time_budget):
time_budget: A float of the time budget in seconds

Returns:
training_time_list: A list of the finished time of each logged iter
search_time_list: A list of the finished time of each logged iter
best_error_list:
A list of the best validation error after each logged iter
error_list: A list of the validation error of each logged iter
@@ -132,9 +132,8 @@ def get_output_from_log(filename, time_budget):
best_config = None
best_learner = None
best_val_loss = float('+inf')
training_duration = 0.0

training_time_list = []
search_time_list = []
config_list = []
best_error_list = []
error_list = []
@@ -143,7 +142,6 @@ def get_output_from_log(filename, time_budget):
with training_log_reader(filename) as reader:
for record in reader.records():
time_used = record.total_search_time
training_duration = time_used
val_loss = record.validation_loss
config = record.config
learner = record.learner.split('_')[0]
@@ -156,7 +154,7 @@ def get_output_from_log(filename, time_budget):
best_config = config
best_learner = learner
best_config_list.append(best_config)
training_time_list.append(training_duration)
search_time_list.append(time_used)
best_error_list.append(best_val_loss)
logged_metric_list.append(train_loss)
error_list.append(val_loss)
@@ -166,7 +164,7 @@ def get_output_from_log(filename, time_budget):
"Best Learner": best_learner,
"Best Hyper-parameters": best_config})

return (training_time_list, best_error_list, error_list, config_list,
return (search_time_list, best_error_list, error_list, config_list,
logged_metric_list)
38 flaml/ml.py
@@ -9,12 +9,12 @@ import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
accuracy_score, mean_absolute_error, log_loss, average_precision_score, \
f1_score
from sklearn.model_selection import RepeatedStratifiedKFold, GroupKFold
f1_score, mean_absolute_percentage_error
from sklearn.model_selection import RepeatedStratifiedKFold, GroupKFold, TimeSeriesSplit
from .model import (
XGBoostEstimator, XGBoostSklearnEstimator, RandomForestEstimator,
LGBMEstimator, LRL1Classifier, LRL2Classifier, CatBoostEstimator,
ExtraTreeEstimator, KNeighborsEstimator)
ExtraTreeEstimator, KNeighborsEstimator, FBProphet, ARIMA, SARIMAX)

import logging
logger = logging.getLogger(__name__)
@@ -42,6 +42,12 @@ def get_estimator_class(task, estimator_name):
estimator_class = ExtraTreeEstimator
elif 'kneighbor' == estimator_name:
estimator_class = KNeighborsEstimator
elif 'prophet' in estimator_name:
estimator_class = FBProphet
elif estimator_name == 'arima':
estimator_class = ARIMA
elif estimator_name == 'sarimax':
estimator_class = SARIMAX
else:
raise ValueError(
estimator_name + ' is not a built-in learner. '
@@ -57,7 +63,7 @@ def sklearn_metric_loss_score(
Args:
metric_name: A string of the metric name, one of
'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',
'roc_auc_ovo', 'log_loss', 'f1', 'ap', 'micro_f1', 'macro_f1'
'roc_auc_ovo', 'log_loss', 'mape', 'f1', 'ap', 'micro_f1', 'macro_f1'
y_predict: A 1d or 2d numpy array of the predictions which can be
used to calculate the metric. E.g., 2d for log_loss and 1d
for others.
@@ -95,6 +101,9 @@ def sklearn_metric_loss_score(
elif 'log_loss' in metric_name:
score = log_loss(
y_true, y_predict, labels=labels, sample_weight=sample_weight)
elif 'mape' in metric_name:
score = mean_absolute_percentage_error(
y_true, y_predict)
elif 'micro_f1' in metric_name:
score = 1 - f1_score(
y_true, y_predict, sample_weight=sample_weight, average='micro')
@@ -111,18 +120,20 @@ def sklearn_metric_loss_score(
metric_name + ' is not a built-in metric, '
'currently built-in metrics are: '
'r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,'
'log_loss, f1, micro_f1, macro_f1, ap. '
'log_loss, mape, f1, micro_f1, macro_f1, ap. '
'please pass a customized metric function to AutoML.fit(metric=func)')
return score


def get_y_pred(estimator, X, eval_metric, obj):
def get_y_pred(estimator, X, eval_metric, obj, freq=None):
if eval_metric in ['roc_auc', 'ap'] and 'binary' in obj:
y_pred_classes = estimator.predict_proba(X)
y_pred = y_pred_classes[
:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
elif eval_metric in ['log_loss', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo']:
y_pred = estimator.predict_proba(X)
elif eval_metric == 'mape':
y_pred = estimator.predict(X, freq=freq)
else:
y_pred = estimator.predict(X)
return y_pred
@@ -201,15 +212,21 @@ def evaluate_model_CV(
valid_fold_num = total_fold_num = 0
n = kf.get_n_splits()
X_train_split, y_train_split = X_train_all, y_train_all
if task == 'regression':
labels = None
else:
if task == 'binary:logistics' or task == 'multi:softmax':
labels = np.unique(y_train_all)
else:
labels = None

if isinstance(kf, RepeatedStratifiedKFold):
kf = kf.split(X_train_split, y_train_split)
elif isinstance(kf, GroupKFold):
kf = kf.split(X_train_split, y_train_split, kf.groups)
elif isinstance(kf, TimeSeriesSplit) and task == 'forecast':
y_train_all = pd.DataFrame(y_train_all, columns=['y'])
train = X_train_all.join(y_train_all)
kf = kf.split(train)
elif isinstance(kf, TimeSeriesSplit):
kf = kf.split(X_train_split, y_train_split)
else:
kf = kf.split(X_train_split)
rng = np.random.RandomState(2020)
@@ -221,7 +238,8 @@ def evaluate_model_CV(
else:
weight = weight_val = None
for train_index, val_index in kf:
train_index = rng.permutation(train_index)
if not isinstance(kf, TimeSeriesSplit):
train_index = rng.permutation(train_index)
if isinstance(X_train_all, pd.DataFrame):
X_train, X_val = X_train_split.iloc[
train_index], X_train_split.iloc[val_index]
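The changes above register 'mape' as a built-in loss via sklearn's mean_absolute_percentage_error and let get_y_pred pass freq through to forecast models. A quick sketch of the loss value alone (the numbers are illustrative):

```python
from flaml.ml import sklearn_metric_loss_score

y_true = [100.0, 110.0, 120.0]
y_pred = [90.0, 115.5, 120.0]

# Lower is better: mean(|y_true - y_pred| / |y_true|)
# = (0.10 + 0.05 + 0.00) / 3 = 0.05
print(sklearn_metric_loss_score('mape', y_pred, y_true))
```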
221 flaml/model.py
@@ -15,6 +15,7 @@ import pandas as pd
from . import tune

import logging

logger = logging.getLogger(__name__)

@@ -635,7 +636,6 @@ class LRL2Classifier(SKLearnEstimator):


class CatBoostEstimator(BaseEstimator):

_time_per_iter = None
_train_size = 0

@@ -834,3 +834,222 @@ class KNeighborsEstimator(BaseEstimator):
X = X.drop(cat_columns, axis=1)
X = X.to_numpy()
return X


class FBProphet(BaseEstimator):
@classmethod
def search_space(cls, **params):
space = {
'changepoint_prior_scale': {
'domain': tune.loguniform(lower=0.001, upper=1000),
'init_value': 0.01,
'low_cost_init_value': 0.001,
},
'seasonality_prior_scale': {
'domain': tune.loguniform(lower=0.01, upper=100),
'init_value': 1,
},
'holidays_prior_scale': {
'domain': tune.loguniform(lower=0.01, upper=100),
'init_value': 1,
},
'seasonality_mode': {
'domain': tune.choice(['additive', 'multiplicative']),
'init_value': 'multiplicative',
}
}
return space

def fit(self, X_train, y_train, budget=None, **kwargs):
y_train = pd.DataFrame(y_train, columns=['y'])
train_df = X_train.join(y_train)

if ('ds' not in train_df) or ('y' not in train_df):
raise ValueError(
'Dataframe for training forecast model must have columns "ds" and "y" with the dates and '
'values respectively.'
)

if 'n_jobs' in self.params:
self.params.pop('n_jobs')

from prophet import Prophet

current_time = time.time()
model = Prophet(**self.params).fit(train_df)
train_time = time.time() - current_time
self._model = model
return train_time

def predict(self, X_test, freq=None):
if self._model is not None:
if isinstance(X_test, int) and freq is not None:
future = self._model.make_future_dataframe(periods=X_test, freq=freq)
forecast = self._model.predict(future)
elif isinstance(X_test, pd.DataFrame):
forecast = self._model.predict(X_test)
else:
raise ValueError(
"either X_test(pd.Dataframe with dates for predictions, column ds) or"
"X_test(int number of periods)+freq are required.")
return forecast['yhat']
else:
return np.ones(X_test.shape[0])


class ARIMA(BaseEstimator):
@classmethod
def search_space(cls, **params):
space = {
'p': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 2,
'low_cost_init_value': 0,
},
'd': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 2,
'low_cost_init_value': 0,
},
'q': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 2,
'low_cost_init_value': 0,
}
}
return space

def fit(self, X_train, y_train, budget=None, **kwargs):
y_train = pd.DataFrame(y_train, columns=['y'])
train_df = X_train.join(y_train)

if ('ds' not in train_df) or ('y' not in train_df):
raise ValueError(
'Dataframe for training forecast model must have columns "ds" and "y" with the dates and '
'values respectively.'
)

train_df.index = pd.to_datetime(train_df['ds'])
train_df = train_df.drop('ds', axis=1)

if 'n_jobs' in self.params:
self.params.pop('n_jobs')

from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator
import warnings
warnings.filterwarnings("ignore")

current_time = time.time()
model = ARIMA_estimator(train_df,
order=(self.params['p'], self.params['d'], self.params['q']),
enforce_stationarity=False,
enforce_invertibility=False)

model = model.fit()
train_time = time.time() - current_time
self._model = model
return train_time

def predict(self, X_test, freq=None):
if self._model is not None:
if isinstance(X_test, int) and freq is not None:
forecast = self._model.forecast(steps=X_test).to_frame().reset_index()
elif isinstance(X_test, pd.DataFrame):
start_date = X_test.iloc[0, 0]
end_date = X_test.iloc[-1, 0]
forecast = self._model.predict(start=start_date, end=end_date)
else:
raise ValueError(
"either X_test(pd.Dataframe with dates for predictions, column ds) or"
"X_test(int number of periods)+freq are required.")
return forecast
else:
return np.ones(X_test.shape[0])


class SARIMAX(BaseEstimator):
@classmethod
def search_space(cls, **params):
space = {
'p': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 2,
'low_cost_init_value': 0,
},
'd': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 2,
'low_cost_init_value': 0,
},
'q': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 2,
'low_cost_init_value': 0,
},
'P': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 1,
'low_cost_init_value': 0,
},
'D': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 1,
'low_cost_init_value': 0,
},
'Q': {
'domain': tune.quniform(lower=0, upper=10, q=1),
'init_value': 1,
'low_cost_init_value': 0,
},
's': {
'domain': tune.choice([1, 4, 6, 12]),
'init_value': 12,
}
}
return space

def fit(self, X_train, y_train, budget=None, **kwargs):
y_train = pd.DataFrame(y_train, columns=['y'])
train_df = X_train.join(y_train)

if ('ds' not in train_df) or ('y' not in train_df):
raise ValueError(
'Dataframe for training forecast model must have columns "ds" and "y" with the dates and '
'values respectively.'
)

train_df.index = pd.to_datetime(train_df['ds'])
train_df = train_df.drop('ds', axis=1)

if 'n_jobs' in self.params:
self.params.pop('n_jobs')

from statsmodels.tsa.statespace.sarimax import SARIMAX as SARIMAX_estimator

current_time = time.time()
model = SARIMAX_estimator(train_df,
order=(self.params['p'], self.params['d'], self.params['q']),
seasonality_order=(self.params['P'], self.params['D'], self.params['Q'], self.params['s']),
enforce_stationarity=False,
enforce_invertibility=False)

model = model.fit()
train_time = time.time() - current_time
self._model = model
return train_time

def predict(self, X_test, freq=None):
if self._model is not None:
if isinstance(X_test, int) and freq is not None:
forecast = self._model.forecast(steps=X_test).to_frame().reset_index()
elif isinstance(X_test, pd.DataFrame):
start_date = X_test.iloc[0, 0]
end_date = X_test.iloc[-1, 0]
forecast = self._model.predict(start=start_date, end=end_date)
else:
raise ValueError(
"either X_test(pd.Dataframe with dates for predictions, column ds)"
"or X_test(int number of periods)+freq are required.")
return forecast
else:
return np.ones(X_test.shape[0])
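FBProphet, ARIMA and SARIMAX above are mapped to the learner names 'fbprophet', 'arima' and 'sarimax' by get_estimator_class in flaml/ml.py and together form the default estimator list for the 'forecast' task. A small sketch of restricting the search to the statsmodels-based learners, mirroring the ImportError fallback in test/test_forecast.py; the toy data and budget are illustrative:

```python
import numpy as np
import pandas as pd
from flaml import AutoML

# Toy monthly frame with the required 'ds'/'y' columns.
df = pd.DataFrame({'ds': pd.date_range('2000-01-01', periods=60, freq='MS'),
                   'y': np.sin(np.arange(60) / 6.0) + 10})

automl = AutoML()
automl.fit(dataframe=df, task='forecast', metric='mape',
           estimator_list=['arima', 'sarimax'],   # skip prophet if it is unavailable
           eval_method='holdout', split_type='time',
           time_budget=15, period=12, freq='M')
```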
@@ -165,7 +165,8 @@ class BlendSearch(Searcher):
min_resource, max_resource, reduction_factor, self.cost_attr, seed)
self._is_ls_ever_converged = False
self._subspace = {} # the subspace for each trial id
self._init_search()
if space:
self._init_search()

def set_search_properties(self,
metric: Optional[str] = None,
@@ -1 +1 @@
__version__ = "0.5.12"
__version__ = "0.5.13"
1089 notebook/flaml_forecast.ipynb (new file)
File diff suppressed because one or more lines are too long
5 setup.py
@@ -56,6 +56,7 @@ setuptools.setup(
"torch==1.8.1",
"datasets==1.4.1",
"azure-storage-blob",
"statsmodels>=0.12.2"
],
"blendsearch": [
"optuna==2.8.0"
@@ -79,6 +80,10 @@ setuptools.setup(
"datasets==1.4.1",
"tensorboardX<=2.2",
"torch"
],
"forecast": [
"prophet>=1.0.1",
"statsmodels>=0.12.2"
]
},
classifiers=[
119 test/test_forecast.py (new file)
@@ -0,0 +1,119 @@
def test_forecast_automl_df(budget=5):
# using dataframe
import statsmodels.api as sm
data = sm.datasets.co2.load_pandas()
data = data.data
data = data['co2'].resample('MS').mean()
data = data.fillna(data.bfill())
data = data.to_frame().reset_index()
data = data.rename(columns={'index': 'ds', 'co2': 'y'})
num_samples = data.shape[0]
time_horizon = 12
split_idx = num_samples - time_horizon
X_train = data[:split_idx]
X_test = data[split_idx:]['ds'].to_frame()
y_test = data[split_idx:]['y'].to_frame()
''' import AutoML class from flaml package '''
from flaml import AutoML
automl = AutoML()
settings = {
"time_budget": budget, # total running time in seconds
"metric": 'mape', # primary metric
"task": 'forecast', # task type
"log_file_name": 'CO2_forecast.log', # flaml log file
"eval_method": "holdout",
"split_type": 'time'
}
'''The main flaml automl API'''
try:
automl.fit(dataframe=X_train, **settings, period=time_horizon, freq='M')
except ImportError:
automl.fit(dataframe=X_train, **settings, estimator_list=['arima', 'sarimax'], period=time_horizon, freq='M')
''' retrieve best config and best learner'''
print('Best ML leaner:', automl.best_estimator)
print('Best hyperparmeter config:', automl.best_config)
print(f'Best mape on validation data: {automl.best_loss}')
print(f'Training duration of best run: {automl.best_config_train_time}s')
print(automl.model.estimator)
''' pickle and save the automl object '''
import pickle
with open('automl.pkl', 'wb') as f:
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
''' compute predictions of testing dataset '''
y_pred = automl.predict(X_test)
print('Predicted labels', y_pred)
print('True labels', y_test)
''' compute different metric values on testing dataset'''
from flaml.ml import sklearn_metric_loss_score
print('mape', '=', sklearn_metric_loss_score('mape', y_pred, y_test))
from flaml.data import get_output_from_log
time_history, best_valid_loss_history, valid_loss_history, config_history, train_loss_history = \
get_output_from_log(filename=settings['log_file_name'], time_budget=budget)
for config in config_history:
print(config)
print(automl.prune_attr)
print(automl.max_resource)
print(automl.min_resource)


def test_forecast_automl_Xy(budget=5):
# using X_train and y_train
import statsmodels.api as sm
data = sm.datasets.co2.load_pandas()
data = data.data
data = data['co2'].resample('MS').mean()
data = data.fillna(data.bfill())
data = data.to_frame().reset_index()
num_samples = data.shape[0]
time_horizon = 12
split_idx = num_samples - time_horizon
X_train = data[:split_idx]['index'].to_frame()
y_train = data[:split_idx]['co2']
X_test = data[split_idx:]['index'].to_frame()
y_test = data[split_idx:]['co2'].to_frame()
''' import AutoML class from flaml package '''
from flaml import AutoML
automl = AutoML()
settings = {
"time_budget": budget, # total running time in seconds
"metric": 'mape', # primary metric
"task": 'forecast', # task type
"log_file_name": 'CO2_forecast.log', # flaml log file
"eval_method": "holdout",
"split_type": 'time'
}
'''The main flaml automl API'''
try:
automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon, freq='M')
except ImportError:
automl.fit(X_train=X_train, y_train=y_train, **settings, estimator_list=['arima', 'sarimax'], period=time_horizon, freq='M')
''' retrieve best config and best learner'''
print('Best ML leaner:', automl.best_estimator)
print('Best hyperparmeter config:', automl.best_config)
print(f'Best mape on validation data: {automl.best_loss}')
print(f'Training duration of best run: {automl.best_config_train_time}s')
print(automl.model.estimator)
''' pickle and save the automl object '''
import pickle
with open('automl.pkl', 'wb') as f:
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
''' compute predictions of testing dataset '''
y_pred = automl.predict(X_test)
print('Predicted labels', y_pred)
print('True labels', y_test)
''' compute different metric values on testing dataset'''
from flaml.ml import sklearn_metric_loss_score
print('mape', '=', sklearn_metric_loss_score('mape', y_pred, y_test))
from flaml.data import get_output_from_log
time_history, best_valid_loss_history, valid_loss_history, config_history, train_loss_history = \
get_output_from_log(filename=settings['log_file_name'], time_budget=budget)
for config in config_history:
print(config)
print(automl.prune_attr)
print(automl.max_resource)
print(automl.min_resource)


if __name__ == "__main__":
test_forecast_automl_df(60)
test_forecast_automl_Xy(60)
@@ -6,10 +6,12 @@ from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


dataset = "credit"
dataset = "credit-g"


def _test(split_type):
from sklearn.externals._arff import ArffException

automl = AutoML()

automl_settings = {
@@ -22,9 +24,17 @@ def _test(split_type):
"split_type": split_type,
}

X, y = fetch_openml(name=dataset, return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
random_state=42)
try:
X, y = fetch_openml(name=dataset, return_X_y=True)
except (ArffException, ValueError):
from sklearn.datasets import load_wine
X, y = load_wine(return_X_y=True)
if split_type != 'time':
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
random_state=42)
else:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
shuffle=False)
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

pred = automl.predict(X_test)
@@ -37,6 +47,10 @@ def _test_uniform():
_test(split_type="uniform")


def test_time():
_test(split_type="time")


def test_groups():
from sklearn.externals._arff import ArffException
try:
52 test/tune/example.py (new file)
@@ -0,0 +1,52 @@
import time


def evaluation_fn(step, width, height):
return (0.1 + width * step / 100)**(-1) + height * 0.1


def easy_objective(config):
from ray import tune
# Hyperparameters
width, height = config["width"], config["height"]

for step in range(config["steps"]):
# Iterative training function - can be any arbitrary training procedure
intermediate_score = evaluation_fn(step, width, height)
# Feed the score back back to Tune.
tune.report(iterations=step, mean_loss=intermediate_score)
time.sleep(0.1)


def test_blendsearch_tune(smoke_test=True):
try:
from ray import tune
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.suggest.flaml import BlendSearch
except ImportError:
print('ray[tune] is not installed, skipping test')
return
algo = BlendSearch()
algo = ConcurrencyLimiter(algo, max_concurrent=4)
scheduler = AsyncHyperBandScheduler()
analysis = tune.run(
easy_objective,
metric="mean_loss",
mode="min",
search_alg=algo,
scheduler=scheduler,
num_samples=10 if smoke_test else 100,
config={
"steps": 100,
"width": tune.uniform(0, 20),
"height": tune.uniform(-100, 100),
# This is an ignored parameter.
"activation": tune.choice(["relu", "tanh"])
})

print("Best hyperparameters found were: ", analysis.best_config)


if __name__ == "__main__":
test_blendsearch_tune(False)