autogen/test/test_forecast.py

import numpy as np
from flaml import AutoML


def test_forecast_automl(budget=5):
    """Exercise the FLAML 'forecast' task end to end on the monthly CO2 series.

    The last 12 rows of the series are held out for prediction. The remaining
    rows are fitted twice: first through the ``dataframe=`` argument, then
    through ``X_train=`` / ``y_train=``. Whenever a fit raises ImportError the
    fit is repeated with the estimator list restricted to arima and sarimax.

    Args:
        budget: value passed as ``time_budget`` (seconds) to every fit and to
            the log reader.
    """
    import pickle
    import statsmodels.api as sm

    time_horizon = 12

    # Monthly means of the CO2 column; NaNs are filled from the back-filled
    # series, then the frame is reshaped into the two columns 'ds' and 'y'.
    monthly = sm.datasets.co2.load_pandas().data['co2'].resample('MS').mean()
    filled = monthly.fillna(monthly.bfill())
    data = filled.to_frame().reset_index().rename(
        columns={'index': 'ds', 'co2': 'y'})

    # Everything except the final `time_horizon` rows is used for training.
    split_idx = data.shape[0] - time_horizon
    df = data[:split_idx]
    held_out = data[split_idx:]
    X_test, y_test = held_out['ds'], held_out['y']

    settings = {
        "time_budget": budget,  # total running time in seconds
        "metric": 'mape',  # primary metric
        "task": 'forecast',  # task type
        "log_file_name": 'CO2_forecast.log',  # flaml log file
        "eval_method": "holdout",
    }

    def fit_with_fallback(learner, **train_data):
        # Try the default estimator list first; on ImportError retry with
        # only the arima/sarimax estimators.
        try:
            learner.fit(**train_data, **settings, period=time_horizon)
        except ImportError:
            print("not using FBProphet due to ImportError")
            learner.fit(
                **train_data, **settings,
                estimator_list=['arima', 'sarimax'], period=time_horizon)

    # The main flaml automl API, fed with a single dataframe.
    automl = AutoML()
    fit_with_fallback(automl, dataframe=df)

    # Retrieve best config and best learner.
    print('Best ML leaner:', automl.best_estimator)
    print('Best hyperparmeter config:', automl.best_config)
    print(f'Best mape on validation data: {automl.best_loss}')
    print(f'Training duration of best run: {automl.best_config_train_time}s')
    print(automl.model.estimator)

    # Pickle and save the automl object.
    with open('automl.pkl', 'wb') as f:
        pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)

    # Compute predictions for the held-out timestamps.
    y_pred = automl.predict(X_test)
    print('Predicted labels', y_pred)
    print('True labels', y_test)

    # Compute the metric value on the held-out data.
    from flaml.ml import sklearn_metric_loss_score
    mape = sklearn_metric_loss_score('mape', y_pred, y_test)
    print('mape', '=', mape)

    # Read the search history back from the log file; the reader returns
    # five sequences, of which only the config history is printed.
    from flaml.data import get_output_from_log
    log_output = get_output_from_log(
        filename=settings['log_file_name'], time_budget=budget)
    _, _, _, config_history, _ = log_output
    for config in config_history:
        print(config)
    for attr_name in ('prune_attr', 'max_resource', 'min_resource'):
        print(getattr(automl, attr_name))

    # Second pass: same training data, supplied as separate X / y columns.
    automl = AutoML()
    fit_with_fallback(automl, X_train=df['ds'], y_train=df['y'])


def test_numpy():
    """Fit the forecast task on numpy inputs and predict the following year.

    ``X_train`` holds 84 monthly timestamps (2014-01 through 2020-12) and
    ``y_train`` holds 72 random values. The first ``len(y_train)`` timestamps
    are used for fitting so that both arrays have the same length, and the
    remaining 12 timestamps are used for prediction.

    A ValueError from the first attempt is tolerated and only reported.
    On ImportError the fit is repeated with the arima/sarimax estimators only.
    """
    X_train = np.arange('2014-01', '2021-01', dtype='datetime64[M]')
    y_train = np.random.random(size=72)
    # Split point derived from the target length so X and y always line up.
    n_train = len(y_train)
    automl = AutoML()
    try:
        automl.fit(
            X_train=X_train[:n_train],  # a single column of timestamp
            y_train=y_train,  # value for each timestamp
            period=12,  # time horizon to forecast, e.g., 12 months
            task='forecast', time_budget=3,  # time budget in seconds
            log_file_name="test/forecast.log")
        print(automl.predict(X_train[n_train:]))
        # NOTE(review): presumably this integer-steps call is what raises the
        # ValueError when the FBProphet learner is selected - confirm in flaml.
        print(automl.predict(12))
    except ValueError:
        print("ValueError for FBProphet is raised as expected.")
    except ImportError:
        print("not using FBProphet due to ImportError")
        automl = AutoML()
        automl.fit(
            X_train=X_train[:n_train],  # a single column of timestamp
            y_train=y_train,  # value for each timestamp
            period=12,  # time horizon to forecast, e.g., 12 months
            task='forecast', time_budget=1,  # time budget in seconds
            estimator_list=['arima', 'sarimax'],
            log_file_name="test/forecast.log")
        print(automl.predict(X_train[n_train:]))
        # an alternative way to specify predict steps for arima/sarimax
        print(automl.predict(12))


if __name__ == "__main__":
    # Direct execution runs only the CO2 forecast test, with a budget of 60.
    test_forecast_automl(budget=60)
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`import numpy as np`
			`from flaml import AutoML`


			`def test_forecast_automl(budget=5):`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`# using dataframe`
			`import statsmodels.api as sm`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`data = sm.datasets.co2.load_pandas().data['co2'].resample('MS').mean()`
			`data = data.fillna(data.bfill()).to_frame().reset_index().rename(`
			`columns={'index': 'ds', 'co2': 'y'})`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`num_samples = data.shape[0]`
			`time_horizon = 12`
			`split_idx = num_samples - time_horizon`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`df = data[:split_idx]`
			`X_test = data[split_idx:]['ds']`
			`y_test = data[split_idx:]['y']`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`automl = AutoML()`
			`settings = {`
			`"time_budget": budget, # total running time in seconds`
			`"metric": 'mape', # primary metric`
			`"task": 'forecast', # task type`
			`"log_file_name": 'CO2_forecast.log', # flaml log file`
			`"eval_method": "holdout",`
			`}`
			`'''The main flaml automl API'''`
			`try:`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`automl.fit(dataframe=df, **settings, period=time_horizon)`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`except ImportError:`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`print("not using FBProphet due to ImportError")`
			`automl.fit(dataframe=df, **settings, estimator_list=[`
			`'arima', 'sarimax'], period=time_horizon)`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`''' retrieve best config and best learner'''`
			`print('Best ML leaner:', automl.best_estimator)`
			`print('Best hyperparmeter config:', automl.best_config)`
			`print(f'Best mape on validation data: {automl.best_loss}')`
			`print(f'Training duration of best run: {automl.best_config_train_time}s')`
			`print(automl.model.estimator)`
			`''' pickle and save the automl object '''`
			`import pickle`
			`with open('automl.pkl', 'wb') as f:`
			`pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)`
			`''' compute predictions of testing dataset '''`
			`y_pred = automl.predict(X_test)`
			`print('Predicted labels', y_pred)`
			`print('True labels', y_test)`
			`''' compute different metric values on testing dataset'''`
			`from flaml.ml import sklearn_metric_loss_score`
			`print('mape', '=', sklearn_metric_loss_score('mape', y_pred, y_test))`
			`from flaml.data import get_output_from_log`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`get_output_from_log(filename=settings['log_file_name'], time_budget=budget)`
			`for config in config_history:`
			`print(config)`
			`print(automl.prune_attr)`
			`print(automl.max_resource)`
			`print(automl.min_resource)`

remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`X_train = df['ds']`
			`y_train = df['y']`
			`automl = AutoML()`
			`try:`
			`automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon)`
			`except ImportError:`
			`print("not using FBProphet due to ImportError")`
			`automl.fit(X_train=X_train, y_train=y_train, **settings, estimator_list=[`
			`'arima', 'sarimax'], period=time_horizon)`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00
			`def test_numpy():`
			`X_train = np.arange('2014-01', '2021-01', dtype='datetime64[M]')`
			`y_train = np.random.random(size=72)`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`automl = AutoML()`
			`try:`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`automl.fit(`
			`X_train=X_train[:60], # a single column of timestamp`
			`y_train=y_train, # value for each timestamp`
			`period=12, # time horizon to forecast, e.g., 12 months`
			`task='forecast', time_budget=3, # time budget in seconds`
			`log_file_name="test/forecast.log")`
			`print(automl.predict(X_train[60:]))`
			`print(automl.predict(12))`
			`except ValueError:`
			`print("ValueError for FBProphet is raised as expected.")`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00			`except ImportError:`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`print("not using FBProphet due to ImportError")`
			`automl = AutoML()`
			`automl.fit(`
			`X_train=X_train[:72], # a single column of timestamp`
			`y_train=y_train, # value for each timestamp`
			`period=12, # time horizon to forecast, e.g., 12 months`
			`task='forecast', time_budget=1, # time budget in seconds`
			`estimator_list=['arima', 'sarimax'],`
			`log_file_name="test/forecast.log")`
			`print(automl.predict(X_train[72:]))`
			`# an alternative way to specify predict steps for arima/sarimax`
			`print(automl.predict(12))`
Forecast (#162) * added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax'] * update setup.py * add TimeSeriesSplit to 'regression' and 'classification' task * add 'time' split_type for 'classification' and 'regression' task Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> * feature importance * variable name * Update test/test_split.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update test/test_forecast.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * prophet installation fail in windows * upload flaml_forecast.ipynb Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com> 2021-08-23 16:26:46 -04:00

			`if __name__ == "__main__":`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`test_forecast_automl(60)`