autogen/test/automl/test_regression.py

import unittest
import numpy as np
import scipy.sparse
from sklearn.datasets import (
    fetch_california_housing,
)

from flaml import AutoML
from flaml.data import get_output_from_log
from flaml.model import XGBoostEstimator


def logregobj(preds, dtrain):
    """Custom logistic objective usable as an XGBoost ``objective`` callable.

    Args:
        preds: raw scores (before the sigmoid is applied).
        dtrain: object exposing ``get_label()`` that returns the targets.

    Returns:
        A ``(grad, hess)`` pair: ``p - labels`` and ``p * (1 - p)``, where
        ``p`` is the sigmoid of ``preds``.
    """
    targets = dtrain.get_label()
    # Map the raw leaf weights onto probabilities with the sigmoid.
    prob = 1.0 / (1.0 + np.exp(-preds))
    gradient = prob - targets
    hessian = prob * (1.0 - prob)
    return gradient, hessian


class MyXGB1(XGBoostEstimator):
    """XGBoostEstimator subclass that trains with the ``logregobj`` callable."""

    def __init__(self, **config):
        # dict(...) raises TypeError on a duplicate "objective" key, exactly
        # as passing the keyword twice in a call would.
        init_kwargs = dict(objective=logregobj, **config)
        super().__init__(**init_kwargs)


class MyXGB2(XGBoostEstimator):
    """XGBoostEstimator subclass pinned to the 'reg:squarederror' objective."""

    def __init__(self, **config):
        # dict(...) raises TypeError on a duplicate "objective" key, exactly
        # as passing the keyword twice in a call would.
        init_kwargs = dict(objective="reg:squarederror", **config)
        super().__init__(**init_kwargs)


class TestRegression(unittest.TestCase):
    """Smoke tests for AutoML on regression tasks.

    Each test runs a search with a small ``time_budget`` and then reads the
    result attributes of the fitted object. The values are printed rather
    than compared against exact expectations.
    """

    def _print_search_summary(
        self, automl, X, estimator, with_best_config=False
    ):
        """Print the search results so that every accessor gets exercised.

        Args:
            automl: an AutoML instance on which ``fit`` has been called.
            X: features handed to ``automl.predict``.
            estimator: name handed to ``automl.best_model_for_estimator``.
            with_best_config: when True, also print ``best_config``,
                ``best_loss`` and ``best_config_train_time``.
        """
        print(automl.predict(X))
        print(automl.model)
        print(automl.config_history)
        print(automl.best_model_for_estimator(estimator))
        print(automl.best_iteration)
        print(automl.best_estimator)
        if with_best_config:
            print(automl.best_config)
            print(automl.best_loss)
            print(automl.best_config_train_time)

    def test_regression(self):
        """Fit with an explicit validation split, then retrain from the log."""
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        # The first nine tenths of the rows train; the remainder validates.
        n = len(y_train) * 9 // 10
        automl_experiment.fit(
            X_train=X_train[:n],
            y_train=y_train[:n],
            X_val=X_train[n:],
            y_val=y_train[n:],
            **automl_settings
        )
        # With X_val/y_val supplied the search is expected to use holdout.
        self.assertEqual(automl_experiment._state.eval_method, "holdout")
        self._print_search_summary(automl_experiment, X_train, "xgboost")
        print(get_output_from_log(automl_settings["log_file_name"], 1))
        # Retrain on the full data twice: with time_budget=1 and with
        # time_budget=0.
        automl_experiment.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=1,
        )
        automl_experiment.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=0,
        )

    def test_sparse_matrix_regression(self):
        """Fit on random sparse matrices with a separate validation set."""
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": "mae",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "verbose": 0,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
        # The stored validation matrix must keep the shape that was passed in.
        self.assertEqual(automl_experiment._state.X_val.shape, X_val.shape)
        self._print_search_summary(
            automl_experiment, X_train, "rf", with_best_config=True
        )

    def test_parallel(self, hpo_method=None):
        """Run a search with ``n_concurrent_trials`` set to 10.

        Args:
            hpo_method: forwarded to ``fit`` as the ``hpo_method`` setting.

        The test is reported as skipped when ``fit`` raises ImportError
        (presumably an optional parallel-tuning dependency is missing;
        TODO confirm which one).
        """
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 10,
            "hpo_method": hpo_method,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        except ImportError as err:
            # Report a skip instead of a silent pass so that the missing
            # coverage is visible in the test report.
            self.skipTest(
                "dependency for parallel tuning is unavailable: {}".format(err)
            )
        self._print_search_summary(automl_experiment, X_train, "xgboost")

    def test_sparse_matrix_regression_holdout(self):
        """Fit on a tiny sparse matrix with ``eval_method`` set to holdout."""
        X_train = scipy.sparse.random(8, 100)
        y_train = np.random.uniform(size=8)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "eval_method": "holdout",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "metric": "mse",
            "sample_weight": np.ones(len(y_train)),
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        self._print_search_summary(automl_experiment, X_train, "rf")

    def test_regression_xgboost(self):
        """Search over the two custom XGBoost learners registered below."""
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)
        automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)
        automl_settings = {
            "time_budget": 2,
            "estimator_list": ["my_xgb1", "my_xgb2"],
            "task": "regression",
            "log_file_name": "test/regression_xgboost.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
        # The stored validation matrix must keep the shape that was passed in.
        self.assertEqual(automl_experiment._state.X_val.shape, X_val.shape)
        self._print_search_summary(
            automl_experiment, X_train, "my_xgb2", with_best_config=True
        )


# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()
limit time and memory consumption (#264) * limit time and memory * separate tests * lrl1 can't be limited by limit_resource * free memory when possible * passthrough=False when ensemble fails; retrain when trained_estimator is None * use callback to for resource limit * handle lower version of xgb with no callback * free mem ratio * reduce verbosity * retrain_final when max_iter==1 * remove trained_estimator from result * model_history * wheel * retrain time as best_config_train_time * ci: libomp version for xgboost on macos * limit_resource not working in windows * test pickle load * mute forecaster * notebook update * check hard * preventive callback * add use_ray 2021-11-03 19:08:23 -07:00			`import unittest`
			`import numpy as np`
			`import scipy.sparse`
			`from sklearn.datasets import (`
			`fetch_california_housing,`
			`)`

			`from flaml import AutoML`
			`from flaml.data import get_output_from_log`
			`from flaml.model import XGBoostEstimator`


			`def logregobj(preds, dtrain):`
			`labels = dtrain.get_label()`
			`preds = 1.0 / (1.0 + np.exp(-preds)) # transform raw leaf weight`
			`grad = preds - labels`
			`hess = preds * (1.0 - preds)`
			`return grad, hess`


			`class MyXGB1(XGBoostEstimator):`
			`"""XGBoostEstimator with logregobj as the objective function"""`

			`def __init__(self, **config):`
			`super().__init__(objective=logregobj, **config)`


			`class MyXGB2(XGBoostEstimator):`
			`"""XGBoostEstimator with 'reg:squarederror' as the objective function"""`

			`def __init__(self, **config):`
			`super().__init__(objective="reg:squarederror", **config)`


			`class TestRegression(unittest.TestCase):`
			`def test_regression(self):`
			`automl_experiment = AutoML()`
			`automl_settings = {`
			`"time_budget": 2,`
			`"task": "regression",`
			`"log_file_name": "test/california.log",`
			`"log_training_metric": True,`
			`"n_jobs": 1,`
			`"model_history": True,`
			`}`
			`X_train, y_train = fetch_california_housing(return_X_y=True)`
			`n = int(len(y_train) * 9 // 10)`
			`automl_experiment.fit(`
			`X_train=X_train[:n],`
			`y_train=y_train[:n],`
			`X_val=X_train[n:],`
			`y_val=y_train[n:],`
			`**automl_settings`
			`)`
			`assert automl_experiment._state.eval_method == "holdout"`
			`print(automl_experiment.predict(X_train))`
			`print(automl_experiment.model)`
			`print(automl_experiment.config_history)`
model_history, ITER_HP, settings in AutoML(), checkpoint bug fix (#283) if save_best_model_per_estimator is False and retrain_final is True, unfit the model after evaluation in HPO. retrain if using ray. update ITER_HP in config after a trial is finished. change prophet logging level. example and notebook update. allow settings to be passed to AutoML constructor. Are you planning to add multi-output-regression capability to FLAML #192 Is multi-tasking allowed? #277 can pass the auotml setting to the constructor instead of requiring a derived class. remove model_history. checkpoint bug fix. * model_history meaning save_best_model_per_estimator * ITER_HP * example update * prophet logging level * comment update in forecast notebook * print format improvement * allow settings to be passed to AutoML constructor * checkpoint bug fix * time limit for autohf regression test * skip slow test on macos * cleanup before del 2021-11-18 09:39:45 -08:00			`print(automl_experiment.best_model_for_estimator("xgboost"))`
limit time and memory consumption (#264) * limit time and memory * separate tests * lrl1 can't be limited by limit_resource * free memory when possible * passthrough=False when ensemble fails; retrain when trained_estimator is None * use callback to for resource limit * handle lower version of xgb with no callback * free mem ratio * reduce verbosity * retrain_final when max_iter==1 * remove trained_estimator from result * model_history * wheel * retrain time as best_config_train_time * ci: libomp version for xgboost on macos * limit_resource not working in windows * test pickle load * mute forecaster * notebook update * check hard * preventive callback * add use_ray 2021-11-03 19:08:23 -07:00			`print(automl_experiment.best_iteration)`
			`print(automl_experiment.best_estimator)`
			`print(get_output_from_log(automl_settings["log_file_name"], 1))`
			`automl_experiment.retrain_from_log(`
			`task="regression",`
			`log_file_name=automl_settings["log_file_name"],`
			`X_train=X_train,`
			`y_train=y_train,`
			`train_full=True,`
			`time_budget=1,`
			`)`
			`automl_experiment.retrain_from_log(`
			`task="regression",`
			`log_file_name=automl_settings["log_file_name"],`
			`X_train=X_train,`
			`y_train=y_train,`
			`train_full=True,`
			`time_budget=0,`
			`)`

			`def test_sparse_matrix_regression(self):`
			`X_train = scipy.sparse.random(300, 900, density=0.0001)`
			`y_train = np.random.uniform(size=300)`
			`X_val = scipy.sparse.random(100, 900, density=0.0001)`
			`y_val = np.random.uniform(size=100)`
			`automl_experiment = AutoML()`
			`automl_settings = {`
			`"time_budget": 2,`
			`"metric": "mae",`
			`"task": "regression",`
			`"log_file_name": "test/sparse_regression.log",`
			`"n_jobs": 1,`
			`"model_history": True,`
			`"keep_search_state": True,`
			`"verbose": 0,`
			`"early_stop": True,`
			`}`
			`automl_experiment.fit(`
			`X_train=X_train,`
			`y_train=y_train,`
			`X_val=X_val,`
			`y_val=y_val,`
			`**automl_settings`
			`)`
			`assert automl_experiment._state.X_val.shape == X_val.shape`
			`print(automl_experiment.predict(X_train))`
			`print(automl_experiment.model)`
			`print(automl_experiment.config_history)`
model_history, ITER_HP, settings in AutoML(), checkpoint bug fix (#283) if save_best_model_per_estimator is False and retrain_final is True, unfit the model after evaluation in HPO. retrain if using ray. update ITER_HP in config after a trial is finished. change prophet logging level. example and notebook update. allow settings to be passed to AutoML constructor. Are you planning to add multi-output-regression capability to FLAML #192 Is multi-tasking allowed? #277 can pass the auotml setting to the constructor instead of requiring a derived class. remove model_history. checkpoint bug fix. * model_history meaning save_best_model_per_estimator * ITER_HP * example update * prophet logging level * comment update in forecast notebook * print format improvement * allow settings to be passed to AutoML constructor * checkpoint bug fix * time limit for autohf regression test * skip slow test on macos * cleanup before del 2021-11-18 09:39:45 -08:00			`print(automl_experiment.best_model_for_estimator("rf"))`
limit time and memory consumption (#264) * limit time and memory * separate tests * lrl1 can't be limited by limit_resource * free memory when possible * passthrough=False when ensemble fails; retrain when trained_estimator is None * use callback to for resource limit * handle lower version of xgb with no callback * free mem ratio * reduce verbosity * retrain_final when max_iter==1 * remove trained_estimator from result * model_history * wheel * retrain time as best_config_train_time * ci: libomp version for xgboost on macos * limit_resource not working in windows * test pickle load * mute forecaster * notebook update * check hard * preventive callback * add use_ray 2021-11-03 19:08:23 -07:00			`print(automl_experiment.best_iteration)`
			`print(automl_experiment.best_estimator)`
			`print(automl_experiment.best_config)`
			`print(automl_experiment.best_loss)`
			`print(automl_experiment.best_config_train_time)`

			`def test_parallel(self, hpo_method=None):`
			`automl_experiment = AutoML()`
			`automl_settings = {`
			`"time_budget": 10,`
			`"task": "regression",`
			`"log_file_name": "test/california.log",`
			`"log_type": "all",`
			`"n_jobs": 1,`
			`"n_concurrent_trials": 10,`
			`"hpo_method": hpo_method,`
			`}`
			`X_train, y_train = fetch_california_housing(return_X_y=True)`
			`try:`
			`automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)`
			`print(automl_experiment.predict(X_train))`
			`print(automl_experiment.model)`
			`print(automl_experiment.config_history)`
model_history, ITER_HP, settings in AutoML(), checkpoint bug fix (#283) if save_best_model_per_estimator is False and retrain_final is True, unfit the model after evaluation in HPO. retrain if using ray. update ITER_HP in config after a trial is finished. change prophet logging level. example and notebook update. allow settings to be passed to AutoML constructor. Are you planning to add multi-output-regression capability to FLAML #192 Is multi-tasking allowed? #277 can pass the auotml setting to the constructor instead of requiring a derived class. remove model_history. checkpoint bug fix. * model_history meaning save_best_model_per_estimator * ITER_HP * example update * prophet logging level * comment update in forecast notebook * print format improvement * allow settings to be passed to AutoML constructor * checkpoint bug fix * time limit for autohf regression test * skip slow test on macos * cleanup before del 2021-11-18 09:39:45 -08:00			`print(automl_experiment.best_model_for_estimator("xgboost"))`
limit time and memory consumption (#264) * limit time and memory * separate tests * lrl1 can't be limited by limit_resource * free memory when possible * passthrough=False when ensemble fails; retrain when trained_estimator is None * use callback to for resource limit * handle lower version of xgb with no callback * free mem ratio * reduce verbosity * retrain_final when max_iter==1 * remove trained_estimator from result * model_history * wheel * retrain time as best_config_train_time * ci: libomp version for xgboost on macos * limit_resource not working in windows * test pickle load * mute forecaster * notebook update * check hard * preventive callback * add use_ray 2021-11-03 19:08:23 -07:00			`print(automl_experiment.best_iteration)`
			`print(automl_experiment.best_estimator)`
			`except ImportError:`
			`return`

			`def test_sparse_matrix_regression_holdout(self):`
			`X_train = scipy.sparse.random(8, 100)`
			`y_train = np.random.uniform(size=8)`
			`automl_experiment = AutoML()`
			`automl_settings = {`
			`"time_budget": 1,`
			`"eval_method": "holdout",`
			`"task": "regression",`
			`"log_file_name": "test/sparse_regression.log",`
			`"n_jobs": 1,`
			`"model_history": True,`
			`"metric": "mse",`
			`"sample_weight": np.ones(len(y_train)),`
			`"early_stop": True,`
			`}`
			`automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)`
			`print(automl_experiment.predict(X_train))`
			`print(automl_experiment.model)`
			`print(automl_experiment.config_history)`
model_history, ITER_HP, settings in AutoML(), checkpoint bug fix (#283) if save_best_model_per_estimator is False and retrain_final is True, unfit the model after evaluation in HPO. retrain if using ray. update ITER_HP in config after a trial is finished. change prophet logging level. example and notebook update. allow settings to be passed to AutoML constructor. Are you planning to add multi-output-regression capability to FLAML #192 Is multi-tasking allowed? #277 can pass the auotml setting to the constructor instead of requiring a derived class. remove model_history. checkpoint bug fix. * model_history meaning save_best_model_per_estimator * ITER_HP * example update * prophet logging level * comment update in forecast notebook * print format improvement * allow settings to be passed to AutoML constructor * checkpoint bug fix * time limit for autohf regression test * skip slow test on macos * cleanup before del 2021-11-18 09:39:45 -08:00			`print(automl_experiment.best_model_for_estimator("rf"))`
limit time and memory consumption (#264) * limit time and memory * separate tests * lrl1 can't be limited by limit_resource * free memory when possible * passthrough=False when ensemble fails; retrain when trained_estimator is None * use callback to for resource limit * handle lower version of xgb with no callback * free mem ratio * reduce verbosity * retrain_final when max_iter==1 * remove trained_estimator from result * model_history * wheel * retrain time as best_config_train_time * ci: libomp version for xgboost on macos * limit_resource not working in windows * test pickle load * mute forecaster * notebook update * check hard * preventive callback * add use_ray 2021-11-03 19:08:23 -07:00			`print(automl_experiment.best_iteration)`
			`print(automl_experiment.best_estimator)`

			`def test_regression_xgboost(self):`
			`X_train = scipy.sparse.random(300, 900, density=0.0001)`
			`y_train = np.random.uniform(size=300)`
			`X_val = scipy.sparse.random(100, 900, density=0.0001)`
			`y_val = np.random.uniform(size=100)`
			`automl_experiment = AutoML()`
			`automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)`
			`automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)`
			`automl_settings = {`
			`"time_budget": 2,`
			`"estimator_list": ["my_xgb1", "my_xgb2"],`
			`"task": "regression",`
			`"log_file_name": "test/regression_xgboost.log",`
			`"n_jobs": 1,`
			`"model_history": True,`
			`"keep_search_state": True,`
			`"early_stop": True,`
			`}`
			`automl_experiment.fit(`
			`X_train=X_train,`
			`y_train=y_train,`
			`X_val=X_val,`
			`y_val=y_val,`
			`**automl_settings`
			`)`
			`assert automl_experiment._state.X_val.shape == X_val.shape`
			`print(automl_experiment.predict(X_train))`
			`print(automl_experiment.model)`
			`print(automl_experiment.config_history)`
model_history, ITER_HP, settings in AutoML(), checkpoint bug fix (#283) if save_best_model_per_estimator is False and retrain_final is True, unfit the model after evaluation in HPO. retrain if using ray. update ITER_HP in config after a trial is finished. change prophet logging level. example and notebook update. allow settings to be passed to AutoML constructor. Are you planning to add multi-output-regression capability to FLAML #192 Is multi-tasking allowed? #277 can pass the auotml setting to the constructor instead of requiring a derived class. remove model_history. checkpoint bug fix. * model_history meaning save_best_model_per_estimator * ITER_HP * example update * prophet logging level * comment update in forecast notebook * print format improvement * allow settings to be passed to AutoML constructor * checkpoint bug fix * time limit for autohf regression test * skip slow test on macos * cleanup before del 2021-11-18 09:39:45 -08:00			`print(automl_experiment.best_model_for_estimator("my_xgb2"))`
limit time and memory consumption (#264) * limit time and memory * separate tests * lrl1 can't be limited by limit_resource * free memory when possible * passthrough=False when ensemble fails; retrain when trained_estimator is None * use callback to for resource limit * handle lower version of xgb with no callback * free mem ratio * reduce verbosity * retrain_final when max_iter==1 * remove trained_estimator from result * model_history * wheel * retrain time as best_config_train_time * ci: libomp version for xgboost on macos * limit_resource not working in windows * test pickle load * mute forecaster * notebook update * check hard * preventive callback * add use_ray 2021-11-03 19:08:23 -07:00			`print(automl_experiment.best_iteration)`
			`print(automl_experiment.best_estimator)`
			`print(automl_experiment.best_config)`
			`print(automl_experiment.best_loss)`
			`print(automl_experiment.best_config_train_time)`


			`if __name__ == "__main__":`
			`unittest.main()`