2021-02-05 21:41:14 -08:00
|
|
|
import unittest
|
|
|
|
|
|
|
|
from sklearn.datasets import fetch_openml
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
import numpy as np
|
|
|
|
from flaml.automl import AutoML
|
|
|
|
from flaml.model import XGBoostSklearnEstimator
|
|
|
|
from flaml import tune
|
|
|
|
|
|
|
|
|
|
|
|
# OpenML dataset name used throughout the tests; test_simple falls back to
# sklearn's load_wine when fetching this dataset fails.
dataset = "credit-g"
|
|
|
|
|
|
|
|
|
|
|
|
class XGBoost2D(XGBoostSklearnEstimator):
    """XGBoost estimator whose tuning space is restricted to two dimensions.

    Only ``n_estimators`` and ``max_leaves`` are searched; both share the
    same log-uniform integer range and low-cost starting point.
    """

    @classmethod
    def search_space(cls, data_size, task):
        """Return the 2-D hyperparameter search space.

        Args:
            data_size: Number of training instances; caps the upper bound.
            task: Task name (unused here, required by the interface).

        Returns:
            Dict mapping each hyperparameter name to its ``domain`` and
            ``low_cost_init_value``.
        """
        # Cap the range at 32768 so tiny datasets get a tighter space.
        hi = min(32768, int(data_size))
        space = {}
        for param in ('n_estimators', 'max_leaves'):
            space[param] = {
                'domain': tune.lograndint(lower=4, upper=hi),
                'low_cost_init_value': 4,
            }
        return space
|
|
|
|
|
|
|
|
|
|
|
|
def test_simple(method=None):
    """Run a short AutoML fit with the 2-D XGBoost learner, then replay
    the best configuration through ``flaml.tune``.

    Args:
        method: HPO method name forwarded to AutoML (e.g. "grid",
            "optuna"); ``None`` selects the default.
    """
    automl = AutoML()
    automl.add_learner(learner_name='XGBoost2D', learner_class=XGBoost2D)

    settings = {
        "estimator_list": ['XGBoost2D'],
        "task": 'classification',
        "log_file_name": f"test/xgboost2d_{dataset}_{method}.log",
        "n_jobs": 1,
        "hpo_method": method,
        "log_type": "all",
        "time_budget": 1,
    }

    # NOTE(review): private sklearn import — assumed intentional for the
    # sklearn version this test targets; verify before upgrading sklearn.
    from sklearn.externals._arff import ArffException
    try:
        X, y = fetch_openml(name=dataset, return_X_y=True)
    except (ArffException, ValueError):
        # Offline / fetch failure fallback: a small bundled dataset.
        from sklearn.datasets import load_wine
        X, y = load_wine(return_X_y=True)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)

    automl.fit(X_train=X_train, y_train=y_train, **settings)

    print(automl.estimator_list)
    print(automl.search_space)
    # NOTE(review): "evalaute" matches the attribute spelling in the FLAML
    # version under test — do not "fix" without changing the library too.
    print(automl.points_to_evalaute)

    # Replay the best found configuration through the raw trainable.
    config = automl.best_config.copy()
    config['learner'] = automl.best_estimator
    automl.trainable(config)

    from flaml import tune
    analysis = tune.run(
        automl.trainable, automl.search_space, metric='val_loss',
        low_cost_partial_config=automl.low_cost_partial_config,
        points_to_evaluate=automl.points_to_evalaute,
        cat_hp_cost=automl.cat_hp_cost,
        prune_attr=automl.prune_attr,
        min_resource=automl.min_resource,
        max_resource=automl.max_resource,
        time_budget_s=automl._state.time_budget,
        config_constraints=[(automl.size, '<=', automl._mem_thres)],
        metric_constraints=automl.metric_constraints)
    print(analysis.trials[-1])
|
2021-02-05 21:41:14 -08:00
|
|
|
|
|
|
|
|
|
|
|
def _test_optuna():
    """Exercise test_simple with Optuna as the HPO method.

    Underscore prefix keeps it out of automatic test collection.
    """
    test_simple(method="optuna")
|
|
|
|
|
|
|
|
|
|
|
|
def test_grid():
    """Exercise test_simple with grid search as the HPO method."""
    test_simple(method="grid")
|
|
|
|
|
|
|
|
|
|
|
|
# Allow running this module directly as a test script.
if __name__ == "__main__":
    unittest.main()
|