autogen/test/test_python_log.py

from flaml.tune.space import unflatten_hierarchical
from flaml import AutoML
from sklearn.datasets import load_boston
import os
import unittest
import logging
import tempfile
import io


class TestLogging(unittest.TestCase):
    """Tests for FLAML's Python logger, driven by a short AutoML run."""

    def test_logging_level(self):
        """Run a 1-second AutoML job and check the FLAML logger produced output.

        A ``StreamHandler`` writing to an in-memory buffer is attached to the
        ``flaml`` logger; the test then fits AutoML, logs several search-space
        attributes, samples the space through optuna, runs a short
        ``tune.run`` with ``BlendSearch`` and finally asserts that the buffer
        is non-empty.  The fitted object is pickled at the end.
        """
        from flaml import logger, logger_formatter

        with tempfile.TemporaryDirectory() as d:

            training_log = os.path.join(d, "training.log")

            # Configure logging for the FLAML logger
            # and add a handler that outputs to a buffer.
            logger.setLevel(logging.INFO)
            buf = io.StringIO()
            ch = logging.StreamHandler(buf)
            ch.setFormatter(logger_formatter)
            logger.addHandler(ch)
            # The flaml logger is shared by the whole process: detach the
            # handler when the test ends (pass or fail) so later tests do not
            # keep writing into this buffer.
            self.addCleanup(logger.removeHandler, ch)

            # Run a simple job.
            automl = AutoML()
            automl_settings = {
                "time_budget": 1,
                "metric": 'rmse',
                "task": 'regression',
                "log_file_name": training_log,
                "log_training_metric": True,
                "n_jobs": 1,
                "model_history": True,
                "keep_search_state": True,
                "learner_selector": "roundrobin",
            }
            # NOTE(review): load_boston is no longer shipped by recent
            # scikit-learn releases -- confirm the version pinned for tests.
            X_train, y_train = load_boston(return_X_y=True)
            # First half of the rows is used for training, the rest for
            # validation.
            n = len(y_train) >> 1
            automl.fit(X_train=X_train[:n], y_train=y_train[:n],
                       X_val=X_train[n:], y_val=y_train[n:],
                       **automl_settings)

            # Log the search-related attributes exposed after fit.
            logger.info(automl.search_space)
            logger.info(automl.low_cost_partial_config)
            logger.info(automl.points_to_evaluate)
            logger.info(automl.cat_hp_cost)

            # Draw one configuration from the search space via an optuna
            # trial and log it in both flat and unflattened form.
            import optuna as ot
            study = ot.create_study()
            from flaml.tune.space import define_by_run_func, add_cost_to_space
            sample = define_by_run_func(study.ask(), automl.search_space)
            logger.info(sample)
            logger.info(unflatten_hierarchical(sample, automl.search_space))
            add_cost_to_space(
                automl.search_space, automl.low_cost_partial_config,
                automl.cat_hp_cost
            )
            logger.info(automl.search_space["ml"].categories)

            # Call the trainable once directly with the best configuration
            # found by fit.
            config = automl.best_config.copy()
            config['learner'] = automl.best_estimator
            automl.trainable({"ml": config})

            # Re-run the search through flaml.tune with BlendSearch, reusing
            # the space, starting points and constraints held by ``automl``.
            from flaml import tune, BlendSearch
            from flaml.automl import size
            from functools import partial
            search_alg = BlendSearch(
                metric='val_loss', mode='min',
                space=automl.search_space,
                low_cost_partial_config=automl.low_cost_partial_config,
                points_to_evaluate=automl.points_to_evaluate,
                cat_hp_cost=automl.cat_hp_cost,
                prune_attr=automl.prune_attr,
                min_resource=automl.min_resource,
                max_resource=automl.max_resource,
                config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
                metric_constraints=automl.metric_constraints)
            analysis = tune.run(
                automl.trainable, search_alg=search_alg,    # verbose=2,
                time_budget_s=1, num_samples=-1)
            print(min(trial.last_result["val_loss"]
                      for trial in analysis.trials))
            # Check if the log buffer is populated.
            self.assertGreater(len(buf.getvalue()), 0)

        # Check that the fitted AutoML object can be pickled.
        # NOTE(review): 'automl.pkl' is written to the current working
        # directory and is not removed here -- confirm nothing else reads it
        # before moving it into a temporary directory.
        import pickle
        with open('automl.pkl', 'wb') as f:
            pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
        print(automl.__version__)
v0.5.12 (#150) * remove extra comma * exclusive bound * log file name * add cost to space * dataset_format * add load_openml_dataset test * docstr * revise test format * simplify restore * order categories * openml server exception in test * process space * add warning * log format * reduce n_cpu * nested space * hierarchical search space for CFO * non hierarchical for bs * unflatten hierarchical config * connection error * random sample * config signature * check ray version * preprocess numpy array * catboost preprocess * time budget * seed, verbose, hpo_method * test cfocat * shallow copy in flatten_dict prevent lgbm model duplication * match estimator name * quantize and log * test qloguniform and qrandint * test qlograndint * thread.running Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> 2021-08-12 02:02:22 -04:00			`from flaml.tune.space import unflatten_hierarchical`
Fix #11; add tests for training log and python logger (#12) 2020-12-14 23:10:03 -08:00			`from flaml import AutoML`
			`from sklearn.datasets import load_boston`
			`import os`
			`import unittest`
			`import logging`
			`import tempfile`
			`import io`


			`class TestLogging(unittest.TestCase):`

			`def test_logging_level(self):`

			`from flaml import logger, logger_formatter`

			`with tempfile.TemporaryDirectory() as d:`

			`training_log = os.path.join(d, "training.log")`

			`# Configure logging for the FLAML logger`
			`# and add a handler that outputs to a buffer.`
			`logger.setLevel(logging.INFO)`
			`buf = io.StringIO()`
			`ch = logging.StreamHandler(buf)`
			`ch.setFormatter(logger_formatter)`
			`logger.addHandler(ch)`

			`# Run a simple job.`
pickle the AutoML object (#37) * pickle the AutoML object * get best model per estimator * test deberta * stateless API * Add Gitter badge (#41) * prevent divide by zero * test roberta * BlendSearchTuner Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> Co-authored-by: The Gitter Badger <badger@gitter.im> 2021-03-16 22:13:35 -07:00			`automl = AutoML()`
Fix #11; add tests for training log and python logger (#12) 2020-12-14 23:10:03 -08:00			`automl_settings = {`
V0.2.2 (#19) * v0.2.2 separate the HPO part into the module flaml.tune enhanced implementation of FLOW^2, CFO and BlendSearch support parallel tuning using ray tune add support for sample_weight and generic fit arguments enable mlflow logging Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> Co-authored-by: qingyun-wu <qw2ky@virginia.edu> 2021-02-05 21:41:14 -08:00			`"time_budget": 1,`
coverage (#135) * coverage * readme * timeout 2021-07-20 17:00:44 -07:00			`"metric": 'rmse',`
Fix #11; add tests for training log and python logger (#12) 2020-12-14 23:10:03 -08:00			`"task": 'regression',`
			`"log_file_name": training_log,`
			`"log_training_metric": True,`
V0.2.2 (#19) * v0.2.2 separate the HPO part into the module flaml.tune enhanced implementation of FLOW^2, CFO and BlendSearch support parallel tuning using ray tune add support for sample_weight and generic fit arguments enable mlflow logging Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> Co-authored-by: qingyun-wu <qw2ky@virginia.edu> 2021-02-05 21:41:14 -08:00			`"n_jobs": 1,`
pickle the AutoML object (#37) * pickle the AutoML object * get best model per estimator * test deberta * stateless API * Add Gitter badge (#41) * prevent divide by zero * test roberta * BlendSearchTuner Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> Co-authored-by: The Gitter Badger <badger@gitter.im> 2021-03-16 22:13:35 -07:00			`"model_history": True,`
remove big objects after fit (#176) * remove big objects after fit * xgboost>1.3.3 has a weird auc socre on: kr-vs-kp, fold 5, 1h1c * keep_search_state 2021-08-26 13:45:13 -07:00			`"keep_search_state": True,`
coverage (#135) * coverage * readme * timeout 2021-07-20 17:00:44 -07:00			`"learner_selector": "roundrobin",`
Fix #11; add tests for training log and python logger (#12) 2020-12-14 23:10:03 -08:00			`}`
			`X_train, y_train = load_boston(return_X_y=True)`
v0.1.3 Set default logging level to INFO (#14) * set default logging level to INFO * remove unnecessary import * API future compatibility * add test for customized learner * test dependency Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> 2020-12-15 08:10:43 -08:00			`n = len(y_train) >> 1`
pickle the AutoML object (#37) * pickle the AutoML object * get best model per estimator * test deberta * stateless API * Add Gitter badge (#41) * prevent divide by zero * test roberta * BlendSearchTuner Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> Co-authored-by: The Gitter Badger <badger@gitter.im> 2021-03-16 22:13:35 -07:00			`automl.fit(X_train=X_train[:n], y_train=y_train[:n],`
Issue58 (#59) * iter per learner * code cleanup 2021-04-08 09:29:55 -07:00			`X_val=X_train[n:], y_val=y_train[n:],`
			`**automl_settings)`
space -> main (#148) * subspace in flow2 * search space and trainable from AutoML * experimental features: multivariate TPE, grouping, add_evaluated_points * test experimental features * readme * define by run * set time_budget_s for bs Co-authored-by: liususan091219 <Xqq630517> * version * acl * test define_by_run_func * size * constraints Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2021-08-02 19:10:26 -04:00			`logger.info(automl.search_space)`
			`logger.info(automl.low_cost_partial_config)`
Support parallel and add random search (#167) * non hashable value out of signature * parallel trials * add random in _search_parallel * fix bug in retraining * check memory constraint before training * retrain_full * log custom metric * retraining budget check * sample size check before retrain * remove 'time2eval' from result * report 'total_search_time' in result * rename total_search_time to wall_clock_time * rename train_loss boolean to log_training_metric * set default train_loss to None * exclude oom result * log retrained model * no subsample * doc str * notebook * predicted value is NaN for sarimax * version Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qxw5138@psu.edu> 2021-08-23 19:36:51 -04:00			`logger.info(automl.points_to_evaluate)`
v0.5.12 (#150) * remove extra comma * exclusive bound * log file name * add cost to space * dataset_format * add load_openml_dataset test * docstr * revise test format * simplify restore * order categories * openml server exception in test * process space * add warning * log format * reduce n_cpu * nested space * hierarchical search space for CFO * non hierarchical for bs * unflatten hierarchical config * connection error * random sample * config signature * check ray version * preprocess numpy array * catboost preprocess * time budget * seed, verbose, hpo_method * test cfocat * shallow copy in flatten_dict prevent lgbm model duplication * match estimator name * quantize and log * test qloguniform and qrandint * test qlograndint * thread.running Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> 2021-08-12 02:02:22 -04:00			`logger.info(automl.cat_hp_cost)`
space -> main (#148) * subspace in flow2 * search space and trainable from AutoML * experimental features: multivariate TPE, grouping, add_evaluated_points * test experimental features * readme * define by run * set time_budget_s for bs Co-authored-by: liususan091219 <Xqq630517> * version * acl * test define_by_run_func * size * constraints Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2021-08-02 19:10:26 -04:00			`import optuna as ot`
			`study = ot.create_study()`
v0.5.12 (#150) * remove extra comma * exclusive bound * log file name * add cost to space * dataset_format * add load_openml_dataset test * docstr * revise test format * simplify restore * order categories * openml server exception in test * process space * add warning * log format * reduce n_cpu * nested space * hierarchical search space for CFO * non hierarchical for bs * unflatten hierarchical config * connection error * random sample * config signature * check ray version * preprocess numpy array * catboost preprocess * time budget * seed, verbose, hpo_method * test cfocat * shallow copy in flatten_dict prevent lgbm model duplication * match estimator name * quantize and log * test qloguniform and qrandint * test qlograndint * thread.running Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> 2021-08-12 02:02:22 -04:00			`from flaml.tune.space import define_by_run_func, add_cost_to_space`
			`sample = define_by_run_func(study.ask(), automl.search_space)`
			`logger.info(sample)`
			`logger.info(unflatten_hierarchical(sample, automl.search_space))`
			`add_cost_to_space(`
			`automl.search_space, automl.low_cost_partial_config,`
			`automl.cat_hp_cost`
			`)`
			`logger.info(automl.search_space["ml"].categories)`
space -> main (#148) * subspace in flow2 * search space and trainable from AutoML * experimental features: multivariate TPE, grouping, add_evaluated_points * test experimental features * readme * define by run * set time_budget_s for bs Co-authored-by: liususan091219 <Xqq630517> * version * acl * test define_by_run_func * size * constraints Co-authored-by: Chi Wang <wang.chi@microsoft.com> 2021-08-02 19:10:26 -04:00			`config = automl.best_config.copy()`
			`config['learner'] = automl.best_estimator`
			`automl.trainable({"ml": config})`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`from flaml import tune, BlendSearch`
Support parallel and add random search (#167) * non hashable value out of signature * parallel trials * add random in _search_parallel * fix bug in retraining * check memory constraint before training * retrain_full * log custom metric * retraining budget check * sample size check before retrain * remove 'time2eval' from result * report 'total_search_time' in result * rename total_search_time to wall_clock_time * rename train_loss boolean to log_training_metric * set default train_loss to None * exclude oom result * log retrained model * no subsample * doc str * notebook * predicted value is NaN for sarimax * version Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qxw5138@psu.edu> 2021-08-23 19:36:51 -04:00			`from flaml.automl import size`
			`from functools import partial`
remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178) * remove catboost training dir * close #48 * bs for hierarchical space. close #85 * retrain for hierarchical space * clean ml (#180) Co-authored-by: Qingyun Wu <qxw5138@psu.edu> * support ranking task * examples * cv shuffle * forecast api and implementation cleaner * period constraints * delete groups after fit 2021-09-01 16:25:04 -07:00			`search_alg = BlendSearch(`
			`metric='val_loss', mode='min',`
v0.5.12 (#150) * remove extra comma * exclusive bound * log file name * add cost to space * dataset_format * add load_openml_dataset test * docstr * revise test format * simplify restore * order categories * openml server exception in test * process space * add warning * log format * reduce n_cpu * nested space * hierarchical search space for CFO * non hierarchical for bs * unflatten hierarchical config * connection error * random sample * config signature * check ray version * preprocess numpy array * catboost preprocess * time budget * seed, verbose, hpo_method * test cfocat * shallow copy in flatten_dict prevent lgbm model duplication * match estimator name * quantize and log * test qloguniform and qrandint * test qlograndint * thread.running Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> 2021-08-12 02:02:22 -04:00			`space=automl.search_space,`
			`low_cost_partial_config=automl.low_cost_partial_config,`
Support parallel and add random search (#167) * non hashable value out of signature * parallel trials * add random in _search_parallel * fix bug in retraining * check memory constraint before training * retrain_full * log custom metric * retraining budget check * sample size check before retrain * remove 'time2eval' from result * report 'total_search_time' in result * rename total_search_time to wall_clock_time * rename train_loss boolean to log_training_metric * set default train_loss to None * exclude oom result * log retrained model * no subsample * doc str * notebook * predicted value is NaN for sarimax * version Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qxw5138@psu.edu> 2021-08-23 19:36:51 -04:00			`points_to_evaluate=automl.points_to_evaluate,`
v0.5.12 (#150) * remove extra comma * exclusive bound * log file name * add cost to space * dataset_format * add load_openml_dataset test * docstr * revise test format * simplify restore * order categories * openml server exception in test * process space * add warning * log format * reduce n_cpu * nested space * hierarchical search space for CFO * non hierarchical for bs * unflatten hierarchical config * connection error * random sample * config signature * check ray version * preprocess numpy array * catboost preprocess * time budget * seed, verbose, hpo_method * test cfocat * shallow copy in flatten_dict prevent lgbm model duplication * match estimator name * quantize and log * test qloguniform and qrandint * test qlograndint * thread.running Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> 2021-08-12 02:02:22 -04:00			`cat_hp_cost=automl.cat_hp_cost,`
			`prune_attr=automl.prune_attr,`
			`min_resource=automl.min_resource,`
			`max_resource=automl.max_resource,`
Support parallel and add random search (#167) * non hashable value out of signature * parallel trials * add random in _search_parallel * fix bug in retraining * check memory constraint before training * retrain_full * log custom metric * retraining budget check * sample size check before retrain * remove 'time2eval' from result * report 'total_search_time' in result * rename total_search_time to wall_clock_time * rename train_loss boolean to log_training_metric * set default train_loss to None * exclude oom result * log retrained model * no subsample * doc str * notebook * predicted value is NaN for sarimax * version Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qxw5138@psu.edu> 2021-08-23 19:36:51 -04:00			`config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],`
v0.5.12 (#150) * remove extra comma * exclusive bound * log file name * add cost to space * dataset_format * add load_openml_dataset test * docstr * revise test format * simplify restore * order categories * openml server exception in test * process space * add warning * log format * reduce n_cpu * nested space * hierarchical search space for CFO * non hierarchical for bs * unflatten hierarchical config * connection error * random sample * config signature * check ray version * preprocess numpy array * catboost preprocess * time budget * seed, verbose, hpo_method * test cfocat * shallow copy in flatten_dict prevent lgbm model duplication * match estimator name * quantize and log * test qloguniform and qrandint * test qlograndint * thread.running Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> 2021-08-12 02:02:22 -04:00			`metric_constraints=automl.metric_constraints)`
			`analysis = tune.run(`
			`automl.trainable, search_alg=search_alg, # verbose=2,`
			`time_budget_s=1, num_samples=-1)`
remove big objects after fit (#176) * remove big objects after fit * xgboost>1.3.3 has a weird auc socre on: kr-vs-kp, fold 5, 1h1c * keep_search_state 2021-08-26 13:45:13 -07:00			`print(min(trial.last_result["val_loss"]`
v0.5.12 (#150) * remove extra comma * exclusive bound * log file name * add cost to space * dataset_format * add load_openml_dataset test * docstr * revise test format * simplify restore * order categories * openml server exception in test * process space * add warning * log format * reduce n_cpu * nested space * hierarchical search space for CFO * non hierarchical for bs * unflatten hierarchical config * connection error * random sample * config signature * check ray version * preprocess numpy array * catboost preprocess * time budget * seed, verbose, hpo_method * test cfocat * shallow copy in flatten_dict prevent lgbm model duplication * match estimator name * quantize and log * test qloguniform and qrandint * test qlograndint * thread.running Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local> 2021-08-12 02:02:22 -04:00			`for trial in analysis.trials))`
Fix #11; add tests for training log and python logger (#12) 2020-12-14 23:10:03 -08:00			`# Check if the log buffer is populated.`
			`self.assertTrue(len(buf.getvalue()) > 0)`
pickle the AutoML object (#37) * pickle the AutoML object * get best model per estimator * test deberta * stateless API * Add Gitter badge (#41) * prevent divide by zero * test roberta * BlendSearchTuner Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> Co-authored-by: The Gitter Badger <badger@gitter.im> 2021-03-16 22:13:35 -07:00
			`import pickle`
			`with open('automl.pkl', 'wb') as f:`
			`pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)`
Issue58 (#59) * iter per learner * code cleanup 2021-04-08 09:29:55 -07:00			`print(automl.__version__)`