Support parallel trials and add random search (#167)

* exclude non-hashable values from the config signature

* parallel trials

* add random in _search_parallel

* fix bug in retraining

* check memory constraint before training

* retrain_full

* log custom metric

* retraining budget check

* sample size check before retrain

* remove 'time2eval' from result

* report 'total_search_time' in result

* rename total_search_time to wall_clock_time

* rename train_loss boolean to log_training_metric

* set default train_loss to None

* exclude oom result

* log retrained model

* remove 'subsample' from the default LGBM search space

* docstring updates

* notebook

* handle NaN predicted values from sarimax

* bump version to 0.6.0

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
Qingyun Wu 2021-08-23 19:36:51 -04:00 committed by GitHub
parent 3d0a3d26a2
commit a229a6112a
21 changed files with 5142 additions and 4677 deletions
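
For orientation, the two headline features can be exercised together. The sketch below is illustrative only and mirrors the settings used by the new test_parallel_xgboost / test_random_out_of_memory tests in this PR (the iris dataset and the budget are arbitrary examples; parallel trials require ray):

    # Illustrative sketch based on the new tests; not part of the diff itself.
    from sklearn.datasets import load_iris
    from flaml import AutoML

    X_train, y_train = load_iris(return_X_y=True)
    automl = AutoML()
    automl.fit(
        X_train=X_train, y_train=y_train,
        task='classification',
        time_budget=10,            # seconds
        estimator_list=['xgboost'],
        n_jobs=1,
        n_concurrent_trials=2,     # new: run two trials in parallel
        hpo_method='random',       # new: random search instead of the default searcher
    )
    print(automl.best_estimator, automl.best_config)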

File diff suppressed because it is too large


@@ -141,14 +141,14 @@ def get_output_from_log(filename, time_budget):
     best_config_list = []
     with training_log_reader(filename) as reader:
         for record in reader.records():
-            time_used = record.total_search_time
+            time_used = record.wall_clock_time
             val_loss = record.validation_loss
             config = record.config
             learner = record.learner.split('_')[0]
             sample_size = record.sample_size
             train_loss = record.logged_metric
-            if time_used < time_budget:
+            if time_used < time_budget and np.isfinite(val_loss):
                 if val_loss < best_val_loss:
                     best_val_loss = val_loss
                     best_config = config
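
A hedged usage sketch of the function above after this change; records whose validation loss is not finite (e.g. NaN sarimax predictions) are now skipped. The five-name unpacking follows the pattern used in FLAML's example notebooks and the log file name is illustrative:

    # hedged sketch; assumes a log produced by AutoML.fit(log_file_name=...)
    from flaml.data import get_output_from_log

    time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \
        get_output_from_log(filename='airlines_experiment.log', time_budget=60)
    for t, loss in zip(time_history, best_valid_loss_history):
        print(f'{t:.1f}s  best validation loss so far: {loss:.4f}')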


@@ -102,8 +102,11 @@ def sklearn_metric_loss_score(
         score = log_loss(
             y_true, y_predict, labels=labels, sample_weight=sample_weight)
     elif 'mape' in metric_name:
-        score = mean_absolute_percentage_error(
-            y_true, y_predict)
+        try:
+            score = mean_absolute_percentage_error(
+                y_true, y_predict)
+        except ValueError:
+            return np.inf
     elif 'micro_f1' in metric_name:
         score = 1 - f1_score(
             y_true, y_predict, sample_weight=sample_weight, average='micro')
@@ -141,21 +144,23 @@ def get_y_pred(estimator, X, eval_metric, obj, freq=None):
 def get_test_loss(
     estimator, X_train, y_train, X_test, y_test, weight_test,
-    eval_metric, obj, labels=None, budget=None, train_loss=False, fit_kwargs={}
+    eval_metric, obj, labels=None, budget=None, log_training_metric=False, fit_kwargs={}
 ):
     start = time.time()
-    train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
+    estimator.fit(X_train, y_train, budget, **fit_kwargs)
     if isinstance(eval_metric, str):
         pred_start = time.time()
         test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
         pred_time = (time.time() - pred_start) / X_test.shape[0]
         test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
                                               labels, weight_test)
-        if train_loss is not False:
+        if log_training_metric:
             test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
             train_loss = sklearn_metric_loss_score(
                 eval_metric, test_pred_y,
                 y_train, labels, fit_kwargs.get('sample_weight'))
+        else:
+            train_loss = None
     else:  # customized metric function
         test_loss, metrics = eval_metric(
             X_test, y_test, estimator, labels, X_train, y_train,
@@ -174,40 +179,41 @@ def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
 def evaluate_model(
     estimator, X_train, y_train, X_val, y_val, weight_val,
-    budget, kf, task, eval_method, eval_metric, best_val_loss, train_loss=False,
+    budget, kf, task, eval_method, eval_metric, best_val_loss, log_training_metric=False,
     fit_kwargs={}
 ):
     if 'holdout' in eval_method:
         val_loss, train_loss, train_time, pred_time = evaluate_model_holdout(
             estimator, X_train, y_train, X_val, y_val, weight_val, budget,
-            task, eval_metric, train_loss=train_loss,
+            task, eval_metric, log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs)
     else:
         val_loss, train_loss, train_time, pred_time = evaluate_model_CV(
             estimator, X_train, y_train, budget, kf, task,
-            eval_metric, best_val_loss, train_loss=train_loss,
+            eval_metric, best_val_loss, log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs)
     return val_loss, train_loss, train_time, pred_time


 def evaluate_model_holdout(
     estimator, X_train, y_train, X_val, y_val,
-    weight_val, budget, task, eval_metric, train_loss=False,
+    weight_val, budget, task, eval_metric, log_training_metric=False,
     fit_kwargs={}
 ):
     val_loss, train_time, train_loss, pred_time = get_test_loss(
         estimator, X_train, y_train, X_val, y_val, weight_val, eval_metric,
-        task, budget=budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
+        task, budget=budget, log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
     return val_loss, train_loss, train_time, pred_time


 def evaluate_model_CV(
     estimator, X_train_all, y_train_all, budget, kf,
-    task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}
+    task, eval_metric, best_val_loss, log_training_metric=False, fit_kwargs={}
 ):
     start_time = time.time()
     total_val_loss = 0
     total_train_loss = None
+    train_loss = None
     train_time = pred_time = 0
     valid_fold_num = total_fold_num = 0
     n = kf.get_n_splits()
@@ -231,7 +237,7 @@ def evaluate_model_CV(
         kf = kf.split(X_train_split)
     rng = np.random.RandomState(2020)
     val_loss_list = []
-    budget_per_train = budget / (n + 1)
+    budget_per_train = budget / n
     if 'sample_weight' in fit_kwargs:
         weight = fit_kwargs['sample_weight']
         weight_val = None
@@ -259,13 +265,13 @@ def evaluate_model_CV(
         val_loss_i, train_time_i, train_loss_i, pred_time_i = get_test_loss(
             estimator, X_train, y_train, X_val, y_val, weight_val,
             eval_metric, task, labels, budget_per_train,
-            train_loss=train_loss, fit_kwargs=fit_kwargs)
+            log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
         if weight is not None:
             fit_kwargs['sample_weight'] = weight
         valid_fold_num += 1
         total_fold_num += 1
         total_val_loss += val_loss_i
-        if train_loss is not False:
+        if log_training_metric or not isinstance(eval_metric, str):
             if isinstance(total_train_loss, list):
                 total_train_loss = [
                     total_train_loss[i] + v for i, v in enumerate(train_loss_i)]
@@ -286,7 +292,7 @@ def evaluate_model_CV(
             break
     val_loss = np.max(val_loss_list)
     n = total_fold_num
-    if train_loss is not False:
+    if log_training_metric or not isinstance(eval_metric, str):
         if isinstance(total_train_loss, list):
             train_loss = [v / n for v in total_train_loss]
         elif isinstance(total_train_loss, dict):
@@ -294,17 +300,17 @@ def evaluate_model_CV(
         else:
             train_loss = total_train_loss / n
     pred_time /= n
-    budget -= time.time() - start_time
-    if val_loss < best_val_loss and budget > budget_per_train:
-        estimator.cleanup()
-        estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
+    # budget -= time.time() - start_time
+    # if val_loss < best_val_loss and budget > budget_per_train:
+    #     estimator.cleanup()
+    #     estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
     return val_loss, train_loss, train_time, pred_time


 def compute_estimator(
     X_train, y_train, X_val, y_val, weight_val, budget, kf,
     config_dic, task, estimator_name, eval_method, eval_metric,
-    best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
+    best_val_loss=np.Inf, n_jobs=1, estimator_class=None, log_training_metric=False,
     fit_kwargs={}
 ):
     estimator_class = estimator_class or get_estimator_class(
@@ -313,7 +319,7 @@ def compute_estimator(
         **config_dic, task=task, n_jobs=n_jobs)
     val_loss, train_loss, train_time, pred_time = evaluate_model(
         estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf, task,
-        eval_method, eval_metric, best_val_loss, train_loss=train_loss,
+        eval_method, eval_metric, best_val_loss, log_training_metric=log_training_metric,
        fit_kwargs=fit_kwargs)
     return estimator, val_loss, train_loss, train_time, pred_time
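
Because custom metrics are now logged (the 'log custom metric' item above), here is a hedged sketch of an eval_metric callable compatible with the test_loss, metrics = eval_metric(...) call shown in get_test_loss. It mirrors the custom_metric helper in the updated tests; the regularization constant is an arbitrary example:

    # hedged sketch of a customized metric function; the second return value
    # is the extra metric information that gets logged as 'logged_metric'
    def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                      weight_test=None, weight_train=None):
        from sklearn.metrics import log_loss
        y_pred = estimator.predict_proba(X_test)
        test_loss = log_loss(y_test, y_pred, labels=labels, sample_weight=weight_test)
        y_pred = estimator.predict_proba(X_train)
        train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
        alpha = 0.5  # arbitrary example weight
        # loss to minimize, plus a dict of metrics to log
        return test_loss * (1 + alpha) - alpha * train_loss, {
            'test_loss': test_loss, 'train_loss': train_loss}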


@@ -222,10 +222,10 @@ class LGBMEstimator(BaseEstimator):
                 'domain': tune.loguniform(lower=1 / 1024, upper=1.0),
                 'init_value': 0.1,
             },
-            'subsample': {
-                'domain': tune.uniform(lower=0.1, upper=1.0),
-                'init_value': 1.0,
-            },
+            # 'subsample': {
+            #     'domain': tune.uniform(lower=0.1, upper=1.0),
+            #     'init_value': 1.0,
+            # },
             'log_max_bin': {
                 'domain': tune.lograndint(lower=3, upper=11),
                 'init_value': 8,
@@ -252,28 +252,30 @@ class LGBMEstimator(BaseEstimator):
     def __init__(self, task='binary:logistic', log_max_bin=8, **params):
         super().__init__(task, **params)
-        # Default: regression for LGBMRegressor,
-        # binary or multiclass for LGBMClassifier
-        if 'regression' in task:
-            objective = 'regression'
-        elif 'binary' in task:
-            objective = 'binary'
-        elif 'multi' in task:
-            objective = 'multiclass'
-        else:
-            objective = 'regression'
+        if "objective" not in self.params:
+            # Default: regression for LGBMRegressor,
+            # binary or multiclass for LGBMClassifier
+            if 'regression' in task:
+                objective = 'regression'
+            elif 'binary' in task:
+                objective = 'binary'
+            elif 'multi' in task:
+                objective = 'multiclass'
+            else:
+                objective = 'regression'
+            self.params["objective"] = objective
         if "n_estimators" in self.params:
             self.params["n_estimators"] = int(round(self.params["n_estimators"]))
         if "num_leaves" in self.params:
             self.params["num_leaves"] = int(round(self.params["num_leaves"]))
         if "min_child_samples" in self.params:
             self.params["min_child_samples"] = int(round(self.params["min_child_samples"]))
-        if "objective" not in self.params:
-            self.params["objective"] = objective
         if "max_bin" not in self.params:
             self.params['max_bin'] = 1 << int(round(log_max_bin)) - 1
         if "verbose" not in self.params:
             self.params['verbose'] = -1
+        # if "subsample_freq" not in self.params:
+        #     self.params['subsample_freq'] = 1
         if 'regression' in task:
             self.estimator_class = LGBMRegressor
         else:
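
The restructured __init__ computes the task-derived default objective only when the caller did not pass one. A hedged sketch of both paths, assuming (as the diff indicates) that extra keyword arguments are kept in self.params:

    # hedged sketch; 'cross_entropy' is only an example of a user-chosen objective
    from flaml.model import LGBMEstimator

    est_default = LGBMEstimator(n_estimators=4)
    print(est_default.params['objective'])   # 'binary', derived from the default task

    est_custom = LGBMEstimator(n_estimators=4, objective='cross_entropy')
    print(est_custom.params['objective'])    # user value kept; default branch skipped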


@@ -748,6 +748,7 @@ class AutoTransformers:
         self._set_metric(custom_metric_name, custom_metric_mode_name)
         self._set_task()
         self._fp16 = fp16
+        ray.shutdown()
         ray.init(local_mode=ray_local_mode)
         self._set_search_space(**custom_hpo_args)


@@ -3,6 +3,7 @@
  * Licensed under the MIT License. See LICENSE file in the
  * project root for license information.
 '''
+from flaml.tune.sample import Domain
 from typing import Dict, Optional, Tuple
 import numpy as np
 try:
@@ -140,7 +141,7 @@ class FLOW2(Searcher):
             if str(sampler) != 'Normal':
                 self._bounded_keys.append(key)
         if not hier:
-            self._space_keys = sorted(self._space.keys())
+            self._space_keys = sorted(self._tunable_keys)
         self._hierarchical = hier
         if (self.prune_attr and self.prune_attr not in self._space
                 and self.max_resource):
@@ -499,18 +500,28 @@ class FLOW2(Searcher):
         else:
             space = self._space
         value_list = []
+        # self._space_keys doesn't contain keys with const values,
+        # e.g., "eval_metric": ["logloss", "error"].
         keys = sorted(config.keys()) if self._hierarchical else self._space_keys
         for key in keys:
             value = config[key]
             if key == self.prune_attr:
                 value_list.append(value)
-            # else key must be in self.space
-            # get rid of list type or constant,
-            # e.g., "eval_metric": ["logloss", "error"]
-            elif isinstance(space[key], sample.Integer):
-                value_list.append(int(round(value)))
             else:
-                value_list.append(value)
+                # key must be in space
+                domain = space[key]
+                if self._hierarchical:
+                    # can't remove constant for hierarchical search space,
+                    # e.g., learner
+                    if not (domain is None or type(domain) in (str, int, float)
+                            or isinstance(domain, sample.Domain)):
+                        # not domain or hashable
+                        # get rid of list type for hierarchical search space.
+                        continue
+                if isinstance(domain, sample.Integer):
+                    value_list.append(int(round(value)))
+                else:
+                    value_list.append(value)
         return tuple(value_list)

     @property
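
The rewritten branch exists because config signatures are used as hashable keys; a standalone illustration (not FLAML code) of why a list-valued constant such as "eval_metric": ["logloss", "error"] has to be dropped from the signature tuple:

    # standalone illustration of the hashability problem
    sig_with_list = ('lgbm', 128, ['logloss', 'error'])
    try:
        hash(sig_with_list)
    except TypeError as err:
        print(err)                      # unhashable type: 'list'

    sig_without_list = ('lgbm', 128)    # dropping the list-valued constant
    print(hash(sig_without_list))       # hashable, usable as a dict/set key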


@@ -16,7 +16,7 @@ class TrainingLogRecord(object):
                  iter_per_learner: int,
                  logged_metric: float,
                  trial_time: float,
-                 total_search_time: float,
+                 wall_clock_time: float,
                  validation_loss,
                  config,
                  best_validation_loss,
@@ -27,7 +27,7 @@ class TrainingLogRecord(object):
         self.iter_per_learner = iter_per_learner
         self.logged_metric = logged_metric
         self.trial_time = trial_time
-        self.total_search_time = total_search_time
+        self.wall_clock_time = wall_clock_time
         self.validation_loss = validation_loss
         self.config = config
         self.best_validation_loss = best_validation_loss
@@ -71,7 +71,7 @@ class TrainingLogWriter(object):
                      it_counter: int,
                      train_loss: float,
                      trial_time: float,
-                     total_search_time: float,
+                     wall_clock_time: float,
                      validation_loss,
                      config,
                      best_validation_loss,
@@ -86,7 +86,7 @@ class TrainingLogWriter(object):
             it_counter,
             train_loss,
             trial_time,
-            total_search_time,
+            wall_clock_time,
             validation_loss,
             config,
             best_validation_loss,
@@ -95,6 +95,7 @@ class TrainingLogWriter(object):
             sample_size)
         if validation_loss < self.current_best_loss or \
                 validation_loss == self.current_best_loss and \
+                self.current_sample_size is not None and \
                 sample_size > self.current_sample_size:
             self.current_best_loss = validation_loss
             self.current_sample_size = sample_size
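
After the rename, downstream readers should use the new attribute name; a hedged sketch using the same reader that get_output_from_log relies on (the log path is illustrative):

    # hedged sketch: iterate an existing training log and print the renamed field
    from flaml.training_log import training_log_reader

    with training_log_reader('test/sparse_classification.log') as reader:
        for record in reader.records():
            # 'wall_clock_time' replaces the old 'total_search_time' attribute
            print(record.wall_clock_time, record.validation_loss, record.learner)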


@@ -363,6 +363,7 @@ def indexof(domain: Dict, config: Dict) -> int:
             continue
         # print(domain.const[i])
         if all(config[key] == value for key, value in domain.const[i].items()):
+            # assumption: the concatenation of constants is a unique identifier
             return i
     return None


@@ -1 +1 @@
-__version__ = "0.5.13"
+__version__ = "0.6.0"

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -10,7 +10,7 @@ from datetime import datetime
 from flaml import AutoML
 from flaml.data import get_output_from_log
-from flaml.model import SKLearnEstimator, XGBoostEstimator
+from flaml.model import LGBMEstimator, SKLearnEstimator, XGBoostEstimator
 from rgf.sklearn import RGFClassifier, RGFRegressor
 from flaml import tune
@@ -92,6 +92,24 @@ class MyXGB2(XGBoostEstimator):
         super().__init__(objective='reg:squarederror', **params)


+class MyLargeLGBM(LGBMEstimator):
+    @classmethod
+    def search_space(cls, **params):
+        return {
+            'n_estimators': {
+                'domain': tune.lograndint(lower=4, upper=32768),
+                'init_value': 32768,
+                'low_cost_init_value': 4,
+            },
+            'num_leaves': {
+                'domain': tune.lograndint(lower=4, upper=32768),
+                'init_value': 32768,
+                'low_cost_init_value': 4,
+            },
+        }
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
@@ -477,6 +495,66 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_iteration)
         print(automl_experiment.best_estimator)

+    def test_parallel_xgboost(self, hpo_method=None):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 10,
+            "metric": 'ap',
+            "task": 'classification',
+            "log_file_name": "test/sparse_classification.log",
+            "estimator_list": ["xgboost"],
+            "log_type": "all",
+            "n_jobs": 1,
+            "n_concurrent_trials": 2,
+            "hpo_method": hpo_method,
+        }
+        X_train = scipy.sparse.eye(900000)
+        y_train = np.random.randint(2, size=900000)
+        try:
+            automl_experiment.fit(X_train=X_train, y_train=y_train,
+                                  **automl_settings)
+            print(automl_experiment.predict(X_train))
+            print(automl_experiment.model)
+            print(automl_experiment.config_history)
+            print(automl_experiment.model_history)
+            print(automl_experiment.best_iteration)
+            print(automl_experiment.best_estimator)
+        except ImportError:
+            return
+
+    def test_parallel_xgboost_random(self):
+        # use random search as the hpo_method
+        self.test_parallel_xgboost(hpo_method='random')
+
+    def test_random_out_of_memory(self):
+        automl_experiment = AutoML()
+        automl_experiment.add_learner(learner_name='large_lgbm', learner_class=MyLargeLGBM)
+        automl_settings = {
+            "time_budget": 2,
+            "metric": 'ap',
+            "task": 'classification',
+            "log_file_name": "test/sparse_classification_oom.log",
+            "estimator_list": ["large_lgbm"],
+            "log_type": "all",
+            "n_jobs": 1,
+            "n_concurrent_trials": 2,
+            "hpo_method": 'random',
+        }
+        X_train = scipy.sparse.eye(900000)
+        y_train = np.random.randint(2, size=900000)
+        try:
+            automl_experiment.fit(X_train=X_train, y_train=y_train,
+                                  **automl_settings)
+            print(automl_experiment.predict(X_train))
+            print(automl_experiment.model)
+            print(automl_experiment.config_history)
+            print(automl_experiment.model_history)
+            print(automl_experiment.best_iteration)
+            print(automl_experiment.best_estimator)
+        except ImportError:
+            return
+
     def test_sparse_matrix_lr(self):
         automl_experiment = AutoML()
         automl_settings = {


@@ -17,6 +17,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
         "metric": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
         "task": 'classification',  # task type
         "log_file_name": 'airlines_experiment.log',  # flaml log file
+        "seed": 7654321,  # random seed
     }
     '''The main flaml automl API'''
     automl.fit(X_train=X_train, y_train=y_train, **settings)


@@ -45,7 +45,7 @@ class TestLogging(unittest.TestCase):
                 **automl_settings)
             logger.info(automl.search_space)
             logger.info(automl.low_cost_partial_config)
-            logger.info(automl.points_to_evalaute)
+            logger.info(automl.points_to_evaluate)
             logger.info(automl.cat_hp_cost)
             import optuna as ot
             study = ot.create_study()
@@ -62,16 +62,18 @@ class TestLogging(unittest.TestCase):
             config['learner'] = automl.best_estimator
             automl.trainable({"ml": config})
             from flaml import tune, CFO
+            from flaml.automl import size
+            from functools import partial
             search_alg = CFO(
                 metric='val_loss',
                 space=automl.search_space,
                 low_cost_partial_config=automl.low_cost_partial_config,
-                points_to_evaluate=automl.points_to_evalaute,
+                points_to_evaluate=automl.points_to_evaluate,
                 cat_hp_cost=automl.cat_hp_cost,
                 prune_attr=automl.prune_attr,
                 min_resource=automl.min_resource,
                 max_resource=automl.max_resource,
-                config_constraints=[(automl.size, '<=', automl._mem_thres)],
+                config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
                 metric_constraints=automl.metric_constraints)
             analysis = tune.run(
                 automl.trainable, search_alg=search_alg,  # verbose=2,


@@ -40,6 +40,7 @@ def test_simple(method=None):
         "n_jobs": 1,
         "hpo_method": method,
         "log_type": "all",
+        "retrain_full": "budget",
         "time_budget": 1
     }
     from sklearn.externals._arff import ArffException
@@ -53,21 +54,23 @@ def test_simple(method=None):
     automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl.estimator_list)
     print(automl.search_space)
-    print(automl.points_to_evalaute)
+    print(automl.points_to_evaluate)
     config = automl.best_config.copy()
     config['learner'] = automl.best_estimator
     automl.trainable(config)
     from flaml import tune
+    from flaml.automl import size
+    from functools import partial
     analysis = tune.run(
         automl.trainable, automl.search_space, metric='val_loss', mode="min",
         low_cost_partial_config=automl.low_cost_partial_config,
-        points_to_evaluate=automl.points_to_evalaute,
+        points_to_evaluate=automl.points_to_evaluate,
         cat_hp_cost=automl.cat_hp_cost,
         prune_attr=automl.prune_attr,
         min_resource=automl.min_resource,
         max_resource=automl.max_resource,
         time_budget_s=automl._state.time_budget,
-        config_constraints=[(automl.size, '<=', automl._mem_thres)],
+        config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
         metric_constraints=automl.metric_constraints, num_samples=5)
     print(analysis.trials[-1])


@@ -27,6 +27,8 @@ def test_blendsearch_tune(smoke_test=True):
     except ImportError:
         print('ray[tune] is not installed, skipping test')
         return
+    import numpy as np
+
     algo = BlendSearch()
     algo = ConcurrencyLimiter(algo, max_concurrent=4)
     scheduler = AsyncHyperBandScheduler()
@@ -42,7 +44,8 @@ def test_blendsearch_tune(smoke_test=True):
             "width": tune.uniform(0, 20),
             "height": tune.uniform(-100, 100),
             # This is an ignored parameter.
-            "activation": tune.choice(["relu", "tanh"])
+            "activation": tune.choice(["relu", "tanh"]),
+            "test4": np.zeros((3, 1)),
         })
     print("Best hyperparameters found were: ", analysis.best_config)


@@ -63,6 +63,7 @@ def _test_xgboost(method='BlendSearch'):
     time_budget_s = 60
     for n_cpu in [4]:
         start_time = time.time()
+        ray.shutdown()
         ray.init(num_cpus=n_cpu, num_gpus=0)
         # ray.init(address='auto')
         if method == 'BlendSearch':