warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type
Chi Wang 2021-09-04 01:42:21 -07:00 committed by GitHub
parent 5fdfa2559b
commit e46573a01d
26 changed files with 599 additions and 707 deletions

View File

@ -5,7 +5,6 @@
''' '''
import time import time
from typing import Callable, Optional from typing import Callable, Optional
import warnings
from functools import partial from functools import partial
import numpy as np import numpy as np
from scipy.sparse import issparse from scipy.sparse import issparse
@ -144,9 +143,8 @@ class SearchState:
class AutoMLState: class AutoMLState:
def _prepare_sample_train_data(self, sample_size): def _prepare_sample_train_data(self, sample_size):
full_size = len(self.y_train)
sampled_weight = groups = None sampled_weight = groups = None
if sample_size <= full_size: if sample_size <= self.data_size:
if isinstance(self.X_train, pd.DataFrame): if isinstance(self.X_train, pd.DataFrame):
sampled_X_train = self.X_train.iloc[:sample_size] sampled_X_train = self.X_train.iloc[:sample_size]
else: else:
@ -225,13 +223,13 @@ class AutoMLState:
self, estimator, config_w_resource, sample_size=None self, estimator, config_w_resource, sample_size=None
): ):
if not sample_size: if not sample_size:
sample_size = config_w_resource['FLAML_sample_size'] sample_size = config_w_resource.get(
'FLAML_sample_size', len(self.y_train_all))
config = config_w_resource.get('ml', config_w_resource).copy() config = config_w_resource.get('ml', config_w_resource).copy()
if 'FLAML_sample_size' in config: if 'FLAML_sample_size' in config:
del config['FLAML_sample_size'] del config['FLAML_sample_size']
if "learner" in config: if "learner" in config:
del config['learner'] del config["learner"]
assert sample_size is not None
sampled_X_train, sampled_y_train, sampled_weight, groups = \ sampled_X_train, sampled_y_train, sampled_weight, groups = \
self._prepare_sample_train_data(sample_size) self._prepare_sample_train_data(sample_size)
if sampled_weight is not None: if sampled_weight is not None:
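
For illustration, a hypothetical config_w_resource of the shape the code above receives when data sampling is enabled, and the extraction it performs (all values are made up):

# Hypothetical trial config: learner hyperparameters live under 'ml',
# and 'FLAML_sample_size' records the sampled training-set size.
y_train_all = list(range(20000))
config_w_resource = {
    'ml': {'learner': 'lgbm', 'n_estimators': 31, 'num_leaves': 12},
    'FLAML_sample_size': 10000,
}
sample_size = config_w_resource.get('FLAML_sample_size', len(y_train_all))
config = config_w_resource.get('ml', config_w_resource).copy()
config.pop('FLAML_sample_size', None)  # same effect as the guarded del above
config.pop('learner', None)
assert sample_size == 10000
assert config == {'n_estimators': 31, 'num_leaves': 12}
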
@ -316,10 +314,7 @@ class AutoML:
'''An object with `predict()` and `predict_proba()` method (for '''An object with `predict()` and `predict_proba()` method (for
classification), storing the best trained model. classification), storing the best trained model.
''' '''
if self._trained_estimator: return self.__dict__.get('_trained_estimator')
return self._trained_estimator
else:
return None
def best_model_for_estimator(self, estimator_name): def best_model_for_estimator(self, estimator_name):
'''Return the best model found for a particular estimator '''Return the best model found for a particular estimator
@ -331,11 +326,8 @@ class AutoML:
An object with `predict()` and `predict_proba()` method (for An object with `predict()` and `predict_proba()` method (for
classification), storing the best trained model for estimator_name. classification), storing the best trained model for estimator_name.
''' '''
if estimator_name in self._search_states: state = self._search_states.get(estimator_name)
state = self._search_states[estimator_name] return state and getattr(state, 'trained_estimator', None)
if hasattr(state, 'trained_estimator'):
return state.trained_estimator
return None
@property @property
def best_estimator(self): def best_estimator(self):
@ -374,10 +366,12 @@ class AutoML:
@property @property
def classes_(self): def classes_(self):
'''A list of n_classes elements for class labels.''' '''A list of n_classes elements for class labels.'''
if self._label_transformer: attr = getattr(self, "_label_transformer", None)
return self._label_transformer.classes_.tolist() if attr:
if self._trained_estimator: return attr.classes_.tolist()
return self._trained_estimator.classes_.tolist() attr = getattr(self, "_trained_estimator", None)
if attr:
return attr.classes_.tolist()
return None return None
def predict(self, X_test): def predict(self, X_test):
@ -394,12 +388,13 @@ class AutoML:
A array-like of shape n * 1 - - each element is a predicted A array-like of shape n * 1 - - each element is a predicted
label for an instance. label for an instance.
''' '''
if self._trained_estimator is None: estimator = getattr(self, "_trained_estimator", None)
warnings.warn( if estimator is None:
logger.warning(
"No estimator is trained. Please run fit with enough budget.") "No estimator is trained. Please run fit with enough budget.")
return None return None
X_test = self._preprocess(X_test) X_test = self._preprocess(X_test)
y_pred = self._trained_estimator.predict(X_test) y_pred = estimator.predict(X_test)
if y_pred.ndim > 1 and isinstance(y_pred, np.ndarray): if y_pred.ndim > 1 and isinstance(y_pred, np.ndarray):
y_pred = y_pred.flatten() y_pred = y_pred.flatten()
if self._label_transformer: if self._label_transformer:
@ -443,10 +438,9 @@ class AutoML:
dataframe = dataframe.copy() dataframe = dataframe.copy()
dataframe = dataframe.rename(columns={label[0]: 'ds', label[1]: 'y'}) dataframe = dataframe.rename(columns={label[0]: 'ds', label[1]: 'y'})
elif dataframe is not None: elif dataframe is not None:
if ('ds' not in dataframe) or ('y' not in dataframe): assert 'ds' in dataframe and 'y' in dataframe, (
raise ValueError( 'For forecasting task, dataframe must have columns '
'For forecasting task, dataframe must have columns "ds" and "y" ' '"ds" and "y" with the dates and values respectively.')
'with the dates and values respectively.')
elif (X_train_all is not None) and (y_train_all is not None): elif (X_train_all is not None) and (y_train_all is not None):
dataframe = pd.DataFrame(X_train_all) dataframe = pd.DataFrame(X_train_all)
dataframe = dataframe.rename(columns={dataframe.columns[0]: 'ds'}) dataframe = dataframe.rename(columns={dataframe.columns[0]: 'ds'})
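
For reference, a minimal dataframe in the shape the forecasting branch now asserts; the dates and values below are illustrative:

import pandas as pd

# task='forecast' expects a 'ds' column with timestamps and a 'y'
# column with the observed values.
dataframe = pd.DataFrame({
    'ds': pd.date_range('2021-01-01', periods=12, freq='M'),
    'y': list(range(12)),
})
assert 'ds' in dataframe and 'y' in dataframe
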
@ -456,30 +450,29 @@ class AutoML:
label = 'y' label = 'y'
if X_train_all is not None and y_train_all is not None: if X_train_all is not None and y_train_all is not None:
if not (isinstance(X_train_all, np.ndarray) or issparse(X_train_all) assert (
or isinstance(X_train_all, pd.DataFrame)): isinstance(X_train_all, np.ndarray) or issparse(X_train_all)
raise ValueError( or isinstance(X_train_all, pd.DataFrame)), (
"X_train_all must be a numpy array, a pandas dataframe, " "X_train_all must be a numpy array, a pandas dataframe, "
"or Scipy sparse matrix.") "or Scipy sparse matrix.")
if not (isinstance(y_train_all, np.ndarray) assert (
or isinstance(y_train_all, pd.Series)): isinstance(y_train_all, np.ndarray)
raise ValueError( or isinstance(y_train_all, pd.Series)), (
"y_train_all must be a numpy array or a pandas series.") "y_train_all must be a numpy array or a pandas series.")
if X_train_all.size == 0 or y_train_all.size == 0: assert X_train_all.size != 0 and y_train_all.size != 0, (
raise ValueError("Input data must not be empty.") "Input data must not be empty.")
if isinstance(y_train_all, np.ndarray): if isinstance(y_train_all, np.ndarray):
y_train_all = y_train_all.flatten() y_train_all = y_train_all.flatten()
if X_train_all.shape[0] != y_train_all.shape[0]: assert X_train_all.shape[0] == y_train_all.shape[0], (
raise ValueError( "# rows in X_train must match length of y_train.")
"# rows in X_train must match length of y_train.")
self._df = isinstance(X_train_all, pd.DataFrame) self._df = isinstance(X_train_all, pd.DataFrame)
self._nrow, self._ndim = X_train_all.shape self._nrow, self._ndim = X_train_all.shape
X, y = X_train_all, y_train_all X, y = X_train_all, y_train_all
elif dataframe is not None and label is not None: elif dataframe is not None and label is not None:
if not isinstance(dataframe, pd.DataFrame): assert isinstance(dataframe, pd.DataFrame), (
raise ValueError("dataframe must be a pandas DataFrame") "dataframe must be a pandas DataFrame")
if label not in dataframe.columns: assert label in dataframe.columns, (
raise ValueError("label must a column name in dataframe") "label must a column name in dataframe")
self._df = True self._df = True
X = dataframe.drop(columns=label) X = dataframe.drop(columns=label)
self._nrow, self._ndim = X.shape self._nrow, self._ndim = X.shape
@ -498,23 +491,21 @@ class AutoML:
self._label_transformer = self._transformer.label_transformer self._label_transformer = self._transformer.label_transformer
self._sample_weight_full = self._state.fit_kwargs.get('sample_weight') self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
if X_val is not None and y_val is not None: if X_val is not None and y_val is not None:
if not (isinstance(X_val, np.ndarray) or issparse(X_val) assert (
or isinstance(X_val, pd.DataFrame)): isinstance(X_val, np.ndarray) or issparse(X_val)
raise ValueError( or isinstance(X_val, pd.DataFrame)), (
"X_val must be None, a numpy array, a pandas dataframe, " "X_val must be None, a numpy array, a pandas dataframe, "
"or Scipy sparse matrix.") "or Scipy sparse matrix.")
if not (isinstance(y_val, np.ndarray) assert (
or isinstance(y_val, pd.Series)): isinstance(y_val, np.ndarray) or isinstance(y_val, pd.Series)
raise ValueError( ), "y_val must be None, a numpy array or a pandas series."
"y_val must be None, a numpy array or a pandas series.") assert X_val.size != 0 and y_val.size != 0, (
if X_val.size == 0 or y_val.size == 0: "Validation data are expected to be nonempty. "
raise ValueError( "Use None for X_val and y_val if no validation data.")
"Validation data are expected to be nonempty. "
"Use None for X_val and y_val if no validation data.")
if isinstance(y_val, np.ndarray): if isinstance(y_val, np.ndarray):
y_val = y_val.flatten() y_val = y_val.flatten()
if X_val.shape[0] != y_val.shape[0]: assert X_val.shape[0] == y_val.shape[0], (
raise ValueError("# rows in X_val must match length of y_val.") "# rows in X_val must match length of y_val.")
if self._transformer: if self._transformer:
self._state.X_val = self._transformer.transform(X_val) self._state.X_val = self._transformer.transform(X_val)
else: else:
@ -548,7 +539,7 @@ class AutoML:
X_train_all, y_train_all = self._X_train_all, self._y_train_all X_train_all, y_train_all = self._X_train_all, self._y_train_all
if issparse(X_train_all): if issparse(X_train_all):
X_train_all = X_train_all.tocsr() X_train_all = X_train_all.tocsr()
if self._state.task in ('binary:logistic', 'multi:softmax') \ if self._state.task in ('binary', 'multi') \
and self._state.fit_kwargs.get('sample_weight') is None \ and self._state.fit_kwargs.get('sample_weight') is None \
and self._split_type != 'time': and self._split_type != 'time':
# logger.info(f"label {pd.unique(y_train_all)}") # logger.info(f"label {pd.unique(y_train_all)}")
@ -638,7 +629,7 @@ class AutoML:
y_train, y_val = y_train_all[train_idx], y_train_all[val_idx] y_train, y_val = y_train_all[train_idx], y_train_all[val_idx]
self._state.groups, self._state.groups_val = self._state.groups[ self._state.groups, self._state.groups_val = self._state.groups[
train_idx], self._state.groups[val_idx] train_idx], self._state.groups[val_idx]
elif self._state.task != 'regression': elif self._state.task in ('binary', 'multi'):
# for classification, make sure the labels are complete in both # for classification, make sure the labels are complete in both
# training and validation data # training and validation data
label_set, first = np.unique(y_train_all, return_index=True) label_set, first = np.unique(y_train_all, return_index=True)
@ -760,7 +751,7 @@ class AutoML:
record_id: An integer of the record ID in the file, record_id: An integer of the record ID in the file,
0 corresponds to the first trial 0 corresponds to the first trial
task: A string of the task type, task: A string of the task type,
'binary', 'multi', or 'regression' 'binary', 'multi', 'regression', 'forecast', 'rank'
Returns: Returns:
An estimator object for the given configuration An estimator object for the given configuration
@ -875,9 +866,10 @@ class AutoML:
best_val_loss = val_loss best_val_loss = val_loss
sample_size = size sample_size = size
if not training_duration: if not training_duration:
logger.warning(
f"No estimator found within time_budget={time_budget}")
from .model import BaseEstimator as Estimator from .model import BaseEstimator as Estimator
self._trained_estimator = Estimator() self._trained_estimator = Estimator()
self._trained_estimator.model = None
return training_duration return training_duration
if not best: if not best:
return return
@ -898,11 +890,7 @@ class AutoML:
elif eval_method == 'auto': elif eval_method == 'auto':
eval_method = self._decide_eval_method(time_budget) eval_method = self._decide_eval_method(time_budget)
self.modelcount = 0 self.modelcount = 0
if self._state.task != 'forecast': self._prepare_data(eval_method, split_ratio, n_splits)
self._prepare_data(eval_method, split_ratio, n_splits)
else:
self._prepare_data(eval_method, split_ratio, n_splits,
period=self._state.fit_kwargs['period'])
self._state.time_budget = None self._state.time_budget = None
self._state.n_jobs = n_jobs self._state.n_jobs = n_jobs
self._trained_estimator = self._state._train_with_config( self._trained_estimator = self._state._train_with_config(
@ -911,9 +899,10 @@ class AutoML:
return training_duration return training_duration
def _decide_split_type(self, split_type): def _decide_split_type(self, split_type):
if self._state.task in ('classification', 'binary', 'multi'): if self._state.task == 'classification':
self._state.task = get_classification_objective( self._state.task = get_classification_objective(
len(np.unique(self._y_train_all))) len(np.unique(self._y_train_all)))
if self._state.task in ('binary', 'multi'):
assert split_type in [None, "stratified", "uniform", "time"] assert split_type in [None, "stratified", "uniform", "time"]
self._split_type = split_type or "stratified" self._split_type = split_type or "stratified"
elif self._state.task == 'regression': elif self._state.task == 'regression':
@ -1248,13 +1237,14 @@ class AutoML:
For time series forecasting, must be None or 'time'. For time series forecasting, must be None or 'time'.
For ranking task, must be None or 'group'. For ranking task, must be None or 'group'.
hpo_method: str or None, default=None | The hyperparameter hpo_method: str or None, default=None | The hyperparameter
optimization method. When it is None, CFO is used. optimization method. By default, CFO is used for sequential
search and BlendSearch is used for parallel search.
No need to set when using flaml's default search space or using No need to set when using flaml's default search space or using
a simple customized search space. When set to 'bs', BlendSearch a simple customized search space. When set to 'bs', BlendSearch
is used. BlendSearch can be tried when the search space is is used. BlendSearch can be tried when the search space is
complex, for example, containing multiple disjoint, discontinuous complex, for example, containing multiple disjoint, discontinuous
subspaces. When set to 'random' and the argument 'n_concurrent_trials' subspaces. When set to 'random' and the argument
is larger than 1, RandomSearch is used. `n_concurrent_trials` is larger than 1, random search is used.
starting_points: A dictionary to specify the starting hyperparameter starting_points: A dictionary to specify the starting hyperparameter
config for the estimators. config for the estimators.
Keys are the name of the estimators, and values are the starting Keys are the name of the estimators, and values are the starting
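
A usage sketch of the behavior described above, on a made-up dataset: with n_concurrent_trials=1 (the default) CFO is picked automatically, and passing hpo_method='bs' opts into BlendSearch.

from sklearn.datasets import make_classification
from flaml import AutoML

X_train, y_train = make_classification(n_samples=1000, random_state=0)
automl = AutoML()
# hpo_method can normally be left unset; 'bs' forces BlendSearch, which
# can help with complex or disjoint custom search spaces.
automl.fit(X_train, y_train, task='binary', time_budget=10, hpo_method='bs')
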
@ -1355,8 +1345,7 @@ class AutoML:
estimator_list)) estimator_list))
self.estimator_list = estimator_list self.estimator_list = estimator_list
self._hpo_method = hpo_method or ( self._hpo_method = hpo_method or (
'cfo' if n_concurrent_trials == 1 or len(estimator_list) == 1 'cfo' if n_concurrent_trials == 1 else 'bs')
else 'bs')
self._state.time_budget = time_budget self._state.time_budget = time_budget
self._active_estimators = estimator_list.copy() self._active_estimators = estimator_list.copy()
self._ensemble = ensemble self._ensemble = ensemble
@ -1379,14 +1368,16 @@ class AutoML:
if self._best_estimator: if self._best_estimator:
logger.info("fit succeeded") logger.info("fit succeeded")
logger.info(f"Time taken to find the best model: {self._time_taken_best_iter}") logger.info(f"Time taken to find the best model: {self._time_taken_best_iter}")
if self._time_taken_best_iter >= time_budget * 0.7 and not all( if self._hpo_method in ('cfo', 'bs') and (
self._time_taken_best_iter >= time_budget * 0.7) and not all(
state.search_alg and state.search_alg.searcher.is_ls_ever_converged state.search_alg and state.search_alg.searcher.is_ls_ever_converged
for state in self._search_states.values() for state in self._search_states.values()
): ):
logger.warn("Time taken to find the best model is {0:.0f}% of the " logger.warning(
"provided time budget and not all estimators' hyperparameter " "Time taken to find the best model is {0:.0f}% of the "
"search converged. Consider increasing the time budget.".format( "provided time budget and not all estimators' hyperparameter "
self._time_taken_best_iter / time_budget * 100)) "search converged. Consider increasing the time budget.".format(
self._time_taken_best_iter / time_budget * 100))
if not keep_search_state: if not keep_search_state:
# release space # release space
@ -1413,20 +1404,16 @@ class AutoML:
"Please run pip install flaml[ray]") "Please run pip install flaml[ray]")
if self._hpo_method in ('cfo', 'grid'): if self._hpo_method in ('cfo', 'grid'):
from flaml import CFO as SearchAlgo from flaml import CFO as SearchAlgo
elif 'optuna' == self._hpo_method:
from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
elif 'bs' == self._hpo_method: elif 'bs' == self._hpo_method:
from flaml import BlendSearch as SearchAlgo from flaml import BlendSearch as SearchAlgo
elif 'cfocat' == self._hpo_method:
from flaml.searcher.cfo_cat import CFOCat as SearchAlgo
elif 'random' == self._hpo_method: elif 'random' == self._hpo_method:
from ray.tune.suggest import BasicVariantGenerator as SearchAlgo from ray.tune.suggest import BasicVariantGenerator as SearchAlgo
from ray.tune.sample import Domain as RayDomain from ray.tune.sample import Domain
from .tune.sample import Domain
else: else:
raise NotImplementedError( raise NotImplementedError(
f"hpo_method={self._hpo_method} is not recognized. " f"hpo_method={self._hpo_method} is not recognized. "
"'cfo' and 'bs' are supported.") "'cfo' and 'bs' are supported.")
space = self.search_space
if self._hpo_method == 'random': if self._hpo_method == 'random':
# Any point in points_to_evaluate must consist of hyperparameters # that are tunable, which can be identified by checking whether
# that are tunable, which can be identified by checking whether # that are tunable, which can be identified by checking whether
@ -1434,19 +1421,19 @@ class AutoML:
# the 'Domain' class from flaml or ray.tune # the 'Domain' class from flaml or ray.tune
points_to_evaluate = self.points_to_evaluate.copy() points_to_evaluate = self.points_to_evaluate.copy()
to_del = [] to_del = []
for k, v in self.search_space.items(): for k, v in space.items():
if not (isinstance(v, Domain) or isinstance(v, RayDomain)): if not isinstance(v, Domain):
to_del.append(k) to_del.append(k)
for k in to_del: for k in to_del:
for p in points_to_evaluate: for p in points_to_evaluate:
del p[k] if k in p:
del p[k]
search_alg = SearchAlgo(max_concurrent=self._n_concurrent_trials, search_alg = SearchAlgo(
points_to_evaluate=points_to_evaluate) max_concurrent=self._n_concurrent_trials,
points_to_evaluate=points_to_evaluate)
else: else:
search_alg = SearchAlgo( search_alg = SearchAlgo(
metric='val_loss', metric='val_loss', space=space,
space=self.search_space,
low_cost_partial_config=self.low_cost_partial_config, low_cost_partial_config=self.low_cost_partial_config,
points_to_evaluate=self.points_to_evaluate, points_to_evaluate=self.points_to_evaluate,
cat_hp_cost=self.cat_hp_cost, cat_hp_cost=self.cat_hp_cost,
@ -1463,7 +1450,7 @@ class AutoML:
resources_per_trial = { resources_per_trial = {
"cpu": self._state.n_jobs} if self._state.n_jobs > 1 else None "cpu": self._state.n_jobs} if self._state.n_jobs > 1 else None
analysis = ray.tune.run( analysis = ray.tune.run(
self.trainable, search_alg=search_alg, config=self.search_space, self.trainable, search_alg=search_alg, config=space,
metric='val_loss', mode='min', resources_per_trial=resources_per_trial, metric='val_loss', mode='min', resources_per_trial=resources_per_trial,
time_budget_s=self._state.time_budget, num_samples=self._max_iter, time_budget_s=self._state.time_budget, num_samples=self._max_iter,
verbose=self.verbose) verbose=self.verbose)
@ -1521,6 +1508,7 @@ class AutoML:
from flaml import CFO as SearchAlgo from flaml import CFO as SearchAlgo
elif 'optuna' == self._hpo_method: elif 'optuna' == self._hpo_method:
try: try:
from ray import __version__ as ray_version
assert ray_version >= '1.0.0' assert ray_version >= '1.0.0'
from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
except (ImportError, AssertionError): except (ImportError, AssertionError):
@ -1600,7 +1588,9 @@ class AutoML:
else: else:
algo = SearchAlgo( algo = SearchAlgo(
metric='val_loss', mode='min', space=search_space, metric='val_loss', mode='min', space=search_space,
points_to_evaluate=points_to_evaluate, points_to_evaluate=points_to_evaluate
if len(search_state.init_config) == len(
search_space) else None,
) )
search_state.search_alg = ConcurrencyLimiter(algo, search_state.search_alg = ConcurrencyLimiter(algo,
max_concurrent=1) max_concurrent=1)
@ -1710,13 +1700,16 @@ class AutoML:
search_state.best_loss, search_state.best_loss,
self._best_estimator, self._best_estimator,
self._state.best_loss)) self._state.best_loss))
if all(state.search_alg and state.search_alg.searcher.is_ls_ever_converged if self._hpo_method in ('cfo', 'bs') and all(
for state in self._search_states.values()) and ( state.search_alg and state.search_alg.searcher.is_ls_ever_converged
self._state.time_from_start for state in self._search_states.values()) and (
> self._warn_threshold * self._time_taken_best_iter): self._state.time_from_start
logger.warn("All estimator hyperparameters local search has converged at least once, " > self._warn_threshold * self._time_taken_best_iter):
f"and the total search time exceeds {self._warn_threshold} times the time taken " logger.warning(
"to find the best model.") "All estimator hyperparameters local search has "
"converged at least once, and the total search time "
f"exceeds {self._warn_threshold} times the time taken "
"to find the best model.")
self._warn_threshold *= 10 self._warn_threshold *= 10
else: else:
logger.info(f"no enough budget for learner {estimator}") logger.info(f"no enough budget for learner {estimator}")
@ -1766,6 +1759,8 @@ class AutoML:
self._best_estimator = None self._best_estimator = None
self._retrained_config = {} self._retrained_config = {}
self._warn_threshold = 10 self._warn_threshold = 10
self._selected = None
self.modelcount = 0
if self._n_concurrent_trials == 1: if self._n_concurrent_trials == 1:
self._search_sequential() self._search_sequential()
@ -1782,7 +1777,7 @@ class AutoML:
if self._trained_estimator: if self._trained_estimator:
logger.info(f'selected model: {self._trained_estimator.model}') logger.info(f'selected model: {self._trained_estimator.model}')
if self._ensemble and self._state.task in ( if self._ensemble and self._state.task in (
'binary:logistic', 'multi:softmax', 'regression', 'binary', 'multi', 'regression',
): ):
search_states = list(x for x in self._search_states.items() search_states = list(x for x in self._search_states.items()
if x[1].trained_estimator) if x[1].trained_estimator)
@ -1795,7 +1790,7 @@ class AutoML:
logger.info(estimators) logger.info(estimators)
if len(estimators) <= 1: if len(estimators) <= 1:
return return
if self._state.task in ('binary:logistic', 'multi:softmax'): if self._state.task in ('binary', 'multi'):
from sklearn.ensemble import StackingClassifier as Stacker from sklearn.ensemble import StackingClassifier as Stacker
else: else:
from sklearn.ensemble import StackingRegressor as Stacker from sklearn.ensemble import StackingRegressor as Stacker
@ -1838,9 +1833,6 @@ class AutoML:
else: else:
logger.info( logger.info(
"not retraining because the time budget is too small.") "not retraining because the time budget is too small.")
else:
self._selected = self._trained_estimator = None
self.modelcount = 0
if self.model and mlflow is not None and mlflow.active_run(): if self.model and mlflow is not None and mlflow.active_run():
mlflow.sklearn.log_model(self.model, 'best_model') mlflow.sklearn.log_model(self.model, 'best_model')
@ -1886,8 +1878,7 @@ class AutoML:
speed = delta_loss / delta_time speed = delta_loss / delta_time
if speed: if speed:
estimated_cost = max(2 * gap / speed, estimated_cost) estimated_cost = max(2 * gap / speed, estimated_cost)
if estimated_cost == 0: estimated_cost = estimated_cost or 1e-10
estimated_cost = 1e-10
inv.append(1 / estimated_cost) inv.append(1 / estimated_cost)
else: else:
estimated_cost = self._eci[i] estimated_cost = self._eci[i]

View File

@ -261,7 +261,7 @@ class DataTransformer:
cat_columns, num_columns, datetime_columns cat_columns, num_columns, datetime_columns
self._drop = drop self._drop = drop
if task in ('binary:logistic', 'multi:softmax'): if task in ('binary', 'multi', 'classification'):
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
self.label_transformer = LabelEncoder() self.label_transformer = LabelEncoder()
y = self.label_transformer.fit_transform(y) y = self.label_transformer.fit_transform(y)
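
The label transformer is a plain scikit-learn LabelEncoder; a quick illustration of what it does to classification labels:

from sklearn.preprocessing import LabelEncoder

label_transformer = LabelEncoder()
y = label_transformer.fit_transform(['cat', 'dog', 'cat'])
print(y)                           # [0 1 0]
print(label_transformer.classes_)  # ['cat' 'dog']
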

View File

@ -24,7 +24,7 @@ def get_estimator_class(task, estimator_name):
''' when adding a new learner, need to add an elif branch ''' ''' when adding a new learner, need to add an elif branch '''
if 'xgboost' == estimator_name: if 'xgboost' == estimator_name:
if 'regression' in task: if 'regression' == task:
estimator_class = XGBoostEstimator estimator_class = XGBoostEstimator
else: else:
estimator_class = XGBoostSklearnEstimator estimator_class = XGBoostSklearnEstimator
@ -179,7 +179,8 @@ def _eval_estimator(config, estimator, X_train, y_train, X_test, y_test, weight_
fit_kwargs.get('groups')) fit_kwargs.get('groups'))
if isinstance(metric_for_logging, dict): if isinstance(metric_for_logging, dict):
pred_time = metric_for_logging.get('pred_time', 0) pred_time = metric_for_logging.get('pred_time', 0)
test_pred_y = None # eval_metric may return test_pred_y but not necessarily. Setting None for now. test_pred_y = None
# eval_metric may return test_pred_y but not necessarily. Setting None for now.
return test_loss, metric_for_logging, pred_time, test_pred_y return test_loss, metric_for_logging, pred_time, test_pred_y
@ -193,10 +194,10 @@ def get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_te
# fit_kwargs['X_val'] = X_test # fit_kwargs['X_val'] = X_test
# fit_kwargs['y_val'] = y_test # fit_kwargs['y_val'] = y_test
estimator.fit(X_train, y_train, budget, **fit_kwargs) estimator.fit(X_train, y_train, budget, **fit_kwargs)
test_loss, metric_for_logging, pred_time, _ = _eval_estimator(config, estimator, test_loss, metric_for_logging, pred_time, _ = _eval_estimator(
X_train, y_train, X_test, y_test, config, estimator, X_train, y_train, X_test, y_test,
weight_test, groups_test, eval_metric, obj, weight_test, groups_test, eval_metric, obj,
labels, log_training_metric, fit_kwargs) labels, log_training_metric, fit_kwargs)
train_time = time.time() - start train_time = time.time() - start
return test_loss, metric_for_logging, train_time, pred_time return test_loss, metric_for_logging, train_time, pred_time
@ -212,7 +213,7 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
valid_fold_num = total_fold_num = 0 valid_fold_num = total_fold_num = 0
n = kf.get_n_splits() n = kf.get_n_splits()
X_train_split, y_train_split = X_train_all, y_train_all X_train_split, y_train_split = X_train_all, y_train_all
if task == 'binary:logistics' or task == 'multi:softmax': if task in ('binary', 'multi'):
labels = np.unique(y_train_all) labels = np.unique(y_train_all)
else: else:
labels = None labels = None
@ -346,9 +347,9 @@ def train_estimator(
def get_classification_objective(num_labels: int) -> str: def get_classification_objective(num_labels: int) -> str:
if num_labels == 2: if num_labels == 2:
objective_name = 'binary:logistic' objective_name = 'binary'
else: else:
objective_name = 'multi:softmax' objective_name = 'multi'
return objective_name return objective_name
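
A sanity check of the renamed task labels (assuming the function is importable from flaml.ml):

from flaml.ml import get_classification_objective

assert get_classification_objective(2) == 'binary'
assert get_classification_objective(5) == 'multi'
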

View File

@ -3,7 +3,6 @@
* Licensed under the MIT License. * Licensed under the MIT License.
''' '''
import warnings
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
import time import time
@ -31,12 +30,12 @@ class BaseEstimator:
for both regression and classification for both regression and classification
''' '''
def __init__(self, task='binary:logistic', **params): def __init__(self, task='binary', **params):
'''Constructor '''Constructor
Args: Args:
task: A string of the task type, one of task: A string of the task type, one of
'binary:logistic', 'multi:softmax', 'regression' 'binary', 'multi', 'regression', 'rank', 'forecast'
n_jobs: An integer of the number of parallel threads n_jobs: An integer of the number of parallel threads
params: A dictionary of the hyperparameter names and values params: A dictionary of the hyperparameter names and values
''' '''
@ -48,7 +47,7 @@ class BaseEstimator:
del self.params['_estimator_type'] del self.params['_estimator_type']
else: else:
self._estimator_type = "classifier" if task in ( self._estimator_type = "classifier" if task in (
'binary:logistic', 'multi:softmax') else "regressor" 'binary', 'multi') else "regressor"
def get_params(self, deep=False): def get_params(self, deep=False):
params = self.params.copy() params = self.params.copy()
@ -145,11 +144,10 @@ class BaseEstimator:
Each element at (i,j) is the probability for instance i to be in Each element at (i,j) is the probability for instance i to be in
class j class j
''' '''
if 'regression' in self._task: assert self._task in ('binary', 'multi'), (
raise ValueError('Regression tasks do not support predict_prob') 'predict_proba() is only for classification tasks.')
else: X_test = self._preprocess(X_test)
X_test = self._preprocess(X_test) return self._model.predict_proba(X_test)
return self._model.predict_proba(X_test)
def cleanup(self): def cleanup(self):
pass pass
@ -193,7 +191,7 @@ class BaseEstimator:
class SKLearnEstimator(BaseEstimator): class SKLearnEstimator(BaseEstimator):
def __init__(self, task='binary:logistic', **params): def __init__(self, task='binary', **params):
super().__init__(task, **params) super().__init__(task, **params)
def _preprocess(self, X): def _preprocess(self, X):
@ -264,21 +262,18 @@ class LGBMEstimator(BaseEstimator):
n_estimators = int(round(config['n_estimators'])) n_estimators = int(round(config['n_estimators']))
return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8 return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
def __init__(self, task='binary:logistic', log_max_bin=8, **params): def __init__(self, task='binary', log_max_bin=8, **params):
super().__init__(task, **params) super().__init__(task, **params)
if "objective" not in self.params: if "objective" not in self.params:
# Default: regression for LGBMRegressor, # Default: regression for LGBMRegressor,
# binary or multiclass for LGBMClassifier # binary or multiclass for LGBMClassifier
if 'regression' == task: objective = 'regression'
objective = 'regression' if 'binary' in task:
elif 'binary' in task:
objective = 'binary' objective = 'binary'
elif 'multi' in task: elif 'multi' in task:
objective = 'multiclass' objective = 'multiclass'
elif 'rank' == task: elif 'rank' == task:
objective = 'lambdarank' objective = 'lambdarank'
else:
objective = 'regression'
self.params["objective"] = objective self.params["objective"] = objective
if "n_estimators" in self.params: if "n_estimators" in self.params:
self.params["n_estimators"] = int(round(self.params["n_estimators"])) self.params["n_estimators"] = int(round(self.params["n_estimators"]))
@ -477,7 +472,7 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
return XGBoostEstimator.cost_relative2lgbm() return XGBoostEstimator.cost_relative2lgbm()
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, self, task='binary', n_jobs=1,
n_estimators=4, max_leaves=4, subsample=1.0, n_estimators=4, max_leaves=4, subsample=1.0,
min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0, min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0,
colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist', colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist',
@ -506,11 +501,10 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
'use_label_encoder': params.get('use_label_encoder', False), 'use_label_encoder': params.get('use_label_encoder', False),
}) })
if 'regression' == task: self.estimator_class = xgb.XGBRegressor
self.estimator_class = xgb.XGBRegressor if 'rank' == task:
elif 'rank' == task:
self.estimator_class = xgb.XGBRanker self.estimator_class = xgb.XGBRanker
else: elif task in ('binary', 'multi'):
self.estimator_class = xgb.XGBClassifier self.estimator_class = xgb.XGBClassifier
self._time_per_iter = None self._time_per_iter = None
self._train_size = 0 self._train_size = 0
@ -543,7 +537,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
'low_cost_init_value': 4, 'low_cost_init_value': 4,
}, },
} }
if task != 'regression': if task in ('binary', 'multi'):
space['criterion'] = { space['criterion'] = {
'domain': tune.choice(['gini', 'entropy']), 'domain': tune.choice(['gini', 'entropy']),
# 'init_value': 'gini', # 'init_value': 'gini',
@ -555,7 +549,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
return 2.0 return 2.0
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, self, task='binary', n_jobs=1,
n_estimators=4, max_features=1.0, criterion='gini', max_leaves=4, n_estimators=4, max_features=1.0, criterion='gini', max_leaves=4,
**params **params
): ):
@ -569,9 +563,8 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
'max_features': float(max_features), 'max_features': float(max_features),
"max_leaf_nodes": params.get('max_leaf_nodes', int(round(max_leaves))), "max_leaf_nodes": params.get('max_leaf_nodes', int(round(max_leaves))),
}) })
if 'regression' in task: self.estimator_class = RandomForestRegressor
self.estimator_class = RandomForestRegressor if task in ('binary', 'multi'):
else:
self.estimator_class = RandomForestClassifier self.estimator_class = RandomForestClassifier
self.params['criterion'] = criterion self.params['criterion'] = criterion
@ -586,7 +579,7 @@ class ExtraTreeEstimator(RandomForestEstimator):
def cost_relative2lgbm(cls): def cost_relative2lgbm(cls):
return 1.9 return 1.9
def __init__(self, task='binary:logistic', **params): def __init__(self, task='binary', **params):
super().__init__(task, **params) super().__init__(task, **params)
if 'regression' in task: if 'regression' in task:
self.estimator_class = ExtraTreesRegressor self.estimator_class = ExtraTreesRegressor
@ -610,7 +603,7 @@ class LRL1Classifier(SKLearnEstimator):
return 160 return 160
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0, self, task='binary', n_jobs=1, tol=0.0001, C=1.0,
**params **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
@ -621,11 +614,9 @@ class LRL1Classifier(SKLearnEstimator):
'solver': params.get("solver", 'saga'), 'solver': params.get("solver", 'saga'),
'n_jobs': n_jobs, 'n_jobs': n_jobs,
}) })
if 'regression' in task: assert task in ('binary', 'multi'), (
self.estimator_class = None 'LogisticRegression for classification task only')
raise NotImplementedError('LR does not support regression task') self.estimator_class = LogisticRegression
else:
self.estimator_class = LogisticRegression
class LRL2Classifier(SKLearnEstimator): class LRL2Classifier(SKLearnEstimator):
@ -639,7 +630,7 @@ class LRL2Classifier(SKLearnEstimator):
return 25 return 25
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0, self, task='binary', n_jobs=1, tol=0.0001, C=1.0,
**params **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
@ -650,11 +641,9 @@ class LRL2Classifier(SKLearnEstimator):
'solver': params.get("solver", 'lbfgs'), 'solver': params.get("solver", 'lbfgs'),
'n_jobs': n_jobs, 'n_jobs': n_jobs,
}) })
if 'regression' in task: assert task in ('binary', 'multi'), (
self.estimator_class = None 'LogisticRegression for classification task only')
raise NotImplementedError('LR does not support regression task') self.estimator_class = LogisticRegression
else:
self.estimator_class = LogisticRegression
class CatBoostEstimator(BaseEstimator): class CatBoostEstimator(BaseEstimator):
@ -711,7 +700,7 @@ class CatBoostEstimator(BaseEstimator):
return X return X
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, self, task='binary', n_jobs=1,
n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
@ -723,10 +712,9 @@ class CatBoostEstimator(BaseEstimator):
'verbose': params.get('verbose', False), 'verbose': params.get('verbose', False),
'random_seed': params.get("random_seed", 10242048), 'random_seed': params.get("random_seed", 10242048),
}) })
if 'regression' in task: from catboost import CatBoostRegressor
from catboost import CatBoostRegressor self.estimator_class = CatBoostRegressor
self.estimator_class = CatBoostRegressor if task in ('binary', 'multi'):
else:
from catboost import CatBoostClassifier from catboost import CatBoostClassifier
self.estimator_class = CatBoostClassifier self.estimator_class = CatBoostClassifier
@ -831,7 +819,7 @@ class KNeighborsEstimator(BaseEstimator):
return 30 return 30
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params self, task='binary', n_jobs=1, n_neighbors=5, **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
self.params.update({ self.params.update({
@ -839,10 +827,9 @@ class KNeighborsEstimator(BaseEstimator):
'weights': params.get('weights', 'distance'), 'weights': params.get('weights', 'distance'),
'n_jobs': n_jobs, 'n_jobs': n_jobs,
}) })
if 'regression' in task: from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsRegressor self.estimator_class = KNeighborsRegressor
self.estimator_class = KNeighborsRegressor if task in ('binary', 'multi'):
else:
from sklearn.neighbors import KNeighborsClassifier from sklearn.neighbors import KNeighborsClassifier
self.estimator_class = KNeighborsClassifier self.estimator_class = KNeighborsClassifier
@ -920,7 +907,7 @@ class FBProphet(BaseEstimator):
forecast = self._model.predict(X_test) forecast = self._model.predict(X_test)
return forecast['yhat'] return forecast['yhat']
else: else:
warnings.warn( logger.warning(
"Estimator is not fit yet. Please run fit() before predict().") "Estimator is not fit yet. Please run fit() before predict().")
return np.ones(X_test.shape[0]) return np.ones(X_test.shape[0])
@ -954,8 +941,9 @@ class ARIMA(FBProphet):
return train_df return train_df
def fit(self, X_train, y_train, budget=None, **kwargs): def fit(self, X_train, y_train, budget=None, **kwargs):
from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator import warnings
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator
current_time = time.time() current_time = time.time()
train_df = self._join(X_train, y_train) train_df = self._join(X_train, y_train)
model = ARIMA_estimator( model = ARIMA_estimator(

View File

@ -29,12 +29,11 @@ class AutoTransformers:
.. code-block:: python .. code-block:: python
autohf = AutoTransformers() autohf = AutoTransformers()
autohf_settings = {"resources_per_trial": {"cpu": 1}, autohf_settings = {
"num_samples": -1, "resources_per_trial": {"cpu": 1, "gpu": 1},
"time_budget": 100000, "num_samples": -1,
"ckpt_per_epoch": 1, "time_budget": 60,
"fp16": False, }
}
validation_metric, analysis = autohf.fit(**autohf_settings) validation_metric, analysis = autohf.fit(**autohf_settings)
@ -45,10 +44,11 @@ class AutoTransformers:
search_space = {} search_space = {}
if mode == "grid": if mode == "grid":
# TODO add test
for each_hp in config_json.keys(): for each_hp in config_json.keys():
this_config = config_json[each_hp] this_config = config_json[each_hp]
assert isinstance(this_config, dict) or isinstance(this_config, list), \ assert isinstance(this_config, dict) or isinstance(this_config, list), \
"config of " + each_hp + " must be dict or list" "config of " + each_hp + " must be dict or list for grid search"
search_space[each_hp] = ray.tune.grid_search(this_config) search_space[each_hp] = ray.tune.grid_search(this_config)
else: else:
for each_hp in config_json.keys(): for each_hp in config_json.keys():
@ -85,10 +85,6 @@ class AutoTransformers:
search_space_hpo_json, search_space_hpo_json,
mode=self.jobid_config.mod) mode=self.jobid_config.mod)
@staticmethod
def _wrapper(func, *args): # with star
return func(*args)
@staticmethod @staticmethod
def _get_split_name(data_raw, fold_name=None): def _get_split_name(data_raw, fold_name=None):
if fold_name: if fold_name:
@ -179,7 +175,7 @@ class AutoTransformers:
data_raw = load_dataset(JobID.dataset_list_to_str(self.jobid_config.dat), data_raw = load_dataset(JobID.dataset_list_to_str(self.jobid_config.dat),
self.jobid_config.subdat) self.jobid_config.subdat)
else: else:
data_raw = AutoTransformers._wrapper(load_dataset, *self.jobid_config.dat) data_raw = load_dataset(*self.jobid_config.dat)
self._train_name, self._dev_name, self._test_name = AutoTransformers._get_split_name( self._train_name, self._dev_name, self._test_name = AutoTransformers._get_split_name(
data_raw, data_raw,
@ -349,6 +345,7 @@ class AutoTransformers:
return training_args_config, per_model_config return training_args_config, per_model_config
def _objective(self, config, reporter, checkpoint_dir=None): def _objective(self, config, reporter, checkpoint_dir=None):
# TODO add test
from transformers.trainer_utils import set_seed from transformers.trainer_utils import set_seed
self._set_transformers_verbosity(self._transformers_verbose) self._set_transformers_verbosity(self._transformers_verbose)
@ -827,6 +824,7 @@ class AutoTransformers:
test_trainer = TrainerForAutoTransformers(best_model, training_args) test_trainer = TrainerForAutoTransformers(best_model, training_args)
if self.jobid_config.spt == "ori": if self.jobid_config.spt == "ori":
# TODO add test
if "label" in self.test_dataset.features.keys(): if "label" in self.test_dataset.features.keys():
self.test_dataset.remove_columns_("label") self.test_dataset.remove_columns_("label")
print("Cleaning the existing label column from test data") print("Cleaning the existing label column from test data")

View File

@ -1,2 +1,2 @@
from .trial_scheduler import TrialScheduler, FIFOScheduler from .trial_scheduler import TrialScheduler
from .online_scheduler import OnlineScheduler, OnlineSuccessiveDoublingScheduler, ChaChaScheduler from .online_scheduler import OnlineScheduler, OnlineSuccessiveDoublingScheduler, ChaChaScheduler

View File

@ -1,12 +1,12 @@
import numpy as np import numpy as np
import logging import logging
from typing import Optional, Dict from typing import Dict
from flaml.scheduler import FIFOScheduler, TrialScheduler from flaml.scheduler import TrialScheduler
from flaml.tune import Trial from flaml.tune import Trial
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class OnlineScheduler(FIFOScheduler): class OnlineScheduler(TrialScheduler):
"""Implementation of the OnlineFIFOSchedulers. """Implementation of the OnlineFIFOSchedulers.
Methods: Methods:

View File

@ -17,10 +17,8 @@ This source file is adapted here because ray does not fully support Windows.
Copyright (c) Microsoft Corporation. Copyright (c) Microsoft Corporation.
''' '''
from typing import Dict, Optional
from flaml.tune import trial_runner from flaml.tune import trial_runner
from flaml.tune.result import DEFAULT_METRIC
from flaml.tune.trial import Trial from flaml.tune.trial import Trial
@ -31,127 +29,10 @@ class TrialScheduler:
PAUSE = "PAUSE" #: Status for pausing trial execution PAUSE = "PAUSE" #: Status for pausing trial execution
STOP = "STOP" #: Status for stopping trial execution STOP = "STOP" #: Status for stopping trial execution
_metric = None
@property
def metric(self):
return self._metric
def set_search_properties(self, metric: Optional[str],
mode: Optional[str]) -> bool:
"""Pass search properties to scheduler.
This method acts as an alternative to instantiating schedulers
that react to metrics with their own `metric` and `mode` parameters.
Args:
metric (str): Metric to optimize
mode (str): One of ["min", "max"]. Direction to optimize.
"""
if self._metric and metric:
return False
if metric:
self._metric = metric
if self._metric is None:
# Per default, use anonymous metric
self._metric = DEFAULT_METRIC
return True
def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", def on_trial_add(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial): trial: Trial):
"""Called when a new trial is added to the trial runner."""
raise NotImplementedError
def on_trial_error(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
"""Notification for the error of trial.
This will only be called when the trial is in the RUNNING state."""
raise NotImplementedError
def on_trial_result(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict) -> str:
"""Called on each intermediate result returned by a trial.
At this point, the trial scheduler can make a decision by returning
one of CONTINUE, PAUSE, and STOP. This will only be called when the
trial is in the RUNNING state."""
raise NotImplementedError
def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict):
"""Notification for the completion of trial.
This will only be called when the trial is in the RUNNING state and
either completes naturally or by manual termination."""
raise NotImplementedError
def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
"""Called to remove trial.
This is called when the trial is in PAUSED or PENDING state. Otherwise,
call `on_trial_complete`."""
raise NotImplementedError
def choose_trial_to_run(
self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]:
"""Called to choose a new trial to run.
This should return one of the trials in trial_runner that is in
the PENDING or PAUSED state. This function must be idempotent.
If no trial is ready, return None."""
raise NotImplementedError
def debug_string(self) -> str:
"""Returns a human readable message for printing to the console."""
raise NotImplementedError
def save(self, checkpoint_path: str):
"""Save trial scheduler to a checkpoint"""
raise NotImplementedError
def restore(self, checkpoint_path: str):
"""Restore trial scheduler from checkpoint."""
raise NotImplementedError
class FIFOScheduler(TrialScheduler):
"""Simple scheduler that just runs trials in submission order."""
def on_trial_add(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
pass
def on_trial_error(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
pass
def on_trial_result(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict) -> str:
return TrialScheduler.CONTINUE
def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict):
pass pass
def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner", def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial): trial: Trial):
pass pass
def choose_trial_to_run(
self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]:
for trial in trial_runner.get_trials():
if (trial.status == Trial.PENDING
and trial_runner.has_resources_for_trial(trial)):
return trial
for trial in trial_runner.get_trials():
if (trial.status == Trial.PAUSED
and trial_runner.has_resources_for_trial(trial)):
return trial
return None
def debug_string(self) -> str:
return "Using FIFO scheduling algorithm."

View File

@ -14,14 +14,14 @@ try:
assert ray_version >= '1.0.0' assert ray_version >= '1.0.0'
from ray.tune.suggest import Searcher from ray.tune.suggest import Searcher
from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
from ray.tune.utils.util import unflatten_dict
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .suggestion import Searcher from .suggestion import Searcher
from .suggestion import OptunaSearch as GlobalSearch from .suggestion import OptunaSearch as GlobalSearch
from ..tune.trial import unflatten_dict from ..tune.trial import unflatten_dict, flatten_dict
from .search_thread import SearchThread from .search_thread import SearchThread
from .flow2 import FLOW2 from .flow2 import FLOW2
from ..tune.space import add_cost_to_space, indexof, normalize, define_by_run_func from ..tune.space import (
add_cost_to_space, indexof, normalize, define_by_run_func)
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -40,9 +40,10 @@ class BlendSearch(Searcher):
metric: Optional[str] = None, metric: Optional[str] = None,
mode: Optional[str] = None, mode: Optional[str] = None,
space: Optional[dict] = None, space: Optional[dict] = None,
points_to_evaluate: Optional[List[dict]] = None,
low_cost_partial_config: Optional[dict] = None, low_cost_partial_config: Optional[dict] = None,
cat_hp_cost: Optional[dict] = None, cat_hp_cost: Optional[dict] = None,
points_to_evaluate: Optional[List[dict]] = None,
evaluated_rewards: Optional[List] = None,
prune_attr: Optional[str] = None, prune_attr: Optional[str] = None,
min_resource: Optional[float] = None, min_resource: Optional[float] = None,
max_resource: Optional[float] = None, max_resource: Optional[float] = None,
@ -61,7 +62,6 @@ class BlendSearch(Searcher):
mode: A string in ['min', 'max'] to specify the objective as mode: A string in ['min', 'max'] to specify the objective as
minimization or maximization. minimization or maximization.
space: A dictionary to specify the search space. space: A dictionary to specify the search space.
points_to_evaluate: Initial parameter suggestions to be run first.
low_cost_partial_config: A dictionary from a subset of low_cost_partial_config: A dictionary from a subset of
controlled dimensions to the initial low-cost values. controlled dimensions to the initial low-cost values.
e.g., e.g.,
@ -80,6 +80,13 @@ class BlendSearch(Searcher):
i.e., the relative cost of the i.e., the relative cost of the
three choices of 'tree_method' is 1, 1 and 2 respectively. three choices of 'tree_method' is 1, 1 and 2 respectively.
points_to_evaluate: Initial parameter suggestions to be run first.
evaluated_rewards (list): If you have previously evaluated the
parameters passed in as points_to_evaluate you can avoid
re-running those trials by passing in the reward attributes
as a list so the optimizer can be told the results without
needing to re-compute the trial. Must be the same length as
points_to_evaluate.
prune_attr: A string of the attribute used for pruning. prune_attr: A string of the attribute used for pruning.
Not necessarily in space. Not necessarily in space.
When prune_attr is in space, it is a hyperparameter, e.g., When prune_attr is in space, it is a hyperparameter, e.g.,
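
A warm-start sketch using the evaluated_rewards parameter documented above; the space, points, and rewards are made-up values from a hypothetical earlier run:

from flaml import BlendSearch, tune

points_to_evaluate = [{'a': 2, 'b': 0.5}, {'a': 8, 'b': 0.1}]
evaluated_rewards = [0.71, 0.64]  # same length/order as points_to_evaluate

search_alg = BlendSearch(
    metric='score', mode='max',
    space={'a': tune.randint(1, 10), 'b': tune.uniform(0, 1)},
    points_to_evaluate=points_to_evaluate,
    evaluated_rewards=evaluated_rewards,
)

Per the constructor change below, with mode='max' only the best-scoring point(s) (here {'a': 2, 'b': 0.5}) are kept as local-search start points, while the full lists are handed to the global searcher.
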
@ -122,7 +129,20 @@ class BlendSearch(Searcher):
"consider providing low-cost values for cost-related hps via " "consider providing low-cost values for cost-related hps via "
"'low_cost_partial_config'." "'low_cost_partial_config'."
) )
self._points_to_evaluate = points_to_evaluate or [] if evaluated_rewards and mode:
self._points_to_evaluate = []
self._evaluated_rewards = []
best = max(evaluated_rewards) if mode == 'max' else min(
evaluated_rewards)
# only keep the best points as start points
for i, r in enumerate(evaluated_rewards):
if r == best:
p = points_to_evaluate[i]
self._points_to_evaluate.append(p)
self._evaluated_rewards.append(r)
else:
self._points_to_evaluate = points_to_evaluate or []
self._evaluated_rewards = evaluated_rewards or []
self._config_constraints = config_constraints self._config_constraints = config_constraints
self._metric_constraints = metric_constraints self._metric_constraints = metric_constraints
if self._metric_constraints: if self._metric_constraints:
@ -131,40 +151,45 @@ class BlendSearch(Searcher):
self._cat_hp_cost = cat_hp_cost or {} self._cat_hp_cost = cat_hp_cost or {}
if space: if space:
add_cost_to_space(space, init_config, self._cat_hp_cost) add_cost_to_space(space, init_config, self._cat_hp_cost)
self._ls = self.LocalSearch(
init_config, metric, mode, space, prune_attr,
min_resource, max_resource, reduction_factor, self.cost_attr, seed)
if global_search_alg is not None: if global_search_alg is not None:
self._gs = global_search_alg self._gs = global_search_alg
elif getattr(self, '__name__', None) != 'CFO': elif getattr(self, '__name__', None) != 'CFO':
from functools import partial if space and self._ls.hierarchical:
gs_space = partial(define_by_run_func, space=space) from functools import partial
gs_space = partial(define_by_run_func, space=space)
evaluated_rewards = None # not supported by define-by-run
else:
gs_space = space
gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32)
if experimental:
import optuna as ot
sampler = ot.samplers.TPESampler(
seed=seed, multivariate=True, group=True)
else:
sampler = None
try: try:
gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32) self._gs = GlobalSearch(
if experimental: space=gs_space, metric=metric, mode=mode, seed=gs_seed,
import optuna as ot sampler=sampler, points_to_evaluate=points_to_evaluate,
sampler = ot.samplers.TPESampler( evaluated_rewards=evaluated_rewards)
seed=seed, multivariate=True, group=True) except ValueError:
else:
sampler = None
self._gs = GlobalSearch( self._gs = GlobalSearch(
space=gs_space, metric=metric, mode=mode, seed=gs_seed, space=gs_space, metric=metric, mode=mode, seed=gs_seed,
sampler=sampler) sampler=sampler)
except TypeError:
self._gs = GlobalSearch(space=gs_space, metric=metric, mode=mode)
self._gs.space = space self._gs.space = space
else: else:
self._gs = None self._gs = None
self._experimental = experimental self._experimental = experimental
if getattr(self, '__name__', None) == 'CFO' and points_to_evaluate and len( if getattr(self, '__name__', None) == 'CFO' and points_to_evaluate and len(
points_to_evaluate) > 1: self._points_to_evaluate) > 1:
# use the best config in points_to_evaluate as the start point # use the best config in points_to_evaluate as the start point
self._candidate_start_points = {} self._candidate_start_points = {}
self._started_from_low_cost = not low_cost_partial_config self._started_from_low_cost = not low_cost_partial_config
else: else:
self._candidate_start_points = None self._candidate_start_points = None
self._ls = self.LocalSearch(
init_config, metric, mode, space, prune_attr,
min_resource, max_resource, reduction_factor, self.cost_attr, seed)
self._is_ls_ever_converged = False
self._subspace = {} # the subspace for each trial id
if space: if space:
self._init_search() self._init_search()
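
The define-by-run conversion above is now used only when the local search space is hierarchical; for illustration, a nested space of the kind that needs it (assuming flaml's nested-choice convention):

from flaml import tune

# The chosen 'model' dict decides which hyperparameters exist, so this
# space cannot be expressed as a flat dict for the global searcher.
hierarchical_space = {
    'model': tune.choice([
        {'name': 'lgbm', 'num_leaves': tune.randint(4, 64)},
        {'name': 'xgboost', 'max_leaves': tune.randint(4, 64)},
    ]),
    'learning_rate': tune.loguniform(1e-3, 1.0),
}
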
@ -187,6 +212,7 @@ class BlendSearch(Searcher):
if not self._ls.space: if not self._ls.space:
# the search space can be set only once # the search space can be set only once
if self._gs is not None: if self._gs is not None:
# define-by-run is not supported via set_search_properties
self._gs.set_search_properties(metric, mode, config) self._gs.set_search_properties(metric, mode, config)
self._gs.space = config self._gs.space = config
if config: if config:
@ -216,6 +242,8 @@ class BlendSearch(Searcher):
def _init_search(self): def _init_search(self):
'''initialize the search '''initialize the search
''' '''
self._is_ls_ever_converged = False
self._subspace = {} # the subspace for each trial id
self._metric_target = np.inf * self._ls.metric_op self._metric_target = np.inf * self._ls.metric_op
self._search_thread_pool = { self._search_thread_pool = {
# id: int -> thread: SearchThread # id: int -> thread: SearchThread
@ -239,6 +267,7 @@ class BlendSearch(Searcher):
else: else:
self._metric_constraint_satisfied = True self._metric_constraint_satisfied = True
self._metric_constraint_penalty = None self._metric_constraint_penalty = None
self.best_resource = self._ls.min_resource
def save(self, checkpoint_path: str): def save(self, checkpoint_path: str):
''' save states to a checkpoint path ''' save states to a checkpoint path
@ -295,10 +324,11 @@ class BlendSearch(Searcher):
trial_id, result, error) trial_id, result, error)
del self._trial_proposed_by[trial_id] del self._trial_proposed_by[trial_id]
if result: if result:
config = {} config = result.get('config', {})
for key, value in result.items(): if not config:
if key.startswith('config/'): for key, value in result.items():
config[key[7:]] = value if key.startswith('config/'):
config[key[7:]] = value
signature = self._ls.config_signature( signature = self._ls.config_signature(
config, self._subspace.get(trial_id, {})) config, self._subspace.get(trial_id, {}))
if error: # remove from result cache if error: # remove from result cache
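Note: with the change above, on_trial_complete can recover a trial's config either from a whole 'config' entry (produced by the replay path later in this diff) or, failing that, from Ray-style 'config/'-prefixed keys. Two hypothetical payloads it now accepts:

result_a = {"obj": 0.3, "time_total_s": 1.2,
            "config": {"a": 7, "b": 1e-3}}    # taken as-is
result_b = {"obj": 0.3, "time_total_s": 1.2,
            "config/a": 7, "config/b": 1e-3}  # reassembled key by key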
@ -309,17 +339,22 @@ class BlendSearch(Searcher):
objective = result[self._ls.metric] objective = result[self._ls.metric]
if (objective - self._metric_target) * self._ls.metric_op < 0: if (objective - self._metric_target) * self._ls.metric_op < 0:
self._metric_target = objective self._metric_target = objective
if self._ls.resource:
self.best_resource = config[self._ls.prune_attr]
if thread_id: if thread_id:
if not self._metric_constraint_satisfied: if not self._metric_constraint_satisfied:
# no point has been found to satisfy metric constraint # no point has been found to satisfy metric constraint
self._expand_admissible_region( self._expand_admissible_region(
self._ls_bound_min, self._ls_bound_max, self._ls_bound_min, self._ls_bound_max,
self._subspace.get(trial_id, self._ls.space)) self._subspace.get(trial_id, self._ls.space))
# if self._gs is not None and self._experimental: if self._gs is not None and self._experimental and (
# # TODO: recover when supported not self._ls.hierarchical):
# converted = convert_key(config, self._gs.space) self._gs.add_evaluated_point(
# logger.info(converted) flatten_dict(config), objective)
# self._gs.add_evaluated_point(converted, objective) # TODO: recover when supported
# converted = convert_key(config, self._gs.space)
# logger.info(converted)
# self._gs.add_evaluated_point(converted, objective)
elif metric_constraint_satisfied and self._create_condition( elif metric_constraint_satisfied and self._create_condition(
result): result):
# thread creator # thread creator
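Note: under the experimental TPE sampler and a non-hierarchical space, every completed point is now fed back to the global search. A sketch of the hand-off, using the flatten_dict fallback this commit imports from flaml.tune.trial:

from flaml.tune.trial import flatten_dict

config = {"cost_related": {"a": 3}, "b": 0.99}
objective = 0.25
flat = flatten_dict(config)  # {'cost_related/a': 3, 'b': 0.99}
# as in the diff: self._gs.add_evaluated_point(flat, objective)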
@ -496,10 +531,12 @@ class BlendSearch(Searcher):
''' '''
if self._init_used and not self._points_to_evaluate: if self._init_used and not self._points_to_evaluate:
choice, backup = self._select_thread() choice, backup = self._select_thread()
if choice < 0: # timeout # if choice < 0: # timeout
return None # return None
config = self._search_thread_pool[choice].suggest(trial_id) config = self._search_thread_pool[choice].suggest(trial_id)
if choice and config is None: if not choice and config is not None and self._ls.resource:
config[self._ls.prune_attr] = self.best_resource
elif choice and config is None:
# local search thread finishes # local search thread finishes
if self._search_thread_pool[choice].converged: if self._search_thread_pool[choice].converged:
self._expand_admissible_region( self._expand_admissible_region(
@ -544,9 +581,6 @@ class BlendSearch(Searcher):
self._trial_proposed_by[trial_id] = backup self._trial_proposed_by[trial_id] = backup
choice = backup choice = backup
if not choice: # global search if not choice: # global search
if self._ls._resource:
# TODO: min or median?
config[self._ls.prune_attr] = self._ls.min_resource
# temporarily relax admissible region for parallel proposals # temporarily relax admissible region for parallel proposals
self._update_admissible_region( self._update_admissible_region(
config, self._gs_admissible_min, self._gs_admissible_max, config, self._gs_admissible_min, self._gs_admissible_max,
@ -563,22 +597,35 @@ class BlendSearch(Searcher):
else: # use init config else: # use init config
if self._candidate_start_points is not None and self._points_to_evaluate: if self._candidate_start_points is not None and self._points_to_evaluate:
self._candidate_start_points[trial_id] = None self._candidate_start_points[trial_id] = None
init_config = self._points_to_evaluate.pop( reward = None
0) if self._points_to_evaluate else self._ls.init_config if self._points_to_evaluate:
init_config = self._points_to_evaluate.pop(0)
if self._evaluated_rewards:
reward = self._evaluated_rewards.pop(0)
else:
init_config = self._ls.init_config
config, space = self._ls.complete_config( config, space = self._ls.complete_config(
init_config, self._ls_bound_min, self._ls_bound_max) init_config, self._ls_bound_min, self._ls_bound_max)
config_signature = self._ls.config_signature(config, space) if reward is None:
result = self._result.get(config_signature) config_signature = self._ls.config_signature(config, space)
if result: # tried before result = self._result.get(config_signature)
return None if result: # tried before
elif result is None: # not tried before return None
self._result[config_signature] = {} elif result is None: # not tried before
else: # running but no result yet self._result[config_signature] = {}
return None else: # running but no result yet
return None
self._init_used = True self._init_used = True
self._trial_proposed_by[trial_id] = 0 self._trial_proposed_by[trial_id] = 0
self._search_thread_pool[0].running += 1 self._search_thread_pool[0].running += 1
self._subspace[trial_id] = space self._subspace[trial_id] = space
if reward is not None:
result = {
self._metric: reward, self.cost_attr: 1,
'config': config
}
self.on_trial_complete(trial_id, result)
return None
return config return config
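Note: the net effect of the replay branch above: a point with a known reward is booked via on_trial_complete (with unit cost) and suggest returns None, so the trial is never actually run. A behavioural sketch under those assumptions:

from flaml.searcher.blendsearch import BlendSearch
from flaml.tune import sample

space = {"a": sample.uniform(6, 8), "b": sample.loguniform(1e-4, 1e-2)}
searcher = BlendSearch(
    metric="m", mode="min", space=space,
    points_to_evaluate=[{"a": 7, "b": 1e-3}],
    evaluated_rewards=[0.4])
assert searcher.suggest("t0") is None  # replayed from the known reward
config = searcher.suggest("t1")        # fresh suggestion from the threads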
def _should_skip(self, choice, trial_id, config, space) -> bool: def _should_skip(self, choice, trial_id, config, space) -> bool:
@ -694,79 +741,88 @@ except (ImportError, AssertionError):
try: try:
from nni.tuner import Tuner as NNITuner from nni.tuner import Tuner as NNITuner
from nni.utils import extract_scalar_reward from nni.utils import extract_scalar_reward
class BlendSearchTuner(BlendSearch, NNITuner):
'''Tuner class for NNI
'''
def receive_trial_result(self, parameter_id, parameters, value,
**kwargs):
'''
Receive trial's final result.
parameter_id: int
parameters: object created by 'generate_parameters()'
value: final metrics of the trial, including default metric
'''
result = {}
for key, value in parameters.items():
result['config/' + key] = value
reward = extract_scalar_reward(value)
result[self._metric] = reward
# if nni does not report training cost,
# use 'sequence' as an approximation;
# if no sequence, use a constant 1
result[self.cost_attr] = value.get(self.cost_attr, value.get(
'sequence', 1))
self.on_trial_complete(str(parameter_id), result)
...
def generate_parameters(self, parameter_id, **kwargs) -> Dict:
'''
Returns a set of trial (hyper-)parameters, as a serializable object
parameter_id: int
'''
return self.suggest(str(parameter_id))
...
def update_search_space(self, search_space):
'''
Tuners are advised to support updating search space at run-time.
If a tuner can only set search space once before generating first hyper-parameters,
it should explicitly document this behaviour.
search_space: JSON object created by experiment owner
'''
config = {}
for key, value in search_space.items():
v = value.get("_value")
_type = value['_type']
if _type == 'choice':
config[key] = choice(v)
elif _type == 'randint':
config[key] = randint(v[0], v[1] - 1)
elif _type == 'uniform':
config[key] = uniform(v[0], v[1])
elif _type == 'quniform':
config[key] = quniform(v[0], v[1], v[2])
elif _type == 'loguniform':
config[key] = loguniform(v[0], v[1])
elif _type == 'qloguniform':
config[key] = qloguniform(v[0], v[1], v[2])
elif _type == 'normal':
config[key] = randn(v[1], v[2])
elif _type == 'qnormal':
config[key] = qrandn(v[1], v[2], v[3])
else:
raise ValueError(
f'unsupported type in search_space {_type}')
self._ls.set_search_properties(None, None, config)
if self._gs is not None:
self._gs.set_search_properties(None, None, config)
self._init_search()
except ImportError: except ImportError:
class BlendSearchTuner(BlendSearch): class NNITuner:
pass pass
def extract_scalar_reward(x: Dict):
return x.get('reward')
class BlendSearchTuner(BlendSearch, NNITuner):
'''Tuner class for NNI
'''
def receive_trial_result(self, parameter_id, parameters, value,
**kwargs):
'''
Receive trial's final result.
parameter_id: int
parameters: object created by 'generate_parameters()'
value: final metrics of the trial, including default metric
'''
result = {}
for k, v in parameters.items():
result['config/' + k] = v
reward = extract_scalar_reward(value)
result[self._metric] = reward
# if nni does not report training cost,
# use 'sequence' as an approximation;
# if no sequence, use a constant 1
result[self.cost_attr] = value.get(self.cost_attr, value.get(
'sequence', 1))
self.on_trial_complete(str(parameter_id), result)
...
def generate_parameters(self, parameter_id, **kwargs) -> Dict:
'''
Returns a set of trial (hyper-)parameters, as a serializable object
parameter_id: int
'''
return self.suggest(str(parameter_id))
...
def update_search_space(self, search_space):
'''
Tuners are advised to support updating search space at run-time.
If a tuner can only set search space once before generating first hyper-parameters,
it should explicitly document this behaviour.
search_space: JSON object created by experiment owner
'''
config = {}
for key, value in search_space.items():
v = value.get("_value")
_type = value['_type']
if _type == 'choice':
config[key] = choice(v)
elif _type == 'randint':
config[key] = randint(*v)
elif _type == 'uniform':
config[key] = uniform(*v)
elif _type == 'quniform':
config[key] = quniform(*v)
elif _type == 'loguniform':
config[key] = loguniform(*v)
elif _type == 'qloguniform':
config[key] = qloguniform(*v)
elif _type == 'normal':
config[key] = randn(*v)
elif _type == 'qnormal':
config[key] = qrandn(*v)
else:
raise ValueError(
f'unsupported type in search_space {_type}')
add_cost_to_space(config, {}, {})
self._ls = self.LocalSearch(
{}, self._ls.metric, self._mode, config, cost_attr=self.cost_attr,
seed=self._ls.seed)
if self._gs is not None:
self._gs = GlobalSearch(
space=config, metric=self._metric, mode=self._mode,
sampler=self._gs._sampler)
self._gs.space = config
self._init_search()
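Note: a hypothetical NNI search_space payload and the flaml domains update_search_space builds from it (the _type branches above; values illustrative):

search_space = {
    "lr":    {"_type": "loguniform", "_value": [1e-4, 1e-1]},
    "batch": {"_type": "choice",     "_value": [16, 32, 64]},
    "depth": {"_type": "randint",    "_value": [2, 8]},
}
# -> config = {'lr': loguniform(1e-4, 1e-1),
#              'batch': choice([16, 32, 64]),
#              'depth': randint(2, 8)}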
class CFO(BlendSearchTuner): class CFO(BlendSearchTuner):
''' class for CFO algorithm ''' class for CFO algorithm
@ -15,8 +15,9 @@ try:
from ray.tune.utils.util import flatten_dict, unflatten_dict from ray.tune.utils.util import flatten_dict, unflatten_dict
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .suggestion import Searcher from .suggestion import Searcher
from .variant_generator import generate_variants, flatten_dict, unflatten_dict from .variant_generator import generate_variants
from ..tune import sample from ..tune import sample
from ..tune.trial import flatten_dict, unflatten_dict
from ..tune.space import complete_config, denormalize, normalize from ..tune.space import complete_config, denormalize, normalize
@ -95,7 +96,7 @@ class FLOW2(Searcher):
self.space = space or {} self.space = space or {}
self._space = flatten_dict(self.space, prevent_delimiter=True) self._space = flatten_dict(self.space, prevent_delimiter=True)
self._random = np.random.RandomState(seed) self._random = np.random.RandomState(seed)
self._seed = seed self.seed = seed
self.init_config = init_config self.init_config = init_config
self.best_config = flatten_dict(init_config) self.best_config = flatten_dict(init_config)
self.prune_attr = prune_attr self.prune_attr = prune_attr
@ -142,7 +143,7 @@ class FLOW2(Searcher):
self._bounded_keys.append(key) self._bounded_keys.append(key)
if not hier: if not hier:
self._space_keys = sorted(self._tunable_keys) self._space_keys = sorted(self._tunable_keys)
self._hierarchical = hier self.hierarchical = hier
if (self.prune_attr and self.prune_attr not in self._space if (self.prune_attr and self.prune_attr not in self._space
and self.max_resource): and self.max_resource):
self.min_resource = self.min_resource or self._min_resource() self.min_resource = self.min_resource or self._min_resource()
@ -253,10 +254,10 @@ class FLOW2(Searcher):
init_config, self.metric, self.mode, init_config, self.metric, self.mode,
space, self.prune_attr, space, self.prune_attr,
self.min_resource, self.max_resource, self.min_resource, self.max_resource,
self.resource_multiple_factor, self.cost_attr, self._seed + 1) self.resource_multiple_factor, self.cost_attr, self.seed + 1)
flow2.best_obj = obj * self.metric_op # minimize internally flow2.best_obj = obj * self.metric_op # minimize internally
flow2.cost_incumbent = cost flow2.cost_incumbent = cost
self._seed += 1 self.seed += 1
return flow2 return flow2
def normalize(self, config, recursive=False) -> Dict: def normalize(self, config, recursive=False) -> Dict:
@ -502,7 +503,7 @@ class FLOW2(Searcher):
value_list = [] value_list = []
# self._space_keys doesn't contain keys with const values, # self._space_keys doesn't contain keys with const values,
# e.g., "eval_metric": ["logloss", "error"]. # e.g., "eval_metric": ["logloss", "error"].
keys = sorted(config.keys()) if self._hierarchical else self._space_keys keys = sorted(config.keys()) if self.hierarchical else self._space_keys
for key in keys: for key in keys:
value = config[key] value = config[key]
if key == self.prune_attr: if key == self.prune_attr:
@ -510,7 +511,7 @@ class FLOW2(Searcher):
else: else:
# key must be in space # key must be in space
domain = space[key] domain = space[key]
if self._hierarchical: if self.hierarchical:
# can't remove constant for hierarchical search space, # can't remove constant for hierarchical search space,
# e.g., learner # e.g., learner
if not (domain is None or type(domain) in (str, int, float) if not (domain is None or type(domain) in (str, int, float)
@ -12,7 +12,7 @@ try:
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .suggestion import Searcher from .suggestion import Searcher
from .flow2 import FLOW2 from .flow2 import FLOW2
from ..tune.space import unflatten_hierarchical from ..tune.space import add_cost_to_space, unflatten_hierarchical
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -46,6 +46,11 @@ class SearchThread:
self.cost_attr = cost_attr self.cost_attr = cost_attr
if search_alg: if search_alg:
self.space = self._space = search_alg.space # unflattened space self.space = self._space = search_alg.space # unflattened space
if self.space and not isinstance(search_alg, FLOW2) and isinstance(
search_alg._space, dict
):
# remember const config
self._const = add_cost_to_space(self.space, {}, {})
@classmethod @classmethod
def set_eps(cls, time_budget_s): def set_eps(cls, time_budget_s):
@ -59,7 +64,12 @@ class SearchThread:
else: else:
try: try:
config = self._search_alg.suggest(trial_id) config = self._search_alg.suggest(trial_id)
config, self.space = unflatten_hierarchical(config, self._space) if isinstance(self._search_alg._space, dict):
config.update(self._const)
else:
# define by run
config, self.space = unflatten_hierarchical(
config, self._space)
except FloatingPointError: except FloatingPointError:
logger.warning( logger.warning(
'The global search method raises FloatingPointError. ' 'The global search method raises FloatingPointError. '
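Note: the intent of the two hunks above, sketched. For a plain dict space, the constants remembered at construction are merged back into each global-search suggestion (Optuna's space drops them); a define-by-run space instead goes through unflatten_hierarchical. This assumes add_cost_to_space returns the constant part of the space, as its use here implies:

from flaml.tune import sample
from flaml.tune.space import add_cost_to_space

space = {"n_jobs": 1, "lr": sample.loguniform(1e-4, 1e-1)}
const = add_cost_to_space(space, {}, {})  # assumed: {'n_jobs': 1}
suggestion = {"lr": 0.01}                 # what the global search proposes
suggestion.update(const)                  # constants restored before eval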
@ -91,15 +91,6 @@ class Searcher:
mode: Optional[str] = None, mode: Optional[str] = None,
max_concurrent: Optional[int] = None, max_concurrent: Optional[int] = None,
use_early_stopped_trials: Optional[bool] = None): use_early_stopped_trials: Optional[bool] = None):
if use_early_stopped_trials is False:
raise DeprecationWarning(
"Early stopped trials are now always used. If this is a "
"problem, file an issue: https://github.com/ray-project/ray.")
if max_concurrent is not None:
logger.warning(
"DeprecationWarning: `max_concurrent` is deprecated for this "
"search algorithm. Use tune.suggest.ConcurrencyLimiter() "
"instead. This will raise an error in future versions of Ray.")
self._metric = metric self._metric = metric
self._mode = mode self._mode = mode
@ -152,83 +143,6 @@ class Searcher:
""" """
pass pass
def on_trial_complete(self,
trial_id: str,
result: Optional[Dict] = None,
error: bool = False):
"""Notification for the completion of trial.
Typically, this method is used for notifying the underlying
optimizer of the result.
Args:
trial_id (str): A unique string ID for the trial.
result (dict): Dictionary of metrics for current training progress.
Note that the result dict may include NaNs or
may not include the optimization metric. It is up to the
subclass implementation to preprocess the result to
avoid breaking the optimization process. Upon errors, this
may also be None.
error (bool): True if the training process raised an error.
"""
raise NotImplementedError
def suggest(self, trial_id: str) -> Optional[Dict]:
"""Queries the algorithm to retrieve the next set of parameters.
Arguments:
trial_id (str): Trial ID used for subsequent notifications.
Returns:
dict | FINISHED | None: Configuration for a trial, if possible.
If FINISHED is returned, Tune will be notified that
no more suggestions/configurations will be provided.
If None is returned, Tune will skip the querying of the
searcher for this step.
"""
raise NotImplementedError
def save(self, checkpoint_path: str):
"""Save state to path for this search algorithm.
Args:
checkpoint_path (str): File where the search algorithm
state is saved. This path should be used later when
restoring from file.
Example:
.. code-block:: python
search_alg = Searcher(...)
analysis = tune.run(
cost,
num_samples=5,
search_alg=search_alg,
name=self.experiment_name,
local_dir=self.tmpdir)
search_alg.save("./my_favorite_path.pkl")
.. versionchanged:: 0.8.7
Save is automatically called by `tune.run`. You can use
`restore_from_dir` to restore from an experiment directory
such as `~/ray_results/trainable`.
"""
raise NotImplementedError
def restore(self, checkpoint_path: str):
"""Restore state for this search algorithm
Args:
checkpoint_path (str): File where the search algorithm
state is saved. This path should be the same
as the one provided to "save".
Example:
.. code-block:: python
search_alg.save("./my_favorite_path.pkl")
search_alg2 = Searcher(...)
search_alg2 = ConcurrencyLimiter(search_alg2, 1)
search_alg2.restore(checkpoint_path)
tune.run(cost, num_samples=5, search_alg=search_alg2)
"""
raise NotImplementedError
def get_state(self) -> Dict:
raise NotImplementedError
def set_state(self, state: Dict):
raise NotImplementedError
@property @property
def metric(self) -> str: def metric(self) -> str:
"""The training result objective value attribute.""" """The training result objective value attribute."""
@ -536,14 +450,6 @@ class OptunaSearch(Searcher):
# Flatten to support nested dicts # Flatten to support nested dicts
space = flatten_dict(space, "/") space = flatten_dict(space, "/")
# Deprecate: 1.5
if isinstance(space, list):
logger.warning(
"Passing lists of `param.suggest_*()` calls to OptunaSearch "
"as a search space is deprecated and will be removed in "
"a future release of Ray. Please pass a dict mapping "
"to `optuna.distributions` objects instead.")
self._space = space self._space = space
self._points_to_evaluate = points_to_evaluate or [] self._points_to_evaluate = points_to_evaluate or []
@ -19,57 +19,16 @@ Copyright (c) Microsoft Corporation.
''' '''
import copy import copy
import logging import logging
from collections.abc import Mapping from typing import Any, Dict, Generator, List, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple
import numpy import numpy
import random import random
from ..tune.sample import Categorical, Domain, Function from ..tune.sample import Categorical, Domain
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def flatten_dict(dt, delimiter="/", prevent_delimiter=False):
dt = copy.deepcopy(dt)
if prevent_delimiter and any(delimiter in key for key in dt):
# Raise if delimiter is any of the keys
raise ValueError(
"Found delimiter `{}` in key when trying to flatten array."
"Please avoid using the delimiter in your specification.")
while any(isinstance(v, dict) for v in dt.values()):
remove = []
add = {}
for key, value in dt.items():
if isinstance(value, dict):
for subkey, v in value.items():
if prevent_delimiter and delimiter in subkey:
# Raise if delimiter is in any of the subkeys
raise ValueError(
"Found delimiter `{}` in key when trying to "
"flatten array. Please avoid using the delimiter "
"in your specification.")
add[delimiter.join([key, str(subkey)])] = v
remove.append(key)
dt.update(add)
for k in remove:
del dt[k]
return dt
def unflatten_dict(dt, delimiter="/"):
"""Unflatten dict. Does not support unflattening lists."""
dict_type = type(dt)
out = dict_type()
for key, val in dt.items():
path = key.split(delimiter)
item = out
for k in path[:-1]:
item = item.setdefault(k, dict_type())
item[path[-1]] = val
return out
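Note: these two helpers are deleted here in favour of the copies imported from flaml.tune.trial (see the import hunks above). Their behaviour, for reference:

from flaml.tune.trial import flatten_dict, unflatten_dict

nested = {"a": {"b": 1}, "c": 2}
flat = flatten_dict(nested)            # {'a/b': 1, 'c': 2}
assert unflatten_dict(flat) == nested  # lossless round trip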
class TuneError(Exception): class TuneError(Exception):
"""General error class raised by ray.tune.""" """General error class raised by ray.tune."""
pass pass
@ -84,16 +43,9 @@ def generate_variants(
variants in combination: variants in combination:
"activation": grid_search(["relu", "tanh"]) "activation": grid_search(["relu", "tanh"])
"learning_rate": grid_search([1e-3, 1e-4, 1e-5]) "learning_rate": grid_search([1e-3, 1e-4, 1e-5])
Lambda functions: These are evaluated to produce a concrete value, and
can express dependencies or conditional distributions between values.
They can also be used to express random search (e.g., by calling
into the `random` or `np` module).
"cpu": lambda spec: spec.config.num_workers
"batch_size": lambda spec: random.uniform(1, 1000)
Finally, to support defining specs in plain JSON / YAML, grid search Finally, to support defining specs in plain JSON / YAML, grid search
and lambda functions can also be defined alternatively as follows: can also be defined alternatively as follows:
"activation": {"grid_search": ["relu", "tanh"]} "activation": {"grid_search": ["relu", "tanh"]}
"cpu": {"eval": "spec.config.num_workers"}
Use `format_vars` to format the returned dict of hyperparameters. Use `format_vars` to format the returned dict of hyperparameters.
Yields: Yields:
(Dict of resolved variables, Spec object) (Dict of resolved variables, Spec object)
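Note: with lambda/eval support removed, grid search is the only spec-style syntax generate_variants still resolves, e.g.:

spec = {"activation": {"grid_search": ["relu", "tanh"]},
        "lr": {"grid_search": [1e-3, 1e-4, 1e-5]}}
# yields one resolved dict per combination:
# {'activation': 'relu', 'lr': 0.001}, {'activation': 'relu', 'lr': 0.0001}, ...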
@ -242,10 +194,6 @@ def _try_resolve(v) -> Tuple[bool, Any]:
if isinstance(v, Domain): if isinstance(v, Domain):
# Domain to sample from # Domain to sample from
return False, v return False, v
elif isinstance(v, dict) and len(v) == 1 and "eval" in v:
# Lambda function in eval syntax
return False, Function(
lambda spec: eval(v["eval"], _STANDARD_IMPORTS, {"spec": spec}))
elif isinstance(v, dict) and len(v) == 1 and "grid_search" in v: elif isinstance(v, dict) and len(v) == 1 and "grid_search" in v:
# Grid search values # Grid search values
grid_values = v["grid_search"] grid_values = v["grid_search"]
@ -325,11 +325,6 @@ class Categorical(Domain):
new.set_sampler(self._Uniform()) new.set_sampler(self._Uniform())
return new return new
def grid(self):
new = copy(self)
new.set_sampler(Grid())
return new
def __len__(self): def __len__(self):
return len(self.categories) return len(self.categories)
@ -344,55 +339,6 @@ class Categorical(Domain):
return f"{self.categories}" return f"{self.categories}"
class Function(Domain):
class _CallSampler(BaseSampler):
def sample(self,
domain: "Function",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1):
if domain.pass_spec:
items = [
domain.func(spec[i] if isinstance(spec, list) else spec)
for i in range(size)
]
else:
items = [domain.func() for i in range(size)]
return items if len(items) > 1 else domain.cast(items[0])
default_sampler_cls = _CallSampler
def __init__(self, func: Callable):
sig = signature(func)
pass_spec = True # whether we should pass `spec` when calling `func`
try:
sig.bind({})
except TypeError:
pass_spec = False
if not pass_spec:
try:
sig.bind()
except TypeError as exc:
raise ValueError(
"The function passed to a `Function` parameter must be "
"callable with either 0 or 1 parameters.") from exc
self.pass_spec = pass_spec
self.func = func
def is_function(self):
return True
def is_valid(self, value: Any):
return True # This is user-defined, so lets not assume anything
@property
def domain_str(self):
return f"{self.func}()"
class Quantized(Sampler): class Quantized(Sampler):
def __init__(self, sampler: Sampler, q: Union[float, int]): def __init__(self, sampler: Sampler, q: Union[float, int]):
self.sampler = sampler self.sampler = sampler
@ -439,22 +385,6 @@ class PolynomialExpansionSet:
return "PolynomialExpansionSet" return "PolynomialExpansionSet"
# TODO (krfricke): Remove tune.function
def function(func):
logger.warning(
"DeprecationWarning: wrapping {} with tune.function() is no "
"longer needed".format(func))
return func
def sample_from(func: Callable[[Dict], Any]):
"""Specify that tune should sample configuration values from this function.
Arguments:
func: An callable function to draw a sample from.
"""
return Function(func)
def uniform(lower: float, upper: float): def uniform(lower: float, upper: float):
"""Sample a float value uniformly between ``lower`` and ``upper``. """Sample a float value uniformly between ``lower`` and ``upper``.
Sampling from ``tune.uniform(1, 10)`` is equivalent to sampling from Sampling from ``tune.uniform(1, 10)`` is equivalent to sampling from
@ -90,30 +90,30 @@ def define_by_run_func(
return config return config
def convert_key( # def convert_key(
conf: Dict, space: Dict, path: str = "" # conf: Dict, space: Dict, path: str = ""
) -> Optional[Dict[str, Any]]: # ) -> Optional[Dict[str, Any]]:
"""Convert config keys to define-by-run keys. # """Convert config keys to define-by-run keys.
Returns: # Returns:
A dict with converted keys. # A dict with converted keys.
""" # """
config = {} # config = {}
for key, domain in space.items(): # for key, domain in space.items():
value = conf[key] # value = conf[key]
if path: # if path:
key = path + '/' + key # key = path + '/' + key
if isinstance(domain, dict): # if isinstance(domain, dict):
config.update(convert_key(conf[key], domain, key)) # config.update(convert_key(conf[key], domain, key))
elif isinstance(domain, sample.Categorical): # elif isinstance(domain, sample.Categorical):
index = indexof(domain, value) # index = indexof(domain, value)
config[key + '_choice_'] = index # config[key + '_choice_'] = index
if isinstance(value, dict): # if isinstance(value, dict):
key += f":{index}" # key += f":{index}"
config.update(convert_key(value, domain.categories[index], key)) # config.update(convert_key(value, domain.categories[index], key))
else: # else:
config[key] = value # config[key] = value
return config # return config
def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]: def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]:
@ -306,10 +306,8 @@ def normalize(
elif str(sampler) == 'Normal': elif str(sampler) == 'Normal':
# N(mean, sd) -> N(0,1) # N(mean, sd) -> N(0,1)
config_norm[key] = (value - sampler.mean) / sampler.sd config_norm[key] = (value - sampler.mean) / sampler.sd
else: # else:
# TODO? elif str(sampler) == 'Base': # sample.Function._CallSampler # config_norm[key] = value
# e.g., {test: sample_from(lambda spec: randn(10, 2).sample() * 0.01)}
config_norm[key] = value
return config_norm return config_norm
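Note: the surviving 'Normal' branch standardises a normally sampled value; a worked one-liner:

value, mean, sd = 0.3, 0.1, 0.2
normalized = (value - mean) / sd  # N(mean, sd) -> N(0, 1): gives 1.0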
@ -13,6 +13,7 @@ try:
from ray.tune.analysis import ExperimentAnalysis as EA from ray.tune.analysis import ExperimentAnalysis as EA
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .analysis import ExperimentAnalysis as EA from .analysis import ExperimentAnalysis as EA
from .result import DEFAULT_METRIC
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -33,7 +34,7 @@ class ExperimentAnalysis(EA):
super().__init__(self, None, trials, metric, mode) super().__init__(self, None, trials, metric, mode)
except (TypeError, ValueError): except (TypeError, ValueError):
self.trials = trials self.trials = trials
self.default_metric = metric or '_default_anonymous_metric' self.default_metric = metric or DEFAULT_METRIC
self.default_mode = mode self.default_mode = mode
@ -82,7 +83,7 @@ def report(_metric=None, **kwargs):
if _verbose == 2: if _verbose == 2:
logger.info(f"result: {kwargs}") logger.info(f"result: {kwargs}")
if _metric: if _metric:
result['_default_anonymous_metric'] = _metric result[DEFAULT_METRIC] = _metric
trial = _runner.running_trial trial = _runner.running_trial
if _running_trial == trial: if _running_trial == trial:
_training_iteration += 1 _training_iteration += 1
@ -105,12 +106,13 @@ def report(_metric=None, **kwargs):
def run(training_function, def run(training_function,
config: Optional[dict] = None, config: Optional[dict] = None,
points_to_evaluate: Optional[List[dict]] = None,
low_cost_partial_config: Optional[dict] = None, low_cost_partial_config: Optional[dict] = None,
cat_hp_cost: Optional[dict] = None, cat_hp_cost: Optional[dict] = None,
metric: Optional[str] = None, metric: Optional[str] = None,
mode: Optional[str] = None, mode: Optional[str] = None,
time_budget_s: Union[int, float, datetime.timedelta] = None, time_budget_s: Union[int, float, datetime.timedelta] = None,
points_to_evaluate: Optional[List[dict]] = None,
evaluated_rewards: Optional[List] = None,
prune_attr: Optional[str] = None, prune_attr: Optional[str] = None,
min_resource: Optional[float] = None, min_resource: Optional[float] = None,
max_resource: Optional[float] = None, max_resource: Optional[float] = None,
@ -155,8 +157,6 @@ def run(training_function,
Args: Args:
training_function: A user-defined training function. training_function: A user-defined training function.
config: A dictionary to specify the search space. config: A dictionary to specify the search space.
points_to_evaluate: A list of initial hyperparameter
configurations to run first.
low_cost_partial_config: A dictionary from a subset of low_cost_partial_config: A dictionary from a subset of
controlled dimensions to the initial low-cost values. controlled dimensions to the initial low-cost values.
e.g., e.g.,
@ -179,6 +179,14 @@ def run(training_function,
mode: A string in ['min', 'max'] to specify the objective as mode: A string in ['min', 'max'] to specify the objective as
minimization or maximization. minimization or maximization.
time_budget_s: A float of the time budget in seconds. time_budget_s: A float of the time budget in seconds.
points_to_evaluate: A list of initial hyperparameter
configurations to run first.
evaluated_rewards (list): If you have previously evaluated the
parameters passed in as points_to_evaluate, you can avoid
re-running those trials by passing in the reward attributes
as a list, so the optimiser can be told the results without
needing to re-compute the trial. Must be the same length as
points_to_evaluate.
prune_attr: A string of the attribute used for pruning. prune_attr: A string of the attribute used for pruning.
Not necessarily in space. Not necessarily in space.
When prune_attr is in space, it is a hyperparameter, e.g., When prune_attr is in space, it is a hyperparameter, e.g.,
@ -259,9 +267,10 @@ def run(training_function,
if search_alg is None: if search_alg is None:
from ..searcher.blendsearch import BlendSearch from ..searcher.blendsearch import BlendSearch
search_alg = BlendSearch( search_alg = BlendSearch(
metric=metric or '_default_anonymous_metric', mode=mode, metric=metric or DEFAULT_METRIC, mode=mode,
space=config, space=config,
points_to_evaluate=points_to_evaluate, points_to_evaluate=points_to_evaluate,
evaluated_rewards=evaluated_rewards,
low_cost_partial_config=low_cost_partial_config, low_cost_partial_config=low_cost_partial_config,
cat_hp_cost=cat_hp_cost, cat_hp_cost=cat_hp_cost,
prune_attr=prune_attr, prune_attr=prune_attr,
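Note: a minimal usage sketch of the new run() arguments (objective and space are illustrative; mirrors the nested-space test at the end of this diff):

from flaml import tune

points = [{"x": 1}, {"x": 2}]
analysis = tune.run(
    lambda config: tune.report(obj=(config["x"] - 3) ** 2),
    config={"x": tune.randint(0, 10)},
    metric="obj", mode="min",
    points_to_evaluate=points,
    evaluated_rewards=[4, 1],  # (1-3)**2 and (2-3)**2, precomputed
    time_budget_s=1, num_samples=10)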
@ -842,12 +842,12 @@
"class MyRegularizedGreedyForest(SKLearnEstimator):\n", "class MyRegularizedGreedyForest(SKLearnEstimator):\n",
"\n", "\n",
"\n", "\n",
" def __init__(self, task='binary:logistic', n_jobs=1, **params):\n", " def __init__(self, task='binary', n_jobs=1, **params):\n",
" '''Constructor\n", " '''Constructor\n",
" \n", " \n",
" Args:\n", " Args:\n",
" task: A string of the task type, one of\n", " task: A string of the task type, one of\n",
" 'binary:logistic', 'multi:softmax', 'regression'\n", " 'binary', 'multi', 'regression'\n",
" n_jobs: An integer of the number of parallel threads\n", " n_jobs: An integer of the number of parallel threads\n",
" params: A dictionary of the hyperparameter names and values\n", " params: A dictionary of the hyperparameter names and values\n",
" '''\n", " '''\n",
@ -855,7 +855,7 @@
" super().__init__(task, **params)\n", " super().__init__(task, **params)\n",
"\n", "\n",
" '''task=regression for RGFRegressor; \n", " '''task=regression for RGFRegressor; \n",
" binary:logistic and multiclass:softmax for RGFClassifier'''\n", " binary or multiclass for RGFClassifier'''\n",
" if 'regression' in task:\n", " if 'regression' in task:\n",
" self.estimator_class = RGFRegressor\n", " self.estimator_class = RGFRegressor\n",
" else:\n", " else:\n",
@ -17,7 +17,7 @@ from flaml import tune
class MyRegularizedGreedyForest(SKLearnEstimator): class MyRegularizedGreedyForest(SKLearnEstimator):
def __init__(self, task='binary:logistic', n_jobs=1, max_leaf=4, def __init__(self, task='binary', n_jobs=1, max_leaf=4,
n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0, n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0,
min_samples_leaf=1, **params): min_samples_leaf=1, **params):
@ -264,6 +264,7 @@ class TestAutoML(unittest.TestCase):
"model_history": True, "model_history": True,
"sample_weight": np.ones(len(y)), "sample_weight": np.ones(len(y)),
"pred_time_limit": 1e-5, "pred_time_limit": 1e-5,
"ensemble": True,
} }
automl_experiment.fit(**automl_settings) automl_experiment.fit(**automl_settings)
print(automl_experiment.classes_) print(automl_experiment.classes_)
@ -382,23 +383,25 @@ class TestAutoML(unittest.TestCase):
def test_roc_auc_ovr(self): def test_roc_auc_ovr(self):
automl_experiment = AutoML() automl_experiment = AutoML()
X_train, y_train = load_iris(return_X_y=True)
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
"metric": "roc_auc_ovr", "metric": "roc_auc_ovr",
"task": "classification", "task": "classification",
"log_file_name": "test/roc_auc_ovr.log", "log_file_name": "test/roc_auc_ovr.log",
"log_training_metric": True, "log_training_metric": True,
"n_jobs": 1, "n_jobs": 1,
"sample_weight": np.ones(len(y_train)),
"eval_method": "holdout",
"model_history": True "model_history": True
} }
X_train, y_train = load_iris(return_X_y=True)
automl_experiment.fit( automl_experiment.fit(
X_train=X_train, y_train=y_train, **automl_settings) X_train=X_train, y_train=y_train, **automl_settings)
def test_roc_auc_ovo(self): def test_roc_auc_ovo(self):
automl_experiment = AutoML() automl_experiment = AutoML()
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
"metric": "roc_auc_ovo", "metric": "roc_auc_ovo",
"task": "classification", "task": "classification",
"log_file_name": "test/roc_auc_ovo.log", "log_file_name": "test/roc_auc_ovo.log",
@ -438,6 +441,11 @@ class TestAutoML(unittest.TestCase):
log_file_name=automl_settings["log_file_name"], log_file_name=automl_settings["log_file_name"],
X_train=X_train, y_train=y_train, X_train=X_train, y_train=y_train,
train_full=True, time_budget=1) train_full=True, time_budget=1)
automl_experiment.retrain_from_log(
task="regression",
log_file_name=automl_settings["log_file_name"],
X_train=X_train, y_train=y_train,
train_full=True, time_budget=0)
def test_sparse_matrix_classification(self): def test_sparse_matrix_classification(self):
automl_experiment = AutoML() automl_experiment = AutoML()
@ -565,13 +573,14 @@ class TestAutoML(unittest.TestCase):
except ImportError: except ImportError:
return return
def test_parallel_xgboost_random(self): def test_parallel_xgboost_others(self):
# use random search as the hpo_method # use random search as the hpo_method
self.test_parallel_xgboost(hpo_method='random') self.test_parallel_xgboost(hpo_method='random')
def test_random_out_of_memory(self): def test_random_out_of_memory(self):
automl_experiment = AutoML() automl_experiment = AutoML()
automl_experiment.add_learner(learner_name='large_lgbm', learner_class=MyLargeLGBM) automl_experiment.add_learner(
learner_name='large_lgbm', learner_class=MyLargeLGBM)
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 2,
"metric": 'ap', "metric": 'ap',
@ -620,13 +629,13 @@ class TestAutoML(unittest.TestCase):
print(automl_experiment.best_iteration) print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator) print(automl_experiment.best_estimator)
def test_sparse_matrix_regression_cv(self): def test_sparse_matrix_regression_holdout(self):
X_train = scipy.sparse.random(8, 100) X_train = scipy.sparse.random(8, 100)
y_train = np.random.uniform(size=8) y_train = np.random.uniform(size=8)
automl_experiment = AutoML() automl_experiment = AutoML()
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
'eval_method': 'cv', 'eval_method': 'holdout',
"task": 'regression', "task": 'regression',
"log_file_name": "test/sparse_regression.log", "log_file_name": "test/sparse_regression.log",
"n_jobs": 1, "n_jobs": 1,
@ -21,6 +21,7 @@ def test_forecast_automl(budget=5):
"task": 'forecast', # task type "task": 'forecast', # task type
"log_file_name": 'CO2_forecast.log', # flaml log file "log_file_name": 'CO2_forecast.log', # flaml log file
"eval_method": "holdout", "eval_method": "holdout",
"label": ('ds', 'y'),
} }
'''The main flaml automl API''' '''The main flaml automl API'''
try: try:
@ -1,7 +1,7 @@
from openml.exceptions import OpenMLServerException from openml.exceptions import OpenMLServerException
def test_automl(budget=5, dataset_format='dataframe'): def test_automl(budget=5, dataset_format='dataframe', hpo_method=None):
from flaml.data import load_openml_dataset from flaml.data import load_openml_dataset
try: try:
X_train, X_test, y_train, y_test = load_openml_dataset( X_train, X_test, y_train, y_test = load_openml_dataset(
@ -18,6 +18,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
"task": 'classification', # task type "task": 'classification', # task type
"log_file_name": 'airlines_experiment.log', # flaml log file "log_file_name": 'airlines_experiment.log', # flaml log file
"seed": 7654321, # random seed "seed": 7654321, # random seed
'hpo_method': hpo_method
} }
'''The main flaml automl API''' '''The main flaml automl API'''
automl.fit(X_train=X_train, y_train=y_train, **settings) automl.fit(X_train=X_train, y_train=y_train, **settings)
@ -52,7 +53,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
def test_automl_array(): def test_automl_array():
test_automl(5, 'array') test_automl(5, 'array', 'bs')
def test_mlflow(): def test_mlflow():
@ -81,8 +82,11 @@ def test_mlflow():
mlflow.set_experiment("flaml") mlflow.set_experiment("flaml")
with mlflow.start_run(): with mlflow.start_run():
'''The main flaml automl API''' '''The main flaml automl API'''
automl.fit(X_train=X_train, y_train=y_train, **settings) automl.fit(
X_train=X_train, y_train=y_train, **settings)
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"]) # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
automl._mem_thres = 0
print(automl.trainable(automl.points_to_evaluate[0]))
if __name__ == "__main__": if __name__ == "__main__":
@ -41,6 +41,7 @@ class TestLogging(unittest.TestCase):
} }
X_train, y_train = load_boston(return_X_y=True) X_train, y_train = load_boston(return_X_y=True)
n = len(y_train) >> 1 n = len(y_train) >> 1
print(automl.model, automl.classes_, automl.predict(X_train))
automl.fit(X_train=X_train[:n], y_train=y_train[:n], automl.fit(X_train=X_train[:n], y_train=y_train[:n],
X_val=X_train[n:], y_val=y_train[n:], X_val=X_train[n:], y_val=y_train[n:],
**automl_settings) **automl_settings)
@ -81,6 +82,8 @@ class TestLogging(unittest.TestCase):
time_budget_s=1, num_samples=-1) time_budget_s=1, num_samples=-1)
print(min(trial.last_result["val_loss"] print(min(trial.last_result["val_loss"]
for trial in analysis.trials)) for trial in analysis.trials))
config = analysis.trials[-1].last_result['config']['ml']
automl._state._train_with_config(config['learner'], config)
# Check if the log buffer is populated. # Check if the log buffer is populated.
self.assertTrue(len(buf.getvalue()) > 0) self.assertTrue(len(buf.getvalue()) > 0)
@ -16,9 +16,9 @@ class TestTrainingLog(unittest.TestCase):
filename = os.path.join(d, path) filename = os.path.join(d, path)
# Run a simple job. # Run a simple job.
automl_experiment = AutoML() automl = AutoML()
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
"metric": 'mse', "metric": 'mse',
"task": 'regression', "task": 'regression',
"log_file_name": filename, "log_file_name": filename,
@ -29,10 +29,12 @@ class TestTrainingLog(unittest.TestCase):
"train_time_limit": 0.01, "train_time_limit": 0.01,
"verbose": 3, "verbose": 3,
"ensemble": True, "ensemble": True,
"keep_search_state": True,
} }
X_train, y_train = load_boston(return_X_y=True) X_train, y_train = load_boston(return_X_y=True)
automl_experiment.fit(X_train=X_train, y_train=y_train, automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
**automl_settings) automl._state._train_with_config(
automl.best_estimator, automl.best_config)
# Check if the training log file is populated. # Check if the training log file is populated.
self.assertTrue(os.path.exists(filename)) self.assertTrue(os.path.exists(filename))
@ -44,8 +46,10 @@ class TestTrainingLog(unittest.TestCase):
self.assertGreater(count, 0) self.assertGreater(count, 0)
automl_settings["log_file_name"] = None automl_settings["log_file_name"] = None
automl_experiment.fit(X_train=X_train, y_train=y_train, automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
**automl_settings) automl._selected.update(None, 0)
automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train, max_iter=0)
def test_illfilename(self): def test_illfilename(self):
try: try:
@ -76,7 +76,7 @@ def test_simple(method=None):
print(analysis.trials[-1]) print(analysis.trials[-1])
def _test_optuna(): def test_optuna():
test_simple(method="optuna") test_simple(method="optuna")
test/tune/test_sample.py Normal file
@ -0,0 +1,18 @@
from flaml.tune.sample import (
BaseSampler, PolynomialExpansionSet, Domain,
uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform, lograndint, qlograndint)
def test_sampler():
print(randn().sample(size=2))
print(PolynomialExpansionSet(), BaseSampler())
print(qrandn(2, 10, 2).sample(size=2))
c = choice([1, 2])
print(c.domain_str, len(c), c.is_valid(3))
i = randint(1, 10)
print(i.domain_str, i.is_valid(10))
d = Domain()
print(d.domain_str, d.is_function())
d.default_sampler_cls = BaseSampler
print(d.get_sampler())
test/tune/test_searcher.py Normal file
@ -0,0 +1,126 @@
from flaml.searcher.blendsearch import CFO
import numpy as np
try:
from ray import __version__ as ray_version
assert ray_version >= '1.0.0'
from ray.tune import sample
except (ImportError, AssertionError):
from flaml.tune import sample
from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter
from flaml.searcher.blendsearch import BlendSearch
def define_search_space(trial):
trial.suggest_float("a", 6, 8)
trial.suggest_float("b", 1e-4, 1e-2, log=True)
def test_searcher():
searcher = Searcher()
searcher = Searcher(metric=['m1', 'm2'], mode=['max', 'min'])
searcher.set_search_properties(None, None, None)
searcher.suggest = searcher.on_pause = searcher.on_unpause = lambda _: {}
searcher.on_trial_complete = lambda trial_id, result, error: None
searcher = ConcurrencyLimiter(searcher, max_concurrent=2, batch=True)
searcher.suggest("t1")
searcher.suggest("t2")
searcher.on_pause("t1")
searcher.on_unpause("t1")
searcher.suggest("t3")
searcher.on_trial_complete("t1", {})
searcher.on_trial_complete("t2", {})
searcher.set_state({})
print(searcher.get_state())
import optuna
config = {
"a": optuna.distributions.UniformDistribution(6, 8),
"b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2),
}
searcher = OptunaSearch(
config, points_to_evaluate=[{"a": 6, "b": 1e-3}],
evaluated_rewards=[{'m': 2}], metric='m', mode='max'
)
config = {
"a": sample.uniform(6, 8),
"b": sample.loguniform(1e-4, 1e-2)
}
searcher = OptunaSearch(
config, points_to_evaluate=[{"a": 6, "b": 1e-3}],
evaluated_rewards=[{'m': 2}], metric='m', mode='max'
)
searcher = OptunaSearch(
define_search_space, points_to_evaluate=[{"a": 6, "b": 1e-3}],
# evaluated_rewards=[{'m': 2}], metric='m', mode='max'
mode='max'
)
searcher = OptunaSearch()
# searcher.set_search_properties('m', 'min', define_search_space)
searcher.set_search_properties('m', 'min', config)
searcher.suggest('t1')
searcher.on_trial_complete('t1', None, False)
searcher.suggest('t2')
searcher.on_trial_complete('t2', None, True)
searcher.suggest('t3')
searcher.on_trial_complete('t3', {'m': np.nan})
searcher.save('test/tune/optuna.pickle')
searcher.restore('test/tune/optuna.pickle')
searcher = BlendSearch(
metric="m",
global_search_alg=searcher, metric_constraints=[("c", "<", 1)])
searcher.set_search_properties(metric="m2", config=config)
searcher.set_search_properties(config={"time_budget_s": 0})
c = searcher.suggest('t1')
searcher.on_trial_complete("t1", {"config": c}, True)
c = searcher.suggest('t2')
searcher.on_trial_complete(
"t2", {"config": c, "m2": 1, "c": 2, "time_total_s": 1})
config1 = config.copy()
config1['_choice_'] = 0
searcher._expand_admissible_region(
lower={'root': [{'a': 0.5}, {'a': 0.4}]},
upper={'root': [{'a': 0.9}, {'a': 0.8}]},
space={'root': config1},
)
searcher = CFO(
metric='m', mode='min', space=config,
points_to_evaluate=[{'a': 7, 'b': 1e-3}, {'a': 6, 'b': 3e-4}],
evaluated_rewards=[1, 1])
searcher.suggest("t1")
searcher.suggest("t2")
searcher.on_trial_result('t3', {})
c = searcher.generate_parameters(1)
searcher.receive_trial_result(1, c, {'reward': 0})
searcher.update_search_space(
{
"a": {
"_value": [1, 2],
"_type": "choice",
},
"b": {
"_value": [1, 3],
"_type": "randint",
},
"c": {
"_value": [.1, 3],
"_type": "uniform",
},
"d": {
"_value": [2, 8, 2],
"_type": "quniform",
},
"e": {
"_value": [2, 8],
"_type": "loguniform",
},
"f": {
"_value": [2, 8, 2],
"_type": "qloguniform",
},
"g": {
"_value": [0, 2],
"_type": "normal",
},
"h": {
"_value": [0, 2, 2],
"_type": "qnormal",
},
}
)
@ -15,7 +15,7 @@ import xgboost as xgb
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
os.makedirs('logs', exist_ok=True) os.makedirs('logs', exist_ok=True)
logger.addHandler(logging.FileHandler('logs/tune_xgboost.log')) logger.addHandler(logging.FileHandler('logs/tune.log'))
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
@ -223,12 +223,22 @@ def test_nested():
logger.info(f"BlendSearch exp best config: {best_trial.config}") logger.info(f"BlendSearch exp best config: {best_trial.config}")
logger.info(f"BlendSearch exp best result: {best_trial.last_result}") logger.info(f"BlendSearch exp best result: {best_trial.last_result}")
points_to_evaluate = [
{"b": .99, "cost_related": {"a": 3}},
{"b": .99, "cost_related": {"a": 2}},
]
analysis = tune.run( analysis = tune.run(
simple_func, simple_func,
config=search_space, config=search_space,
low_cost_partial_config={ low_cost_partial_config={
"cost_related": {"a": 1} "cost_related": {"a": 1}
}, },
points_to_evaluate=points_to_evaluate,
evaluated_rewards=[
(config["cost_related"]["a"] - 4)**2
+ (config["b"] - config["cost_related"]["a"])**2
for config in points_to_evaluate
],
metric="obj", metric="obj",
mode="min", mode="min",
metric_constraints=[("ab", "<=", 4)], metric_constraints=[("ab", "<=", 4)],