From 92b79221b67cad7e7d4f7fc7c622d3f58c513a7e Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Tue, 6 Dec 2022 10:13:39 -0800 Subject: [PATCH] make performance test reproducible (#837) * make performance test reproducible * fix test error * Doc update and disable logging * document random_state and version * remove hardcoded budget * fix test error and dependency; close #777 * iloc --- flaml/automl.py | 101 +++++++++++++----- flaml/ml.py | 17 ++- flaml/model.py | 88 ++++++++------- flaml/tune/searcher/blendsearch.py | 3 +- flaml/version.py | 2 +- notebook/automl_classification.ipynb | 12 +-- setup.py | 1 + test/automl/test_multiclass.py | 1 + test/automl/test_notebook_example.py | 12 ++- test/automl/test_training_log.py | 2 +- website/docs/Examples/AutoML-for-LightGBM.md | 9 ++ website/docs/Examples/AutoML-for-XGBoost.md | 9 ++ .../Integrate - Scikit-learn Pipeline.md | 2 + .../Examples/Tune-Lexicographic-objectives.md | 2 +- .../docs/Use-Cases/Task-Oriented-AutoML.md | 15 +-- 15 files changed, 185 insertions(+), 91 deletions(-) diff --git a/flaml/automl.py b/flaml/automl.py index cff46230a..664e712cf 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -119,8 +119,9 @@ class SearchState: period=None, custom_hp=None, max_iter=None, + budget=None, ): - self.init_eci = learner_class.cost_relative2lgbm() + self.init_eci = learner_class.cost_relative2lgbm() if budget >= 0 else 1 self._search_space_domain = {} self.init_config = None self.low_cost_partial_config = {} @@ -128,6 +129,7 @@ class SearchState: self.data_size = data_size self.ls_ever_converged = False self.learner_class = learner_class + self._budget = budget if task in TS_FORECAST: search_space = learner_class.search_space( data_size=data_size, task=task, pred_horizon=period @@ -240,7 +242,7 @@ class SearchState: obj, time2eval, trained_estimator = np.inf, 0.0, None metric_for_logging = config = None self.trial_time = time2eval - self.total_time_used += time_used + self.total_time_used += time_used if self._budget >= 0 else 1 self.total_iter += 1 if self.base_eci is None: @@ -291,14 +293,25 @@ class AutoMLState: sampled_X_train = self.X_train.iloc[:sample_size] else: sampled_X_train = self.X_train[:sample_size] - sampled_y_train = self.y_train[:sample_size] + if isinstance(self.y_train, pd.Series): + sampled_y_train = self.y_train.iloc[:sample_size] + else: + sampled_y_train = self.y_train[:sample_size] weight = self.fit_kwargs.get( "sample_weight" ) # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator if weight is not None: - sampled_weight = weight[:sample_size] + sampled_weight = ( + weight.iloc[:sample_size] + if isinstance(weight, pd.Series) + else weight[:sample_size] + ) if self.groups is not None: - groups = self.groups[:sample_size] + groups = ( + self.groups.iloc[:sample_size] + if isinstance(self.groups, pd.Series) + else self.groups[:sample_size] + ) else: sampled_X_train = self.X_train_all sampled_y_train = self.y_train_all @@ -336,7 +349,7 @@ class AutoMLState: del config["FLAML_sample_size"] budget = ( None - if state.time_budget is None + if state.time_budget < 0 else state.time_budget - state.time_from_start if sample_size == state.data_size[0] else (state.time_budget - state.time_from_start) @@ -360,7 +373,7 @@ class AutoMLState: state.groups_val, state.train_time_limit if budget is None - else min(budget, state.train_time_limit), + else min(budget, state.train_time_limit or np.inf), state.kf, config, state.task, @@ -373,6 +386,7 @@ class AutoMLState: state.cv_score_agg_func, 
state.log_training_metric, this_estimator_kwargs, + state.free_mem_ratio, ) if state.retrain_final and not state.model_history: trained_estimator.cleanup() @@ -432,9 +446,7 @@ class AutoMLState: ] = groups # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator budget = ( - None - if self.time_budget is None - else self.time_budget - self.time_from_start + None if self.time_budget < 0 else self.time_budget - self.time_from_start ) estimator, train_time = train_estimator( @@ -448,6 +460,7 @@ class AutoMLState: budget=budget, fit_kwargs=this_estimator_kwargs, # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator eval_metric=self.metric if hasattr(self, "metric") else "train_time", + free_mem_ratio=self.free_mem_ratio, ) if sampled_weight is not None: @@ -648,6 +661,7 @@ class AutoML(BaseEstimator): datasets, but will incur more overhead in time. If dict: the dict contains the keywords arguments to be passed to [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html). + free_mem_ratio: float between 0 and 1, default=0. The free memory ratio to keep during training. metric_constraints: list, default=[] | The list of metric constraints. Each element in this list is a 3-tuple, which shall be expressed in the following format: the first element of the 3-tuple is the name of the @@ -724,7 +738,7 @@ class AutoML(BaseEstimator): settings["log_training_metric"] = settings.get("log_training_metric", False) settings["mem_thres"] = settings.get("mem_thres", MEM_THRES) settings["pred_time_limit"] = settings.get("pred_time_limit", np.inf) - settings["train_time_limit"] = settings.get("train_time_limit", np.inf) + settings["train_time_limit"] = settings.get("train_time_limit", None) settings["verbose"] = settings.get("verbose", 3) settings["retrain_full"] = settings.get("retrain_full", True) settings["split_type"] = settings.get("split_type", "auto") @@ -738,6 +752,7 @@ class AutoML(BaseEstimator): settings["append_log"] = settings.get("append_log", False) settings["min_sample_size"] = settings.get("min_sample_size", MIN_SAMPLE_TRAIN) settings["use_ray"] = settings.get("use_ray", False) + settings["free_mem_ratio"] = settings.get("free_mem_ratio", 0) settings["metric_constraints"] = settings.get("metric_constraints", []) settings["cv_score_agg_func"] = settings.get("cv_score_agg_func", None) settings["fit_kwargs_by_estimator"] = settings.get( @@ -1271,6 +1286,8 @@ class AutoML(BaseEstimator): ] = ( self._state.sample_weight_all ) # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator + if isinstance(self._state.sample_weight_all, pd.Series): + self._state.sample_weight_all.reset_index(drop=True, inplace=True) else: X_train_all, y_train_all = shuffle( X_train_all, y_train_all, random_state=RANDOM_SEED @@ -1394,6 +1411,7 @@ class AutoML(BaseEstimator): rest ], # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator test_size=split_ratio, + stratify=stratify, random_state=RANDOM_SEED, ) weight1 = self._state.fit_kwargs["sample_weight"][ @@ -1796,7 +1814,8 @@ class AutoML(BaseEstimator): self.modelcount = 0 self._auto_augment = auto_augment self._prepare_data(eval_method, split_ratio, n_splits) - self._state.time_budget = None + self._state.time_budget = -1 + self._state.free_mem_ratio = 0 self._state.n_jobs = n_jobs import os @@ -1885,7 +1904,7 @@ class AutoML(BaseEstimator): return eval_method nrow, dim = self._nrow, self._ndim if ( - time_budget is None + time_budget < 0 or nrow * dim / 0.9 < 
SMALL_LARGE_THRES * (time_budget / 3600) and nrow < CV_HOLDOUT_THRESHOLD ): @@ -2145,6 +2164,7 @@ class AutoML(BaseEstimator): auto_augment=None, min_sample_size=None, use_ray=None, + free_mem_ratio=0, metric_constraints=None, custom_hp=None, cv_score_agg_func=None, @@ -2250,7 +2270,7 @@ class AutoML(BaseEstimator): mem_thres: A float of the memory size constraint in bytes. pred_time_limit: A float of the prediction latency constraint in seconds. It refers to the average prediction time per row in validation data. - train_time_limit: A float of the training time constraint in seconds. + train_time_limit: None or a float of the training time constraint in seconds. X_val: None or a numpy array or a pandas dataframe of validation data. y_val: None or a numpy array or a pandas series of validation labels. sample_weight_val: None or a numpy array of the sample weight of @@ -2337,6 +2357,7 @@ class AutoML(BaseEstimator): datasets, but will incur more overhead in time. If dict: the dict contains the keywords arguments to be passed to [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html). + free_mem_ratio: float between 0 and 1, default=0. The free memory ratio to keep during training. metric_constraints: list, default=[] | The list of metric constraints. Each element in this list is a 3-tuple, which shall be expressed in the following format: the first element of the 3-tuple is the name of the @@ -2523,7 +2544,7 @@ class AutoML(BaseEstimator): self._settings.get("early_stop") if early_stop is None else early_stop ) # no search budget is provided? - no_budget = time_budget == -1 and max_iter is None and not early_stop + no_budget = time_budget < 0 and max_iter is None and not early_stop append_log = ( self._settings.get("append_log") if append_log is None else append_log ) @@ -2562,7 +2583,11 @@ class AutoML(BaseEstimator): X_train = ray.get(X_train) elif isinstance(dataframe, ray.ObjectRef): dataframe = ray.get(dataframe) - + self._state.free_mem_ratio = ( + self._settings.get("free_mem_ratio") + if free_mem_ratio is None + else free_mem_ratio + ) self._state.task = task self._state.log_training_metric = log_training_metric @@ -2835,8 +2860,8 @@ class AutoML(BaseEstimator): except FileNotFoundError: pass + self._state.time_budget = time_budget starting_points = {} if starting_points == "static" else starting_points - for estimator_name in estimator_list: estimator_class = self._state.learner_classes[estimator_name] estimator_class.init() @@ -2869,10 +2894,10 @@ class AutoML(BaseEstimator): max_iter=max_iter / len(estimator_list) if self._learner_selector == "roundrobin" else max_iter, + budget=self._state.time_budget, ) logger.info("List of ML learners in AutoML Run: {}".format(estimator_list)) self.estimator_list = estimator_list - self._state.time_budget = time_budget if time_budget > 0 else 1e10 self._active_estimators = estimator_list.copy() self._ensemble = ensemble self._max_iter = max_iter @@ -2907,6 +2932,7 @@ class AutoML(BaseEstimator): ) if ( self._hpo_method in ("cfo", "bs") + and self._state.time_budget > 0 and (self._time_taken_best_iter >= self._state.time_budget * 0.7) and not all( state.search_alg and state.search_alg.searcher.is_ls_ever_converged @@ -2973,7 +2999,11 @@ class AutoML(BaseEstimator): ) space = self.search_space self._state.time_from_start = time.time() - self._start_time_flag - time_left = self._state.time_budget - self._state.time_from_start + time_budget_s = ( + self._state.time_budget - self._state.time_from_start + if 
self._state.time_budget >= 0 + else None + ) if self._hpo_method != "optuna": min_resource = self.min_resource if isinstance(min_resource, dict): @@ -2999,7 +3029,8 @@ class AutoML(BaseEstimator): ], metric_constraints=self.metric_constraints, seed=self._seed, - time_budget_s=time_left, + time_budget_s=time_budget_s, + num_samples=self._max_iter, allow_empty_config=True, ) else: @@ -3032,7 +3063,7 @@ class AutoML(BaseEstimator): metric="val_loss", mode="min", resources_per_trial=resources_per_trial, - time_budget_s=self._state.time_budget, + time_budget_s=time_budget_s, num_samples=self._max_iter, verbose=max(self.verbose - 2, 0), raise_on_failed_trial=False, @@ -3217,6 +3248,11 @@ class AutoML(BaseEstimator): points_to_evaluate = search_state.init_config.copy() low_cost_partial_config = search_state.low_cost_partial_config + time_budget_s = ( + min(budget_left, self._state.train_time_limit or np.inf) + if self._state.time_budget >= 0 + else None + ) if self._hpo_method in ("bs", "cfo", "grid", "cfocat", "random"): algo = SearchAlgo( metric="val_loss", @@ -3234,6 +3270,8 @@ class AutoML(BaseEstimator): metric_constraints=self.metric_constraints, seed=self._seed, allow_empty_config=True, + time_budget_s=time_budget_s, + num_samples=self._max_iter, ) else: # if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match @@ -3272,7 +3310,7 @@ class AutoML(BaseEstimator): analysis = tune.run( search_state.training_function, search_alg=search_state.search_alg, - time_budget_s=min(budget_left, self._state.train_time_limit), + time_budget_s=time_budget_s, verbose=max(self.verbose - 3, 0), use_ray=False, ) @@ -3408,7 +3446,7 @@ class AutoML(BaseEstimator): est_retrain_time = 0 self._state.time_from_start = time.time() - self._start_time_flag if ( - self._state.time_from_start >= self._state.time_budget + self._state.time_from_start >= self._state.time_budget >= 0 or not self._active_estimators ): break @@ -3581,17 +3619,18 @@ class AutoML(BaseEstimator): elif self._state.retrain_final: # reset time budget for retraining if self._max_iter > 1: - self._state.time_from_start -= self._state.time_budget + self._state.time_budget = -1 if ( self._state.task in TS_FORECAST or self._trained_estimator is None or self._trained_estimator.model is None or ( - self._state.time_budget - self._state.time_from_start + self._state.time_budget < 0 + or self._state.time_budget - self._state.time_from_start > self._selected.est_retrain_time(self.data_size_full) - and self._selected.best_config_sample_size - == self._state.data_size[0] ) + and self._selected.best_config_sample_size + == self._state.data_size[0] ): state = self._search_states[self._best_estimator] ( @@ -3638,7 +3677,8 @@ class AutoML(BaseEstimator): ): # sample_size=None meaning no result search_state = self._search_states[estimator] if ( - self._search_states[estimator].time2eval_best + self._state.time_budget >= 0 + and self._search_states[estimator].time2eval_best > self._state.time_budget - self._state.time_from_start or self._iter_per_learner_fullsize[estimator] >= self._max_iter_per_learner @@ -3646,7 +3686,10 @@ class AutoML(BaseEstimator): inv.append(0) continue estimated_cost = search_state.estimated_cost4improvement - if search_state.sample_size < self._state.data_size[0]: + if ( + search_state.sample_size < self._state.data_size[0] + and self._state.time_budget >= 0 + ): estimated_cost = min( estimated_cost, search_state.time2eval_best diff --git a/flaml/ml.py b/flaml/ml.py index 31825308f..ea7edfbd5 
100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -432,6 +432,7 @@ def get_val_loss( budget=None, log_training_metric=False, fit_kwargs={}, + free_mem_ratio=0, ): start = time.time() @@ -439,7 +440,7 @@ def get_val_loss( # fit_kwargs['groups_val'] = groups_val # fit_kwargs['X_val'] = X_val # fit_kwargs['y_val'] = y_val - estimator.fit(X_train, y_train, budget, **fit_kwargs) + estimator.fit(X_train, y_train, budget, free_mem_ratio, **fit_kwargs) val_loss, metric_for_logging, pred_time, _ = _eval_estimator( config, estimator, @@ -494,6 +495,7 @@ def evaluate_model_CV( cv_score_agg_func=None, log_training_metric=False, fit_kwargs={}, + free_mem_ratio=0, ): if cv_score_agg_func is None: cv_score_agg_func = default_cv_score_agg_func @@ -524,7 +526,7 @@ def evaluate_model_CV( else: kf = kf.split(X_train_split) rng = np.random.RandomState(2020) - budget_per_train = budget / n + budget_per_train = budget and budget / n if "sample_weight" in fit_kwargs: weight = fit_kwargs["sample_weight"] weight_val = None @@ -565,6 +567,7 @@ def evaluate_model_CV( budget_per_train, log_training_metric=log_training_metric, fit_kwargs=fit_kwargs, + free_mem_ratio=free_mem_ratio, ) if isinstance(metric_i, dict) and "intermediate_results" in metric_i.keys(): del metric_i["intermediate_results"] @@ -575,7 +578,7 @@ def evaluate_model_CV( log_metric_folds.append(metric_i) train_time += train_time_i pred_time += pred_time_i - if time.time() - start_time >= budget: + if budget and time.time() - start_time >= budget: break val_loss, metric = cv_score_agg_func(val_loss_folds, log_metric_folds) n = total_fold_num @@ -603,6 +606,7 @@ def compute_estimator( cv_score_agg_func=None, log_training_metric=False, fit_kwargs={}, + free_mem_ratio=0, ): estimator_class = estimator_class or get_estimator_class(task, estimator_name) estimator = estimator_class( @@ -635,6 +639,7 @@ def compute_estimator( budget=budget, log_training_metric=log_training_metric, fit_kwargs=fit_kwargs, + free_mem_ratio=0, ) else: val_loss, metric_for_logging, train_time, pred_time = evaluate_model_CV( @@ -650,6 +655,7 @@ def compute_estimator( cv_score_agg_func, log_training_metric=log_training_metric, fit_kwargs=fit_kwargs, + free_mem_ratio=0, ) if isinstance(estimator, TransformersEstimator): @@ -669,6 +675,7 @@ def train_estimator( budget=None, fit_kwargs={}, eval_metric=None, + free_mem_ratio=0, ): start_time = time.time() estimator_class = estimator_class or get_estimator_class(task, estimator_name) @@ -681,7 +688,9 @@ def train_estimator( fit_kwargs["metric"] = eval_metric if X_train is not None: - train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs) + train_time = estimator.fit( + X_train, y_train, budget, free_mem_ratio, **fit_kwargs + ) else: estimator = estimator.estimator_class(**estimator.params) train_time = time.time() - start_time diff --git a/flaml/model.py b/flaml/model.py index 76331084e..5fb2d723e 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -44,7 +44,7 @@ except ImportError: resource = None logger = logging.getLogger("flaml.automl") -FREE_MEM_RATIO = 0.2 +# FREE_MEM_RATIO = 0.2 def TimeoutHandler(sig, frame): @@ -201,13 +201,14 @@ class BaseEstimator: self._model = model return train_time - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): """Train the model from given training data. Args: X_train: A numpy array or a dataframe of training data in shape n*m. y_train: A numpy array or a series of labels in shape n*1. 
budget: A float of the time budget in seconds. + free_mem_ratio: A float between 0 and 1 for the free memory ratio to keep during training. Returns: train_time: A float of the training time in seconds. @@ -221,7 +222,7 @@ class BaseEstimator: mem = psutil.virtual_memory() if psutil is not None else None try: with limit_resource( - mem.available * (1 - FREE_MEM_RATIO) + mem.available * (1 - free_mem_ratio) + psutil.Process(os.getpid()).memory_info().rss if mem is not None else -1, @@ -596,6 +597,7 @@ class TransformersEstimator(BaseEstimator): X_train: DataFrame, y_train: Series, budget=None, + free_mem_ratio=0, X_val=None, y_val=None, gpu_per_trial=None, @@ -1036,7 +1038,7 @@ class LGBMEstimator(BaseEstimator): self._time_per_iter = None self._train_size = 0 self._mem_per_iter = -1 - self.HAS_CALLBACK = self.HAS_CALLBACK and self._callbacks(0, 0) is not None + self.HAS_CALLBACK = self.HAS_CALLBACK and self._callbacks(0, 0, 0) is not None def _preprocess(self, X): if ( @@ -1054,7 +1056,7 @@ class LGBMEstimator(BaseEstimator): X = X.to_numpy() return X - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): start_time = time.time() deadline = start_time + budget if budget else np.inf n_iter = self.params.get(self.ITER_HP, self.DEFAULT_ITER) @@ -1118,7 +1120,7 @@ class LGBMEstimator(BaseEstimator): ) if budget is not None else n_iter, - int((1 - FREE_MEM_RATIO) * mem0 / self._mem_per_iter) + int((1 - free_mem_ratio) * mem0 / self._mem_per_iter) if psutil is not None and self._mem_per_iter > 0 else n_iter, ) @@ -1129,10 +1131,12 @@ class LGBMEstimator(BaseEstimator): if self.HAS_CALLBACK: kwargs_callbacks = kwargs.get("callbacks") if kwargs_callbacks: - callbacks = kwargs_callbacks + self._callbacks(start_time, deadline) + callbacks = kwargs_callbacks + self._callbacks( + start_time, deadline, free_mem_ratio + ) kwargs.pop("callbacks") else: - callbacks = self._callbacks(start_time, deadline) + callbacks = self._callbacks(start_time, deadline, free_mem_ratio) if isinstance(self, XGBoostSklearnEstimator): from xgboost import __version__ @@ -1162,10 +1166,10 @@ class LGBMEstimator(BaseEstimator): train_time = time.time() - start_time return train_time - def _callbacks(self, start_time, deadline) -> List[Callable]: - return [partial(self._callback, start_time, deadline)] + def _callbacks(self, start_time, deadline, free_mem_ratio) -> List[Callable]: + return [partial(self._callback, start_time, deadline, free_mem_ratio)] - def _callback(self, start_time, deadline, env) -> None: + def _callback(self, start_time, deadline, free_mem_ratio, env) -> None: from lightgbm.callback import EarlyStopException now = time.time() @@ -1175,7 +1179,7 @@ class LGBMEstimator(BaseEstimator): raise EarlyStopException(env.iteration, env.evaluation_result_list) if psutil is not None: mem = psutil.virtual_memory() - if mem.available / mem.total < FREE_MEM_RATIO: + if mem.available / mem.total < free_mem_ratio: raise EarlyStopException(env.iteration, env.evaluation_result_list) @@ -1260,7 +1264,7 @@ class XGBoostEstimator(SKLearnEstimator): super().__init__(task, **config) self.params["verbosity"] = 0 - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): import xgboost as xgb start_time = time.time() @@ -1284,7 +1288,7 @@ class XGBoostEstimator(SKLearnEstimator): if "objective" in self.params: del self.params["objective"] _n_estimators = 
self.params.pop("n_estimators") - callbacks = XGBoostEstimator._callbacks(start_time, deadline) + callbacks = XGBoostEstimator._callbacks(start_time, deadline, free_mem_ratio) if callbacks: self._model = xgb.train( self.params, @@ -1311,7 +1315,7 @@ class XGBoostEstimator(SKLearnEstimator): return super().predict(dtest, **kwargs) @classmethod - def _callbacks(cls, start_time, deadline): + def _callbacks(cls, start_time, deadline, free_mem_ratio): try: from xgboost.callback import TrainingCallback except ImportError: # for xgboost<1.3 @@ -1326,7 +1330,7 @@ class XGBoostEstimator(SKLearnEstimator): return True if psutil is not None: mem = psutil.virtual_memory() - if mem.available / mem.total < FREE_MEM_RATIO: + if mem.available / mem.total < free_mem_ratio: return True return False @@ -1374,17 +1378,17 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator): self.estimator_class = xgb.XGBClassifier self._xgb_version = xgb.__version__ - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): if issparse(X_train) and self._xgb_version < "1.6.0": # "auto" fails for sparse input since xgboost 1.6.0 self.params["tree_method"] = "auto" if kwargs.get("gpu_per_trial"): self.params["tree_method"] = "gpu_hist" kwargs.pop("gpu_per_trial") - return super().fit(X_train, y_train, budget, **kwargs) + return super().fit(X_train, y_train, budget, free_mem_ratio, **kwargs) - def _callbacks(self, start_time, deadline) -> List[Callable]: - return XGBoostEstimator._callbacks(start_time, deadline) + def _callbacks(self, start_time, deadline, free_mem_ratio) -> List[Callable]: + return XGBoostEstimator._callbacks(start_time, deadline, free_mem_ratio) class XGBoostLimitDepthEstimator(XGBoostSklearnEstimator): @@ -1459,6 +1463,8 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator): ) if self._task not in CLASSIFICATION and "criterion" in config: params.pop("criterion") + if "random_state" not in params: + params["random_state"] = 12032022 return params def __init__( @@ -1627,7 +1633,7 @@ class CatBoostEstimator(BaseEstimator): self.estimator_class = CatBoostClassifier - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): start_time = time.time() deadline = start_time + budget if budget else np.inf train_dir = f"catboost_{str(start_time)}" @@ -1665,7 +1671,7 @@ class CatBoostEstimator(BaseEstimator): cat_features=cat_features, eval_set=eval_set, callbacks=CatBoostEstimator._callbacks( - start_time, deadline, FREE_MEM_RATIO if use_best_model else None + start_time, deadline, free_mem_ratio if use_best_model else None ), **kwargs, ) @@ -1791,7 +1797,7 @@ class Prophet(SKLearnEstimator): train_df = X_train.join(y_train) return train_df - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): from prophet import Prophet current_time = time.time() @@ -1869,7 +1875,7 @@ class ARIMA(Prophet): train_df = train_df.drop(TS_TIMESTAMP_COL, axis=1) return train_df - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): import warnings warnings.filterwarnings("ignore") @@ -1969,7 +1975,7 @@ class SARIMAX(ARIMA): } return space - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): import warnings 
warnings.filterwarnings("ignore") @@ -2094,7 +2100,7 @@ class TS_SKLearn(SKLearnEstimator): model = self.hcrystaball_model.model.fit(X_fit, y_fit) self._model = model - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): current_time = time.time() self._fit(X_train, y_train, budget=budget, **kwargs) train_time = time.time() - current_time @@ -2266,11 +2272,10 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): return training, train_dataloader, val_dataloader - def fit(self, X_train, y_train, budget=None, **kwargs): + def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs): import warnings import pytorch_lightning as pl from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor - from pytorch_lightning.loggers import TensorBoardLogger import torch from pytorch_forecasting import TemporalFusionTransformer from pytorch_forecasting.metrics import QuantileLoss @@ -2287,7 +2292,6 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): early_stop_callback = EarlyStopping( monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min" ) - lr_logger = LearningRateMonitor() # log the learning rate def _fit(log): default_trainer_kwargs = dict( @@ -2296,7 +2300,9 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): else None, max_epochs=max_epochs, gradient_clip_val=gradient_clip_val, - callbacks=[lr_logger, early_stop_callback] if log else False, + callbacks=[LearningRateMonitor(), early_stop_callback] + if log + else [early_stop_callback], logger=log, ) trainer = pl.Trainer( @@ -2308,7 +2314,7 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): lstm_layers=2, # 2 is mostly optimal according to documentation output_size=7, # 7 quantiles by default loss=QuantileLoss(), - log_interval=10, + log_interval=10 if log else 0, # uncomment for learning rate finder and otherwise, e.g. 
to 10 for logging every 10 batches reduce_on_plateau_patience=4, ) @@ -2320,15 +2326,17 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): ) return trainer - try: - logger = TensorBoardLogger( - kwargs.get("log_dir", "lightning_logs") - ) # logging results to a tensorboard - trainer = _fit(log=logger) - except ValueError: - # issue with pytorch forecasting model log_prediction() function - # pytorch-forecasting issue #1145 - trainer = _fit(log=False) + # try: + # from pytorch_lightning.loggers import TensorBoardLogger + + # logger = TensorBoardLogger( + # kwargs.get("log_dir", "lightning_logs") + # ) # logging results to a tensorboard + # trainer = _fit(log=logger) + # except ValueError: + # issue with pytorch forecasting model log_prediction() function + # pytorch-forecasting issue #1145 + trainer = _fit(log=False) best_model_path = trainer.checkpoint_callback.best_model_path best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path) train_time = time.time() - current_time diff --git a/flaml/tune/searcher/blendsearch.py b/flaml/tune/searcher/blendsearch.py index 94480d401..e1227aa77 100644 --- a/flaml/tune/searcher/blendsearch.py +++ b/flaml/tune/searcher/blendsearch.py @@ -146,7 +146,6 @@ class BlendSearch(Searcher): self.cost_attr = None else: self.cost_attr = cost_attr - self.penalty = PENALTY # penalty term for constraints self._metric, self._mode = metric, mode self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation @@ -310,7 +309,7 @@ class BlendSearch(Searcher): self._time_used += now - self._start_time self._start_time = now self._set_deadline() - if self._input_cost_attr == "auto": + if self._input_cost_attr == "auto" and self._time_budget_s: self.cost_attr = self._ls.cost_attr = TIME_TOTAL_S if "metric_target" in spec: self._metric_target = spec.get("metric_target") diff --git a/flaml/version.py b/flaml/version.py index b19b12ea3..6849410aa 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "1.0.14" +__version__ = "1.1.0" diff --git a/notebook/automl_classification.ipynb b/notebook/automl_classification.ipynb index c759a3c0a..86a6eb199 100644 --- a/notebook/automl_classification.ipynb +++ b/notebook/automl_classification.ipynb @@ -38,10 +38,10 @@ "metadata": {}, "outputs": [], "source": [ - "%pip install flaml[notebook] openml==0.10.2\n", + "%pip install flaml[notebook]\n", "# From v0.6.6, catboost is made an optional dependency to build conda package.\n", "# To install catboost, you can run:\n", - "%pip install flaml[catboost]" + "# %pip install flaml[catboost]" ] }, { @@ -112,7 +112,7 @@ "source": [ "settings = {\n", " \"time_budget\": 600, # total running time in seconds\n", - " \"metric\": 'accuracy', ", + " \"metric\": 'accuracy', \n", " # check the documentation for options of metrics (https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric)\n", " \"task\": 'classification', # task type\n", " \"log_file_name\": 'airlines_experiment.log', # flaml log file\n", @@ -1269,7 +1269,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.9.7 ('base')", + "display_name": "Python 3.9.15 64-bit", "language": "python", "name": "python3" }, @@ -1283,11 +1283,11 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.7" + "version": "3.9.15" }, "vscode": { "interpreter": { - "hash": "e811209110f5aa4d8c2189eeb3ff7b9b4d146931cb9189ef6041ff71605c541d" + "hash": 
"949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" } } }, diff --git a/setup.py b/setup.py index c86882972..11b9f29a1 100644 --- a/setup.py +++ b/setup.py @@ -42,6 +42,7 @@ setuptools.setup( "notebook": [ "jupyter", "matplotlib", + "openml==0.10.2", ], "test": [ "flake8>=3.8.4", diff --git a/test/automl/test_multiclass.py b/test/automl/test_multiclass.py index fb6451eef..a0f18bbcb 100644 --- a/test/automl/test_multiclass.py +++ b/test/automl/test_multiclass.py @@ -380,6 +380,7 @@ class TestMultiClass(unittest.TestCase): "estimator_list": ["large_lgbm"], "log_type": "all", "hpo_method": "random", + "free_mem_ratio": 0.2, } X_train, y_train = load_iris(return_X_y=True, as_frame=True) diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py index 818f7af36..09be23ebb 100644 --- a/test/automl/test_notebook_example.py +++ b/test/automl/test_notebook_example.py @@ -17,7 +17,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None): budget = performance_check_budget # revise the buget on macos if budget == performance_check_budget: budget = None - max_iter = 100 + max_iter = 60 else: max_iter = None try: @@ -44,6 +44,15 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None): "log_file_name": "airlines_experiment.log", # flaml log file "seed": 7654321, # random seed "hpo_method": hpo_method, + "log_type": "all", + "estimator_list": [ + "lgbm", + "xgboost", + "xgb_limitdepth", + "rf", + "extra_tree", + ], # list of ML learners + "eval_method": "holdout", } """The main flaml automl API""" automl.fit(X_train=X_train, y_train=y_train, **settings) @@ -130,6 +139,7 @@ def test_mlflow(): "task": "classification", # task type "sample": False, # whether to subsample training data "log_file_name": "adult.log", # flaml log file + "learner_selector": "roundrobin", } mlflow.set_experiment("flaml") with mlflow.start_run() as run: diff --git a/test/automl/test_training_log.py b/test/automl/test_training_log.py index 66724d129..85cccbe34 100644 --- a/test/automl/test_training_log.py +++ b/test/automl/test_training_log.py @@ -43,7 +43,7 @@ class TestTrainingLog(unittest.TestCase): print(model0.params["n_estimators"], config) # train on full data with no time limit - automl._state.time_budget = None + automl._state.time_budget = -1 model, _ = automl._state._train_with_config(estimator, config) # assuming estimator & config are saved and loaded as follows diff --git a/website/docs/Examples/AutoML-for-LightGBM.md b/website/docs/Examples/AutoML-for-LightGBM.md index 40c3115cc..ad5c49ab9 100644 --- a/website/docs/Examples/AutoML-for-LightGBM.md +++ b/website/docs/Examples/AutoML-for-LightGBM.md @@ -1,5 +1,14 @@ # AutoML for LightGBM +### Prerequisites for this example + +Install the [notebook] option. +```bash +pip install "flaml[notebook]" +``` + +This option is not necessary in general. + ### Use built-in LGBMEstimator ```python diff --git a/website/docs/Examples/AutoML-for-XGBoost.md b/website/docs/Examples/AutoML-for-XGBoost.md index c2b4e0ec8..2b7bf6092 100644 --- a/website/docs/Examples/AutoML-for-XGBoost.md +++ b/website/docs/Examples/AutoML-for-XGBoost.md @@ -1,5 +1,14 @@ # AutoML for XGBoost +### Prerequisites for this example + +Install the [notebook] option. +```bash +pip install "flaml[notebook]" +``` + +This option is not necessary in general. 
+ ### Use built-in XGBoostSklearnEstimator ```python diff --git a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md index c02ed6295..7f272cea7 100644 --- a/website/docs/Examples/Integrate - Scikit-learn Pipeline.md +++ b/website/docs/Examples/Integrate - Scikit-learn Pipeline.md @@ -1,5 +1,7 @@ As FLAML's AutoML module can be used a transformer in the Sklearn's pipeline we can get all the benefits of pipeline. +This example requires openml==0.10.2. + ### Load data ```python diff --git a/website/docs/Examples/Tune-Lexicographic-objectives.md b/website/docs/Examples/Tune-Lexicographic-objectives.md index 7e17850f3..c7fff5463 100644 --- a/website/docs/Examples/Tune-Lexicographic-objectives.md +++ b/website/docs/Examples/Tune-Lexicographic-objectives.md @@ -3,7 +3,7 @@ ## Requirements ```python -pip install flaml thop torchvision torch +pip install "flaml>=1.1.0" thop torchvision torch ``` ## Tuning accurate and efficient neural networks with lexicographic preference diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md index 66e11d62f..7627a510d 100644 --- a/website/docs/Use-Cases/Task-Oriented-AutoML.md +++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md @@ -12,7 +12,7 @@ - 'regression': regression with tabular data. - 'ts_forecast': time series forecasting. - 'ts_forecast_classification': time series forecasting for classification. - + - 'ts_forecast_panel': time series forecasting for panel datasets (multiple time series). - 'rank': learning to rank. - 'seq-classification': sequence classification. - 'seq-regression': sequence regression. @@ -20,7 +20,7 @@ - 'token-classification': token classification. - 'multichoice-classification': multichoice classification. -Two optional inputs are `time_budget` and `max_iter` for searching models and hyperparameters. When both are unspecified, only one model per estimator will be trained (using our [zero-shot](Zero-Shot-AutoML) technique). +Two optional inputs are `time_budget` and `max_iter` for searching models and hyperparameters. When both are unspecified, only one model per estimator will be trained (using our [zero-shot](Zero-Shot-AutoML) technique). When `time_budget` is provided, there can be randomness in the result due to runtime variance. A typical way to use `flaml.AutoML`: @@ -112,9 +112,12 @@ The estimator list can contain one or more estimator names, each corresponding t #### Estimator * Built-in estimator. - 'lgbm': LGBMEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, num_leaves, min_child_samples, learning_rate, log_max_bin (logarithm of (max_bin + 1) with base 2), colsample_bytree, reg_alpha, reg_lambda. - - 'xgboost': XGBoostSkLearnEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_leaves, max_depth, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda. - - 'rf': RandomForestEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). - - 'extra_tree': ExtraTreesEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). 
+ - 'xgboost': XGBoostSkLearnEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_leaves, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda.
+ - 'xgb_limitdepth': XGBoostLimitDepthEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_depth, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda.
+ - 'rf': RandomForestEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). Starting from v1.1.0,
+ it uses a fixed random_state by default.
+ - 'extra_tree': ExtraTreesEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). Starting from v1.1.0,
+ it uses a fixed random_state by default.
 - 'lrl1': LRL1Classifier (sklearn.LogisticRegression with L1 regularization) for task "classification". Hyperparameters: C.
 - 'lrl2': LRL2Classifier (sklearn.LogisticRegression with L2 regularization) for task "classification". Hyperparameters: C.
 - 'catboost': CatBoostEstimator for task "classification" and "regression". Hyperparameters: early_stopping_rounds, learning_rate, n_estimators.
@@ -123,7 +126,7 @@ The estimator list can contain one or more estimator names, each corresponding t
 - 'arima': ARIMA for task "ts_forecast". Hyperparameters: p, d, q.
 - 'sarimax': SARIMAX for task "ts_forecast". Hyperparameters: p, d, q, P, D, Q, s.
 - 'transformer': Huggingface transformer models for task "seq-classification", "seq-regression", "multichoice-classification", "token-classification" and "summarization". Hyperparameters: learning_rate, num_train_epochs, per_device_train_batch_size, warmup_ratio, weight_decay, adam_epsilon, seed.
-
+ - 'temporal_fusion_transformer': TemporalFusionTransformerEstimator for task "ts_forecast_panel". Hyperparameters: gradient_clip_val, hidden_size, hidden_continuous_size, attention_head_size, dropout, learning_rate. There is a [known issue](https://github.com/jdb78/pytorch-forecasting/issues/1145) with pytorch-forecasting logging.
 * Custom estimator. Use custom estimator for:
 - tuning an estimator that is not built-in;
 - customizing search space for a built-in estimator.
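A note for downstream custom estimators: this patch passes `free_mem_ratio` as the fourth positional argument of `fit` (see `train_estimator` and `get_val_loss` in flaml/ml.py), so a user-defined estimator that overrides `fit` needs to accept it. Below is a minimal sketch of the updated signature; the `MyKNN` learner, its search space, and the name "my_knn" are illustrative and not part of this patch.

```python
from flaml import AutoML, tune
from flaml.model import SKLearnEstimator


class MyKNN(SKLearnEstimator):
    """Illustrative custom learner (classification only); the point is the updated fit signature."""

    @classmethod
    def search_space(cls, data_size, task, **params):
        return {
            "n_neighbors": {
                "domain": tune.lograndint(lower=1, upper=32),
                "init_value": 5,
                "low_cost_init_value": 1,
            },
        }

    def __init__(self, task="binary", **config):
        super().__init__(task, **config)
        from sklearn.neighbors import KNeighborsClassifier

        self.estimator_class = KNeighborsClassifier

    # free_mem_ratio now arrives positionally after budget; keep it in the signature.
    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
        return super().fit(X_train, y_train, budget, free_mem_ratio, **kwargs)


automl = AutoML()
automl.add_learner(learner_name="my_knn", learner_class=MyKNN)
# "my_knn" can now be listed in estimator_list like any built-in learner.
```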
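The fixed `random_state` that 'rf' and 'extra_tree' now use can be checked directly on the estimator object. A rough sketch, assuming flaml>=1.1.0 with this patch applied; the hyperparameter value is arbitrary.

```python
from flaml.model import RandomForestEstimator

# config2params now injects random_state=12032022 when none is supplied,
# so repeated runs build the same forest for the same hyperparameters.
est = RandomForestEstimator(task="binary", n_estimators=4)
print(est.params.get("random_state"))  # expected: 12032022
```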
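How the new `free_mem_ratio` knob is used from the public API, per the updated docstring ("float between 0 and 1, default=0. The free memory ratio to keep during training"): a minimal, illustrative sketch. The dataset and the 0.2 value (borrowed from the updated test_multiclass.py) are examples only.

```python
from flaml import AutoML
from sklearn.datasets import load_iris

X_train, y_train = load_iris(return_X_y=True, as_frame=True)

automl = AutoML()
automl.fit(
    X_train=X_train,
    y_train=y_train,
    task="classification",
    max_iter=10,             # iteration-based search budget
    estimator_list=["lgbm"],
    free_mem_ratio=0.2,      # keep at least 20% of memory free while training
    train_time_limit=10,     # per-model training time cap in seconds (default is now None, i.e. no cap)
)
```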
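Finally, the pattern the updated test_notebook_example.py uses to make the performance check reproducible: replace the wall-clock `time_budget` with `max_iter`, and fix the seed, the estimator list, and the evaluation method. A sketch along those lines; `load_openml_dataset` and the Airlines dataset (OpenML id 1169) come from the `[notebook]` extra with the pinned openml==0.10.2, and the numbers are illustrative.

```python
from flaml import AutoML
from flaml.data import load_openml_dataset

X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")

automl = AutoML()
automl.fit(
    X_train=X_train,
    y_train=y_train,
    task="classification",
    metric="accuracy",
    time_budget=-1,       # negative means no wall-clock limit
    max_iter=60,          # iteration budget, so the result does not depend on runtime variance
    seed=7654321,         # fixed random seed
    estimator_list=["lgbm", "xgboost", "xgb_limitdepth", "rf", "extra_tree"],
    eval_method="holdout",
    log_file_name="airlines_experiment.log",
)
print(automl.best_estimator, automl.best_config, 1 - automl.best_loss)
```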