From e3d26c0650ba3cf6b55ee9e9231cc54c680e9fb9 Mon Sep 17 00:00:00 2001
From: Qingyun Wu
Date: Sat, 20 Aug 2022 09:18:35 -0400
Subject: [PATCH 1/4] add guideline collection (#687)

* add guideline collection

* remove redundancy
---
 website/docs/FAQ.md                            | 9 ++++++++-
 website/docs/Use-Cases/Task-Oriented-AutoML.md | 3 ++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/website/docs/FAQ.md b/website/docs/FAQ.md
index 42ffac8ed..2fdbcd2fd 100644
--- a/website/docs/FAQ.md
+++ b/website/docs/FAQ.md
@@ -1,8 +1,15 @@
 # Frequently Asked Questions
 
+### [Guidelines on how to set a hyperparameter search space](Use-Cases/Tune-User-Defined-Function#details-and-guidelines-on-hyperparameter-search-space)
+
+### [Guidelines on parallel vs sequential tuning](Use-Cases/Task-Oriented-AutoML#guidelines-on-parallel-vs-sequential-tuning)
+
+### [Guidelines on creating and tuning a custom estimator](Use-Cases/Task-Oriented-AutoML#guidelines-on-tuning-a-custom-estimator)
+
+
 ### About `low_cost_partial_config` in `tune`.
 
-- Definition and purpose: The `low_cost_partial_config` is a dictionary of subset of the hyperparameter coordinates whose value corresponds to a configuration with known low-cost (i.e., low computation cost for training the corresponding model). The concept of low/high-cost is meaningful in the case where a subset of the hyperparameters to tune directly affects the computation cost for training the model. For example, `n_estimators` and `max_leaves` are known to affect the training cost of tree-based learners. We call this subset of hyperparameters, *cost-related hyperparameters*. In such scenarios, if you are aware of low-cost configurations for the cost-related hyperparameters, you are recommended to set them as the `low_cost_partial_config`. Using the tree-based method example again, since we know that small `n_estimators` and `max_leaves` generally correspond to simpler models and thus lower cost, we set `{'n_estimators': 4, 'max_leaves': 4}` as the `low_cost_partial_config` by default (note that `4` is the lower bound of search space for these two hyperparameters), e.g., in [LGBM](https://github.com/microsoft/FLAML/blob/main/flaml/model.py#L215). Configuring `low_cost_partial_config` helps the search algorithms make more cost-efficient choices.
+- Definition and purpose: The `low_cost_partial_config` is a dictionary of a subset of the hyperparameter coordinates whose value corresponds to a configuration with known low cost (i.e., low computation cost for training the corresponding model). The concept of low/high cost is meaningful in the case where a subset of the hyperparameters to tune directly affects the computation cost for training the model. For example, `n_estimators` and `max_leaves` are known to affect the training cost of tree-based learners. We call this subset of hyperparameters *cost-related hyperparameters*. In such scenarios, if you are aware of low-cost configurations for the cost-related hyperparameters, you are recommended to set them as the `low_cost_partial_config`. Using the tree-based method example again, since we know that small `n_estimators` and `max_leaves` generally correspond to simpler models and thus lower cost, we set `{'n_estimators': 4, 'max_leaves': 4}` as the `low_cost_partial_config` by default (note that `4` is the lower bound of the search space for these two hyperparameters), e.g., in [LGBM](https://github.com/microsoft/FLAML/blob/main/flaml/model.py#L215). Configuring `low_cost_partial_config` helps the search algorithms make more cost-efficient choices. In AutoML, the `low_cost_init_value` in the `search_space()` function for each estimator serves the same role.
 
 - Usage in practice: It is recommended to configure it if there are cost-related hyperparameters in your tuning task and you happen to know the low-cost values for them, but it is not required (it is fine to leave it at the default value, i.e., `None`).
 
diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md
index 6e427df7d..6752de7eb 100644
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@@ -125,8 +125,9 @@ The estimator list can contain one or more estimator names, each corresponding t
 - tuning an estimator that is not built-in;
 - customizing search space for a built-in estimator.
 
-To tune a custom estimator that is not built-in, you need to:
+#### Guidelines on tuning a custom estimator
 
+To tune a custom estimator that is not built-in, you need to:
 1. Build a custom estimator by inheriting [`flaml.model.BaseEstimator`](../reference/model#baseestimator-objects) or a derived class.
 For example, if you have an estimator class with scikit-learn style `fit()` and `predict()` functions, you only need to set `self.estimator_class` to be that class in your constructor.

From 47e034d2032c0e369024caec681ff72a11d9023c Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Sat, 20 Aug 2022 07:43:06 -0700
Subject: [PATCH 2/4] LightGBM notebook update (#690)

* version update in notebook

* comment about optuna install

* monotone constraints
---
 notebook/automl_lightgbm.ipynb                 | 3 ++-
 website/docs/Use-Cases/Task-Oriented-AutoML.md | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/notebook/automl_lightgbm.ipynb b/notebook/automl_lightgbm.ipynb
index 41610a31e..3b76e39c0 100644
--- a/notebook/automl_lightgbm.ipynb
+++ b/notebook/automl_lightgbm.ipynb
@@ -39,7 +39,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install flaml[notebook]==1.0.8"
+    "%pip install flaml[notebook]==1.0.10"
    ]
   },
   {
@@ -651,6 +651,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "# uncomment the following line if optuna is not installed\n",
     "# %pip install optuna==2.8.0"
    ]
   },
diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md
index 6752de7eb..de7c11820 100644
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@@ -281,7 +281,9 @@ Some constraints on the estimator can be implemented via the custom learner. For
 class MonotonicXGBoostEstimator(XGBoostSklearnEstimator):
     @classmethod
     def search_space(**args):
-        return super().search_space(**args).update({"monotone_constraints": "(1, -1)"})
+        space = super().search_space(**args)
+        space.update({"monotone_constraints": {"domain": "(1, -1)"}})
+        return space
 ```
 It adds a monotonicity constraint to XGBoost. This approach can be used to set any constraint that is an argument in the underlying estimator's constructor.
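
As a concrete illustration of the `low_cost_partial_config` guidance referenced in patch 1, a minimal, self-contained `flaml.tune.run` sketch could look like the following; the toy objective function, search ranges, and sample budget are illustrative assumptions, not part of the patch.

```python
from flaml import tune


def evaluate_config(config):
    # Stand-in objective: in a real tuning task this would train a model whose
    # training cost grows with n_estimators and max_leaves.
    score = (config["n_estimators"] - 100) ** 2 + (config["max_leaves"] - 32) ** 2
    return {"score": score}


analysis = tune.run(
    evaluate_config,
    config={
        "n_estimators": tune.lograndint(lower=4, upper=1000),
        "max_leaves": tune.lograndint(lower=4, upper=1000),
    },
    # start the search from the cheap corner of the cost-related hyperparameters
    low_cost_partial_config={"n_estimators": 4, "max_leaves": 4},
    metric="score",
    mode="min",
    num_samples=20,
)
print(analysis.best_config)
```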
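The "Guidelines on tuning a custom estimator" section that patch 1 links to boils down to two steps: subclass a FLAML estimator and register it with `add_learner`. A hedged sketch under those guidelines is below; the wrapper name `MyExtraTreesEstimator`, the wrapped scikit-learn class, and the search-space values are illustrative choices, not something defined in the patch.

```python
import numpy as np
from sklearn.ensemble import ExtraTreesRegressor

from flaml import AutoML, tune
from flaml.model import SKLearnEstimator


class MyExtraTreesEstimator(SKLearnEstimator):
    """Illustrative custom learner wrapping a scikit-learn style estimator."""

    def __init__(self, task="regression", **config):
        super().__init__(task, **config)
        # the wrapped class only needs scikit-learn style fit()/predict()
        self.estimator_class = ExtraTreesRegressor

    @classmethod
    def search_space(cls, data_size, task):
        # each hyperparameter: a domain plus optional init and low-cost init values
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=256),
                "init_value": 4,
                "low_cost_init_value": 4,
            },
            "max_leaf_nodes": {
                "domain": tune.lograndint(lower=4, upper=256),
                "init_value": 4,
                "low_cost_init_value": 4,
            },
        }


# synthetic data, only to make the sketch runnable
X, y = np.random.random((200, 4)), np.random.random(200)
automl = AutoML()
automl.add_learner(learner_name="my_extra_trees", learner_class=MyExtraTreesEstimator)
automl.fit(
    X_train=X,
    y_train=y,
    task="regression",
    estimator_list=["my_extra_trees"],
    max_iter=2,
    time_budget=None,
)
print(automl.best_config)
```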
From 3d1a28bfc04b6230badf1099474f8af38a5b6d1a Mon Sep 17 00:00:00 2001
From: Xueqing Liu
Date: Sat, 20 Aug 2022 18:17:10 -0400
Subject: [PATCH 3/4] Add preserve_checkpoint to preserve the checkpoint after del (#692)

* fix del bug
---
 flaml/automl.py         | 32 +++++++++++++++++++++++++++++---
 test/nlp/test_autohf.py |  3 +++
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/flaml/automl.py b/flaml/automl.py
index b959437f0..381a61a0a 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -627,6 +627,8 @@ class AutoML(BaseEstimator):
             keep_search_state: boolean, default=False | Whether to keep data needed
                 for model search after fit(). By default the state is deleted for space saving.
+            preserve_checkpoint: boolean, default=True | Whether to preserve the saved checkpoint
+                on disk when deleting automl. By default the checkpoint is preserved.
             early_stop: boolean, default=False | Whether to stop early if the search
                 is considered to converge.
             append_log: boolean, default=False | Whether to directly append the log
@@ -726,6 +728,7 @@ class AutoML(BaseEstimator):
         settings["starting_points"] = settings.get("starting_points", "static")
         settings["n_concurrent_trials"] = settings.get("n_concurrent_trials", 1)
         settings["keep_search_state"] = settings.get("keep_search_state", False)
+        settings["preserve_checkpoint"] = settings.get("preserve_checkpoint", True)
         settings["early_stop"] = settings.get("early_stop", False)
         settings["append_log"] = settings.get("append_log", False)
         settings["min_sample_size"] = settings.get("min_sample_size", MIN_SAMPLE_TRAIN)
@@ -1576,6 +1579,7 @@ class AutoML(BaseEstimator):
         auto_augment=None,
         custom_hp=None,
         skip_transform=None,
+        preserve_checkpoint=True,
         fit_kwargs_by_estimator=None,
         **fit_kwargs,
     ):
@@ -1704,10 +1708,19 @@ class AutoML(BaseEstimator):
         self._state.fit_kwargs = fit_kwargs
         self._state.custom_hp = custom_hp or self._settings.get("custom_hp")
-        self._skip_transform = self._settings.get("skip_transform") if skip_transform is None else skip_transform
+        self._skip_transform = (
+            self._settings.get("skip_transform")
+            if skip_transform is None
+            else skip_transform
+        )
         self._state.fit_kwargs_by_estimator = (
             fit_kwargs_by_estimator or self._settings.get("fit_kwargs_by_estimator")
         )
+        self.preserve_checkpoint = (
+            self._settings.get("preserve_checkpoint")
+            if preserve_checkpoint is None
+            else preserve_checkpoint
+        )
         self._validate_data(X_train, y_train, dataframe, label, groups=groups)
 
         logger.info("log file name {}".format(log_file_name))
@@ -2123,6 +2136,7 @@ class AutoML(BaseEstimator):
         seed=None,
         n_concurrent_trials=None,
         keep_search_state=None,
+        preserve_checkpoint=True,
         early_stop=None,
         append_log=None,
         auto_augment=None,
@@ -2303,6 +2317,8 @@ class AutoML(BaseEstimator):
             keep_search_state: boolean, default=False | Whether to keep data needed
                 for model search after fit(). By default the state is deleted for space saving.
+            preserve_checkpoint: boolean, default=True | Whether to preserve the saved checkpoint
+                on disk when deleting automl. By default the checkpoint is preserved.
             early_stop: boolean, default=False | Whether to stop early if the search
                 is considered to converge.
             append_log: boolean, default=False | Whether to directly append the log
@@ -2464,6 +2480,11 @@ class AutoML(BaseEstimator):
             if keep_search_state is None
             else keep_search_state
         )
+        self.preserve_checkpoint = (
+            self._settings.get("preserve_checkpoint")
+            if preserve_checkpoint is None
+            else preserve_checkpoint
+        )
         early_stop = (
             self._settings.get("early_stop") if early_stop is None else early_stop
         )
@@ -2513,7 +2534,11 @@ class AutoML(BaseEstimator):
         self._state.fit_kwargs = fit_kwargs
         custom_hp = custom_hp or self._settings.get("custom_hp")
-        self._skip_transform = self._settings.get("skip_transform") if skip_transform is None else skip_transform
+        self._skip_transform = (
+            self._settings.get("skip_transform")
+            if skip_transform is None
+            else skip_transform
+        )
         fit_kwargs_by_estimator = fit_kwargs_by_estimator or self._settings.get(
             "fit_kwargs_by_estimator"
         )
@@ -3566,7 +3591,8 @@ class AutoML(BaseEstimator):
             and self._trained_estimator
             and hasattr(self._trained_estimator, "cleanup")
         ):
-            self._trained_estimator.cleanup()
+            if self.preserve_checkpoint is False:
+                self._trained_estimator.cleanup()
         del self._trained_estimator
 
     def _select_estimator(self, estimator_list):
diff --git a/test/nlp/test_autohf.py b/test/nlp/test_autohf.py
index 6e59bcf2d..ee0ab693f 100644
--- a/test/nlp/test_autohf.py
+++ b/test/nlp/test_autohf.py
@@ -13,6 +13,7 @@ def test_hf_data():
     automl = AutoML()
 
     automl_settings = get_automl_settings()
+    automl_settings["preserve_checkpoint"] = False
 
     try:
         automl.fit(
@@ -68,6 +69,8 @@ def test_hf_data():
     automl.predict_proba(X_test)
     print(automl.classes_)
 
+    del automl
+
 
 if __name__ == "__main__":
     test_hf_data()

From dffa802b3e377ea3dc591f0c7d99f3b3ccc1a26d Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Sat, 20 Aug 2022 18:38:56 -0700
Subject: [PATCH 4/4] use_best_model for catboost (#679)

* use_best_model for catboost

* bump version to 1.0.11
---
 flaml/model.py                 | 31 ++++++++++++++--------
 flaml/version.py               |  2 +-
 test/automl/test_regression.py | 47 +++++++++++++++++++++-------------
 3 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/flaml/model.py b/flaml/model.py
index 4a7825f1a..890f06c6b 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -1626,15 +1626,26 @@ class CatBoostEstimator(BaseEstimator):
             cat_features = list(X_train.select_dtypes(include="category").columns)
         else:
             cat_features = []
-        n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
+        use_best_model = kwargs.get("use_best_model", True)
+        n = (
+            max(int(len(y_train) * 0.9), len(y_train) - 1000)
+            if use_best_model
+            else len(y_train)
+        )
         X_tr, y_tr = X_train[:n], y_train[:n]
+        from catboost import Pool, __version__
+
+        eval_set = (
+            Pool(data=X_train[n:], label=y_train[n:], cat_features=cat_features)
+            if use_best_model
+            else None
+        )
         if "sample_weight" in kwargs:
             weight = kwargs["sample_weight"]
             if weight is not None:
                 kwargs["sample_weight"] = weight[:n]
         else:
             weight = None
-        from catboost import Pool, __version__
 
         model = self.estimator_class(train_dir=train_dir, **self.params)
         if __version__ >= "0.26":
             model.fit(
                 X_tr,
                 y_tr,
                 cat_features=cat_features,
-                eval_set=Pool(
-                    data=X_train[n:], label=y_train[n:], cat_features=cat_features
+                eval_set=eval_set,
+                callbacks=CatBoostEstimator._callbacks(
+                    start_time, deadline, FREE_MEM_RATIO if use_best_model else None
                 ),
-                callbacks=CatBoostEstimator._callbacks(start_time, deadline),
                 **kwargs,
             )
         else:
             model.fit(
                 X_tr,
                 y_tr,
                 cat_features=cat_features,
-                eval_set=Pool(
-                    data=X_train[n:], label=y_train[n:], cat_features=cat_features
-                ),
+                eval_set=eval_set,
                 **kwargs,
             )
         shutil.rmtree(train_dir, ignore_errors=True)
@@ -1667,7 +1676,7 @@ class CatBoostEstimator(BaseEstimator):
         return train_time
 
     @classmethod
-    def _callbacks(cls, start_time, deadline):
+    def _callbacks(cls, start_time, deadline, free_mem_ratio):
         class ResourceLimit:
             def after_iteration(self, info) -> bool:
                 now = time.time()
@@ -1675,9 +1684,9 @@ class CatBoostEstimator(BaseEstimator):
                     self._time_per_iter = now - start_time
                 if now + self._time_per_iter > deadline:
                     return False
-                if psutil is not None:
+                if psutil is not None and free_mem_ratio is not None:
                     mem = psutil.virtual_memory()
-                    if mem.available / mem.total < FREE_MEM_RATIO:
+                    if mem.available / mem.total < free_mem_ratio:
                         return False
                 return True  # can continue
 
diff --git a/flaml/version.py b/flaml/version.py
index 9fd0f8dd6..9eb1ebec5 100644
--- a/flaml/version.py
+++ b/flaml/version.py
@@ -1 +1 @@
-__version__ = "1.0.10"
+__version__ = "1.0.11"
diff --git a/test/automl/test_regression.py b/test/automl/test_regression.py
index 47869ee34..0aca92f15 100644
--- a/test/automl/test_regression.py
+++ b/test/automl/test_regression.py
@@ -98,8 +98,8 @@ class TestRegression(unittest.TestCase):
         y_train = np.random.uniform(size=300)
         X_val = scipy.sparse.random(100, 900, density=0.0001)
         y_val = np.random.uniform(size=100)
-        automl_experiment = AutoML()
-        automl_settings = {
+        automl = AutoML()
+        settings = {
             "time_budget": 2,
             "metric": "mae",
             "task": "regression",
@@ -110,23 +110,34 @@ class TestRegression(unittest.TestCase):
             "verbose": 0,
             "early_stop": True,
         }
-        automl_experiment.fit(
-            X_train=X_train,
-            y_train=y_train,
-            X_val=X_val,
-            y_val=y_val,
-            **automl_settings
+        automl.fit(
+            X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
+        )
+        assert automl._state.X_val.shape == X_val.shape
+        print(automl.predict(X_train))
+        print(automl.model)
+        print(automl.config_history)
+        print(automl.best_model_for_estimator("rf"))
+        print(automl.best_iteration)
+        print(automl.best_estimator)
+        print(automl.best_config)
+        print(automl.best_loss)
+        print(automl.best_config_train_time)
+
+        settings.update(
+            {
+                "estimator_list": ["catboost"],
+                "keep_search_state": False,
+                "model_history": False,
+                "use_best_model": False,
+                "time_budget": None,
+                "max_iter": 2,
+                "custom_hp": {"catboost": {"n_estimators": {"domain": 100}}},
+            }
+        )
+        automl.fit(
+            X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
         )
-        assert automl_experiment._state.X_val.shape == X_val.shape
-        print(automl_experiment.predict(X_train))
-        print(automl_experiment.model)
-        print(automl_experiment.config_history)
-        print(automl_experiment.best_model_for_estimator("rf"))
-        print(automl_experiment.best_iteration)
-        print(automl_experiment.best_estimator)
-        print(automl_experiment.best_config)
-        print(automl_experiment.best_loss)
-        print(automl_experiment.best_config_train_time)
 
     def test_parallel(self, hpo_method=None):
         automl_experiment = AutoML()
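
For patch 3, the new `preserve_checkpoint` option is just another AutoML setting, passed the same way the updated NLP test passes it. A minimal sketch of the intended usage is below; the placeholder data, metric, and budget are illustrative assumptions, not the actual test fixture.

```python
from flaml import AutoML

automl = AutoML()
automl_settings = {
    "task": "seq-classification",  # tasks using TransformersEstimator write checkpoints to disk
    "metric": "accuracy",
    "time_budget": 100,
    "preserve_checkpoint": False,  # new flag in this patch; the default True keeps the checkpoint
}
# X_train / y_train are placeholders for a real text classification dataset
# automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
# del automl  # with preserve_checkpoint=False, the trained estimator's checkpoint is cleaned up on deletion
```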
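For patch 4, the updated regression test suggests that `use_best_model` reaches `CatBoostEstimator.fit()` as an ordinary fit keyword. The sketch below assumes `catboost` is installed and substitutes synthetic dense data for the sparse matrices used in the test.

```python
import numpy as np

from flaml import AutoML

# synthetic regression data, only to make the sketch runnable
X_train = np.random.random((300, 5))
y_train = np.random.random(300)

automl = AutoML()
settings = {
    "task": "regression",
    "metric": "mae",
    "estimator_list": ["catboost"],
    "max_iter": 2,
    "time_budget": None,
    # forwarded to CatBoostEstimator.fit(); when False, CatBoost trains on all rows
    # and skips the eval_set-based best-model selection and memory check added in this patch
    "use_best_model": False,
}
automl.fit(X_train=X_train, y_train=y_train, **settings)
print(automl.best_config)
```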