notebook test; spark warning message; reproducibility bug; sequential tuning stop condition (#869)

* notebook test

* add ipykernel, remove except

* only create dir if not empty

* Stop sequential tuning when result is None

* fix reproducibility of global search

* save gs seed

* use get to avoid KeyError

* test
Chi Wang 2023-01-07 18:39:29 -08:00 committed by GitHub
parent 9fde27e536
commit 75e3454120
14 changed files with 129 additions and 29 deletions


@@ -213,11 +213,12 @@ class BlendSearch(Searcher):
         else:
             gs_space = space
         gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32)
+        self._gs_seed = gs_seed
         if experimental:
             import optuna as ot

             sampler = ot.samplers.TPESampler(
-                seed=seed, multivariate=True, group=True
+                seed=gs_seed, multivariate=True, group=True
             )
         else:
             sampler = None
@@ -297,7 +298,7 @@ class BlendSearch(Searcher):
                 space=self._gs._space,
                 metric=metric,
                 mode=mode,
-                sampler=self._gs._sampler,
+                seed=self._gs_seed,
             )
             self._gs.space = self._ls.space
         self._init_search()
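
The derived gs_seed decouples the global searcher's RNG from the user seed while keeping it inside the 32-bit range that NumPy and Optuna samplers accept, and caching it as self._gs_seed lets the second hunk above rebuild the global searcher with the same seed instead of reusing the old sampler object. A minimal sketch of the wrap-around, using the diff's expression inside a hypothetical helper derive_gs_seed:

def derive_gs_seed(seed: int) -> int:
    # offset the user seed; wrap negative results into [0, 2**32)
    return seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32)

for s in (0, 5, 9, 10, 12345):
    assert 0 <= derive_gs_seed(s) < (1 << 32)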


@@ -547,8 +547,8 @@ def complete_config(
                     domain.categories[index],
                     flow2,
                     disturb,
-                    lower and lower[key][index],
-                    upper and upper[key][index],
+                    lower and lower.get(key) and lower[key][index],
+                    upper and upper.get(key) and upper[key][index],
                 )
                 assert (
                     "_choice_" not in subspace[key]
@@ -560,8 +560,8 @@ def complete_config(
                     space[key],
                     flow2,
                     disturb,
-                    lower and lower[key],
-                    upper and upper[key],
+                    lower and lower.get(key),
+                    upper and upper.get(key),
                 )
                 continue
             subspace[key] = domain
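
Before this fix, a partial bound dict that lacked a key raised KeyError at lower[key]; chaining in lower.get(key) short-circuits to a falsy value instead. A minimal illustration with hypothetical bound dicts:

lower = {"a": [1, 2]}  # partial lower bounds; "b" is absent
# old: lower and lower["b"][0]  -> KeyError
value = lower and lower.get("b") and lower["b"][0]  # new form
assert value is None  # falls through instead of raising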


@@ -31,23 +31,21 @@ def check_spark():
     Return (True, None) if the check passes, otherwise log the exception message and
     return (False, Exception(msg)). The exception can be raised by the caller.
     """
-    logger.warning("\ncheck Spark installation...This line should appear only once.\n")
+    logger.debug("\ncheck Spark installation...This line should appear only once.\n")
     if not _have_spark:
         msg = """use_spark=True requires installation of PySpark. Please run pip install flaml[spark]
         and check [here](https://spark.apache.org/docs/latest/api/python/getting_started/install.html)
         for more details about installing Spark."""
         logger.warning(msg)
         return False, ImportError(msg)
     if _spark_major_minor_version[0] < 3:
         msg = "Spark version must be >= 3.0 to use flaml[spark]"
         logger.warning(msg)
         return False, ImportError(msg)
     try:
         SparkSession.builder.getOrCreate()
     except RuntimeError as e:
-        logger.warning(f"\nSparkSession is not available: {e}\n")
+        # logger.warning(f"\nSparkSession is not available: {e}\n")
         return False, RuntimeError(e)
     return True, None
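
The docstring's contract is that the caller decides whether the returned exception is fatal. A sketch of that calling pattern, assuming check_spark is importable from flaml.tune.spark.utils:

from flaml.tune.spark.utils import check_spark

spark_available, spark_error = check_spark()
if not spark_available:
    # ImportError (missing/old PySpark) or RuntimeError (no SparkSession)
    raise spark_error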


@@ -30,7 +30,7 @@ class SimpleTrial(Trial):
         self.config = config or {}
         self.status = Trial.PENDING
         self.start_time = None
-        self.last_result = {}
+        self.last_result = None
         self.last_update_time = -float("inf")
         self.custom_trial_name = None
         self.trainable_name = "trainable"
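
Defaulting to None rather than an empty dict lets the runner tell "no result yet, or the objective returned None" apart from an empty result dict; the stop condition added to run() below relies on this. A stand-in mirroring the constructor change (TrialStub is hypothetical, not FLAML API):

class TrialStub:
    def __init__(self, config=None):
        self.config = config or {}
        self.last_result = None  # was {} before this commit

assert TrialStub({"x": 1}).last_result is None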


@@ -448,7 +448,9 @@ def run(
             logger.addHandler(old_handlers[0])
     if verbose > 0:
         if log_file_name:
-            os.makedirs(os.path.dirname(log_file_name), exist_ok=True)
+            dir_name = os.path.dirname(log_file_name)
+            if dir_name:
+                os.makedirs(dir_name, exist_ok=True)
             logger.addHandler(logging.FileHandler(log_file_name))
         elif not logger.hasHandlers():
             # Add the console handler.
@@ -789,6 +791,10 @@ def run(
                     report(_metric=result)
                 _runner.stop_trial(trial_to_run)
                 num_failures = 0
+                if trial_to_run.last_result is None:
+                    # application stops tuning by returning None
+                    # TODO document this feature when it is finalized
+                    break
             else:
                 # break with upperbound_num_failures consecutive failures
                 num_failures += 1
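
Two fixes here. The directory guard matters because a bare file name has an empty dirname and os.makedirs("") raises FileNotFoundError, as this stdlib-only demonstration shows; the new break makes an objective that returns None end sequential tuning, exercised by test/tune/test_stop.py below.

import os

assert os.path.dirname("flaml.log") == ""  # bare file name: nothing to create
assert os.path.dirname("logs/flaml.log") == "logs"

dir_name = os.path.dirname("flaml.log")
if dir_name:  # skip makedirs for an empty path, as in the diff
    os.makedirs(dir_name, exist_ok=True)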


@@ -1 +1 @@
-__version__ = "1.1.0"
+__version__ = "1.1.1"


@@ -1,6 +1,7 @@
 {
  "cells": [
   {
+   "attachments": {},
    "cell_type": "markdown",
    "metadata": {
     "slideshow": {
@@ -38,10 +39,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install flaml[notebook]\n",
-    "# From v0.6.6, catboost is made an optional dependency to build conda package.\n",
-    "# To install catboost, you can run:\n",
-    "# %pip install flaml[catboost]"
+    "# %pip install flaml[notebook]"
    ]
   },
   {
@@ -749,7 +747,8 @@
    "xgb = XGBClassifier()\n",
    "cat_columns = X_train.select_dtypes(include=['category']).columns\n",
    "X = X_train.copy()\n",
-    "X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n"
+    "X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n",
+    "y_train_xgb = y_train.astype(\"int\")"
    ]
   },
   {
@@ -758,7 +757,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "xgb.fit(X, y_train)"
+    "xgb.fit(X, y_train_xgb)"
    ]
   },
   {
@@ -769,7 +768,8 @@
    "source": [
    "X = X_test.copy()\n",
    "X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)\n",
-    "y_pred_xgb = xgb.predict(X)"
+    "y_pred_xgb = xgb.predict(X)\n",
+    "y_test_xgb = y_test.astype(\"int\")\n"
    ]
   },
   {
@@ -788,7 +788,7 @@
    }
   ],
   "source": [
-    "print('default xgboost accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_xgb, y_test))\n",
+    "print('default xgboost accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_xgb, y_test_xgb))\n",
    "print('default lgbm accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred_lgbm, y_test))\n",
    "print('flaml (10 min) accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred, y_test))"
   ]
@@ -1283,7 +1283,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.15"
+   "version": "3.9.15 (main, Oct 26 2022, 03:47:43) \n[GCC 10.2.1 20210110]"
   },
   "vscode": {
    "interpreter": {


@@ -28,7 +28,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%pip install -U flaml openml;"
+    "# %pip install -U flaml openml;"
    ]
   },
   {
@@ -520,7 +520,7 @@
    " \"task\": \"regression\",\n",
    " \"starting_points\": \"data\",\n",
    " \"estimator_list\": [\"lgbm\"],\n",
-    " \"time_budget\": 600,\n",
+    " \"time_budget\": 300,\n",
    "}\n",
    "automl.fit(X_train, y_train, **settings)"
   ]
@@ -545,7 +545,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
+   "version": "3.9.15 (main, Oct 26 2022, 03:47:43) \n[GCC 10.2.1 20210110]"
   }
  },
 "nbformat": 4,


@@ -75,6 +75,7 @@ setuptools.setup(
             "joblibspark>=0.5.0",
             "nbconvert",
             "nbformat",
+            "ipykernel",
         ],
         "catboost": ["catboost>=0.26"],
         "blendsearch": ["optuna==2.8.0"],


@@ -0,0 +1,45 @@
+import nbformat
+from nbconvert.preprocessors import ExecutePreprocessor
+from nbconvert.preprocessors import CellExecutionError
+import os
+import sys
+import pytest
+
+here = os.path.abspath(os.path.dirname(__file__))
+
+
+def run_notebook(input_nb, output_nb="executed_notebook.ipynb", save=False):
+    try:
+        file_path = os.path.join(here, os.pardir, os.pardir, "notebook", input_nb)
+        with open(file_path) as f:
+            nb = nbformat.read(f, as_version=4)
+        ep = ExecutePreprocessor(timeout=3600, kernel_name="python3")
+        ep.preprocess(nb, {"metadata": {"path": here}})
+    except CellExecutionError:
+        raise
+    finally:
+        if save:
+            with open(os.path.join(here, output_nb), "w", encoding="utf-8") as f:
+                nbformat.write(nb, f)
+
+
+@pytest.mark.skipif(
+    sys.platform != "darwin" or "3.8" not in sys.version,
+    reason="Only run on macOS with Python 3.8",
+)
+def test_automl_classification(save=False):
+    run_notebook("automl_classification.ipynb", save=save)
+
+
+@pytest.mark.skipif(
+    sys.platform != "darwin" or "3.7" not in sys.version,
+    reason="Only run on macOS with Python 3.7",
+)
+def test_zeroshot_lightgbm(save=False):
+    run_notebook("zeroshot_lightgbm.ipynb", save=save)
+
+
+if __name__ == "__main__":
+    # test_automl_classification(save=True)
+    test_zeroshot_lightgbm(save=True)


@@ -25,8 +25,8 @@ def run_notebook(input_nb, output_nb="executed_notebook.ipynb", save=False):
         ep.preprocess(nb, {"metadata": {"path": here}})
     except CellExecutionError:
         raise
-    except Exception as e:
-        print("\nIgnoring below error:\n", e, "\n\n")
+    # except Exception as e:
+    #     print("\nIgnoring below error:\n", e, "\n\n")
     finally:
         if save:
             with open(os.path.join(here, output_nb), "w", encoding="utf-8") as f:


@@ -34,7 +34,7 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
         "width": tune.uniform(0, 20),
         "height": tune.uniform(-100, 100),
     }
-    if externally_setup_searcher:
+    if externally_setup_searcher is True:
         searcher = BlendSearch(
             space=search_space,
@@ -84,8 +84,10 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
             metric="mean_loss",
             mode="min",
         )
-    else:
+    elif externally_setup_searcher is False:
         searcher = None
+    else:
+        searcher = externally_setup_searcher
     analysis = tune.run(
         easy_objective_custom_tune,
@@ -120,5 +122,19 @@ def test_reproducibility():
     ), "flaml.tune not reproducible when the searcher is set up externally"
+
+
+def test_gs_reproducibility():
+    from flaml import BlendSearch, tune
+
+    def f(config):
+        return {"m": 0.35}
+
+    search_space = {"a": tune.randint(1, 100)}
+    bs = BlendSearch(space=search_space, cost_attr=None)
+    analysis1 = tune.run(f, search_alg=bs, num_samples=2, metric="m", mode="max")
+    bs = BlendSearch(space=search_space, cost_attr=None)
+    analysis2 = tune.run(f, search_alg=bs, num_samples=2, metric="m", mode="max")
+    assert analysis1.trials[-1].config == analysis2.trials[-1].config


 if __name__ == "__main__":
     test_reproducibility()


@@ -66,6 +66,14 @@ def test_define_by_run():
         cfo.suggest(f"t{i}")
         # print(normalize(config, bs._gs.space, config, {}, False))
         print(complete_config({}, cfo._ls.space, cfo._ls))
+    # test hierarchical space with low_cost_partial_config
+    bs = BlendSearch(
+        space={"c": tune.choice([0, choice]), "randn": tune.randn(10, 2)},
+        low_cost_partial_config={"randn": 10},
+        metric="metric",
+        mode="max",
+    )
+    tune.run(lambda config: {"metric": 1}, search_alg=bs)


 def test_grid():

test/tune/test_stop.py (new file)

@@ -0,0 +1,25 @@
+from flaml import tune
+
+n_trials = 0
+
+
+def evaluate_config(config):
+    global n_trials
+    n_trials += 1
+    if n_trials >= 10:
+        return None
+    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
+    return metric
+
+
+def test_eval_stop():
+    analysis = tune.run(
+        evaluate_config,
+        config={
+            "x": tune.qloguniform(lower=1, upper=100000, q=1),
+            "y": tune.qlograndint(lower=2, upper=100000, q=2),
+        },
+        num_samples=100,
+        mode="max",
+    )
+    assert len(analysis.trials) == 10
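
The count is exactly 10 because the tenth call increments n_trials to 10 and returns None; run() still records that trial before breaking out of the tuning loop, so analysis.trials holds ten entries.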