add consistency test (#216)

* add consistency test

* test_consistency and format

* add results attribute

* skip when ray is not installed

* Update flaml/tune/analysis.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Qingyun Wu 2021-09-19 20:44:25 -04:00 committed by GitHub
parent f3e50136e8
commit b1115d5347
2 changed files with 161 additions and 21 deletions

flaml/tune/analysis.py

@@ -1,4 +1,4 @@
-'''
+"""
 Copyright 2020 The Ray Authors.
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,12 +16,13 @@ limitations under the License.
 This source file is adapted here because ray does not fully support Windows.
 Copyright (c) Microsoft Corporation.
-'''
+"""
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial
 import logging
 logger = logging.getLogger(__name__)
@@ -30,8 +31,7 @@ def is_nan_or_inf(value):
 class ExperimentAnalysis:
-    """Analyze results from a Tune experiment.
-    """
+    """Analyze results from a Tune experiment."""
     @property
     def best_trial(self) -> Trial:
@@ -46,7 +46,8 @@
                 "To fetch the `best_trial`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_trial(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_trial(self.default_metric, self.default_mode)
     @property
@@ -62,30 +63,41 @@
                 "To fetch the `best_config`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_config(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_config(self.default_metric, self.default_mode)
+    @property
+    def results(self) -> Dict[str, Dict]:
+        """Get the last result of all the trials of the experiment"""
+        return {trial.trial_id: trial.last_result for trial in self.trials}
     def _validate_metric(self, metric: str) -> str:
         if not metric and not self.default_metric:
             raise ValueError(
                 "No `metric` has been passed and `default_metric` has "
-                "not been set. Please specify the `metric` parameter.")
+                "not been set. Please specify the `metric` parameter."
+            )
         return metric or self.default_metric
     def _validate_mode(self, mode: str) -> str:
         if not mode and not self.default_mode:
             raise ValueError(
                 "No `mode` has been passed and `default_mode` has "
-                "not been set. Please specify the `mode` parameter.")
+                "not been set. Please specify the `mode` parameter."
+            )
         if mode and mode not in ["min", "max"]:
             raise ValueError("If set, `mode` has to be one of [min, max]")
         return mode or self.default_mode
-    def get_best_trial(self,
-                       metric: Optional[str] = None,
-                       mode: Optional[str] = None,
-                       scope: str = "last",
-                       filter_nan_and_inf: bool = True) -> Optional[Trial]:
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
         """Retrieve the best trial object.
         Compares all trials' scores on ``metric``.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
@@ -116,11 +128,13 @@
         if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
-                "\"last-5-avg\", \"last-10-avg\"]. "
+                'metric {} for scope {} not in ["all", "last", "avg", '
+                '"last-5-avg", "last-10-avg"]. '
                 "If you didn't pass a `metric` parameter to `tune.run()`, "
                 "you have to pass one when fetching the best trial.".format(
-                    metric, scope))
+                    metric, scope
+                )
+            )
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
@@ -150,13 +164,16 @@
         if not best_trial:
             logger.warning(
                 "Could not find best trial. Did you pass the correct `metric` "
-                "parameter?")
+                "parameter?"
+            )
         return best_trial
-    def get_best_config(self,
-                        metric: Optional[str] = None,
-                        mode: Optional[str] = None,
-                        scope: str = "last") -> Optional[Dict]:
+    def get_best_config(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+    ) -> Optional[Dict]:
         """Retrieve the best config corresponding to the trial.
         Compares all trials' scores on `metric`.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
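
The diff above adds a `results` property that exposes the last reported result of every trial, keyed by trial id. A minimal usage sketch follows; the toy objective, search space, and sample budget are illustrative and not part of this commit:

from flaml import tune


def toy_objective(config):
    # report one metric value for the sampled config
    tune.report(metric=(config["x"] - 3) ** 2)


analysis = tune.run(
    toy_objective,
    config={"x": tune.uniform(lower=0, upper=10)},  # illustrative search space
    metric="metric",
    mode="min",
    num_samples=5,  # try 5 configurations
)

print("best config:", analysis.best_config)
# new in this commit: last result of every trial, keyed by trial id
for trial_id, result in analysis.results.items():
    print(trial_id, result["config"], result["metric"])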

New file: consistency test

@@ -0,0 +1,123 @@
# import unittest
import numpy as np

# require: pip install flaml[blendsearch, ray]
# require: pip install flaml[ray]
import time
from flaml import tune


def evaluate_config(config):
    """evaluate a hyperparameter configuration"""
    # we use a toy example with 2 hyperparameters
    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
    # usually the evaluation takes a non-negligible cost
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config["x"] / 100000)
    # use tune.report to report the metric to optimize
    tune.report(metric=metric)


config_search_space = {
    "x": tune.lograndint(lower=1, upper=100000),
    "y": tune.randint(lower=1, upper=100000),
}

low_cost_partial_config = {"x": 1}


def setup_searcher(searcher_name):
    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch

    if "cfo" in searcher_name:
        searcher = CFO(
            space=config_search_space, low_cost_partial_config=low_cost_partial_config
        )
    elif searcher_name == "bs":
        searcher = BlendSearch(
            metric="metric",
            mode="min",
            space=config_search_space,
            low_cost_partial_config=low_cost_partial_config,
        )
    elif searcher_name == "random":
        searcher = RandomSearch(space=config_search_space)
    else:
        return None
    return searcher


def _test_flaml_raytune_consistency(
    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
):
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return
    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=searcher,
        # verbose=0,  # verbosity
        # use_ray=True,  # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = analysis.best_config
    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=search_alg,
    )
    ray_best_config = analysis.best_config
    ray_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)
    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (
        flaml_config_in_results == ray_config_in_results
    ), "results from raytune and flaml should be the same"


def test_consistency():
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="random"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="bs"
    )


if __name__ == "__main__":
    # unittest.main()
    test_consistency()
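
The consistency check relies on reseeding numpy before each run and on wrapping the searcher in Ray's ConcurrencyLimiter with max_concurrent_trials=1, presumably so that flaml's tune.run and raytune.run see the same sequence of suggested configs. A single comparison can also be invoked directly with the helper above; the arguments below are illustrative, not part of the committed test:

# assumes the definitions from the test file above are in scope
_test_flaml_raytune_consistency(
    num_samples=10, max_concurrent_trials=1, searcher_name="bs"
)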