Mirror of https://github.com/microsoft/autogen.git (synced 2025-11-03 19:29:52 +00:00)
add consistency test (#216)
* add consistency test
* test_consistency and format
* add results attribute
* skip when ray is not installed
* Update flaml/tune/analysis.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
parent f3e50136e8
commit b1115d5347
flaml/tune/analysis.py

@@ -1,4 +1,4 @@
-'''
+"""
 Copyright 2020 The Ray Authors.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,12 +16,13 @@ limitations under the License.
 This source file is adapted here because ray does not fully support Windows.

 Copyright (c) Microsoft Corporation.
-'''
+"""
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial

 import logging

 logger = logging.getLogger(__name__)

+
@@ -30,8 +31,7 @@ def is_nan_or_inf(value):


 class ExperimentAnalysis:
-    """Analyze results from a Tune experiment.
-    """
+    """Analyze results from a Tune experiment."""

     @property
     def best_trial(self) -> Trial:
@@ -46,7 +46,8 @@ class ExperimentAnalysis:
                 "To fetch the `best_trial`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_trial(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_trial(self.default_metric, self.default_mode)

     @property
@@ -62,30 +63,41 @@ class ExperimentAnalysis:
                 "To fetch the `best_config`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_config(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_config(self.default_metric, self.default_mode)

+    @property
+    def results(self) -> Dict[str, Dict]:
+        """Get the last result of all the trials of the experiment"""
+
+        return {trial.trial_id: trial.last_result for trial in self.trials}
+
     def _validate_metric(self, metric: str) -> str:
         if not metric and not self.default_metric:
             raise ValueError(
                 "No `metric` has been passed and `default_metric` has "
-                "not been set. Please specify the `metric` parameter.")
+                "not been set. Please specify the `metric` parameter."
+            )
         return metric or self.default_metric

     def _validate_mode(self, mode: str) -> str:
         if not mode and not self.default_mode:
             raise ValueError(
                 "No `mode` has been passed and `default_mode` has "
-                "not been set. Please specify the `mode` parameter.")
+                "not been set. Please specify the `mode` parameter."
+            )
         if mode and mode not in ["min", "max"]:
             raise ValueError("If set, `mode` has to be one of [min, max]")
         return mode or self.default_mode

-    def get_best_trial(self,
-                       metric: Optional[str] = None,
-                       mode: Optional[str] = None,
-                       scope: str = "last",
-                       filter_nan_and_inf: bool = True) -> Optional[Trial]:
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
         """Retrieve the best trial object.
         Compares all trials' scores on ``metric``.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
@@ -116,11 +128,13 @@ class ExperimentAnalysis:
         if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
-                "\"last-5-avg\", \"last-10-avg\"]. "
+                'metric {} for scope {} not in ["all", "last", "avg", '
+                '"last-5-avg", "last-10-avg"]. '
                 "If you didn't pass a `metric` parameter to `tune.run()`, "
                 "you have to pass one when fetching the best trial.".format(
-                    metric, scope))
+                    metric, scope
+                )
+            )
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
@@ -150,13 +164,16 @@ class ExperimentAnalysis:
         if not best_trial:
             logger.warning(
                 "Could not find best trial. Did you pass the correct `metric` "
-                "parameter?")
+                "parameter?"
+            )
         return best_trial

-    def get_best_config(self,
-                        metric: Optional[str] = None,
-                        mode: Optional[str] = None,
-                        scope: str = "last") -> Optional[Dict]:
+    def get_best_config(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+    ) -> Optional[Dict]:
         """Retrieve the best config corresponding to the trial.
         Compares all trials' scores on `metric`.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
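As a usage note (not part of the diff itself): a minimal sketch of how the `results` property added above can be consumed. The toy objective `toy_objective`, its search space, and the reported "metric" name below are illustrative assumptions for this sketch, not code from the commit.

from flaml import tune

def toy_objective(config):
    # report a single value under the (assumed) name "metric"
    tune.report(metric=(config["x"] - 3) ** 2)

analysis = tune.run(
    toy_objective,
    config={"x": tune.randint(lower=1, upper=10)},
    metric="metric",
    mode="min",
    num_samples=3,
)

# new in this commit: a dict mapping trial_id -> that trial's last reported result,
# which includes the sampled "config" and the reported metric value
for trial_id, last_result in analysis.results.items():
    print(trial_id, last_result["config"], last_result["metric"])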
test/tune/test_flaml_raytune_consistency.py (new file, 123 lines)

@@ -0,0 +1,123 @@
# import unittest
import numpy as np

# require: pip install flaml[blendsearch, ray]
# require: pip install flaml[ray]
import time
from flaml import tune


def evaluate_config(config):
    """evaluate a hyperparameter configuration"""
    # we use a toy example with 2 hyperparameters
    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
    # usually the evaluation has a non-negligible cost
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config["x"] / 100000)
    # use tune.report to report the metric to optimize
    tune.report(metric=metric)


config_search_space = {
    "x": tune.lograndint(lower=1, upper=100000),
    "y": tune.randint(lower=1, upper=100000),
}

low_cost_partial_config = {"x": 1}


def setup_searcher(searcher_name):
    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch

    if "cfo" in searcher_name:
        searcher = CFO(
            space=config_search_space, low_cost_partial_config=low_cost_partial_config
        )
    elif searcher_name == "bs":
        searcher = BlendSearch(
            metric="metric",
            mode="min",
            space=config_search_space,
            low_cost_partial_config=low_cost_partial_config,
        )
    elif searcher_name == "random":
        searcher = RandomSearch(space=config_search_space)
    else:
        return None
    return searcher


def _test_flaml_raytune_consistency(
    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
):
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return
    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=searcher,
        # verbose=0,  # verbosity
        # use_ray=True,  # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = analysis.best_config
    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=search_alg,
    )
    ray_best_config = analysis.best_config
    ray_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)
    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (
        flaml_config_in_results == ray_config_in_results
    ), "results from raytune and flaml should be the same"


def test_consistency():
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="random"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="bs"
    )


if __name__ == "__main__":
    # unittest.main()
    test_consistency()
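To try the new test locally, the standard invocation would be the following (assuming the optional dependencies noted in the file's header comments are installed; these commands are not part of the commit):

pip install flaml[blendsearch,ray] pytest
pytest test/tune/test_flaml_raytune_consistency.py

When ray is not installed, _test_flaml_raytune_consistency prints a message and returns early, so the test effectively skips itself, matching the "skip when ray is not installed" bullet in the commit message.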