From b1115d5347e00326d10e5d20836b912bb2cc95e9 Mon Sep 17 00:00:00 2001
From: Qingyun Wu
Date: Sun, 19 Sep 2021 20:44:25 -0400
Subject: [PATCH] add consistency test (#216)

* add consistency test

* test_consistency and format

* add results attribute

* skip when ray is not installed

* Update flaml/tune/analysis.py

Co-authored-by: Chi Wang
Co-authored-by: Qingyun Wu
Co-authored-by: Chi Wang
---
 flaml/tune/analysis.py                      |  59 ++++++----
 test/tune/test_flaml_raytune_consistency.py | 123 ++++++++++++++++++++
 2 files changed, 161 insertions(+), 21 deletions(-)
 create mode 100644 test/tune/test_flaml_raytune_consistency.py

diff --git a/flaml/tune/analysis.py b/flaml/tune/analysis.py
index bd8135b94..320b0d279 100644
--- a/flaml/tune/analysis.py
+++ b/flaml/tune/analysis.py
@@ -1,4 +1,4 @@
-'''
+"""
 Copyright 2020 The Ray Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,12 +16,13 @@ limitations under the License.
 
 This source file is adapted here because ray does not fully support Windows.
 
 Copyright (c) Microsoft Corporation.
-'''
+"""
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial
 import logging
+
 logger = logging.getLogger(__name__)
 
 
@@ -30,8 +31,7 @@ def is_nan_or_inf(value):
 
 
 class ExperimentAnalysis:
-    """Analyze results from a Tune experiment.
-    """
+    """Analyze results from a Tune experiment."""
 
     @property
     def best_trial(self) -> Trial:
@@ -46,7 +46,8 @@ class ExperimentAnalysis:
                 "To fetch the `best_trial`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_trial(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_trial(self.default_metric, self.default_mode)
 
     @property
@@ -62,30 +63,41 @@ class ExperimentAnalysis:
                 "To fetch the `best_config`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_config(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_config(self.default_metric, self.default_mode)
 
+    @property
+    def results(self) -> Dict[str, Dict]:
+        """Get the last result of all the trials of the experiment"""
+
+        return {trial.trial_id: trial.last_result for trial in self.trials}
+
     def _validate_metric(self, metric: str) -> str:
         if not metric and not self.default_metric:
             raise ValueError(
                 "No `metric` has been passed and `default_metric` has "
-                "not been set. Please specify the `metric` parameter.")
+                "not been set. Please specify the `metric` parameter."
+            )
         return metric or self.default_metric
 
     def _validate_mode(self, mode: str) -> str:
         if not mode and not self.default_mode:
             raise ValueError(
                 "No `mode` has been passed and `default_mode` has "
-                "not been set. Please specify the `mode` parameter.")
+                "not been set. Please specify the `mode` parameter."
+            )
         if mode and mode not in ["min", "max"]:
             raise ValueError("If set, `mode` has to be one of [min, max]")
         return mode or self.default_mode
 
-    def get_best_trial(self,
-                       metric: Optional[str] = None,
-                       mode: Optional[str] = None,
-                       scope: str = "last",
-                       filter_nan_and_inf: bool = True) -> Optional[Trial]:
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
         """Retrieve the best trial object.
         Compares all trials' scores on ``metric``.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
@@ -116,11 +128,13 @@ class ExperimentAnalysis:
         if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
-                "\"last-5-avg\", \"last-10-avg\"]. "
+                'metric {} for scope {} not in ["all", "last", "avg", '
+                '"last-5-avg", "last-10-avg"]. '
                 "If you didn't pass a `metric` parameter to `tune.run()`, "
                 "you have to pass one when fetching the best trial.".format(
-                    metric, scope))
+                    metric, scope
+                )
+            )
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
@@ -150,13 +164,16 @@ class ExperimentAnalysis:
         if not best_trial:
             logger.warning(
                 "Could not find best trial. Did you pass the correct `metric` "
-                "parameter?")
+                "parameter?"
+            )
         return best_trial
 
-    def get_best_config(self,
-                        metric: Optional[str] = None,
-                        mode: Optional[str] = None,
-                        scope: str = "last") -> Optional[Dict]:
+    def get_best_config(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+    ) -> Optional[Dict]:
         """Retrieve the best config corresponding to the trial.
         Compares all trials' scores on `metric`.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
diff --git a/test/tune/test_flaml_raytune_consistency.py b/test/tune/test_flaml_raytune_consistency.py
new file mode 100644
index 000000000..dee393c3a
--- /dev/null
+++ b/test/tune/test_flaml_raytune_consistency.py
@@ -0,0 +1,123 @@
+# import unittest
+import numpy as np
+
+# require: pip install flaml[blendsearch, ray]
+# require: pip install flaml[ray]
+import time
+from flaml import tune
+
+
+def evaluate_config(config):
+    """evaluate a hyperparameter configuration"""
+    # we use a toy example with 2 hyperparameters
+    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
+    # usually the evaluation takes a non-negligible cost
+    # and the cost could be related to certain hyperparameters
+    # in this example, we assume it's proportional to x
+    time.sleep(config["x"] / 100000)
+    # use tune.report to report the metric to optimize
+    tune.report(metric=metric)
+
+
+config_search_space = {
+    "x": tune.lograndint(lower=1, upper=100000),
+    "y": tune.randint(lower=1, upper=100000),
+}
+
+low_cost_partial_config = {"x": 1}
+
+
+def setup_searcher(searcher_name):
+    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch
+
+    if "cfo" in searcher_name:
+        searcher = CFO(
+            space=config_search_space, low_cost_partial_config=low_cost_partial_config
+        )
+    elif searcher_name == "bs":
+        searcher = BlendSearch(
+            metric="metric",
+            mode="min",
+            space=config_search_space,
+            low_cost_partial_config=low_cost_partial_config,
+        )
+    elif searcher_name == "random":
+        searcher = RandomSearch(space=config_search_space)
+    else:
+        return None
+    return searcher
+
+
+def _test_flaml_raytune_consistency(
+    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
+):
+    try:
+        from ray import tune as raytune
+    except ImportError:
+        print(
+            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
+        )
+        return
+    np.random.seed(100)
+    searcher = setup_searcher(searcher_name)
+    analysis = tune.run(
+        evaluate_config,  # the function to evaluate a config
+        config=config_search_space,  # the search space
+        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
+        metric="metric",  # the name of the metric used for optimization
+        mode="min",  # the optimization mode, 'min' or 'max'
+        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
+        time_budget_s=None,  # the time budget in seconds
+        local_dir="logs/",  # the local directory to store logs
+        search_alg=searcher,
+        # verbose=0,  # verbosity
+        # use_ray=True,  # uncomment when performing parallel tuning using ray
+    )
+    flaml_best_config = analysis.best_config
+    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
+    print(analysis.best_trial.last_result)  # the best trial's result
+    print("best flaml", searcher_name, flaml_best_config)  # the best config
+    print("flaml config in results", searcher_name, flaml_config_in_results)
+
+    np.random.seed(100)
+    searcher = setup_searcher(searcher_name)
+    from ray.tune.suggest import ConcurrencyLimiter
+
+    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
+    analysis = raytune.run(
+        evaluate_config,  # the function to evaluate a config
+        config=config_search_space,
+        metric="metric",  # the name of the metric used for optimization
+        mode="min",  # the optimization mode, 'min' or 'max'
+        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
+        local_dir="logs/",  # the local directory to store logs
+        # max_concurrent_trials=max_concurrent_trials,
+        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
+        search_alg=search_alg,
+    )
+    ray_best_config = analysis.best_config
+    ray_config_in_results = [v["config"] for v in analysis.results.values()]
+    print(analysis.best_trial.last_result)  # the best trial's result
+    print("ray best", searcher_name, analysis.best_config)  # the best config
+    print("ray config in results", searcher_name, ray_config_in_results)
+    assert ray_best_config == flaml_best_config, "best config should be the same"
+    assert (
+        flaml_config_in_results == ray_config_in_results
+    ), "results from raytune and flaml should be the same"
+
+
+def test_consistency():
+    _test_flaml_raytune_consistency(
+        num_samples=5, max_concurrent_trials=1, searcher_name="random"
+    )
+    _test_flaml_raytune_consistency(
+        num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
+    )
+    _test_flaml_raytune_consistency(
+        num_samples=5, max_concurrent_trials=1, searcher_name="bs"
+    )
+
+
+if __name__ == "__main__":
+    # unittest.main()
+    test_consistency()
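For reviewers who want to exercise the new `ExperimentAnalysis.results` property outside the test, the sketch below is not part of the patch; it reuses the toy objective and search space from test_flaml_raytune_consistency.py and only assumes `flaml` is installed (ray is not needed for this flaml-only path). The per-trial access pattern mirrors what the consistency assertions compare.

# Minimal usage sketch (not part of the patch): exercise the `results`
# property added to flaml/tune/analysis.py in this commit.
import time

from flaml import tune


def evaluate_config(config):
    """Toy objective with two hyperparameters, as in the test above."""
    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
    time.sleep(config["x"] / 100000)  # simulated, x-dependent evaluation cost
    tune.report(metric=metric)


analysis = tune.run(
    evaluate_config,
    config={
        "x": tune.lograndint(lower=1, upper=100000),
        "y": tune.randint(lower=1, upper=100000),
    },
    low_cost_partial_config={"x": 1},
    metric="metric",
    mode="min",
    num_samples=5,
    local_dir="logs/",
)

# `results` maps trial_id -> the trial's last reported result dict, so each
# trial's sampled config and reported metric can be inspected directly.
for trial_id, result in analysis.results.items():
    print(trial_id, result["config"], result["metric"])
print("best config:", analysis.best_config)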