Mirror of https://github.com/microsoft/autogen.git (synced 2025-11-03 19:29:52 +00:00)
add consistency test (#216)
* add consistency test
* test_consistency and format
* add results attribute
* skip when ray is not installed
* Update flaml/tune/analysis.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
parent f3e50136e8
commit b1115d5347
flaml/tune/analysis.py

@@ -1,4 +1,4 @@
-'''
+"""
 Copyright 2020 The Ray Authors.

 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,12 +16,13 @@ limitations under the License.
 This source file is adapted here because ray does not fully support Windows.

 Copyright (c) Microsoft Corporation.
-'''
+"""
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial

 import logging

 logger = logging.getLogger(__name__)

+
@@ -30,8 +31,7 @@ def is_nan_or_inf(value):


 class ExperimentAnalysis:
-    """Analyze results from a Tune experiment.
-    """
+    """Analyze results from a Tune experiment."""

     @property
     def best_trial(self) -> Trial:
@@ -46,7 +46,8 @@ class ExperimentAnalysis:
                 "To fetch the `best_trial`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_trial(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_trial(self.default_metric, self.default_mode)

     @property
@@ -62,30 +63,41 @@ class ExperimentAnalysis:
                 "To fetch the `best_config`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_config(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_config(self.default_metric, self.default_mode)

+    @property
+    def results(self) -> Dict[str, Dict]:
+        """Get the last result of all the trials of the experiment"""
+
+        return {trial.trial_id: trial.last_result for trial in self.trials}
+
     def _validate_metric(self, metric: str) -> str:
         if not metric and not self.default_metric:
             raise ValueError(
                 "No `metric` has been passed and `default_metric` has "
-                "not been set. Please specify the `metric` parameter.")
+                "not been set. Please specify the `metric` parameter."
+            )
         return metric or self.default_metric

     def _validate_mode(self, mode: str) -> str:
         if not mode and not self.default_mode:
             raise ValueError(
                 "No `mode` has been passed and `default_mode` has "
-                "not been set. Please specify the `mode` parameter.")
+                "not been set. Please specify the `mode` parameter."
+            )
         if mode and mode not in ["min", "max"]:
             raise ValueError("If set, `mode` has to be one of [min, max]")
         return mode or self.default_mode

-    def get_best_trial(self,
-                       metric: Optional[str] = None,
-                       mode: Optional[str] = None,
-                       scope: str = "last",
-                       filter_nan_and_inf: bool = True) -> Optional[Trial]:
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
         """Retrieve the best trial object.
         Compares all trials' scores on ``metric``.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
@@ -116,11 +128,13 @@ class ExperimentAnalysis:
         if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
-                "\"last-5-avg\", \"last-10-avg\"]. "
+                'metric {} for scope {} not in ["all", "last", "avg", '
+                '"last-5-avg", "last-10-avg"]. '
                 "If you didn't pass a `metric` parameter to `tune.run()`, "
                 "you have to pass one when fetching the best trial.".format(
-                    metric, scope))
+                    metric, scope
+                )
+            )
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
@@ -150,13 +164,16 @@ class ExperimentAnalysis:
         if not best_trial:
             logger.warning(
                 "Could not find best trial. Did you pass the correct `metric` "
-                "parameter?")
+                "parameter?"
+            )
         return best_trial

-    def get_best_config(self,
-                        metric: Optional[str] = None,
-                        mode: Optional[str] = None,
-                        scope: str = "last") -> Optional[Dict]:
+    def get_best_config(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+    ) -> Optional[Dict]:
         """Retrieve the best config corresponding to the trial.
         Compares all trials' scores on `metric`.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
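As a usage note (not part of the diff itself): a minimal sketch of how the `results` property added above can be consumed. The toy objective `toy_objective`, its search space, and the reported "metric" name below are illustrative assumptions for this sketch, not code from the commit.

from flaml import tune

def toy_objective(config):
    # report a single value under the (assumed) name "metric"
    tune.report(metric=(config["x"] - 3) ** 2)

analysis = tune.run(
    toy_objective,
    config={"x": tune.randint(lower=1, upper=10)},
    metric="metric",
    mode="min",
    num_samples=3,
)

# new in this commit: a dict mapping trial_id -> that trial's last reported result,
# which includes the sampled "config" and the reported metric value
for trial_id, last_result in analysis.results.items():
    print(trial_id, last_result["config"], last_result["metric"])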
test/tune/test_flaml_raytune_consistency.py (new file, 123 lines)

@@ -0,0 +1,123 @@
# import unittest
import numpy as np

# require: pip install flaml[blendsearch, ray]
# require: pip install flaml[ray]
import time
from flaml import tune


def evaluate_config(config):
    """evaluate a hyperparameter configuration"""
    # we use a toy example with 2 hyperparameters
    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
    # usually the evaluation has a non-negligible cost
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config["x"] / 100000)
    # use tune.report to report the metric to optimize
    tune.report(metric=metric)


config_search_space = {
    "x": tune.lograndint(lower=1, upper=100000),
    "y": tune.randint(lower=1, upper=100000),
}

low_cost_partial_config = {"x": 1}


def setup_searcher(searcher_name):
    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch

    if "cfo" in searcher_name:
        searcher = CFO(
            space=config_search_space, low_cost_partial_config=low_cost_partial_config
        )
    elif searcher_name == "bs":
        searcher = BlendSearch(
            metric="metric",
            mode="min",
            space=config_search_space,
            low_cost_partial_config=low_cost_partial_config,
        )
    elif searcher_name == "random":
        searcher = RandomSearch(space=config_search_space)
    else:
        return None
    return searcher


def _test_flaml_raytune_consistency(
    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
):
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return
    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=searcher,
        # verbose=0,  # verbosity
        # use_ray=True,  # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = analysis.best_config
    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=search_alg,
    )
    ray_best_config = analysis.best_config
    ray_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)
    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (
        flaml_config_in_results == ray_config_in_results
    ), "results from raytune and flaml should be the same"


def test_consistency():
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="random"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="bs"
    )


if __name__ == "__main__":
    # unittest.main()
    test_consistency()
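To try the new test locally, the standard invocation would be the following (assuming the optional dependencies noted in the file's header comments are installed; these commands are not part of the commit):

pip install flaml[blendsearch,ray] pytest
pytest test/tune/test_flaml_raytune_consistency.py

When ray is not installed, _test_flaml_raytune_consistency prints a message and returns early, so the test effectively skips itself, matching the "skip when ray is not installed" bullet in the commit message.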