add consistency test (#216)

* add consistency test

* test_consistency and format

* add results attribute

* skip when ray is not installed

* Update flaml/tune/analysis.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Qingyun Wu 2021-09-19 20:44:25 -04:00 committed by GitHub
parent f3e50136e8
commit b1115d5347
2 changed files with 161 additions and 21 deletions

flaml/tune/analysis.py

@@ -1,4 +1,4 @@
-'''
+"""
 Copyright 2020 The Ray Authors.
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,12 +16,13 @@ limitations under the License.
 This source file is adapted here because ray does not fully support Windows.
 Copyright (c) Microsoft Corporation.
-'''
+"""
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial
 import logging
 logger = logging.getLogger(__name__)
@@ -30,8 +31,7 @@ def is_nan_or_inf(value):
 class ExperimentAnalysis:
-    """Analyze results from a Tune experiment.
-    """
+    """Analyze results from a Tune experiment."""
     @property
     def best_trial(self) -> Trial:
@@ -46,7 +46,8 @@
                 "To fetch the `best_trial`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_trial(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_trial(self.default_metric, self.default_mode)
     @property
@@ -62,30 +63,41 @@
                 "To fetch the `best_config`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_config(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_config(self.default_metric, self.default_mode)
+    @property
+    def results(self) -> Dict[str, Dict]:
+        """Get the last result of all the trials of the experiment"""
+        return {trial.trial_id: trial.last_result for trial in self.trials}
     def _validate_metric(self, metric: str) -> str:
         if not metric and not self.default_metric:
             raise ValueError(
                 "No `metric` has been passed and `default_metric` has "
-                "not been set. Please specify the `metric` parameter.")
+                "not been set. Please specify the `metric` parameter."
+            )
         return metric or self.default_metric
     def _validate_mode(self, mode: str) -> str:
         if not mode and not self.default_mode:
             raise ValueError(
                 "No `mode` has been passed and `default_mode` has "
-                "not been set. Please specify the `mode` parameter.")
+                "not been set. Please specify the `mode` parameter."
+            )
         if mode and mode not in ["min", "max"]:
             raise ValueError("If set, `mode` has to be one of [min, max]")
         return mode or self.default_mode
-    def get_best_trial(self,
-                       metric: Optional[str] = None,
-                       mode: Optional[str] = None,
-                       scope: str = "last",
-                       filter_nan_and_inf: bool = True) -> Optional[Trial]:
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
         """Retrieve the best trial object.
         Compares all trials' scores on ``metric``.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
@@ -116,11 +128,13 @@
         if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
-                "\"last-5-avg\", \"last-10-avg\"]. "
+                'metric {} for scope {} not in ["all", "last", "avg", '
+                '"last-5-avg", "last-10-avg"]. '
                 "If you didn't pass a `metric` parameter to `tune.run()`, "
                 "you have to pass one when fetching the best trial.".format(
-                    metric, scope))
+                    metric, scope
+                )
+            )
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
@@ -150,13 +164,16 @@
         if not best_trial:
             logger.warning(
                 "Could not find best trial. Did you pass the correct `metric` "
-                "parameter?")
+                "parameter?"
+            )
         return best_trial
-    def get_best_config(self,
-                        metric: Optional[str] = None,
-                        mode: Optional[str] = None,
-                        scope: str = "last") -> Optional[Dict]:
+    def get_best_config(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+    ) -> Optional[Dict]:
         """Retrieve the best config corresponding to the trial.
         Compares all trials' scores on `metric`.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
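
The diff above adds a `results` property that exposes the last reported result of every trial, keyed by trial id. A minimal usage sketch follows; the toy objective, search space, and sample budget are illustrative and not part of this commit:

from flaml import tune


def toy_objective(config):
    # report one metric value for the sampled config
    tune.report(metric=(config["x"] - 3) ** 2)


analysis = tune.run(
    toy_objective,
    config={"x": tune.uniform(lower=0, upper=10)},  # illustrative search space
    metric="metric",
    mode="min",
    num_samples=5,  # try 5 configurations
)

print("best config:", analysis.best_config)
# new in this commit: last result of every trial, keyed by trial id
for trial_id, result in analysis.results.items():
    print(trial_id, result["config"], result["metric"])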

New file: consistency test

@@ -0,0 +1,123 @@
# import unittest
import numpy as np

# require: pip install flaml[blendsearch, ray]
# require: pip install flaml[ray]
import time
from flaml import tune


def evaluate_config(config):
    """evaluate a hyperparameter configuration"""
    # we use a toy example with 2 hyperparameters
    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
    # usually the evaluation takes a non-negligible cost
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config["x"] / 100000)
    # use tune.report to report the metric to optimize
    tune.report(metric=metric)


config_search_space = {
    "x": tune.lograndint(lower=1, upper=100000),
    "y": tune.randint(lower=1, upper=100000),
}

low_cost_partial_config = {"x": 1}


def setup_searcher(searcher_name):
    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch

    if "cfo" in searcher_name:
        searcher = CFO(
            space=config_search_space, low_cost_partial_config=low_cost_partial_config
        )
    elif searcher_name == "bs":
        searcher = BlendSearch(
            metric="metric",
            mode="min",
            space=config_search_space,
            low_cost_partial_config=low_cost_partial_config,
        )
    elif searcher_name == "random":
        searcher = RandomSearch(space=config_search_space)
    else:
        return None
    return searcher


def _test_flaml_raytune_consistency(
    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
):
    try:
        from ray import tune as raytune
    except ImportError:
        print(
            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
        )
        return
    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    analysis = tune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,  # the search space
        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        time_budget_s=None,  # the time budget in seconds
        local_dir="logs/",  # the local directory to store logs
        search_alg=searcher,
        # verbose=0,  # verbosity
        # use_ray=True,  # uncomment when performing parallel tuning using ray
    )
    flaml_best_config = analysis.best_config
    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("best flaml", searcher_name, flaml_best_config)  # the best config
    print("flaml config in results", searcher_name, flaml_config_in_results)

    np.random.seed(100)
    searcher = setup_searcher(searcher_name)
    from ray.tune.suggest import ConcurrencyLimiter

    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
    analysis = raytune.run(
        evaluate_config,  # the function to evaluate a config
        config=config_search_space,
        metric="metric",  # the name of the metric used for optimization
        mode="min",  # the optimization mode, 'min' or 'max'
        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
        local_dir="logs/",  # the local directory to store logs
        # max_concurrent_trials=max_concurrent_trials,
        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
        search_alg=search_alg,
    )
    ray_best_config = analysis.best_config
    ray_config_in_results = [v["config"] for v in analysis.results.values()]
    print(analysis.best_trial.last_result)  # the best trial's result
    print("ray best", searcher_name, analysis.best_config)  # the best config
    print("ray config in results", searcher_name, ray_config_in_results)
    assert ray_best_config == flaml_best_config, "best config should be the same"
    assert (
        flaml_config_in_results == ray_config_in_results
    ), "results from raytune and flaml should be the same"


def test_consistency():
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="random"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
    )
    _test_flaml_raytune_consistency(
        num_samples=5, max_concurrent_trials=1, searcher_name="bs"
    )


if __name__ == "__main__":
    # unittest.main()
    test_consistency()
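
The consistency check relies on reseeding numpy before each run and on wrapping the searcher in Ray's ConcurrencyLimiter with max_concurrent_trials=1, presumably so that flaml's tune.run and raytune.run see the same sequence of suggested configs. A single comparison can also be invoked directly with the helper above; the arguments below are illustrative, not part of the committed test:

# assumes the definitions from the test file above are in scope
_test_flaml_raytune_consistency(
    num_samples=10, max_concurrent_trials=1, searcher_name="bs"
)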