mirror of
https://github.com/microsoft/autogen.git
synced 2025-11-11 15:24:16 +00:00
handle non-flaml scheduler in flaml.tune (#532)
* handle non-flaml scheduler in flaml.tune * revise time budget * Update website/docs/Use-Cases/Tune-User-Defined-Function.md Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update website/docs/Use-Cases/Tune-User-Defined-Function.md Co-authored-by: Chi Wang <wang.chi@microsoft.com> * Update flaml/tune/tune.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * add docstr * remove random seed * StopIteration * StopIteration format * format * Update flaml/tune/tune.py Co-authored-by: Chi Wang <wang.chi@microsoft.com> * revise docstr Co-authored-by: Chi Wang <wang.chi@microsoft.com>
This commit is contained in:
parent
c1e1299855
commit
bcdfdc8735
@ -47,6 +47,7 @@ class ExperimentAnalysis(EA):
|
|||||||
|
|
||||||
|
|
||||||
def report(_metric=None, **kwargs):
|
def report(_metric=None, **kwargs):
|
||||||
|
|
||||||
"""A function called by the HPO application to report final or intermediate
|
"""A function called by the HPO application to report final or intermediate
|
||||||
results.
|
results.
|
||||||
|
|
||||||
@ -78,6 +79,10 @@ def report(_metric=None, **kwargs):
|
|||||||
_metric: Optional default anonymous metric for ``tune.report(value)``.
|
_metric: Optional default anonymous metric for ``tune.report(value)``.
|
||||||
(For compatibility with ray.tune.report)
|
(For compatibility with ray.tune.report)
|
||||||
**kwargs: Any key value pair to be reported.
|
**kwargs: Any key value pair to be reported.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
StopIteration (when not using ray, i.e., _use_ray=False):
|
||||||
|
A StopIteration exception is raised if the trial has been signaled to stop.
|
||||||
"""
|
"""
|
||||||
global _use_ray
|
global _use_ray
|
||||||
global _verbose
|
global _verbose
|
||||||
@ -109,9 +114,7 @@ def report(_metric=None, **kwargs):
|
|||||||
if _verbose > 2:
|
if _verbose > 2:
|
||||||
logger.info(f"result: {result}")
|
logger.info(f"result: {result}")
|
||||||
if trial.is_finished():
|
if trial.is_finished():
|
||||||
return None
|
raise StopIteration
|
||||||
else:
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
def run(
|
def run(
|
||||||
@ -223,7 +226,7 @@ def run(
|
|||||||
reduction_factor: A float of the reduction factor used for incremental
|
reduction_factor: A float of the reduction factor used for incremental
|
||||||
pruning.
|
pruning.
|
||||||
scheduler: A scheduler for executing the experiment. Can be None, 'flaml',
|
scheduler: A scheduler for executing the experiment. Can be None, 'flaml',
|
||||||
'asha' or a custom instance of the TrialScheduler class. Default is None:
|
'asha' (or 'async_hyperband', 'asynchyperband') or a custom instance of the TrialScheduler class. Default is None:
|
||||||
in this case when resource_attr is provided, the 'flaml' scheduler will be
|
in this case when resource_attr is provided, the 'flaml' scheduler will be
|
||||||
used, otherwise no scheduler will be used. When set 'flaml', an
|
used, otherwise no scheduler will be used. When set 'flaml', an
|
||||||
authentic scheduler implemented in FLAML will be used. It does not
|
authentic scheduler implemented in FLAML will be used. It does not
|
||||||
@ -236,9 +239,22 @@ def run(
|
|||||||
respectively. You can also provide a self-defined scheduler instance
|
respectively. You can also provide a self-defined scheduler instance
|
||||||
of the TrialScheduler class. When 'asha' or self-defined scheduler is
|
of the TrialScheduler class. When 'asha' or self-defined scheduler is
|
||||||
used, you usually need to report intermediate results in the evaluation
|
used, you usually need to report intermediate results in the evaluation
|
||||||
function. Please find examples using different types of schedulers
|
function via 'tune.report()'. In addition, when 'use_ray' is not enabled,
|
||||||
|
you also need to stop the evaluation function by explicitly catching the
|
||||||
|
`StopIteration` exception, as shown in the following example.
|
||||||
|
Please find more examples using different types of schedulers
|
||||||
and how to set up the corresponding evaluation functions in
|
and how to set up the corresponding evaluation functions in
|
||||||
test/tune/test_scheduler.py. TODO: point to notebook examples.
|
test/tune/test_scheduler.py, and test/tune/example_scheduler.py.
|
||||||
|
```python
|
||||||
|
def easy_objective(config):
|
||||||
|
width, height = config["width"], config["height"]
|
||||||
|
for step in range(config["steps"]):
|
||||||
|
intermediate_score = evaluation_fn(step, width, height)
|
||||||
|
try:
|
||||||
|
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
|
```
|
||||||
search_alg: An instance of BlendSearch as the search algorithm
|
search_alg: An instance of BlendSearch as the search algorithm
|
||||||
to be used. The same instance can be used for iterative tuning.
|
to be used. The same instance can be used for iterative tuning.
|
||||||
e.g.,
|
e.g.,
|
||||||
@ -316,8 +332,7 @@ def run(
|
|||||||
flaml_scheduler_min_resource
|
flaml_scheduler_min_resource
|
||||||
) = flaml_scheduler_max_resource = flaml_scheduler_reduction_factor = None
|
) = flaml_scheduler_max_resource = flaml_scheduler_reduction_factor = None
|
||||||
if scheduler in (None, "flaml"):
|
if scheduler in (None, "flaml"):
|
||||||
|
# when scheduler is set 'flaml' or None, we will use a scheduler that is
|
||||||
# when scheduler is set 'flaml', we will use a scheduler that is
|
|
||||||
# authentic to the search algorithms in flaml. After setting up
|
# authentic to the search algorithms in flaml. After setting up
|
||||||
# the search algorithm accordingly, we need to set scheduler to
|
# the search algorithm accordingly, we need to set scheduler to
|
||||||
# None in case it is later used in the trial runner.
|
# None in case it is later used in the trial runner.
|
||||||
@ -388,7 +403,7 @@ def run(
|
|||||||
searcher.set_search_properties(metric, mode, config, setting)
|
searcher.set_search_properties(metric, mode, config, setting)
|
||||||
else:
|
else:
|
||||||
searcher.set_search_properties(metric, mode, config)
|
searcher.set_search_properties(metric, mode, config)
|
||||||
if scheduler == "asha":
|
if scheduler in ("asha", "asynchyperband", "async_hyperband"):
|
||||||
params = {}
|
params = {}
|
||||||
# scheduler resource_dimension=resource_attr
|
# scheduler resource_dimension=resource_attr
|
||||||
if resource_attr:
|
if resource_attr:
|
||||||
|
|||||||
107
test/tune/example_scheduler.py
Normal file
107
test/tune/example_scheduler.py
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
from functools import partial
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def evaluation_fn(step, width, height):
|
||||||
|
return (0.1 + width * step / 100) ** (-1) + height * 0.1
|
||||||
|
|
||||||
|
|
||||||
|
def easy_objective(use_raytune, config):
|
||||||
|
if use_raytune:
|
||||||
|
from ray import tune
|
||||||
|
else:
|
||||||
|
from flaml import tune
|
||||||
|
# Hyperparameters
|
||||||
|
width, height = config["width"], config["height"]
|
||||||
|
|
||||||
|
for step in range(config["steps"]):
|
||||||
|
# Iterative training function - can be any arbitrary training procedure
|
||||||
|
intermediate_score = evaluation_fn(step, width, height)
|
||||||
|
# Feed the score back to Tune.
|
||||||
|
try:
|
||||||
|
tune.report(iterations=step, mean_loss=intermediate_score)
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
def test_tune_scheduler(smoke_test=True, use_ray=True, use_raytune=False):
|
||||||
|
import numpy as np
|
||||||
|
from flaml.searcher.blendsearch import BlendSearch
|
||||||
|
|
||||||
|
np.random.seed(100)
|
||||||
|
easy_objective_custom_tune = partial(easy_objective, use_raytune)
|
||||||
|
if use_raytune:
|
||||||
|
try:
|
||||||
|
from ray import tune
|
||||||
|
except ImportError:
|
||||||
|
print("ray[tune] is not installed, skipping test")
|
||||||
|
return
|
||||||
|
searcher = BlendSearch(
|
||||||
|
space={
|
||||||
|
"steps": 100,
|
||||||
|
"width": tune.uniform(0, 20),
|
||||||
|
"height": tune.uniform(-100, 100),
|
||||||
|
# This is an ignored parameter.
|
||||||
|
"activation": tune.choice(["relu", "tanh"]),
|
||||||
|
"test4": np.zeros((3, 1)),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
analysis = tune.run(
|
||||||
|
easy_objective_custom_tune,
|
||||||
|
search_alg=searcher,
|
||||||
|
metric="mean_loss",
|
||||||
|
mode="min",
|
||||||
|
num_samples=10 if smoke_test else 100,
|
||||||
|
scheduler="asynchyperband",
|
||||||
|
config={
|
||||||
|
"steps": 100,
|
||||||
|
"width": tune.uniform(0, 20),
|
||||||
|
"height": tune.uniform(-100, 100),
|
||||||
|
# This is an ignored parameter.
|
||||||
|
"activation": tune.choice(["relu", "tanh"]),
|
||||||
|
"test4": np.zeros((3, 1)),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
from flaml import tune
|
||||||
|
|
||||||
|
searcher = BlendSearch(
|
||||||
|
space={
|
||||||
|
"steps": 100,
|
||||||
|
"width": tune.uniform(0, 20),
|
||||||
|
"height": tune.uniform(-100, 100),
|
||||||
|
# This is an ignored parameter.
|
||||||
|
"activation": tune.choice(["relu", "tanh"]),
|
||||||
|
"test4": np.zeros((3, 1)),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
analysis = tune.run(
|
||||||
|
easy_objective_custom_tune,
|
||||||
|
search_alg=searcher,
|
||||||
|
metric="mean_loss",
|
||||||
|
mode="min",
|
||||||
|
num_samples=10 if smoke_test else 100,
|
||||||
|
scheduler="asynchyperband",
|
||||||
|
resource_attr="iterations",
|
||||||
|
max_resource=99,
|
||||||
|
# min_resource=1,
|
||||||
|
# reduction_factor=4,
|
||||||
|
config={
|
||||||
|
"steps": 100,
|
||||||
|
"width": tune.uniform(0, 20),
|
||||||
|
"height": tune.uniform(-100, 100),
|
||||||
|
# This is an ignored parameter.
|
||||||
|
"activation": tune.choice(["relu", "tanh"]),
|
||||||
|
"test4": np.zeros((3, 1)),
|
||||||
|
},
|
||||||
|
use_ray=use_ray,
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Best hyperparameters found were: ", analysis.best_config)
|
||||||
|
print("best results", analysis.best_result)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
test_tune_scheduler(smoke_test=True, use_ray=True, use_raytune=True)
|
||||||
|
test_tune_scheduler(smoke_test=True, use_ray=True)
|
||||||
|
test_tune_scheduler(smoke_test=True, use_ray=False)
|
||||||
@ -58,7 +58,6 @@ def _test_flaml_raytune_consistency(
|
|||||||
"skip _test_flaml_raytune_consistency because ray tune cannot be imported."
|
"skip _test_flaml_raytune_consistency because ray tune cannot be imported."
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
np.random.seed(100)
|
|
||||||
searcher = setup_searcher(searcher_name)
|
searcher = setup_searcher(searcher_name)
|
||||||
analysis = tune.run(
|
analysis = tune.run(
|
||||||
evaluate_config, # the function to evaluate a config
|
evaluate_config, # the function to evaluate a config
|
||||||
@ -78,7 +77,6 @@ def _test_flaml_raytune_consistency(
|
|||||||
flaml_time_in_results = [v["time_total_s"] for v in analysis.results.values()]
|
flaml_time_in_results = [v["time_total_s"] for v in analysis.results.values()]
|
||||||
print(analysis.best_trial.last_result) # the best trial's result
|
print(analysis.best_trial.last_result) # the best trial's result
|
||||||
|
|
||||||
np.random.seed(100)
|
|
||||||
searcher = setup_searcher(searcher_name)
|
searcher = setup_searcher(searcher_name)
|
||||||
from ray.tune.suggest import ConcurrencyLimiter
|
from ray.tune.suggest import ConcurrencyLimiter
|
||||||
|
|
||||||
|
|||||||
@ -15,7 +15,7 @@ def rand_vector_unit_sphere(dim):
|
|||||||
return vec / mag
|
return vec / mag
|
||||||
|
|
||||||
|
|
||||||
def simple_obj(config, resource=10000):
|
def simple_obj(resource, config):
|
||||||
config_value_vector = np.array([config["x"], config["y"], config["z"]])
|
config_value_vector = np.array([config["x"], config["y"], config["z"]])
|
||||||
score_sequence = []
|
score_sequence = []
|
||||||
for i in range(resource):
|
for i in range(resource):
|
||||||
@ -41,23 +41,29 @@ def obj_w_intermediate_report(resource, config):
|
|||||||
score_avg = np.mean(np.array(score_sequence))
|
score_avg = np.mean(np.array(score_sequence))
|
||||||
score_std = np.std(np.array(score_sequence))
|
score_std = np.std(np.array(score_sequence))
|
||||||
score_lb = score_avg - 1.96 * score_std / np.sqrt(i + 1)
|
score_lb = score_avg - 1.96 * score_std / np.sqrt(i + 1)
|
||||||
tune.report(samplesize=i + 1, sphere_projection=score_lb)
|
try:
|
||||||
|
tune.report(samplesize=i + 1, sphere_projection=score_lb)
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
def obj_w_suggested_resource(resource_attr, config):
|
def obj_w_suggested_resource(resource_attr, config):
|
||||||
resource = config[resource_attr]
|
resource = config[resource_attr]
|
||||||
simple_obj(config, resource)
|
simple_obj(resource, config)
|
||||||
|
|
||||||
|
|
||||||
def test_scheduler(scheduler=None):
|
def test_scheduler(scheduler=None, use_ray=False, time_budget_s=1):
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
resource_attr = "samplesize"
|
resource_attr = "samplesize"
|
||||||
max_resource = 10000
|
max_resource = 10000
|
||||||
|
min_resource = 1000
|
||||||
|
reduction_factor = 2
|
||||||
|
time_budget_s = time_budget_s
|
||||||
# specify the objective functions
|
# specify the objective functions
|
||||||
if scheduler is None:
|
if scheduler is None:
|
||||||
evaluation_obj = simple_obj
|
evaluation_obj = partial(simple_obj, max_resource)
|
||||||
|
min_resource = max_resource = reduction_factor = None
|
||||||
elif scheduler == "flaml":
|
elif scheduler == "flaml":
|
||||||
evaluation_obj = partial(obj_w_suggested_resource, resource_attr)
|
evaluation_obj = partial(obj_w_suggested_resource, resource_attr)
|
||||||
elif scheduler == "asha" or isinstance(scheduler, TrialScheduler):
|
elif scheduler == "asha" or isinstance(scheduler, TrialScheduler):
|
||||||
@ -89,14 +95,17 @@ def test_scheduler(scheduler=None):
|
|||||||
resource_attr=resource_attr,
|
resource_attr=resource_attr,
|
||||||
scheduler=scheduler,
|
scheduler=scheduler,
|
||||||
max_resource=max_resource,
|
max_resource=max_resource,
|
||||||
min_resource=100,
|
min_resource=min_resource,
|
||||||
reduction_factor=2,
|
reduction_factor=reduction_factor,
|
||||||
time_budget_s=1,
|
time_budget_s=time_budget_s,
|
||||||
num_samples=500,
|
num_samples=500,
|
||||||
|
use_ray=use_ray,
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Best hyperparameters found were: ", analysis.best_config)
|
print("Best hyperparameters found were: ", analysis.best_config)
|
||||||
# print(analysis.get_best_trial)
|
print(
|
||||||
|
f"{len(analysis.results)} trials finished \
|
||||||
|
in {time_budget_s} seconds with {str(scheduler)} scheduler"
|
||||||
|
)
|
||||||
return analysis.best_config
|
return analysis.best_config
|
||||||
|
|
||||||
|
|
||||||
@ -105,13 +114,15 @@ def test_no_scheduler():
|
|||||||
print("No scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
|
print("No scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
|
||||||
|
|
||||||
|
|
||||||
def test_asha_scheduler():
|
def test_asha_scheduler(use_ray=False, time_budget_s=1):
|
||||||
try:
|
try:
|
||||||
from ray.tune.schedulers import ASHAScheduler
|
from ray.tune.schedulers import ASHAScheduler
|
||||||
except ImportError:
|
except ImportError:
|
||||||
print("skip the test as ray tune cannot be imported.")
|
print("skip the test as ray tune cannot be imported.")
|
||||||
return
|
return
|
||||||
best_config = test_scheduler(scheduler="asha")
|
best_config = test_scheduler(
|
||||||
|
scheduler="asha", use_ray=use_ray, time_budget_s=time_budget_s
|
||||||
|
)
|
||||||
print("Auto ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
|
print("Auto ASHA scheduler, test error:", abs(10 / 2 - best_config["z"] / 2))
|
||||||
|
|
||||||
|
|
||||||
@ -150,6 +161,7 @@ def test_flaml_scheduler():
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_no_scheduler()
|
test_no_scheduler()
|
||||||
test_asha_scheduler()
|
test_asha_scheduler()
|
||||||
|
test_asha_scheduler(use_ray=True, time_budget_s=3)
|
||||||
test_custom_scheduler()
|
test_custom_scheduler()
|
||||||
test_custom_scheduler_default_time_attr()
|
test_custom_scheduler_default_time_attr()
|
||||||
test_flaml_scheduler()
|
test_flaml_scheduler()
|
||||||
|
|||||||
@ -350,7 +350,9 @@ tune.run(.., scheduler=my_scheduler, ...)
|
|||||||
```
|
```
|
||||||
- Similar to the case where the `flaml` scheduler is used, you need to specify the resource dimension, use the resource dimension accordingly in your `evaluation_function`, and provide the necessary information needed for scheduling, such as `min_resource`, `max_resource` and `reduction_factor` (depending on the requirements of the specific scheduler).
|
- Similar to the case where the `flaml` scheduler is used, you need to specify the resource dimension, use the resource dimension accordingly in your `evaluation_function`, and provide the necessary information needed for scheduling, such as `min_resource`, `max_resource` and `reduction_factor` (depending on the requirements of the specific scheduler).
|
||||||
|
|
||||||
- Different from the case when the `flaml` scheduler is used, the amount of resources to use at each iteration is not suggested by the search algorithm through the `resource_attr` in a configuration. You need to specify the evaluation schedule explicitly by yourself in the `evaluation_function` and report intermediate results (using `tune.report()`) accordingly. In the following code example, we use the ASHA scheduler by setting `scheduler="asha"`, we specify `resource_attr`, `min_resource`, `min_resource` and `reduction_factor` the same way as in the previous example (when "flaml" is used as the scheduler). We perform the evaluation in a customized schedule.
|
- Different from the case when the `flaml` scheduler is used, the amount of resources to use at each iteration is not suggested by the search algorithm through the `resource_attr` in a configuration. You need to specify the evaluation schedule explicitly by yourself in the `evaluation_function` and **report intermediate results (using `tune.report()`) accordingly**. In the following code example, we use the ASHA scheduler by setting `scheduler="asha"`. We specify `resource_attr`, `min_resource`, `max_resource` and `reduction_factor` the same way as in the previous example (when "flaml" is used as the scheduler). We perform the evaluation in a customized schedule.
|
||||||
|
|
||||||
|
- Use the ray backend or not? You can choose whether to use the ray backend by specifying `use_ray=True` or `use_ray=False`. When the ray backend is not used, i.e., `use_ray=False`, you also need to stop the evaluation function by explicitly catching the `StopIteration` exception, as shown in the last two lines of the evaluation function `obj_w_intermediate_report()` in the following code example.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
def obj_w_intermediate_report(resource_attr, X_train, X_test, y_train, y_test, min_resource, max_resource, config):
|
def obj_w_intermediate_report(resource_attr, X_train, X_test, y_train, y_test, min_resource, max_resource, config):
|
||||||
@ -370,7 +372,10 @@ def obj_w_intermediate_report(resource_attr, X_train, X_test, y_train, y_test, m
|
|||||||
y_test_predict = model.predict(X_test)
|
y_test_predict = model.predict(X_test)
|
||||||
test_loss = 1.0 - accuracy_score(y_test, y_test_predict)
|
test_loss = 1.0 - accuracy_score(y_test, y_test_predict)
|
||||||
# need to report the resource attribute used and the corresponding intermediate results
|
# need to report the resource attribute used and the corresponding intermediate results
|
||||||
tune.report(sample_size=resource, loss=test_loss)
|
try:
|
||||||
|
tune.report(sample_size=resource, loss=test_loss)
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
|
|
||||||
resource_attr = "sample_size"
|
resource_attr = "sample_size"
|
||||||
min_resource = 1000
|
min_resource = 1000
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user