autogen/flaml/tune/space.py
Qingyun Wu a229a6112a
Support parallel and add random search (#167)
* non hashable value out of signature

* parallel trials

* add random in _search_parallel

* fix bug in retraining

* check memory constraint before training

* retrain_full

* log custom metric

* retraining budget check

* sample size check before retrain

* remove 'time2eval' from result

* report 'total_search_time' in result

* rename total_search_time to wall_clock_time

* rename train_loss boolean to log_training_metric

* set default train_loss to None

* exclude oom result

* log retrained model

* no subsample

* doc str

* notebook

* predicted value is NaN for sarimax

* version

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
2021-08-23 16:36:51 -07:00


try:
    from ray import __version__ as ray_version
    assert ray_version >= '1.0.0'
    from ray.tune import sample
    from ray.tune.suggest.variant_generator import generate_variants
except (ImportError, AssertionError):
    from . import sample
    from ..searcher.variant_generator import generate_variants
from typing import Dict, Optional, Any, Tuple
import numpy as np
import logging

logger = logging.getLogger(__name__)

def define_by_run_func(
    trial, space: Dict, path: str = ""
) -> Optional[Dict[str, Any]]:
    """Define-by-run function to create the search space.

    Returns:
        A dict with constant values.
    """
    config = {}
    for key, domain in space.items():
        if path:
            key = path + '/' + key
        if not isinstance(domain, sample.Domain):
            config[key] = domain
            continue
        sampler = domain.get_sampler()
        quantize = None
        if isinstance(sampler, sample.Quantized):
            quantize = sampler.q
            sampler = sampler.sampler
            if isinstance(sampler, sample.LogUniform):
                logger.warning(
                    "Optuna does not handle quantization in loguniform "
                    "sampling. The parameter will be passed but it will "
                    "probably be ignored.")
        if isinstance(domain, sample.Float):
            if isinstance(sampler, sample.LogUniform):
                if quantize:
                    logger.warning(
                        "Optuna does not support both quantization and "
                        "sampling from LogUniform. Dropped quantization.")
                trial.suggest_float(
                    key, domain.lower, domain.upper, log=True)
            elif isinstance(sampler, sample.Uniform):
                if quantize:
                    trial.suggest_float(
                        key, domain.lower, domain.upper, step=quantize)
                else:
                    trial.suggest_float(key, domain.lower, domain.upper)
        elif isinstance(domain, sample.Integer):
            if isinstance(sampler, sample.LogUniform):
                trial.suggest_int(
                    key, domain.lower,
                    domain.upper - int(bool(not quantize)),
                    step=quantize or 1, log=True)
            elif isinstance(sampler, sample.Uniform):
                # Upper bound should be inclusive for quantization and
                # exclusive otherwise
                trial.suggest_int(
                    key, domain.lower,
                    domain.upper - int(bool(not quantize)),
                    step=quantize or 1)
        elif isinstance(domain, sample.Categorical):
            if isinstance(sampler, sample.Uniform):
                if not hasattr(domain, 'choices'):
                    domain.choices = list(range(len(domain.categories)))
                choices = domain.choices
                # This choice needs to be removed from the final config
                index = trial.suggest_categorical(key + '_choice_', choices)
                choice = domain.categories[index]
                if isinstance(choice, dict):
                    key += f":{index}"
                    # the suffix needs to be removed from the final config
                    config[key] = define_by_run_func(trial, choice, key)
        else:
            raise ValueError(
                "Optuna search does not support parameters of type "
                "`{}` with samplers of type `{}`".format(
                    type(domain).__name__,
                    type(domain.sampler).__name__))
    # Return all constants in a dictionary.
    return config
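
# Illustrative sketch (not part of this module): `define_by_run_func` is meant
# to be handed to an Optuna define-by-run style integration, which calls it
# with a `trial` object; it registers suggestions on the trial and returns the
# constant values. The space below and the use of `optuna.create_study` /
# `study.ask` are assumptions for illustration only.
#
#   import optuna
#   space = {'lr': sample.loguniform(1e-4, 1e-1), 'seed': 42}
#   study = optuna.create_study()
#   trial = study.ask()
#   constants = define_by_run_func(trial, space)
#   # 'lr' is suggested on the trial; constants == {'seed': 42}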

def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]:
    '''Unflatten a hierarchical config.'''
    hier = {}
    subspace = {}
    for key, value in config.items():
        if '/' in key:
            key = key[key.rfind('/') + 1:]
        if ':' in key:
            pos = key.rfind(':')
            true_key = key[:pos]
            choice = int(key[pos + 1:])
            hier[true_key], subspace[true_key] = unflatten_hierarchical(
                value, space[true_key][choice])
        else:
            domain = space.get(key)
            if domain is not None:
                subspace[key] = domain
                if isinstance(domain, sample.Domain):
                    sampler = domain.sampler
                    if isinstance(sampler, sample.Quantized):
                        q = sampler.q
                        sampler = sampler.sampler
                        if isinstance(sampler, sample.LogUniform):
                            value = domain.cast(np.round(value / q) * q)
            hier[key] = value
    return hier, subspace
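
# Example (illustrative) of the flattened key format this function reverses:
# a key such as 'model:1' encodes "the dict at index 1 of the Categorical
# domain space['model']", and keys inside its value such as 'model:1/n' are
# reduced to 'n' before being looked up in that nested space. So, assuming
#   space = {'model': sample.choice([{'lr': sample.uniform(0, 1)},
#                                    {'n': sample.randint(1, 10)}])}
# the flat config {'model:1': {'model:1/n': 3}} unflattens to the hierarchical
# config {'model': {'n': 3}} plus the matching subspace {'model': {'n': ...}}.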

def add_cost_to_space(space: Dict, low_cost_point: Dict, choice_cost: Dict):
    """Update the space in place by adding low_cost_point and choice_cost.

    Returns:
        A dict with constant values.
    """
    config = {}
    for key in space:
        domain = space[key]
        if not isinstance(domain, sample.Domain):
            if isinstance(domain, dict):
                low_cost = low_cost_point.get(key, {})
                choice_cost_list = choice_cost.get(key, {})
                const = add_cost_to_space(
                    domain, low_cost, choice_cost_list)
                if const:
                    config[key] = const
            else:
                config[key] = domain
            continue
        low_cost = low_cost_point.get(key)
        choice_cost_list = choice_cost.get(key)
        if callable(getattr(domain, 'get_sampler', None)):
            sampler = domain.get_sampler()
            if isinstance(sampler, sample.Quantized):
                sampler = sampler.get_sampler()
            domain.bounded = str(sampler) != 'Normal'
        if isinstance(domain, sample.Categorical):
            domain.const = []
            for i, cat in enumerate(domain.categories):
                if isinstance(cat, dict):
                    if isinstance(low_cost, list):
                        low_cost_dict = low_cost[i]
                    else:
                        low_cost_dict = {}
                    if choice_cost_list:
                        choice_cost_dict = choice_cost_list[i]
                    else:
                        choice_cost_dict = {}
                    domain.const.append(add_cost_to_space(
                        cat, low_cost_dict, choice_cost_dict))
                else:
                    domain.const.append(None)
            if choice_cost_list:
                if len(choice_cost_list) == len(domain.categories):
                    domain.choice_cost = choice_cost_list
                else:
                    domain.choice_cost = choice_cost_list[-1]
                # sort the choices by cost
                cost = np.array(domain.choice_cost)
                ind = np.argsort(cost)
                domain.categories = [domain.categories[i] for i in ind]
                domain.choice_cost = cost[ind]
                domain.const = [domain.const[i] for i in ind]
                domain.ordered = True
            elif all(isinstance(x, int) or isinstance(x, float)
                     for x in domain.categories):
                # sort the choices by value
                ind = np.argsort(domain.categories)
                domain.categories = [domain.categories[i] for i in ind]
                domain.ordered = True
            else:
                domain.ordered = False
            if low_cost and low_cost not in domain.categories:
                assert isinstance(low_cost, list), \
                    f"low cost {low_cost} not in domain {domain.categories}"
                if domain.ordered:
                    sorted_points = [low_cost[i] for i in ind]
                    for i, point in enumerate(sorted_points):
                        low_cost[i] = point
                if len(low_cost) > len(domain.categories):
                    if domain.ordered:
                        low_cost[-1] = int(np.where(ind == low_cost[-1])[0])
                    domain.low_cost_point = low_cost[-1]
                return
        if low_cost:
            domain.low_cost_point = low_cost
    return config
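
# Illustrative effect (not executed here): given
#   space = {'learner': sample.choice(['lgbm', 'xgboost'])}
#   add_cost_to_space(space, {'learner': 'lgbm'}, {'learner': [2, 1]})
# the choices are reordered by cost to ['xgboost', 'lgbm'], and the domain
# gains `choice_cost`, `ordered=True`, `bounded=True` and
# `low_cost_point='lgbm'` attributes; the returned dict holds any constant
# (non-Domain) values found while recursing into nested dicts.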

def normalize(
    config: Dict, space: Dict, reference_config: Dict,
    normalized_reference_config: Dict, recursive: bool = False,
):
    '''Normalize config in space according to reference_config.

    Normalize each dimension in config to [0, 1].
    '''
    config_norm = {}
    for key in config:
        value = config[key]
        domain = space.get(key)
        if domain is None:  # e.g., prune_attr
            config_norm[key] = value
            continue
        if not callable(getattr(domain, 'get_sampler', None)):
            if recursive and isinstance(domain, dict):
                config_norm[key] = normalize(
                    value, domain, reference_config[key], {})
            else:
                config_norm[key] = value
            continue
        # domain: sample.Categorical/Integer/Float/Function
        if isinstance(domain, sample.Categorical):
            norm = None
            # value is either one category, or the low_cost_point list
            if value not in domain.categories:
                # nested, low_cost_point list
                if recursive:
                    norm = []
                    for i, cat in enumerate(domain.categories):
                        norm.append(normalize(
                            value[i], cat, reference_config[key][i], {}))
                if isinstance(value, list) and len(value) > len(
                        domain.categories):
                    # low_cost_point list
                    index = value[-1]
                    config[key] = value[index]
                    value = domain.categories[index]
                else:
                    continue
            # normalize categorical
            n = len(domain.categories)
            if domain.ordered:
                normalized = (domain.categories.index(value) + 0.5) / n
            elif key in normalized_reference_config:
                normalized = normalized_reference_config[
                    key] if value == reference_config[key] else (
                        normalized_reference_config[key] + 1 / n) % 1
            else:
                normalized = 0.5
            if norm:
                norm.append(normalized)
            else:
                norm = normalized
            config_norm[key] = norm
            continue
        # Uniform/LogUniform/Normal/Base
        sampler = domain.get_sampler()
        if isinstance(sampler, sample.Quantized):
            # sampler is sample.Quantized
            quantize = sampler.q
            sampler = sampler.get_sampler()
        else:
            quantize = None
        if str(sampler) == 'LogUniform':
            upper = domain.upper - (
                isinstance(domain, sample.Integer) & (quantize is None))
            config_norm[key] = np.log(value / domain.lower) / np.log(
                upper / domain.lower)
        elif str(sampler) == 'Uniform':
            upper = domain.upper - (
                isinstance(domain, sample.Integer) & (quantize is None))
            config_norm[key] = (value - domain.lower) / (upper - domain.lower)
        elif str(sampler) == 'Normal':
            # N(mean, sd) -> N(0, 1)
            config_norm[key] = (value - sampler.mean) / sampler.sd
        else:
            # TODO? elif str(sampler) == 'Base': # sample.Function._CallSampler
            # e.g., {test: sample_from(lambda spec: randn(10, 2).sample() * 0.01)}
            config_norm[key] = value
    return config_norm
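
# Worked examples of the normalization above (verifiable by hand):
#   - uniform(1, 9):      value 5  -> (5 - 1) / (9 - 1) = 0.5
#   - loguniform(1, 100): value 10 -> log(10 / 1) / log(100 / 1) = 0.5
#   - ordered categorical with 4 choices: the choice at index 1
#     -> (1 + 0.5) / 4 = 0.375 (each category maps to the midpoint of its
#     1/n-wide bin)
# Unordered categorical values are placed relative to `reference_config`, so
# the same value can normalize differently for different reference points.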

def denormalize(
    config: Dict, space: Dict, reference_config: Dict,
    normalized_reference_config: Dict, random_state
):
    '''Inverse of `normalize`: map normalized values back to the original space.'''
    config_denorm = {}
    for key, value in config.items():
        if key in space:
            # domain: sample.Categorical/Integer/Float/Function
            domain = space[key]
            if not callable(getattr(domain, 'get_sampler', None)):
                config_denorm[key] = value
            else:
                if isinstance(domain, sample.Categorical):
                    # denormalize categorical
                    n = len(domain.categories)
                    if domain.ordered:
                        config_denorm[key] = domain.categories[
                            min(n - 1, int(np.floor(value * n)))]
                    else:
                        assert key in normalized_reference_config
                        if np.floor(value * n) == np.floor(
                                normalized_reference_config[key] * n):
                            config_denorm[key] = reference_config[key]
                        else:  # ****random value each time!****
                            config_denorm[key] = random_state.choice(
                                [x for x in domain.categories
                                 if x != reference_config[key]])
                    continue
                # Uniform/LogUniform/Normal/Base
                sampler = domain.get_sampler()
                if isinstance(sampler, sample.Quantized):
                    # sampler is sample.Quantized
                    quantize = sampler.q
                    sampler = sampler.get_sampler()
                else:
                    quantize = None
                # Handle Log/Uniform
                if str(sampler) == 'LogUniform':
                    upper = domain.upper - (isinstance(domain, sample.Integer)
                                            & (quantize is None))
                    config_denorm[key] = (
                        upper / domain.lower) ** value * domain.lower
                elif str(sampler) == 'Uniform':
                    upper = domain.upper - (isinstance(domain, sample.Integer)
                                            & (quantize is None))
                    config_denorm[key] = value * (
                        upper - domain.lower) + domain.lower
                elif str(sampler) == 'Normal':
                    # denormalization for 'Normal'
                    config_denorm[key] = value * sampler.sd + sampler.mean
                else:
                    config_denorm[key] = value
                # Handle quantized
                if quantize is not None:
                    config_denorm[key] = np.round(
                        np.divide(config_denorm[key], quantize)) * quantize
                # Handle int (4.6 -> 5)
                if isinstance(domain, sample.Integer):
                    config_denorm[key] = int(round(config_denorm[key]))
        else:  # prune_attr
            config_denorm[key] = value
    return config_denorm
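
# Worked examples of the inverse mapping above:
#   - uniform(1, 9):      0.5 -> 0.5 * (9 - 1) + 1 = 5
#   - loguniform(1, 100): 0.5 -> (100 / 1) ** 0.5 * 1 = 10
# Quantized domains are then rounded to the nearest multiple of q, and Integer
# domains are rounded and cast to int, so the denormalized value lands back on
# the domain's grid.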

def indexof(domain: Dict, config: Dict) -> int:
    '''Find the index of config in domain.categories.'''
    index = config.get('_choice_')
    if index is not None:
        return index
    if config in domain.categories:
        return domain.categories.index(config)
    for i, cat in enumerate(domain.categories):
        if not isinstance(cat, dict):
            continue
        if len(cat) != len(config):
            continue
        if not set(cat.keys()).issubset(set(config.keys())):
            continue
        if all(config[key] == value for key, value in domain.const[i].items()):
            # assumption: the concatenation of constants is a unique identifier
            return i
    return None
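
# Illustrative matching rule: for a hierarchical domain such as
#   sample.choice([{'algo': 'a', 'lr': sample.uniform(0, 1)},
#                  {'algo': 'b', 'n': sample.randint(1, 10)}])
# (with `domain.const` filled in by `add_cost_to_space`), the config
# {'algo': 'b', 'n': 3} matches index 1 because it has the same number of keys
# as that category and agrees on all of that category's constant values.
# A config carrying an explicit '_choice_' key short-circuits the search.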

def complete_config(
    partial_config: Dict, space: Dict, flow2, disturb: bool = False,
    lower: Optional[Dict] = None, upper: Optional[Dict] = None
) -> Tuple[Dict, Dict]:
    '''Complete partial config in space.

    Returns:
        config, space
    '''
    config = partial_config.copy()
    normalized = normalize(config, space, config, {})
    if disturb:
        for key in normalized:
            domain = space.get(key)
            if getattr(domain, 'ordered', True) is False:
                # don't change unordered cat choice
                continue
            if not callable(getattr(domain, 'get_sampler', None)):
                continue
            if upper and lower:
                up, low = upper[key], lower[key]
                gauss_std = up - low or flow2.STEPSIZE
                # allowed bound
                up += flow2.STEPSIZE
                low -= flow2.STEPSIZE
            elif domain.bounded:
                up, low, gauss_std = 1, 0, 1.0
            else:
                up, low, gauss_std = np.Inf, -np.Inf, 1.0
            if domain.bounded:
                up = min(up, 1)
                low = max(low, 0)
            delta = flow2.rand_vector_gaussian(1, gauss_std)[0]
            normalized[key] = max(low, min(up, normalized[key] + delta))
    config = denormalize(normalized, space, config, normalized, flow2._random)
    for key, value in space.items():
        if key not in config:
            config[key] = value
    for _, generated in generate_variants({'config': config}):
        config = generated['config']
        break
    subspace = {}
    for key, domain in space.items():
        value = config[key]
        if isinstance(value, dict):
            if isinstance(domain, sample.Categorical):
                # nested space
                index = indexof(domain, value)
                # point = partial_config.get(key)
                # if isinstance(point, list):  # low cost point list
                #     point = point[index]
                # else:
                #     point = {}
                config[key], subspace[key] = complete_config(
                    value, domain.categories[index], flow2, disturb,
                    lower and lower[key][index], upper and upper[key][index]
                )
                assert '_choice_' not in subspace[key], \
                    "_choice_ is a reserved key for hierarchical search space"
                subspace[key]['_choice_'] = index
            else:
                config[key], subspace[key] = complete_config(
                    value, space[key], flow2, disturb,
                    lower and lower[key], upper and upper[key])
            continue
        subspace[key] = domain
    return config, subspace
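
# Usage sketch (hedged): `complete_config` is called by the searchers with a
# partial config; `flow2` only needs to expose the attributes used above
# (`STEPSIZE`, `rand_vector_gaussian`, `_random`), as FLAML's FLOW2 searcher
# does. Missing keys are filled by sampling the space via `generate_variants`,
# and for a nested Categorical choice the returned `subspace[key]` records the
# selected branch under the reserved '_choice_' key, e.g.
#   config, subspace = complete_config({'lr': 0.1}, space, flow2_searcher)
# where `flow2_searcher` is an assumed, pre-constructed FLOW2 instance.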