Mirror of https://github.com/microsoft/autogen.git (synced 2025-09-03 05:17:07 +00:00)
reproducibility for random sampling (#349)
* reproducibility for random sampling #236
* doc update

This commit is contained in:
parent ee3162e232
commit 0b25e89f29
@@ -542,7 +542,7 @@ class AutoML(BaseEstimator):
            new_automl.fit(X_train, y_train, starting_points=starting_points)
    ```

-        seed: int or None, default=None | The random seed for np.random.
+        seed: int or None, default=None | The random seed for hpo.
        n_concurrent_trials: [Experimental] int, default=1 | The number of
            concurrent trials. For n_concurrent_trials > 1, installation of
            ray is required: `pip install flaml[ray]`.
@@ -1845,7 +1845,7 @@ class AutoML(BaseEstimator):
            new_automl.fit(X_train, y_train, starting_points=starting_points)
    ```

-        seed: int or None, default=None | The random seed for np.random.
+        seed: int or None, default=None | The random seed for hpo.
        n_concurrent_trials: [Experimental] int, default=1 | The number of
            concurrent trials. For n_concurrent_trials > 1, installation of
            ray is required: `pip install flaml[ray]`.
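The `seed` documented here is what makes an AutoML run repeatable. A minimal sketch of its use (assuming scikit-learn is available; the settings mirror those exercised in `test/rep.py` further down, and the dataset and budget are illustrative):

```python
from sklearn.datasets import load_iris
from flaml import AutoML

X_train, y_train = load_iris(return_X_y=True)
automl = AutoML()
automl.fit(
    X_train=X_train,
    y_train=y_train,
    task="classification",
    metric="accuracy",
    time_budget=10,
    seed=7654321,  # "The random seed for hpo": same seed -> same search trajectory
)
print(automl.best_config)
```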
@@ -1949,13 +1949,10 @@ class AutoML(BaseEstimator):
        )
        self._search_states = {}  # key: estimator name; value: SearchState
        self._random = np.random.RandomState(RANDOM_SEED)
-        if seed is not None:
-            np.random.seed(seed)
-        self._seed = seed + 19823 if seed is not None else 20
+        self._seed = seed if seed is not None else 20
        self._learner_selector = learner_selector
        old_level = logger.getEffectiveLevel()
        self.verbose = verbose
        # if verbose == 0:
        logger.setLevel(50 - verbose * 10)
        if (not mlflow or not mlflow.active_run()) and not logger.handlers:
            # Add the console handler.
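The removal of `np.random.seed(seed)` reflects the broader pattern in this commit: keep randomness in per-object generators instead of mutating NumPy's global state. A standalone sketch of that pattern (illustrative class and method names, not FLAML's API):

```python
import numpy as np


class ReproducibleSearch:
    """Illustrative only: a per-instance RNG instead of a global np.random.seed call."""

    def __init__(self, seed=None):
        # mirrors the `seed if seed is not None else 20` default above
        self._seed = seed if seed is not None else 20
        self._rng = np.random.RandomState(self._seed)

    def sample_uniform(self, low, high, size=1):
        # all randomness flows through self._rng, so two instances built with the
        # same seed produce identical draws without touching global numpy state
        return self._rng.uniform(low, high, size=size)


a = ReproducibleSearch(seed=7654321)
b = ReproducibleSearch(seed=7654321)
assert np.allclose(a.sample_uniform(0, 1, 3), b.sample_uniform(0, 1, 3))
```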
@@ -1,7 +1,6 @@
import argparse
from dataclasses import dataclass, field
from typing import Dict, Any

from ..data import SUMMARIZATION, SEQREGRESSION, SEQCLASSIFICATION, NLG_TASKS
@@ -113,7 +113,7 @@ class BlendSearch(Searcher):
                "For cost-frugal search, "
                "consider providing low-cost values for cost-related hps via "
                "'low_cost_partial_config'. More info can be found at "
-                "https://github.com/microsoft/FLAML/wiki/About-%60low_cost_partial_config%60"
+                "https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune"
            )
        if evaluated_rewards and mode:
            self._points_to_evaluate = []
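The warning text above points users to `low_cost_partial_config`. As a hedged illustration of what supplying it looks like with `flaml.tune` (the toy objective and values are invented; the parameter follows the FAQ linked above):

```python
from flaml import tune


def train(config):
    # toy objective whose evaluation cost would grow with n_estimators
    return {"loss": (config["n_estimators"] - 100) ** 2}


analysis = tune.run(
    train,
    config={"n_estimators": tune.lograndint(lower=4, upper=1000)},
    low_cost_partial_config={"n_estimators": 4},  # cheap value for the cost-related hp
    metric="loss",
    mode="min",
    num_samples=10,
)
print(analysis.best_config)
```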
@@ -2,27 +2,28 @@
#  * Copyright (c) Microsoft Corporation. All rights reserved.
#  * Licensed under the MIT License. See LICENSE file in the
#  * project root for license information.
from flaml.tune.sample import Domain
from typing import Dict, Optional, Tuple
import numpy as np
import logging

try:
    from ray import __version__ as ray_version

    assert ray_version >= "1.0.0"
    from ray.tune.suggest import Searcher
    from ray.tune.suggest.variant_generator import generate_variants
    from ray.tune import sample
    from ray.tune.utils.util import flatten_dict, unflatten_dict
except (ImportError, AssertionError):
    from .suggestion import Searcher
    from .variant_generator import generate_variants
    from ..tune import sample
    from ..tune.trial import flatten_dict, unflatten_dict
-from ..tune.space import complete_config, denormalize, normalize


import logging
+from flaml.tune.sample import _BackwardsCompatibleNumpyRng
+from ..tune.space import (
+    complete_config,
+    denormalize,
+    normalize,
+    generate_variants_compatible,
+)

logger = logging.getLogger(__name__)

@@ -84,6 +85,7 @@ class FLOW2(Searcher):
        self.space = space or {}
        self._space = flatten_dict(self.space, prevent_delimiter=True)
        self._random = np.random.RandomState(seed)
+        self.rs_random = _BackwardsCompatibleNumpyRng(seed + 19823)
        self.seed = seed
        self.init_config = init_config
        self.best_config = flatten_dict(init_config)

@@ -464,8 +466,8 @@ class FLOW2(Searcher):
            # random
            for i, key in enumerate(self._tunable_keys):
                if self._direction_tried[i] != 0:
-                    for _, generated in generate_variants(
-                        {"config": {key: self._space[key]}}
+                    for _, generated in generate_variants_compatible(
+                        {"config": {key: self._space[key]}}, random_state=self.rs_random
                    ):
                        if generated["config"][key] != best_config[key]:
                            config[key] = generated["config"][key]
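A standalone sketch of why threading an explicit RNG (the `rs_random` created above) through variant generation makes FLOW2's local search reproducible; the helper below is hypothetical and only illustrates that the same seed and the same RNG object give the same perturbation sequence, independent of global `np.random`:

```python
import numpy as np


def sample_perturbation(width: float, random_state: np.random.RandomState) -> float:
    # hypothetical stand-in for drawing one variant of a tunable key
    return random_state.uniform(-width, width)


rng_a = np.random.RandomState(42 + 19823)  # mirrors the seed + 19823 offset above
rng_b = np.random.RandomState(42 + 19823)

# identical seeds -> identical perturbation sequences, with no reliance on np.random's
# global state (which other code or concurrent trials could disturb)
assert [sample_perturbation(1.0, rng_a) for _ in range(5)] == [
    sample_perturbation(1.0, rng_b) for _ in range(5)
]
```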
@@ -178,7 +178,7 @@ class ConcurrencyLimiter(Searcher):
        batch (bool): Whether to wait for all concurrent samples
            to finish before updating the underlying searcher.
    Example:
    ```python
    from ray.tune.suggest import ConcurrencyLimiter
    search_alg = HyperOptSearch(metric="accuracy")
    search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2)
@@ -366,81 +366,81 @@ class _OptunaTrialSuggestCaptor:

class OptunaSearch(Searcher):
    """A wrapper around Optuna to provide trial suggestions.
    [Optuna](https://optuna.org/)
    is a hyperparameter optimization library.
    In contrast to other libraries, it employs define-by-run style
    hyperparameter definitions.
    This Searcher is a thin wrapper around Optuna's search algorithms.
    You can pass any Optuna sampler, which will be used to generate
    hyperparameter suggestions.
    Args:
        space (dict|Callable): Hyperparameter search space definition for
            Optuna's sampler. This can be either a class `dict` with
            parameter names as keys and ``optuna.distributions`` as values,
            or a Callable - in which case, it should be a define-by-run
            function using ``optuna.trial`` to obtain the hyperparameter
            values. The function should return either a class `dict` of
            constant values with names as keys, or None.
            For more information, see
            [tutorial](https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/002_configurations.html).
            Warning - No actual computation should take place in the define-by-run
            function. Instead, put the training logic inside the function
            or class trainable passed to tune.run.
        metric (str): The training result objective value attribute. If None
            but a mode was passed, the anonymous metric `_metric` will be used
            per default.
        mode (str): One of {min, max}. Determines whether objective is
            minimizing or maximizing the metric attribute.
        points_to_evaluate (list): Initial parameter suggestions to be run
            first. This is for when you already have some good parameters
            you want to run first to help the algorithm make better suggestions
            for future parameters. Needs to be a list of dicts containing the
            configurations.
        sampler (optuna.samplers.BaseSampler): Optuna sampler used to
            draw hyperparameter configurations. Defaults to ``TPESampler``.
        seed (int): Seed to initialize sampler with. This parameter is only
            used when ``sampler=None``. In all other cases, the sampler
            you pass should be initialized with the seed already.
        evaluated_rewards (list): If you have previously evaluated the
            parameters passed in as points_to_evaluate you can avoid
            re-running those trials by passing in the reward attributes
            as a list so the optimiser can be told the results without
            needing to re-compute the trial. Must be the same length as
            points_to_evaluate.

    Tune automatically converts search spaces to Optuna's format:

    ````python
    from ray.tune.suggest.optuna import OptunaSearch
    config = { "a": tune.uniform(6, 8),
        "b": tune.loguniform(1e-4, 1e-2)}
    optuna_search = OptunaSearch(metric="loss", mode="min")
    tune.run(trainable, config=config, search_alg=optuna_search)
    ````

    If you would like to pass the search space manually, the code would
    look like this:

    ```python
    from ray.tune.suggest.optuna import OptunaSearch
    import optuna
    config = { "a": optuna.distributions.UniformDistribution(6, 8),
        "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2)}
    optuna_search = OptunaSearch(space,metric="loss",mode="min")
    tune.run(trainable, search_alg=optuna_search)
    # Equivalent Optuna define-by-run function approach:
    def define_search_space(trial: optuna.Trial):
        trial.suggest_float("a", 6, 8)
        trial.suggest_float("b", 1e-4, 1e-2, log=True)
        # training logic goes into trainable, this is just
        # for search space definition
    optuna_search = OptunaSearch(
        define_search_space,
        metric="loss",
        mode="min")
    tune.run(trainable, search_alg=optuna_search)
    .. versionadded:: 0.8.8
    ```

    """
@@ -18,11 +18,9 @@
import copy
import logging
from typing import Any, Dict, Generator, List, Tuple

-import numpy
-import random

-from ..tune.sample import Categorical, Domain
+from ..tune.sample import Categorical, Domain, RandomState

logger = logging.getLogger(__name__)
@@ -35,6 +33,8 @@ class TuneError(Exception):

def generate_variants(
    unresolved_spec: Dict,
+    constant_grid_search: bool = False,
+    random_state: "RandomState" = None,
) -> Generator[Tuple[Dict, Dict], None, None]:
    """Generates variants from a spec (dict) with unresolved values.
    There are two types of unresolved values:
@@ -43,14 +43,25 @@ def generate_variants(
        variants in combination:
            "activation": grid_search(["relu", "tanh"])
            "learning_rate": grid_search([1e-3, 1e-4, 1e-5])
    Lambda functions: These are evaluated to produce a concrete value, and
        can express dependencies or conditional distributions between values.
        They can also be used to express random search (e.g., by calling
        into the `random` or `np` module).
            "cpu": lambda spec: spec.config.num_workers
            "batch_size": lambda spec: random.uniform(1, 1000)
    Finally, to support defining specs in plain JSON / YAML, grid search
-    can also be defined alternatively as follows:
+    and lambda functions can also be defined alternatively as follows:
        "activation": {"grid_search": ["relu", "tanh"]}
        "cpu": {"eval": "spec.config.num_workers"}
    Use `format_vars` to format the returned dict of hyperparameters.
    Yields:
        (Dict of resolved variables, Spec object)
    """
-    for resolved_vars, spec in _generate_variants(unresolved_spec):
+    for resolved_vars, spec in _generate_variants(
+        unresolved_spec,
+        constant_grid_search=constant_grid_search,
+        random_state=random_state,
+    ):
        assert not _unresolved_values(spec)
        yield resolved_vars, spec
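To make the docstring concrete, here is a small illustrative spec mixing the styles it describes; it is only an example of the input format, not code from the repository:

```python
import random

spec = {
    "config": {
        "activation": {"grid_search": ["relu", "tanh"]},     # JSON/YAML form of grid_search(...)
        "cpu": {"eval": "spec.config.num_workers"},          # JSON/YAML form of a lambda
        "batch_size": lambda spec: random.uniform(1, 1000),  # random search via a lambda
        "num_workers": 4,                                     # plain constant, resolved as-is
    }
}
```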
@@ -93,7 +104,9 @@ def parse_spec_vars(
    return resolved_vars, domain_vars, grid_vars


-def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]:
+def _generate_variants(
+    spec: Dict, constant_grid_search: bool = False, random_state: "RandomState" = None
+) -> Tuple[Dict, Dict]:
    spec = copy.deepcopy(spec)
    _, domain_vars, grid_vars = parse_spec_vars(spec)
@@ -101,10 +114,34 @@ def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]:
        yield {}, spec
        return

+    # Variables to resolve
+    to_resolve = domain_vars
+
+    all_resolved = True
+    if constant_grid_search:
+        # In this path, we first sample random variables and keep them constant
+        # for grid search.
+        # `_resolve_domain_vars` will alter `spec` directly
+        all_resolved, resolved_vars = _resolve_domain_vars(
+            spec, domain_vars, allow_fail=True, random_state=random_state
+        )
+        if not all_resolved:
+            # Not all variables have been resolved, but remove those that have
+            # from the `to_resolve` list.
+            to_resolve = [(r, d) for r, d in to_resolve if r not in resolved_vars]
+
    grid_search = _grid_search_generator(spec, grid_vars)
    for resolved_spec in grid_search:
-        resolved_vars = _resolve_domain_vars(resolved_spec, domain_vars)
-        for resolved, spec in _generate_variants(resolved_spec):
+        if not constant_grid_search or not all_resolved:
+            # In this path, we sample the remaining random variables
+            _, resolved_vars = _resolve_domain_vars(
+                resolved_spec, to_resolve, random_state=random_state
+            )
+
+        for resolved, spec in _generate_variants(
+            resolved_spec,
+            constant_grid_search=constant_grid_search,
+            random_state=random_state,
+        ):
            for path, value in grid_vars:
                resolved_vars[path] = _get_value(spec, path)
            for k, v in resolved.items():
@@ -134,7 +171,12 @@ def _get_value(spec: Dict, path: Tuple) -> Any:
    return spec


-def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) -> Dict:
+def _resolve_domain_vars(
+    spec: Dict,
+    domain_vars: List[Tuple[Tuple, Domain]],
+    allow_fail: bool = False,
+    random_state: "RandomState" = None,
+) -> Tuple[bool, Dict]:
    resolved = {}
    error = True
    num_passes = 0
@@ -145,7 +187,9 @@ def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) ->
            if path in resolved:
                continue
            try:
-                value = domain.sample(_UnresolvedAccessGuard(spec))
+                value = domain.sample(
+                    _UnresolvedAccessGuard(spec), random_state=random_state
+                )
            except RecursiveDependencyError as e:
                error = e
            except Exception:
@@ -156,8 +200,11 @@ def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) ->
            assign_value(spec, path, value)
            resolved[path] = value
    if error:
-        raise error
-    return resolved
+        if not allow_fail:
+            raise error
+        else:
+            return False, resolved
+    return True, resolved


def _grid_search_generator(
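In isolation, the `allow_fail` behaviour added above follows a simple pattern: resolve what you can, then either raise the recorded error or report a partial result. A self-contained sketch (not the repository's code):

```python
from typing import Dict, Tuple


def resolve_all(raw: Dict[str, str], allow_fail: bool = False) -> Tuple[bool, Dict[str, int]]:
    resolved, error = {}, None
    for key, value in raw.items():
        try:
            resolved[key] = int(value)
        except ValueError as e:
            error = e  # remember the failure but keep resolving the rest
    if error:
        if not allow_fail:
            raise error
        return False, resolved  # partial result, caller decides what to do
    return True, resolved


print(resolve_all({"a": "1", "b": "oops"}, allow_fail=True))  # (False, {'a': 1})
```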
@@ -12,21 +12,76 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-# This source file is included here because ray does not fully support Windows.
+# This source file is adapted here because ray does not fully support Windows.

# Copyright (c) Microsoft Corporation.
import logging
import random
from copy import copy
from inspect import signature
from math import isclose
-from typing import Any, Callable, Dict, List, Optional, Sequence, Union
+from typing import Any, Dict, List, Optional, Sequence, Union

import numpy as np

+# Backwards compatibility
+try:
+    # Added in numpy>=1.17 but we require numpy>=1.16
+    np_random_generator = np.random.Generator
+    LEGACY_RNG = False
+except AttributeError:
+
+    class np_random_generator:
+        pass
+
+    LEGACY_RNG = True

logger = logging.getLogger(__name__)


+class _BackwardsCompatibleNumpyRng:
+    """Thin wrapper to ensure backwards compatibility between
+    new and old numpy randomness generators.
+    """
+
+    _rng = None
+
+    def __init__(
+        self,
+        generator_or_seed: Optional[
+            Union["np_random_generator", np.random.RandomState, int]
+        ] = None,
+    ):
+        if generator_or_seed is None or isinstance(
+            generator_or_seed, (np.random.RandomState, np_random_generator)
+        ):
+            self._rng = generator_or_seed
+        elif LEGACY_RNG:
+            self._rng = np.random.RandomState(generator_or_seed)
+        else:
+            self._rng = np.random.default_rng(generator_or_seed)
+
+    @property
+    def legacy_rng(self) -> bool:
+        return not isinstance(self._rng, np_random_generator)
+
+    @property
+    def rng(self):
+        # don't set self._rng to np.random to avoid picking issues
+        return self._rng if self._rng is not None else np.random
+
+    def __getattr__(self, name: str) -> Any:
+        # https://numpy.org/doc/stable/reference/random/new-or-different.html
+        if self.legacy_rng:
+            if name == "integers":
+                name = "randint"
+            elif name == "random":
+                name = "rand"
+        return getattr(self.rng, name)
+
+
+RandomState = Union[
+    None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int
+]


class Domain:
    """Base class to specify a type and valid range to sample parameters from.
    This base class is implemented by parameter spaces, like float ranges
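A brief usage sketch of the wrapper introduced above (assuming flaml >= 0.9.2, where it is importable as `flaml.tune.sample._BackwardsCompatibleNumpyRng`, exactly as the flow2.py hunk earlier imports it). The point is that sampler code can always call `integers`/`random`, and the wrapper maps those to `randint`/`rand` when the underlying RNG is a legacy `RandomState`:

```python
import numpy as np
from flaml.tune.sample import _BackwardsCompatibleNumpyRng

rng = _BackwardsCompatibleNumpyRng(1234)        # int seed -> RandomState or default_rng
print(rng.integers(0, 10, size=3))              # dispatched to randint(...) on legacy numpy
print(rng.uniform(0.0, 1.0, size=2))            # same name on both RNG flavours
print(rng.choice(["relu", "tanh"], size=1))     # what the Categorical sampler now uses

# wrapping an existing generator reuses its state instead of reseeding
legacy = _BackwardsCompatibleNumpyRng(np.random.RandomState(0))
print(legacy.random())                          # mapped to rand() on a RandomState
```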
@@ -61,9 +116,16 @@ class Domain:
        sampler = self.default_sampler_cls()
        return sampler

-    def sample(self, spec=None, size=1):
+    def sample(
+        self,
+        spec: Optional[Union[List[Dict], Dict]] = None,
+        size: int = 1,
+        random_state: "RandomState" = None,
+    ):
+        if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+            random_state = _BackwardsCompatibleNumpyRng(random_state)
        sampler = self.get_sampler()
-        return sampler.sample(self, spec=spec, size=size)
+        return sampler.sample(self, spec=spec, size=size, random_state=random_state)

    def is_grid(self):
        return isinstance(self.sampler, Grid)
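Every `sample` method in this file now starts with the same normalization idiom: accept `None`, an int seed, or an existing generator, and wrap it exactly once. A standalone sketch of that idiom with a hypothetical helper, using only NumPy (assumes a NumPy recent enough to have `default_rng`):

```python
import numpy as np


def as_rng(random_state=None):
    """Hypothetical stand-in for _BackwardsCompatibleNumpyRng(random_state)."""
    if isinstance(random_state, (np.random.RandomState, np.random.Generator)):
        return random_state                      # already a generator: reuse it, keep its state
    return np.random.default_rng(random_state)   # None or int seed -> fresh generator


print(as_rng(42).uniform(0, 1, size=3))          # seeded, reproducible
print(as_rng(np.random.RandomState(7)).rand())   # existing RandomState passed through
```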
@@ -86,6 +148,7 @@ class Sampler:
        domain: Domain,
        spec: Optional[Union[List[Dict], Dict]] = None,
        size: int = 1,
+        random_state: "RandomState" = None,
    ):
        raise NotImplementedError

@@ -128,6 +191,7 @@ class Grid(Sampler):
        domain: Domain,
        spec: Optional[Union[List[Dict], Dict]] = None,
        size: int = 1,
+        random_state: "RandomState" = None,
    ):
        return RuntimeError("Do not call `sample()` on grid.")
@@ -139,10 +203,13 @@ class Float(Domain):
            domain: "Float",
            spec: Optional[Union[List[Dict], Dict]] = None,
            size: int = 1,
+            random_state: "RandomState" = None,
        ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
            assert domain.lower > float("-inf"), "Uniform needs a lower bound"
            assert domain.upper < float("inf"), "Uniform needs a upper bound"
-            items = np.random.uniform(domain.lower, domain.upper, size=size)
+            items = random_state.uniform(domain.lower, domain.upper, size=size)
            return items if len(items) > 1 else domain.cast(items[0])

    class _LogUniform(LogUniform):
@@ -151,7 +218,10 @@ class Float(Domain):
            domain: "Float",
            spec: Optional[Union[List[Dict], Dict]] = None,
            size: int = 1,
+            random_state: "RandomState" = None,
        ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
            assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
            assert (
                0 < domain.upper < float("inf")
@@ -159,7 +229,7 @@ class Float(Domain):
            logmin = np.log(domain.lower) / np.log(self.base)
            logmax = np.log(domain.upper) / np.log(self.base)

-            items = self.base ** (np.random.uniform(logmin, logmax, size=size))
+            items = self.base ** (random_state.uniform(logmin, logmax, size=size))
            return items if len(items) > 1 else domain.cast(items[0])

    class _Normal(Normal):
@@ -168,14 +238,17 @@ class Float(Domain):
            domain: "Float",
            spec: Optional[Union[List[Dict], Dict]] = None,
            size: int = 1,
+            random_state: "RandomState" = None,
        ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
            assert not domain.lower or domain.lower == float(
                "-inf"
            ), "Normal sampling does not allow a lower value bound."
            assert not domain.upper or domain.upper == float(
                "inf"
            ), "Normal sampling does not allow a upper value bound."
-            items = np.random.normal(self.mean, self.sd, size=size)
+            items = random_state.normal(self.mean, self.sd, size=size)
            return items if len(items) > 1 else domain.cast(items[0])

        default_sampler_cls = _Uniform
@@ -262,8 +335,11 @@ class Integer(Domain):
            domain: "Integer",
            spec: Optional[Union[List[Dict], Dict]] = None,
            size: int = 1,
+            random_state: "RandomState" = None,
        ):
-            items = np.random.randint(domain.lower, domain.upper, size=size)
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
+            items = random_state.integers(domain.lower, domain.upper, size=size)
            return items if len(items) > 1 else domain.cast(items[0])

    class _LogUniform(LogUniform):
@@ -272,7 +348,10 @@ class Integer(Domain):
            domain: "Integer",
            spec: Optional[Union[List[Dict], Dict]] = None,
            size: int = 1,
+            random_state: "RandomState" = None,
        ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
            assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
            assert (
                0 < domain.upper < float("inf")
@@ -280,8 +359,8 @@ class Integer(Domain):
            logmin = np.log(domain.lower) / np.log(self.base)
            logmax = np.log(domain.upper) / np.log(self.base)

-            items = self.base ** (np.random.uniform(logmin, logmax, size=size))
-            items = np.round(items).astype(int)
+            items = self.base ** (random_state.uniform(logmin, logmax, size=size))
+            items = np.floor(items).astype(int)
            return items if len(items) > 1 else domain.cast(items[0])

        default_sampler_cls = _Uniform
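For the integer log-uniform change just above, a small self-contained sketch of the sampling recipe (seeded RNG, exponentiate a uniform draw in log space, then floor to an int); the bounds and base are illustrative:

```python
import numpy as np

rng = np.random.RandomState(1)
lower, upper, base = 1, 1000, 10
logmin = np.log(lower) / np.log(base)
logmax = np.log(upper) / np.log(base)

items = base ** rng.uniform(logmin, logmax, size=5)
print(np.floor(items).astype(int))  # floored, as in the new code (np.round was used before)
```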
@@ -337,9 +416,11 @@ class Categorical(Domain):
            domain: "Categorical",
            spec: Optional[Union[List[Dict], Dict]] = None,
            size: int = 1,
+            random_state: "RandomState" = None,
        ):
-            items = random.choices(domain.categories, k=size)
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
+            items = random_state.choice(domain.categories, size=size).tolist()
            return items if len(items) > 1 else domain.cast(items[0])

        default_sampler_cls = _Uniform
@@ -352,6 +433,11 @@ class Categorical(Domain):
        new.set_sampler(self._Uniform())
        return new

+    def grid(self):
+        new = copy(self)
+        new.set_sampler(Grid())
+        return new
+
    def __len__(self):
        return len(self.categories)
@@ -381,8 +467,11 @@ class Quantized(Sampler):
        domain: Domain,
        spec: Optional[Union[List[Dict], Dict]] = None,
        size: int = 1,
+        random_state: "RandomState" = None,
    ):
-        values = self.sampler.sample(domain, spec, size)
+        if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+            random_state = _BackwardsCompatibleNumpyRng(random_state)
+        values = self.sampler.sample(domain, spec, size, random_state=random_state)
        quantized = np.round(np.divide(values, self.q)) * self.q
        if not isinstance(quantized, np.ndarray):
            return domain.cast(quantized)
@@ -462,10 +551,10 @@ def qloguniform(lower: float, upper: float, q: float, base: float = 10):
    return Float(lower, upper).loguniform(base).quantized(q)


-def choice(categories: List):
+def choice(categories: Sequence):
    """Sample a categorical value.
    Sampling from ``tune.choice([1, 2])`` is equivalent to sampling from
-    ``random.choice([1, 2])``
+    ``np.random.choice([1, 2])``
    """
    return Categorical(categories).uniform()
@@ -7,13 +7,22 @@ try:
except (ImportError, AssertionError):
    from . import sample
    from ..searcher.variant_generator import generate_variants
-from typing import Dict, Optional, Any, Tuple
+from typing import Dict, Optional, Any, Tuple, Generator
import numpy as np
import logging

logger = logging.getLogger(__name__)


+def generate_variants_compatible(
+    unresolved_spec: Dict, constant_grid_search: bool = False, random_state=None
+) -> Generator[Tuple[Dict, Dict], None, None]:
+    try:
+        return generate_variants(unresolved_spec, constant_grid_search, random_state)
+    except TypeError:
+        return generate_variants(unresolved_spec, constant_grid_search)


def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str, Any]]:
    """Define-by-run function to create the search space.
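The `try`/`except TypeError` above is a small compatibility shim: prefer the newer `generate_variants` signature that accepts `random_state` (and `constant_grid_search`), and fall back to the older call when the installed implementation (for example, an earlier ray.tune) rejects the extra arguments. The same idiom in isolation, with hypothetical names:

```python
def sample_compatible(sampler, spec, random_state=None):
    try:
        return sampler(spec, random_state=random_state)  # newer signature
    except TypeError:
        return sampler(spec)                             # older signature, no seeding control


def legacy_sampler(spec):  # hypothetical older implementation without random_state
    return {key: values[0] for key, values in spec.items()}


print(sample_compatible(legacy_sampler, {"lr": [0.01, 0.1]}))  # falls back cleanly
```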
@@ -417,7 +426,6 @@ def indexof(domain: Dict, config: Dict) -> int:
        return index
    if config in domain.categories:
        return domain.categories.index(config)
-    # print(config)
    for i, cat in enumerate(domain.categories):
        if not isinstance(cat, dict):
            continue
@@ -491,7 +499,9 @@ def complete_config(
    for key, value in space.items():
        if key not in config:
            config[key] = value
-    for _, generated in generate_variants({"config": config}):
+    for _, generated in generate_variants_compatible(
+        {"config": config}, random_state=flow2.rs_random
+    ):
        config = generated["config"]
        break
    subspace = {}
@@ -1 +1 @@
-__version__ = "0.9.1"
+__version__ = "0.9.2"
@@ -214,15 +214,14 @@ def test_multioutput():

    # predict
    print(model.predict(X_test))

-    #train the model
+    # train the model
    model = RegressorChain(AutoML(task="regression", time_budget=1))
    model.fit(X_train, y_train)

    # predict
    print(model.predict(X_test))


if __name__ == "__main__":
    unittest.main()
@@ -38,7 +38,7 @@ class TestWarmStart(unittest.TestCase):
        starting_points = automl_experiment.best_config_per_estimator
        print("starting_points", starting_points)
        print("loss of the starting_points", automl_experiment.best_loss_per_estimator)
-        starting_point = starting_points['lgbm']
+        starting_point = starting_points["lgbm"]
        hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"]

        # 2. Constrct a new class:
@@ -55,17 +55,13 @@ class TestWarmStart(unittest.TestCase):
            # if an hp is specifed to be freezed, use tine value provided in the starting_point
            # otherwise use the setting from the original search space
            if hp_name in starting_point:
-                space[hp_name] = {
-                    "domain": starting_point[hp_name]
-                }
+                space[hp_name] = {"domain": starting_point[hp_name]}
        # (3.1) Configure the search space for hps that are in the original search space
        # but you want to change something, for example the range.
        revised_hps_to_search = {
            "n_estimators": {
                "domain": tune.lograndint(lower=10, upper=32768),
-                "init_value": starting_point.get(
-                    "n_estimators"
-                )
+                "init_value": starting_point.get("n_estimators")
                or space["n_estimators"].get("init_value", 10),
                "low_cost_init_value": space["n_estimators"].get(
                    "low_cost_init_value", 10
@@ -73,9 +69,7 @@ class TestWarmStart(unittest.TestCase):
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=10, upper=3276),
-                "init_value": starting_point.get(
-                    "num_leaves"
-                )
+                "init_value": starting_point.get("num_leaves")
                or space["num_leaves"].get("init_value", 10),
                "low_cost_init_value": space["num_leaves"].get(
                    "low_cost_init_value", 10
@@ -95,7 +89,7 @@ class TestWarmStart(unittest.TestCase):
        new_automl_experiment.add_learner(
            learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM
        )


        automl_settings_resume = {
            "time_budget": 3,
            "metric": "accuracy",
test/rep.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+from flaml.data import load_openml_dataset
+from flaml.ml import ExtraTreesEstimator
+from flaml import AutoML
+
+X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
+X_train = X_train.iloc[:1000]
+y_train = y_train.iloc[:1000]
+
+
+class ExtraTreesEstimatorSeeded(ExtraTreesEstimator):
+    """ExtraTreesEstimator for reproducible FLAML run."""
+
+    def config2params(self, config: dict) -> dict:
+        params = super().config2params(config)
+        params["random_state"] = 0
+        return params
+
+
+settings = {
+    "time_budget": 1e10,  # total running time in seconds
+    "max_iter": 3,
+    "metric": "ap",  # average_precision
+    "task": "classification",  # task type
+    "seed": 7654321,  # random seed
+    "estimator_list": ["extra_trees_seeded"],
+    "verbose": False,
+}
+
+for trial_num in range(8):
+    automl = AutoML()
+    automl.add_learner(
+        learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded
+    )
+    automl.fit(X_train=X_train, y_train=y_train, **settings)
+    print(automl.best_loss)
+    print(automl.best_config)
@@ -11,8 +11,8 @@ def test_package_minimum():
    # Specify automl goal and constraint
    automl_settings = {
        "time_budget": 10,  # in seconds
-        "metric": 'accuracy',
-        "task": 'classification',
+        "metric": "accuracy",
+        "task": "classification",
        "log_file_name": "iris.log",
    }
    X_train, y_train = load_iris(return_X_y=True)
@@ -27,4 +27,3 @@ def test_package_minimum():
    preds = automl.predict_proba(X_train)
    assert preds.shape == (150, 3)
    print(preds)
@@ -436,13 +436,12 @@ analysis = tune.run(

### Reproducibility

-By default, there is randomness in our tuning process. If reproducibility is desired, you could
-manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
-With this random seed, running the following code multiple times will generate exactly the same search trajectory.
+By default, there is randomness in our tuning process (for versions <= 0.9.0). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
+With this random seed, running the following code multiple times will generate exactly the same search trajectory. The reproducibility can only be guaranteed in sequential tuning.

```python
import numpy as np
-np.random.seed(100)
+np.random.seed(100)  # This line is not needed starting from version v0.9.1.
analysis = tune.run(
    simple_obj,
    config=config_search_space,