reproducibility for random sampling (#349)

* reproducibility for random sampling #236
* doc update

parent: ee3162e232
commit: 0b25e89f29
@@ -542,7 +542,7 @@ class AutoML(BaseEstimator):
         new_automl.fit(X_train, y_train, starting_points=starting_points)
         ```

-        seed: int or None, default=None | The random seed for np.random.
+        seed: int or None, default=None | The random seed for hpo.
         n_concurrent_trials: [Experimental] int, default=1 | The number of
             concurrent trials. For n_concurrent_trials > 1, installation of
             ray is required: `pip install flaml[ray]`.
@@ -1845,7 +1845,7 @@ class AutoML(BaseEstimator):
         new_automl.fit(X_train, y_train, starting_points=starting_points)
         ```

-        seed: int or None, default=None | The random seed for np.random.
+        seed: int or None, default=None | The random seed for hpo.
         n_concurrent_trials: [Experimental] int, default=1 | The number of
             concurrent trials. For n_concurrent_trials > 1, installation of
             ray is required: `pip install flaml[ray]`.
@@ -1949,13 +1949,10 @@ class AutoML(BaseEstimator):
         )
         self._search_states = {}  # key: estimator name; value: SearchState
         self._random = np.random.RandomState(RANDOM_SEED)
-        if seed is not None:
-            np.random.seed(seed)
-        self._seed = seed + 19823 if seed is not None else 20
+        self._seed = seed if seed is not None else 20
         self._learner_selector = learner_selector
         old_level = logger.getEffectiveLevel()
         self.verbose = verbose
-        # if verbose == 0:
         logger.setLevel(50 - verbose * 10)
         if (not mlflow or not mlflow.active_run()) and not logger.handlers:
             # Add the console handler.
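The net effect of this hunk is that `AutoML` stops reseeding NumPy's global RNG and only records a private `_seed`. A minimal sketch of the behavioral difference, using hypothetical helper names rather than FLAML API:

```python
import numpy as np

def old_seed_handling(seed):
    # previous behavior: leaks into process-wide state
    if seed is not None:
        np.random.seed(seed)
    return seed + 19823 if seed is not None else 20

def new_seed_handling(seed):
    # behavior after this commit: no global side effect
    return seed if seed is not None else 20

state = np.random.get_state()[1].copy()
_ = new_seed_handling(7654321)
assert (np.random.get_state()[1] == state).all()  # global RNG untouched
```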
@@ -1,7 +1,6 @@
 import argparse
 from dataclasses import dataclass, field
 from typing import Dict, Any

 from ..data import SUMMARIZATION, SEQREGRESSION, SEQCLASSIFICATION, NLG_TASKS
-

@@ -113,7 +113,7 @@ class BlendSearch(Searcher):
                 "For cost-frugal search, "
                 "consider providing low-cost values for cost-related hps via "
                 "'low_cost_partial_config'. More info can be found at "
-                "https://github.com/microsoft/FLAML/wiki/About-%60low_cost_partial_config%60"
+                "https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune"
             )
         if evaluated_rewards and mode:
             self._points_to_evaluate = []
@@ -2,27 +2,28 @@
 # * Copyright (c) Microsoft Corporation. All rights reserved.
 # * Licensed under the MIT License. See LICENSE file in the
 # * project root for license information.
-from flaml.tune.sample import Domain
 from typing import Dict, Optional, Tuple
 import numpy as np
+import logging

 try:
     from ray import __version__ as ray_version

     assert ray_version >= "1.0.0"
     from ray.tune.suggest import Searcher
-    from ray.tune.suggest.variant_generator import generate_variants
     from ray.tune import sample
     from ray.tune.utils.util import flatten_dict, unflatten_dict
 except (ImportError, AssertionError):
     from .suggestion import Searcher
-    from .variant_generator import generate_variants
     from ..tune import sample
     from ..tune.trial import flatten_dict, unflatten_dict
-from ..tune.space import complete_config, denormalize, normalize
-import logging
+from flaml.tune.sample import _BackwardsCompatibleNumpyRng
+from ..tune.space import (
+    complete_config,
+    denormalize,
+    normalize,
+    generate_variants_compatible,
+)

 logger = logging.getLogger(__name__)

@@ -84,6 +85,7 @@ class FLOW2(Searcher):
         self.space = space or {}
         self._space = flatten_dict(self.space, prevent_delimiter=True)
         self._random = np.random.RandomState(seed)
+        self.rs_random = _BackwardsCompatibleNumpyRng(seed + 19823)
         self.seed = seed
         self.init_config = init_config
         self.best_config = flatten_dict(init_config)
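With `rs_random`, each `FLOW2` instance owns a seeded generator (offset by a constant so it is decorrelated from `self._random`) instead of relying on the global `np.random` module. A rough sketch of the idea, not the FLAML class itself:

```python
import numpy as np

class SketchSearcher:
    def __init__(self, seed=20):
        # per-instance generators: reproducible and isolated from global state
        self._random = np.random.RandomState(seed)
        self.rs_random = np.random.RandomState(seed + 19823)

a, b = SketchSearcher(seed=7), SketchSearcher(seed=7)
assert a.rs_random.uniform() == b.rs_random.uniform()  # same seed, same draws
```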
@@ -464,8 +466,8 @@ class FLOW2(Searcher):
             # random
             for i, key in enumerate(self._tunable_keys):
                 if self._direction_tried[i] != 0:
-                    for _, generated in generate_variants(
-                        {"config": {key: self._space[key]}}
+                    for _, generated in generate_variants_compatible(
+                        {"config": {key: self._space[key]}}, random_state=self.rs_random
                     ):
                         if generated["config"][key] != best_config[key]:
                             config[key] = generated["config"][key]
@@ -410,37 +410,37 @@ class OptunaSearch(Searcher):

     Tune automatically converts search spaces to Optuna's format:

     ````python
     from ray.tune.suggest.optuna import OptunaSearch
     config = { "a": tune.uniform(6, 8),
         "b": tune.loguniform(1e-4, 1e-2)}
     optuna_search = OptunaSearch(metric="loss", mode="min")
     tune.run(trainable, config=config, search_alg=optuna_search)
     ````

     If you would like to pass the search space manually, the code would
     look like this:

     ```python
     from ray.tune.suggest.optuna import OptunaSearch
     import optuna
     config = { "a": optuna.distributions.UniformDistribution(6, 8),
         "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2)}
     optuna_search = OptunaSearch(space,metric="loss",mode="min")
     tune.run(trainable, search_alg=optuna_search)
     # Equivalent Optuna define-by-run function approach:
     def define_search_space(trial: optuna.Trial):
         trial.suggest_float("a", 6, 8)
         trial.suggest_float("b", 1e-4, 1e-2, log=True)
         # training logic goes into trainable, this is just
         # for search space definition
     optuna_search = OptunaSearch(
         define_search_space,
         metric="loss",
         mode="min")
     tune.run(trainable, search_alg=optuna_search)
     .. versionadded:: 0.8.8
     ```

     """

@@ -18,11 +18,9 @@
 import copy
 import logging
 from typing import Any, Dict, Generator, List, Tuple

 import numpy
 import random
-from ..tune.sample import Categorical, Domain
+from ..tune.sample import Categorical, Domain, RandomState

 logger = logging.getLogger(__name__)

@@ -35,6 +33,8 @@ class TuneError(Exception):

 def generate_variants(
     unresolved_spec: Dict,
+    constant_grid_search: bool = False,
+    random_state: "RandomState" = None,
 ) -> Generator[Tuple[Dict, Dict], None, None]:
     """Generates variants from a spec (dict) with unresolved values.
     There are two types of unresolved values:
@@ -43,14 +43,25 @@ def generate_variants(
     variants in combination:
     "activation": grid_search(["relu", "tanh"])
     "learning_rate": grid_search([1e-3, 1e-4, 1e-5])
+    Lambda functions: These are evaluated to produce a concrete value, and
+    can express dependencies or conditional distributions between values.
+    They can also be used to express random search (e.g., by calling
+    into the `random` or `np` module).
+    "cpu": lambda spec: spec.config.num_workers
+    "batch_size": lambda spec: random.uniform(1, 1000)
     Finally, to support defining specs in plain JSON / YAML, grid search
-    can also be defined alternatively as follows:
+    and lambda functions can also be defined alternatively as follows:
     "activation": {"grid_search": ["relu", "tanh"]}
+    "cpu": {"eval": "spec.config.num_workers"}
     Use `format_vars` to format the returned dict of hyperparameters.
     Yields:
         (Dict of resolved variables, Spec object)
     """
-    for resolved_vars, spec in _generate_variants(unresolved_spec):
+    for resolved_vars, spec in _generate_variants(
+        unresolved_spec,
+        constant_grid_search=constant_grid_search,
+        random_state=random_state,
+    ):
         assert not _unresolved_values(spec)
         yield resolved_vars, spec

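With `random_state` threaded through, resolving the same spec with equally seeded generators yields identical variants. A sketch against the patched function (module paths as in this release; signatures per the hunks above):

```python
import numpy as np
from flaml.tune import sample
from flaml.searcher.variant_generator import generate_variants

spec = {"config": {"lr": sample.loguniform(1e-4, 1e-1)}}

def resolve(seed):
    rs = np.random.RandomState(seed)
    return [resolved for resolved, _ in generate_variants(spec, random_state=rs)]

assert resolve(42) == resolve(42)  # same seed, same resolved variants
```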
@@ -93,7 +104,9 @@ def parse_spec_vars(
     return resolved_vars, domain_vars, grid_vars


-def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]:
+def _generate_variants(
+    spec: Dict, constant_grid_search: bool = False, random_state: "RandomState" = None
+) -> Tuple[Dict, Dict]:
     spec = copy.deepcopy(spec)
     _, domain_vars, grid_vars = parse_spec_vars(spec)

@@ -101,10 +114,34 @@ def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]:
         yield {}, spec
         return

+    # Variables to resolve
+    to_resolve = domain_vars
+
+    all_resolved = True
+    if constant_grid_search:
+        # In this path, we first sample random variables and keep them constant
+        # for grid search.
+        # `_resolve_domain_vars` will alter `spec` directly
+        all_resolved, resolved_vars = _resolve_domain_vars(
+            spec, domain_vars, allow_fail=True, random_state=random_state
+        )
+        if not all_resolved:
+            # Not all variables have been resolved, but remove those that have
+            # from the `to_resolve` list.
+            to_resolve = [(r, d) for r, d in to_resolve if r not in resolved_vars]
+
     grid_search = _grid_search_generator(spec, grid_vars)
     for resolved_spec in grid_search:
-        resolved_vars = _resolve_domain_vars(resolved_spec, domain_vars)
-        for resolved, spec in _generate_variants(resolved_spec):
+        if not constant_grid_search or not all_resolved:
+            # In this path, we sample the remaining random variables
+            _, resolved_vars = _resolve_domain_vars(
+                resolved_spec, to_resolve, random_state=random_state
+            )
+
+        for resolved, spec in _generate_variants(
+            resolved_spec,
+            constant_grid_search=constant_grid_search,
+            random_state=random_state,
+        ):
             for path, value in grid_vars:
                 resolved_vars[path] = _get_value(spec, path)
             for k, v in resolved.items():
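The new `constant_grid_search` branch samples random variables once, before grid expansion, so every grid variant shares the same random draws; the default path re-samples per grid point. A toy illustration of the two behaviors in plain Python:

```python
import random

activations = ["relu", "tanh"]
rng = random.Random(0)

# constant_grid_search=True: one draw, shared by all grid points
lr = rng.uniform(1e-4, 1e-2)
constant = [{"activation": a, "lr": lr} for a in activations]

# default path: a fresh draw for each grid point
varying = [{"activation": a, "lr": rng.uniform(1e-4, 1e-2)} for a in activations]

assert len({v["lr"] for v in constant}) == 1
assert len({v["lr"] for v in varying}) == 2
```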
@@ -134,7 +171,12 @@ def _get_value(spec: Dict, path: Tuple) -> Any:
     return spec


-def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) -> Dict:
+def _resolve_domain_vars(
+    spec: Dict,
+    domain_vars: List[Tuple[Tuple, Domain]],
+    allow_fail: bool = False,
+    random_state: "RandomState" = None,
+) -> Tuple[bool, Dict]:
     resolved = {}
     error = True
     num_passes = 0
@@ -145,7 +187,9 @@ def _resolve_domain_vars(
             if path in resolved:
                 continue
             try:
-                value = domain.sample(_UnresolvedAccessGuard(spec))
+                value = domain.sample(
+                    _UnresolvedAccessGuard(spec), random_state=random_state
+                )
             except RecursiveDependencyError as e:
                 error = e
             except Exception:
@@ -156,8 +200,11 @@ def _resolve_domain_vars(
             assign_value(spec, path, value)
             resolved[path] = value
     if error:
-        raise error
-    return resolved
+        if not allow_fail:
+            raise error
+        else:
+            return False, resolved
+    return True, resolved


 def _grid_search_generator(
@@ -12,21 +12,76 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# This source file is included here because ray does not fully support Windows.
+# This source file is adapted here because ray does not fully support Windows.

 # Copyright (c) Microsoft Corporation.
 import logging
-import random
 from copy import copy
-from inspect import signature
 from math import isclose
-from typing import Any, Callable, Dict, List, Optional, Sequence, Union
+from typing import Any, Dict, List, Optional, Sequence, Union

 import numpy as np

+# Backwards compatibility
+try:
+    # Added in numpy>=1.17 but we require numpy>=1.16
+    np_random_generator = np.random.Generator
+    LEGACY_RNG = False
+except AttributeError:
+
+    class np_random_generator:
+        pass
+
+    LEGACY_RNG = True
+
 logger = logging.getLogger(__name__)


+class _BackwardsCompatibleNumpyRng:
+    """Thin wrapper to ensure backwards compatibility between
+    new and old numpy randomness generators.
+    """
+
+    _rng = None
+
+    def __init__(
+        self,
+        generator_or_seed: Optional[
+            Union["np_random_generator", np.random.RandomState, int]
+        ] = None,
+    ):
+        if generator_or_seed is None or isinstance(
+            generator_or_seed, (np.random.RandomState, np_random_generator)
+        ):
+            self._rng = generator_or_seed
+        elif LEGACY_RNG:
+            self._rng = np.random.RandomState(generator_or_seed)
+        else:
+            self._rng = np.random.default_rng(generator_or_seed)
+
+    @property
+    def legacy_rng(self) -> bool:
+        return not isinstance(self._rng, np_random_generator)
+
+    @property
+    def rng(self):
+        # don't set self._rng to np.random to avoid picking issues
+        return self._rng if self._rng is not None else np.random
+
+    def __getattr__(self, name: str) -> Any:
+        # https://numpy.org/doc/stable/reference/random/new-or-different.html
+        if self.legacy_rng:
+            if name == "integers":
+                name = "randint"
+            elif name == "random":
+                name = "rand"
+        return getattr(self.rng, name)
+
+
+RandomState = Union[
+    None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int
+]
+
+
 class Domain:
     """Base class to specify a type and valid range to sample parameters from.
     This base class is implemented by parameter spaces, like float ranges
|
|||||||
sampler = self.default_sampler_cls()
|
sampler = self.default_sampler_cls()
|
||||||
return sampler
|
return sampler
|
||||||
|
|
||||||
def sample(self, spec=None, size=1):
|
def sample(
|
||||||
|
self,
|
||||||
|
spec: Optional[Union[List[Dict], Dict]] = None,
|
||||||
|
size: int = 1,
|
||||||
|
random_state: "RandomState" = None,
|
||||||
|
):
|
||||||
|
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
|
||||||
|
random_state = _BackwardsCompatibleNumpyRng(random_state)
|
||||||
sampler = self.get_sampler()
|
sampler = self.get_sampler()
|
||||||
return sampler.sample(self, spec=spec, size=size)
|
return sampler.sample(self, spec=spec, size=size, random_state=random_state)
|
||||||
|
|
||||||
def is_grid(self):
|
def is_grid(self):
|
||||||
return isinstance(self.sampler, Grid)
|
return isinstance(self.sampler, Grid)
|
||||||
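Passing `random_state` to `Domain.sample` makes single draws reproducible without touching global state; an integer seed is wrapped on the fly. A sketch using the module's `uniform` helper (defined further down in this file):

```python
from flaml.tune.sample import uniform

dom = uniform(0.0, 1.0)
assert dom.sample(random_state=7) == dom.sample(random_state=7)  # seeded: repeatable
print(dom.sample())  # unseeded: falls back to the np.random module
```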
@@ -86,6 +148,7 @@ class Sampler:
         domain: Domain,
         spec: Optional[Union[List[Dict], Dict]] = None,
         size: int = 1,
+        random_state: "RandomState" = None,
     ):
         raise NotImplementedError

@@ -128,6 +191,7 @@ class Grid(Sampler):
         domain: Domain,
         spec: Optional[Union[List[Dict], Dict]] = None,
         size: int = 1,
+        random_state: "RandomState" = None,
     ):
         return RuntimeError("Do not call `sample()` on grid.")

@@ -139,10 +203,13 @@ class Float(Domain):
             domain: "Float",
             spec: Optional[Union[List[Dict], Dict]] = None,
             size: int = 1,
+            random_state: "RandomState" = None,
         ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
             assert domain.lower > float("-inf"), "Uniform needs a lower bound"
             assert domain.upper < float("inf"), "Uniform needs a upper bound"
-            items = np.random.uniform(domain.lower, domain.upper, size=size)
+            items = random_state.uniform(domain.lower, domain.upper, size=size)
             return items if len(items) > 1 else domain.cast(items[0])

     class _LogUniform(LogUniform):
@@ -151,7 +218,10 @@ class Float(Domain):
             domain: "Float",
             spec: Optional[Union[List[Dict], Dict]] = None,
             size: int = 1,
+            random_state: "RandomState" = None,
         ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
             assert (
                 0 < domain.upper < float("inf")
@@ -159,7 +229,7 @@ class Float(Domain):
             logmin = np.log(domain.lower) / np.log(self.base)
             logmax = np.log(domain.upper) / np.log(self.base)

-            items = self.base ** (np.random.uniform(logmin, logmax, size=size))
+            items = self.base ** (random_state.uniform(logmin, logmax, size=size))
             return items if len(items) > 1 else domain.cast(items[0])

     class _Normal(Normal):
@@ -168,14 +238,17 @@ class Float(Domain):
             domain: "Float",
             spec: Optional[Union[List[Dict], Dict]] = None,
             size: int = 1,
+            random_state: "RandomState" = None,
         ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
             assert not domain.lower or domain.lower == float(
                 "-inf"
             ), "Normal sampling does not allow a lower value bound."
             assert not domain.upper or domain.upper == float(
                 "inf"
             ), "Normal sampling does not allow a upper value bound."
-            items = np.random.normal(self.mean, self.sd, size=size)
+            items = random_state.normal(self.mean, self.sd, size=size)
             return items if len(items) > 1 else domain.cast(items[0])

     default_sampler_cls = _Uniform
@@ -262,8 +335,11 @@ class Integer(Domain):
             domain: "Integer",
             spec: Optional[Union[List[Dict], Dict]] = None,
             size: int = 1,
+            random_state: "RandomState" = None,
         ):
-            items = np.random.randint(domain.lower, domain.upper, size=size)
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
+            items = random_state.integers(domain.lower, domain.upper, size=size)
             return items if len(items) > 1 else domain.cast(items[0])

     class _LogUniform(LogUniform):
@@ -272,7 +348,10 @@ class Integer(Domain):
             domain: "Integer",
             spec: Optional[Union[List[Dict], Dict]] = None,
             size: int = 1,
+            random_state: "RandomState" = None,
         ):
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
             assert (
                 0 < domain.upper < float("inf")
@@ -280,8 +359,8 @@ class Integer(Domain):
             logmin = np.log(domain.lower) / np.log(self.base)
             logmax = np.log(domain.upper) / np.log(self.base)

-            items = self.base ** (np.random.uniform(logmin, logmax, size=size))
-            items = np.round(items).astype(int)
+            items = self.base ** (random_state.uniform(logmin, logmax, size=size))
+            items = np.floor(items).astype(int)
             return items if len(items) > 1 else domain.cast(items[0])

     default_sampler_cls = _Uniform
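The switch from `np.round` to `np.floor` matters for integer log-uniform sampling: rounding can push a draw up to `domain.upper`, which `randint`-style bounds treat as exclusive, while flooring keeps every sample inside `[lower, upper)`. A small demonstration:

```python
import numpy as np

lower, upper, base = 1, 10, 10
logmin = np.log(lower) / np.log(base)
logmax = np.log(upper) / np.log(base)
draws = base ** np.random.RandomState(0).uniform(logmin, logmax, size=10000)

assert np.floor(draws).astype(int).max() <= upper - 1  # never reaches upper
assert np.round(draws).astype(int).max() == upper      # rounding can hit upper
```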
@@ -337,9 +416,11 @@ class Categorical(Domain):
             domain: "Categorical",
             spec: Optional[Union[List[Dict], Dict]] = None,
             size: int = 1,
+            random_state: "RandomState" = None,
         ):
-            items = random.choices(domain.categories, k=size)
+            if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+                random_state = _BackwardsCompatibleNumpyRng(random_state)
+            items = random_state.choice(domain.categories, size=size).tolist()
             return items if len(items) > 1 else domain.cast(items[0])

     default_sampler_cls = _Uniform
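Moving from `random.choices` to `random_state.choice(...).tolist()` puts categorical draws on the same seeded NumPy stream as every other domain; `.tolist()` restores native Python types. Roughly:

```python
import numpy as np

rng = np.random.RandomState(3)
items = rng.choice(["relu", "tanh", "sigmoid"], size=2).tolist()
assert all(isinstance(i, str) for i in items)  # native str, not np.str_
```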
@@ -352,6 +433,11 @@ class Categorical(Domain):
         new.set_sampler(self._Uniform())
         return new

+    def grid(self):
+        new = copy(self)
+        new.set_sampler(Grid())
+        return new
+
     def __len__(self):
         return len(self.categories)

@@ -381,8 +467,11 @@ class Quantized(Sampler):
         domain: Domain,
         spec: Optional[Union[List[Dict], Dict]] = None,
         size: int = 1,
+        random_state: "RandomState" = None,
     ):
-        values = self.sampler.sample(domain, spec, size)
+        if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
+            random_state = _BackwardsCompatibleNumpyRng(random_state)
+        values = self.sampler.sample(domain, spec, size, random_state=random_state)
         quantized = np.round(np.divide(values, self.q)) * self.q
         if not isinstance(quantized, np.ndarray):
             return domain.cast(quantized)
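`Quantized.sample` now forwards `random_state` to the wrapped sampler before snapping values to multiples of `self.q`; the quantization step itself is deterministic:

```python
import numpy as np

q = 0.25
values = np.array([0.31, 0.62, 0.97])
quantized = np.round(np.divide(values, q)) * q
print(quantized)  # [0.25 0.5  1.  ] -- nearest multiples of q
```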
@@ -462,10 +551,10 @@ def qloguniform(lower: float, upper: float, q: float, base: float = 10):
     return Float(lower, upper).loguniform(base).quantized(q)


-def choice(categories: List):
+def choice(categories: Sequence):
     """Sample a categorical value.
     Sampling from ``tune.choice([1, 2])`` is equivalent to sampling from
-    ``random.choice([1, 2])``
+    ``np.random.choice([1, 2])``
     """
     return Categorical(categories).uniform()

@@ -7,13 +7,22 @@ try:
 except (ImportError, AssertionError):
     from . import sample
     from ..searcher.variant_generator import generate_variants
-from typing import Dict, Optional, Any, Tuple
+from typing import Dict, Optional, Any, Tuple, Generator
 import numpy as np
 import logging

 logger = logging.getLogger(__name__)


+def generate_variants_compatible(
+    unresolved_spec: Dict, constant_grid_search: bool = False, random_state=None
+) -> Generator[Tuple[Dict, Dict], None, None]:
+    try:
+        return generate_variants(unresolved_spec, constant_grid_search, random_state)
+    except TypeError:
+        return generate_variants(unresolved_spec, constant_grid_search)
+
+
 def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str, Any]]:
     """Define-by-run function to create the search space.

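The shim works because Python binds arguments at call time: calling Ray's older `generate_variants` with the extra `random_state` argument raises `TypeError` before the generator body ever runs, so the `except` cleanly retries with the two-argument form. A self-contained sketch of the pattern:

```python
def seeded(spec, constant_grid_search=False, random_state=None):
    yield spec  # stands in for a generator that honors random_state

def legacy(spec, constant_grid_search=False):
    yield spec  # stands in for ray's older signature

def compatible(fn, spec, random_state=None):
    try:
        return fn(spec, False, random_state)  # TypeError at call time if unsupported
    except TypeError:
        return fn(spec, False)

assert list(compatible(legacy, {"a": 1})) == [{"a": 1}]
assert list(compatible(seeded, {"a": 1}, 42)) == [{"a": 1}]
```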
@@ -417,7 +426,6 @@ def indexof(domain: Dict, config: Dict) -> int:
         return index
     if config in domain.categories:
         return domain.categories.index(config)
-    # print(config)
     for i, cat in enumerate(domain.categories):
         if not isinstance(cat, dict):
             continue
@@ -491,7 +499,9 @@ def complete_config(
     for key, value in space.items():
         if key not in config:
             config[key] = value
-    for _, generated in generate_variants({"config": config}):
+    for _, generated in generate_variants_compatible(
+        {"config": config}, random_state=flow2.rs_random
+    ):
         config = generated["config"]
         break
     subspace = {}
@@ -1 +1 @@
-__version__ = "0.9.1"
+__version__ = "0.9.2"
@@ -215,7 +215,7 @@ def test_multioutput():
     # predict
     print(model.predict(X_test))

-    #train the model
+    # train the model
     model = RegressorChain(AutoML(task="regression", time_budget=1))
     model.fit(X_train, y_train)

@@ -223,6 +223,5 @@ def test_multioutput():
     print(model.predict(X_test))


-
 if __name__ == "__main__":
     unittest.main()
@@ -38,7 +38,7 @@ class TestWarmStart(unittest.TestCase):
         starting_points = automl_experiment.best_config_per_estimator
         print("starting_points", starting_points)
         print("loss of the starting_points", automl_experiment.best_loss_per_estimator)
-        starting_point = starting_points['lgbm']
+        starting_point = starting_points["lgbm"]
         hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"]

         # 2. Constrct a new class:
|
|||||||
# if an hp is specifed to be freezed, use tine value provided in the starting_point
|
# if an hp is specifed to be freezed, use tine value provided in the starting_point
|
||||||
# otherwise use the setting from the original search space
|
# otherwise use the setting from the original search space
|
||||||
if hp_name in starting_point:
|
if hp_name in starting_point:
|
||||||
space[hp_name] = {
|
space[hp_name] = {"domain": starting_point[hp_name]}
|
||||||
"domain": starting_point[hp_name]
|
|
||||||
}
|
|
||||||
# (3.1) Configure the search space for hps that are in the original search space
|
# (3.1) Configure the search space for hps that are in the original search space
|
||||||
# but you want to change something, for example the range.
|
# but you want to change something, for example the range.
|
||||||
revised_hps_to_search = {
|
revised_hps_to_search = {
|
||||||
"n_estimators": {
|
"n_estimators": {
|
||||||
"domain": tune.lograndint(lower=10, upper=32768),
|
"domain": tune.lograndint(lower=10, upper=32768),
|
||||||
"init_value": starting_point.get(
|
"init_value": starting_point.get("n_estimators")
|
||||||
"n_estimators"
|
|
||||||
)
|
|
||||||
or space["n_estimators"].get("init_value", 10),
|
or space["n_estimators"].get("init_value", 10),
|
||||||
"low_cost_init_value": space["n_estimators"].get(
|
"low_cost_init_value": space["n_estimators"].get(
|
||||||
"low_cost_init_value", 10
|
"low_cost_init_value", 10
|
||||||
@ -73,9 +69,7 @@ class TestWarmStart(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
"num_leaves": {
|
"num_leaves": {
|
||||||
"domain": tune.lograndint(lower=10, upper=3276),
|
"domain": tune.lograndint(lower=10, upper=3276),
|
||||||
"init_value": starting_point.get(
|
"init_value": starting_point.get("num_leaves")
|
||||||
"num_leaves"
|
|
||||||
)
|
|
||||||
or space["num_leaves"].get("init_value", 10),
|
or space["num_leaves"].get("init_value", 10),
|
||||||
"low_cost_init_value": space["num_leaves"].get(
|
"low_cost_init_value": space["num_leaves"].get(
|
||||||
"low_cost_init_value", 10
|
"low_cost_init_value", 10
|
||||||
test/rep.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+from flaml.data import load_openml_dataset
+from flaml.ml import ExtraTreesEstimator
+from flaml import AutoML
+
+X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
+X_train = X_train.iloc[:1000]
+y_train = y_train.iloc[:1000]
+
+
+class ExtraTreesEstimatorSeeded(ExtraTreesEstimator):
+    """ExtraTreesEstimator for reproducible FLAML run."""
+
+    def config2params(self, config: dict) -> dict:
+        params = super().config2params(config)
+        params["random_state"] = 0
+        return params
+
+
+settings = {
+    "time_budget": 1e10,  # total running time in seconds
+    "max_iter": 3,
+    "metric": "ap",  # average_precision
+    "task": "classification",  # task type
+    "seed": 7654321,  # random seed
+    "estimator_list": ["extra_trees_seeded"],
+    "verbose": False,
+}
+
+for trial_num in range(8):
+    automl = AutoML()
+    automl.add_learner(
+        learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded
+    )
+    automl.fit(X_train=X_train, y_train=y_train, **settings)
+    print(automl.best_loss)
+    print(automl.best_config)
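The new script closes both randomness channels: the estimator's own `random_state` (pinned via `config2params`) and FLAML's hyperparameter sampling (via `seed`). A hypothetical assertion harness over the script above, reusing its names, would check that every seeded run lands on the same outcome:

```python
# Hypothetical reproducibility check built on the script above.
outcomes = set()
for _ in range(3):
    automl = AutoML()
    automl.add_learner(
        learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded
    )
    automl.fit(X_train=X_train, y_train=y_train, **settings)
    outcomes.add((automl.best_loss, str(automl.best_config)))
assert len(outcomes) == 1  # every run found the same config and loss
```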
@@ -11,8 +11,8 @@ def test_package_minimum():
     # Specify automl goal and constraint
     automl_settings = {
         "time_budget": 10,  # in seconds
-        "metric": 'accuracy',
-        "task": 'classification',
+        "metric": "accuracy",
+        "task": "classification",
         "log_file_name": "iris.log",
     }
     X_train, y_train = load_iris(return_X_y=True)
@@ -27,4 +27,3 @@ def test_package_minimum():
     preds = automl.predict_proba(X_train)
     assert preds.shape == (150, 3)
     print(preds)
-
@@ -436,13 +436,12 @@ analysis = tune.run(

 ### Reproducibility

-By default, there is randomness in our tuning process. If reproducibility is desired, you could
-manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
-With this random seed, running the following code multiple times will generate exactly the same search trajectory.
+By default, there is randomness in our tuning process (for versions <= 0.9.0). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
+With this random seed, running the following code multiple times will generate exactly the same search trajectory. The reproducibility can only be guaranteed in sequential tuning.

 ```python
 import numpy as np
-np.random.seed(100)
+np.random.seed(100)  # This line is not needed starting from version v0.9.1.
 analysis = tune.run(
     simple_obj,
     config=config_search_space,
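From v0.9.1 on, reproducibility no longer depends on the global seed; the searcher draws from its own seeded generator. If an explicit seed is still wanted, one option is to pass a seeded search algorithm. This sketch assumes `flaml.tune.run` accepts `search_alg`, that `CFO` exposes a `seed` parameter as in this release line, and that `simple_obj` reports a metric named "obj":

```python
from flaml import tune, CFO

analysis = tune.run(
    simple_obj,
    config=config_search_space,
    search_alg=CFO(metric="obj", mode="min", seed=100),  # seed lives in the searcher
    num_samples=10,
)
```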