reproducibility for random sampling (#349)

* reproducibility for random sampling #236

* doc update
Chi Wang 2021-12-22 12:12:25 -08:00 committed by GitHub
parent ee3162e232
commit 0b25e89f29
14 changed files with 316 additions and 145 deletions

View File

@ -542,7 +542,7 @@ class AutoML(BaseEstimator):
new_automl.fit(X_train, y_train, starting_points=starting_points)
```
seed: int or None, default=None | The random seed for np.random.
seed: int or None, default=None | The random seed for hpo.
n_concurrent_trials: [Experimental] int, default=1 | The number of
concurrent trials. For n_concurrent_trials > 1, installation of
ray is required: `pip install flaml[ray]`.
@ -1845,7 +1845,7 @@ class AutoML(BaseEstimator):
new_automl.fit(X_train, y_train, starting_points=starting_points)
```
seed: int or None, default=None | The random seed for np.random.
seed: int or None, default=None | The random seed for hpo.
n_concurrent_trials: [Experimental] int, default=1 | The number of
concurrent trials. For n_concurrent_trials > 1, installation of
ray is required: `pip install flaml[ray]`.
@ -1949,13 +1949,10 @@ class AutoML(BaseEstimator):
)
self._search_states = {} # key: estimator name; value: SearchState
self._random = np.random.RandomState(RANDOM_SEED)
if seed is not None:
np.random.seed(seed)
self._seed = seed + 19823 if seed is not None else 20
self._seed = seed if seed is not None else 20
self._learner_selector = learner_selector
old_level = logger.getEffectiveLevel()
self.verbose = verbose
# if verbose == 0:
logger.setLevel(50 - verbose * 10)
if (not mlflow or not mlflow.active_run()) and not logger.handlers:
# Add the console handler.
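
A usage sketch of the reworked seeding (dataset, metric, and budget here are illustrative, not part of the diff): with a fixed `seed`, repeated sequential runs follow the same search trajectory, and the global `np.random` state is no longer mutated as a side effect.

```python
from flaml import AutoML
from sklearn.datasets import load_iris

X_train, y_train = load_iris(return_X_y=True)

automl = AutoML()
# seed is used by the hpo search; np.random.seed() is no longer called.
automl.fit(X_train, y_train, task="classification", metric="accuracy",
           max_iter=3, seed=7654321)
```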

View File

@ -1,7 +1,6 @@
import argparse
from dataclasses import dataclass, field
from typing import Dict, Any
from ..data import SUMMARIZATION, SEQREGRESSION, SEQCLASSIFICATION, NLG_TASKS

View File

@ -113,7 +113,7 @@ class BlendSearch(Searcher):
"For cost-frugal search, "
"consider providing low-cost values for cost-related hps via "
"'low_cost_partial_config'. More info can be found at "
"https://github.com/microsoft/FLAML/wiki/About-%60low_cost_partial_config%60"
"https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune"
)
if evaluated_rewards and mode:
self._points_to_evaluate = []

View File

@ -2,27 +2,28 @@
# * Copyright (c) Microsoft Corporation. All rights reserved.
# * Licensed under the MIT License. See LICENSE file in the
# * project root for license information.
from flaml.tune.sample import Domain
from typing import Dict, Optional, Tuple
import numpy as np
import logging
try:
from ray import __version__ as ray_version
assert ray_version >= "1.0.0"
from ray.tune.suggest import Searcher
from ray.tune.suggest.variant_generator import generate_variants
from ray.tune import sample
from ray.tune.utils.util import flatten_dict, unflatten_dict
except (ImportError, AssertionError):
from .suggestion import Searcher
from .variant_generator import generate_variants
from ..tune import sample
from ..tune.trial import flatten_dict, unflatten_dict
from ..tune.space import complete_config, denormalize, normalize
import logging
from flaml.tune.sample import _BackwardsCompatibleNumpyRng
from ..tune.space import (
complete_config,
denormalize,
normalize,
generate_variants_compatible,
)
logger = logging.getLogger(__name__)
@ -84,6 +85,7 @@ class FLOW2(Searcher):
self.space = space or {}
self._space = flatten_dict(self.space, prevent_delimiter=True)
self._random = np.random.RandomState(seed)
self.rs_random = _BackwardsCompatibleNumpyRng(seed + 19823)
self.seed = seed
self.init_config = init_config
self.best_config = flatten_dict(init_config)
@ -464,8 +466,8 @@ class FLOW2(Searcher):
# random
for i, key in enumerate(self._tunable_keys):
if self._direction_tried[i] != 0:
for _, generated in generate_variants(
{"config": {key: self._space[key]}}
for _, generated in generate_variants_compatible(
{"config": {key: self._space[key]}}, random_state=self.rs_random
):
if generated["config"][key] != best_config[key]:
config[key] = generated["config"][key]
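
Note that the `seed + 19823` offset removed from `AutoML.__init__` in the first hunk now seeds FLOW2's `rs_random` directly, keeping the variant-sampling stream separate from the `np.random.RandomState(seed)` stream. A minimal sketch of the resulting determinism, assuming the bundled (non-ray) variant generator and these internal import paths:

```python
from flaml.tune.sample import _BackwardsCompatibleNumpyRng, uniform
from flaml.tune.space import generate_variants_compatible

spec = {"config": {"x": uniform(0, 1)}}

def sample_once(seed):
    rng = _BackwardsCompatibleNumpyRng(seed)
    return [s["config"]["x"] for _, s in generate_variants_compatible(spec, random_state=rng)]

# Identical seeds give identical draws, independent of the global np.random state.
assert sample_once(19823) == sample_once(19823)
```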

View File

@ -178,7 +178,7 @@ class ConcurrencyLimiter(Searcher):
batch (bool): Whether to wait for all concurrent samples
to finish before updating the underlying searcher.
Example:
```python
from ray.tune.suggest import ConcurrencyLimiter
search_alg = HyperOptSearch(metric="accuracy")
search_alg = ConcurrencyLimiter(search_alg, max_concurrent=2)
@ -366,81 +366,81 @@ class _OptunaTrialSuggestCaptor:
class OptunaSearch(Searcher):
"""A wrapper around Optuna to provide trial suggestions.
[Optuna](https://optuna.org/)
is a hyperparameter optimization library.
In contrast to other libraries, it employs define-by-run style
hyperparameter definitions.
This Searcher is a thin wrapper around Optuna's search algorithms.
You can pass any Optuna sampler, which will be used to generate
hyperparameter suggestions.
Args:
space (dict|Callable): Hyperparameter search space definition for
Optuna's sampler. This can be either a class `dict` with
parameter names as keys and ``optuna.distributions`` as values,
or a Callable - in which case, it should be a define-by-run
function using ``optuna.trial`` to obtain the hyperparameter
values. The function should return either a class `dict` of
constant values with names as keys, or None.
For more information, see
[tutorial](https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/002_configurations.html).
Warning - No actual computation should take place in the define-by-run
function. Instead, put the training logic inside the function
or class trainable passed to tune.run.
metric (str): The training result objective value attribute. If None
but a mode was passed, the anonymous metric `_metric` will be used
per default.
mode (str): One of {min, max}. Determines whether objective is
minimizing or maximizing the metric attribute.
points_to_evaluate (list): Initial parameter suggestions to be run
first. This is for when you already have some good parameters
you want to run first to help the algorithm make better suggestions
for future parameters. Needs to be a list of dicts containing the
configurations.
sampler (optuna.samplers.BaseSampler): Optuna sampler used to
draw hyperparameter configurations. Defaults to ``TPESampler``.
seed (int): Seed to initialize sampler with. This parameter is only
used when ``sampler=None``. In all other cases, the sampler
you pass should be initialized with the seed already.
evaluated_rewards (list): If you have previously evaluated the
parameters passed in as points_to_evaluate you can avoid
re-running those trials by passing in the reward attributes
as a list so the optimiser can be told the results without
needing to re-compute the trial. Must be the same length as
points_to_evaluate.
Tune automatically converts search spaces to Optuna's format:
```python
from ray.tune.suggest.optuna import OptunaSearch
config = { "a": tune.uniform(6, 8),
"b": tune.loguniform(1e-4, 1e-2)}
optuna_search = OptunaSearch(metric="loss", mode="min")
tune.run(trainable, config=config, search_alg=optuna_search)
```
If you would like to pass the search space manually, the code would
look like this:
```python
from ray.tune.suggest.optuna import OptunaSearch
import optuna
config = { "a": optuna.distributions.UniformDistribution(6, 8),
"b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2)}
optuna_search = OptunaSearch(space,metric="loss",mode="min")
tune.run(trainable, search_alg=optuna_search)
# Equivalent Optuna define-by-run function approach:
def define_search_space(trial: optuna.Trial):
trial.suggest_float("a", 6, 8)
trial.suggest_float("b", 1e-4, 1e-2, log=True)
# training logic goes into trainable, this is just
# for search space definition
optuna_search = OptunaSearch(
define_search_space,
metric="loss",
mode="min")
tune.run(trainable, search_alg=optuna_search)
```
.. versionadded:: 0.8.8
"""

View File

@ -18,11 +18,9 @@
import copy
import logging
from typing import Any, Dict, Generator, List, Tuple
import numpy
import random
from ..tune.sample import Categorical, Domain
from ..tune.sample import Categorical, Domain, RandomState
logger = logging.getLogger(__name__)
@ -35,6 +33,8 @@ class TuneError(Exception):
def generate_variants(
unresolved_spec: Dict,
constant_grid_search: bool = False,
random_state: "RandomState" = None,
) -> Generator[Tuple[Dict, Dict], None, None]:
"""Generates variants from a spec (dict) with unresolved values.
There are two types of unresolved values:
@ -43,14 +43,25 @@ def generate_variants(
variants in combination:
"activation": grid_search(["relu", "tanh"])
"learning_rate": grid_search([1e-3, 1e-4, 1e-5])
Lambda functions: These are evaluated to produce a concrete value, and
can express dependencies or conditional distributions between values.
They can also be used to express random search (e.g., by calling
into the `random` or `np` module).
"cpu": lambda spec: spec.config.num_workers
"batch_size": lambda spec: random.uniform(1, 1000)
Finally, to support defining specs in plain JSON / YAML, grid search
can also be defined alternatively as follows:
and lambda functions can also be defined alternatively as follows:
"activation": {"grid_search": ["relu", "tanh"]}
"cpu": {"eval": "spec.config.num_workers"}
Use `format_vars` to format the returned dict of hyperparameters.
Yields:
(Dict of resolved variables, Spec object)
"""
for resolved_vars, spec in _generate_variants(unresolved_spec):
for resolved_vars, spec in _generate_variants(
unresolved_spec,
constant_grid_search=constant_grid_search,
random_state=random_state,
):
assert not _unresolved_values(spec)
yield resolved_vars, spec
@ -93,7 +104,9 @@ def parse_spec_vars(
return resolved_vars, domain_vars, grid_vars
def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]:
def _generate_variants(
spec: Dict, constant_grid_search: bool = False, random_state: "RandomState" = None
) -> Tuple[Dict, Dict]:
spec = copy.deepcopy(spec)
_, domain_vars, grid_vars = parse_spec_vars(spec)
@ -101,10 +114,34 @@ def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]:
yield {}, spec
return
# Variables to resolve
to_resolve = domain_vars
all_resolved = True
if constant_grid_search:
# In this path, we first sample random variables and keep them constant
# for grid search.
# `_resolve_domain_vars` will alter `spec` directly
all_resolved, resolved_vars = _resolve_domain_vars(
spec, domain_vars, allow_fail=True, random_state=random_state
)
if not all_resolved:
# Not all variables have been resolved, but remove those that have
# from the `to_resolve` list.
to_resolve = [(r, d) for r, d in to_resolve if r not in resolved_vars]
grid_search = _grid_search_generator(spec, grid_vars)
for resolved_spec in grid_search:
resolved_vars = _resolve_domain_vars(resolved_spec, domain_vars)
for resolved, spec in _generate_variants(resolved_spec):
if not constant_grid_search or not all_resolved:
# In this path, we sample the remaining random variables
_, resolved_vars = _resolve_domain_vars(
resolved_spec, to_resolve, random_state=random_state
)
for resolved, spec in _generate_variants(
resolved_spec,
constant_grid_search=constant_grid_search,
random_state=random_state,
):
for path, value in grid_vars:
resolved_vars[path] = _get_value(spec, path)
for k, v in resolved.items():
@ -134,7 +171,12 @@ def _get_value(spec: Dict, path: Tuple) -> Any:
return spec
def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) -> Dict:
def _resolve_domain_vars(
spec: Dict,
domain_vars: List[Tuple[Tuple, Domain]],
allow_fail: bool = False,
random_state: "RandomState" = None,
) -> Tuple[bool, Dict]:
resolved = {}
error = True
num_passes = 0
@ -145,7 +187,9 @@ def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) ->
if path in resolved:
continue
try:
value = domain.sample(_UnresolvedAccessGuard(spec))
value = domain.sample(
_UnresolvedAccessGuard(spec), random_state=random_state
)
except RecursiveDependencyError as e:
error = e
except Exception:
@ -156,8 +200,11 @@ def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) ->
assign_value(spec, path, value)
resolved[path] = value
if error:
raise error
return resolved
if not allow_fail:
raise error
else:
return False, resolved
return True, resolved
def _grid_search_generator(
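
For reference, a small sketch of the plain-dict grid form described in the docstring above (grid values expand deterministically; `random_state` affects only `Domain` variables), assuming the adapted generator keeps ray's `{"grid_search": ...}` syntax:

```python
from flaml.searcher.variant_generator import generate_variants

spec = {"config": {"activation": {"grid_search": ["relu", "tanh"]}}}

# Each grid value expands into its own variant; no randomness is involved.
variants = [s["config"]["activation"] for _, s in generate_variants(spec)]
assert sorted(variants) == ["relu", "tanh"]
```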

View File

@ -12,21 +12,76 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# This source file is included here because ray does not fully support Windows.
# This source file is adapted here because ray does not fully support Windows.
# Copyright (c) Microsoft Corporation.
import logging
import random
from copy import copy
from inspect import signature
from math import isclose
from typing import Any, Callable, Dict, List, Optional, Sequence, Union
from typing import Any, Dict, List, Optional, Sequence, Union
import numpy as np
# Backwards compatibility
try:
# Added in numpy>=1.17 but we require numpy>=1.16
np_random_generator = np.random.Generator
LEGACY_RNG = False
except AttributeError:
class np_random_generator:
pass
LEGACY_RNG = True
logger = logging.getLogger(__name__)
class _BackwardsCompatibleNumpyRng:
"""Thin wrapper to ensure backwards compatibility between
new and old numpy randomness generators.
"""
_rng = None
def __init__(
self,
generator_or_seed: Optional[
Union["np_random_generator", np.random.RandomState, int]
] = None,
):
if generator_or_seed is None or isinstance(
generator_or_seed, (np.random.RandomState, np_random_generator)
):
self._rng = generator_or_seed
elif LEGACY_RNG:
self._rng = np.random.RandomState(generator_or_seed)
else:
self._rng = np.random.default_rng(generator_or_seed)
@property
def legacy_rng(self) -> bool:
return not isinstance(self._rng, np_random_generator)
@property
def rng(self):
# don't set self._rng to np.random to avoid pickling issues
return self._rng if self._rng is not None else np.random
def __getattr__(self, name: str) -> Any:
# https://numpy.org/doc/stable/reference/random/new-or-different.html
if self.legacy_rng:
if name == "integers":
name = "randint"
elif name == "random":
name = "rand"
return getattr(self.rng, name)
RandomState = Union[
None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int
]
class Domain:
"""Base class to specify a type and valid range to sample parameters from.
This base class is implemented by parameter spaces, like float ranges
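
An illustrative check of the wrapper's name shimming (not part of the diff; the mappings follow the `__getattr__` above):

```python
from flaml.tune.sample import _BackwardsCompatibleNumpyRng

rng = _BackwardsCompatibleNumpyRng(42)
print(rng.integers(0, 10, size=3))  # forwarded to randint() under legacy NumPy
print(rng.random(2))                # forwarded to rand() under legacy NumPy
print(rng.uniform(0.0, 1.0))        # same method name in both APIs
```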
@ -61,9 +116,16 @@ class Domain:
sampler = self.default_sampler_cls()
return sampler
def sample(self, spec=None, size=1):
def sample(
self,
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
sampler = self.get_sampler()
return sampler.sample(self, spec=spec, size=size)
return sampler.sample(self, spec=spec, size=size, random_state=random_state)
def is_grid(self):
return isinstance(self.sampler, Grid)
@ -86,6 +148,7 @@ class Sampler:
domain: Domain,
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
raise NotImplementedError
@ -128,6 +191,7 @@ class Grid(Sampler):
domain: Domain,
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
return RuntimeError("Do not call `sample()` on grid.")
@ -139,10 +203,13 @@ class Float(Domain):
domain: "Float",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
assert domain.lower > float("-inf"), "Uniform needs a lower bound"
assert domain.upper < float("inf"), "Uniform needs a upper bound"
items = np.random.uniform(domain.lower, domain.upper, size=size)
items = random_state.uniform(domain.lower, domain.upper, size=size)
return items if len(items) > 1 else domain.cast(items[0])
class _LogUniform(LogUniform):
@ -151,7 +218,10 @@ class Float(Domain):
domain: "Float",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
assert (
0 < domain.upper < float("inf")
@ -159,7 +229,7 @@ class Float(Domain):
logmin = np.log(domain.lower) / np.log(self.base)
logmax = np.log(domain.upper) / np.log(self.base)
items = self.base ** (np.random.uniform(logmin, logmax, size=size))
items = self.base ** (random_state.uniform(logmin, logmax, size=size))
return items if len(items) > 1 else domain.cast(items[0])
class _Normal(Normal):
@ -168,14 +238,17 @@ class Float(Domain):
domain: "Float",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
assert not domain.lower or domain.lower == float(
"-inf"
), "Normal sampling does not allow a lower value bound."
assert not domain.upper or domain.upper == float(
"inf"
), "Normal sampling does not allow a upper value bound."
items = np.random.normal(self.mean, self.sd, size=size)
items = random_state.normal(self.mean, self.sd, size=size)
return items if len(items) > 1 else domain.cast(items[0])
default_sampler_cls = _Uniform
@ -262,8 +335,11 @@ class Integer(Domain):
domain: "Integer",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
items = np.random.randint(domain.lower, domain.upper, size=size)
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
items = random_state.integers(domain.lower, domain.upper, size=size)
return items if len(items) > 1 else domain.cast(items[0])
class _LogUniform(LogUniform):
@ -272,7 +348,10 @@ class Integer(Domain):
domain: "Integer",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
assert domain.lower > 0, "LogUniform needs a lower bound greater than 0"
assert (
0 < domain.upper < float("inf")
@ -280,8 +359,8 @@ class Integer(Domain):
logmin = np.log(domain.lower) / np.log(self.base)
logmax = np.log(domain.upper) / np.log(self.base)
items = self.base ** (np.random.uniform(logmin, logmax, size=size))
items = np.round(items).astype(int)
items = self.base ** (random_state.uniform(logmin, logmax, size=size))
items = np.floor(items).astype(int)
return items if len(items) > 1 else domain.cast(items[0])
default_sampler_cls = _Uniform
@ -337,9 +416,11 @@ class Categorical(Domain):
domain: "Categorical",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
items = random.choices(domain.categories, k=size)
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
items = random_state.choice(domain.categories, size=size).tolist()
return items if len(items) > 1 else domain.cast(items[0])
default_sampler_cls = _Uniform
@ -352,6 +433,11 @@ class Categorical(Domain):
new.set_sampler(self._Uniform())
return new
def grid(self):
new = copy(self)
new.set_sampler(Grid())
return new
def __len__(self):
return len(self.categories)
@ -381,8 +467,11 @@ class Quantized(Sampler):
domain: Domain,
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1,
random_state: "RandomState" = None,
):
values = self.sampler.sample(domain, spec, size)
if not isinstance(random_state, _BackwardsCompatibleNumpyRng):
random_state = _BackwardsCompatibleNumpyRng(random_state)
values = self.sampler.sample(domain, spec, size, random_state=random_state)
quantized = np.round(np.divide(values, self.q)) * self.q
if not isinstance(quantized, np.ndarray):
return domain.cast(quantized)
@ -462,10 +551,10 @@ def qloguniform(lower: float, upper: float, q: float, base: float = 10):
return Float(lower, upper).loguniform(base).quantized(q)
def choice(categories: List):
def choice(categories: Sequence):
"""Sample a categorical value.
Sampling from ``tune.choice([1, 2])`` is equivalent to sampling from
``random.choice([1, 2])``
``np.random.choice([1, 2])``
"""
return Categorical(categories).uniform()
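
Taken together, the per-call `random_state` makes each domain individually reproducible; a minimal sketch (seed value arbitrary):

```python
from flaml.tune.sample import choice, uniform

f = uniform(0.0, 1.0)
assert f.sample(random_state=7) == f.sample(random_state=7)

c = choice(["relu", "tanh"])
# An integer seed is wrapped into a _BackwardsCompatibleNumpyRng internally.
assert c.sample(random_state=7) == c.sample(random_state=7)
```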

View File

@ -7,13 +7,22 @@ try:
except (ImportError, AssertionError):
from . import sample
from ..searcher.variant_generator import generate_variants
from typing import Dict, Optional, Any, Tuple
from typing import Dict, Optional, Any, Tuple, Generator
import numpy as np
import logging
logger = logging.getLogger(__name__)
def generate_variants_compatible(
unresolved_spec: Dict, constant_grid_search: bool = False, random_state=None
) -> Generator[Tuple[Dict, Dict], None, None]:
try:
return generate_variants(unresolved_spec, constant_grid_search, random_state)
except TypeError:
return generate_variants(unresolved_spec, constant_grid_search)
def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str, Any]]:
"""Define-by-run function to create the search space.
@ -417,7 +426,6 @@ def indexof(domain: Dict, config: Dict) -> int:
return index
if config in domain.categories:
return domain.categories.index(config)
# print(config)
for i, cat in enumerate(domain.categories):
if not isinstance(cat, dict):
continue
@ -491,7 +499,9 @@ def complete_config(
for key, value in space.items():
if key not in config:
config[key] = value
for _, generated in generate_variants({"config": config}):
for _, generated in generate_variants_compatible(
{"config": config}, random_state=flow2.rs_random
):
config = generated["config"]
break
subspace = {}
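
Note: if ray is installed and its `generate_variants` predates the `random_state` argument, the `TypeError` fallback in `generate_variants_compatible` silently drops the seed, so reproducible sampling is not guaranteed through that path.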

View File

@ -1 +1 @@
__version__ = "0.9.1"
__version__ = "0.9.2"

View File

@ -214,15 +214,14 @@ def test_multioutput():
# predict
print(model.predict(X_test))
#train the model
# train the model
model = RegressorChain(AutoML(task="regression", time_budget=1))
model.fit(X_train, y_train)
# predict
print(model.predict(X_test))
if __name__ == "__main__":
unittest.main()

View File

@ -38,7 +38,7 @@ class TestWarmStart(unittest.TestCase):
starting_points = automl_experiment.best_config_per_estimator
print("starting_points", starting_points)
print("loss of the starting_points", automl_experiment.best_loss_per_estimator)
starting_point = starting_points['lgbm']
starting_point = starting_points["lgbm"]
hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"]
# 2. Construct a new class:
@ -55,17 +55,13 @@ class TestWarmStart(unittest.TestCase):
# if an hp is specified to be frozen, use the value provided in the starting_point
# otherwise use the setting from the original search space
if hp_name in starting_point:
space[hp_name] = {
"domain": starting_point[hp_name]
}
space[hp_name] = {"domain": starting_point[hp_name]}
# (3.1) Configure the search space for hps that are in the original search space
# but you want to change something, for example the range.
revised_hps_to_search = {
"n_estimators": {
"domain": tune.lograndint(lower=10, upper=32768),
"init_value": starting_point.get(
"n_estimators"
)
"init_value": starting_point.get("n_estimators")
or space["n_estimators"].get("init_value", 10),
"low_cost_init_value": space["n_estimators"].get(
"low_cost_init_value", 10
@ -73,9 +69,7 @@ class TestWarmStart(unittest.TestCase):
},
"num_leaves": {
"domain": tune.lograndint(lower=10, upper=3276),
"init_value": starting_point.get(
"num_leaves"
)
"init_value": starting_point.get("num_leaves")
or space["num_leaves"].get("init_value", 10),
"low_cost_init_value": space["num_leaves"].get(
"low_cost_init_value", 10
@ -95,7 +89,7 @@ class TestWarmStart(unittest.TestCase):
new_automl_experiment.add_learner(
learner_name=new_estimator_name, learner_class=MyPartiallyFreezedLargeLGBM
)
automl_settings_resume = {
"time_budget": 3,
"metric": "accuracy",

test/rep.py (new file, 36 lines)
View File

@ -0,0 +1,36 @@
from flaml.data import load_openml_dataset
from flaml.ml import ExtraTreesEstimator
from flaml import AutoML
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./")
X_train = X_train.iloc[:1000]
y_train = y_train.iloc[:1000]
class ExtraTreesEstimatorSeeded(ExtraTreesEstimator):
"""ExtraTreesEstimator for reproducible FLAML run."""
def config2params(self, config: dict) -> dict:
params = super().config2params(config)
params["random_state"] = 0
return params
settings = {
"time_budget": 1e10, # total running time in seconds
"max_iter": 3,
"metric": "ap", # average_precision
"task": "classification", # task type
"seed": 7654321, # random seed
"estimator_list": ["extra_trees_seeded"],
"verbose": False,
}
for trial_num in range(8):
automl = AutoML()
automl.add_learner(
learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded
)
automl.fit(X_train=X_train, y_train=y_train, **settings)
print(automl.best_loss)
print(automl.best_config)
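
Because the settings fix `seed` and `ExtraTreesEstimatorSeeded` pins `random_state`, all eight runs above are expected to print identical `best_loss` and `best_config` values.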

View File

@ -11,8 +11,8 @@ def test_package_minimum():
# Specify automl goal and constraint
automl_settings = {
"time_budget": 10, # in seconds
"metric": 'accuracy',
"task": 'classification',
"metric": "accuracy",
"task": "classification",
"log_file_name": "iris.log",
}
X_train, y_train = load_iris(return_X_y=True)
@ -27,4 +27,3 @@ def test_package_minimum():
preds = automl.predict_proba(X_train)
assert preds.shape == (150, 3)
print(preds)

View File

@ -436,13 +436,12 @@ analysis = tune.run(
### Reproducibility
By default, there is randomness in our tuning process. If reproducibility is desired, you could
manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
With this random seed, running the following code multiple times will generate exactly the same search trajectory.
By default, there is randomness in our tuning process (for versions <= 0.9.0). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
With this random seed, running the following code multiple times will generate exactly the same search trajectory. The reproducibility can only be guaranteed in sequential tuning.
```python
import numpy as np
np.random.seed(100)
np.random.seed(100) # This line is not needed starting from version v0.9.1.
analysis = tune.run(
simple_obj,
config=config_search_space,