mirror of
				https://github.com/microsoft/autogen.git
				synced 2025-10-25 23:10:01 +00:00 
			
		
		
		
	reproducibility for random sampling (#349)
* reproducibility for random sampling #236 * doc update
This commit is contained in:
		
							parent
							
								
									ee3162e232
								
							
						
					
					
						commit
						0b25e89f29
					
				| @ -542,7 +542,7 @@ class AutoML(BaseEstimator): | |||||||
|         new_automl.fit(X_train, y_train, starting_points=starting_points) |         new_automl.fit(X_train, y_train, starting_points=starting_points) | ||||||
|         ``` |         ``` | ||||||
| 
 | 
 | ||||||
|             seed: int or None, default=None | The random seed for np.random. |             seed: int or None, default=None | The random seed for hpo. | ||||||
|             n_concurrent_trials: [Experimental] int, default=1 | The number of |             n_concurrent_trials: [Experimental] int, default=1 | The number of | ||||||
|                 concurrent trials. For n_concurrent_trials > 1, installation of |                 concurrent trials. For n_concurrent_trials > 1, installation of | ||||||
|                 ray is required: `pip install flaml[ray]`. |                 ray is required: `pip install flaml[ray]`. | ||||||
| @ -1845,7 +1845,7 @@ class AutoML(BaseEstimator): | |||||||
|         new_automl.fit(X_train, y_train, starting_points=starting_points) |         new_automl.fit(X_train, y_train, starting_points=starting_points) | ||||||
|         ``` |         ``` | ||||||
| 
 | 
 | ||||||
|             seed: int or None, default=None | The random seed for np.random. |             seed: int or None, default=None | The random seed for hpo. | ||||||
|             n_concurrent_trials: [Experimental] int, default=1 | The number of |             n_concurrent_trials: [Experimental] int, default=1 | The number of | ||||||
|                 concurrent trials. For n_concurrent_trials > 1, installation of |                 concurrent trials. For n_concurrent_trials > 1, installation of | ||||||
|                 ray is required: `pip install flaml[ray]`. |                 ray is required: `pip install flaml[ray]`. | ||||||
| @ -1949,13 +1949,10 @@ class AutoML(BaseEstimator): | |||||||
|         ) |         ) | ||||||
|         self._search_states = {}  # key: estimator name; value: SearchState |         self._search_states = {}  # key: estimator name; value: SearchState | ||||||
|         self._random = np.random.RandomState(RANDOM_SEED) |         self._random = np.random.RandomState(RANDOM_SEED) | ||||||
|         if seed is not None: |         self._seed = seed if seed is not None else 20 | ||||||
|             np.random.seed(seed) |  | ||||||
|         self._seed = seed + 19823 if seed is not None else 20 |  | ||||||
|         self._learner_selector = learner_selector |         self._learner_selector = learner_selector | ||||||
|         old_level = logger.getEffectiveLevel() |         old_level = logger.getEffectiveLevel() | ||||||
|         self.verbose = verbose |         self.verbose = verbose | ||||||
|         # if verbose == 0: |  | ||||||
|         logger.setLevel(50 - verbose * 10) |         logger.setLevel(50 - verbose * 10) | ||||||
|         if (not mlflow or not mlflow.active_run()) and not logger.handlers: |         if (not mlflow or not mlflow.active_run()) and not logger.handlers: | ||||||
|             # Add the console handler. |             # Add the console handler. | ||||||
|  | |||||||
| @ -1,7 +1,6 @@ | |||||||
| import argparse | import argparse | ||||||
| from dataclasses import dataclass, field | from dataclasses import dataclass, field | ||||||
| from typing import Dict, Any | from typing import Dict, Any | ||||||
| 
 |  | ||||||
| from ..data import SUMMARIZATION, SEQREGRESSION, SEQCLASSIFICATION, NLG_TASKS | from ..data import SUMMARIZATION, SEQREGRESSION, SEQCLASSIFICATION, NLG_TASKS | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -113,7 +113,7 @@ class BlendSearch(Searcher): | |||||||
|                 "For cost-frugal search, " |                 "For cost-frugal search, " | ||||||
|                 "consider providing low-cost values for cost-related hps via " |                 "consider providing low-cost values for cost-related hps via " | ||||||
|                 "'low_cost_partial_config'. More info can be found at " |                 "'low_cost_partial_config'. More info can be found at " | ||||||
|                 "https://github.com/microsoft/FLAML/wiki/About-%60low_cost_partial_config%60" |                 "https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune" | ||||||
|             ) |             ) | ||||||
|         if evaluated_rewards and mode: |         if evaluated_rewards and mode: | ||||||
|             self._points_to_evaluate = [] |             self._points_to_evaluate = [] | ||||||
|  | |||||||
| @ -2,27 +2,28 @@ | |||||||
| #  * Copyright (c) Microsoft Corporation. All rights reserved. | #  * Copyright (c) Microsoft Corporation. All rights reserved. | ||||||
| #  * Licensed under the MIT License. See LICENSE file in the | #  * Licensed under the MIT License. See LICENSE file in the | ||||||
| #  * project root for license information. | #  * project root for license information. | ||||||
| from flaml.tune.sample import Domain |  | ||||||
| from typing import Dict, Optional, Tuple | from typing import Dict, Optional, Tuple | ||||||
| import numpy as np | import numpy as np | ||||||
|  | import logging | ||||||
| 
 | 
 | ||||||
| try: | try: | ||||||
|     from ray import __version__ as ray_version |     from ray import __version__ as ray_version | ||||||
| 
 | 
 | ||||||
|     assert ray_version >= "1.0.0" |     assert ray_version >= "1.0.0" | ||||||
|     from ray.tune.suggest import Searcher |     from ray.tune.suggest import Searcher | ||||||
|     from ray.tune.suggest.variant_generator import generate_variants |  | ||||||
|     from ray.tune import sample |     from ray.tune import sample | ||||||
|     from ray.tune.utils.util import flatten_dict, unflatten_dict |     from ray.tune.utils.util import flatten_dict, unflatten_dict | ||||||
| except (ImportError, AssertionError): | except (ImportError, AssertionError): | ||||||
|     from .suggestion import Searcher |     from .suggestion import Searcher | ||||||
|     from .variant_generator import generate_variants |  | ||||||
|     from ..tune import sample |     from ..tune import sample | ||||||
|     from ..tune.trial import flatten_dict, unflatten_dict |     from ..tune.trial import flatten_dict, unflatten_dict | ||||||
| from ..tune.space import complete_config, denormalize, normalize | from flaml.tune.sample import _BackwardsCompatibleNumpyRng | ||||||
| 
 | from ..tune.space import ( | ||||||
| 
 |     complete_config, | ||||||
| import logging |     denormalize, | ||||||
|  |     normalize, | ||||||
|  |     generate_variants_compatible, | ||||||
|  | ) | ||||||
| 
 | 
 | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| 
 | 
 | ||||||
| @ -84,6 +85,7 @@ class FLOW2(Searcher): | |||||||
|         self.space = space or {} |         self.space = space or {} | ||||||
|         self._space = flatten_dict(self.space, prevent_delimiter=True) |         self._space = flatten_dict(self.space, prevent_delimiter=True) | ||||||
|         self._random = np.random.RandomState(seed) |         self._random = np.random.RandomState(seed) | ||||||
|  |         self.rs_random = _BackwardsCompatibleNumpyRng(seed + 19823) | ||||||
|         self.seed = seed |         self.seed = seed | ||||||
|         self.init_config = init_config |         self.init_config = init_config | ||||||
|         self.best_config = flatten_dict(init_config) |         self.best_config = flatten_dict(init_config) | ||||||
| @ -464,8 +466,8 @@ class FLOW2(Searcher): | |||||||
|             # random |             # random | ||||||
|             for i, key in enumerate(self._tunable_keys): |             for i, key in enumerate(self._tunable_keys): | ||||||
|                 if self._direction_tried[i] != 0: |                 if self._direction_tried[i] != 0: | ||||||
|                     for _, generated in generate_variants( |                     for _, generated in generate_variants_compatible( | ||||||
|                         {"config": {key: self._space[key]}} |                         {"config": {key: self._space[key]}}, random_state=self.rs_random | ||||||
|                     ): |                     ): | ||||||
|                         if generated["config"][key] != best_config[key]: |                         if generated["config"][key] != best_config[key]: | ||||||
|                             config[key] = generated["config"][key] |                             config[key] = generated["config"][key] | ||||||
|  | |||||||
| @ -366,81 +366,81 @@ class _OptunaTrialSuggestCaptor: | |||||||
| 
 | 
 | ||||||
| class OptunaSearch(Searcher): | class OptunaSearch(Searcher): | ||||||
|     """A wrapper around Optuna to provide trial suggestions. |     """A wrapper around Optuna to provide trial suggestions. | ||||||
|     [Optuna](https://optuna.org/) |         [Optuna](https://optuna.org/) | ||||||
|     is a hyperparameter optimization library. |         is a hyperparameter optimization library. | ||||||
|     In contrast to other libraries, it employs define-by-run style |         In contrast to other libraries, it employs define-by-run style | ||||||
|     hyperparameter definitions. |         hyperparameter definitions. | ||||||
|     This Searcher is a thin wrapper around Optuna's search algorithms. |         This Searcher is a thin wrapper around Optuna's search algorithms. | ||||||
|     You can pass any Optuna sampler, which will be used to generate |         You can pass any Optuna sampler, which will be used to generate | ||||||
|     hyperparameter suggestions. |         hyperparameter suggestions. | ||||||
|     Args: |         Args: | ||||||
|         space (dict|Callable): Hyperparameter search space definition for |             space (dict|Callable): Hyperparameter search space definition for | ||||||
|             Optuna's sampler. This can be either a class `dict` with |                 Optuna's sampler. This can be either a class `dict` with | ||||||
|             parameter names as keys and ``optuna.distributions`` as values, |                 parameter names as keys and ``optuna.distributions`` as values, | ||||||
|             or a Callable - in which case, it should be a define-by-run |                 or a Callable - in which case, it should be a define-by-run | ||||||
|             function using ``optuna.trial`` to obtain the hyperparameter |                 function using ``optuna.trial`` to obtain the hyperparameter | ||||||
|             values. The function should return either a class `dict` of |                 values. The function should return either a class `dict` of | ||||||
|             constant values with names as keys, or None. |                 constant values with names as keys, or None. | ||||||
|             For more information, see  |                 For more information, see | ||||||
|             [tutorial](https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/002_configurations.html). |                 [tutorial](https://optuna.readthedocs.io/en/stable/tutorial/10_key_features/002_configurations.html). | ||||||
|             Warning - No actual computation should take place in the define-by-run |                 Warning - No actual computation should take place in the define-by-run | ||||||
|             function. Instead, put the training logic inside the function |                 function. Instead, put the training logic inside the function | ||||||
|             or class trainable passed to tune.run. |                 or class trainable passed to tune.run. | ||||||
|         metric (str): The training result objective value attribute. If None |             metric (str): The training result objective value attribute. If None | ||||||
|             but a mode was passed, the anonymous metric `_metric` will be used |                 but a mode was passed, the anonymous metric `_metric` will be used | ||||||
|             per default. |                 per default. | ||||||
|         mode (str): One of {min, max}. Determines whether objective is |             mode (str): One of {min, max}. Determines whether objective is | ||||||
|             minimizing or maximizing the metric attribute. |                 minimizing or maximizing the metric attribute. | ||||||
|         points_to_evaluate (list): Initial parameter suggestions to be run |             points_to_evaluate (list): Initial parameter suggestions to be run | ||||||
|             first. This is for when you already have some good parameters |                 first. This is for when you already have some good parameters | ||||||
|             you want to run first to help the algorithm make better suggestions |                 you want to run first to help the algorithm make better suggestions | ||||||
|             for future parameters. Needs to be a list of dicts containing the |                 for future parameters. Needs to be a list of dicts containing the | ||||||
|             configurations. |                 configurations. | ||||||
|         sampler (optuna.samplers.BaseSampler): Optuna sampler used to |             sampler (optuna.samplers.BaseSampler): Optuna sampler used to | ||||||
|             draw hyperparameter configurations. Defaults to ``TPESampler``. |                 draw hyperparameter configurations. Defaults to ``TPESampler``. | ||||||
|         seed (int): Seed to initialize sampler with. This parameter is only |             seed (int): Seed to initialize sampler with. This parameter is only | ||||||
|             used when ``sampler=None``. In all other cases, the sampler |                 used when ``sampler=None``. In all other cases, the sampler | ||||||
|             you pass should be initialized with the seed already. |                 you pass should be initialized with the seed already. | ||||||
|         evaluated_rewards (list): If you have previously evaluated the |             evaluated_rewards (list): If you have previously evaluated the | ||||||
|             parameters passed in as points_to_evaluate you can avoid |                 parameters passed in as points_to_evaluate you can avoid | ||||||
|             re-running those trials by passing in the reward attributes |                 re-running those trials by passing in the reward attributes | ||||||
|             as a list so the optimiser can be told the results without |                 as a list so the optimiser can be told the results without | ||||||
|             needing to re-compute the trial. Must be the same length as |                 needing to re-compute the trial. Must be the same length as | ||||||
|             points_to_evaluate. |                 points_to_evaluate. | ||||||
| 
 | 
 | ||||||
|     Tune automatically converts search spaces to Optuna's format: |         Tune automatically converts search spaces to Optuna's format: | ||||||
| 
 | 
 | ||||||
| ````python |     ````python | ||||||
| from ray.tune.suggest.optuna import OptunaSearch |     from ray.tune.suggest.optuna import OptunaSearch | ||||||
| config = { "a": tune.uniform(6, 8), |     config = { "a": tune.uniform(6, 8), | ||||||
|            "b": tune.loguniform(1e-4, 1e-2)} |                "b": tune.loguniform(1e-4, 1e-2)} | ||||||
| optuna_search = OptunaSearch(metric="loss", mode="min") |     optuna_search = OptunaSearch(metric="loss", mode="min") | ||||||
| tune.run(trainable, config=config, search_alg=optuna_search) |     tune.run(trainable, config=config, search_alg=optuna_search) | ||||||
| ```` |     ```` | ||||||
| 
 | 
 | ||||||
|     If you would like to pass the search space manually, the code would |         If you would like to pass the search space manually, the code would | ||||||
|     look like this: |         look like this: | ||||||
| 
 | 
 | ||||||
| ```python |     ```python | ||||||
| from ray.tune.suggest.optuna import OptunaSearch |     from ray.tune.suggest.optuna import OptunaSearch | ||||||
| import optuna |     import optuna | ||||||
| config = { "a": optuna.distributions.UniformDistribution(6, 8), |     config = { "a": optuna.distributions.UniformDistribution(6, 8), | ||||||
|            "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2)} |                "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2)} | ||||||
| optuna_search = OptunaSearch(space,metric="loss",mode="min") |     optuna_search = OptunaSearch(space,metric="loss",mode="min") | ||||||
| tune.run(trainable, search_alg=optuna_search) |     tune.run(trainable, search_alg=optuna_search) | ||||||
| # Equivalent Optuna define-by-run function approach: |     # Equivalent Optuna define-by-run function approach: | ||||||
| def define_search_space(trial: optuna.Trial): |     def define_search_space(trial: optuna.Trial): | ||||||
|     trial.suggest_float("a", 6, 8) |         trial.suggest_float("a", 6, 8) | ||||||
|     trial.suggest_float("b", 1e-4, 1e-2, log=True) |         trial.suggest_float("b", 1e-4, 1e-2, log=True) | ||||||
|     # training logic goes into trainable, this is just |         # training logic goes into trainable, this is just | ||||||
|     # for search space definition |         # for search space definition | ||||||
| optuna_search = OptunaSearch( |     optuna_search = OptunaSearch( | ||||||
|     define_search_space, |         define_search_space, | ||||||
|     metric="loss", |         metric="loss", | ||||||
|     mode="min") |         mode="min") | ||||||
| tune.run(trainable, search_alg=optuna_search) |     tune.run(trainable, search_alg=optuna_search) | ||||||
| .. versionadded:: 0.8.8 |     .. versionadded:: 0.8.8 | ||||||
| ``` |     ``` | ||||||
| 
 | 
 | ||||||
|     """ |     """ | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -18,11 +18,9 @@ | |||||||
| import copy | import copy | ||||||
| import logging | import logging | ||||||
| from typing import Any, Dict, Generator, List, Tuple | from typing import Any, Dict, Generator, List, Tuple | ||||||
| 
 |  | ||||||
| import numpy | import numpy | ||||||
| import random | import random | ||||||
| 
 | from ..tune.sample import Categorical, Domain, RandomState | ||||||
| from ..tune.sample import Categorical, Domain |  | ||||||
| 
 | 
 | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| 
 | 
 | ||||||
| @ -35,6 +33,8 @@ class TuneError(Exception): | |||||||
| 
 | 
 | ||||||
| def generate_variants( | def generate_variants( | ||||||
|     unresolved_spec: Dict, |     unresolved_spec: Dict, | ||||||
|  |     constant_grid_search: bool = False, | ||||||
|  |     random_state: "RandomState" = None, | ||||||
| ) -> Generator[Tuple[Dict, Dict], None, None]: | ) -> Generator[Tuple[Dict, Dict], None, None]: | ||||||
|     """Generates variants from a spec (dict) with unresolved values. |     """Generates variants from a spec (dict) with unresolved values. | ||||||
|     There are two types of unresolved values: |     There are two types of unresolved values: | ||||||
| @ -43,14 +43,25 @@ def generate_variants( | |||||||
|         variants in combination: |         variants in combination: | ||||||
|             "activation": grid_search(["relu", "tanh"]) |             "activation": grid_search(["relu", "tanh"]) | ||||||
|             "learning_rate": grid_search([1e-3, 1e-4, 1e-5]) |             "learning_rate": grid_search([1e-3, 1e-4, 1e-5]) | ||||||
|  |         Lambda functions: These are evaluated to produce a concrete value, and | ||||||
|  |         can express dependencies or conditional distributions between values. | ||||||
|  |         They can also be used to express random search (e.g., by calling | ||||||
|  |         into the `random` or `np` module). | ||||||
|  |             "cpu": lambda spec: spec.config.num_workers | ||||||
|  |             "batch_size": lambda spec: random.uniform(1, 1000) | ||||||
|     Finally, to support defining specs in plain JSON / YAML, grid search |     Finally, to support defining specs in plain JSON / YAML, grid search | ||||||
|     can also be defined alternatively as follows: |     and lambda functions can also be defined alternatively as follows: | ||||||
|         "activation": {"grid_search": ["relu", "tanh"]} |         "activation": {"grid_search": ["relu", "tanh"]} | ||||||
|  |         "cpu": {"eval": "spec.config.num_workers"} | ||||||
|     Use `format_vars` to format the returned dict of hyperparameters. |     Use `format_vars` to format the returned dict of hyperparameters. | ||||||
|     Yields: |     Yields: | ||||||
|         (Dict of resolved variables, Spec object) |         (Dict of resolved variables, Spec object) | ||||||
|     """ |     """ | ||||||
|     for resolved_vars, spec in _generate_variants(unresolved_spec): |     for resolved_vars, spec in _generate_variants( | ||||||
|  |         unresolved_spec, | ||||||
|  |         constant_grid_search=constant_grid_search, | ||||||
|  |         random_state=random_state, | ||||||
|  |     ): | ||||||
|         assert not _unresolved_values(spec) |         assert not _unresolved_values(spec) | ||||||
|         yield resolved_vars, spec |         yield resolved_vars, spec | ||||||
| 
 | 
 | ||||||
| @ -93,7 +104,9 @@ def parse_spec_vars( | |||||||
|     return resolved_vars, domain_vars, grid_vars |     return resolved_vars, domain_vars, grid_vars | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]: | def _generate_variants( | ||||||
|  |     spec: Dict, constant_grid_search: bool = False, random_state: "RandomState" = None | ||||||
|  | ) -> Tuple[Dict, Dict]: | ||||||
|     spec = copy.deepcopy(spec) |     spec = copy.deepcopy(spec) | ||||||
|     _, domain_vars, grid_vars = parse_spec_vars(spec) |     _, domain_vars, grid_vars = parse_spec_vars(spec) | ||||||
| 
 | 
 | ||||||
| @ -101,10 +114,34 @@ def _generate_variants(spec: Dict) -> Tuple[Dict, Dict]: | |||||||
|         yield {}, spec |         yield {}, spec | ||||||
|         return |         return | ||||||
| 
 | 
 | ||||||
|  |     # Variables to resolve | ||||||
|  |     to_resolve = domain_vars | ||||||
|  | 
 | ||||||
|  |     all_resolved = True | ||||||
|  |     if constant_grid_search: | ||||||
|  |         # In this path, we first sample random variables and keep them constant | ||||||
|  |         # for grid search. | ||||||
|  |         # `_resolve_domain_vars` will alter `spec` directly | ||||||
|  |         all_resolved, resolved_vars = _resolve_domain_vars( | ||||||
|  |             spec, domain_vars, allow_fail=True, random_state=random_state | ||||||
|  |         ) | ||||||
|  |         if not all_resolved: | ||||||
|  |             # Not all variables have been resolved, but remove those that have | ||||||
|  |             # from the `to_resolve` list. | ||||||
|  |             to_resolve = [(r, d) for r, d in to_resolve if r not in resolved_vars] | ||||||
|     grid_search = _grid_search_generator(spec, grid_vars) |     grid_search = _grid_search_generator(spec, grid_vars) | ||||||
|     for resolved_spec in grid_search: |     for resolved_spec in grid_search: | ||||||
|         resolved_vars = _resolve_domain_vars(resolved_spec, domain_vars) |         if not constant_grid_search or not all_resolved: | ||||||
|         for resolved, spec in _generate_variants(resolved_spec): |             # In this path, we sample the remaining random variables | ||||||
|  |             _, resolved_vars = _resolve_domain_vars( | ||||||
|  |                 resolved_spec, to_resolve, random_state=random_state | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |         for resolved, spec in _generate_variants( | ||||||
|  |             resolved_spec, | ||||||
|  |             constant_grid_search=constant_grid_search, | ||||||
|  |             random_state=random_state, | ||||||
|  |         ): | ||||||
|             for path, value in grid_vars: |             for path, value in grid_vars: | ||||||
|                 resolved_vars[path] = _get_value(spec, path) |                 resolved_vars[path] = _get_value(spec, path) | ||||||
|             for k, v in resolved.items(): |             for k, v in resolved.items(): | ||||||
| @ -134,7 +171,12 @@ def _get_value(spec: Dict, path: Tuple) -> Any: | |||||||
|     return spec |     return spec | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) -> Dict: | def _resolve_domain_vars( | ||||||
|  |     spec: Dict, | ||||||
|  |     domain_vars: List[Tuple[Tuple, Domain]], | ||||||
|  |     allow_fail: bool = False, | ||||||
|  |     random_state: "RandomState" = None, | ||||||
|  | ) -> Tuple[bool, Dict]: | ||||||
|     resolved = {} |     resolved = {} | ||||||
|     error = True |     error = True | ||||||
|     num_passes = 0 |     num_passes = 0 | ||||||
| @ -145,7 +187,9 @@ def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) -> | |||||||
|             if path in resolved: |             if path in resolved: | ||||||
|                 continue |                 continue | ||||||
|             try: |             try: | ||||||
|                 value = domain.sample(_UnresolvedAccessGuard(spec)) |                 value = domain.sample( | ||||||
|  |                     _UnresolvedAccessGuard(spec), random_state=random_state | ||||||
|  |                 ) | ||||||
|             except RecursiveDependencyError as e: |             except RecursiveDependencyError as e: | ||||||
|                 error = e |                 error = e | ||||||
|             except Exception: |             except Exception: | ||||||
| @ -156,8 +200,11 @@ def _resolve_domain_vars(spec: Dict, domain_vars: List[Tuple[Tuple, Domain]]) -> | |||||||
|                 assign_value(spec, path, value) |                 assign_value(spec, path, value) | ||||||
|                 resolved[path] = value |                 resolved[path] = value | ||||||
|     if error: |     if error: | ||||||
|         raise error |         if not allow_fail: | ||||||
|     return resolved |             raise error | ||||||
|  |         else: | ||||||
|  |             return False, resolved | ||||||
|  |     return True, resolved | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _grid_search_generator( | def _grid_search_generator( | ||||||
|  | |||||||
| @ -12,21 +12,76 @@ | |||||||
| # See the License for the specific language governing permissions and | # See the License for the specific language governing permissions and | ||||||
| # limitations under the License. | # limitations under the License. | ||||||
| 
 | 
 | ||||||
| # This source file is included here because ray does not fully support Windows. | # This source file is adapted here because ray does not fully support Windows. | ||||||
| 
 | 
 | ||||||
| # Copyright (c) Microsoft Corporation. | # Copyright (c) Microsoft Corporation. | ||||||
| import logging | import logging | ||||||
| import random |  | ||||||
| from copy import copy | from copy import copy | ||||||
| from inspect import signature |  | ||||||
| from math import isclose | from math import isclose | ||||||
| from typing import Any, Callable, Dict, List, Optional, Sequence, Union | from typing import Any, Dict, List, Optional, Sequence, Union | ||||||
| 
 |  | ||||||
| import numpy as np | import numpy as np | ||||||
| 
 | 
 | ||||||
|  | # Backwards compatibility | ||||||
|  | try: | ||||||
|  |     # Added in numpy>=1.17 but we require numpy>=1.16 | ||||||
|  |     np_random_generator = np.random.Generator | ||||||
|  |     LEGACY_RNG = False | ||||||
|  | except AttributeError: | ||||||
|  | 
 | ||||||
|  |     class np_random_generator: | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  |     LEGACY_RNG = True | ||||||
|  | 
 | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | class _BackwardsCompatibleNumpyRng: | ||||||
|  |     """Thin wrapper to ensure backwards compatibility between | ||||||
|  |     new and old numpy randomness generators. | ||||||
|  |     """ | ||||||
|  | 
 | ||||||
|  |     _rng = None | ||||||
|  | 
 | ||||||
|  |     def __init__( | ||||||
|  |         self, | ||||||
|  |         generator_or_seed: Optional[ | ||||||
|  |             Union["np_random_generator", np.random.RandomState, int] | ||||||
|  |         ] = None, | ||||||
|  |     ): | ||||||
|  |         if generator_or_seed is None or isinstance( | ||||||
|  |             generator_or_seed, (np.random.RandomState, np_random_generator) | ||||||
|  |         ): | ||||||
|  |             self._rng = generator_or_seed | ||||||
|  |         elif LEGACY_RNG: | ||||||
|  |             self._rng = np.random.RandomState(generator_or_seed) | ||||||
|  |         else: | ||||||
|  |             self._rng = np.random.default_rng(generator_or_seed) | ||||||
|  | 
 | ||||||
|  |     @property | ||||||
|  |     def legacy_rng(self) -> bool: | ||||||
|  |         return not isinstance(self._rng, np_random_generator) | ||||||
|  | 
 | ||||||
|  |     @property | ||||||
|  |     def rng(self): | ||||||
|  |         # don't set self._rng to np.random to avoid pickling issues | ||||||
|  |         return self._rng if self._rng is not None else np.random | ||||||
|  | 
 | ||||||
|  |     def __getattr__(self, name: str) -> Any: | ||||||
|  |         # https://numpy.org/doc/stable/reference/random/new-or-different.html | ||||||
|  |         if self.legacy_rng: | ||||||
|  |             if name == "integers": | ||||||
|  |                 name = "randint" | ||||||
|  |             elif name == "random": | ||||||
|  |                 name = "rand" | ||||||
|  |         return getattr(self.rng, name) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | RandomState = Union[ | ||||||
|  |     None, _BackwardsCompatibleNumpyRng, np_random_generator, np.random.RandomState, int | ||||||
|  | ] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| class Domain: | class Domain: | ||||||
|     """Base class to specify a type and valid range to sample parameters from. |     """Base class to specify a type and valid range to sample parameters from. | ||||||
|     This base class is implemented by parameter spaces, like float ranges |     This base class is implemented by parameter spaces, like float ranges | ||||||
| @ -61,9 +116,16 @@ class Domain: | |||||||
|             sampler = self.default_sampler_cls() |             sampler = self.default_sampler_cls() | ||||||
|         return sampler |         return sampler | ||||||
| 
 | 
 | ||||||
|     def sample(self, spec=None, size=1): |     def sample( | ||||||
|  |         self, | ||||||
|  |         spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|  |         size: int = 1, | ||||||
|  |         random_state: "RandomState" = None, | ||||||
|  |     ): | ||||||
|  |         if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|  |             random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|         sampler = self.get_sampler() |         sampler = self.get_sampler() | ||||||
|         return sampler.sample(self, spec=spec, size=size) |         return sampler.sample(self, spec=spec, size=size, random_state=random_state) | ||||||
| 
 | 
 | ||||||
|     def is_grid(self): |     def is_grid(self): | ||||||
|         return isinstance(self.sampler, Grid) |         return isinstance(self.sampler, Grid) | ||||||
| @ -86,6 +148,7 @@ class Sampler: | |||||||
|         domain: Domain, |         domain: Domain, | ||||||
|         spec: Optional[Union[List[Dict], Dict]] = None, |         spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|         size: int = 1, |         size: int = 1, | ||||||
|  |         random_state: "RandomState" = None, | ||||||
|     ): |     ): | ||||||
|         raise NotImplementedError |         raise NotImplementedError | ||||||
| 
 | 
 | ||||||
| @ -128,6 +191,7 @@ class Grid(Sampler): | |||||||
|         domain: Domain, |         domain: Domain, | ||||||
|         spec: Optional[Union[List[Dict], Dict]] = None, |         spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|         size: int = 1, |         size: int = 1, | ||||||
|  |         random_state: "RandomState" = None, | ||||||
|     ): |     ): | ||||||
|         return RuntimeError("Do not call `sample()` on grid.") |         return RuntimeError("Do not call `sample()` on grid.") | ||||||
| 
 | 
 | ||||||
| @ -139,10 +203,13 @@ class Float(Domain): | |||||||
|             domain: "Float", |             domain: "Float", | ||||||
|             spec: Optional[Union[List[Dict], Dict]] = None, |             spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|             size: int = 1, |             size: int = 1, | ||||||
|  |             random_state: "RandomState" = None, | ||||||
|         ): |         ): | ||||||
|  |             if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|  |                 random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|             assert domain.lower > float("-inf"), "Uniform needs a lower bound" |             assert domain.lower > float("-inf"), "Uniform needs a lower bound" | ||||||
|             assert domain.upper < float("inf"), "Uniform needs a upper bound" |             assert domain.upper < float("inf"), "Uniform needs a upper bound" | ||||||
|             items = np.random.uniform(domain.lower, domain.upper, size=size) |             items = random_state.uniform(domain.lower, domain.upper, size=size) | ||||||
|             return items if len(items) > 1 else domain.cast(items[0]) |             return items if len(items) > 1 else domain.cast(items[0]) | ||||||
| 
 | 
 | ||||||
|     class _LogUniform(LogUniform): |     class _LogUniform(LogUniform): | ||||||
| @ -151,7 +218,10 @@ class Float(Domain): | |||||||
|             domain: "Float", |             domain: "Float", | ||||||
|             spec: Optional[Union[List[Dict], Dict]] = None, |             spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|             size: int = 1, |             size: int = 1, | ||||||
|  |             random_state: "RandomState" = None, | ||||||
|         ): |         ): | ||||||
|  |             if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|  |                 random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0" |             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0" | ||||||
|             assert ( |             assert ( | ||||||
|                 0 < domain.upper < float("inf") |                 0 < domain.upper < float("inf") | ||||||
| @ -159,7 +229,7 @@ class Float(Domain): | |||||||
|             logmin = np.log(domain.lower) / np.log(self.base) |             logmin = np.log(domain.lower) / np.log(self.base) | ||||||
|             logmax = np.log(domain.upper) / np.log(self.base) |             logmax = np.log(domain.upper) / np.log(self.base) | ||||||
| 
 | 
 | ||||||
|             items = self.base ** (np.random.uniform(logmin, logmax, size=size)) |             items = self.base ** (random_state.uniform(logmin, logmax, size=size)) | ||||||
|             return items if len(items) > 1 else domain.cast(items[0]) |             return items if len(items) > 1 else domain.cast(items[0]) | ||||||
| 
 | 
 | ||||||
|     class _Normal(Normal): |     class _Normal(Normal): | ||||||
| @ -168,14 +238,17 @@ class Float(Domain): | |||||||
|             domain: "Float", |             domain: "Float", | ||||||
|             spec: Optional[Union[List[Dict], Dict]] = None, |             spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|             size: int = 1, |             size: int = 1, | ||||||
|  |             random_state: "RandomState" = None, | ||||||
|         ): |         ): | ||||||
|  |             if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|  |                 random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|             assert not domain.lower or domain.lower == float( |             assert not domain.lower or domain.lower == float( | ||||||
|                 "-inf" |                 "-inf" | ||||||
|             ), "Normal sampling does not allow a lower value bound." |             ), "Normal sampling does not allow a lower value bound." | ||||||
|             assert not domain.upper or domain.upper == float( |             assert not domain.upper or domain.upper == float( | ||||||
|                 "inf" |                 "inf" | ||||||
|             ), "Normal sampling does not allow a upper value bound." |             ), "Normal sampling does not allow a upper value bound." | ||||||
|             items = np.random.normal(self.mean, self.sd, size=size) |             items = random_state.normal(self.mean, self.sd, size=size) | ||||||
|             return items if len(items) > 1 else domain.cast(items[0]) |             return items if len(items) > 1 else domain.cast(items[0]) | ||||||
| 
 | 
 | ||||||
|     default_sampler_cls = _Uniform |     default_sampler_cls = _Uniform | ||||||
| @ -262,8 +335,11 @@ class Integer(Domain): | |||||||
|             domain: "Integer", |             domain: "Integer", | ||||||
|             spec: Optional[Union[List[Dict], Dict]] = None, |             spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|             size: int = 1, |             size: int = 1, | ||||||
|  |             random_state: "RandomState" = None, | ||||||
|         ): |         ): | ||||||
|             items = np.random.randint(domain.lower, domain.upper, size=size) |             if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|  |                 random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|  |             items = random_state.integers(domain.lower, domain.upper, size=size) | ||||||
|             return items if len(items) > 1 else domain.cast(items[0]) |             return items if len(items) > 1 else domain.cast(items[0]) | ||||||
| 
 | 
 | ||||||
|     class _LogUniform(LogUniform): |     class _LogUniform(LogUniform): | ||||||
| @ -272,7 +348,10 @@ class Integer(Domain): | |||||||
|             domain: "Integer", |             domain: "Integer", | ||||||
|             spec: Optional[Union[List[Dict], Dict]] = None, |             spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|             size: int = 1, |             size: int = 1, | ||||||
|  |             random_state: "RandomState" = None, | ||||||
|         ): |         ): | ||||||
|  |             if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|  |                 random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0" |             assert domain.lower > 0, "LogUniform needs a lower bound greater than 0" | ||||||
|             assert ( |             assert ( | ||||||
|                 0 < domain.upper < float("inf") |                 0 < domain.upper < float("inf") | ||||||
| @ -280,8 +359,8 @@ class Integer(Domain): | |||||||
|             logmin = np.log(domain.lower) / np.log(self.base) |             logmin = np.log(domain.lower) / np.log(self.base) | ||||||
|             logmax = np.log(domain.upper) / np.log(self.base) |             logmax = np.log(domain.upper) / np.log(self.base) | ||||||
| 
 | 
 | ||||||
|             items = self.base ** (np.random.uniform(logmin, logmax, size=size)) |             items = self.base ** (random_state.uniform(logmin, logmax, size=size)) | ||||||
|             items = np.round(items).astype(int) |             items = np.floor(items).astype(int) | ||||||
|             return items if len(items) > 1 else domain.cast(items[0]) |             return items if len(items) > 1 else domain.cast(items[0]) | ||||||
| 
 | 
 | ||||||
|     default_sampler_cls = _Uniform |     default_sampler_cls = _Uniform | ||||||
| @ -337,9 +416,11 @@ class Categorical(Domain): | |||||||
|             domain: "Categorical", |             domain: "Categorical", | ||||||
|             spec: Optional[Union[List[Dict], Dict]] = None, |             spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|             size: int = 1, |             size: int = 1, | ||||||
|  |             random_state: "RandomState" = None, | ||||||
|         ): |         ): | ||||||
| 
 |             if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|             items = random.choices(domain.categories, k=size) |                 random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|  |             items = random_state.choice(domain.categories, size=size).tolist() | ||||||
|             return items if len(items) > 1 else domain.cast(items[0]) |             return items if len(items) > 1 else domain.cast(items[0]) | ||||||
| 
 | 
 | ||||||
|     default_sampler_cls = _Uniform |     default_sampler_cls = _Uniform | ||||||
| @ -352,6 +433,11 @@ class Categorical(Domain): | |||||||
|         new.set_sampler(self._Uniform()) |         new.set_sampler(self._Uniform()) | ||||||
|         return new |         return new | ||||||
| 
 | 
 | ||||||
|  |     def grid(self): | ||||||
|  |         new = copy(self) | ||||||
|  |         new.set_sampler(Grid()) | ||||||
|  |         return new | ||||||
|  | 
 | ||||||
|     def __len__(self): |     def __len__(self): | ||||||
|         return len(self.categories) |         return len(self.categories) | ||||||
| 
 | 
 | ||||||
| @ -381,8 +467,11 @@ class Quantized(Sampler): | |||||||
|         domain: Domain, |         domain: Domain, | ||||||
|         spec: Optional[Union[List[Dict], Dict]] = None, |         spec: Optional[Union[List[Dict], Dict]] = None, | ||||||
|         size: int = 1, |         size: int = 1, | ||||||
|  |         random_state: "RandomState" = None, | ||||||
|     ): |     ): | ||||||
|         values = self.sampler.sample(domain, spec, size) |         if not isinstance(random_state, _BackwardsCompatibleNumpyRng): | ||||||
|  |             random_state = _BackwardsCompatibleNumpyRng(random_state) | ||||||
|  |         values = self.sampler.sample(domain, spec, size, random_state=random_state) | ||||||
|         quantized = np.round(np.divide(values, self.q)) * self.q |         quantized = np.round(np.divide(values, self.q)) * self.q | ||||||
|         if not isinstance(quantized, np.ndarray): |         if not isinstance(quantized, np.ndarray): | ||||||
|             return domain.cast(quantized) |             return domain.cast(quantized) | ||||||
| @ -462,10 +551,10 @@ def qloguniform(lower: float, upper: float, q: float, base: float = 10): | |||||||
|     return Float(lower, upper).loguniform(base).quantized(q) |     return Float(lower, upper).loguniform(base).quantized(q) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def choice(categories: List): | def choice(categories: Sequence): | ||||||
|     """Sample a categorical value. |     """Sample a categorical value. | ||||||
|     Sampling from ``tune.choice([1, 2])`` is equivalent to sampling from |     Sampling from ``tune.choice([1, 2])`` is equivalent to sampling from | ||||||
|     ``random.choice([1, 2])`` |     ``np.random.choice([1, 2])`` | ||||||
|     """ |     """ | ||||||
|     return Categorical(categories).uniform() |     return Categorical(categories).uniform() | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -7,13 +7,22 @@ try: | |||||||
| except (ImportError, AssertionError): | except (ImportError, AssertionError): | ||||||
|     from . import sample |     from . import sample | ||||||
|     from ..searcher.variant_generator import generate_variants |     from ..searcher.variant_generator import generate_variants | ||||||
| from typing import Dict, Optional, Any, Tuple | from typing import Dict, Optional, Any, Tuple, Generator | ||||||
| import numpy as np | import numpy as np | ||||||
| import logging | import logging | ||||||
| 
 | 
 | ||||||
| logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | def generate_variants_compatible( | ||||||
|  |     unresolved_spec: Dict, constant_grid_search: bool = False, random_state=None | ||||||
|  | ) -> Generator[Tuple[Dict, Dict], None, None]: | ||||||
|  |     try: | ||||||
|  |         return generate_variants(unresolved_spec, constant_grid_search, random_state) | ||||||
|  |     except TypeError: | ||||||
|  |         return generate_variants(unresolved_spec, constant_grid_search) | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str, Any]]: | def define_by_run_func(trial, space: Dict, path: str = "") -> Optional[Dict[str, Any]]: | ||||||
|     """Define-by-run function to create the search space. |     """Define-by-run function to create the search space. | ||||||
| 
 | 
 | ||||||
| @ -417,7 +426,6 @@ def indexof(domain: Dict, config: Dict) -> int: | |||||||
|         return index |         return index | ||||||
|     if config in domain.categories: |     if config in domain.categories: | ||||||
|         return domain.categories.index(config) |         return domain.categories.index(config) | ||||||
|     # print(config) |  | ||||||
|     for i, cat in enumerate(domain.categories): |     for i, cat in enumerate(domain.categories): | ||||||
|         if not isinstance(cat, dict): |         if not isinstance(cat, dict): | ||||||
|             continue |             continue | ||||||
| @ -491,7 +499,9 @@ def complete_config( | |||||||
|     for key, value in space.items(): |     for key, value in space.items(): | ||||||
|         if key not in config: |         if key not in config: | ||||||
|             config[key] = value |             config[key] = value | ||||||
|     for _, generated in generate_variants({"config": config}): |     for _, generated in generate_variants_compatible( | ||||||
|  |         {"config": config}, random_state=flow2.rs_random | ||||||
|  |     ): | ||||||
|         config = generated["config"] |         config = generated["config"] | ||||||
|         break |         break | ||||||
|     subspace = {} |     subspace = {} | ||||||
|  | |||||||
| @ -1 +1 @@ | |||||||
| __version__ = "0.9.1" | __version__ = "0.9.2" | ||||||
|  | |||||||
| @ -215,7 +215,7 @@ def test_multioutput(): | |||||||
|     # predict |     # predict | ||||||
|     print(model.predict(X_test)) |     print(model.predict(X_test)) | ||||||
| 
 | 
 | ||||||
|     #train the model |     # train the model | ||||||
|     model = RegressorChain(AutoML(task="regression", time_budget=1)) |     model = RegressorChain(AutoML(task="regression", time_budget=1)) | ||||||
|     model.fit(X_train, y_train) |     model.fit(X_train, y_train) | ||||||
| 
 | 
 | ||||||
| @ -223,6 +223,5 @@ def test_multioutput(): | |||||||
|     print(model.predict(X_test)) |     print(model.predict(X_test)) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 |  | ||||||
| if __name__ == "__main__": | if __name__ == "__main__": | ||||||
|     unittest.main() |     unittest.main() | ||||||
|  | |||||||
| @ -38,7 +38,7 @@ class TestWarmStart(unittest.TestCase): | |||||||
|         starting_points = automl_experiment.best_config_per_estimator |         starting_points = automl_experiment.best_config_per_estimator | ||||||
|         print("starting_points", starting_points) |         print("starting_points", starting_points) | ||||||
|         print("loss of the starting_points", automl_experiment.best_loss_per_estimator) |         print("loss of the starting_points", automl_experiment.best_loss_per_estimator) | ||||||
|         starting_point = starting_points['lgbm'] |         starting_point = starting_points["lgbm"] | ||||||
|         hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"] |         hps_to_freeze = ["colsample_bytree", "reg_alpha", "reg_lambda", "log_max_bin"] | ||||||
| 
 | 
 | ||||||
|         # 2. Construct a new class: |         # 2. Construct a new class: | ||||||
| @ -55,17 +55,13 @@ class TestWarmStart(unittest.TestCase): | |||||||
|                     # if an hp is specified to be frozen, use the value provided in the starting_point |                     # if an hp is specified to be frozen, use the value provided in the starting_point | ||||||
|                     # otherwise use the setting from the original search space |                     # otherwise use the setting from the original search space | ||||||
|                     if hp_name in starting_point: |                     if hp_name in starting_point: | ||||||
|                         space[hp_name] = { |                         space[hp_name] = {"domain": starting_point[hp_name]} | ||||||
|                             "domain": starting_point[hp_name] |  | ||||||
|                         } |  | ||||||
|                 # (3.1) Configure the search space for hps that are in the original search space |                 # (3.1) Configure the search space for hps that are in the original search space | ||||||
|                 #  but you want to change something, for example the range. |                 #  but you want to change something, for example the range. | ||||||
|                 revised_hps_to_search = { |                 revised_hps_to_search = { | ||||||
|                     "n_estimators": { |                     "n_estimators": { | ||||||
|                         "domain": tune.lograndint(lower=10, upper=32768), |                         "domain": tune.lograndint(lower=10, upper=32768), | ||||||
|                         "init_value": starting_point.get( |                         "init_value": starting_point.get("n_estimators") | ||||||
|                             "n_estimators" |  | ||||||
|                         ) |  | ||||||
|                         or space["n_estimators"].get("init_value", 10), |                         or space["n_estimators"].get("init_value", 10), | ||||||
|                         "low_cost_init_value": space["n_estimators"].get( |                         "low_cost_init_value": space["n_estimators"].get( | ||||||
|                             "low_cost_init_value", 10 |                             "low_cost_init_value", 10 | ||||||
| @ -73,9 +69,7 @@ class TestWarmStart(unittest.TestCase): | |||||||
|                     }, |                     }, | ||||||
|                     "num_leaves": { |                     "num_leaves": { | ||||||
|                         "domain": tune.lograndint(lower=10, upper=3276), |                         "domain": tune.lograndint(lower=10, upper=3276), | ||||||
|                         "init_value": starting_point.get( |                         "init_value": starting_point.get("num_leaves") | ||||||
|                             "num_leaves" |  | ||||||
|                         ) |  | ||||||
|                         or space["num_leaves"].get("init_value", 10), |                         or space["num_leaves"].get("init_value", 10), | ||||||
|                         "low_cost_init_value": space["num_leaves"].get( |                         "low_cost_init_value": space["num_leaves"].get( | ||||||
|                             "low_cost_init_value", 10 |                             "low_cost_init_value", 10 | ||||||
|  | |||||||
							
								
								
									
										36
									
								
								test/rep.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								test/rep.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,36 @@ | |||||||
|  | from flaml.data import load_openml_dataset | ||||||
|  | from flaml.ml import ExtraTreesEstimator | ||||||
|  | from flaml import AutoML | ||||||
|  | 
 | ||||||
|  | X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir="./") | ||||||
|  | X_train = X_train.iloc[:1000] | ||||||
|  | y_train = y_train.iloc[:1000] | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | class ExtraTreesEstimatorSeeded(ExtraTreesEstimator): | ||||||
|  |     """ExtraTreesEstimator for reproducible FLAML run.""" | ||||||
|  | 
 | ||||||
|  |     def config2params(self, config: dict) -> dict: | ||||||
|  |         params = super().config2params(config) | ||||||
|  |         params["random_state"] = 0 | ||||||
|  |         return params | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | settings = { | ||||||
|  |     "time_budget": 1e10,  # total running time in seconds | ||||||
|  |     "max_iter": 3, | ||||||
|  |     "metric": "ap",  # average_precision | ||||||
|  |     "task": "classification",  # task type | ||||||
|  |     "seed": 7654321,  # random seed | ||||||
|  |     "estimator_list": ["extra_trees_seeded"], | ||||||
|  |     "verbose": False, | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | for trial_num in range(8): | ||||||
|  |     automl = AutoML() | ||||||
|  |     automl.add_learner( | ||||||
|  |         learner_name="extra_trees_seeded", learner_class=ExtraTreesEstimatorSeeded | ||||||
|  |     ) | ||||||
|  |     automl.fit(X_train=X_train, y_train=y_train, **settings) | ||||||
|  |     print(automl.best_loss) | ||||||
|  |     print(automl.best_config) | ||||||
| @ -11,8 +11,8 @@ def test_package_minimum(): | |||||||
|     # Specify automl goal and constraint |     # Specify automl goal and constraint | ||||||
|     automl_settings = { |     automl_settings = { | ||||||
|         "time_budget": 10,  # in seconds |         "time_budget": 10,  # in seconds | ||||||
|         "metric": 'accuracy', |         "metric": "accuracy", | ||||||
|         "task": 'classification', |         "task": "classification", | ||||||
|         "log_file_name": "iris.log", |         "log_file_name": "iris.log", | ||||||
|     } |     } | ||||||
|     X_train, y_train = load_iris(return_X_y=True) |     X_train, y_train = load_iris(return_X_y=True) | ||||||
| @ -27,4 +27,3 @@ def test_package_minimum(): | |||||||
|     preds = automl.predict_proba(X_train) |     preds = automl.predict_proba(X_train) | ||||||
|     assert preds.shape == (150, 3) |     assert preds.shape == (150, 3) | ||||||
|     print(preds) |     print(preds) | ||||||
| 
 |  | ||||||
|  | |||||||
| @ -436,13 +436,12 @@ analysis = tune.run( | |||||||
| 
 | 
 | ||||||
| ### Reproducibility | ### Reproducibility | ||||||
| 
 | 
 | ||||||
| By default, there is randomness in our tuning process. If reproducibility is desired, you could | By default, there is randomness in our tuning process (for versions <= 0.9.0). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed. | ||||||
| manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed. | With this random seed, running the following code multiple times will generate exactly the same search trajectory. Reproducibility can only be guaranteed in sequential tuning. | ||||||
| With this random seed, running the following code multiple times will generate exactly the same search trajectory. |  | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
| import numpy as np | import numpy as np | ||||||
| np.random.seed(100) | np.random.seed(100)  # This line is not needed starting from version v0.9.1. | ||||||
| analysis = tune.run( | analysis = tune.run( | ||||||
|     simple_obj, |     simple_obj, | ||||||
|     config=config_search_space, |     config=config_search_space, | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Chi Wang
						Chi Wang