Mirror of https://github.com/microsoft/autogen.git (synced 2025-11-15 17:44:33 +00:00)
warmstart blendsearch (#186)
* increase test coverage
* use define by run only when needed
* warmstart bs
* classification -> binary, multi
* warm start with evaluated rewards
* data transformer; resource attr for gs
* BlendSearchTuner bug fix and unittest
* bug fix
* docstr and import
* task type
This commit is contained in:
parent 5fdfa2559b
commit e46573a01d

flaml/automl.py (209 changed lines)
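Two themes from the commit message run through nearly every hunk below: task names shrink from the xgboost-style 'binary:logistic'/'multi:softmax' to plain 'binary'/'multi', and BlendSearch gains warm starting from already-evaluated rewards (see the signature change at the end of this diff). A minimal sketch of user-facing code under the new naming, assuming this revision of flaml is installed (the dataset is illustrative only):

    from flaml import AutoML
    from sklearn.datasets import load_iris

    X, y = load_iris(return_X_y=True)
    automl = AutoML()
    # 'classification' is narrowed internally to 'binary' or 'multi'
    # from the number of unique labels (see _decide_split_type below).
    automl.fit(X, y, task='classification', time_budget=10)
    print(automl.best_estimator)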
@@ -5,7 +5,6 @@
 '''
 import time
 from typing import Callable, Optional
-import warnings
 from functools import partial
 import numpy as np
 from scipy.sparse import issparse
@@ -144,9 +143,8 @@ class SearchState:
 class AutoMLState:
 
     def _prepare_sample_train_data(self, sample_size):
-        full_size = len(self.y_train)
         sampled_weight = groups = None
-        if sample_size <= full_size:
+        if sample_size <= self.data_size:
             if isinstance(self.X_train, pd.DataFrame):
                 sampled_X_train = self.X_train.iloc[:sample_size]
             else:
@@ -225,13 +223,13 @@ class AutoMLState:
             self, estimator, config_w_resource, sample_size=None
     ):
         if not sample_size:
-            sample_size = config_w_resource['FLAML_sample_size']
+            sample_size = config_w_resource.get(
+                'FLAML_sample_size', len(self.y_train_all))
         config = config_w_resource.get('ml', config_w_resource).copy()
         if 'FLAML_sample_size' in config:
             del config['FLAML_sample_size']
         if "learner" in config:
-            del config['learner']
-        assert sample_size is not None
+            del config["learner"]
         sampled_X_train, sampled_y_train, sampled_weight, groups = \
             self._prepare_sample_train_data(sample_size)
         if sampled_weight is not None:
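For context, `config_w_resource` is a trial configuration that may nest the model hyperparameters under an 'ml' key and carry a 'FLAML_sample_size' resource attribute. The switch from indexing to `.get` lets configurations without that attribute fall back to training on all of `y_train_all`. Hypothetical examples of both shapes this method now accepts:

    # With the resource attribute: train on a 10000-row sample.
    config_a = {'ml': {'learner': 'lgbm', 'n_estimators': 32},
                'FLAML_sample_size': 10000}
    # Without it: sample_size falls back to len(self.y_train_all).
    config_b = {'learner': 'lgbm', 'n_estimators': 32}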
@@ -316,10 +314,7 @@ class AutoML:
        '''An object with `predict()` and `predict_proba()` method (for
            classification), storing the best trained model.
        '''
-        if self._trained_estimator:
-            return self._trained_estimator
-        else:
-            return None
+        return self.__dict__.get('_trained_estimator')

    def best_model_for_estimator(self, estimator_name):
        '''Return the best model found for a particular estimator
@@ -331,11 +326,8 @@ class AutoML:
            An object with `predict()` and `predict_proba()` method (for
            classification), storing the best trained model for estimator_name.
        '''
-        if estimator_name in self._search_states:
-            state = self._search_states[estimator_name]
-            if hasattr(state, 'trained_estimator'):
-                return state.trained_estimator
-        return None
+        state = self._search_states.get(estimator_name)
+        return state and getattr(state, 'trained_estimator', None)

    @property
    def best_estimator(self):
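Both accessors now use dictionary lookups instead of direct attribute access, so they return None rather than raising AttributeError when called before `fit()` has stored anything. Roughly:

    from flaml import AutoML

    automl = AutoML()
    print(automl.model)  # None before fit(), instead of an AttributeError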
@@ -374,10 +366,12 @@ class AutoML:
     @property
     def classes_(self):
         '''A list of n_classes elements for class labels.'''
-        if self._label_transformer:
-            return self._label_transformer.classes_.tolist()
-        if self._trained_estimator:
-            return self._trained_estimator.classes_.tolist()
+        attr = getattr(self, "label_transformer", None)
+        if attr:
+            return attr.classes_.tolist()
+        attr = getattr(self, "_trained_estimator", None)
+        if attr:
+            return attr.classes_.tolist()
         return None
 
     def predict(self, X_test):
@@ -394,12 +388,13 @@ class AutoML:
             A array-like of shape n * 1 - - each element is a predicted
             label for an instance.
         '''
-        if self._trained_estimator is None:
-            warnings.warn(
+        estimator = getattr(self, "_trained_estimator", None)
+        if estimator is None:
+            logger.warning(
                 "No estimator is trained. Please run fit with enough budget.")
             return None
         X_test = self._preprocess(X_test)
-        y_pred = self._trained_estimator.predict(X_test)
+        y_pred = estimator.predict(X_test)
         if y_pred.ndim > 1 and isinstance(y_pred, np.ndarray):
             y_pred = y_pred.flatten()
         if self._label_transformer:
@@ -443,10 +438,9 @@ class AutoML:
             dataframe = dataframe.copy()
             dataframe = dataframe.rename(columns={label[0]: 'ds', label[1]: 'y'})
         elif dataframe is not None:
-            if ('ds' not in dataframe) or ('y' not in dataframe):
-                raise ValueError(
-                    'For forecasting task, dataframe must have columns "ds" and "y" '
-                    'with the dates and values respectively.')
+            assert 'ds' in dataframe and 'y' in dataframe, (
+                'For forecasting task, dataframe must have columns '
+                '"ds" and "y" with the dates and values respectively.')
         elif (X_train_all is not None) and (y_train_all is not None):
             dataframe = pd.DataFrame(X_train_all)
             dataframe = dataframe.rename(columns={dataframe.columns[0]: 'ds'})
@@ -456,30 +450,29 @@ class AutoML:
                 label = 'y'
 
         if X_train_all is not None and y_train_all is not None:
-            if not (isinstance(X_train_all, np.ndarray) or issparse(X_train_all)
-                    or isinstance(X_train_all, pd.DataFrame)):
-                raise ValueError(
+            assert (
+                isinstance(X_train_all, np.ndarray) or issparse(X_train_all)
+                or isinstance(X_train_all, pd.DataFrame)), (
                 "X_train_all must be a numpy array, a pandas dataframe, "
                 "or Scipy sparse matrix.")
-            if not (isinstance(y_train_all, np.ndarray)
-                    or isinstance(y_train_all, pd.Series)):
-                raise ValueError(
+            assert (
+                isinstance(y_train_all, np.ndarray)
+                or isinstance(y_train_all, pd.Series)), (
                 "y_train_all must be a numpy array or a pandas series.")
-            if X_train_all.size == 0 or y_train_all.size == 0:
-                raise ValueError("Input data must not be empty.")
+            assert X_train_all.size != 0 and y_train_all.size != 0, (
+                "Input data must not be empty.")
             if isinstance(y_train_all, np.ndarray):
                 y_train_all = y_train_all.flatten()
-            if X_train_all.shape[0] != y_train_all.shape[0]:
-                raise ValueError(
-                    "# rows in X_train must match length of y_train.")
+            assert X_train_all.shape[0] == y_train_all.shape[0], (
+                "# rows in X_train must match length of y_train.")
             self._df = isinstance(X_train_all, pd.DataFrame)
             self._nrow, self._ndim = X_train_all.shape
             X, y = X_train_all, y_train_all
         elif dataframe is not None and label is not None:
-            if not isinstance(dataframe, pd.DataFrame):
-                raise ValueError("dataframe must be a pandas DataFrame")
-            if label not in dataframe.columns:
-                raise ValueError("label must a column name in dataframe")
+            assert isinstance(dataframe, pd.DataFrame), (
+                "dataframe must be a pandas DataFrame")
+            assert label in dataframe.columns, (
+                "label must a column name in dataframe")
             self._df = True
             X = dataframe.drop(columns=label)
             self._nrow, self._ndim = X.shape
@@ -498,23 +491,21 @@ class AutoML:
         self._label_transformer = self._transformer.label_transformer
         self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
         if X_val is not None and y_val is not None:
-            if not (isinstance(X_val, np.ndarray) or issparse(X_val)
-                    or isinstance(X_val, pd.DataFrame)):
-                raise ValueError(
+            assert (
+                isinstance(X_val, np.ndarray) or issparse(X_val)
+                or isinstance(X_val, pd.DataFrame)), (
                 "X_val must be None, a numpy array, a pandas dataframe, "
                 "or Scipy sparse matrix.")
-            if not (isinstance(y_val, np.ndarray)
-                    or isinstance(y_val, pd.Series)):
-                raise ValueError(
-                    "y_val must be None, a numpy array or a pandas series.")
-            if X_val.size == 0 or y_val.size == 0:
-                raise ValueError(
-                    "Validation data are expected to be nonempty. "
-                    "Use None for X_val and y_val if no validation data.")
+            assert (
+                isinstance(y_val, np.ndarray) or isinstance(y_val, pd.Series)
+            ), "y_val must be None, a numpy array or a pandas series."
+            assert X_val.size != 0 and y_val.size != 0, (
+                "Validation data are expected to be nonempty. "
+                "Use None for X_val and y_val if no validation data.")
             if isinstance(y_val, np.ndarray):
                 y_val = y_val.flatten()
-            if X_val.shape[0] != y_val.shape[0]:
-                raise ValueError("# rows in X_val must match length of y_val.")
+            assert X_val.shape[0] == y_val.shape[0], (
+                "# rows in X_val must match length of y_val.")
             if self._transformer:
                 self._state.X_val = self._transformer.transform(X_val)
             else:
@@ -548,7 +539,7 @@ class AutoML:
         X_train_all, y_train_all = self._X_train_all, self._y_train_all
         if issparse(X_train_all):
             X_train_all = X_train_all.tocsr()
-        if self._state.task in ('binary:logistic', 'multi:softmax') \
+        if self._state.task in ('binary', 'multi') \
                 and self._state.fit_kwargs.get('sample_weight') is None \
                 and self._split_type != 'time':
             # logger.info(f"label {pd.unique(y_train_all)}")
@@ -638,7 +629,7 @@ class AutoML:
                 y_train, y_val = y_train_all[train_idx], y_train_all[val_idx]
                 self._state.groups, self._state.groups_val = self._state.groups[
                     train_idx], self._state.groups[val_idx]
-        elif self._state.task != 'regression':
+        elif self._state.task in ('binary', 'multi'):
             # for classification, make sure the labels are complete in both
             # training and validation data
             label_set, first = np.unique(y_train_all, return_index=True)
@@ -760,7 +751,7 @@ class AutoML:
             record_id: An integer of the record ID in the file,
                 0 corresponds to the first trial
             task: A string of the task type,
-                'binary', 'multi', or 'regression'
+                'binary', 'multi', 'regression', 'forecast', 'rank'
 
         Returns:
             An estimator object for the given configuration
@@ -875,9 +866,10 @@ class AutoML:
                     best_val_loss = val_loss
                     sample_size = size
         if not training_duration:
+            logger.warning(
+                f"No estimator found within time_budget={time_budget}")
             from .model import BaseEstimator as Estimator
             self._trained_estimator = Estimator()
-            self._trained_estimator.model = None
             return training_duration
         if not best:
             return
@@ -898,11 +890,7 @@ class AutoML:
         elif eval_method == 'auto':
             eval_method = self._decide_eval_method(time_budget)
         self.modelcount = 0
-        if self._state.task != 'forecast':
-            self._prepare_data(eval_method, split_ratio, n_splits)
-        else:
-            self._prepare_data(eval_method, split_ratio, n_splits,
-                               period=self._state.fit_kwargs['period'])
+        self._prepare_data(eval_method, split_ratio, n_splits)
         self._state.time_budget = None
         self._state.n_jobs = n_jobs
         self._trained_estimator = self._state._train_with_config(
@@ -911,9 +899,10 @@ class AutoML:
         return training_duration
 
     def _decide_split_type(self, split_type):
-        if self._state.task in ('classification', 'binary', 'multi'):
+        if self._state.task == 'classification':
             self._state.task = get_classification_objective(
                 len(np.unique(self._y_train_all)))
+        if self._state.task in ('binary', 'multi'):
             assert split_type in [None, "stratified", "uniform", "time"]
             self._split_type = split_type or "stratified"
         elif self._state.task == 'regression':
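With the reordering, the umbrella task 'classification' is first resolved to 'binary' or 'multi' from the label count, and only then does the classification branch choose the split type. The resolution step in isolation, using `get_classification_objective` from flaml/ml.py (renamed later in this diff):

    import numpy as np
    from flaml.ml import get_classification_objective

    y = np.array([0, 1, 2, 1, 0])
    task = get_classification_objective(len(np.unique(y)))  # 'multi'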
@@ -1248,13 +1237,14 @@ class AutoML:
                 For time series forecasting, must be None or 'time'.
                 For ranking task, must be None or 'group'.
             hpo_method: str or None, default=None | The hyperparameter
-                optimization method. When it is None, CFO is used.
+                optimization method. By default, CFO is used for sequential
+                search and BlendSearch is used for parallel search.
                 No need to set when using flaml's default search space or using
                 a simple customized search space. When set to 'bs', BlendSearch
                 is used. BlendSearch can be tried when the search space is
                 complex, for example, containing multiple disjoint, discontinuous
-                subspaces. When set to 'random' and the argument 'n_concurrent_trials'
-                is larger than 1, RandomSearch is used.
+                subspaces. When set to 'random' and the argument
+                `n_concurrent_trials` is larger than 1, random search is used.
             starting_points: A dictionary to specify the starting hyperparameter
                 config for the estimators.
                 Keys are the name of the estimators, and values are the starting
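The rewritten default matches the selection logic in the next hunk: 'cfo' when `n_concurrent_trials == 1`, 'bs' otherwise. A hedged usage sketch (dataset illustrative only):

    from flaml import AutoML
    from sklearn.datasets import load_breast_cancer

    X, y = load_breast_cancer(return_X_y=True)
    automl = AutoML()
    # The default would resolve to 'cfo' here (sequential search);
    # 'bs' forces BlendSearch, e.g. for a complex customized search space.
    automl.fit(X, y, task='classification', time_budget=30, hpo_method='bs')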
@@ -1355,8 +1345,7 @@ class AutoML:
                 estimator_list))
         self.estimator_list = estimator_list
         self._hpo_method = hpo_method or (
-            'cfo' if n_concurrent_trials == 1 or len(estimator_list) == 1
-            else 'bs')
+            'cfo' if n_concurrent_trials == 1 else 'bs')
         self._state.time_budget = time_budget
         self._active_estimators = estimator_list.copy()
         self._ensemble = ensemble
@@ -1379,14 +1368,16 @@ class AutoML:
         if self._best_estimator:
             logger.info("fit succeeded")
             logger.info(f"Time taken to find the best model: {self._time_taken_best_iter}")
-            if self._time_taken_best_iter >= time_budget * 0.7 and not all(
+            if self._hpo_method in ('cfo', 'bs') and (
+                self._time_taken_best_iter >= time_budget * 0.7) and not all(
                 state.search_alg and state.search_alg.searcher.is_ls_ever_converged
                 for state in self._search_states.values()
             ):
-                logger.warn("Time taken to find the best model is {0:.0f}% of the "
-                            "provided time budget and not all estimators' hyperparameter "
-                            "search converged. Consider increasing the time budget.".format(
-                                self._time_taken_best_iter / time_budget * 100))
+                logger.warning(
+                    "Time taken to find the best model is {0:.0f}% of the "
+                    "provided time budget and not all estimators' hyperparameter "
+                    "search converged. Consider increasing the time budget.".format(
+                        self._time_taken_best_iter / time_budget * 100))
 
         if not keep_search_state:
             # release space
@@ -1413,20 +1404,16 @@ class AutoML:
                 "Please run pip install flaml[ray]")
         if self._hpo_method in ('cfo', 'grid'):
             from flaml import CFO as SearchAlgo
-        elif 'optuna' == self._hpo_method:
-            from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
         elif 'bs' == self._hpo_method:
             from flaml import BlendSearch as SearchAlgo
-        elif 'cfocat' == self._hpo_method:
-            from flaml.searcher.cfo_cat import CFOCat as SearchAlgo
         elif 'random' == self._hpo_method:
             from ray.tune.suggest import BasicVariantGenerator as SearchAlgo
-            from ray.tune.sample import Domain as RayDomain
-            from .tune.sample import Domain
+            from ray.tune.sample import Domain
         else:
             raise NotImplementedError(
                 f"hpo_method={self._hpo_method} is not recognized. "
                 "'cfo' and 'bs' are supported.")
+        space = self.search_space
         if self._hpo_method == 'random':
             # Any point in points_to_evaluate must consist of hyperparamters
             # that are tunable, which can be identified by checking whether
@@ -1434,19 +1421,19 @@ class AutoML:
             # the 'Domain' class from flaml or ray.tune
             points_to_evaluate = self.points_to_evaluate.copy()
             to_del = []
-            for k, v in self.search_space.items():
-                if not (isinstance(v, Domain) or isinstance(v, RayDomain)):
+            for k, v in space.items():
+                if not isinstance(v, Domain):
                     to_del.append(k)
             for k in to_del:
                 for p in points_to_evaluate:
-                    del p[k]
-            search_alg = SearchAlgo(max_concurrent=self._n_concurrent_trials,
-                                    points_to_evaluate=points_to_evaluate)
+                    if k in p:
+                        del p[k]
+            search_alg = SearchAlgo(
+                max_concurrent=self._n_concurrent_trials,
+                points_to_evaluate=points_to_evaluate)
         else:
             search_alg = SearchAlgo(
-                metric='val_loss',
-                space=self.search_space,
+                metric='val_loss', space=space,
                 low_cost_partial_config=self.low_cost_partial_config,
                 points_to_evaluate=self.points_to_evaluate,
                 cat_hp_cost=self.cat_hp_cost,
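The loop above strips keys whose search-space entry is a constant rather than a tunable `Domain` from each start point before handing them to ray.tune's BasicVariantGenerator, and the new `if k in p` guard tolerates points that never contained a key. Standalone, the filtering amounts to this sketch (`filter_points` is a hypothetical name):

    from ray.tune.sample import Domain  # as imported in the 'random' branch

    def filter_points(space: dict, points: list) -> list:
        # Keep only hyperparameters that are tunable Domains in the space.
        tunable = {k for k, v in space.items() if isinstance(v, Domain)}
        return [{k: v for k, v in p.items() if k in tunable} for p in points]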
@@ -1463,7 +1450,7 @@ class AutoML:
         resources_per_trial = {
             "cpu": self._state.n_jobs} if self._state.n_jobs > 1 else None
         analysis = ray.tune.run(
-            self.trainable, search_alg=search_alg, config=self.search_space,
+            self.trainable, search_alg=search_alg, config=space,
             metric='val_loss', mode='min', resources_per_trial=resources_per_trial,
             time_budget_s=self._state.time_budget, num_samples=self._max_iter,
             verbose=self.verbose)
@@ -1521,6 +1508,7 @@ class AutoML:
             from flaml import CFO as SearchAlgo
         elif 'optuna' == self._hpo_method:
             try:
+                from ray import __version__ as ray_version
                 assert ray_version >= '1.0.0'
                 from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
             except (ImportError, AssertionError):
@@ -1600,7 +1588,9 @@ class AutoML:
                 else:
                     algo = SearchAlgo(
                         metric='val_loss', mode='min', space=search_space,
-                        points_to_evaluate=points_to_evaluate,
+                        points_to_evaluate=points_to_evaluate
+                        if len(search_state.init_config) == len(
+                            search_space) else None,
                     )
                 search_state.search_alg = ConcurrencyLimiter(algo,
                                                              max_concurrent=1)
@@ -1710,13 +1700,16 @@ class AutoML:
                     search_state.best_loss,
                     self._best_estimator,
                     self._state.best_loss))
-            if all(state.search_alg and state.search_alg.searcher.is_ls_ever_converged
-                   for state in self._search_states.values()) and (
-                       self._state.time_from_start
-                       > self._warn_threshold * self._time_taken_best_iter):
-                logger.warn("All estimator hyperparameters local search has converged at least once, "
-                            f"and the total search time exceeds {self._warn_threshold} times the time taken "
-                            "to find the best model.")
+            if self._hpo_method in ('cfo', 'bs') and all(
+                state.search_alg and state.search_alg.searcher.is_ls_ever_converged
+                for state in self._search_states.values()) and (
+                    self._state.time_from_start
+                    > self._warn_threshold * self._time_taken_best_iter):
+                logger.warning(
+                    "All estimator hyperparameters local search has "
+                    "converged at least once, and the total search time "
+                    f"exceeds {self._warn_threshold} times the time taken "
+                    "to find the best model.")
                 self._warn_threshold *= 10
         else:
             logger.info(f"no enough budget for learner {estimator}")
@@ -1766,6 +1759,8 @@ class AutoML:
         self._best_estimator = None
         self._retrained_config = {}
         self._warn_threshold = 10
+        self._selected = None
+        self.modelcount = 0
 
         if self._n_concurrent_trials == 1:
             self._search_sequential()
@@ -1782,7 +1777,7 @@ class AutoML:
         if self._trained_estimator:
             logger.info(f'selected model: {self._trained_estimator.model}')
         if self._ensemble and self._state.task in (
-            'binary:logistic', 'multi:softmax', 'regression',
+            'binary', 'multi', 'regression',
         ):
             search_states = list(x for x in self._search_states.items()
                                  if x[1].trained_estimator)
@@ -1795,7 +1790,7 @@ class AutoML:
             logger.info(estimators)
             if len(estimators) <= 1:
                 return
-            if self._state.task in ('binary:logistic', 'multi:softmax'):
+            if self._state.task in ('binary', 'multi'):
                 from sklearn.ensemble import StackingClassifier as Stacker
             else:
                 from sklearn.ensemble import StackingRegressor as Stacker
@@ -1838,9 +1833,6 @@ class AutoML:
             else:
                 logger.info(
                     "not retraining because the time budget is too small.")
-        else:
-            self._selected = self._trained_estimator = None
-            self.modelcount = 0
         if self.model and mlflow is not None and mlflow.active_run():
             mlflow.sklearn.log_model(self.model, 'best_model')
 
@@ -1886,8 +1878,7 @@ class AutoML:
                     speed = delta_loss / delta_time
                 if speed:
                     estimated_cost = max(2 * gap / speed, estimated_cost)
-                if estimated_cost == 0:
-                    estimated_cost = 1e-10
+                estimated_cost = estimated_cost or 1e-10
                 inv.append(1 / estimated_cost)
             else:
                 estimated_cost = self._eci[i]
flaml/data.py

@@ -261,7 +261,7 @@ class DataTransformer:
                 cat_columns, num_columns, datetime_columns
             self._drop = drop
 
-        if task in ('binary:logistic', 'multi:softmax'):
+        if task in ('binary', 'multi', 'classification'):
             from sklearn.preprocessing import LabelEncoder
             self.label_transformer = LabelEncoder()
             y = self.label_transformer.fit_transform(y)
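After the rename, the label encoder is fitted for any of the three classification task names, including the umbrella 'classification' that AutoML later narrows. A small sketch, assuming `DataTransformer` from flaml/data.py with a fit_transform(X, y, task) interface:

    import pandas as pd
    from flaml.data import DataTransformer

    X = pd.DataFrame({'f1': [1, 2, 3, 4], 'f2': ['a', 'b', 'a', 'b']})
    y = pd.Series(['cat', 'dog', 'cat', 'dog'])
    dt = DataTransformer()
    X_t, y_t = dt.fit_transform(X, y, task='classification')  # y_t: integer codes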
flaml/ml.py (19 changed lines)

@@ -24,7 +24,7 @@ def get_estimator_class(task, estimator_name):
     ''' when adding a new learner, need to add an elif branch '''
 
     if 'xgboost' == estimator_name:
-        if 'regression' in task:
+        if 'regression' == task:
            estimator_class = XGBoostEstimator
        else:
            estimator_class = XGBoostSklearnEstimator
@@ -179,7 +179,8 @@ def _eval_estimator(config, estimator, X_train, y_train, X_test, y_test, weight_
                                 fit_kwargs.get('groups'))
     if isinstance(metric_for_logging, dict):
         pred_time = metric_for_logging.get('pred_time', 0)
-    test_pred_y = None  # eval_metric may return test_pred_y but not necessarily. Setting None for now.
+    test_pred_y = None
+    # eval_metric may return test_pred_y but not necessarily. Setting None for now.
     return test_loss, metric_for_logging, pred_time, test_pred_y
 
 
@@ -193,10 +194,10 @@ def get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_te
     # fit_kwargs['X_val'] = X_test
     # fit_kwargs['y_val'] = y_test
     estimator.fit(X_train, y_train, budget, **fit_kwargs)
-    test_loss, metric_for_logging, pred_time, _ = _eval_estimator(config, estimator,
-                                                                  X_train, y_train, X_test, y_test,
+    test_loss, metric_for_logging, pred_time, _ = _eval_estimator(
+        config, estimator, X_train, y_train, X_test, y_test,
         weight_test, groups_test, eval_metric, obj,
         labels, log_training_metric, fit_kwargs)
     train_time = time.time() - start
     return test_loss, metric_for_logging, train_time, pred_time
 
@@ -212,7 +213,7 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
     valid_fold_num = total_fold_num = 0
     n = kf.get_n_splits()
     X_train_split, y_train_split = X_train_all, y_train_all
-    if task == 'binary:logistics' or task == 'multi:softmax':
+    if task in ('binary', 'multi'):
         labels = np.unique(y_train_all)
     else:
         labels = None
@@ -346,9 +347,9 @@ def train_estimator(
 
 def get_classification_objective(num_labels: int) -> str:
     if num_labels == 2:
-        objective_name = 'binary:logistic'
+        objective_name = 'binary'
     else:
-        objective_name = 'multi:softmax'
+        objective_name = 'multi'
     return objective_name
 
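The helper now returns the short task names used throughout the rest of this commit:

    from flaml.ml import get_classification_objective

    assert get_classification_objective(2) == 'binary'
    assert get_classification_objective(5) == 'multi'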
flaml/model.py

@@ -3,7 +3,6 @@
  * Licensed under the MIT License.
 '''
 
-import warnings
 import numpy as np
 import xgboost as xgb
 import time
@@ -31,12 +30,12 @@ class BaseEstimator:
         for both regression and classification
     '''
 
-    def __init__(self, task='binary:logistic', **params):
+    def __init__(self, task='binary', **params):
         '''Constructor
 
         Args:
             task: A string of the task type, one of
-                'binary:logistic', 'multi:softmax', 'regression'
+                'binary', 'multi', 'regression', 'rank', 'forecast'
             n_jobs: An integer of the number of parallel threads
             params: A dictionary of the hyperparameter names and values
         '''
@@ -48,7 +47,7 @@ class BaseEstimator:
             del self.params['_estimator_type']
         else:
             self._estimator_type = "classifier" if task in (
-                'binary:logistic', 'multi:softmax') else "regressor"
+                'binary', 'multi') else "regressor"
 
     def get_params(self, deep=False):
         params = self.params.copy()
@@ -145,11 +144,10 @@ class BaseEstimator:
             Each element at (i,j) is the probability for instance i to be in
                 class j
         '''
-        if 'regression' in self._task:
-            raise ValueError('Regression tasks do not support predict_prob')
-        else:
-            X_test = self._preprocess(X_test)
-            return self._model.predict_proba(X_test)
+        assert self._task in ('binary', 'multi'), (
+            'predict_prob() only for classification task.')
+        X_test = self._preprocess(X_test)
+        return self._model.predict_proba(X_test)
 
     def cleanup(self):
         pass
@@ -193,7 +191,7 @@ class BaseEstimator:
 
 class SKLearnEstimator(BaseEstimator):
 
-    def __init__(self, task='binary:logistic', **params):
+    def __init__(self, task='binary', **params):
         super().__init__(task, **params)
 
     def _preprocess(self, X):
@@ -264,21 +262,18 @@ class LGBMEstimator(BaseEstimator):
         n_estimators = int(round(config['n_estimators']))
         return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
 
-    def __init__(self, task='binary:logistic', log_max_bin=8, **params):
+    def __init__(self, task='binary', log_max_bin=8, **params):
         super().__init__(task, **params)
         if "objective" not in self.params:
             # Default: ‘regression’ for LGBMRegressor,
             # ‘binary’ or ‘multiclass’ for LGBMClassifier
-            if 'regression' == task:
-                objective = 'regression'
-            elif 'binary' in task:
+            objective = 'regression'
+            if 'binary' in task:
                 objective = 'binary'
             elif 'multi' in task:
                 objective = 'multiclass'
             elif 'rank' == task:
                 objective = 'lambdarank'
-            else:
-                objective = 'regression'
             self.params["objective"] = objective
         if "n_estimators" in self.params:
             self.params["n_estimators"] = int(round(self.params["n_estimators"]))
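The reordered branches make 'regression' the fallback objective rather than an explicit case, so the mapping is: 'binary' to 'binary', 'multi' to 'multiclass', 'rank' to 'lambdarank', anything else to 'regression'. For instance, assuming LGBMEstimator as defined in this file:

    from flaml.model import LGBMEstimator

    est = LGBMEstimator(task='multi')
    print(est.params['objective'])   # 'multiclass'
    est = LGBMEstimator(task='forecast')
    print(est.params['objective'])   # 'regression' (the fallback)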
@@ -477,7 +472,7 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
         return XGBoostEstimator.cost_relative2lgbm()
 
     def __init__(
-        self, task='binary:logistic', n_jobs=1,
+        self, task='binary', n_jobs=1,
         n_estimators=4, max_leaves=4, subsample=1.0,
         min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0,
         colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist',
@@ -506,11 +501,10 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
             'use_label_encoder': params.get('use_label_encoder', False),
         })
 
-        if 'regression' == task:
-            self.estimator_class = xgb.XGBRegressor
-        elif 'rank' == task:
+        self.estimator_class = xgb.XGBRegressor
+        if 'rank' == task:
             self.estimator_class = xgb.XGBRanker
-        else:
+        elif task in ('binary', 'multi'):
             self.estimator_class = xgb.XGBClassifier
         self._time_per_iter = None
         self._train_size = 0
@@ -543,7 +537,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
                 'low_cost_init_value': 4,
             },
         }
-        if task != 'regression':
+        if task in ('binary', 'multi'):
             space['criterion'] = {
                 'domain': tune.choice(['gini', 'entropy']),
                 # 'init_value': 'gini',
@@ -555,7 +549,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
         return 2.0
 
     def __init__(
-        self, task='binary:logistic', n_jobs=1,
+        self, task='binary', n_jobs=1,
         n_estimators=4, max_features=1.0, criterion='gini', max_leaves=4,
         **params
     ):
@@ -569,9 +563,8 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
             'max_features': float(max_features),
             "max_leaf_nodes": params.get('max_leaf_nodes', int(round(max_leaves))),
         })
-        if 'regression' in task:
-            self.estimator_class = RandomForestRegressor
-        else:
+        self.estimator_class = RandomForestRegressor
+        if task in ('binary', 'multi'):
             self.estimator_class = RandomForestClassifier
             self.params['criterion'] = criterion
 
@@ -586,7 +579,7 @@ class ExtraTreeEstimator(RandomForestEstimator):
     def cost_relative2lgbm(cls):
         return 1.9
 
-    def __init__(self, task='binary:logistic', **params):
+    def __init__(self, task='binary', **params):
         super().__init__(task, **params)
         if 'regression' in task:
             self.estimator_class = ExtraTreesRegressor
@@ -610,7 +603,7 @@ class LRL1Classifier(SKLearnEstimator):
         return 160
 
     def __init__(
-        self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
+        self, task='binary', n_jobs=1, tol=0.0001, C=1.0,
         **params
     ):
         super().__init__(task, **params)
@@ -621,11 +614,9 @@ class LRL1Classifier(SKLearnEstimator):
             'solver': params.get("solver", 'saga'),
             'n_jobs': n_jobs,
         })
-        if 'regression' in task:
-            self.estimator_class = None
-            raise NotImplementedError('LR does not support regression task')
-        else:
-            self.estimator_class = LogisticRegression
+        assert task in ('binary', 'multi'), (
+            'LogisticRegression for classification task only')
+        self.estimator_class = LogisticRegression
 
 
 class LRL2Classifier(SKLearnEstimator):
@@ -639,7 +630,7 @@ class LRL2Classifier(SKLearnEstimator):
         return 25
 
     def __init__(
-        self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0,
+        self, task='binary', n_jobs=1, tol=0.0001, C=1.0,
         **params
     ):
         super().__init__(task, **params)
@@ -650,11 +641,9 @@ class LRL2Classifier(SKLearnEstimator):
             'solver': params.get("solver", 'lbfgs'),
             'n_jobs': n_jobs,
         })
-        if 'regression' in task:
-            self.estimator_class = None
-            raise NotImplementedError('LR does not support regression task')
-        else:
-            self.estimator_class = LogisticRegression
+        assert task in ('binary', 'multi'), (
+            'LogisticRegression for classification task only')
+        self.estimator_class = LogisticRegression
 
 
 class CatBoostEstimator(BaseEstimator):
@@ -711,7 +700,7 @@ class CatBoostEstimator(BaseEstimator):
         return X
 
     def __init__(
-        self, task='binary:logistic', n_jobs=1,
+        self, task='binary', n_jobs=1,
         n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
     ):
         super().__init__(task, **params)
@@ -723,10 +712,9 @@ class CatBoostEstimator(BaseEstimator):
             'verbose': params.get('verbose', False),
             'random_seed': params.get("random_seed", 10242048),
         })
-        if 'regression' in task:
-            from catboost import CatBoostRegressor
-            self.estimator_class = CatBoostRegressor
-        else:
+        from catboost import CatBoostRegressor
+        self.estimator_class = CatBoostRegressor
+        if task in ('binary', 'multi'):
             from catboost import CatBoostClassifier
             self.estimator_class = CatBoostClassifier
 
@@ -831,7 +819,7 @@ class KNeighborsEstimator(BaseEstimator):
         return 30
 
     def __init__(
-        self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params
+        self, task='binary', n_jobs=1, n_neighbors=5, **params
    ):
         super().__init__(task, **params)
         self.params.update({
@@ -839,10 +827,9 @@ class KNeighborsEstimator(BaseEstimator):
             'weights': params.get('weights', 'distance'),
             'n_jobs': n_jobs,
         })
-        if 'regression' in task:
-            from sklearn.neighbors import KNeighborsRegressor
-            self.estimator_class = KNeighborsRegressor
-        else:
+        from sklearn.neighbors import KNeighborsRegressor
+        self.estimator_class = KNeighborsRegressor
+        if task in ('binary', 'multi'):
             from sklearn.neighbors import KNeighborsClassifier
             self.estimator_class = KNeighborsClassifier
 
@@ -920,7 +907,7 @@ class FBProphet(BaseEstimator):
             forecast = self._model.predict(X_test)
             return forecast['yhat']
         else:
-            warnings.warn(
+            logger.warning(
                 "Estimator is not fit yet. Please run fit() before predict().")
             return np.ones(X_test.shape[0])
 
@@ -954,8 +941,9 @@ class ARIMA(FBProphet):
         return train_df
 
     def fit(self, X_train, y_train, budget=None, **kwargs):
-        from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator
+        import warnings
         warnings.filterwarnings("ignore")
+        from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator
         current_time = time.time()
         train_df = self._join(X_train, y_train)
         model = ARIMA_estimator(
flaml/nlp/autotransformers.py

@@ -29,12 +29,11 @@ class AutoTransformers:
         .. code-block:: python
 
             autohf = AutoTransformers()
-            autohf_settings = {"resources_per_trial": {"cpu": 1},
-                               "num_samples": -1,
-                               "time_budget": 100000,
-                               "ckpt_per_epoch": 1,
-                               "fp16": False,
-                               }
+            autohf_settings = {
+                "resources_per_trial": {"cpu": 1, "gpu": 1},
+                "num_samples": -1,
+                "time_budget": 60,
+            }
 
             validation_metric, analysis = autohf.fit(**autohf_settings)
 
@@ -45,10 +44,11 @@ class AutoTransformers:
         search_space = {}
 
         if mode == "grid":
+            # TODO add test
             for each_hp in config_json.keys():
                 this_config = config_json[each_hp]
                 assert isinstance(this_config, dict) or isinstance(this_config, list), \
-                    "config of " + each_hp + " must be dict or list"
+                    "config of " + each_hp + " must be dict or list for grid search"
                 search_space[each_hp] = ray.tune.grid_search(this_config)
         else:
             for each_hp in config_json.keys():
@@ -85,10 +85,6 @@ class AutoTransformers:
             search_space_hpo_json,
             mode=self.jobid_config.mod)
 
-    @staticmethod
-    def _wrapper(func, *args):  # with star
-        return func(*args)
-
     @staticmethod
     def _get_split_name(data_raw, fold_name=None):
         if fold_name:
@@ -179,7 +175,7 @@ class AutoTransformers:
             data_raw = load_dataset(JobID.dataset_list_to_str(self.jobid_config.dat),
                                     self.jobid_config.subdat)
         else:
-            data_raw = AutoTransformers._wrapper(load_dataset, *self.jobid_config.dat)
+            data_raw = load_dataset(*self.jobid_config.dat)
 
         self._train_name, self._dev_name, self._test_name = AutoTransformers._get_split_name(
             data_raw,
@@ -349,6 +345,7 @@ class AutoTransformers:
         return training_args_config, per_model_config
 
     def _objective(self, config, reporter, checkpoint_dir=None):
+        # TODO add test
         from transformers.trainer_utils import set_seed
         self._set_transformers_verbosity(self._transformers_verbose)
 
@@ -827,6 +824,7 @@ class AutoTransformers:
         test_trainer = TrainerForAutoTransformers(best_model, training_args)
 
         if self.jobid_config.spt == "ori":
+            # TODO add test
             if "label" in self.test_dataset.features.keys():
                 self.test_dataset.remove_columns_("label")
                 print("Cleaning the existing label column from test data")
flaml/scheduler/__init__.py

@@ -1,2 +1,2 @@
-from .trial_scheduler import TrialScheduler, FIFOScheduler
+from .trial_scheduler import TrialScheduler
 from .online_scheduler import OnlineScheduler, OnlineSuccessiveDoublingScheduler, ChaChaScheduler

flaml/scheduler/online_scheduler.py

@@ -1,12 +1,12 @@
 import numpy as np
 import logging
-from typing import Optional, Dict
-from flaml.scheduler import FIFOScheduler, TrialScheduler
+from typing import Dict
+from flaml.scheduler import TrialScheduler
 from flaml.tune import Trial
 logger = logging.getLogger(__name__)
 
 
-class OnlineScheduler(FIFOScheduler):
+class OnlineScheduler(TrialScheduler):
     """Implementation of the OnlineFIFOSchedulers.
 
     Methods:
flaml/scheduler/trial_scheduler.py

@@ -17,10 +17,8 @@ This source file is adapted here because ray does not fully support Windows.
 
 Copyright (c) Microsoft Corporation.
 '''
-from typing import Dict, Optional
 
 from flaml.tune import trial_runner
-from flaml.tune.result import DEFAULT_METRIC
 from flaml.tune.trial import Trial
 
 
@@ -31,127 +29,10 @@ class TrialScheduler:
     PAUSE = "PAUSE"  #: Status for pausing trial execution
     STOP = "STOP"  #: Status for stopping trial execution
 
-    _metric = None
-
-    @property
-    def metric(self):
-        return self._metric
-
-    def set_search_properties(self, metric: Optional[str],
-                              mode: Optional[str]) -> bool:
-        """Pass search properties to scheduler.
-
-        This method acts as an alternative to instantiating schedulers
-        that react to metrics with their own `metric` and `mode` parameters.
-
-        Args:
-            metric (str): Metric to optimize
-            mode (str): One of ["min", "max"]. Direction to optimize.
-        """
-        if self._metric and metric:
-            return False
-        if metric:
-            self._metric = metric
-
-        if self._metric is None:
-            # Per default, use anonymous metric
-            self._metric = DEFAULT_METRIC
-
-        return True
-
     def on_trial_add(self, trial_runner: "trial_runner.TrialRunner",
                      trial: Trial):
-        """Called when a new trial is added to the trial runner."""
-
-        raise NotImplementedError
-
-    def on_trial_error(self, trial_runner: "trial_runner.TrialRunner",
-                       trial: Trial):
-        """Notification for the error of trial.
-
-        This will only be called when the trial is in the RUNNING state."""
-
-        raise NotImplementedError
-
-    def on_trial_result(self, trial_runner: "trial_runner.TrialRunner",
-                        trial: Trial, result: Dict) -> str:
-        """Called on each intermediate result returned by a trial.
-
-        At this point, the trial scheduler can make a decision by returning
-        one of CONTINUE, PAUSE, and STOP. This will only be called when the
-        trial is in the RUNNING state."""
-
-        raise NotImplementedError
-
-    def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner",
-                          trial: Trial, result: Dict):
-        """Notification for the completion of trial.
-
-        This will only be called when the trial is in the RUNNING state and
-        either completes naturally or by manual termination."""
-
-        raise NotImplementedError
-
-    def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner",
-                        trial: Trial):
-        """Called to remove trial.
-
-        This is called when the trial is in PAUSED or PENDING state. Otherwise,
-        call `on_trial_complete`."""
-
-        raise NotImplementedError
-
-    def choose_trial_to_run(
-            self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]:
-        """Called to choose a new trial to run.
-
-        This should return one of the trials in trial_runner that is in
-        the PENDING or PAUSED state. This function must be idempotent.
-        If no trial is ready, return None."""
-
-        raise NotImplementedError
-
-    def debug_string(self) -> str:
-        """Returns a human readable message for printing to the console."""
-
-        raise NotImplementedError
-
-    def save(self, checkpoint_path: str):
-        """Save trial scheduler to a checkpoint"""
-        raise NotImplementedError
-
-    def restore(self, checkpoint_path: str):
-        """Restore trial scheduler from checkpoint."""
-        raise NotImplementedError
-
-
-class FIFOScheduler(TrialScheduler):
-    """Simple scheduler that just runs trials in submission order."""
-
-    def on_trial_add(self, trial_runner: "trial_runner.TrialRunner",
-                     trial: Trial):
-        pass
-
-    def on_trial_error(self, trial_runner: "trial_runner.TrialRunner",
-                       trial: Trial):
-        pass
-
-    def on_trial_result(self, trial_runner: "trial_runner.TrialRunner",
-                        trial: Trial, result: Dict) -> str:
-        return TrialScheduler.CONTINUE
-
-    def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner",
-                          trial: Trial, result: Dict):
         pass
 
     def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner",
                         trial: Trial):
         pass
-
-    def choose_trial_to_run(
-            self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]:
-        for trial in trial_runner.get_trials():
-            if (trial.status == Trial.PENDING
-                    and trial_runner.has_resources_for_trial(trial)):
-                return trial
-        for trial in trial_runner.get_trials():
-            if (trial.status == Trial.PAUSED
-                    and trial_runner.has_resources_for_trial(trial)):
-                return trial
-        return None
-
-    def debug_string(self) -> str:
-        return "Using FIFO scheduling algorithm."
@@ -14,14 +14,14 @@ try:
     assert ray_version >= '1.0.0'
     from ray.tune.suggest import Searcher
     from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
-    from ray.tune.utils.util import unflatten_dict
 except (ImportError, AssertionError):
     from .suggestion import Searcher
     from .suggestion import OptunaSearch as GlobalSearch
-    from ..tune.trial import unflatten_dict
+    from ..tune.trial import unflatten_dict, flatten_dict
 from .search_thread import SearchThread
 from .flow2 import FLOW2
-from ..tune.space import add_cost_to_space, indexof, normalize, define_by_run_func
+from ..tune.space import (
+    add_cost_to_space, indexof, normalize, define_by_run_func)

 import logging
 logger = logging.getLogger(__name__)
@@ -40,9 +40,10 @@ class BlendSearch(Searcher):
                  metric: Optional[str] = None,
                  mode: Optional[str] = None,
                  space: Optional[dict] = None,
-                 points_to_evaluate: Optional[List[dict]] = None,
                  low_cost_partial_config: Optional[dict] = None,
                  cat_hp_cost: Optional[dict] = None,
+                 points_to_evaluate: Optional[List[dict]] = None,
+                 evaluated_rewards: Optional[List] = None,
                  prune_attr: Optional[str] = None,
                  min_resource: Optional[float] = None,
                  max_resource: Optional[float] = None,
@@ -61,7 +62,6 @@ class BlendSearch(Searcher):
             mode: A string in ['min', 'max'] to specify the objective as
                 minimization or maximization.
             space: A dictionary to specify the search space.
-            points_to_evaluate: Initial parameter suggestions to be run first.
             low_cost_partial_config: A dictionary from a subset of
                 controlled dimensions to the initial low-cost values.
                 e.g.,
@@ -80,6 +80,13 @@ class BlendSearch(Searcher):

                 i.e., the relative cost of the
                 three choices of 'tree_method' is 1, 1 and 2 respectively.
+            points_to_evaluate: Initial parameter suggestions to be run first.
+            evaluated_rewards (list): If you have previously evaluated the
+                parameters passed in as points_to_evaluate you can avoid
+                re-running those trials by passing in the reward attributes
+                as a list so the optimiser can be told the results without
+                needing to re-compute the trial. Must be the same length as
+                points_to_evaluate.
             prune_attr: A string of the attribute used for pruning.
                 Not necessarily in space.
                 When prune_attr is in space, it is a hyperparameter, e.g.,
@@ -122,7 +129,20 @@ class BlendSearch(Searcher):
                 "consider providing low-cost values for cost-related hps via "
                 "'low_cost_partial_config'."
             )
-        self._points_to_evaluate = points_to_evaluate or []
+        if evaluated_rewards and mode:
+            self._points_to_evaluate = []
+            self._evaluated_rewards = []
+            best = max(evaluated_rewards) if mode == 'max' else min(
+                evaluated_rewards)
+            # only keep the best points as start points
+            for i, r in enumerate(evaluated_rewards):
+                if r == best:
+                    p = points_to_evaluate[i]
+                    self._points_to_evaluate.append(p)
+                    self._evaluated_rewards.append(r)
+        else:
+            self._points_to_evaluate = points_to_evaluate or []
+            self._evaluated_rewards = evaluated_rewards or []
         self._config_constraints = config_constraints
         self._metric_constraints = metric_constraints
         if self._metric_constraints:
@@ -131,40 +151,45 @@ class BlendSearch(Searcher):
         self._cat_hp_cost = cat_hp_cost or {}
         if space:
             add_cost_to_space(space, init_config, self._cat_hp_cost)
+        self._ls = self.LocalSearch(
+            init_config, metric, mode, space, prune_attr,
+            min_resource, max_resource, reduction_factor, self.cost_attr, seed)
         if global_search_alg is not None:
            self._gs = global_search_alg
         elif getattr(self, '__name__', None) != 'CFO':
-            from functools import partial
-            gs_space = partial(define_by_run_func, space=space)
+            if space and self._ls.hierarchical:
+                from functools import partial
+                gs_space = partial(define_by_run_func, space=space)
+                evaluated_rewards = None  # not supported by define-by-run
+            else:
+                gs_space = space
+            gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32)
+            if experimental:
+                import optuna as ot
+                sampler = ot.samplers.TPESampler(
+                    seed=seed, multivariate=True, group=True)
+            else:
+                sampler = None
             try:
-                gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32)
-                if experimental:
-                    import optuna as ot
-                    sampler = ot.samplers.TPESampler(
-                        seed=seed, multivariate=True, group=True)
-                else:
-                    sampler = None
+                self._gs = GlobalSearch(
+                    space=gs_space, metric=metric, mode=mode, seed=gs_seed,
+                    sampler=sampler, points_to_evaluate=points_to_evaluate,
+                    evaluated_rewards=evaluated_rewards)
+            except ValueError:
                 self._gs = GlobalSearch(
                     space=gs_space, metric=metric, mode=mode, seed=gs_seed,
                     sampler=sampler)
-            except TypeError:
-                self._gs = GlobalSearch(space=gs_space, metric=metric, mode=mode)
             self._gs.space = space
         else:
             self._gs = None
         self._experimental = experimental
         if getattr(self, '__name__', None) == 'CFO' and points_to_evaluate and len(
-                points_to_evaluate) > 1:
+                self._points_to_evaluate) > 1:
             # use the best config in points_to_evaluate as the start point
             self._candidate_start_points = {}
             self._started_from_low_cost = not low_cost_partial_config
         else:
             self._candidate_start_points = None
-        self._ls = self.LocalSearch(
-            init_config, metric, mode, space, prune_attr,
-            min_resource, max_resource, reduction_factor, self.cost_attr, seed)
-        self._is_ls_ever_converged = False
-        self._subspace = {}  # the subspace for each trial id
         if space:
             self._init_search()

@@ -187,6 +212,7 @@ class BlendSearch(Searcher):
         if not self._ls.space:
             # the search space can be set only once
             if self._gs is not None:
+                # define-by-run is not supported via set_search_properties
                 self._gs.set_search_properties(metric, mode, config)
                 self._gs.space = config
             if config:
@@ -216,6 +242,8 @@ class BlendSearch(Searcher):
     def _init_search(self):
         '''initialize the search
         '''
+        self._is_ls_ever_converged = False
+        self._subspace = {}  # the subspace for each trial id
         self._metric_target = np.inf * self._ls.metric_op
         self._search_thread_pool = {
             # id: int -> thread: SearchThread
@@ -239,6 +267,7 @@ class BlendSearch(Searcher):
         else:
             self._metric_constraint_satisfied = True
             self._metric_constraint_penalty = None
+        self.best_resource = self._ls.min_resource

     def save(self, checkpoint_path: str):
         ''' save states to a checkpoint path
@@ -295,10 +324,11 @@ class BlendSearch(Searcher):
                 trial_id, result, error)
             del self._trial_proposed_by[trial_id]
         if result:
-            config = {}
-            for key, value in result.items():
-                if key.startswith('config/'):
-                    config[key[7:]] = value
+            config = result.get('config', {})
+            if not config:
+                for key, value in result.items():
+                    if key.startswith('config/'):
+                        config[key[7:]] = value
             signature = self._ls.config_signature(
                 config, self._subspace.get(trial_id, {}))
             if error:  # remove from result cache
@@ -309,17 +339,22 @@ class BlendSearch(Searcher):
             objective = result[self._ls.metric]
             if (objective - self._metric_target) * self._ls.metric_op < 0:
                 self._metric_target = objective
+                if self._ls.resource:
+                    self._best_resource = config[self._ls.prune_attr]
             if thread_id:
                 if not self._metric_constraint_satisfied:
                     # no point has been found to satisfy metric constraint
                     self._expand_admissible_region(
                         self._ls_bound_min, self._ls_bound_max,
                         self._subspace.get(trial_id, self._ls.space))
-                # if self._gs is not None and self._experimental:
-                #     # TODO: recover when supported
-                #     converted = convert_key(config, self._gs.space)
-                #     logger.info(converted)
-                #     self._gs.add_evaluated_point(converted, objective)
+                if self._gs is not None and self._experimental and (
+                        not self._ls.hierarchical):
+                    self._gs.add_evaluated_point(
+                        flatten_dict(config), objective)
+                    # TODO: recover when supported
+                    # converted = convert_key(config, self._gs.space)
+                    # logger.info(converted)
+                    # self._gs.add_evaluated_point(converted, objective)
             elif metric_constraint_satisfied and self._create_condition(
                     result):
                 # thread creator
@@ -496,10 +531,12 @@ class BlendSearch(Searcher):
         '''
         if self._init_used and not self._points_to_evaluate:
             choice, backup = self._select_thread()
-            if choice < 0:  # timeout
-                return None
+            # if choice < 0:  # timeout
+            #     return None
             config = self._search_thread_pool[choice].suggest(trial_id)
-            if choice and config is None:
+            if not choice and config is not None and self._ls.resource:
+                config[self._ls.prune_attr] = self.best_resource
+            elif choice and config is None:
                 # local search thread finishes
                 if self._search_thread_pool[choice].converged:
                     self._expand_admissible_region(
@@ -544,9 +581,6 @@ class BlendSearch(Searcher):
                 self._trial_proposed_by[trial_id] = backup
                 choice = backup
             if not choice:  # global search
-                if self._ls._resource:
-                    # TODO: min or median?
-                    config[self._ls.prune_attr] = self._ls.min_resource
                 # temporarily relax admissible region for parallel proposals
                 self._update_admissible_region(
                     config, self._gs_admissible_min, self._gs_admissible_max,
@@ -563,22 +597,35 @@ class BlendSearch(Searcher):
         else:  # use init config
             if self._candidate_start_points is not None and self._points_to_evaluate:
                 self._candidate_start_points[trial_id] = None
-            init_config = self._points_to_evaluate.pop(
-                0) if self._points_to_evaluate else self._ls.init_config
+            reward = None
+            if self._points_to_evaluate:
+                init_config = self._points_to_evaluate.pop(0)
+                if self._evaluated_rewards:
+                    reward = self._evaluated_rewards.pop(0)
+            else:
+                init_config = self._ls.init_config
             config, space = self._ls.complete_config(
                 init_config, self._ls_bound_min, self._ls_bound_max)
-            config_signature = self._ls.config_signature(config, space)
-            result = self._result.get(config_signature)
-            if result:  # tried before
-                return None
-            elif result is None:  # not tried before
-                self._result[config_signature] = {}
-            else:  # running but no result yet
-                return None
+            if reward is None:
+                config_signature = self._ls.config_signature(config, space)
+                result = self._result.get(config_signature)
+                if result:  # tried before
+                    return None
+                elif result is None:  # not tried before
+                    self._result[config_signature] = {}
+                else:  # running but no result yet
+                    return None
            self._init_used = True
            self._trial_proposed_by[trial_id] = 0
            self._search_thread_pool[0].running += 1
            self._subspace[trial_id] = space
+            if reward is not None:
+                result = {
+                    self._metric: reward, self.cost_attr: 1,
+                    'config': config
+                }
+                self.on_trial_complete(trial_id, result)
+                return None
         return config

     def _should_skip(self, choice, trial_id, config, space) -> bool:
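For illustration, a minimal sketch of the warm-start path added above, assuming the flaml.tune sampling domains; with mode='min' only the best prior point ({'x': 5}, reward 4) is kept as a start point, per the constructor logic:

from flaml import tune
from flaml.searcher.blendsearch import BlendSearch

searcher = BlendSearch(
    metric='loss', mode='min',
    space={'x': tune.uniform(-10, 10)},
    points_to_evaluate=[{'x': 0}, {'x': 5}],  # configs tried previously
    evaluated_rewards=[9, 4])                 # their recorded 'loss' values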
@@ -694,79 +741,88 @@ except (ImportError, AssertionError):
 try:
     from nni.tuner import Tuner as NNITuner
     from nni.utils import extract_scalar_reward

-    class BlendSearchTuner(BlendSearch, NNITuner):
-        '''Tuner class for NNI
-        '''
-
-        def receive_trial_result(self, parameter_id, parameters, value,
-                                 **kwargs):
-            '''
-            Receive trial's final result.
-            parameter_id: int
-            parameters: object created by 'generate_parameters()'
-            value: final metrics of the trial, including default metric
-            '''
-            result = {}
-            for key, value in parameters.items():
-                result['config/' + key] = value
-            reward = extract_scalar_reward(value)
-            result[self._metric] = reward
-            # if nni does not report training cost,
-            # using sequence as an approximation.
-            # if no sequence, using a constant 1
-            result[self.cost_attr] = value.get(self.cost_attr, value.get(
-                'sequence', 1))
-            self.on_trial_complete(str(parameter_id), result)
-            ...
-
-        def generate_parameters(self, parameter_id, **kwargs) -> Dict:
-            '''
-            Returns a set of trial (hyper-)parameters, as a serializable object
-            parameter_id: int
-            '''
-            return self.suggest(str(parameter_id))
-            ...
-
-        def update_search_space(self, search_space):
-            '''
-            Tuners are advised to support updating search space at run-time.
-            If a tuner can only set search space once before generating first hyper-parameters,
-            it should explicitly document this behaviour.
-            search_space: JSON object created by experiment owner
-            '''
-            config = {}
-            for key, value in search_space.items():
-                v = value.get("_value")
-                _type = value['_type']
-                if _type == 'choice':
-                    config[key] = choice(v)
-                elif _type == 'randint':
-                    config[key] = randint(v[0], v[1] - 1)
-                elif _type == 'uniform':
-                    config[key] = uniform(v[0], v[1])
-                elif _type == 'quniform':
-                    config[key] = quniform(v[0], v[1], v[2])
-                elif _type == 'loguniform':
-                    config[key] = loguniform(v[0], v[1])
-                elif _type == 'qloguniform':
-                    config[key] = qloguniform(v[0], v[1], v[2])
-                elif _type == 'normal':
-                    config[key] = randn(v[1], v[2])
-                elif _type == 'qnormal':
-                    config[key] = qrandn(v[1], v[2], v[3])
-                else:
-                    raise ValueError(
-                        f'unsupported type in search_space {_type}')
-            self._ls.set_search_properties(None, None, config)
-            if self._gs is not None:
-                self._gs.set_search_properties(None, None, config)
-            self._init_search()

 except ImportError:
-    class BlendSearchTuner(BlendSearch):
+    class NNITuner:
         pass

+    def extract_scalar_reward(x: Dict):
+        return x.get('reward')
+
+
+class BlendSearchTuner(BlendSearch, NNITuner):
+    '''Tuner class for NNI
+    '''
+
+    def receive_trial_result(self, parameter_id, parameters, value,
+                             **kwargs):
+        '''
+        Receive trial's final result.
+        parameter_id: int
+        parameters: object created by 'generate_parameters()'
+        value: final metrics of the trial, including default metric
+        '''
+        result = {}
+        for k, v in parameters.items():
+            result['config/' + k] = v
+        reward = extract_scalar_reward(value)
+        result[self._metric] = reward
+        # if nni does not report training cost,
+        # using sequence as an approximation.
+        # if no sequence, using a constant 1
+        result[self.cost_attr] = value.get(self.cost_attr, value.get(
+            'sequence', 1))
+        self.on_trial_complete(str(parameter_id), result)
+        ...
+
+    def generate_parameters(self, parameter_id, **kwargs) -> Dict:
+        '''
+        Returns a set of trial (hyper-)parameters, as a serializable object
+        parameter_id: int
+        '''
+        return self.suggest(str(parameter_id))
+        ...
+
+    def update_search_space(self, search_space):
+        '''
+        Tuners are advised to support updating search space at run-time.
+        If a tuner can only set search space once before generating first hyper-parameters,
+        it should explicitly document this behaviour.
+        search_space: JSON object created by experiment owner
+        '''
+        config = {}
+        for key, value in search_space.items():
+            v = value.get("_value")
+            _type = value['_type']
+            if _type == 'choice':
+                config[key] = choice(v)
+            elif _type == 'randint':
+                config[key] = randint(*v)
+            elif _type == 'uniform':
+                config[key] = uniform(*v)
+            elif _type == 'quniform':
+                config[key] = quniform(*v)
+            elif _type == 'loguniform':
+                config[key] = loguniform(*v)
+            elif _type == 'qloguniform':
+                config[key] = qloguniform(*v)
+            elif _type == 'normal':
+                config[key] = randn(*v)
+            elif _type == 'qnormal':
+                config[key] = qrandn(*v)
+            else:
+                raise ValueError(
+                    f'unsupported type in search_space {_type}')
+        add_cost_to_space(config, {}, {})
+        self._ls = self.LocalSearch(
+            {}, self._ls.metric, self._mode, config, cost_attr=self.cost_attr,
+            seed=self._ls.seed)
+        if self._gs is not None:
+            self._gs = GlobalSearch(
+                space=config, metric=self._metric, mode=self._mode,
+                sampler=self._gs._sampler)
+            self._gs.space = config
+        self._init_search()
+
+
 class CFO(BlendSearchTuner):
     ''' class for CFO algorithm
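For illustration, a hypothetical NNI-style search space in the {'_type': ..., '_value': [...]} form that update_search_space expects; the keys and ranges below are made up for the sketch:

from flaml.searcher.blendsearch import BlendSearchTuner

search_space = {
    'learning_rate': {'_type': 'loguniform', '_value': [1e-4, 1e-1]},
    'optimizer': {'_type': 'choice', '_value': ['sgd', 'adam']},
}
tuner = BlendSearchTuner(metric='default', mode='max')  # 'default' is NNI's reward key
tuner.update_search_space(search_space)  # rebuilds local and global search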
@@ -15,8 +15,9 @@ try:
     from ray.tune.utils.util import flatten_dict, unflatten_dict
 except (ImportError, AssertionError):
     from .suggestion import Searcher
-    from .variant_generator import generate_variants, flatten_dict, unflatten_dict
+    from .variant_generator import generate_variants
     from ..tune import sample
+    from ..tune.trial import flatten_dict, unflatten_dict
 from ..tune.space import complete_config, denormalize, normalize


@@ -95,7 +96,7 @@ class FLOW2(Searcher):
         self.space = space or {}
         self._space = flatten_dict(self.space, prevent_delimiter=True)
         self._random = np.random.RandomState(seed)
-        self._seed = seed
+        self.seed = seed
         self.init_config = init_config
         self.best_config = flatten_dict(init_config)
         self.prune_attr = prune_attr
@@ -142,7 +143,7 @@ class FLOW2(Searcher):
                 self._bounded_keys.append(key)
         if not hier:
             self._space_keys = sorted(self._tunable_keys)
-        self._hierarchical = hier
+        self.hierarchical = hier
         if (self.prune_attr and self.prune_attr not in self._space
                 and self.max_resource):
             self.min_resource = self.min_resource or self._min_resource()
@@ -253,10 +254,10 @@ class FLOW2(Searcher):
             init_config, self.metric, self.mode,
             space, self.prune_attr,
             self.min_resource, self.max_resource,
-            self.resource_multiple_factor, self.cost_attr, self._seed + 1)
+            self.resource_multiple_factor, self.cost_attr, self.seed + 1)
         flow2.best_obj = obj * self.metric_op  # minimize internally
         flow2.cost_incumbent = cost
-        self._seed += 1
+        self.seed += 1
         return flow2

     def normalize(self, config, recursive=False) -> Dict:
@@ -502,7 +503,7 @@ class FLOW2(Searcher):
         value_list = []
         # self._space_keys doesn't contain keys with const values,
         # e.g., "eval_metric": ["logloss", "error"].
-        keys = sorted(config.keys()) if self._hierarchical else self._space_keys
+        keys = sorted(config.keys()) if self.hierarchical else self._space_keys
         for key in keys:
             value = config[key]
             if key == self.prune_attr:
@@ -510,7 +511,7 @@ class FLOW2(Searcher):
             else:
                 # key must be in space
                 domain = space[key]
-                if self._hierarchical:
+                if self.hierarchical:
                     # can't remove constant for hierarchical search space,
                     # e.g., learner
                     if not (domain is None or type(domain) in (str, int, float)
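For reference, a small round-trip with the helpers now imported from flaml.tune.trial (default delimiter '/', per the definitions moved out of variant_generator.py below):

from flaml.tune.trial import flatten_dict, unflatten_dict

nested = {'model': {'lr': 0.1, 'depth': 6}}
flat = flatten_dict(nested)   # {'model/lr': 0.1, 'model/depth': 6}
assert unflatten_dict(flat) == nested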
@@ -12,7 +12,7 @@ try:
 except (ImportError, AssertionError):
     from .suggestion import Searcher
 from .flow2 import FLOW2
-from ..tune.space import unflatten_hierarchical
+from ..tune.space import add_cost_to_space, unflatten_hierarchical

 import logging
 logger = logging.getLogger(__name__)
@@ -46,6 +46,11 @@ class SearchThread:
         self.cost_attr = cost_attr
         if search_alg:
             self.space = self._space = search_alg.space  # unflattened space
+            if self.space and not isinstance(search_alg, FLOW2) and isinstance(
+                search_alg._space, dict
+            ):
+                # remember const config
+                self._const = add_cost_to_space(self.space, {}, {})

     @classmethod
     def set_eps(cls, time_budget_s):
@@ -59,7 +64,12 @@ class SearchThread:
         else:
             try:
                 config = self._search_alg.suggest(trial_id)
-                config, self.space = unflatten_hierarchical(config, self._space)
+                if isinstance(self._search_alg._space, dict):
+                    config.update(self._const)
+                else:
+                    # define by run
+                    config, self.space = unflatten_hierarchical(
+                        config, self._space)
             except FloatingPointError:
                 logger.warning(
                     'The global search method raises FloatingPointError. '
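A hedged sketch of the const-merge step above, assuming, as its use here suggests, that add_cost_to_space returns the constant entries of the space: a global searcher sampling only tunable domains would drop a constant key ('learner' below is hypothetical), so suggest() re-attaches what was remembered in self._const:

suggestion = {'lr': 0.01}    # as if from self._search_alg.suggest(trial_id)
const = {'learner': 'lgbm'}  # as if remembered via add_cost_to_space(...)
suggestion.update(const)
assert suggestion == {'lr': 0.01, 'learner': 'lgbm'}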
@@ -91,15 +91,6 @@ class Searcher:
                  mode: Optional[str] = None,
                  max_concurrent: Optional[int] = None,
                  use_early_stopped_trials: Optional[bool] = None):
-        if use_early_stopped_trials is False:
-            raise DeprecationWarning(
-                "Early stopped trials are now always used. If this is a "
-                "problem, file an issue: https://github.com/ray-project/ray.")
-        if max_concurrent is not None:
-            logger.warning(
-                "DeprecationWarning: `max_concurrent` is deprecated for this "
-                "search algorithm. Use tune.suggest.ConcurrencyLimiter() "
-                "instead. This will raise an error in future versions of Ray.")
         self._metric = metric
         self._mode = mode
@@ -152,83 +143,6 @@ class Searcher:
         """
         pass

-    def on_trial_complete(self,
-                          trial_id: str,
-                          result: Optional[Dict] = None,
-                          error: bool = False):
-        """Notification for the completion of trial.
-        Typically, this method is used for notifying the underlying
-        optimizer of the result.
-        Args:
-            trial_id (str): A unique string ID for the trial.
-            result (dict): Dictionary of metrics for current training progress.
-                Note that the result dict may include NaNs or
-                may not include the optimization metric. It is up to the
-                subclass implementation to preprocess the result to
-                avoid breaking the optimization process. Upon errors, this
-                may also be None.
-            error (bool): True if the training process raised an error.
-        """
-        raise NotImplementedError
-
-    def suggest(self, trial_id: str) -> Optional[Dict]:
-        """Queries the algorithm to retrieve the next set of parameters.
-        Arguments:
-            trial_id (str): Trial ID used for subsequent notifications.
-        Returns:
-            dict | FINISHED | None: Configuration for a trial, if possible.
-                If FINISHED is returned, Tune will be notified that
-                no more suggestions/configurations will be provided.
-                If None is returned, Tune will skip the querying of the
-                searcher for this step.
-        """
-        raise NotImplementedError
-
-    def save(self, checkpoint_path: str):
-        """Save state to path for this search algorithm.
-        Args:
-            checkpoint_path (str): File where the search algorithm
-                state is saved. This path should be used later when
-                restoring from file.
-        Example:
-        .. code-block:: python
-            search_alg = Searcher(...)
-            analysis = tune.run(
-                cost,
-                num_samples=5,
-                search_alg=search_alg,
-                name=self.experiment_name,
-                local_dir=self.tmpdir)
-            search_alg.save("./my_favorite_path.pkl")
-        .. versionchanged:: 0.8.7
-            Save is automatically called by `tune.run`. You can use
-            `restore_from_dir` to restore from an experiment directory
-            such as `~/ray_results/trainable`.
-        """
-        raise NotImplementedError
-
-    def restore(self, checkpoint_path: str):
-        """Restore state for this search algorithm
-        Args:
-            checkpoint_path (str): File where the search algorithm
-                state is saved. This path should be the same
-                as the one provided to "save".
-        Example:
-        .. code-block:: python
-            search_alg.save("./my_favorite_path.pkl")
-            search_alg2 = Searcher(...)
-            search_alg2 = ConcurrencyLimiter(search_alg2, 1)
-            search_alg2.restore(checkpoint_path)
-            tune.run(cost, num_samples=5, search_alg=search_alg2)
-        """
-        raise NotImplementedError
-
-    def get_state(self) -> Dict:
-        raise NotImplementedError
-
-    def set_state(self, state: Dict):
-        raise NotImplementedError
-
     @property
     def metric(self) -> str:
         """The training result objective value attribute."""
@@ -536,14 +450,6 @@ class OptunaSearch(Searcher):
             # Flatten to support nested dicts
             space = flatten_dict(space, "/")

-        # Deprecate: 1.5
-        if isinstance(space, list):
-            logger.warning(
-                "Passing lists of `param.suggest_*()` calls to OptunaSearch "
-                "as a search space is deprecated and will be removed in "
-                "a future release of Ray. Please pass a dict mapping "
-                "to `optuna.distributions` objects instead.")
-
         self._space = space

         self._points_to_evaluate = points_to_evaluate or []
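For illustration, a sketch of warm-starting the fallback OptunaSearch directly, assuming the copied class keeps Ray's points_to_evaluate/evaluated_rewards signature (which the try/except around GlobalSearch in blendsearch.py relies on):

from flaml import tune
from flaml.searcher.suggestion import OptunaSearch

gs = OptunaSearch(
    space={'x': tune.uniform(-10, 10)},
    metric='loss', mode='min',
    points_to_evaluate=[{'x': 5}],  # a previously tried config
    evaluated_rewards=[4])          # its recorded 'loss'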
@@ -19,57 +19,16 @@ Copyright (c) Microsoft Corporation.
 '''
 import copy
 import logging
-from collections.abc import Mapping
-from typing import Any, Dict, Generator, List, Optional, Tuple
+from typing import Any, Dict, Generator, List, Tuple

 import numpy
 import random

-from ..tune.sample import Categorical, Domain, Function
+from ..tune.sample import Categorical, Domain

 logger = logging.getLogger(__name__)


-def flatten_dict(dt, delimiter="/", prevent_delimiter=False):
-    dt = copy.deepcopy(dt)
-    if prevent_delimiter and any(delimiter in key for key in dt):
-        # Raise if delimiter is any of the keys
-        raise ValueError(
-            "Found delimiter `{}` in key when trying to flatten array."
-            "Please avoid using the delimiter in your specification.")
-    while any(isinstance(v, dict) for v in dt.values()):
-        remove = []
-        add = {}
-        for key, value in dt.items():
-            if isinstance(value, dict):
-                for subkey, v in value.items():
-                    if prevent_delimiter and delimiter in subkey:
-                        # Raise if delimiter is in any of the subkeys
-                        raise ValueError(
-                            "Found delimiter `{}` in key when trying to "
-                            "flatten array. Please avoid using the delimiter "
-                            "in your specification.")
-                    add[delimiter.join([key, str(subkey)])] = v
-                remove.append(key)
-        dt.update(add)
-        for k in remove:
-            del dt[k]
-    return dt
-
-
-def unflatten_dict(dt, delimiter="/"):
-    """Unflatten dict. Does not support unflattening lists."""
-    dict_type = type(dt)
-    out = dict_type()
-    for key, val in dt.items():
-        path = key.split(delimiter)
-        item = out
-        for k in path[:-1]:
-            item = item.setdefault(k, dict_type())
-        item[path[-1]] = val
-    return out
-
-
 class TuneError(Exception):
     """General error class raised by ray.tune."""
     pass
@@ -84,16 +43,9 @@ def generate_variants(
     variants in combination:
         "activation": grid_search(["relu", "tanh"])
         "learning_rate": grid_search([1e-3, 1e-4, 1e-5])
-    Lambda functions: These are evaluated to produce a concrete value, and
-        can express dependencies or conditional distributions between values.
-        They can also be used to express random search (e.g., by calling
-        into the `random` or `np` module).
-        "cpu": lambda spec: spec.config.num_workers
-        "batch_size": lambda spec: random.uniform(1, 1000)
     Finally, to support defining specs in plain JSON / YAML, grid search
-    and lambda functions can also be defined alternatively as follows:
+    can also be defined alternatively as follows:
         "activation": {"grid_search": ["relu", "tanh"]}
-        "cpu": {"eval": "spec.config.num_workers"}
     Use `format_vars` to format the returned dict of hyperparameters.
     Yields:
         (Dict of resolved variables, Spec object)
@@ -242,10 +194,6 @@ def _try_resolve(v) -> Tuple[bool, Any]:
     if isinstance(v, Domain):
         # Domain to sample from
         return False, v
-    elif isinstance(v, dict) and len(v) == 1 and "eval" in v:
-        # Lambda function in eval syntax
-        return False, Function(
-            lambda spec: eval(v["eval"], _STANDARD_IMPORTS, {"spec": spec}))
     elif isinstance(v, dict) and len(v) == 1 and "grid_search" in v:
         # Grid search values
         grid_values = v["grid_search"]
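For illustration, a sketch of the grid-search form that generate_variants still resolves after the lambda/eval removal above; the spec keys are made up, and the (resolved_vars, variant) yield convention is assumed from the docstring:

from flaml.searcher.variant_generator import generate_variants

spec = {'activation': {'grid_search': ['relu', 'tanh']}, 'lr': 1e-3}
for resolved_vars, variant in generate_variants(spec):
    print(resolved_vars, variant['activation'], variant['lr'])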
@@ -325,11 +325,6 @@ class Categorical(Domain):
             new.set_sampler(self._Uniform())
         return new

-    def grid(self):
-        new = copy(self)
-        new.set_sampler(Grid())
-        return new
-
     def __len__(self):
         return len(self.categories)

@@ -344,55 +339,6 @@ class Categorical(Domain):
         return f"{self.categories}"


-class Function(Domain):
-    class _CallSampler(BaseSampler):
-        def sample(self,
-                   domain: "Function",
-                   spec: Optional[Union[List[Dict], Dict]] = None,
-                   size: int = 1):
-            if domain.pass_spec:
-                items = [
-                    domain.func(spec[i] if isinstance(spec, list) else spec)
-                    for i in range(size)
-                ]
-            else:
-                items = [domain.func() for i in range(size)]
-
-            return items if len(items) > 1 else domain.cast(items[0])
-
-    default_sampler_cls = _CallSampler
-
-    def __init__(self, func: Callable):
-        sig = signature(func)
-
-        pass_spec = True  # whether we should pass `spec` when calling `func`
-        try:
-            sig.bind({})
-        except TypeError:
-            pass_spec = False
-
-        if not pass_spec:
-            try:
-                sig.bind()
-            except TypeError as exc:
-                raise ValueError(
-                    "The function passed to a `Function` parameter must be "
-                    "callable with either 0 or 1 parameters.") from exc
-
-        self.pass_spec = pass_spec
-        self.func = func
-
-    def is_function(self):
-        return True
-
-    def is_valid(self, value: Any):
-        return True  # This is user-defined, so lets not assume anything
-
-    @property
-    def domain_str(self):
-        return f"{self.func}()"
-
-
 class Quantized(Sampler):
     def __init__(self, sampler: Sampler, q: Union[float, int]):
         self.sampler = sampler
@@ -439,22 +385,6 @@ class PolynomialExpansionSet:
         return "PolynomialExpansionSet"


-# TODO (krfricke): Remove tune.function
-def function(func):
-    logger.warning(
-        "DeprecationWarning: wrapping {} with tune.function() is no "
-        "longer needed".format(func))
-    return func
-
-
-def sample_from(func: Callable[[Dict], Any]):
-    """Specify that tune should sample configuration values from this function.
-    Arguments:
-        func: An callable function to draw a sample from.
-    """
-    return Function(func)
-
-
 def uniform(lower: float, upper: float):
     """Sample a float value uniformly between ``lower`` and ``upper``.
     Sampling from ``tune.uniform(1, 10)`` is equivalent to sampling from
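For reference, a small space built only from the sampling constructors that remain in this module after Function/sample_from are dropped; the keys and ranges are illustrative:

from flaml.tune.sample import choice, loguniform, randint, uniform

space = {
    'optimizer': choice(['sgd', 'adam']),
    'lr': loguniform(1e-4, 1e-1),
    'batch_size': randint(16, 256),
    'dropout': uniform(0.0, 0.5),
}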
@@ -90,30 +90,30 @@ def define_by_run_func(
     return config


-def convert_key(
-    conf: Dict, space: Dict, path: str = ""
-) -> Optional[Dict[str, Any]]:
-    """Convert config keys to define-by-run keys.
+# def convert_key(
+#     conf: Dict, space: Dict, path: str = ""
+# ) -> Optional[Dict[str, Any]]:
+#     """Convert config keys to define-by-run keys.

-    Returns:
-        A dict with converted keys.
-    """
-    config = {}
-    for key, domain in space.items():
-        value = conf[key]
-        if path:
-            key = path + '/' + key
-        if isinstance(domain, dict):
-            config.update(convert_key(conf[key], domain, key))
-        elif isinstance(domain, sample.Categorical):
-            index = indexof(domain, value)
-            config[key + '_choice_'] = index
-            if isinstance(value, dict):
-                key += f":{index}"
-                config.update(convert_key(value, domain.categories[index], key))
-        else:
-            config[key] = value
-    return config
+#     Returns:
+#         A dict with converted keys.
+#     """
+#     config = {}
+#     for key, domain in space.items():
+#         value = conf[key]
+#         if path:
+#             key = path + '/' + key
+#         if isinstance(domain, dict):
+#             config.update(convert_key(conf[key], domain, key))
+#         elif isinstance(domain, sample.Categorical):
+#             index = indexof(domain, value)
+#             config[key + '_choice_'] = index
+#             if isinstance(value, dict):
+#                 key += f":{index}"
+#                 config.update(convert_key(value, domain.categories[index], key))
+#         else:
+#             config[key] = value
+#     return config


 def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]:
@@ -306,10 +306,8 @@ def normalize(
             elif str(sampler) == 'Normal':
                 # N(mean, sd) -> N(0,1)
                 config_norm[key] = (value - sampler.mean) / sampler.sd
-            else:
-                # TODO? elif str(sampler) == 'Base': # sample.Function._CallSampler
-                # e.g., {test: sample_from(lambda spec: randn(10, 2).sample() * 0.01)}
-                config_norm[key] = value
+            # else:
+            #     config_norm[key] = value
     return config_norm
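For illustration, how a hierarchical space can be handed to Optuna as a define-by-run function, mirroring the partial(...) call in blendsearch.py; the space below is a toy example:

from functools import partial
from flaml import tune
from flaml.tune.space import define_by_run_func

space = {'learner': tune.choice(['lgbm', 'xgboost'])}
gs_space = partial(define_by_run_func, space=space)  # later invoked as gs_space(trial)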
@@ -13,6 +13,7 @@ try:
     from ray.tune.analysis import ExperimentAnalysis as EA
 except (ImportError, AssertionError):
     from .analysis import ExperimentAnalysis as EA
+from .result import DEFAULT_METRIC
 import logging
 logger = logging.getLogger(__name__)

@@ -33,7 +34,7 @@ class ExperimentAnalysis(EA):
             super().__init__(self, None, trials, metric, mode)
         except (TypeError, ValueError):
             self.trials = trials
-            self.default_metric = metric or '_default_anonymous_metric'
+            self.default_metric = metric or DEFAULT_METRIC
             self.default_mode = mode

@@ -82,7 +83,7 @@ def report(_metric=None, **kwargs):
     if _verbose == 2:
         logger.info(f"result: {kwargs}")
     if _metric:
-        result['_default_anonymous_metric'] = _metric
+        result[DEFAULT_METRIC] = _metric
     trial = _runner.running_trial
     if _running_trial == trial:
         _training_iteration += 1
@@ -105,12 +106,13 @@ def report(_metric=None, **kwargs):

 def run(training_function,
         config: Optional[dict] = None,
-        points_to_evaluate: Optional[List[dict]] = None,
         low_cost_partial_config: Optional[dict] = None,
         cat_hp_cost: Optional[dict] = None,
         metric: Optional[str] = None,
         mode: Optional[str] = None,
         time_budget_s: Union[int, float, datetime.timedelta] = None,
+        points_to_evaluate: Optional[List[dict]] = None,
+        evaluated_rewards: Optional[List] = None,
         prune_attr: Optional[str] = None,
         min_resource: Optional[float] = None,
         max_resource: Optional[float] = None,
@@ -155,8 +157,6 @@ def run(training_function,
     Args:
         training_function: A user-defined training function.
         config: A dictionary to specify the search space.
-        points_to_evaluate: A list of initial hyperparameter
-            configurations to run first.
         low_cost_partial_config: A dictionary from a subset of
             controlled dimensions to the initial low-cost values.
             e.g.,
@@ -179,6 +179,14 @@ def run(training_function,
         mode: A string in ['min', 'max'] to specify the objective as
             minimization or maximization.
         time_budget_s: A float of the time budget in seconds.
+        points_to_evaluate: A list of initial hyperparameter
+            configurations to run first.
+        evaluated_rewards (list): If you have previously evaluated the
+            parameters passed in as points_to_evaluate you can avoid
+            re-running those trials by passing in the reward attributes
+            as a list so the optimiser can be told the results without
+            needing to re-compute the trial. Must be the same length as
+            points_to_evaluate.
         prune_attr: A string of the attribute used for pruning.
             Not necessarily in space.
             When prune_attr is in space, it is a hyperparameter, e.g.,
@@ -259,9 +267,10 @@ def run(training_function,
     if search_alg is None:
         from ..searcher.blendsearch import BlendSearch
         search_alg = BlendSearch(
-            metric=metric or '_default_anonymous_metric', mode=mode,
+            metric=metric or DEFAULT_METRIC, mode=mode,
             space=config,
            points_to_evaluate=points_to_evaluate,
+            evaluated_rewards=evaluated_rewards,
            low_cost_partial_config=low_cost_partial_config,
            cat_hp_cost=cat_hp_cost,
            prune_attr=prune_attr,
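For illustration, a minimal end-to-end sketch of the new run() arguments with a toy objective; per the docstring above, the searcher is told the rewards of the pre-evaluated points instead of re-running them:

from flaml import tune

def train(config):
    tune.report(loss=(config['x'] - 3) ** 2)  # toy quadratic objective

analysis = tune.run(
    train,
    config={'x': tune.uniform(-10, 10)},
    metric='loss', mode='min',
    points_to_evaluate=[{'x': 0}, {'x': 5}],
    evaluated_rewards=[9, 4],  # losses already measured for those points
    time_budget_s=2, num_samples=-1)
print(analysis.best_config)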
@@ -842,12 +842,12 @@
     "class MyRegularizedGreedyForest(SKLearnEstimator):\n",
     "\n",
     "\n",
-    "    def __init__(self, task='binary:logistic', n_jobs=1, **params):\n",
+    "    def __init__(self, task='binary', n_jobs=1, **params):\n",
     "        '''Constructor\n",
     "        \n",
     "        Args:\n",
     "            task: A string of the task type, one of\n",
-    "                'binary:logistic', 'multi:softmax', 'regression'\n",
+    "                'binary', 'multi', 'regression'\n",
     "            n_jobs: An integer of the number of parallel threads\n",
     "            params: A dictionary of the hyperparameter names and values\n",
     "        '''\n",
@@ -855,7 +855,7 @@
     "        super().__init__(task, **params)\n",
     "\n",
     "        '''task=regression for RGFRegressor; \n",
-    "        binary:logistic and multiclass:softmax for RGFClassifier'''\n",
+    "        binary or multiclass for RGFClassifier'''\n",
     "        if 'regression' in task:\n",
     "            self.estimator_class = RGFRegressor\n",
     "        else:\n",
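For illustration, registering the notebook's custom estimator under the renamed task strings; MyRegularizedGreedyForest is assumed to be defined as above, and X_train/y_train stand in for training data at hand:

from flaml import AutoML

automl = AutoML()
automl.add_learner(learner_name='rgf',
                   learner_class=MyRegularizedGreedyForest)  # class from the notebook
automl.fit(X_train=X_train, y_train=y_train,  # data assumed available
           task='classification', estimator_list=['rgf'], time_budget=10)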
@ -17,7 +17,7 @@ from flaml import tune
|
|||||||
|
|
||||||
class MyRegularizedGreedyForest(SKLearnEstimator):
|
class MyRegularizedGreedyForest(SKLearnEstimator):
|
||||||
|
|
||||||
def __init__(self, task='binary:logistic', n_jobs=1, max_leaf=4,
|
def __init__(self, task='binary', n_jobs=1, max_leaf=4,
|
||||||
n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0,
|
n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0,
|
||||||
min_samples_leaf=1, **params):
|
min_samples_leaf=1, **params):
|
||||||
|
|
||||||
@ -264,6 +264,7 @@ class TestAutoML(unittest.TestCase):
|
|||||||
"model_history": True,
|
"model_history": True,
|
||||||
"sample_weight": np.ones(len(y)),
|
"sample_weight": np.ones(len(y)),
|
||||||
"pred_time_limit": 1e-5,
|
"pred_time_limit": 1e-5,
|
||||||
|
"ensemble": True,
|
||||||
}
|
}
|
||||||
automl_experiment.fit(**automl_settings)
|
automl_experiment.fit(**automl_settings)
|
||||||
print(automl_experiment.classes_)
|
print(automl_experiment.classes_)
|
||||||
@ -382,23 +383,25 @@ class TestAutoML(unittest.TestCase):
|
|||||||
|
|
||||||
def test_roc_auc_ovr(self):
|
def test_roc_auc_ovr(self):
|
||||||
automl_experiment = AutoML()
|
automl_experiment = AutoML()
|
||||||
|
X_train, y_train = load_iris(return_X_y=True)
|
||||||
automl_settings = {
|
automl_settings = {
|
||||||
"time_budget": 2,
|
"time_budget": 1,
|
||||||
"metric": "roc_auc_ovr",
|
"metric": "roc_auc_ovr",
|
||||||
"task": "classification",
|
"task": "classification",
|
||||||
"log_file_name": "test/roc_auc_ovr.log",
|
"log_file_name": "test/roc_auc_ovr.log",
|
||||||
"log_training_metric": True,
|
"log_training_metric": True,
|
||||||
"n_jobs": 1,
|
"n_jobs": 1,
|
||||||
|
"sample_weight": np.ones(len(y_train)),
|
||||||
|
"eval_method": "holdout",
|
||||||
"model_history": True
|
"model_history": True
|
||||||
}
|
}
|
||||||
X_train, y_train = load_iris(return_X_y=True)
|
|
||||||
automl_experiment.fit(
|
automl_experiment.fit(
|
||||||
X_train=X_train, y_train=y_train, **automl_settings)
|
X_train=X_train, y_train=y_train, **automl_settings)
|
||||||
|
|
||||||
def test_roc_auc_ovo(self):
|
def test_roc_auc_ovo(self):
|
||||||
automl_experiment = AutoML()
|
automl_experiment = AutoML()
|
||||||
automl_settings = {
|
automl_settings = {
|
||||||
"time_budget": 2,
|
"time_budget": 1,
|
||||||
"metric": "roc_auc_ovo",
|
"metric": "roc_auc_ovo",
|
||||||
"task": "classification",
|
"task": "classification",
|
||||||
"log_file_name": "test/roc_auc_ovo.log",
|
"log_file_name": "test/roc_auc_ovo.log",
|
||||||
@ -438,6 +441,11 @@ class TestAutoML(unittest.TestCase):
|
|||||||
log_file_name=automl_settings["log_file_name"],
|
log_file_name=automl_settings["log_file_name"],
|
||||||
X_train=X_train, y_train=y_train,
|
X_train=X_train, y_train=y_train,
|
||||||
train_full=True, time_budget=1)
|
train_full=True, time_budget=1)
|
||||||
|
automl_experiment.retrain_from_log(
|
||||||
|
task="regression",
|
||||||
|
log_file_name=automl_settings["log_file_name"],
|
||||||
|
X_train=X_train, y_train=y_train,
|
||||||
|
train_full=True, time_budget=0)
|
||||||
|
|
||||||
def test_sparse_matrix_classification(self):
|
def test_sparse_matrix_classification(self):
|
||||||
automl_experiment = AutoML()
|
automl_experiment = AutoML()
|
||||||
@ -565,13 +573,14 @@ class TestAutoML(unittest.TestCase):
         except ImportError:
             return
 
-    def test_parallel_xgboost_random(self):
+    def test_parallel_xgboost_others(self):
         # use random search as the hpo_method
         self.test_parallel_xgboost(hpo_method='random')
 
     def test_random_out_of_memory(self):
         automl_experiment = AutoML()
-        automl_experiment.add_learner(learner_name='large_lgbm', learner_class=MyLargeLGBM)
+        automl_experiment.add_learner(
+            learner_name='large_lgbm', learner_class=MyLargeLGBM)
         automl_settings = {
             "time_budget": 2,
             "metric": 'ap',
@ -620,13 +629,13 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_iteration)
         print(automl_experiment.best_estimator)
 
-    def test_sparse_matrix_regression_cv(self):
+    def test_sparse_matrix_regression_holdout(self):
         X_train = scipy.sparse.random(8, 100)
         y_train = np.random.uniform(size=8)
         automl_experiment = AutoML()
         automl_settings = {
-            "time_budget": 2,
-            'eval_method': 'cv',
+            "time_budget": 1,
+            'eval_method': 'holdout',
             "task": 'regression',
             "log_file_name": "test/sparse_regression.log",
             "n_jobs": 1,
@ -21,6 +21,7 @@ def test_forecast_automl(budget=5):
         "task": 'forecast',  # task type
         "log_file_name": 'CO2_forecast.log',  # flaml log file
         "eval_method": "holdout",
+        "label": ('ds', 'y'),
     }
     '''The main flaml automl API'''
     try:
@ -1,7 +1,7 @@
 from openml.exceptions import OpenMLServerException
 
 
-def test_automl(budget=5, dataset_format='dataframe'):
+def test_automl(budget=5, dataset_format='dataframe', hpo_method=None):
     from flaml.data import load_openml_dataset
     try:
         X_train, X_test, y_train, y_test = load_openml_dataset(
@ -18,6 +18,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
         "task": 'classification',  # task type
         "log_file_name": 'airlines_experiment.log',  # flaml log file
         "seed": 7654321,  # random seed
+        'hpo_method': hpo_method
     }
     '''The main flaml automl API'''
     automl.fit(X_train=X_train, y_train=y_train, **settings)
@ -52,7 +53,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
 
 
 def test_automl_array():
-    test_automl(5, 'array')
+    test_automl(5, 'array', 'bs')
 
 
 def test_mlflow():
@ -81,8 +82,11 @@ def test_mlflow():
     mlflow.set_experiment("flaml")
     with mlflow.start_run():
         '''The main flaml automl API'''
-        automl.fit(X_train=X_train, y_train=y_train, **settings)
+        automl.fit(
+            X_train=X_train, y_train=y_train, **settings)
     # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
+    automl._mem_thres = 0
+    print(automl.trainable(automl.points_to_evaluate[0]))
 
 
 if __name__ == "__main__":
@ -41,6 +41,7 @@ class TestLogging(unittest.TestCase):
         }
         X_train, y_train = load_boston(return_X_y=True)
         n = len(y_train) >> 1
+        print(automl.model, automl.classes_, automl.predict(X_train))
         automl.fit(X_train=X_train[:n], y_train=y_train[:n],
                    X_val=X_train[n:], y_val=y_train[n:],
                    **automl_settings)
@ -81,6 +82,8 @@ class TestLogging(unittest.TestCase):
             time_budget_s=1, num_samples=-1)
         print(min(trial.last_result["val_loss"]
                   for trial in analysis.trials))
+        config = analysis.trials[-1].last_result['config']['ml']
+        automl._state._train_with_config(config['learner'], config)
         # Check if the log buffer is populated.
         self.assertTrue(len(buf.getvalue()) > 0)
 
@ -16,9 +16,9 @@ class TestTrainingLog(unittest.TestCase):
             filename = os.path.join(d, path)
 
             # Run a simple job.
-            automl_experiment = AutoML()
+            automl = AutoML()
             automl_settings = {
-                "time_budget": 2,
+                "time_budget": 1,
                 "metric": 'mse',
                 "task": 'regression',
                 "log_file_name": filename,
@ -29,10 +29,12 @@ class TestTrainingLog(unittest.TestCase):
                 "train_time_limit": 0.01,
                 "verbose": 3,
                 "ensemble": True,
+                "keep_search_state": True,
             }
             X_train, y_train = load_boston(return_X_y=True)
-            automl_experiment.fit(X_train=X_train, y_train=y_train,
-                                  **automl_settings)
+            automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
+            automl._state._train_with_config(
+                automl.best_estimator, automl.best_config)
 
             # Check if the training log file is populated.
             self.assertTrue(os.path.exists(filename))
@ -44,8 +46,10 @@ class TestTrainingLog(unittest.TestCase):
             self.assertGreater(count, 0)
 
             automl_settings["log_file_name"] = None
-            automl_experiment.fit(X_train=X_train, y_train=y_train,
-                                  **automl_settings)
+            automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
+            automl._selected.update(None, 0)
+            automl = AutoML()
+            automl.fit(X_train=X_train, y_train=y_train, max_iter=0)
 
     def test_illfilename(self):
         try:
@ -76,7 +76,7 @@ def test_simple(method=None):
     print(analysis.trials[-1])
 
 
-def _test_optuna():
+def test_optuna():
     test_simple(method="optuna")
 
 
test/tune/test_sample.py (new file, 18 lines)
@ -0,0 +1,18 @@
+from flaml.tune.sample import (
+    BaseSampler, PolynomialExpansionSet, Domain,
+    uniform, quniform, choice, randint, qrandint, randn,
+    qrandn, loguniform, qloguniform, lograndint, qlograndint)
+
+
+def test_sampler():
+    print(randn().sample(size=2))
+    print(PolynomialExpansionSet(), BaseSampler())
+    print(qrandn(2, 10, 2).sample(size=2))
+    c = choice([1, 2])
+    print(c.domain_str, len(c), c.is_valid(3))
+    i = randint(1, 10)
+    print(i.domain_str, i.is_valid(10))
+    d = Domain()
+    print(d.domain_str, d.is_function())
+    d.default_sampler_cls = BaseSampler
+    print(d.get_sampler())
test/tune/test_searcher.py (new file, 126 lines)
@ -0,0 +1,126 @@
+from flaml.searcher.blendsearch import CFO
+import numpy as np
+try:
+    from ray import __version__ as ray_version
+    assert ray_version >= '1.0.0'
+    from ray.tune import sample
+except (ImportError, AssertionError):
+    from flaml.tune import sample
+from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter
+from flaml.searcher.blendsearch import BlendSearch
+
+def define_search_space(trial):
+    trial.suggest_float("a", 6, 8)
+    trial.suggest_float("b", 1e-4, 1e-2, log=True)
+
+def test_searcher():
+    searcher = Searcher()
+    searcher = Searcher(metric=['m1', 'm2'], mode=['max', 'min'])
+    searcher.set_search_properties(None, None, None)
+    searcher.suggest = searcher.on_pause = searcher.on_unpause = lambda _: {}
+    searcher.on_trial_complete = lambda trial_id, result, error: None
+    searcher = ConcurrencyLimiter(searcher, max_concurrent=2, batch=True)
+    searcher.suggest("t1")
+    searcher.suggest("t2")
+    searcher.on_pause("t1")
+    searcher.on_unpause("t1")
+    searcher.suggest("t3")
+    searcher.on_trial_complete("t1", {})
+    searcher.on_trial_complete("t2", {})
+    searcher.set_state({})
+    print(searcher.get_state())
+    import optuna
+    config = {
+        "a": optuna.distributions.UniformDistribution(6, 8),
+        "b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2),
+    }
+    searcher = OptunaSearch(
+        config, points_to_evaluate=[{"a": 6, "b": 1e-3}],
+        evaluated_rewards=[{'m': 2}], metric='m', mode='max'
+    )
+    config = {
+        "a": sample.uniform(6, 8),
+        "b": sample.loguniform(1e-4, 1e-2)
+    }
+    searcher = OptunaSearch(
+        config, points_to_evaluate=[{"a": 6, "b": 1e-3}],
+        evaluated_rewards=[{'m': 2}], metric='m', mode='max'
+    )
+    searcher = OptunaSearch(
+        define_search_space, points_to_evaluate=[{"a": 6, "b": 1e-3}],
+        # evaluated_rewards=[{'m': 2}], metric='m', mode='max'
+        mode='max'
+    )
+    searcher = OptunaSearch()
+    # searcher.set_search_properties('m', 'min', define_search_space)
+    searcher.set_search_properties('m', 'min', config)
+    searcher.suggest('t1')
+    searcher.on_trial_complete('t1', None, False)
+    searcher.suggest('t2')
+    searcher.on_trial_complete('t2', None, True)
+    searcher.suggest('t3')
+    searcher.on_trial_complete('t3', {'m': np.nan})
+    searcher.save('test/tune/optuna.pickle')
+    searcher.restore('test/tune/optuna.pickle')
+    searcher = BlendSearch(
+        metric="m",
+        global_search_alg=searcher, metric_constraints=[("c", "<", 1)])
+    searcher.set_search_properties(metric="m2", config=config)
+    searcher.set_search_properties(config={"time_budget_s": 0})
+    c = searcher.suggest('t1')
+    searcher.on_trial_complete("t1", {"config": c}, True)
+    c = searcher.suggest('t2')
+    searcher.on_trial_complete(
+        "t2", {"config": c, "m2": 1, "c": 2, "time_total_s": 1})
+    config1 = config.copy()
+    config1['_choice_'] = 0
+    searcher._expand_admissible_region(
+        lower={'root': [{'a': 0.5}, {'a': 0.4}]},
+        upper={'root': [{'a': 0.9}, {'a': 0.8}]},
+        space={'root': config1},
+    )
+    searcher = CFO(
+        metric='m', mode='min', space=config,
+        points_to_evaluate=[{'a': 7, 'b': 1e-3}, {'a': 6, 'b': 3e-4}],
+        evaluated_rewards=[1, 1])
+    searcher.suggest("t1")
+    searcher.suggest("t2")
+    searcher.on_trial_result('t3', {})
+    c = searcher.generate_parameters(1)
+    searcher.receive_trial_result(1, c, {'reward': 0})
+    searcher.update_search_space(
+        {
+            "a": {
+                "_value": [1, 2],
+                "_type": "choice",
+            },
+            "b": {
+                "_value": [1, 3],
+                "_type": "randint",
+            },
+            "c": {
+                "_value": [.1, 3],
+                "_type": "uniform",
+            },
+            "d": {
+                "_value": [2, 8, 2],
+                "_type": "quniform",
+            },
+            "e": {
+                "_value": [2, 8],
+                "_type": "loguniform",
+            },
+            "f": {
+                "_value": [2, 8, 2],
+                "_type": "qloguniform",
+            },
+            "g": {
+                "_value": [0, 2],
+                "_type": "normal",
+            },
+            "h": {
+                "_value": [0, 2, 2],
+                "_type": "qnormal",
+            },
+        }
+    )
@ -15,7 +15,7 @@ import xgboost as xgb
 import logging
 logger = logging.getLogger(__name__)
 os.makedirs('logs', exist_ok=True)
-logger.addHandler(logging.FileHandler('logs/tune_xgboost.log'))
+logger.addHandler(logging.FileHandler('logs/tune.log'))
 logger.setLevel(logging.INFO)
 
 
@ -223,12 +223,22 @@ def test_nested():
     logger.info(f"BlendSearch exp best config: {best_trial.config}")
     logger.info(f"BlendSearch exp best result: {best_trial.last_result}")
 
+    points_to_evaluate = [
+        {"b": .99, "cost_related": {"a": 3}},
+        {"b": .99, "cost_related": {"a": 2}},
+    ]
     analysis = tune.run(
         simple_func,
         config=search_space,
         low_cost_partial_config={
             "cost_related": {"a": 1}
         },
+        points_to_evaluate=points_to_evaluate,
+        evaluated_rewards=[
+            (config["cost_related"]["a"] - 4)**2
+            + (config["b"] - config["cost_related"]["a"])**2
+            for config in points_to_evaluate
+        ],
         metric="obj",
         mode="min",
         metric_constraints=[("ab", "<=", 4)],