warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type
Chi Wang 2021-09-04 01:42:21 -07:00 committed by GitHub
parent 5fdfa2559b
commit e46573a01d
26 changed files with 599 additions and 707 deletions

View File

@ -5,7 +5,6 @@
''' '''
import time import time
from typing import Callable, Optional from typing import Callable, Optional
import warnings
from functools import partial from functools import partial
import numpy as np import numpy as np
from scipy.sparse import issparse from scipy.sparse import issparse
@ -144,9 +143,8 @@ class SearchState:
class AutoMLState: class AutoMLState:
def _prepare_sample_train_data(self, sample_size): def _prepare_sample_train_data(self, sample_size):
full_size = len(self.y_train)
sampled_weight = groups = None sampled_weight = groups = None
if sample_size <= full_size: if sample_size <= self.data_size:
if isinstance(self.X_train, pd.DataFrame): if isinstance(self.X_train, pd.DataFrame):
sampled_X_train = self.X_train.iloc[:sample_size] sampled_X_train = self.X_train.iloc[:sample_size]
else: else:
@ -225,13 +223,13 @@ class AutoMLState:
self, estimator, config_w_resource, sample_size=None self, estimator, config_w_resource, sample_size=None
): ):
if not sample_size: if not sample_size:
sample_size = config_w_resource['FLAML_sample_size'] sample_size = config_w_resource.get(
'FLAML_sample_size', len(self.y_train_all))
config = config_w_resource.get('ml', config_w_resource).copy() config = config_w_resource.get('ml', config_w_resource).copy()
if 'FLAML_sample_size' in config: if 'FLAML_sample_size' in config:
del config['FLAML_sample_size'] del config['FLAML_sample_size']
if "learner" in config: if "learner" in config:
del config['learner'] del config["learner"]
assert sample_size is not None
sampled_X_train, sampled_y_train, sampled_weight, groups = \ sampled_X_train, sampled_y_train, sampled_weight, groups = \
self._prepare_sample_train_data(sample_size) self._prepare_sample_train_data(sample_size)
if sampled_weight is not None: if sampled_weight is not None:
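
For illustration, a hypothetical config_w_resource of the shape the code above receives when data sampling is enabled, and the extraction it performs (all values are made up):

# Hypothetical trial config: learner hyperparameters live under 'ml',
# and 'FLAML_sample_size' records the sampled training-set size.
y_train_all = list(range(20000))
config_w_resource = {
    'ml': {'learner': 'lgbm', 'n_estimators': 31, 'num_leaves': 12},
    'FLAML_sample_size': 10000,
}
sample_size = config_w_resource.get('FLAML_sample_size', len(y_train_all))
config = config_w_resource.get('ml', config_w_resource).copy()
config.pop('FLAML_sample_size', None)  # same effect as the guarded del above
config.pop('learner', None)
assert sample_size == 10000
assert config == {'n_estimators': 31, 'num_leaves': 12}
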
@ -316,10 +314,7 @@ class AutoML:
'''An object with `predict()` and `predict_proba()` method (for '''An object with `predict()` and `predict_proba()` method (for
classification), storing the best trained model. classification), storing the best trained model.
''' '''
if self._trained_estimator: return self.__dict__.get('_trained_estimator')
return self._trained_estimator
else:
return None
def best_model_for_estimator(self, estimator_name): def best_model_for_estimator(self, estimator_name):
'''Return the best model found for a particular estimator '''Return the best model found for a particular estimator
@ -331,11 +326,8 @@ class AutoML:
An object with `predict()` and `predict_proba()` method (for An object with `predict()` and `predict_proba()` method (for
classification), storing the best trained model for estimator_name. classification), storing the best trained model for estimator_name.
''' '''
if estimator_name in self._search_states: state = self._search_states.get(estimator_name)
state = self._search_states[estimator_name] return state and getattr(state, 'trained_estimator', None)
if hasattr(state, 'trained_estimator'):
return state.trained_estimator
return None
@property @property
def best_estimator(self): def best_estimator(self):
@ -374,10 +366,12 @@ class AutoML:
@property @property
def classes_(self): def classes_(self):
'''A list of n_classes elements for class labels.''' '''A list of n_classes elements for class labels.'''
if self._label_transformer: attr = getattr(self, "_label_transformer", None)
return self._label_transformer.classes_.tolist() if attr:
if self._trained_estimator: return attr.classes_.tolist()
return self._trained_estimator.classes_.tolist() attr = getattr(self, "_trained_estimator", None)
if attr:
return attr.classes_.tolist()
return None return None
def predict(self, X_test): def predict(self, X_test):
@ -394,12 +388,13 @@ class AutoML:
A array-like of shape n * 1 - - each element is a predicted A array-like of shape n * 1 - - each element is a predicted
label for an instance. label for an instance.
''' '''
if self._trained_estimator is None: estimator = getattr(self, "_trained_estimator", None)
warnings.warn( if estimator is None:
logger.warning(
"No estimator is trained. Please run fit with enough budget.") "No estimator is trained. Please run fit with enough budget.")
return None return None
X_test = self._preprocess(X_test) X_test = self._preprocess(X_test)
y_pred = self._trained_estimator.predict(X_test) y_pred = estimator.predict(X_test)
if y_pred.ndim > 1 and isinstance(y_pred, np.ndarray): if y_pred.ndim > 1 and isinstance(y_pred, np.ndarray):
y_pred = y_pred.flatten() y_pred = y_pred.flatten()
if self._label_transformer: if self._label_transformer:
@ -443,10 +438,9 @@ class AutoML:
dataframe = dataframe.copy() dataframe = dataframe.copy()
dataframe = dataframe.rename(columns={label[0]: 'ds', label[1]: 'y'}) dataframe = dataframe.rename(columns={label[0]: 'ds', label[1]: 'y'})
elif dataframe is not None: elif dataframe is not None:
if ('ds' not in dataframe) or ('y' not in dataframe): assert 'ds' in dataframe and 'y' in dataframe, (
raise ValueError( 'For forecasting task, dataframe must have columns '
'For forecasting task, dataframe must have columns "ds" and "y" ' '"ds" and "y" with the dates and values respectively.')
'with the dates and values respectively.')
elif (X_train_all is not None) and (y_train_all is not None): elif (X_train_all is not None) and (y_train_all is not None):
dataframe = pd.DataFrame(X_train_all) dataframe = pd.DataFrame(X_train_all)
dataframe = dataframe.rename(columns={dataframe.columns[0]: 'ds'}) dataframe = dataframe.rename(columns={dataframe.columns[0]: 'ds'})
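
For reference, a minimal dataframe in the shape the forecasting branch now asserts; the dates and values below are illustrative:

import pandas as pd

# task='forecast' expects a 'ds' column with timestamps and a 'y'
# column with the observed values.
dataframe = pd.DataFrame({
    'ds': pd.date_range('2021-01-01', periods=12, freq='M'),
    'y': list(range(12)),
})
assert 'ds' in dataframe and 'y' in dataframe
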
@ -456,30 +450,29 @@ class AutoML:
label = 'y' label = 'y'
if X_train_all is not None and y_train_all is not None: if X_train_all is not None and y_train_all is not None:
if not (isinstance(X_train_all, np.ndarray) or issparse(X_train_all) assert (
or isinstance(X_train_all, pd.DataFrame)): isinstance(X_train_all, np.ndarray) or issparse(X_train_all)
raise ValueError( or isinstance(X_train_all, pd.DataFrame)), (
"X_train_all must be a numpy array, a pandas dataframe, " "X_train_all must be a numpy array, a pandas dataframe, "
"or Scipy sparse matrix.") "or Scipy sparse matrix.")
if not (isinstance(y_train_all, np.ndarray) assert (
or isinstance(y_train_all, pd.Series)): isinstance(y_train_all, np.ndarray)
raise ValueError( or isinstance(y_train_all, pd.Series)), (
"y_train_all must be a numpy array or a pandas series.") "y_train_all must be a numpy array or a pandas series.")
if X_train_all.size == 0 or y_train_all.size == 0: assert X_train_all.size != 0 and y_train_all.size != 0, (
raise ValueError("Input data must not be empty.") "Input data must not be empty.")
if isinstance(y_train_all, np.ndarray): if isinstance(y_train_all, np.ndarray):
y_train_all = y_train_all.flatten() y_train_all = y_train_all.flatten()
if X_train_all.shape[0] != y_train_all.shape[0]: assert X_train_all.shape[0] == y_train_all.shape[0], (
raise ValueError( "# rows in X_train must match length of y_train.")
"# rows in X_train must match length of y_train.")
self._df = isinstance(X_train_all, pd.DataFrame) self._df = isinstance(X_train_all, pd.DataFrame)
self._nrow, self._ndim = X_train_all.shape self._nrow, self._ndim = X_train_all.shape
X, y = X_train_all, y_train_all X, y = X_train_all, y_train_all
elif dataframe is not None and label is not None: elif dataframe is not None and label is not None:
if not isinstance(dataframe, pd.DataFrame): assert isinstance(dataframe, pd.DataFrame), (
raise ValueError("dataframe must be a pandas DataFrame") "dataframe must be a pandas DataFrame")
if label not in dataframe.columns: assert label in dataframe.columns, (
raise ValueError("label must a column name in dataframe") "label must a column name in dataframe")
self._df = True self._df = True
X = dataframe.drop(columns=label) X = dataframe.drop(columns=label)
self._nrow, self._ndim = X.shape self._nrow, self._ndim = X.shape
@ -498,23 +491,21 @@ class AutoML:
self._label_transformer = self._transformer.label_transformer self._label_transformer = self._transformer.label_transformer
self._sample_weight_full = self._state.fit_kwargs.get('sample_weight') self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
if X_val is not None and y_val is not None: if X_val is not None and y_val is not None:
if not (isinstance(X_val, np.ndarray) or issparse(X_val) assert (
or isinstance(X_val, pd.DataFrame)): isinstance(X_val, np.ndarray) or issparse(X_val)
raise ValueError( or isinstance(X_val, pd.DataFrame)), (
"X_val must be None, a numpy array, a pandas dataframe, " "X_val must be None, a numpy array, a pandas dataframe, "
"or Scipy sparse matrix.") "or Scipy sparse matrix.")
if not (isinstance(y_val, np.ndarray) assert (
or isinstance(y_val, pd.Series)): isinstance(y_val, np.ndarray) or isinstance(y_val, pd.Series)
raise ValueError( ), "y_val must be None, a numpy array or a pandas series."
"y_val must be None, a numpy array or a pandas series.") assert X_val.size != 0 and y_val.size != 0, (
if X_val.size == 0 or y_val.size == 0: "Validation data are expected to be nonempty. "
raise ValueError( "Use None for X_val and y_val if no validation data.")
"Validation data are expected to be nonempty. "
"Use None for X_val and y_val if no validation data.")
if isinstance(y_val, np.ndarray): if isinstance(y_val, np.ndarray):
y_val = y_val.flatten() y_val = y_val.flatten()
if X_val.shape[0] != y_val.shape[0]: assert X_val.shape[0] == y_val.shape[0], (
raise ValueError("# rows in X_val must match length of y_val.") "# rows in X_val must match length of y_val.")
if self._transformer: if self._transformer:
self._state.X_val = self._transformer.transform(X_val) self._state.X_val = self._transformer.transform(X_val)
else: else:
@ -548,7 +539,7 @@ class AutoML:
X_train_all, y_train_all = self._X_train_all, self._y_train_all X_train_all, y_train_all = self._X_train_all, self._y_train_all
if issparse(X_train_all): if issparse(X_train_all):
X_train_all = X_train_all.tocsr() X_train_all = X_train_all.tocsr()
if self._state.task in ('binary:logistic', 'multi:softmax') \ if self._state.task in ('binary', 'multi') \
and self._state.fit_kwargs.get('sample_weight') is None \ and self._state.fit_kwargs.get('sample_weight') is None \
and self._split_type != 'time': and self._split_type != 'time':
# logger.info(f"label {pd.unique(y_train_all)}") # logger.info(f"label {pd.unique(y_train_all)}")
@ -638,7 +629,7 @@ class AutoML:
y_train, y_val = y_train_all[train_idx], y_train_all[val_idx] y_train, y_val = y_train_all[train_idx], y_train_all[val_idx]
self._state.groups, self._state.groups_val = self._state.groups[ self._state.groups, self._state.groups_val = self._state.groups[
train_idx], self._state.groups[val_idx] train_idx], self._state.groups[val_idx]
elif self._state.task != 'regression': elif self._state.task in ('binary', 'multi'):
# for classification, make sure the labels are complete in both # for classification, make sure the labels are complete in both
# training and validation data # training and validation data
label_set, first = np.unique(y_train_all, return_index=True) label_set, first = np.unique(y_train_all, return_index=True)
@ -760,7 +751,7 @@ class AutoML:
record_id: An integer of the record ID in the file, record_id: An integer of the record ID in the file,
0 corresponds to the first trial 0 corresponds to the first trial
task: A string of the task type, task: A string of the task type,
'binary', 'multi', or 'regression' 'binary', 'multi', 'regression', 'forecast', 'rank'
Returns: Returns:
An estimator object for the given configuration An estimator object for the given configuration
@ -875,9 +866,10 @@ class AutoML:
best_val_loss = val_loss best_val_loss = val_loss
sample_size = size sample_size = size
if not training_duration: if not training_duration:
logger.warning(
f"No estimator found within time_budget={time_budget}")
from .model import BaseEstimator as Estimator from .model import BaseEstimator as Estimator
self._trained_estimator = Estimator() self._trained_estimator = Estimator()
self._trained_estimator.model = None
return training_duration return training_duration
if not best: if not best:
return return
@ -898,11 +890,7 @@ class AutoML:
elif eval_method == 'auto': elif eval_method == 'auto':
eval_method = self._decide_eval_method(time_budget) eval_method = self._decide_eval_method(time_budget)
self.modelcount = 0 self.modelcount = 0
if self._state.task != 'forecast': self._prepare_data(eval_method, split_ratio, n_splits)
self._prepare_data(eval_method, split_ratio, n_splits)
else:
self._prepare_data(eval_method, split_ratio, n_splits,
period=self._state.fit_kwargs['period'])
self._state.time_budget = None self._state.time_budget = None
self._state.n_jobs = n_jobs self._state.n_jobs = n_jobs
self._trained_estimator = self._state._train_with_config( self._trained_estimator = self._state._train_with_config(
@ -911,9 +899,10 @@ class AutoML:
return training_duration return training_duration
def _decide_split_type(self, split_type): def _decide_split_type(self, split_type):
if self._state.task in ('classification', 'binary', 'multi'): if self._state.task == 'classification':
self._state.task = get_classification_objective( self._state.task = get_classification_objective(
len(np.unique(self._y_train_all))) len(np.unique(self._y_train_all)))
if self._state.task in ('binary', 'multi'):
assert split_type in [None, "stratified", "uniform", "time"] assert split_type in [None, "stratified", "uniform", "time"]
self._split_type = split_type or "stratified" self._split_type = split_type or "stratified"
elif self._state.task == 'regression': elif self._state.task == 'regression':
@ -1248,13 +1237,14 @@ class AutoML:
For time series forecasting, must be None or 'time'. For time series forecasting, must be None or 'time'.
For ranking task, must be None or 'group'. For ranking task, must be None or 'group'.
hpo_method: str or None, default=None | The hyperparameter hpo_method: str or None, default=None | The hyperparameter
optimization method. When it is None, CFO is used. optimization method. By default, CFO is used for sequential
search and BlendSearch is used for parallel search.
No need to set when using flaml's default search space or using No need to set when using flaml's default search space or using
a simple customized search space. When set to 'bs', BlendSearch a simple customized search space. When set to 'bs', BlendSearch
is used. BlendSearch can be tried when the search space is is used. BlendSearch can be tried when the search space is
complex, for example, containing multiple disjoint, discontinuous complex, for example, containing multiple disjoint, discontinuous
subspaces. When set to 'random' and the argument 'n_concurrent_trials' subspaces. When set to 'random' and the argument
is larger than 1, RandomSearch is used. `n_concurrent_trials` is larger than 1, random search is used.
starting_points: A dictionary to specify the starting hyperparameter starting_points: A dictionary to specify the starting hyperparameter
config for the estimators. config for the estimators.
Keys are the name of the estimators, and values are the starting Keys are the name of the estimators, and values are the starting
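
A usage sketch of the behavior described above, on a made-up dataset: with n_concurrent_trials=1 (the default) CFO is picked automatically, and passing hpo_method='bs' opts into BlendSearch.

from sklearn.datasets import make_classification
from flaml import AutoML

X_train, y_train = make_classification(n_samples=1000, random_state=0)
automl = AutoML()
# hpo_method can normally be left unset; 'bs' forces BlendSearch, which
# can help with complex or disjoint custom search spaces.
automl.fit(X_train, y_train, task='binary', time_budget=10, hpo_method='bs')
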
@ -1355,8 +1345,7 @@ class AutoML:
estimator_list)) estimator_list))
self.estimator_list = estimator_list self.estimator_list = estimator_list
self._hpo_method = hpo_method or ( self._hpo_method = hpo_method or (
'cfo' if n_concurrent_trials == 1 or len(estimator_list) == 1 'cfo' if n_concurrent_trials == 1 else 'bs')
else 'bs')
self._state.time_budget = time_budget self._state.time_budget = time_budget
self._active_estimators = estimator_list.copy() self._active_estimators = estimator_list.copy()
self._ensemble = ensemble self._ensemble = ensemble
@ -1379,14 +1368,16 @@ class AutoML:
if self._best_estimator: if self._best_estimator:
logger.info("fit succeeded") logger.info("fit succeeded")
logger.info(f"Time taken to find the best model: {self._time_taken_best_iter}") logger.info(f"Time taken to find the best model: {self._time_taken_best_iter}")
if self._time_taken_best_iter >= time_budget * 0.7 and not all( if self._hpo_method in ('cfo', 'bs') and (
self._time_taken_best_iter >= time_budget * 0.7) and not all(
state.search_alg and state.search_alg.searcher.is_ls_ever_converged state.search_alg and state.search_alg.searcher.is_ls_ever_converged
for state in self._search_states.values() for state in self._search_states.values()
): ):
logger.warn("Time taken to find the best model is {0:.0f}% of the " logger.warning(
"provided time budget and not all estimators' hyperparameter " "Time taken to find the best model is {0:.0f}% of the "
"search converged. Consider increasing the time budget.".format( "provided time budget and not all estimators' hyperparameter "
self._time_taken_best_iter / time_budget * 100)) "search converged. Consider increasing the time budget.".format(
self._time_taken_best_iter / time_budget * 100))
if not keep_search_state: if not keep_search_state:
# release space # release space
@ -1413,20 +1404,16 @@ class AutoML:
"Please run pip install flaml[ray]") "Please run pip install flaml[ray]")
if self._hpo_method in ('cfo', 'grid'): if self._hpo_method in ('cfo', 'grid'):
from flaml import CFO as SearchAlgo from flaml import CFO as SearchAlgo
elif 'optuna' == self._hpo_method:
from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
elif 'bs' == self._hpo_method: elif 'bs' == self._hpo_method:
from flaml import BlendSearch as SearchAlgo from flaml import BlendSearch as SearchAlgo
elif 'cfocat' == self._hpo_method:
from flaml.searcher.cfo_cat import CFOCat as SearchAlgo
elif 'random' == self._hpo_method: elif 'random' == self._hpo_method:
from ray.tune.suggest import BasicVariantGenerator as SearchAlgo from ray.tune.suggest import BasicVariantGenerator as SearchAlgo
from ray.tune.sample import Domain as RayDomain from ray.tune.sample import Domain
from .tune.sample import Domain
else: else:
raise NotImplementedError( raise NotImplementedError(
f"hpo_method={self._hpo_method} is not recognized. " f"hpo_method={self._hpo_method} is not recognized. "
"'cfo' and 'bs' are supported.") "'cfo' and 'bs' are supported.")
space = self.search_space
if self._hpo_method == 'random': if self._hpo_method == 'random':
# Any point in points_to_evaluate must consist of hyperparameters # that are tunable, which can be identified by checking whether
# that are tunable, which can be identified by checking whether # that are tunable, which can be identified by checking whether
@ -1434,19 +1421,19 @@ class AutoML:
# the 'Domain' class from flaml or ray.tune # the 'Domain' class from flaml or ray.tune
points_to_evaluate = self.points_to_evaluate.copy() points_to_evaluate = self.points_to_evaluate.copy()
to_del = [] to_del = []
for k, v in self.search_space.items(): for k, v in space.items():
if not (isinstance(v, Domain) or isinstance(v, RayDomain)): if not isinstance(v, Domain):
to_del.append(k) to_del.append(k)
for k in to_del: for k in to_del:
for p in points_to_evaluate: for p in points_to_evaluate:
del p[k] if k in p:
del p[k]
search_alg = SearchAlgo(max_concurrent=self._n_concurrent_trials, search_alg = SearchAlgo(
points_to_evaluate=points_to_evaluate) max_concurrent=self._n_concurrent_trials,
points_to_evaluate=points_to_evaluate)
else: else:
search_alg = SearchAlgo( search_alg = SearchAlgo(
metric='val_loss', metric='val_loss', space=space,
space=self.search_space,
low_cost_partial_config=self.low_cost_partial_config, low_cost_partial_config=self.low_cost_partial_config,
points_to_evaluate=self.points_to_evaluate, points_to_evaluate=self.points_to_evaluate,
cat_hp_cost=self.cat_hp_cost, cat_hp_cost=self.cat_hp_cost,
@ -1463,7 +1450,7 @@ class AutoML:
resources_per_trial = { resources_per_trial = {
"cpu": self._state.n_jobs} if self._state.n_jobs > 1 else None "cpu": self._state.n_jobs} if self._state.n_jobs > 1 else None
analysis = ray.tune.run( analysis = ray.tune.run(
self.trainable, search_alg=search_alg, config=self.search_space, self.trainable, search_alg=search_alg, config=space,
metric='val_loss', mode='min', resources_per_trial=resources_per_trial, metric='val_loss', mode='min', resources_per_trial=resources_per_trial,
time_budget_s=self._state.time_budget, num_samples=self._max_iter, time_budget_s=self._state.time_budget, num_samples=self._max_iter,
verbose=self.verbose) verbose=self.verbose)
@ -1521,6 +1508,7 @@ class AutoML:
from flaml import CFO as SearchAlgo from flaml import CFO as SearchAlgo
elif 'optuna' == self._hpo_method: elif 'optuna' == self._hpo_method:
try: try:
from ray import __version__ as ray_version
assert ray_version >= '1.0.0' assert ray_version >= '1.0.0'
from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo from ray.tune.suggest.optuna import OptunaSearch as SearchAlgo
except (ImportError, AssertionError): except (ImportError, AssertionError):
@ -1600,7 +1588,9 @@ class AutoML:
else: else:
algo = SearchAlgo( algo = SearchAlgo(
metric='val_loss', mode='min', space=search_space, metric='val_loss', mode='min', space=search_space,
points_to_evaluate=points_to_evaluate, points_to_evaluate=points_to_evaluate
if len(search_state.init_config) == len(
search_space) else None,
) )
search_state.search_alg = ConcurrencyLimiter(algo, search_state.search_alg = ConcurrencyLimiter(algo,
max_concurrent=1) max_concurrent=1)
@ -1710,13 +1700,16 @@ class AutoML:
search_state.best_loss, search_state.best_loss,
self._best_estimator, self._best_estimator,
self._state.best_loss)) self._state.best_loss))
if all(state.search_alg and state.search_alg.searcher.is_ls_ever_converged if self._hpo_method in ('cfo', 'bs') and all(
for state in self._search_states.values()) and ( state.search_alg and state.search_alg.searcher.is_ls_ever_converged
self._state.time_from_start for state in self._search_states.values()) and (
> self._warn_threshold * self._time_taken_best_iter): self._state.time_from_start
logger.warn("All estimator hyperparameters local search has converged at least once, " > self._warn_threshold * self._time_taken_best_iter):
f"and the total search time exceeds {self._warn_threshold} times the time taken " logger.warning(
"to find the best model.") "All estimator hyperparameters local search has "
"converged at least once, and the total search time "
f"exceeds {self._warn_threshold} times the time taken "
"to find the best model.")
self._warn_threshold *= 10 self._warn_threshold *= 10
else: else:
logger.info(f"no enough budget for learner {estimator}") logger.info(f"no enough budget for learner {estimator}")
@ -1766,6 +1759,8 @@ class AutoML:
self._best_estimator = None self._best_estimator = None
self._retrained_config = {} self._retrained_config = {}
self._warn_threshold = 10 self._warn_threshold = 10
self._selected = None
self.modelcount = 0
if self._n_concurrent_trials == 1: if self._n_concurrent_trials == 1:
self._search_sequential() self._search_sequential()
@ -1782,7 +1777,7 @@ class AutoML:
if self._trained_estimator: if self._trained_estimator:
logger.info(f'selected model: {self._trained_estimator.model}') logger.info(f'selected model: {self._trained_estimator.model}')
if self._ensemble and self._state.task in ( if self._ensemble and self._state.task in (
'binary:logistic', 'multi:softmax', 'regression', 'binary', 'multi', 'regression',
): ):
search_states = list(x for x in self._search_states.items() search_states = list(x for x in self._search_states.items()
if x[1].trained_estimator) if x[1].trained_estimator)
@ -1795,7 +1790,7 @@ class AutoML:
logger.info(estimators) logger.info(estimators)
if len(estimators) <= 1: if len(estimators) <= 1:
return return
if self._state.task in ('binary:logistic', 'multi:softmax'): if self._state.task in ('binary', 'multi'):
from sklearn.ensemble import StackingClassifier as Stacker from sklearn.ensemble import StackingClassifier as Stacker
else: else:
from sklearn.ensemble import StackingRegressor as Stacker from sklearn.ensemble import StackingRegressor as Stacker
@ -1838,9 +1833,6 @@ class AutoML:
else: else:
logger.info( logger.info(
"not retraining because the time budget is too small.") "not retraining because the time budget is too small.")
else:
self._selected = self._trained_estimator = None
self.modelcount = 0
if self.model and mlflow is not None and mlflow.active_run(): if self.model and mlflow is not None and mlflow.active_run():
mlflow.sklearn.log_model(self.model, 'best_model') mlflow.sklearn.log_model(self.model, 'best_model')
@ -1886,8 +1878,7 @@ class AutoML:
speed = delta_loss / delta_time speed = delta_loss / delta_time
if speed: if speed:
estimated_cost = max(2 * gap / speed, estimated_cost) estimated_cost = max(2 * gap / speed, estimated_cost)
if estimated_cost == 0: estimated_cost = estimated_cost or 1e-10
estimated_cost = 1e-10
inv.append(1 / estimated_cost) inv.append(1 / estimated_cost)
else: else:
estimated_cost = self._eci[i] estimated_cost = self._eci[i]

View File

@ -261,7 +261,7 @@ class DataTransformer:
cat_columns, num_columns, datetime_columns cat_columns, num_columns, datetime_columns
self._drop = drop self._drop = drop
if task in ('binary:logistic', 'multi:softmax'): if task in ('binary', 'multi', 'classification'):
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
self.label_transformer = LabelEncoder() self.label_transformer = LabelEncoder()
y = self.label_transformer.fit_transform(y) y = self.label_transformer.fit_transform(y)
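
The label transformer is a plain scikit-learn LabelEncoder; a quick illustration of what it does to classification labels:

from sklearn.preprocessing import LabelEncoder

label_transformer = LabelEncoder()
y = label_transformer.fit_transform(['cat', 'dog', 'cat'])
print(y)                           # [0 1 0]
print(label_transformer.classes_)  # ['cat' 'dog']
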

View File

@ -24,7 +24,7 @@ def get_estimator_class(task, estimator_name):
''' when adding a new learner, need to add an elif branch ''' ''' when adding a new learner, need to add an elif branch '''
if 'xgboost' == estimator_name: if 'xgboost' == estimator_name:
if 'regression' in task: if 'regression' == task:
estimator_class = XGBoostEstimator estimator_class = XGBoostEstimator
else: else:
estimator_class = XGBoostSklearnEstimator estimator_class = XGBoostSklearnEstimator
@ -179,7 +179,8 @@ def _eval_estimator(config, estimator, X_train, y_train, X_test, y_test, weight_
fit_kwargs.get('groups')) fit_kwargs.get('groups'))
if isinstance(metric_for_logging, dict): if isinstance(metric_for_logging, dict):
pred_time = metric_for_logging.get('pred_time', 0) pred_time = metric_for_logging.get('pred_time', 0)
test_pred_y = None # eval_metric may return test_pred_y but not necessarily. Setting None for now. test_pred_y = None
# eval_metric may return test_pred_y but not necessarily. Setting None for now.
return test_loss, metric_for_logging, pred_time, test_pred_y return test_loss, metric_for_logging, pred_time, test_pred_y
@ -193,10 +194,10 @@ def get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_te
# fit_kwargs['X_val'] = X_test # fit_kwargs['X_val'] = X_test
# fit_kwargs['y_val'] = y_test # fit_kwargs['y_val'] = y_test
estimator.fit(X_train, y_train, budget, **fit_kwargs) estimator.fit(X_train, y_train, budget, **fit_kwargs)
test_loss, metric_for_logging, pred_time, _ = _eval_estimator(config, estimator, test_loss, metric_for_logging, pred_time, _ = _eval_estimator(
X_train, y_train, X_test, y_test, config, estimator, X_train, y_train, X_test, y_test,
weight_test, groups_test, eval_metric, obj, weight_test, groups_test, eval_metric, obj,
labels, log_training_metric, fit_kwargs) labels, log_training_metric, fit_kwargs)
train_time = time.time() - start train_time = time.time() - start
return test_loss, metric_for_logging, train_time, pred_time return test_loss, metric_for_logging, train_time, pred_time
@ -212,7 +213,7 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
valid_fold_num = total_fold_num = 0 valid_fold_num = total_fold_num = 0
n = kf.get_n_splits() n = kf.get_n_splits()
X_train_split, y_train_split = X_train_all, y_train_all X_train_split, y_train_split = X_train_all, y_train_all
if task == 'binary:logistics' or task == 'multi:softmax': if task in ('binary', 'multi'):
labels = np.unique(y_train_all) labels = np.unique(y_train_all)
else: else:
labels = None labels = None
@ -346,9 +347,9 @@ def train_estimator(
def get_classification_objective(num_labels: int) -> str: def get_classification_objective(num_labels: int) -> str:
if num_labels == 2: if num_labels == 2:
objective_name = 'binary:logistic' objective_name = 'binary'
else: else:
objective_name = 'multi:softmax' objective_name = 'multi'
return objective_name return objective_name
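
A sanity check of the renamed task labels (assuming the function is importable from flaml.ml):

from flaml.ml import get_classification_objective

assert get_classification_objective(2) == 'binary'
assert get_classification_objective(5) == 'multi'
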

View File

@ -3,7 +3,6 @@
* Licensed under the MIT License. * Licensed under the MIT License.
''' '''
import warnings
import numpy as np import numpy as np
import xgboost as xgb import xgboost as xgb
import time import time
@ -31,12 +30,12 @@ class BaseEstimator:
for both regression and classification for both regression and classification
''' '''
def __init__(self, task='binary:logistic', **params): def __init__(self, task='binary', **params):
'''Constructor '''Constructor
Args: Args:
task: A string of the task type, one of task: A string of the task type, one of
'binary:logistic', 'multi:softmax', 'regression' 'binary', 'multi', 'regression', 'rank', 'forecast'
n_jobs: An integer of the number of parallel threads n_jobs: An integer of the number of parallel threads
params: A dictionary of the hyperparameter names and values params: A dictionary of the hyperparameter names and values
''' '''
@ -48,7 +47,7 @@ class BaseEstimator:
del self.params['_estimator_type'] del self.params['_estimator_type']
else: else:
self._estimator_type = "classifier" if task in ( self._estimator_type = "classifier" if task in (
'binary:logistic', 'multi:softmax') else "regressor" 'binary', 'multi') else "regressor"
def get_params(self, deep=False): def get_params(self, deep=False):
params = self.params.copy() params = self.params.copy()
@ -145,11 +144,10 @@ class BaseEstimator:
Each element at (i,j) is the probability for instance i to be in Each element at (i,j) is the probability for instance i to be in
class j class j
''' '''
if 'regression' in self._task: assert self._task in ('binary', 'multi'), (
raise ValueError('Regression tasks do not support predict_prob') 'predict_proba() is only for classification tasks.')
else: X_test = self._preprocess(X_test)
X_test = self._preprocess(X_test) return self._model.predict_proba(X_test)
return self._model.predict_proba(X_test)
def cleanup(self): def cleanup(self):
pass pass
@ -193,7 +191,7 @@ class BaseEstimator:
class SKLearnEstimator(BaseEstimator): class SKLearnEstimator(BaseEstimator):
def __init__(self, task='binary:logistic', **params): def __init__(self, task='binary', **params):
super().__init__(task, **params) super().__init__(task, **params)
def _preprocess(self, X): def _preprocess(self, X):
@ -264,21 +262,18 @@ class LGBMEstimator(BaseEstimator):
n_estimators = int(round(config['n_estimators'])) n_estimators = int(round(config['n_estimators']))
return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8 return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
def __init__(self, task='binary:logistic', log_max_bin=8, **params): def __init__(self, task='binary', log_max_bin=8, **params):
super().__init__(task, **params) super().__init__(task, **params)
if "objective" not in self.params: if "objective" not in self.params:
# Default: regression for LGBMRegressor, # Default: regression for LGBMRegressor,
# binary or multiclass for LGBMClassifier # binary or multiclass for LGBMClassifier
if 'regression' == task: objective = 'regression'
objective = 'regression' if 'binary' in task:
elif 'binary' in task:
objective = 'binary' objective = 'binary'
elif 'multi' in task: elif 'multi' in task:
objective = 'multiclass' objective = 'multiclass'
elif 'rank' == task: elif 'rank' == task:
objective = 'lambdarank' objective = 'lambdarank'
else:
objective = 'regression'
self.params["objective"] = objective self.params["objective"] = objective
if "n_estimators" in self.params: if "n_estimators" in self.params:
self.params["n_estimators"] = int(round(self.params["n_estimators"])) self.params["n_estimators"] = int(round(self.params["n_estimators"]))
@ -477,7 +472,7 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
return XGBoostEstimator.cost_relative2lgbm() return XGBoostEstimator.cost_relative2lgbm()
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, self, task='binary', n_jobs=1,
n_estimators=4, max_leaves=4, subsample=1.0, n_estimators=4, max_leaves=4, subsample=1.0,
min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0, min_child_weight=1, learning_rate=0.1, reg_lambda=1.0, reg_alpha=0.0,
colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist', colsample_bylevel=1.0, colsample_bytree=1.0, tree_method='hist',
@ -506,11 +501,10 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
'use_label_encoder': params.get('use_label_encoder', False), 'use_label_encoder': params.get('use_label_encoder', False),
}) })
if 'regression' == task: self.estimator_class = xgb.XGBRegressor
self.estimator_class = xgb.XGBRegressor if 'rank' == task:
elif 'rank' == task:
self.estimator_class = xgb.XGBRanker self.estimator_class = xgb.XGBRanker
else: elif task in ('binary', 'multi'):
self.estimator_class = xgb.XGBClassifier self.estimator_class = xgb.XGBClassifier
self._time_per_iter = None self._time_per_iter = None
self._train_size = 0 self._train_size = 0
@ -543,7 +537,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
'low_cost_init_value': 4, 'low_cost_init_value': 4,
}, },
} }
if task != 'regression': if task in ('binary', 'multi'):
space['criterion'] = { space['criterion'] = {
'domain': tune.choice(['gini', 'entropy']), 'domain': tune.choice(['gini', 'entropy']),
# 'init_value': 'gini', # 'init_value': 'gini',
@ -555,7 +549,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
return 2.0 return 2.0
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, self, task='binary', n_jobs=1,
n_estimators=4, max_features=1.0, criterion='gini', max_leaves=4, n_estimators=4, max_features=1.0, criterion='gini', max_leaves=4,
**params **params
): ):
@ -569,9 +563,8 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
'max_features': float(max_features), 'max_features': float(max_features),
"max_leaf_nodes": params.get('max_leaf_nodes', int(round(max_leaves))), "max_leaf_nodes": params.get('max_leaf_nodes', int(round(max_leaves))),
}) })
if 'regression' in task: self.estimator_class = RandomForestRegressor
self.estimator_class = RandomForestRegressor if task in ('binary', 'multi'):
else:
self.estimator_class = RandomForestClassifier self.estimator_class = RandomForestClassifier
self.params['criterion'] = criterion self.params['criterion'] = criterion
@ -586,7 +579,7 @@ class ExtraTreeEstimator(RandomForestEstimator):
def cost_relative2lgbm(cls): def cost_relative2lgbm(cls):
return 1.9 return 1.9
def __init__(self, task='binary:logistic', **params): def __init__(self, task='binary', **params):
super().__init__(task, **params) super().__init__(task, **params)
if 'regression' in task: if 'regression' in task:
self.estimator_class = ExtraTreesRegressor self.estimator_class = ExtraTreesRegressor
@ -610,7 +603,7 @@ class LRL1Classifier(SKLearnEstimator):
return 160 return 160
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0, self, task='binary', n_jobs=1, tol=0.0001, C=1.0,
**params **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
@ -621,11 +614,9 @@ class LRL1Classifier(SKLearnEstimator):
'solver': params.get("solver", 'saga'), 'solver': params.get("solver", 'saga'),
'n_jobs': n_jobs, 'n_jobs': n_jobs,
}) })
if 'regression' in task: assert task in ('binary', 'multi'), (
self.estimator_class = None 'LogisticRegression for classification task only')
raise NotImplementedError('LR does not support regression task') self.estimator_class = LogisticRegression
else:
self.estimator_class = LogisticRegression
class LRL2Classifier(SKLearnEstimator): class LRL2Classifier(SKLearnEstimator):
@ -639,7 +630,7 @@ class LRL2Classifier(SKLearnEstimator):
return 25 return 25
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, tol=0.0001, C=1.0, self, task='binary', n_jobs=1, tol=0.0001, C=1.0,
**params **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
@ -650,11 +641,9 @@ class LRL2Classifier(SKLearnEstimator):
'solver': params.get("solver", 'lbfgs'), 'solver': params.get("solver", 'lbfgs'),
'n_jobs': n_jobs, 'n_jobs': n_jobs,
}) })
if 'regression' in task: assert task in ('binary', 'multi'), (
self.estimator_class = None 'LogisticRegression for classification task only')
raise NotImplementedError('LR does not support regression task') self.estimator_class = LogisticRegression
else:
self.estimator_class = LogisticRegression
class CatBoostEstimator(BaseEstimator): class CatBoostEstimator(BaseEstimator):
@ -711,7 +700,7 @@ class CatBoostEstimator(BaseEstimator):
return X return X
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, self, task='binary', n_jobs=1,
n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params n_estimators=8192, learning_rate=0.1, early_stopping_rounds=4, **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
@ -723,10 +712,9 @@ class CatBoostEstimator(BaseEstimator):
'verbose': params.get('verbose', False), 'verbose': params.get('verbose', False),
'random_seed': params.get("random_seed", 10242048), 'random_seed': params.get("random_seed", 10242048),
}) })
if 'regression' in task: from catboost import CatBoostRegressor
from catboost import CatBoostRegressor self.estimator_class = CatBoostRegressor
self.estimator_class = CatBoostRegressor if task in ('binary', 'multi'):
else:
from catboost import CatBoostClassifier from catboost import CatBoostClassifier
self.estimator_class = CatBoostClassifier self.estimator_class = CatBoostClassifier
@ -831,7 +819,7 @@ class KNeighborsEstimator(BaseEstimator):
return 30 return 30
def __init__( def __init__(
self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params self, task='binary', n_jobs=1, n_neighbors=5, **params
): ):
super().__init__(task, **params) super().__init__(task, **params)
self.params.update({ self.params.update({
@ -839,10 +827,9 @@ class KNeighborsEstimator(BaseEstimator):
'weights': params.get('weights', 'distance'), 'weights': params.get('weights', 'distance'),
'n_jobs': n_jobs, 'n_jobs': n_jobs,
}) })
if 'regression' in task: from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsRegressor self.estimator_class = KNeighborsRegressor
self.estimator_class = KNeighborsRegressor if task in ('binary', 'multi'):
else:
from sklearn.neighbors import KNeighborsClassifier from sklearn.neighbors import KNeighborsClassifier
self.estimator_class = KNeighborsClassifier self.estimator_class = KNeighborsClassifier
@ -920,7 +907,7 @@ class FBProphet(BaseEstimator):
forecast = self._model.predict(X_test) forecast = self._model.predict(X_test)
return forecast['yhat'] return forecast['yhat']
else: else:
warnings.warn( logger.warning(
"Estimator is not fit yet. Please run fit() before predict().") "Estimator is not fit yet. Please run fit() before predict().")
return np.ones(X_test.shape[0]) return np.ones(X_test.shape[0])
@ -954,8 +941,9 @@ class ARIMA(FBProphet):
return train_df return train_df
def fit(self, X_train, y_train, budget=None, **kwargs): def fit(self, X_train, y_train, budget=None, **kwargs):
from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator import warnings
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator
current_time = time.time() current_time = time.time()
train_df = self._join(X_train, y_train) train_df = self._join(X_train, y_train)
model = ARIMA_estimator( model = ARIMA_estimator(

View File

@ -29,12 +29,11 @@ class AutoTransformers:
.. code-block:: python .. code-block:: python
autohf = AutoTransformers() autohf = AutoTransformers()
autohf_settings = {"resources_per_trial": {"cpu": 1}, autohf_settings = {
"num_samples": -1, "resources_per_trial": {"cpu": 1, "gpu": 1},
"time_budget": 100000, "num_samples": -1,
"ckpt_per_epoch": 1, "time_budget": 60,
"fp16": False, }
}
validation_metric, analysis = autohf.fit(**autohf_settings) validation_metric, analysis = autohf.fit(**autohf_settings)
@ -45,10 +44,11 @@ class AutoTransformers:
search_space = {} search_space = {}
if mode == "grid": if mode == "grid":
# TODO add test
for each_hp in config_json.keys(): for each_hp in config_json.keys():
this_config = config_json[each_hp] this_config = config_json[each_hp]
assert isinstance(this_config, dict) or isinstance(this_config, list), \ assert isinstance(this_config, dict) or isinstance(this_config, list), \
"config of " + each_hp + " must be dict or list" "config of " + each_hp + " must be dict or list for grid search"
search_space[each_hp] = ray.tune.grid_search(this_config) search_space[each_hp] = ray.tune.grid_search(this_config)
else: else:
for each_hp in config_json.keys(): for each_hp in config_json.keys():
@ -85,10 +85,6 @@ class AutoTransformers:
search_space_hpo_json, search_space_hpo_json,
mode=self.jobid_config.mod) mode=self.jobid_config.mod)
@staticmethod
def _wrapper(func, *args): # with star
return func(*args)
@staticmethod @staticmethod
def _get_split_name(data_raw, fold_name=None): def _get_split_name(data_raw, fold_name=None):
if fold_name: if fold_name:
@ -179,7 +175,7 @@ class AutoTransformers:
data_raw = load_dataset(JobID.dataset_list_to_str(self.jobid_config.dat), data_raw = load_dataset(JobID.dataset_list_to_str(self.jobid_config.dat),
self.jobid_config.subdat) self.jobid_config.subdat)
else: else:
data_raw = AutoTransformers._wrapper(load_dataset, *self.jobid_config.dat) data_raw = load_dataset(*self.jobid_config.dat)
self._train_name, self._dev_name, self._test_name = AutoTransformers._get_split_name( self._train_name, self._dev_name, self._test_name = AutoTransformers._get_split_name(
data_raw, data_raw,
@ -349,6 +345,7 @@ class AutoTransformers:
return training_args_config, per_model_config return training_args_config, per_model_config
def _objective(self, config, reporter, checkpoint_dir=None): def _objective(self, config, reporter, checkpoint_dir=None):
# TODO add test
from transformers.trainer_utils import set_seed from transformers.trainer_utils import set_seed
self._set_transformers_verbosity(self._transformers_verbose) self._set_transformers_verbosity(self._transformers_verbose)
@ -827,6 +824,7 @@ class AutoTransformers:
test_trainer = TrainerForAutoTransformers(best_model, training_args) test_trainer = TrainerForAutoTransformers(best_model, training_args)
if self.jobid_config.spt == "ori": if self.jobid_config.spt == "ori":
# TODO add test
if "label" in self.test_dataset.features.keys(): if "label" in self.test_dataset.features.keys():
self.test_dataset.remove_columns_("label") self.test_dataset.remove_columns_("label")
print("Cleaning the existing label column from test data") print("Cleaning the existing label column from test data")

View File

@ -1,2 +1,2 @@
from .trial_scheduler import TrialScheduler, FIFOScheduler from .trial_scheduler import TrialScheduler
from .online_scheduler import OnlineScheduler, OnlineSuccessiveDoublingScheduler, ChaChaScheduler from .online_scheduler import OnlineScheduler, OnlineSuccessiveDoublingScheduler, ChaChaScheduler

View File

@ -1,12 +1,12 @@
import numpy as np import numpy as np
import logging import logging
from typing import Optional, Dict from typing import Dict
from flaml.scheduler import FIFOScheduler, TrialScheduler from flaml.scheduler import TrialScheduler
from flaml.tune import Trial from flaml.tune import Trial
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class OnlineScheduler(FIFOScheduler): class OnlineScheduler(TrialScheduler):
"""Implementation of the OnlineFIFOSchedulers. """Implementation of the OnlineFIFOSchedulers.
Methods: Methods:

View File

@ -17,10 +17,8 @@ This source file is adapted here because ray does not fully support Windows.
Copyright (c) Microsoft Corporation. Copyright (c) Microsoft Corporation.
''' '''
from typing import Dict, Optional
from flaml.tune import trial_runner from flaml.tune import trial_runner
from flaml.tune.result import DEFAULT_METRIC
from flaml.tune.trial import Trial from flaml.tune.trial import Trial
@ -31,127 +29,10 @@ class TrialScheduler:
PAUSE = "PAUSE" #: Status for pausing trial execution PAUSE = "PAUSE" #: Status for pausing trial execution
STOP = "STOP" #: Status for stopping trial execution STOP = "STOP" #: Status for stopping trial execution
_metric = None
@property
def metric(self):
return self._metric
def set_search_properties(self, metric: Optional[str],
mode: Optional[str]) -> bool:
"""Pass search properties to scheduler.
This method acts as an alternative to instantiating schedulers
that react to metrics with their own `metric` and `mode` parameters.
Args:
metric (str): Metric to optimize
mode (str): One of ["min", "max"]. Direction to optimize.
"""
if self._metric and metric:
return False
if metric:
self._metric = metric
if self._metric is None:
# Per default, use anonymous metric
self._metric = DEFAULT_METRIC
return True
def on_trial_add(self, trial_runner: "trial_runner.TrialRunner", def on_trial_add(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial): trial: Trial):
"""Called when a new trial is added to the trial runner."""
raise NotImplementedError
def on_trial_error(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
"""Notification for the error of trial.
This will only be called when the trial is in the RUNNING state."""
raise NotImplementedError
def on_trial_result(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict) -> str:
"""Called on each intermediate result returned by a trial.
At this point, the trial scheduler can make a decision by returning
one of CONTINUE, PAUSE, and STOP. This will only be called when the
trial is in the RUNNING state."""
raise NotImplementedError
def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict):
"""Notification for the completion of trial.
This will only be called when the trial is in the RUNNING state and
either completes naturally or by manual termination."""
raise NotImplementedError
def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
"""Called to remove trial.
This is called when the trial is in PAUSED or PENDING state. Otherwise,
call `on_trial_complete`."""
raise NotImplementedError
def choose_trial_to_run(
self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]:
"""Called to choose a new trial to run.
This should return one of the trials in trial_runner that is in
the PENDING or PAUSED state. This function must be idempotent.
If no trial is ready, return None."""
raise NotImplementedError
def debug_string(self) -> str:
"""Returns a human readable message for printing to the console."""
raise NotImplementedError
def save(self, checkpoint_path: str):
"""Save trial scheduler to a checkpoint"""
raise NotImplementedError
def restore(self, checkpoint_path: str):
"""Restore trial scheduler from checkpoint."""
raise NotImplementedError
class FIFOScheduler(TrialScheduler):
"""Simple scheduler that just runs trials in submission order."""
def on_trial_add(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
pass
def on_trial_error(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial):
pass
def on_trial_result(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict) -> str:
return TrialScheduler.CONTINUE
def on_trial_complete(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial, result: Dict):
pass pass
def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner", def on_trial_remove(self, trial_runner: "trial_runner.TrialRunner",
trial: Trial): trial: Trial):
pass pass
def choose_trial_to_run(
self, trial_runner: "trial_runner.TrialRunner") -> Optional[Trial]:
for trial in trial_runner.get_trials():
if (trial.status == Trial.PENDING
and trial_runner.has_resources_for_trial(trial)):
return trial
for trial in trial_runner.get_trials():
if (trial.status == Trial.PAUSED
and trial_runner.has_resources_for_trial(trial)):
return trial
return None
def debug_string(self) -> str:
return "Using FIFO scheduling algorithm."

View File

@ -14,14 +14,14 @@ try:
assert ray_version >= '1.0.0' assert ray_version >= '1.0.0'
from ray.tune.suggest import Searcher from ray.tune.suggest import Searcher
from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
from ray.tune.utils.util import unflatten_dict
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .suggestion import Searcher from .suggestion import Searcher
from .suggestion import OptunaSearch as GlobalSearch from .suggestion import OptunaSearch as GlobalSearch
from ..tune.trial import unflatten_dict from ..tune.trial import unflatten_dict, flatten_dict
from .search_thread import SearchThread from .search_thread import SearchThread
from .flow2 import FLOW2 from .flow2 import FLOW2
from ..tune.space import add_cost_to_space, indexof, normalize, define_by_run_func from ..tune.space import (
add_cost_to_space, indexof, normalize, define_by_run_func)
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -40,9 +40,10 @@ class BlendSearch(Searcher):
metric: Optional[str] = None, metric: Optional[str] = None,
mode: Optional[str] = None, mode: Optional[str] = None,
space: Optional[dict] = None, space: Optional[dict] = None,
points_to_evaluate: Optional[List[dict]] = None,
low_cost_partial_config: Optional[dict] = None, low_cost_partial_config: Optional[dict] = None,
cat_hp_cost: Optional[dict] = None, cat_hp_cost: Optional[dict] = None,
points_to_evaluate: Optional[List[dict]] = None,
evaluated_rewards: Optional[List] = None,
prune_attr: Optional[str] = None, prune_attr: Optional[str] = None,
min_resource: Optional[float] = None, min_resource: Optional[float] = None,
max_resource: Optional[float] = None, max_resource: Optional[float] = None,
@ -61,7 +62,6 @@ class BlendSearch(Searcher):
mode: A string in ['min', 'max'] to specify the objective as mode: A string in ['min', 'max'] to specify the objective as
minimization or maximization. minimization or maximization.
space: A dictionary to specify the search space. space: A dictionary to specify the search space.
points_to_evaluate: Initial parameter suggestions to be run first.
low_cost_partial_config: A dictionary from a subset of low_cost_partial_config: A dictionary from a subset of
controlled dimensions to the initial low-cost values. controlled dimensions to the initial low-cost values.
e.g., e.g.,
@ -80,6 +80,13 @@ class BlendSearch(Searcher):
i.e., the relative cost of the i.e., the relative cost of the
three choices of 'tree_method' is 1, 1 and 2 respectively. three choices of 'tree_method' is 1, 1 and 2 respectively.
points_to_evaluate: Initial parameter suggestions to be run first.
evaluated_rewards (list): If you have previously evaluated the
parameters passed in as points_to_evaluate you can avoid
re-running those trials by passing in the reward attributes
as a list so the optimizer can be told the results without
needing to re-compute the trial. Must be the same length as
points_to_evaluate.
prune_attr: A string of the attribute used for pruning. prune_attr: A string of the attribute used for pruning.
Not necessarily in space. Not necessarily in space.
When prune_attr is in space, it is a hyperparameter, e.g., When prune_attr is in space, it is a hyperparameter, e.g.,
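
A warm-start sketch using the evaluated_rewards parameter documented above; the space, points, and rewards are made-up values from a hypothetical earlier run:

from flaml import BlendSearch, tune

points_to_evaluate = [{'a': 2, 'b': 0.5}, {'a': 8, 'b': 0.1}]
evaluated_rewards = [0.71, 0.64]  # same length/order as points_to_evaluate

search_alg = BlendSearch(
    metric='score', mode='max',
    space={'a': tune.randint(1, 10), 'b': tune.uniform(0, 1)},
    points_to_evaluate=points_to_evaluate,
    evaluated_rewards=evaluated_rewards,
)

Per the constructor change below, with mode='max' only the best-scoring point(s) (here {'a': 2, 'b': 0.5}) are kept as local-search start points, while the full lists are handed to the global searcher.
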
@ -122,7 +129,20 @@ class BlendSearch(Searcher):
"consider providing low-cost values for cost-related hps via " "consider providing low-cost values for cost-related hps via "
"'low_cost_partial_config'." "'low_cost_partial_config'."
) )
self._points_to_evaluate = points_to_evaluate or [] if evaluated_rewards and mode:
self._points_to_evaluate = []
self._evaluated_rewards = []
best = max(evaluated_rewards) if mode == 'max' else min(
evaluated_rewards)
# only keep the best points as start points
for i, r in enumerate(evaluated_rewards):
if r == best:
p = points_to_evaluate[i]
self._points_to_evaluate.append(p)
self._evaluated_rewards.append(r)
else:
self._points_to_evaluate = points_to_evaluate or []
self._evaluated_rewards = evaluated_rewards or []
self._config_constraints = config_constraints self._config_constraints = config_constraints
self._metric_constraints = metric_constraints self._metric_constraints = metric_constraints
if self._metric_constraints: if self._metric_constraints:
@ -131,40 +151,45 @@ class BlendSearch(Searcher):
self._cat_hp_cost = cat_hp_cost or {} self._cat_hp_cost = cat_hp_cost or {}
if space: if space:
add_cost_to_space(space, init_config, self._cat_hp_cost) add_cost_to_space(space, init_config, self._cat_hp_cost)
self._ls = self.LocalSearch(
init_config, metric, mode, space, prune_attr,
min_resource, max_resource, reduction_factor, self.cost_attr, seed)
if global_search_alg is not None: if global_search_alg is not None:
self._gs = global_search_alg self._gs = global_search_alg
elif getattr(self, '__name__', None) != 'CFO': elif getattr(self, '__name__', None) != 'CFO':
from functools import partial if space and self._ls.hierarchical:
gs_space = partial(define_by_run_func, space=space) from functools import partial
gs_space = partial(define_by_run_func, space=space)
evaluated_rewards = None # not supported by define-by-run
else:
gs_space = space
gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32)
if experimental:
import optuna as ot
sampler = ot.samplers.TPESampler(
seed=seed, multivariate=True, group=True)
else:
sampler = None
try: try:
gs_seed = seed - 10 if (seed - 10) >= 0 else seed - 11 + (1 << 32) self._gs = GlobalSearch(
if experimental: space=gs_space, metric=metric, mode=mode, seed=gs_seed,
import optuna as ot sampler=sampler, points_to_evaluate=points_to_evaluate,
sampler = ot.samplers.TPESampler( evaluated_rewards=evaluated_rewards)
seed=seed, multivariate=True, group=True) except ValueError:
else:
sampler = None
self._gs = GlobalSearch( self._gs = GlobalSearch(
space=gs_space, metric=metric, mode=mode, seed=gs_seed, space=gs_space, metric=metric, mode=mode, seed=gs_seed,
sampler=sampler) sampler=sampler)
except TypeError:
self._gs = GlobalSearch(space=gs_space, metric=metric, mode=mode)
self._gs.space = space self._gs.space = space
else: else:
self._gs = None self._gs = None
self._experimental = experimental self._experimental = experimental
if getattr(self, '__name__', None) == 'CFO' and points_to_evaluate and len( if getattr(self, '__name__', None) == 'CFO' and points_to_evaluate and len(
points_to_evaluate) > 1: self._points_to_evaluate) > 1:
# use the best config in points_to_evaluate as the start point # use the best config in points_to_evaluate as the start point
self._candidate_start_points = {} self._candidate_start_points = {}
self._started_from_low_cost = not low_cost_partial_config self._started_from_low_cost = not low_cost_partial_config
else: else:
self._candidate_start_points = None self._candidate_start_points = None
self._ls = self.LocalSearch(
init_config, metric, mode, space, prune_attr,
min_resource, max_resource, reduction_factor, self.cost_attr, seed)
self._is_ls_ever_converged = False
self._subspace = {} # the subspace for each trial id
if space: if space:
self._init_search() self._init_search()
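
The define-by-run conversion above is now used only when the local search space is hierarchical; for illustration, a nested space of the kind that needs it (assuming flaml's nested-choice convention):

from flaml import tune

# The chosen 'model' dict decides which hyperparameters exist, so this
# space cannot be expressed as a flat dict for the global searcher.
hierarchical_space = {
    'model': tune.choice([
        {'name': 'lgbm', 'num_leaves': tune.randint(4, 64)},
        {'name': 'xgboost', 'max_leaves': tune.randint(4, 64)},
    ]),
    'learning_rate': tune.loguniform(1e-3, 1.0),
}
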
@ -187,6 +212,7 @@ class BlendSearch(Searcher):
if not self._ls.space: if not self._ls.space:
# the search space can be set only once # the search space can be set only once
if self._gs is not None: if self._gs is not None:
# define-by-run is not supported via set_search_properties
self._gs.set_search_properties(metric, mode, config) self._gs.set_search_properties(metric, mode, config)
self._gs.space = config self._gs.space = config
if config: if config:
@ -216,6 +242,8 @@ class BlendSearch(Searcher):
def _init_search(self): def _init_search(self):
'''initialize the search '''initialize the search
''' '''
self._is_ls_ever_converged = False
self._subspace = {} # the subspace for each trial id
self._metric_target = np.inf * self._ls.metric_op self._metric_target = np.inf * self._ls.metric_op
self._search_thread_pool = { self._search_thread_pool = {
# id: int -> thread: SearchThread # id: int -> thread: SearchThread
@ -239,6 +267,7 @@ class BlendSearch(Searcher):
else: else:
self._metric_constraint_satisfied = True self._metric_constraint_satisfied = True
self._metric_constraint_penalty = None self._metric_constraint_penalty = None
self.best_resource = self._ls.min_resource
def save(self, checkpoint_path: str): def save(self, checkpoint_path: str):
''' save states to a checkpoint path ''' save states to a checkpoint path
@ -295,10 +324,11 @@ class BlendSearch(Searcher):
trial_id, result, error) trial_id, result, error)
del self._trial_proposed_by[trial_id] del self._trial_proposed_by[trial_id]
if result: if result:
config = {} config = result.get('config', {})
for key, value in result.items(): if not config:
if key.startswith('config/'): for key, value in result.items():
config[key[7:]] = value if key.startswith('config/'):
config[key[7:]] = value
signature = self._ls.config_signature( signature = self._ls.config_signature(
config, self._subspace.get(trial_id, {})) config, self._subspace.get(trial_id, {}))
if error: # remove from result cache if error: # remove from result cache
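Note: with the change above, on_trial_complete can recover a trial's config either from a whole 'config' entry (produced by the replay path later in this diff) or, failing that, from Ray-style 'config/'-prefixed keys. Two hypothetical payloads it now accepts:

result_a = {"obj": 0.3, "time_total_s": 1.2,
            "config": {"a": 7, "b": 1e-3}}    # taken as-is
result_b = {"obj": 0.3, "time_total_s": 1.2,
            "config/a": 7, "config/b": 1e-3}  # reassembled key by key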
@ -309,17 +339,22 @@ class BlendSearch(Searcher):
objective = result[self._ls.metric] objective = result[self._ls.metric]
if (objective - self._metric_target) * self._ls.metric_op < 0: if (objective - self._metric_target) * self._ls.metric_op < 0:
self._metric_target = objective self._metric_target = objective
if self._ls.resource:
self.best_resource = config[self._ls.prune_attr]
if thread_id: if thread_id:
if not self._metric_constraint_satisfied: if not self._metric_constraint_satisfied:
# no point has been found to satisfy metric constraint # no point has been found to satisfy metric constraint
self._expand_admissible_region( self._expand_admissible_region(
self._ls_bound_min, self._ls_bound_max, self._ls_bound_min, self._ls_bound_max,
self._subspace.get(trial_id, self._ls.space)) self._subspace.get(trial_id, self._ls.space))
# if self._gs is not None and self._experimental: if self._gs is not None and self._experimental and (
# # TODO: recover when supported not self._ls.hierarchical):
# converted = convert_key(config, self._gs.space) self._gs.add_evaluated_point(
# logger.info(converted) flatten_dict(config), objective)
# self._gs.add_evaluated_point(converted, objective) # TODO: recover when supported
# converted = convert_key(config, self._gs.space)
# logger.info(converted)
# self._gs.add_evaluated_point(converted, objective)
elif metric_constraint_satisfied and self._create_condition( elif metric_constraint_satisfied and self._create_condition(
result): result):
# thread creator # thread creator
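Note: under the experimental TPE sampler and a non-hierarchical space, every completed point is now fed back to the global search. A sketch of the hand-off, using the flatten_dict fallback this commit imports from flaml.tune.trial:

from flaml.tune.trial import flatten_dict

config = {"cost_related": {"a": 3}, "b": 0.99}
objective = 0.25
flat = flatten_dict(config)  # {'cost_related/a': 3, 'b': 0.99}
# as in the diff: self._gs.add_evaluated_point(flat, objective)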
@ -496,10 +531,12 @@ class BlendSearch(Searcher):
''' '''
if self._init_used and not self._points_to_evaluate: if self._init_used and not self._points_to_evaluate:
choice, backup = self._select_thread() choice, backup = self._select_thread()
if choice < 0: # timeout # if choice < 0: # timeout
return None # return None
config = self._search_thread_pool[choice].suggest(trial_id) config = self._search_thread_pool[choice].suggest(trial_id)
if choice and config is None: if not choice and config is not None and self._ls.resource:
config[self._ls.prune_attr] = self.best_resource
elif choice and config is None:
# local search thread finishes # local search thread finishes
if self._search_thread_pool[choice].converged: if self._search_thread_pool[choice].converged:
self._expand_admissible_region( self._expand_admissible_region(
@ -544,9 +581,6 @@ class BlendSearch(Searcher):
self._trial_proposed_by[trial_id] = backup self._trial_proposed_by[trial_id] = backup
choice = backup choice = backup
if not choice: # global search if not choice: # global search
if self._ls._resource:
# TODO: min or median?
config[self._ls.prune_attr] = self._ls.min_resource
# temporarily relax admissible region for parallel proposals # temporarily relax admissible region for parallel proposals
self._update_admissible_region( self._update_admissible_region(
config, self._gs_admissible_min, self._gs_admissible_max, config, self._gs_admissible_min, self._gs_admissible_max,
@ -563,22 +597,35 @@ class BlendSearch(Searcher):
else: # use init config else: # use init config
if self._candidate_start_points is not None and self._points_to_evaluate: if self._candidate_start_points is not None and self._points_to_evaluate:
self._candidate_start_points[trial_id] = None self._candidate_start_points[trial_id] = None
init_config = self._points_to_evaluate.pop( reward = None
0) if self._points_to_evaluate else self._ls.init_config if self._points_to_evaluate:
init_config = self._points_to_evaluate.pop(0)
if self._evaluated_rewards:
reward = self._evaluated_rewards.pop(0)
else:
init_config = self._ls.init_config
config, space = self._ls.complete_config( config, space = self._ls.complete_config(
init_config, self._ls_bound_min, self._ls_bound_max) init_config, self._ls_bound_min, self._ls_bound_max)
config_signature = self._ls.config_signature(config, space) if reward is None:
result = self._result.get(config_signature) config_signature = self._ls.config_signature(config, space)
if result: # tried before result = self._result.get(config_signature)
return None if result: # tried before
elif result is None: # not tried before return None
self._result[config_signature] = {} elif result is None: # not tried before
else: # running but no result yet self._result[config_signature] = {}
return None else: # running but no result yet
return None
self._init_used = True self._init_used = True
self._trial_proposed_by[trial_id] = 0 self._trial_proposed_by[trial_id] = 0
self._search_thread_pool[0].running += 1 self._search_thread_pool[0].running += 1
self._subspace[trial_id] = space self._subspace[trial_id] = space
if reward is not None:
result = {
self._metric: reward, self.cost_attr: 1,
'config': config
}
self.on_trial_complete(trial_id, result)
return None
return config return config
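Note: the net effect of the replay branch above: a point with a known reward is booked via on_trial_complete (with unit cost) and suggest returns None, so the trial is never actually run. A behavioural sketch under those assumptions:

from flaml.searcher.blendsearch import BlendSearch
from flaml.tune import sample

space = {"a": sample.uniform(6, 8), "b": sample.loguniform(1e-4, 1e-2)}
searcher = BlendSearch(
    metric="m", mode="min", space=space,
    points_to_evaluate=[{"a": 7, "b": 1e-3}],
    evaluated_rewards=[0.4])
assert searcher.suggest("t0") is None  # replayed from the known reward
config = searcher.suggest("t1")        # fresh suggestion from the threads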
def _should_skip(self, choice, trial_id, config, space) -> bool: def _should_skip(self, choice, trial_id, config, space) -> bool:
@ -694,79 +741,88 @@ except (ImportError, AssertionError):
try: try:
from nni.tuner import Tuner as NNITuner from nni.tuner import Tuner as NNITuner
from nni.utils import extract_scalar_reward from nni.utils import extract_scalar_reward
class BlendSearchTuner(BlendSearch, NNITuner):
'''Tuner class for NNI
'''
def receive_trial_result(self, parameter_id, parameters, value,
**kwargs):
'''
Receive trial's final result.
parameter_id: int
parameters: object created by 'generate_parameters()'
value: final metrics of the trial, including default metric
'''
result = {}
for key, value in parameters.items():
result['config/' + key] = value
reward = extract_scalar_reward(value)
result[self._metric] = reward
# if nni does not report training cost,
# use 'sequence' as an approximation;
# if no sequence, use a constant 1
result[self.cost_attr] = value.get(self.cost_attr, value.get(
'sequence', 1))
self.on_trial_complete(str(parameter_id), result)
...
def generate_parameters(self, parameter_id, **kwargs) -> Dict:
'''
Returns a set of trial (hyper-)parameters, as a serializable object
parameter_id: int
'''
return self.suggest(str(parameter_id))
...
def update_search_space(self, search_space):
'''
Tuners are advised to support updating search space at run-time.
If a tuner can only set search space once before generating first hyper-parameters,
it should explicitly document this behaviour.
search_space: JSON object created by experiment owner
'''
config = {}
for key, value in search_space.items():
v = value.get("_value")
_type = value['_type']
if _type == 'choice':
config[key] = choice(v)
elif _type == 'randint':
config[key] = randint(v[0], v[1] - 1)
elif _type == 'uniform':
config[key] = uniform(v[0], v[1])
elif _type == 'quniform':
config[key] = quniform(v[0], v[1], v[2])
elif _type == 'loguniform':
config[key] = loguniform(v[0], v[1])
elif _type == 'qloguniform':
config[key] = qloguniform(v[0], v[1], v[2])
elif _type == 'normal':
config[key] = randn(v[1], v[2])
elif _type == 'qnormal':
config[key] = qrandn(v[1], v[2], v[3])
else:
raise ValueError(
f'unsupported type in search_space {_type}')
self._ls.set_search_properties(None, None, config)
if self._gs is not None:
self._gs.set_search_properties(None, None, config)
self._init_search()
except ImportError: except ImportError:
class BlendSearchTuner(BlendSearch): class NNITuner:
pass pass
def extract_scalar_reward(x: Dict):
return x.get('reward')
class BlendSearchTuner(BlendSearch, NNITuner):
'''Tuner class for NNI
'''
def receive_trial_result(self, parameter_id, parameters, value,
**kwargs):
'''
Receive trial's final result.
parameter_id: int
parameters: object created by 'generate_parameters()'
value: final metrics of the trial, including default metric
'''
result = {}
for k, v in parameters.items():
result['config/' + k] = v
reward = extract_scalar_reward(value)
result[self._metric] = reward
# if nni does not report training cost,
# use 'sequence' as an approximation;
# if no sequence, use a constant 1
result[self.cost_attr] = value.get(self.cost_attr, value.get(
'sequence', 1))
self.on_trial_complete(str(parameter_id), result)
...
def generate_parameters(self, parameter_id, **kwargs) -> Dict:
'''
Returns a set of trial (hyper-)parameters, as a serializable object
parameter_id: int
'''
return self.suggest(str(parameter_id))
...
def update_search_space(self, search_space):
'''
Tuners are advised to support updating search space at run-time.
If a tuner can only set search space once before generating first hyper-parameters,
it should explicitly document this behaviour.
search_space: JSON object created by experiment owner
'''
config = {}
for key, value in search_space.items():
v = value.get("_value")
_type = value['_type']
if _type == 'choice':
config[key] = choice(v)
elif _type == 'randint':
config[key] = randint(*v)
elif _type == 'uniform':
config[key] = uniform(*v)
elif _type == 'quniform':
config[key] = quniform(*v)
elif _type == 'loguniform':
config[key] = loguniform(*v)
elif _type == 'qloguniform':
config[key] = qloguniform(*v)
elif _type == 'normal':
config[key] = randn(*v)
elif _type == 'qnormal':
config[key] = qrandn(*v)
else:
raise ValueError(
f'unsupported type in search_space {_type}')
add_cost_to_space(config, {}, {})
self._ls = self.LocalSearch(
{}, self._ls.metric, self._mode, config, cost_attr=self.cost_attr,
seed=self._ls.seed)
if self._gs is not None:
self._gs = GlobalSearch(
space=config, metric=self._metric, mode=self._mode,
sampler=self._gs._sampler)
self._gs.space = config
self._init_search()
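Note: a hypothetical NNI search_space payload and the flaml domains update_search_space builds from it (the _type branches above; values illustrative):

search_space = {
    "lr":    {"_type": "loguniform", "_value": [1e-4, 1e-1]},
    "batch": {"_type": "choice",     "_value": [16, 32, 64]},
    "depth": {"_type": "randint",    "_value": [2, 8]},
}
# -> config = {'lr': loguniform(1e-4, 1e-1),
#              'batch': choice([16, 32, 64]),
#              'depth': randint(2, 8)}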
class CFO(BlendSearchTuner): class CFO(BlendSearchTuner):
''' class for CFO algorithm ''' class for CFO algorithm
@ -15,8 +15,9 @@ try:
from ray.tune.utils.util import flatten_dict, unflatten_dict from ray.tune.utils.util import flatten_dict, unflatten_dict
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .suggestion import Searcher from .suggestion import Searcher
from .variant_generator import generate_variants, flatten_dict, unflatten_dict from .variant_generator import generate_variants
from ..tune import sample from ..tune import sample
from ..tune.trial import flatten_dict, unflatten_dict
from ..tune.space import complete_config, denormalize, normalize from ..tune.space import complete_config, denormalize, normalize
@ -95,7 +96,7 @@ class FLOW2(Searcher):
self.space = space or {} self.space = space or {}
self._space = flatten_dict(self.space, prevent_delimiter=True) self._space = flatten_dict(self.space, prevent_delimiter=True)
self._random = np.random.RandomState(seed) self._random = np.random.RandomState(seed)
self._seed = seed self.seed = seed
self.init_config = init_config self.init_config = init_config
self.best_config = flatten_dict(init_config) self.best_config = flatten_dict(init_config)
self.prune_attr = prune_attr self.prune_attr = prune_attr
@ -142,7 +143,7 @@ class FLOW2(Searcher):
self._bounded_keys.append(key) self._bounded_keys.append(key)
if not hier: if not hier:
self._space_keys = sorted(self._tunable_keys) self._space_keys = sorted(self._tunable_keys)
self._hierarchical = hier self.hierarchical = hier
if (self.prune_attr and self.prune_attr not in self._space if (self.prune_attr and self.prune_attr not in self._space
and self.max_resource): and self.max_resource):
self.min_resource = self.min_resource or self._min_resource() self.min_resource = self.min_resource or self._min_resource()
@ -253,10 +254,10 @@ class FLOW2(Searcher):
init_config, self.metric, self.mode, init_config, self.metric, self.mode,
space, self.prune_attr, space, self.prune_attr,
self.min_resource, self.max_resource, self.min_resource, self.max_resource,
self.resource_multiple_factor, self.cost_attr, self._seed + 1) self.resource_multiple_factor, self.cost_attr, self.seed + 1)
flow2.best_obj = obj * self.metric_op # minimize internally flow2.best_obj = obj * self.metric_op # minimize internally
flow2.cost_incumbent = cost flow2.cost_incumbent = cost
self._seed += 1 self.seed += 1
return flow2 return flow2
def normalize(self, config, recursive=False) -> Dict: def normalize(self, config, recursive=False) -> Dict:
@ -502,7 +503,7 @@ class FLOW2(Searcher):
value_list = [] value_list = []
# self._space_keys doesn't contain keys with const values, # self._space_keys doesn't contain keys with const values,
# e.g., "eval_metric": ["logloss", "error"]. # e.g., "eval_metric": ["logloss", "error"].
keys = sorted(config.keys()) if self._hierarchical else self._space_keys keys = sorted(config.keys()) if self.hierarchical else self._space_keys
for key in keys: for key in keys:
value = config[key] value = config[key]
if key == self.prune_attr: if key == self.prune_attr:
@ -510,7 +511,7 @@ class FLOW2(Searcher):
else: else:
# key must be in space # key must be in space
domain = space[key] domain = space[key]
if self._hierarchical: if self.hierarchical:
# can't remove constant for hierarchical search space, # can't remove constant for hierarchical search space,
# e.g., learner # e.g., learner
if not (domain is None or type(domain) in (str, int, float) if not (domain is None or type(domain) in (str, int, float)
@ -12,7 +12,7 @@ try:
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .suggestion import Searcher from .suggestion import Searcher
from .flow2 import FLOW2 from .flow2 import FLOW2
from ..tune.space import unflatten_hierarchical from ..tune.space import add_cost_to_space, unflatten_hierarchical
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -46,6 +46,11 @@ class SearchThread:
self.cost_attr = cost_attr self.cost_attr = cost_attr
if search_alg: if search_alg:
self.space = self._space = search_alg.space # unflattened space self.space = self._space = search_alg.space # unflattened space
if self.space and not isinstance(search_alg, FLOW2) and isinstance(
search_alg._space, dict
):
# remember const config
self._const = add_cost_to_space(self.space, {}, {})
@classmethod @classmethod
def set_eps(cls, time_budget_s): def set_eps(cls, time_budget_s):
@ -59,7 +64,12 @@ class SearchThread:
else: else:
try: try:
config = self._search_alg.suggest(trial_id) config = self._search_alg.suggest(trial_id)
config, self.space = unflatten_hierarchical(config, self._space) if isinstance(self._search_alg._space, dict):
config.update(self._const)
else:
# define by run
config, self.space = unflatten_hierarchical(
config, self._space)
except FloatingPointError: except FloatingPointError:
logger.warning( logger.warning(
'The global search method raises FloatingPointError. ' 'The global search method raises FloatingPointError. '
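Note: the intent of the two hunks above, sketched. For a plain dict space, the constants remembered at construction are merged back into each global-search suggestion (Optuna's space drops them); a define-by-run space instead goes through unflatten_hierarchical. This assumes add_cost_to_space returns the constant part of the space, as its use here implies:

from flaml.tune import sample
from flaml.tune.space import add_cost_to_space

space = {"n_jobs": 1, "lr": sample.loguniform(1e-4, 1e-1)}
const = add_cost_to_space(space, {}, {})  # assumed: {'n_jobs': 1}
suggestion = {"lr": 0.01}                 # what the global search proposes
suggestion.update(const)                  # constants restored before eval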
@ -91,15 +91,6 @@ class Searcher:
mode: Optional[str] = None, mode: Optional[str] = None,
max_concurrent: Optional[int] = None, max_concurrent: Optional[int] = None,
use_early_stopped_trials: Optional[bool] = None): use_early_stopped_trials: Optional[bool] = None):
if use_early_stopped_trials is False:
raise DeprecationWarning(
"Early stopped trials are now always used. If this is a "
"problem, file an issue: https://github.com/ray-project/ray.")
if max_concurrent is not None:
logger.warning(
"DeprecationWarning: `max_concurrent` is deprecated for this "
"search algorithm. Use tune.suggest.ConcurrencyLimiter() "
"instead. This will raise an error in future versions of Ray.")
self._metric = metric self._metric = metric
self._mode = mode self._mode = mode
@ -152,83 +143,6 @@ class Searcher:
""" """
pass pass
def on_trial_complete(self,
trial_id: str,
result: Optional[Dict] = None,
error: bool = False):
"""Notification for the completion of trial.
Typically, this method is used for notifying the underlying
optimizer of the result.
Args:
trial_id (str): A unique string ID for the trial.
result (dict): Dictionary of metrics for current training progress.
Note that the result dict may include NaNs or
may not include the optimization metric. It is up to the
subclass implementation to preprocess the result to
avoid breaking the optimization process. Upon errors, this
may also be None.
error (bool): True if the training process raised an error.
"""
raise NotImplementedError
def suggest(self, trial_id: str) -> Optional[Dict]:
"""Queries the algorithm to retrieve the next set of parameters.
Arguments:
trial_id (str): Trial ID used for subsequent notifications.
Returns:
dict | FINISHED | None: Configuration for a trial, if possible.
If FINISHED is returned, Tune will be notified that
no more suggestions/configurations will be provided.
If None is returned, Tune will skip the querying of the
searcher for this step.
"""
raise NotImplementedError
def save(self, checkpoint_path: str):
"""Save state to path for this search algorithm.
Args:
checkpoint_path (str): File where the search algorithm
state is saved. This path should be used later when
restoring from file.
Example:
.. code-block:: python
search_alg = Searcher(...)
analysis = tune.run(
cost,
num_samples=5,
search_alg=search_alg,
name=self.experiment_name,
local_dir=self.tmpdir)
search_alg.save("./my_favorite_path.pkl")
.. versionchanged:: 0.8.7
Save is automatically called by `tune.run`. You can use
`restore_from_dir` to restore from an experiment directory
such as `~/ray_results/trainable`.
"""
raise NotImplementedError
def restore(self, checkpoint_path: str):
"""Restore state for this search algorithm
Args:
checkpoint_path (str): File where the search algorithm
state is saved. This path should be the same
as the one provided to "save".
Example:
.. code-block:: python
search_alg.save("./my_favorite_path.pkl")
search_alg2 = Searcher(...)
search_alg2 = ConcurrencyLimiter(search_alg2, 1)
search_alg2.restore(checkpoint_path)
tune.run(cost, num_samples=5, search_alg=search_alg2)
"""
raise NotImplementedError
def get_state(self) -> Dict:
raise NotImplementedError
def set_state(self, state: Dict):
raise NotImplementedError
@property @property
def metric(self) -> str: def metric(self) -> str:
"""The training result objective value attribute.""" """The training result objective value attribute."""
@ -536,14 +450,6 @@ class OptunaSearch(Searcher):
# Flatten to support nested dicts # Flatten to support nested dicts
space = flatten_dict(space, "/") space = flatten_dict(space, "/")
# Deprecate: 1.5
if isinstance(space, list):
logger.warning(
"Passing lists of `param.suggest_*()` calls to OptunaSearch "
"as a search space is deprecated and will be removed in "
"a future release of Ray. Please pass a dict mapping "
"to `optuna.distributions` objects instead.")
self._space = space self._space = space
self._points_to_evaluate = points_to_evaluate or [] self._points_to_evaluate = points_to_evaluate or []
@ -19,57 +19,16 @@ Copyright (c) Microsoft Corporation.
''' '''
import copy import copy
import logging import logging
from collections.abc import Mapping from typing import Any, Dict, Generator, List, Tuple
from typing import Any, Dict, Generator, List, Optional, Tuple
import numpy import numpy
import random import random
from ..tune.sample import Categorical, Domain, Function from ..tune.sample import Categorical, Domain
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def flatten_dict(dt, delimiter="/", prevent_delimiter=False):
dt = copy.deepcopy(dt)
if prevent_delimiter and any(delimiter in key for key in dt):
# Raise if delimiter is any of the keys
raise ValueError(
"Found delimiter `{}` in key when trying to flatten array."
"Please avoid using the delimiter in your specification.")
while any(isinstance(v, dict) for v in dt.values()):
remove = []
add = {}
for key, value in dt.items():
if isinstance(value, dict):
for subkey, v in value.items():
if prevent_delimiter and delimiter in subkey:
# Raise if delimiter is in any of the subkeys
raise ValueError(
"Found delimiter `{}` in key when trying to "
"flatten array. Please avoid using the delimiter "
"in your specification.")
add[delimiter.join([key, str(subkey)])] = v
remove.append(key)
dt.update(add)
for k in remove:
del dt[k]
return dt
def unflatten_dict(dt, delimiter="/"):
"""Unflatten dict. Does not support unflattening lists."""
dict_type = type(dt)
out = dict_type()
for key, val in dt.items():
path = key.split(delimiter)
item = out
for k in path[:-1]:
item = item.setdefault(k, dict_type())
item[path[-1]] = val
return out
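Note: these two helpers are deleted here in favour of the copies imported from flaml.tune.trial (see the import hunks above). Their behaviour, for reference:

from flaml.tune.trial import flatten_dict, unflatten_dict

nested = {"a": {"b": 1}, "c": 2}
flat = flatten_dict(nested)            # {'a/b': 1, 'c': 2}
assert unflatten_dict(flat) == nested  # lossless round trip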
class TuneError(Exception): class TuneError(Exception):
"""General error class raised by ray.tune.""" """General error class raised by ray.tune."""
pass pass
@ -84,16 +43,9 @@ def generate_variants(
variants in combination: variants in combination:
"activation": grid_search(["relu", "tanh"]) "activation": grid_search(["relu", "tanh"])
"learning_rate": grid_search([1e-3, 1e-4, 1e-5]) "learning_rate": grid_search([1e-3, 1e-4, 1e-5])
Lambda functions: These are evaluated to produce a concrete value, and
can express dependencies or conditional distributions between values.
They can also be used to express random search (e.g., by calling
into the `random` or `np` module).
"cpu": lambda spec: spec.config.num_workers
"batch_size": lambda spec: random.uniform(1, 1000)
Finally, to support defining specs in plain JSON / YAML, grid search Finally, to support defining specs in plain JSON / YAML, grid search
and lambda functions can also be defined alternatively as follows: can also be defined alternatively as follows:
"activation": {"grid_search": ["relu", "tanh"]} "activation": {"grid_search": ["relu", "tanh"]}
"cpu": {"eval": "spec.config.num_workers"}
Use `format_vars` to format the returned dict of hyperparameters. Use `format_vars` to format the returned dict of hyperparameters.
Yields: Yields:
(Dict of resolved variables, Spec object) (Dict of resolved variables, Spec object)
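Note: with lambda/eval support removed, grid search is the only spec-style syntax generate_variants still resolves, e.g.:

spec = {"activation": {"grid_search": ["relu", "tanh"]},
        "lr": {"grid_search": [1e-3, 1e-4, 1e-5]}}
# yields one resolved dict per combination:
# {'activation': 'relu', 'lr': 0.001}, {'activation': 'relu', 'lr': 0.0001}, ...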
@ -242,10 +194,6 @@ def _try_resolve(v) -> Tuple[bool, Any]:
if isinstance(v, Domain): if isinstance(v, Domain):
# Domain to sample from # Domain to sample from
return False, v return False, v
elif isinstance(v, dict) and len(v) == 1 and "eval" in v:
# Lambda function in eval syntax
return False, Function(
lambda spec: eval(v["eval"], _STANDARD_IMPORTS, {"spec": spec}))
elif isinstance(v, dict) and len(v) == 1 and "grid_search" in v: elif isinstance(v, dict) and len(v) == 1 and "grid_search" in v:
# Grid search values # Grid search values
grid_values = v["grid_search"] grid_values = v["grid_search"]
@ -325,11 +325,6 @@ class Categorical(Domain):
new.set_sampler(self._Uniform()) new.set_sampler(self._Uniform())
return new return new
def grid(self):
new = copy(self)
new.set_sampler(Grid())
return new
def __len__(self): def __len__(self):
return len(self.categories) return len(self.categories)
@ -344,55 +339,6 @@ class Categorical(Domain):
return f"{self.categories}" return f"{self.categories}"
class Function(Domain):
class _CallSampler(BaseSampler):
def sample(self,
domain: "Function",
spec: Optional[Union[List[Dict], Dict]] = None,
size: int = 1):
if domain.pass_spec:
items = [
domain.func(spec[i] if isinstance(spec, list) else spec)
for i in range(size)
]
else:
items = [domain.func() for i in range(size)]
return items if len(items) > 1 else domain.cast(items[0])
default_sampler_cls = _CallSampler
def __init__(self, func: Callable):
sig = signature(func)
pass_spec = True # whether we should pass `spec` when calling `func`
try:
sig.bind({})
except TypeError:
pass_spec = False
if not pass_spec:
try:
sig.bind()
except TypeError as exc:
raise ValueError(
"The function passed to a `Function` parameter must be "
"callable with either 0 or 1 parameters.") from exc
self.pass_spec = pass_spec
self.func = func
def is_function(self):
return True
def is_valid(self, value: Any):
return True # This is user-defined, so lets not assume anything
@property
def domain_str(self):
return f"{self.func}()"
class Quantized(Sampler): class Quantized(Sampler):
def __init__(self, sampler: Sampler, q: Union[float, int]): def __init__(self, sampler: Sampler, q: Union[float, int]):
self.sampler = sampler self.sampler = sampler
@ -439,22 +385,6 @@ class PolynomialExpansionSet:
return "PolynomialExpansionSet" return "PolynomialExpansionSet"
# TODO (krfricke): Remove tune.function
def function(func):
logger.warning(
"DeprecationWarning: wrapping {} with tune.function() is no "
"longer needed".format(func))
return func
def sample_from(func: Callable[[Dict], Any]):
"""Specify that tune should sample configuration values from this function.
Arguments:
func: An callable function to draw a sample from.
"""
return Function(func)
def uniform(lower: float, upper: float): def uniform(lower: float, upper: float):
"""Sample a float value uniformly between ``lower`` and ``upper``. """Sample a float value uniformly between ``lower`` and ``upper``.
Sampling from ``tune.uniform(1, 10)`` is equivalent to sampling from Sampling from ``tune.uniform(1, 10)`` is equivalent to sampling from
@ -90,30 +90,30 @@ def define_by_run_func(
return config return config
def convert_key( # def convert_key(
conf: Dict, space: Dict, path: str = "" # conf: Dict, space: Dict, path: str = ""
) -> Optional[Dict[str, Any]]: # ) -> Optional[Dict[str, Any]]:
"""Convert config keys to define-by-run keys. # """Convert config keys to define-by-run keys.
Returns: # Returns:
A dict with converted keys. # A dict with converted keys.
""" # """
config = {} # config = {}
for key, domain in space.items(): # for key, domain in space.items():
value = conf[key] # value = conf[key]
if path: # if path:
key = path + '/' + key # key = path + '/' + key
if isinstance(domain, dict): # if isinstance(domain, dict):
config.update(convert_key(conf[key], domain, key)) # config.update(convert_key(conf[key], domain, key))
elif isinstance(domain, sample.Categorical): # elif isinstance(domain, sample.Categorical):
index = indexof(domain, value) # index = indexof(domain, value)
config[key + '_choice_'] = index # config[key + '_choice_'] = index
if isinstance(value, dict): # if isinstance(value, dict):
key += f":{index}" # key += f":{index}"
config.update(convert_key(value, domain.categories[index], key)) # config.update(convert_key(value, domain.categories[index], key))
else: # else:
config[key] = value # config[key] = value
return config # return config
def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]: def unflatten_hierarchical(config: Dict, space: Dict) -> Tuple[Dict, Dict]:
@ -306,10 +306,8 @@ def normalize(
elif str(sampler) == 'Normal': elif str(sampler) == 'Normal':
# N(mean, sd) -> N(0,1) # N(mean, sd) -> N(0,1)
config_norm[key] = (value - sampler.mean) / sampler.sd config_norm[key] = (value - sampler.mean) / sampler.sd
else: # else:
# TODO? elif str(sampler) == 'Base': # sample.Function._CallSampler # config_norm[key] = value
# e.g., {test: sample_from(lambda spec: randn(10, 2).sample() * 0.01)}
config_norm[key] = value
return config_norm return config_norm
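Note: the surviving 'Normal' branch standardises a normally sampled value; a worked one-liner:

value, mean, sd = 0.3, 0.1, 0.2
normalized = (value - mean) / sd  # N(mean, sd) -> N(0, 1): gives 1.0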
@ -13,6 +13,7 @@ try:
from ray.tune.analysis import ExperimentAnalysis as EA from ray.tune.analysis import ExperimentAnalysis as EA
except (ImportError, AssertionError): except (ImportError, AssertionError):
from .analysis import ExperimentAnalysis as EA from .analysis import ExperimentAnalysis as EA
from .result import DEFAULT_METRIC
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -33,7 +34,7 @@ class ExperimentAnalysis(EA):
super().__init__(self, None, trials, metric, mode) super().__init__(self, None, trials, metric, mode)
except (TypeError, ValueError): except (TypeError, ValueError):
self.trials = trials self.trials = trials
self.default_metric = metric or '_default_anonymous_metric' self.default_metric = metric or DEFAULT_METRIC
self.default_mode = mode self.default_mode = mode
@ -82,7 +83,7 @@ def report(_metric=None, **kwargs):
if _verbose == 2: if _verbose == 2:
logger.info(f"result: {kwargs}") logger.info(f"result: {kwargs}")
if _metric: if _metric:
result['_default_anonymous_metric'] = _metric result[DEFAULT_METRIC] = _metric
trial = _runner.running_trial trial = _runner.running_trial
if _running_trial == trial: if _running_trial == trial:
_training_iteration += 1 _training_iteration += 1
@ -105,12 +106,13 @@ def report(_metric=None, **kwargs):
def run(training_function, def run(training_function,
config: Optional[dict] = None, config: Optional[dict] = None,
points_to_evaluate: Optional[List[dict]] = None,
low_cost_partial_config: Optional[dict] = None, low_cost_partial_config: Optional[dict] = None,
cat_hp_cost: Optional[dict] = None, cat_hp_cost: Optional[dict] = None,
metric: Optional[str] = None, metric: Optional[str] = None,
mode: Optional[str] = None, mode: Optional[str] = None,
time_budget_s: Union[int, float, datetime.timedelta] = None, time_budget_s: Union[int, float, datetime.timedelta] = None,
points_to_evaluate: Optional[List[dict]] = None,
evaluated_rewards: Optional[List] = None,
prune_attr: Optional[str] = None, prune_attr: Optional[str] = None,
min_resource: Optional[float] = None, min_resource: Optional[float] = None,
max_resource: Optional[float] = None, max_resource: Optional[float] = None,
@ -155,8 +157,6 @@ def run(training_function,
Args: Args:
training_function: A user-defined training function. training_function: A user-defined training function.
config: A dictionary to specify the search space. config: A dictionary to specify the search space.
points_to_evaluate: A list of initial hyperparameter
configurations to run first.
low_cost_partial_config: A dictionary from a subset of low_cost_partial_config: A dictionary from a subset of
controlled dimensions to the initial low-cost values. controlled dimensions to the initial low-cost values.
e.g., e.g.,
@ -179,6 +179,14 @@ def run(training_function,
mode: A string in ['min', 'max'] to specify the objective as mode: A string in ['min', 'max'] to specify the objective as
minimization or maximization. minimization or maximization.
time_budget_s: A float of the time budget in seconds. time_budget_s: A float of the time budget in seconds.
points_to_evaluate: A list of initial hyperparameter
configurations to run first.
evaluated_rewards (list): If you have previously evaluated the
parameters passed in as points_to_evaluate, you can avoid
re-running those trials by passing in the reward attributes
as a list, so the optimiser can be told the results without
needing to re-compute the trial. Must be the same length as
points_to_evaluate.
prune_attr: A string of the attribute used for pruning. prune_attr: A string of the attribute used for pruning.
Not necessarily in space. Not necessarily in space.
When prune_attr is in space, it is a hyperparameter, e.g., When prune_attr is in space, it is a hyperparameter, e.g.,
@ -259,9 +267,10 @@ def run(training_function,
if search_alg is None: if search_alg is None:
from ..searcher.blendsearch import BlendSearch from ..searcher.blendsearch import BlendSearch
search_alg = BlendSearch( search_alg = BlendSearch(
metric=metric or '_default_anonymous_metric', mode=mode, metric=metric or DEFAULT_METRIC, mode=mode,
space=config, space=config,
points_to_evaluate=points_to_evaluate, points_to_evaluate=points_to_evaluate,
evaluated_rewards=evaluated_rewards,
low_cost_partial_config=low_cost_partial_config, low_cost_partial_config=low_cost_partial_config,
cat_hp_cost=cat_hp_cost, cat_hp_cost=cat_hp_cost,
prune_attr=prune_attr, prune_attr=prune_attr,
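Note: a minimal usage sketch of the new run() arguments (objective and space are illustrative; mirrors the nested-space test at the end of this diff):

from flaml import tune

points = [{"x": 1}, {"x": 2}]
analysis = tune.run(
    lambda config: tune.report(obj=(config["x"] - 3) ** 2),
    config={"x": tune.randint(0, 10)},
    metric="obj", mode="min",
    points_to_evaluate=points,
    evaluated_rewards=[4, 1],  # (1-3)**2 and (2-3)**2, precomputed
    time_budget_s=1, num_samples=10)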
@ -842,12 +842,12 @@
"class MyRegularizedGreedyForest(SKLearnEstimator):\n", "class MyRegularizedGreedyForest(SKLearnEstimator):\n",
"\n", "\n",
"\n", "\n",
" def __init__(self, task='binary:logistic', n_jobs=1, **params):\n", " def __init__(self, task='binary', n_jobs=1, **params):\n",
" '''Constructor\n", " '''Constructor\n",
" \n", " \n",
" Args:\n", " Args:\n",
" task: A string of the task type, one of\n", " task: A string of the task type, one of\n",
" 'binary:logistic', 'multi:softmax', 'regression'\n", " 'binary', 'multi', 'regression'\n",
" n_jobs: An integer of the number of parallel threads\n", " n_jobs: An integer of the number of parallel threads\n",
" params: A dictionary of the hyperparameter names and values\n", " params: A dictionary of the hyperparameter names and values\n",
" '''\n", " '''\n",
@ -855,7 +855,7 @@
" super().__init__(task, **params)\n", " super().__init__(task, **params)\n",
"\n", "\n",
" '''task=regression for RGFRegressor; \n", " '''task=regression for RGFRegressor; \n",
" binary:logistic and multiclass:softmax for RGFClassifier'''\n", " binary or multiclass for RGFClassifier'''\n",
" if 'regression' in task:\n", " if 'regression' in task:\n",
" self.estimator_class = RGFRegressor\n", " self.estimator_class = RGFRegressor\n",
" else:\n", " else:\n",
@ -17,7 +17,7 @@ from flaml import tune
class MyRegularizedGreedyForest(SKLearnEstimator): class MyRegularizedGreedyForest(SKLearnEstimator):
def __init__(self, task='binary:logistic', n_jobs=1, max_leaf=4, def __init__(self, task='binary', n_jobs=1, max_leaf=4,
n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0, n_iter=1, n_tree_search=1, opt_interval=1, learning_rate=1.0,
min_samples_leaf=1, **params): min_samples_leaf=1, **params):
@ -264,6 +264,7 @@ class TestAutoML(unittest.TestCase):
"model_history": True, "model_history": True,
"sample_weight": np.ones(len(y)), "sample_weight": np.ones(len(y)),
"pred_time_limit": 1e-5, "pred_time_limit": 1e-5,
"ensemble": True,
} }
automl_experiment.fit(**automl_settings) automl_experiment.fit(**automl_settings)
print(automl_experiment.classes_) print(automl_experiment.classes_)
@ -382,23 +383,25 @@ class TestAutoML(unittest.TestCase):
def test_roc_auc_ovr(self): def test_roc_auc_ovr(self):
automl_experiment = AutoML() automl_experiment = AutoML()
X_train, y_train = load_iris(return_X_y=True)
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
"metric": "roc_auc_ovr", "metric": "roc_auc_ovr",
"task": "classification", "task": "classification",
"log_file_name": "test/roc_auc_ovr.log", "log_file_name": "test/roc_auc_ovr.log",
"log_training_metric": True, "log_training_metric": True,
"n_jobs": 1, "n_jobs": 1,
"sample_weight": np.ones(len(y_train)),
"eval_method": "holdout",
"model_history": True "model_history": True
} }
X_train, y_train = load_iris(return_X_y=True)
automl_experiment.fit( automl_experiment.fit(
X_train=X_train, y_train=y_train, **automl_settings) X_train=X_train, y_train=y_train, **automl_settings)
def test_roc_auc_ovo(self): def test_roc_auc_ovo(self):
automl_experiment = AutoML() automl_experiment = AutoML()
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
"metric": "roc_auc_ovo", "metric": "roc_auc_ovo",
"task": "classification", "task": "classification",
"log_file_name": "test/roc_auc_ovo.log", "log_file_name": "test/roc_auc_ovo.log",
@ -438,6 +441,11 @@ class TestAutoML(unittest.TestCase):
log_file_name=automl_settings["log_file_name"], log_file_name=automl_settings["log_file_name"],
X_train=X_train, y_train=y_train, X_train=X_train, y_train=y_train,
train_full=True, time_budget=1) train_full=True, time_budget=1)
automl_experiment.retrain_from_log(
task="regression",
log_file_name=automl_settings["log_file_name"],
X_train=X_train, y_train=y_train,
train_full=True, time_budget=0)
def test_sparse_matrix_classification(self): def test_sparse_matrix_classification(self):
automl_experiment = AutoML() automl_experiment = AutoML()
@ -565,13 +573,14 @@ class TestAutoML(unittest.TestCase):
except ImportError: except ImportError:
return return
def test_parallel_xgboost_random(self): def test_parallel_xgboost_others(self):
# use random search as the hpo_method # use random search as the hpo_method
self.test_parallel_xgboost(hpo_method='random') self.test_parallel_xgboost(hpo_method='random')
def test_random_out_of_memory(self): def test_random_out_of_memory(self):
automl_experiment = AutoML() automl_experiment = AutoML()
automl_experiment.add_learner(learner_name='large_lgbm', learner_class=MyLargeLGBM) automl_experiment.add_learner(
learner_name='large_lgbm', learner_class=MyLargeLGBM)
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 2,
"metric": 'ap', "metric": 'ap',
@ -620,13 +629,13 @@ class TestAutoML(unittest.TestCase):
print(automl_experiment.best_iteration) print(automl_experiment.best_iteration)
print(automl_experiment.best_estimator) print(automl_experiment.best_estimator)
def test_sparse_matrix_regression_cv(self): def test_sparse_matrix_regression_holdout(self):
X_train = scipy.sparse.random(8, 100) X_train = scipy.sparse.random(8, 100)
y_train = np.random.uniform(size=8) y_train = np.random.uniform(size=8)
automl_experiment = AutoML() automl_experiment = AutoML()
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
'eval_method': 'cv', 'eval_method': 'holdout',
"task": 'regression', "task": 'regression',
"log_file_name": "test/sparse_regression.log", "log_file_name": "test/sparse_regression.log",
"n_jobs": 1, "n_jobs": 1,
@ -21,6 +21,7 @@ def test_forecast_automl(budget=5):
"task": 'forecast', # task type "task": 'forecast', # task type
"log_file_name": 'CO2_forecast.log', # flaml log file "log_file_name": 'CO2_forecast.log', # flaml log file
"eval_method": "holdout", "eval_method": "holdout",
"label": ('ds', 'y'),
} }
'''The main flaml automl API''' '''The main flaml automl API'''
try: try:
@ -1,7 +1,7 @@
from openml.exceptions import OpenMLServerException from openml.exceptions import OpenMLServerException
def test_automl(budget=5, dataset_format='dataframe'): def test_automl(budget=5, dataset_format='dataframe', hpo_method=None):
from flaml.data import load_openml_dataset from flaml.data import load_openml_dataset
try: try:
X_train, X_test, y_train, y_test = load_openml_dataset( X_train, X_test, y_train, y_test = load_openml_dataset(
@ -18,6 +18,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
"task": 'classification', # task type "task": 'classification', # task type
"log_file_name": 'airlines_experiment.log', # flaml log file "log_file_name": 'airlines_experiment.log', # flaml log file
"seed": 7654321, # random seed "seed": 7654321, # random seed
'hpo_method': hpo_method
} }
'''The main flaml automl API''' '''The main flaml automl API'''
automl.fit(X_train=X_train, y_train=y_train, **settings) automl.fit(X_train=X_train, y_train=y_train, **settings)
@ -52,7 +53,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
def test_automl_array(): def test_automl_array():
test_automl(5, 'array') test_automl(5, 'array', 'bs')
def test_mlflow(): def test_mlflow():
@ -81,8 +82,11 @@ def test_mlflow():
mlflow.set_experiment("flaml") mlflow.set_experiment("flaml")
with mlflow.start_run(): with mlflow.start_run():
'''The main flaml automl API''' '''The main flaml automl API'''
automl.fit(X_train=X_train, y_train=y_train, **settings) automl.fit(
X_train=X_train, y_train=y_train, **settings)
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"]) # subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
automl._mem_thres = 0
print(automl.trainable(automl.points_to_evaluate[0]))
if __name__ == "__main__": if __name__ == "__main__":
@ -41,6 +41,7 @@ class TestLogging(unittest.TestCase):
} }
X_train, y_train = load_boston(return_X_y=True) X_train, y_train = load_boston(return_X_y=True)
n = len(y_train) >> 1 n = len(y_train) >> 1
print(automl.model, automl.classes_, automl.predict(X_train))
automl.fit(X_train=X_train[:n], y_train=y_train[:n], automl.fit(X_train=X_train[:n], y_train=y_train[:n],
X_val=X_train[n:], y_val=y_train[n:], X_val=X_train[n:], y_val=y_train[n:],
**automl_settings) **automl_settings)
@ -81,6 +82,8 @@ class TestLogging(unittest.TestCase):
time_budget_s=1, num_samples=-1) time_budget_s=1, num_samples=-1)
print(min(trial.last_result["val_loss"] print(min(trial.last_result["val_loss"]
for trial in analysis.trials)) for trial in analysis.trials))
config = analysis.trials[-1].last_result['config']['ml']
automl._state._train_with_config(config['learner'], config)
# Check if the log buffer is populated. # Check if the log buffer is populated.
self.assertTrue(len(buf.getvalue()) > 0) self.assertTrue(len(buf.getvalue()) > 0)
@ -16,9 +16,9 @@ class TestTrainingLog(unittest.TestCase):
filename = os.path.join(d, path) filename = os.path.join(d, path)
# Run a simple job. # Run a simple job.
automl_experiment = AutoML() automl = AutoML()
automl_settings = { automl_settings = {
"time_budget": 2, "time_budget": 1,
"metric": 'mse', "metric": 'mse',
"task": 'regression', "task": 'regression',
"log_file_name": filename, "log_file_name": filename,
@ -29,10 +29,12 @@ class TestTrainingLog(unittest.TestCase):
"train_time_limit": 0.01, "train_time_limit": 0.01,
"verbose": 3, "verbose": 3,
"ensemble": True, "ensemble": True,
"keep_search_state": True,
} }
X_train, y_train = load_boston(return_X_y=True) X_train, y_train = load_boston(return_X_y=True)
automl_experiment.fit(X_train=X_train, y_train=y_train, automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
**automl_settings) automl._state._train_with_config(
automl.best_estimator, automl.best_config)
# Check if the training log file is populated. # Check if the training log file is populated.
self.assertTrue(os.path.exists(filename)) self.assertTrue(os.path.exists(filename))
@ -44,8 +46,10 @@ class TestTrainingLog(unittest.TestCase):
self.assertGreater(count, 0) self.assertGreater(count, 0)
automl_settings["log_file_name"] = None automl_settings["log_file_name"] = None
automl_experiment.fit(X_train=X_train, y_train=y_train, automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
**automl_settings) automl._selected.update(None, 0)
automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train, max_iter=0)
def test_illfilename(self): def test_illfilename(self):
try: try:
@ -76,7 +76,7 @@ def test_simple(method=None):
print(analysis.trials[-1]) print(analysis.trials[-1])
def _test_optuna(): def test_optuna():
test_simple(method="optuna") test_simple(method="optuna")
test/tune/test_sample.py Normal file
@ -0,0 +1,18 @@
from flaml.tune.sample import (
BaseSampler, PolynomialExpansionSet, Domain,
uniform, quniform, choice, randint, qrandint, randn,
qrandn, loguniform, qloguniform, lograndint, qlograndint)
def test_sampler():
print(randn().sample(size=2))
print(PolynomialExpansionSet(), BaseSampler())
print(qrandn(2, 10, 2).sample(size=2))
c = choice([1, 2])
print(c.domain_str, len(c), c.is_valid(3))
i = randint(1, 10)
print(i.domain_str, i.is_valid(10))
d = Domain()
print(d.domain_str, d.is_function())
d.default_sampler_cls = BaseSampler
print(d.get_sampler())
test/tune/test_searcher.py Normal file
@ -0,0 +1,126 @@
from flaml.searcher.blendsearch import CFO
import numpy as np
try:
from ray import __version__ as ray_version
assert ray_version >= '1.0.0'
from ray.tune import sample
except (ImportError, AssertionError):
from flaml.tune import sample
from flaml.searcher.suggestion import OptunaSearch, Searcher, ConcurrencyLimiter
from flaml.searcher.blendsearch import BlendSearch
def define_search_space(trial):
trial.suggest_float("a", 6, 8)
trial.suggest_float("b", 1e-4, 1e-2, log=True)
def test_searcher():
searcher = Searcher()
searcher = Searcher(metric=['m1', 'm2'], mode=['max', 'min'])
searcher.set_search_properties(None, None, None)
searcher.suggest = searcher.on_pause = searcher.on_unpause = lambda _: {}
searcher.on_trial_complete = lambda trial_id, result, error: None
searcher = ConcurrencyLimiter(searcher, max_concurrent=2, batch=True)
searcher.suggest("t1")
searcher.suggest("t2")
searcher.on_pause("t1")
searcher.on_unpause("t1")
searcher.suggest("t3")
searcher.on_trial_complete("t1", {})
searcher.on_trial_complete("t2", {})
searcher.set_state({})
print(searcher.get_state())
import optuna
config = {
"a": optuna.distributions.UniformDistribution(6, 8),
"b": optuna.distributions.LogUniformDistribution(1e-4, 1e-2),
}
searcher = OptunaSearch(
config, points_to_evaluate=[{"a": 6, "b": 1e-3}],
evaluated_rewards=[{'m': 2}], metric='m', mode='max'
)
config = {
"a": sample.uniform(6, 8),
"b": sample.loguniform(1e-4, 1e-2)
}
searcher = OptunaSearch(
config, points_to_evaluate=[{"a": 6, "b": 1e-3}],
evaluated_rewards=[{'m': 2}], metric='m', mode='max'
)
searcher = OptunaSearch(
define_search_space, points_to_evaluate=[{"a": 6, "b": 1e-3}],
# evaluated_rewards=[{'m': 2}], metric='m', mode='max'
mode='max'
)
searcher = OptunaSearch()
# searcher.set_search_properties('m', 'min', define_search_space)
searcher.set_search_properties('m', 'min', config)
searcher.suggest('t1')
searcher.on_trial_complete('t1', None, False)
searcher.suggest('t2')
searcher.on_trial_complete('t2', None, True)
searcher.suggest('t3')
searcher.on_trial_complete('t3', {'m': np.nan})
searcher.save('test/tune/optuna.pickle')
searcher.restore('test/tune/optuna.pickle')
searcher = BlendSearch(
metric="m",
global_search_alg=searcher, metric_constraints=[("c", "<", 1)])
searcher.set_search_properties(metric="m2", config=config)
searcher.set_search_properties(config={"time_budget_s": 0})
c = searcher.suggest('t1')
searcher.on_trial_complete("t1", {"config": c}, True)
c = searcher.suggest('t2')
searcher.on_trial_complete(
"t2", {"config": c, "m2": 1, "c": 2, "time_total_s": 1})
config1 = config.copy()
config1['_choice_'] = 0
searcher._expand_admissible_region(
lower={'root': [{'a': 0.5}, {'a': 0.4}]},
upper={'root': [{'a': 0.9}, {'a': 0.8}]},
space={'root': config1},
)
searcher = CFO(
metric='m', mode='min', space=config,
points_to_evaluate=[{'a': 7, 'b': 1e-3}, {'a': 6, 'b': 3e-4}],
evaluated_rewards=[1, 1])
searcher.suggest("t1")
searcher.suggest("t2")
searcher.on_trial_result('t3', {})
c = searcher.generate_parameters(1)
searcher.receive_trial_result(1, c, {'reward': 0})
searcher.update_search_space(
{
"a": {
"_value": [1, 2],
"_type": "choice",
},
"b": {
"_value": [1, 3],
"_type": "randint",
},
"c": {
"_value": [.1, 3],
"_type": "uniform",
},
"d": {
"_value": [2, 8, 2],
"_type": "quniform",
},
"e": {
"_value": [2, 8],
"_type": "loguniform",
},
"f": {
"_value": [2, 8, 2],
"_type": "qloguniform",
},
"g": {
"_value": [0, 2],
"_type": "normal",
},
"h": {
"_value": [0, 2, 2],
"_type": "qnormal",
},
}
)
@ -15,7 +15,7 @@ import xgboost as xgb
import logging import logging
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
os.makedirs('logs', exist_ok=True) os.makedirs('logs', exist_ok=True)
logger.addHandler(logging.FileHandler('logs/tune_xgboost.log')) logger.addHandler(logging.FileHandler('logs/tune.log'))
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
@ -223,12 +223,22 @@ def test_nested():
logger.info(f"BlendSearch exp best config: {best_trial.config}") logger.info(f"BlendSearch exp best config: {best_trial.config}")
logger.info(f"BlendSearch exp best result: {best_trial.last_result}") logger.info(f"BlendSearch exp best result: {best_trial.last_result}")
points_to_evaluate = [
{"b": .99, "cost_related": {"a": 3}},
{"b": .99, "cost_related": {"a": 2}},
]
analysis = tune.run( analysis = tune.run(
simple_func, simple_func,
config=search_space, config=search_space,
low_cost_partial_config={ low_cost_partial_config={
"cost_related": {"a": 1} "cost_related": {"a": 1}
}, },
points_to_evaluate=points_to_evaluate,
evaluated_rewards=[
(config["cost_related"]["a"] - 4)**2
+ (config["b"] - config["cost_related"]["a"])**2
for config in points_to_evaluate
],
metric="obj", metric="obj",
mode="min", mode="min",
metric_constraints=[("ab", "<=", 4)], metric_constraints=[("ab", "<=", 4)],