Support parallel trials and add random search (#167)

* exclude non-hashable values from the config signature

* parallel trials

* add random in _search_parallel

* fix bug in retraining

* check memory constraint before training

* retrain_full

* log custom metric

* retraining budget check

* sample size check before retrain

* remove 'time2eval' from result

* report 'total_search_time' in result

* rename total_search_time to wall_clock_time

* rename train_loss boolean to log_training_metric

* set default train_loss to None

* exclude oom result

* log retrained model

* remove 'subsample' from the default LGBM search space

* docstring updates

* notebook

* handle NaN predicted values from sarimax

* bump version to 0.6.0

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
Qingyun Wu 2021-08-23 19:36:51 -04:00 committed by GitHub
parent 3d0a3d26a2
commit a229a6112a
21 changed files with 5142 additions and 4677 deletions
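
For orientation, the two headline features can be exercised together. The sketch below is illustrative only and mirrors the settings used by the new test_parallel_xgboost / test_random_out_of_memory tests in this PR (the iris dataset and the budget are arbitrary examples; parallel trials require ray):

    # Illustrative sketch based on the new tests; not part of the diff itself.
    from sklearn.datasets import load_iris
    from flaml import AutoML

    X_train, y_train = load_iris(return_X_y=True)
    automl = AutoML()
    automl.fit(
        X_train=X_train, y_train=y_train,
        task='classification',
        time_budget=10,            # seconds
        estimator_list=['xgboost'],
        n_jobs=1,
        n_concurrent_trials=2,     # new: run two trials in parallel
        hpo_method='random',       # new: random search instead of the default searcher
    )
    print(automl.best_estimator, automl.best_config)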

File diff suppressed because it is too large


@@ -141,14 +141,14 @@ def get_output_from_log(filename, time_budget):
     best_config_list = []
     with training_log_reader(filename) as reader:
         for record in reader.records():
-            time_used = record.total_search_time
+            time_used = record.wall_clock_time
             val_loss = record.validation_loss
             config = record.config
             learner = record.learner.split('_')[0]
             sample_size = record.sample_size
             train_loss = record.logged_metric
-            if time_used < time_budget:
+            if time_used < time_budget and np.isfinite(val_loss):
                 if val_loss < best_val_loss:
                     best_val_loss = val_loss
                     best_config = config
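
A hedged usage sketch of the function above after this change; records whose validation loss is not finite (e.g. NaN sarimax predictions) are now skipped. The five-name unpacking follows the pattern used in FLAML's example notebooks and the log file name is illustrative:

    # hedged sketch; assumes a log produced by AutoML.fit(log_file_name=...)
    from flaml.data import get_output_from_log

    time_history, best_valid_loss_history, valid_loss_history, config_history, metric_history = \
        get_output_from_log(filename='airlines_experiment.log', time_budget=60)
    for t, loss in zip(time_history, best_valid_loss_history):
        print(f'{t:.1f}s  best validation loss so far: {loss:.4f}')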


@@ -102,8 +102,11 @@ def sklearn_metric_loss_score(
         score = log_loss(
             y_true, y_predict, labels=labels, sample_weight=sample_weight)
     elif 'mape' in metric_name:
-        score = mean_absolute_percentage_error(
-            y_true, y_predict)
+        try:
+            score = mean_absolute_percentage_error(
+                y_true, y_predict)
+        except ValueError:
+            return np.inf
     elif 'micro_f1' in metric_name:
         score = 1 - f1_score(
             y_true, y_predict, sample_weight=sample_weight, average='micro')
@@ -141,21 +144,23 @@ def get_y_pred(estimator, X, eval_metric, obj, freq=None):
 def get_test_loss(
     estimator, X_train, y_train, X_test, y_test, weight_test,
-    eval_metric, obj, labels=None, budget=None, train_loss=False, fit_kwargs={}
+    eval_metric, obj, labels=None, budget=None, log_training_metric=False, fit_kwargs={}
 ):
     start = time.time()
-    train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
+    estimator.fit(X_train, y_train, budget, **fit_kwargs)
     if isinstance(eval_metric, str):
         pred_start = time.time()
         test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
         pred_time = (time.time() - pred_start) / X_test.shape[0]
         test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
                                               labels, weight_test)
-        if train_loss is not False:
+        if log_training_metric:
             test_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
             train_loss = sklearn_metric_loss_score(
                 eval_metric, test_pred_y,
                 y_train, labels, fit_kwargs.get('sample_weight'))
+        else:
+            train_loss = None
     else:  # customized metric function
         test_loss, metrics = eval_metric(
             X_test, y_test, estimator, labels, X_train, y_train,
@@ -174,40 +179,41 @@ def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
 def evaluate_model(
     estimator, X_train, y_train, X_val, y_val, weight_val,
-    budget, kf, task, eval_method, eval_metric, best_val_loss, train_loss=False,
+    budget, kf, task, eval_method, eval_metric, best_val_loss, log_training_metric=False,
     fit_kwargs={}
 ):
     if 'holdout' in eval_method:
         val_loss, train_loss, train_time, pred_time = evaluate_model_holdout(
             estimator, X_train, y_train, X_val, y_val, weight_val, budget,
-            task, eval_metric, train_loss=train_loss,
+            task, eval_metric, log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs)
     else:
         val_loss, train_loss, train_time, pred_time = evaluate_model_CV(
             estimator, X_train, y_train, budget, kf, task,
-            eval_metric, best_val_loss, train_loss=train_loss,
+            eval_metric, best_val_loss, log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs)
     return val_loss, train_loss, train_time, pred_time


 def evaluate_model_holdout(
     estimator, X_train, y_train, X_val, y_val,
-    weight_val, budget, task, eval_metric, train_loss=False,
+    weight_val, budget, task, eval_metric, log_training_metric=False,
     fit_kwargs={}
 ):
     val_loss, train_time, train_loss, pred_time = get_test_loss(
         estimator, X_train, y_train, X_val, y_val, weight_val, eval_metric,
-        task, budget=budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
+        task, budget=budget, log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
     return val_loss, train_loss, train_time, pred_time


 def evaluate_model_CV(
     estimator, X_train_all, y_train_all, budget, kf,
-    task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}
+    task, eval_metric, best_val_loss, log_training_metric=False, fit_kwargs={}
 ):
     start_time = time.time()
     total_val_loss = 0
     total_train_loss = None
+    train_loss = None
     train_time = pred_time = 0
     valid_fold_num = total_fold_num = 0
     n = kf.get_n_splits()
@@ -231,7 +237,7 @@ def evaluate_model_CV(
         kf = kf.split(X_train_split)
     rng = np.random.RandomState(2020)
     val_loss_list = []
-    budget_per_train = budget / (n + 1)
+    budget_per_train = budget / n
     if 'sample_weight' in fit_kwargs:
         weight = fit_kwargs['sample_weight']
         weight_val = None
@@ -259,13 +265,13 @@ def evaluate_model_CV(
         val_loss_i, train_time_i, train_loss_i, pred_time_i = get_test_loss(
             estimator, X_train, y_train, X_val, y_val, weight_val,
             eval_metric, task, labels, budget_per_train,
-            train_loss=train_loss, fit_kwargs=fit_kwargs)
+            log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
         if weight is not None:
             fit_kwargs['sample_weight'] = weight
         valid_fold_num += 1
         total_fold_num += 1
         total_val_loss += val_loss_i
-        if train_loss is not False:
+        if log_training_metric or not isinstance(eval_metric, str):
             if isinstance(total_train_loss, list):
                 total_train_loss = [
                     total_train_loss[i] + v for i, v in enumerate(train_loss_i)]
@@ -286,7 +292,7 @@ def evaluate_model_CV(
             break
     val_loss = np.max(val_loss_list)
     n = total_fold_num
-    if train_loss is not False:
+    if log_training_metric or not isinstance(eval_metric, str):
         if isinstance(total_train_loss, list):
             train_loss = [v / n for v in total_train_loss]
         elif isinstance(total_train_loss, dict):
@@ -294,17 +300,17 @@ def evaluate_model_CV(
         else:
             train_loss = total_train_loss / n
     pred_time /= n
-    budget -= time.time() - start_time
-    if val_loss < best_val_loss and budget > budget_per_train:
-        estimator.cleanup()
-        estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
+    # budget -= time.time() - start_time
+    # if val_loss < best_val_loss and budget > budget_per_train:
+    #     estimator.cleanup()
+    #     estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
     return val_loss, train_loss, train_time, pred_time


 def compute_estimator(
     X_train, y_train, X_val, y_val, weight_val, budget, kf,
     config_dic, task, estimator_name, eval_method, eval_metric,
-    best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
+    best_val_loss=np.Inf, n_jobs=1, estimator_class=None, log_training_metric=False,
     fit_kwargs={}
 ):
     estimator_class = estimator_class or get_estimator_class(
@@ -313,7 +319,7 @@ def compute_estimator(
         **config_dic, task=task, n_jobs=n_jobs)
     val_loss, train_loss, train_time, pred_time = evaluate_model(
         estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf, task,
-        eval_method, eval_metric, best_val_loss, train_loss=train_loss,
+        eval_method, eval_metric, best_val_loss, log_training_metric=log_training_metric,
        fit_kwargs=fit_kwargs)
     return estimator, val_loss, train_loss, train_time, pred_time
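
Because custom metrics are now logged (the 'log custom metric' item above), here is a hedged sketch of an eval_metric callable compatible with the test_loss, metrics = eval_metric(...) call shown in get_test_loss. It mirrors the custom_metric helper in the updated tests; the regularization constant is an arbitrary example:

    # hedged sketch of a customized metric function; the second return value
    # is the extra metric information that gets logged as 'logged_metric'
    def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                      weight_test=None, weight_train=None):
        from sklearn.metrics import log_loss
        y_pred = estimator.predict_proba(X_test)
        test_loss = log_loss(y_test, y_pred, labels=labels, sample_weight=weight_test)
        y_pred = estimator.predict_proba(X_train)
        train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
        alpha = 0.5  # arbitrary example weight
        # loss to minimize, plus a dict of metrics to log
        return test_loss * (1 + alpha) - alpha * train_loss, {
            'test_loss': test_loss, 'train_loss': train_loss}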


@@ -222,10 +222,10 @@ class LGBMEstimator(BaseEstimator):
                 'domain': tune.loguniform(lower=1 / 1024, upper=1.0),
                 'init_value': 0.1,
             },
-            'subsample': {
-                'domain': tune.uniform(lower=0.1, upper=1.0),
-                'init_value': 1.0,
-            },
+            # 'subsample': {
+            #     'domain': tune.uniform(lower=0.1, upper=1.0),
+            #     'init_value': 1.0,
+            # },
             'log_max_bin': {
                 'domain': tune.lograndint(lower=3, upper=11),
                 'init_value': 8,
@@ -252,28 +252,30 @@ class LGBMEstimator(BaseEstimator):
     def __init__(self, task='binary:logistic', log_max_bin=8, **params):
         super().__init__(task, **params)
-        # Default: regression for LGBMRegressor,
-        # binary or multiclass for LGBMClassifier
-        if 'regression' in task:
-            objective = 'regression'
-        elif 'binary' in task:
-            objective = 'binary'
-        elif 'multi' in task:
-            objective = 'multiclass'
-        else:
-            objective = 'regression'
+        if "objective" not in self.params:
+            # Default: regression for LGBMRegressor,
+            # binary or multiclass for LGBMClassifier
+            if 'regression' in task:
+                objective = 'regression'
+            elif 'binary' in task:
+                objective = 'binary'
+            elif 'multi' in task:
+                objective = 'multiclass'
+            else:
+                objective = 'regression'
+            self.params["objective"] = objective
         if "n_estimators" in self.params:
             self.params["n_estimators"] = int(round(self.params["n_estimators"]))
         if "num_leaves" in self.params:
             self.params["num_leaves"] = int(round(self.params["num_leaves"]))
         if "min_child_samples" in self.params:
             self.params["min_child_samples"] = int(round(self.params["min_child_samples"]))
-        if "objective" not in self.params:
-            self.params["objective"] = objective
         if "max_bin" not in self.params:
             self.params['max_bin'] = 1 << int(round(log_max_bin)) - 1
         if "verbose" not in self.params:
             self.params['verbose'] = -1
+        # if "subsample_freq" not in self.params:
+        #     self.params['subsample_freq'] = 1
         if 'regression' in task:
             self.estimator_class = LGBMRegressor
         else:
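
The restructured __init__ computes the task-derived default objective only when the caller did not pass one. A hedged sketch of both paths, assuming (as the diff indicates) that extra keyword arguments are kept in self.params:

    # hedged sketch; 'cross_entropy' is only an example of a user-chosen objective
    from flaml.model import LGBMEstimator

    est_default = LGBMEstimator(n_estimators=4)
    print(est_default.params['objective'])   # 'binary', derived from the default task

    est_custom = LGBMEstimator(n_estimators=4, objective='cross_entropy')
    print(est_custom.params['objective'])    # user value kept; default branch skipped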


@@ -748,6 +748,7 @@ class AutoTransformers:
         self._set_metric(custom_metric_name, custom_metric_mode_name)
         self._set_task()
         self._fp16 = fp16
+        ray.shutdown()
         ray.init(local_mode=ray_local_mode)
         self._set_search_space(**custom_hpo_args)


@@ -3,6 +3,7 @@
  * Licensed under the MIT License. See LICENSE file in the
  * project root for license information.
 '''
+from flaml.tune.sample import Domain
 from typing import Dict, Optional, Tuple
 import numpy as np
 try:
@@ -140,7 +141,7 @@ class FLOW2(Searcher):
             if str(sampler) != 'Normal':
                 self._bounded_keys.append(key)
         if not hier:
-            self._space_keys = sorted(self._space.keys())
+            self._space_keys = sorted(self._tunable_keys)
         self._hierarchical = hier
         if (self.prune_attr and self.prune_attr not in self._space
                 and self.max_resource):
@@ -499,18 +500,28 @@ class FLOW2(Searcher):
         else:
             space = self._space
         value_list = []
+        # self._space_keys doesn't contain keys with const values,
+        # e.g., "eval_metric": ["logloss", "error"].
         keys = sorted(config.keys()) if self._hierarchical else self._space_keys
         for key in keys:
             value = config[key]
             if key == self.prune_attr:
                 value_list.append(value)
-            # else key must be in self.space
-            # get rid of list type or constant,
-            # e.g., "eval_metric": ["logloss", "error"]
-            elif isinstance(space[key], sample.Integer):
-                value_list.append(int(round(value)))
             else:
-                value_list.append(value)
+                # key must be in space
+                domain = space[key]
+                if self._hierarchical:
+                    # can't remove constant for hierarchical search space,
+                    # e.g., learner
+                    if not (domain is None or type(domain) in (str, int, float)
+                            or isinstance(domain, sample.Domain)):
+                        # not domain or hashable
+                        # get rid of list type for hierarchical search space.
+                        continue
+                if isinstance(domain, sample.Integer):
+                    value_list.append(int(round(value)))
+                else:
+                    value_list.append(value)
         return tuple(value_list)

     @property
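
The rewritten branch exists because config signatures are used as hashable keys; a standalone illustration (not FLAML code) of why a list-valued constant such as "eval_metric": ["logloss", "error"] has to be dropped from the signature tuple:

    # standalone illustration of the hashability problem
    sig_with_list = ('lgbm', 128, ['logloss', 'error'])
    try:
        hash(sig_with_list)
    except TypeError as err:
        print(err)                      # unhashable type: 'list'

    sig_without_list = ('lgbm', 128)    # dropping the list-valued constant
    print(hash(sig_without_list))       # hashable, usable as a dict/set key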


@@ -16,7 +16,7 @@ class TrainingLogRecord(object):
                  iter_per_learner: int,
                  logged_metric: float,
                  trial_time: float,
-                 total_search_time: float,
+                 wall_clock_time: float,
                  validation_loss,
                  config,
                  best_validation_loss,
@@ -27,7 +27,7 @@ class TrainingLogRecord(object):
         self.iter_per_learner = iter_per_learner
         self.logged_metric = logged_metric
         self.trial_time = trial_time
-        self.total_search_time = total_search_time
+        self.wall_clock_time = wall_clock_time
         self.validation_loss = validation_loss
         self.config = config
         self.best_validation_loss = best_validation_loss
@@ -71,7 +71,7 @@ class TrainingLogWriter(object):
                      it_counter: int,
                      train_loss: float,
                      trial_time: float,
-                     total_search_time: float,
+                     wall_clock_time: float,
                      validation_loss,
                      config,
                      best_validation_loss,
@@ -86,7 +86,7 @@ class TrainingLogWriter(object):
             it_counter,
             train_loss,
             trial_time,
-            total_search_time,
+            wall_clock_time,
             validation_loss,
             config,
             best_validation_loss,
@@ -95,6 +95,7 @@ class TrainingLogWriter(object):
             sample_size)
         if validation_loss < self.current_best_loss or \
                 validation_loss == self.current_best_loss and \
+                self.current_sample_size is not None and \
                 sample_size > self.current_sample_size:
             self.current_best_loss = validation_loss
             self.current_sample_size = sample_size
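
After the rename, downstream readers should use the new attribute name; a hedged sketch using the same reader that get_output_from_log relies on (the log path is illustrative):

    # hedged sketch: iterate an existing training log and print the renamed field
    from flaml.training_log import training_log_reader

    with training_log_reader('test/sparse_classification.log') as reader:
        for record in reader.records():
            # 'wall_clock_time' replaces the old 'total_search_time' attribute
            print(record.wall_clock_time, record.validation_loss, record.learner)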


@@ -363,6 +363,7 @@ def indexof(domain: Dict, config: Dict) -> int:
             continue
         # print(domain.const[i])
         if all(config[key] == value for key, value in domain.const[i].items()):
+            # assumption: the concatenation of constants is a unique identifier
             return i
     return None


@@ -1 +1 @@
-__version__ = "0.5.13"
+__version__ = "0.6.0"

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -10,7 +10,7 @@ from datetime import datetime
 from flaml import AutoML
 from flaml.data import get_output_from_log
-from flaml.model import SKLearnEstimator, XGBoostEstimator
+from flaml.model import LGBMEstimator, SKLearnEstimator, XGBoostEstimator
 from rgf.sklearn import RGFClassifier, RGFRegressor
 from flaml import tune
@@ -92,6 +92,24 @@ class MyXGB2(XGBoostEstimator):
         super().__init__(objective='reg:squarederror', **params)


+class MyLargeLGBM(LGBMEstimator):
+    @classmethod
+    def search_space(cls, **params):
+        return {
+            'n_estimators': {
+                'domain': tune.lograndint(lower=4, upper=32768),
+                'init_value': 32768,
+                'low_cost_init_value': 4,
+            },
+            'num_leaves': {
+                'domain': tune.lograndint(lower=4, upper=32768),
+                'init_value': 32768,
+                'low_cost_init_value': 4,
+            },
+        }
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
@@ -477,6 +495,66 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_iteration)
         print(automl_experiment.best_estimator)

+    def test_parallel_xgboost(self, hpo_method=None):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 10,
+            "metric": 'ap',
+            "task": 'classification',
+            "log_file_name": "test/sparse_classification.log",
+            "estimator_list": ["xgboost"],
+            "log_type": "all",
+            "n_jobs": 1,
+            "n_concurrent_trials": 2,
+            "hpo_method": hpo_method,
+        }
+        X_train = scipy.sparse.eye(900000)
+        y_train = np.random.randint(2, size=900000)
+        try:
+            automl_experiment.fit(X_train=X_train, y_train=y_train,
+                                  **automl_settings)
+            print(automl_experiment.predict(X_train))
+            print(automl_experiment.model)
+            print(automl_experiment.config_history)
+            print(automl_experiment.model_history)
+            print(automl_experiment.best_iteration)
+            print(automl_experiment.best_estimator)
+        except ImportError:
+            return
+
+    def test_parallel_xgboost_random(self):
+        # use random search as the hpo_method
+        self.test_parallel_xgboost(hpo_method='random')
+
+    def test_random_out_of_memory(self):
+        automl_experiment = AutoML()
+        automl_experiment.add_learner(learner_name='large_lgbm', learner_class=MyLargeLGBM)
+        automl_settings = {
+            "time_budget": 2,
+            "metric": 'ap',
+            "task": 'classification',
+            "log_file_name": "test/sparse_classification_oom.log",
+            "estimator_list": ["large_lgbm"],
+            "log_type": "all",
+            "n_jobs": 1,
+            "n_concurrent_trials": 2,
+            "hpo_method": 'random',
+        }
+        X_train = scipy.sparse.eye(900000)
+        y_train = np.random.randint(2, size=900000)
+        try:
+            automl_experiment.fit(X_train=X_train, y_train=y_train,
+                                  **automl_settings)
+            print(automl_experiment.predict(X_train))
+            print(automl_experiment.model)
+            print(automl_experiment.config_history)
+            print(automl_experiment.model_history)
+            print(automl_experiment.best_iteration)
+            print(automl_experiment.best_estimator)
+        except ImportError:
+            return
+
     def test_sparse_matrix_lr(self):
         automl_experiment = AutoML()
         automl_settings = {


@@ -17,6 +17,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
         "metric": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
         "task": 'classification',  # task type
         "log_file_name": 'airlines_experiment.log',  # flaml log file
+        "seed": 7654321,  # random seed
     }
     '''The main flaml automl API'''
     automl.fit(X_train=X_train, y_train=y_train, **settings)


@@ -45,7 +45,7 @@ class TestLogging(unittest.TestCase):
                 **automl_settings)
             logger.info(automl.search_space)
             logger.info(automl.low_cost_partial_config)
-            logger.info(automl.points_to_evalaute)
+            logger.info(automl.points_to_evaluate)
             logger.info(automl.cat_hp_cost)
             import optuna as ot
             study = ot.create_study()
@@ -62,16 +62,18 @@ class TestLogging(unittest.TestCase):
             config['learner'] = automl.best_estimator
             automl.trainable({"ml": config})
             from flaml import tune, CFO
+            from flaml.automl import size
+            from functools import partial
             search_alg = CFO(
                 metric='val_loss',
                 space=automl.search_space,
                 low_cost_partial_config=automl.low_cost_partial_config,
-                points_to_evaluate=automl.points_to_evalaute,
+                points_to_evaluate=automl.points_to_evaluate,
                 cat_hp_cost=automl.cat_hp_cost,
                 prune_attr=automl.prune_attr,
                 min_resource=automl.min_resource,
                 max_resource=automl.max_resource,
-                config_constraints=[(automl.size, '<=', automl._mem_thres)],
+                config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
                 metric_constraints=automl.metric_constraints)
             analysis = tune.run(
                 automl.trainable, search_alg=search_alg,  # verbose=2,


@@ -40,6 +40,7 @@ def test_simple(method=None):
         "n_jobs": 1,
         "hpo_method": method,
         "log_type": "all",
+        "retrain_full": "budget",
         "time_budget": 1
     }
     from sklearn.externals._arff import ArffException
@@ -53,21 +54,23 @@ def test_simple(method=None):
     automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
     print(automl.estimator_list)
     print(automl.search_space)
-    print(automl.points_to_evalaute)
+    print(automl.points_to_evaluate)
     config = automl.best_config.copy()
     config['learner'] = automl.best_estimator
     automl.trainable(config)
     from flaml import tune
+    from flaml.automl import size
+    from functools import partial
     analysis = tune.run(
         automl.trainable, automl.search_space, metric='val_loss', mode="min",
         low_cost_partial_config=automl.low_cost_partial_config,
-        points_to_evaluate=automl.points_to_evalaute,
+        points_to_evaluate=automl.points_to_evaluate,
         cat_hp_cost=automl.cat_hp_cost,
         prune_attr=automl.prune_attr,
         min_resource=automl.min_resource,
         max_resource=automl.max_resource,
         time_budget_s=automl._state.time_budget,
-        config_constraints=[(automl.size, '<=', automl._mem_thres)],
+        config_constraints=[(partial(size, automl._state), '<=', automl._mem_thres)],
         metric_constraints=automl.metric_constraints, num_samples=5)
     print(analysis.trials[-1])


@@ -27,6 +27,8 @@ def test_blendsearch_tune(smoke_test=True):
     except ImportError:
         print('ray[tune] is not installed, skipping test')
         return
+    import numpy as np
+
     algo = BlendSearch()
     algo = ConcurrencyLimiter(algo, max_concurrent=4)
     scheduler = AsyncHyperBandScheduler()
@@ -42,7 +44,8 @@ def test_blendsearch_tune(smoke_test=True):
             "width": tune.uniform(0, 20),
             "height": tune.uniform(-100, 100),
             # This is an ignored parameter.
-            "activation": tune.choice(["relu", "tanh"])
+            "activation": tune.choice(["relu", "tanh"]),
+            "test4": np.zeros((3, 1)),
         })
     print("Best hyperparameters found were: ", analysis.best_config)


@@ -63,6 +63,7 @@ def _test_xgboost(method='BlendSearch'):
     time_budget_s = 60
     for n_cpu in [4]:
         start_time = time.time()
+        ray.shutdown()
         ray.init(num_cpus=n_cpu, num_gpus=0)
         # ray.init(address='auto')
         if method == 'BlendSearch':