mirror of https://github.com/microsoft/autogen.git (synced 2025-11-12 16:14:48 +00:00)
commit 072e9e4588, parent b04b00dc9d
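
From the diff below, this commit adds a prediction-latency constraint (pred_time_limit) and a per-trial training-time constraint (train_time_limit) to AutoML.fit, threads a per-instance pred_time measurement through the evaluation functions in flaml/ml.py, lets custom metric functions return a dictionary of metrics to log (including an optional 'pred_time' entry), and bumps the package version to 0.5.7.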
@@ -162,7 +162,6 @@ class AutoMLState:
     def _compute_with_config_base(self,
                                   estimator,
                                   config_w_resource):
-        compute_start_time = time.time()
         if 'FLAML_sample_size' in config_w_resource:
             sample_size = int(config_w_resource['FLAML_sample_size'])
         else:
@@ -181,14 +180,14 @@ class AutoMLState:
         budget = time_left if sample_size == self.data_size else \
             time_left / 2 * sample_size / self.data_size

-        trained_estimator, val_loss, train_loss, time2eval, _ = \
+        trained_estimator, val_loss, train_loss, time2eval, pred_time = \
             compute_estimator(
                 sampled_X_train,
                 sampled_y_train,
                 self.X_val,
                 self.y_val,
                 self.weight_val,
-                budget,
+                min(budget, self.train_time_limit),
                 self.kf,
                 config,
                 self.task,
@@ -201,7 +200,7 @@ class AutoMLState:
                 self.log_training_metric,
                 self.fit_kwargs)
         result = {
-            'total_time': time.time() - compute_start_time,
+            'pred_time': pred_time,
             'time2eval': time2eval,
             'train_loss': train_loss,
             'val_loss': val_loss,
@@ -799,6 +798,8 @@ class AutoML:
             n_splits=N_SPLITS,
             log_training_metric=False,
             mem_thres=MEM_THRES,
+            pred_time_limit=np.inf,
+            train_time_limit=np.inf,
             X_val=None,
             y_val=None,
             sample_weight_val=None,
@@ -813,7 +814,7 @@ class AutoML:

         Args:
             X_train: A numpy array or a pandas dataframe of training data in
                 shape (n, m)
             y_train: A numpy array or a pandas series of labels in shape (n,)
             dataframe: A dataframe of training data including label column
             label: A str of the label column name
@@ -835,7 +836,7 @@ class AutoML:
                 return metric_to_minimize, metrics_to_log

             which returns a float number as the minimization objective,
-            and a tuple of floats as the metrics to log
+            and a tuple of floats or a dictionary as the metrics to log
             task: A string of the task type, e.g.,
                 'classification', 'regression'
             n_jobs: An integer of the number of threads for training
@@ -865,6 +866,8 @@ class AutoML:
             log_training_metric: A boolean of whether to log the training
                 metric for each model.
             mem_thres: A float of the memory size constraint in bytes
+            pred_time_limit: A float of the prediction latency constraint in seconds
+            train_time_limit: A float of the training time constraint in seconds
             X_val: None or a numpy array or a pandas dataframe of validation data
             y_val: None or a numpy array or a pandas series of validation labels
             sample_weight_val: None or a numpy array of the sample weight of
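
A minimal usage sketch of the two new fit() arguments (not part of the commit itself; the values mirror the tests later in this diff, and it assumes the flaml package at this version):

    from flaml import AutoML
    from sklearn.datasets import load_wine

    X_train, y_train = load_wine(return_X_y=True)
    automl = AutoML()
    automl.fit(
        X_train=X_train, y_train=y_train,
        task='classification', time_budget=5,
        pred_time_limit=1e-5,  # prediction latency constraint, seconds per instance
        train_time_limit=1,    # training time constraint, seconds
    )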
@@ -955,6 +958,8 @@ class AutoML:
         self._ensemble = ensemble
         self._max_iter = max_iter
         self._mem_thres = mem_thres
+        self._pred_time_limit = pred_time_limit
+        self._state.train_time_limit = train_time_limit
         self._log_type = log_type
         self.split_ratio = split_ratio
         self._save_model_history = model_history
@@ -1047,6 +1052,10 @@ class AutoML:
         points_to_evaluate = [search_state.init_config]
         low_cost_partial_config = search_state.low_cost_partial_config
         if self._hpo_method in ('bs', 'cfo', 'grid'):
+            metric_constraints = []
+            if np.isfinite(self._pred_time_limit):
+                metric_constraints.append(
+                    ('pred_time', '<=', self._pred_time_limit))
             algo = SearchAlgo(
                 metric='val_loss', mode='min', space=search_space,
                 points_to_evaluate=points_to_evaluate,
@@ -1055,7 +1064,10 @@ class AutoML:
                 prune_attr=prune_attr,
                 min_resource=min_resource,
                 max_resource=max_resource,
-                config_constraints=[(learner_class.size, '<=', self._mem_thres)]
+                config_constraints=[
+                    (learner_class.size, '<=', self._mem_thres)
+                ],
+                metric_constraints=metric_constraints,
             )
         else:
             algo = SearchAlgo(
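
Each metric constraint is a (name, operator, bound) tuple checked against the metrics a trial reports, such as the 'pred_time' entry added to the result dict in _compute_with_config_base above. An illustrative sketch of the intended semantics, not the library's exact code:

    metric_constraints = [('pred_time', '<=', 1e-5)]

    def satisfies(result: dict) -> bool:
        # result holds a trial's reported metrics, e.g. {'val_loss': ..., 'pred_time': ...}
        return all(
            result[name] <= bound if op == '<=' else result[name] >= bound
            for name, op, bound in metric_constraints
        )

    print(satisfies({'val_loss': 0.2, 'pred_time': 2e-6}))  # True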
@@ -1077,7 +1089,7 @@ class AutoML:
         analysis = tune.run(
             search_state.training_function,
             search_alg=search_state.search_alg,
-            time_budget_s=budget_left,
+            time_budget_s=min(budget_left, self._state.train_time_limit),
             verbose=max(self.verbose - 1, 0),
             use_ray=False)
         time_used = time.time() - start_run_time
flaml/ml.py — 52 changed lines

@@ -4,6 +4,7 @@
 '''

 import time
+from joblib.externals.cloudpickle.cloudpickle import instance
 import numpy as np
 import pandas as pd
 from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
@@ -127,7 +128,9 @@ def get_test_loss(
     start = time.time()
     train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
     if isinstance(eval_metric, str):
+        pred_start = time.time()
         test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
+        pred_time = (time.time() - pred_start) / X_test.shape[0]
         test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
                                               labels, weight_test)
         if train_loss is not False:
@@ -136,11 +139,14 @@ def get_test_loss(
                 eval_metric, test_pred_y,
                 y_train, labels, fit_kwargs.get('sample_weight'))
     else:  # customized metric function
-        test_loss, train_loss = eval_metric(
+        test_loss, metrics = eval_metric(
             X_test, y_test, estimator, labels, X_train, y_train,
             weight_test, fit_kwargs.get('sample_weight'))
+        if isinstance(metrics, dict):
+            pred_time = metrics.get('pred_time', 0)
+        train_loss = metrics
     train_time = time.time() - start
-    return test_loss, train_time, train_loss
+    return test_loss, train_time, train_loss, pred_time


 def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
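
For reference, a sketch of a custom eval_metric conforming to the dict contract handled above; the signature follows the call site in get_test_loss, and the function name is illustrative:

    import time
    from sklearn.metrics import log_loss

    def my_custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                         weight_test=None, weight_train=None):
        start = time.time()
        y_pred = estimator.predict_proba(X_test)
        # per-instance prediction latency, normalized the same way as get_test_loss
        pred_time = (time.time() - start) / len(X_test)
        test_loss = log_loss(y_test, y_pred, labels=labels,
                             sample_weight=weight_test)
        # first value: objective to minimize; second: metrics to log, whose
        # optional 'pred_time' key is read back by get_test_loss
        return test_loss, {'test_loss': test_loss, 'pred_time': pred_time}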
@@ -154,27 +160,27 @@ def evaluate_model(
    fit_kwargs={}
 ):
     if 'holdout' in eval_method:
-        val_loss, train_loss, train_time = evaluate_model_holdout(
+        val_loss, train_loss, train_time, pred_time = evaluate_model_holdout(
             estimator, X_train, y_train, X_val, y_val, weight_val, budget,
-            task, eval_metric, best_val_loss, train_loss=train_loss,
+            task, eval_metric, train_loss=train_loss,
             fit_kwargs=fit_kwargs)
     else:
-        val_loss, train_loss, train_time = evaluate_model_CV(
+        val_loss, train_loss, train_time, pred_time = evaluate_model_CV(
             estimator, X_train, y_train, budget, kf, task,
             eval_metric, best_val_loss, train_loss=train_loss,
             fit_kwargs=fit_kwargs)
-    return val_loss, train_loss, train_time
+    return val_loss, train_loss, train_time, pred_time


 def evaluate_model_holdout(
     estimator, X_train, y_train, X_val, y_val,
-    weight_val, budget, task, eval_metric, best_val_loss, train_loss=False,
+    weight_val, budget, task, eval_metric, train_loss=False,
     fit_kwargs={}
 ):
-    val_loss, train_time, train_loss = get_test_loss(
+    val_loss, train_time, train_loss, pred_time = get_test_loss(
         estimator, X_train, y_train, X_val, y_val, weight_val, eval_metric,
         task, budget=budget, train_loss=train_loss, fit_kwargs=fit_kwargs)
-    return val_loss, train_loss, train_time
+    return val_loss, train_loss, train_time, pred_time


 def evaluate_model_CV(
@@ -182,9 +188,10 @@ def evaluate_model_CV(
     task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}
 ):
     start_time = time.time()
-    total_val_loss = total_train_loss = 0
-    train_time = 0
-    valid_fold_num = 0
+    total_val_loss = 0
+    total_train_loss = None
+    train_time = pred_time = 0
+    valid_fold_num = total_fold_num = 0
     n = kf.get_n_splits()
     X_train_split, y_train_split = X_train_all, y_train_all
     if task == 'regression':
@@ -224,23 +231,28 @@ def evaluate_model_CV(
         if weight is not None:
             fit_kwargs['sample_weight'], weight_val = weight[
                 train_index], weight[val_index]
-        val_loss_i, train_time_i, train_loss_i = get_test_loss(
+        val_loss_i, train_time_i, train_loss_i, pred_time_i = get_test_loss(
             estimator, X_train, y_train, X_val, y_val, weight_val,
             eval_metric, task, labels, budget_per_train,
             train_loss=train_loss, fit_kwargs=fit_kwargs)
         if weight is not None:
             fit_kwargs['sample_weight'] = weight
         valid_fold_num += 1
+        total_fold_num += 1
         total_val_loss += val_loss_i
         if train_loss is not False:
             if isinstance(total_train_loss, list):
                 total_train_loss = [
                     total_train_loss[i] + v for i, v in enumerate(train_loss_i)]
-            elif total_train_loss != 0:
+            elif isinstance(total_train_loss, dict):
+                total_train_loss = {
+                    k: total_train_loss[k] + v for k, v in train_loss_i.items()}
+            elif total_train_loss is not None:
                 total_train_loss += train_loss_i
             else:
                 total_train_loss = train_loss_i
         train_time += train_time_i
+        pred_time += pred_time_i
         if valid_fold_num == n:
             val_loss_list.append(total_val_loss / valid_fold_num)
             total_val_loss = valid_fold_num = 0
@@ -248,16 +260,20 @@ def evaluate_model_CV(
             val_loss_list.append(total_val_loss / valid_fold_num)
             break
     val_loss = np.max(val_loss_list)
+    n = total_fold_num
     if train_loss is not False:
         if isinstance(total_train_loss, list):
             train_loss = [v / n for v in total_train_loss]
+        elif isinstance(total_train_loss, dict):
+            train_loss = {k: v / n for k, v in total_train_loss.items()}
         else:
             train_loss = total_train_loss / n
+    pred_time /= n
     budget -= time.time() - start_time
     if val_loss < best_val_loss and budget > budget_per_train:
         estimator.cleanup()
         estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
-    return val_loss, train_loss, train_time
+    return val_loss, train_loss, train_time, pred_time


 def compute_estimator(
@@ -266,17 +282,15 @@ def compute_estimator(
     best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
     fit_kwargs={}
 ):
-    start_time = time.time()
     estimator_class = estimator_class or get_estimator_class(
         task, estimator_name)
     estimator = estimator_class(
         **config_dic, task=task, n_jobs=n_jobs)
-    val_loss, train_loss, train_time = evaluate_model(
+    val_loss, train_loss, train_time, pred_time = evaluate_model(
         estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf, task,
         eval_method, eval_metric, best_val_loss, train_loss=train_loss,
         fit_kwargs=fit_kwargs)
-    all_time = time.time() - start_time
-    return estimator, val_loss, train_loss, train_time, all_time
+    return estimator, val_loss, train_loss, train_time, pred_time


 def train_estimator(
@@ -42,6 +42,7 @@ class BaseEstimator:
         self._task = task
         if '_estimator_type' in params:
             self._estimator_type = params['_estimator_type']
+            del self.params['_estimator_type']
         else:
             self._estimator_type = "regressor" if task == 'regression' \
                 else "classifier"
@@ -152,7 +153,7 @@ class BaseEstimator:
         return {}

     @classmethod
-    def size(cls, config):
+    def size(cls, config: dict) -> float:
         '''[optional method] memory size of the estimator in bytes

         Args:
@@ -165,7 +166,7 @@ class BaseEstimator:
         return 1.0

     @classmethod
-    def cost_relative2lgbm(cls):
+    def cost_relative2lgbm(cls) -> float:
         '''[optional method] relative cost compared to lightgbm'''
         return 1.0

@@ -445,7 +446,8 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
         **params
     ):
         super().__init__(task, **params)
-        self.params = params
+        del self.params['objective']
+        del self.params['max_bin']
         self.params.update({
             "n_estimators": int(round(n_estimators)),
             'max_leaves': int(round(max_leaves)),
@@ -514,7 +516,8 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
         n_estimators=4, max_features=1.0, criterion='gini', **params
     ):
         super().__init__(task, **params)
-        self.params = params
+        del self.params['objective']
+        del self.params['max_bin']
         self.params.update({
             "n_estimators": int(round(n_estimators)),
             "n_jobs": n_jobs,
@@ -525,8 +528,6 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
         else:
             self.estimator_class = RandomForestClassifier
         self.params['criterion'] = criterion
-        self._time_per_iter = None
-        self._train_size = 0

     def get_params(self, deep=False):
         params = super().get_params()
@@ -761,7 +762,6 @@ class KNeighborsEstimator(BaseEstimator):
         self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params
     ):
         super().__init__(task, **params)
-        self.params = params
         self.params.update({
             'n_neighbors': int(round(n_neighbors)),
             'weights': params.get('weights', 'distance'),
@@ -1 +1 @@
-__version__ = "0.5.6"
+__version__ = "0.5.7"
@@ -95,14 +95,19 @@ class MyXGB2(XGBoostEstimator):
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
+    import time
+    start = time.time()
     y_pred = estimator.predict_proba(X_test)
+    pred_time = (time.time() - start) / len(X_test)
     test_loss = log_loss(y_test, y_pred, labels=labels,
                          sample_weight=weight_test)
     y_pred = estimator.predict_proba(X_train)
     train_loss = log_loss(y_train, y_pred, labels=labels,
                           sample_weight=weight_train)
     alpha = 0.5
-    return test_loss * (1 + alpha) - alpha * train_loss, [test_loss, train_loss]
+    return test_loss * (1 + alpha) - alpha * train_loss, {
+        "test_loss": test_loss, "train_loss": train_loss, "pred_time": pred_time
+    }


 class TestAutoML(unittest.TestCase):
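
Note how the updated custom_metric times predict_proba and returns a dict instead of a list: get_test_loss in flaml/ml.py reads the 'pred_time' entry back via metrics.get('pred_time', 0) and stores the whole dict as the metrics to log, matching the revised fit() docstring.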
@@ -133,8 +138,8 @@ class TestAutoML(unittest.TestCase):
             learner_class=MyRegularizedGreedyForest)
         X_train, y_train = load_wine(return_X_y=True)
         settings = {
-            "time_budget": 10,  # total running time in seconds
-            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "time_budget": 5,  # total running time in seconds
+            "estimator_list": ['rf', 'xgboost', 'catboost'],
             "task": 'classification',  # task type
             "sample": True,  # whether to subsample training data
             "log_file_name": "test/wine.log",
@@ -163,6 +168,7 @@ class TestAutoML(unittest.TestCase):
             "n_jobs": 1,
             "model_history": True,
             "sample_weight": np.ones(len(y_train)),
+            "pred_time_limit": 1e-5,
         }
         automl_experiment.fit(X_train=X_train, y_train=y_train,
                               **automl_settings)
@@ -26,7 +26,8 @@ class TestTrainingLog(unittest.TestCase):
             "mem_thres": 1024 * 1024,
             "n_jobs": 1,
             "model_history": True,
-            "verbose": 2,
+            "train_time_limit": 0.01,
+            "verbose": 3,
         }
         X_train, y_train = load_boston(return_X_y=True)
         automl_experiment.fit(X_train=X_train, y_train=y_train,