no retraining when max_iter=0 and not retrain_full

This commit is contained in:
Chi Wang 2021-11-06 11:37:57 -07:00
parent 0d9439212f
commit c4d5986ee8
2 changed files with 43 additions and 42 deletions

View File

@ -310,7 +310,7 @@ def size(state: AutoMLState, config: dict) -> float:
class AutoML:
"""The AutoML class
"""The AutoML class.
Example:
@ -358,10 +358,10 @@ class AutoML:
return self.__dict__.get("_trained_estimator")
def best_model_for_estimator(self, estimator_name):
"""Return the best model found for a particular estimator
"""Return the best model found for a particular estimator.
Args:
estimator_name: a str of the estimator's name
estimator_name: a str of the estimator's name.
Returns:
An object with `predict()` and `predict_proba()` method (for
@ -397,7 +397,7 @@ class AutoML:
@property
def best_loss(self):
"""A float of the best loss found"""
"""A float of the best loss found."""
return self._state.best_loss
@property
@ -420,7 +420,7 @@ class AutoML:
@property
def time_to_find_best_model(self) -> float:
"""Time taken to find best model in seconds"""
"""Time taken to find best model in seconds."""
return self.__dict__.get("_time_taken_best_iter")
def predict(self, X_test):
@ -849,26 +849,26 @@ class AutoML:
)
def add_learner(self, learner_name, learner_class):
"""Add a customized learner
"""Add a customized learner.
Args:
learner_name: A string of the learner's name
learner_class: A subclass of flaml.model.BaseEstimator
learner_name: A string of the learner's name.
learner_class: A subclass of flaml.model.BaseEstimator.
"""
self._state.learner_classes[learner_name] = learner_class
def get_estimator_from_log(self, log_file_name, record_id, task):
"""Get the estimator from log file
"""Get the estimator from log file.
Args:
log_file_name: A string of the log file name
log_file_name: A string of the log file name.
record_id: An integer of the record ID in the file,
0 corresponds to the first trial
0 corresponds to the first trial.
task: A string of the task type,
'binary', 'multi', 'regression', 'ts_forecast', 'rank'
'binary', 'multi', 'regression', 'ts_forecast', 'rank'.
Returns:
An estimator object for the given configuration
An estimator object for the given configuration.
"""
with training_log_reader(log_file_name) as reader:
@ -907,7 +907,7 @@ class AutoML:
auto_augment=True,
**fit_kwargs,
):
"""Retrain from log file
"""Retrain from log file.
Args:
log_file_name: A string of the log file name.
@ -1077,11 +1077,13 @@ class AutoML:
@property
def search_space(self) -> dict:
"""Search space
Must be called after fit(...) (use max_iter=0 to prevent actual fitting)
"""Search space.
Must be called after fit(...)
(use max_iter=0 and retrain_full=False to prevent actual fitting).
Returns:
A dict of the search space
A dict of the search space.
"""
estimator_list = self.estimator_list
if len(estimator_list) == 1:
@ -1098,7 +1100,7 @@ class AutoML:
@property
def low_cost_partial_config(self) -> dict:
"""Low cost partial config
"""Low cost partial config.
Returns:
A dict.
@ -1109,7 +1111,6 @@ class AutoML:
to each learner's low_cost_partial_config; the estimator index as
an integer corresponding to the cheapest learner is appended to the
list at the end.
"""
if len(self.estimator_list) == 1:
estimator = self.estimator_list[0]
@ -1143,7 +1144,6 @@ class AutoML:
a list of the cat_hp_cost's as the value, corresponding
to each learner's cat_hp_cost; the cost relative to lgbm for each
learner (as a list itself) is appended to the list at the end.
"""
if len(self.estimator_list) == 1:
estimator = self.estimator_list[0]
@ -1195,28 +1195,28 @@ class AutoML:
@property
def min_resource(self) -> Optional[float]:
"""Attribute for pruning
"""Attribute for pruning.
Returns:
A float for the minimal sample size or None
A float for the minimal sample size or None.
"""
return self._min_sample_size if self._sample else None
@property
def max_resource(self) -> Optional[float]:
"""Attribute for pruning
"""Attribute for pruning.
Returns:
A float for the maximal sample size or None
A float for the maximal sample size or None.
"""
return self._state.data_size if self._sample else None
@property
def trainable(self) -> Callable[[dict], Optional[float]]:
"""Training function
"""Training function.
Returns:
A function that evaluates each config and returns the loss
A function that evaluates each config and returns the loss.
"""
self._state.time_from_start = 0
for estimator in self.estimator_list:
@ -1252,10 +1252,10 @@ class AutoML:
@property
def metric_constraints(self) -> list:
"""Metric constraints
"""Metric constraints.
Returns:
A list of the metric constraints
A list of the metric constraints.
"""
constraints = []
if np.isfinite(self._pred_time_limit):
@ -1307,7 +1307,7 @@ class AutoML:
use_ray=False,
**fit_kwargs,
):
"""Find a model for a given task
"""Find a model for a given task.
Args:
X_train: A numpy array or a pandas dataframe of training data in
@ -1496,6 +1496,7 @@ class AutoML:
and eval_method == "holdout"
and self._state.X_val is None
or eval_method == "cv"
and (max_iter > 0 or retrain_full is True)
or max_iter == 1
)
self._auto_augment = auto_augment

View File

@ -81,7 +81,7 @@ def sklearn_metric_loss_score(
sample_weight=None,
groups=None,
):
"""Loss using the specified metric
"""Loss using the specified metric.
Args:
metric_name: A string of the metric name, one of
@ -484,15 +484,15 @@ def get_classification_objective(num_labels: int) -> str:
def norm_confusion_matrix(y_true, y_pred):
"""normalized confusion matrix
"""Normalized confusion matrix.
Args:
estimator: A multi-class classification estimator
y_true: A numpy array or a pandas series of true labels
y_pred: A numpy array or a pandas series of predicted labels
estimator: A multi-class classification estimator.
y_true: A numpy array or a pandas series of true labels.
y_pred: A numpy array or a pandas series of predicted labels.
Returns:
A normalized confusion matrix
A normalized confusion matrix.
"""
from sklearn.metrics import confusion_matrix
@ -502,19 +502,19 @@ def norm_confusion_matrix(y_true, y_pred):
def multi_class_curves(y_true, y_pred_proba, curve_func):
"""Binarize the data for multi-class tasks and produce ROC or precision-recall curves
"""Binarize the data for multi-class tasks and produce ROC or precision-recall curves.
Args:
y_true: A numpy array or a pandas series of true labels
y_pred_proba: A numpy array or a pandas dataframe of predicted probabilites
curve_func: A function to produce a curve (e.g., roc_curve or precision_recall_curve)
y_true: A numpy array or a pandas series of true labels.
y_pred_proba: A numpy array or a pandas dataframe of predicted probabilities.
curve_func: A function to produce a curve (e.g., roc_curve or precision_recall_curve).
Returns:
A tuple of two dictionaries with the same set of keys (class indices)
A tuple of two dictionaries with the same set of keys (class indices).
The first dictionary curve_x stores the x coordinates of each curve, e.g.,
curve_x[0] is an 1D array of the x coordinates of class 0
curve_x[0] is a 1D array of the x coordinates of class 0.
The second dictionary curve_y stores the y coordinates of each curve, e.g.,
curve_y[0] is an 1D array of the y coordinates of class 0
curve_y[0] is a 1D array of the y coordinates of class 0.
"""
from sklearn.preprocessing import label_binarize