n_estimators for catboost

parent 9e9356f436
commit 7d6e860102
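Note on the change, inferred from the hunks below: this commit retires the n_iter side channel that carried CatBoost's early-stopped tree count through SearchState, AutoMLState, train_estimator, and the training log. n_estimators instead becomes an ordinary CatBoost hyperparameter (fixed domain 8192), and an early-stopped count is written back into the searched config, so best_config alone is enough to reproduce the trained model.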
@@ -89,6 +89,7 @@ class SearchState:
+                and starting_point.get(name) is not None
             ):
                 self.init_config[name] = starting_point[name]
 
         if isinstance(starting_point, list):
             self.init_config = starting_point
         self._hp_names = list(self._search_space_domain.keys())
@@ -104,7 +105,6 @@ class SearchState:
         self.trained_estimator = None
         self.sample_size = None
         self.trial_time = 0
-        self.best_n_iter = None
 
     def update(self, result, time_used, save_model_history=False):
         if result:
@@ -122,13 +122,12 @@ class SearchState:
             if (
                 n_iter is not None
                 and "n_estimators" in config
-                and n_iter >= self._search_space_domain["n_estimators"].lower
+                # and n_iter >= self._search_space_domain["n_estimators"].lower
             ):
                 config["n_estimators"] = n_iter
-                n_iter = None
         else:
             obj, time2eval, trained_estimator = np.inf, 0.0, None
-            metric_for_logging = config = n_iter = None
+            metric_for_logging = config = None
         self.trial_time = time2eval
         self.total_time_used += time_used
         self.total_iter += 1
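The fold-back in SearchState.update is the heart of the change: when a trial's learner stops early, the reported iteration count replaces n_estimators in the trial's config instead of being stored in a separate n_iter field. A minimal standalone sketch of that rule (fold_back_n_iter is a hypothetical name; in the diff the same logic runs inline on the trial result):

def fold_back_n_iter(config, n_iter):
    # n_iter is the best iteration reported by an early-stopped learner;
    # None means the learner used all requested rounds.
    if n_iter is not None and "n_estimators" in config:
        config = dict(config)  # copy so the caller's dict is untouched
        config["n_estimators"] = n_iter
    return config

# Example: a catboost config searched at the fixed 8192 cap, stopped at 473 trees.
print(fold_back_n_iter({"learning_rate": 0.1, "n_estimators": 8192}, 473))
# -> {'learning_rate': 0.1, 'n_estimators': 473}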
@@ -156,10 +155,8 @@ class SearchState:
                 self.trained_estimator.cleanup()
             if trained_estimator:
                 self.trained_estimator = trained_estimator
-                self.best_n_iter = n_iter
         self.metric_for_logging = metric_for_logging
         self.val_loss, self.config = obj, config
-        self.n_iter = n_iter
 
     def get_hist_config_sig(self, sample_size, config):
         config_values = tuple([config[k] for k in self._hp_names])
@@ -262,9 +259,7 @@ class AutoMLState:
         # tune.report(**result)
         return result
 
-    def _train_with_config(
-        self, estimator, config_w_resource, sample_size=None, n_iter=None
-    ):
+    def _train_with_config(self, estimator, config_w_resource, sample_size=None):
         if not sample_size:
             sample_size = config_w_resource.get(
                 "FLAML_sample_size", len(self.y_train_all)
@@ -301,7 +296,6 @@ class AutoMLState:
             self.n_jobs,
             self.learner_classes.get(estimator),
             budget,
-            n_iter,
             self.fit_kwargs,
         )
         if sampled_weight is not None:
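With the count stored in the config, _train_with_config no longer needs the extra n_iter parameter: whatever config it receives already pins the tree count. A toy illustration under that assumption (train and the returned string are stand-ins, not FLAML APIs):

def train(estimator_name, config, sample_size=None):
    # stand-in for the slimmed-down _train_with_config signature above
    return "{}(n_estimators={})".format(estimator_name, config.get("n_estimators"))

best_config = {"learning_rate": 0.1, "n_estimators": 473}  # 473 was folded back by update()
assert train("catboost", best_config) == "catboost(n_estimators=473)"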
@@ -1030,7 +1024,7 @@ class AutoML:
         self._state.time_budget = None
         self._state.n_jobs = n_jobs
         self._trained_estimator = self._state._train_with_config(
-            best_estimator, best_config, sample_size, best.n_iter
+            best_estimator, best_config, sample_size
         )[0]
         logger.info("retrain from log succeeded")
         return training_duration
@@ -1731,7 +1725,6 @@ class AutoML:
                 config,
                 estimator,
                 search_state.sample_size,
-                search_state.n_iter,
             )
 
     def _search_sequential(self):
@@ -1953,7 +1946,6 @@ class AutoML:
                     search_state.config,
                     estimator,
                     search_state.sample_size,
-                    search_state.n_iter,
                 )
             if mlflow is not None and mlflow.active_run():
                 with mlflow.start_run(nested=True):
@@ -2031,7 +2023,6 @@ class AutoML:
                 self._best_estimator,
                 state.best_config,
                 self.data_size_full,
-                state.best_n_iter,
             )
             logger.info(
                 "retrain {} for {:.1f}s".format(self._best_estimator, retrain_time)
@@ -2144,7 +2135,6 @@ class AutoML:
                 self._best_estimator,
                 state.best_config,
                 self.data_size_full,
-                state.best_n_iter,
             )
             logger.info(
                 "retrain {} for {:.1f}s".format(
@@ -465,14 +465,11 @@ def train_estimator(
     n_jobs=1,
     estimator_class=None,
     budget=None,
-    n_iter=None,
     fit_kwargs={},
 ):
     start_time = time.time()
     estimator_class = estimator_class or get_estimator_class(task, estimator_name)
     estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
-    if n_iter is not None:
-        estimator.params["n_estimators"] = n_iter
     if X_train is not None:
         train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
     else:
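train_estimator mirrors the same simplification: it used to patch estimator.params["n_estimators"] after construction, and now the value arrives through config_dic like any other hyperparameter. A sketch with a stand-in estimator class (FakeEstimator is hypothetical; the real class comes from get_estimator_class):

class FakeEstimator:
    def __init__(self, task=None, n_jobs=1, **params):
        self.params = params  # n_estimators lands here with the rest of the config

config_dic = {"learning_rate": 0.1, "n_estimators": 473}
est = FakeEstimator(**config_dic, task="regression", n_jobs=1)
assert est.params["n_estimators"] == 473  # no post-construction patching needed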
@@ -645,11 +645,15 @@ class CatBoostEstimator(BaseEstimator):
                 "domain": tune.loguniform(lower=0.005, upper=0.2),
                 "init_value": 0.1,
             },
+            "n_estimators": {
+                "domain": 8192,
+                "init_value": 8192,
+            },
         }
 
     @classmethod
     def size(cls, config):
-        n_estimators = 8192
+        n_estimators = config.get("n_estimators", 8192)
         max_leaves = 64
         return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8
 
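The new size() reads the tree count from the config, falling back to the 8192 cap. The formula approximates the model's in-memory footprint in bytes from a fixed 64-leaf tree layout, scaled by the number of trees; reproducing it verbatim:

def catboost_size(config):
    # verbatim from the diff: ~bytes per 64-leaf tree, times the number of trees
    n_estimators = config.get("n_estimators", 8192)
    max_leaves = 64
    return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

print(catboost_size({}))                     # 29163520.0 bytes, ~29 MB at the 8192 cap
print(catboost_size({"n_estimators": 473}))  # 1683880.0 bytes, ~1.7 MB after early stop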
@@ -23,7 +23,6 @@ class TrainingLogRecord(object):
         config: dict,
         learner: str,
         sample_size: int,
-        n_iter: int,
     ):
         self.record_id = record_id
         self.iter_per_learner = iter_per_learner
@@ -34,7 +33,6 @@ class TrainingLogRecord(object):
         self.config = config
         self.learner = learner
         self.sample_size = sample_size
-        self.n_iter = n_iter  # n_estimators for catboost
 
     def dump(self, fp: IO[str]):
         d = vars(self)
@@ -79,7 +77,6 @@ class TrainingLogWriter(object):
         config,
         learner,
         sample_size,
-        n_iter,
     ):
         if self.file is None:
             raise IOError("Call open() to open the outpute file first.")
@@ -95,7 +92,6 @@ class TrainingLogWriter(object):
             config,
             learner,
             sample_size,
-            n_iter,
         )
         if (
             validation_loss < self.current_best_loss
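Because TrainingLogRecord.dump serializes vars(self), dropping the attribute removes the n_iter field from every logged line; the tree count is still recoverable from the logged config. A minimal sketch of the resulting record shape (Record is a cut-down stand-in, not the full class):

import io
import json

class Record:  # cut-down stand-in for TrainingLogRecord after this commit
    def __init__(self, record_id, config, learner, sample_size):
        self.record_id = record_id
        self.config = config           # n_estimators now travels in here
        self.learner = learner
        self.sample_size = sample_size

    def dump(self, fp):
        json.dump(vars(self), fp)      # every attribute becomes a JSON field

buf = io.StringIO()
Record(0, {"n_estimators": 473}, "catboost", 1000).dump(buf)
print(buf.getvalue())  # {"record_id": 0, "config": {"n_estimators": 473}, ...}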
@@ -9,7 +9,7 @@ from flaml.training_log import training_log_reader
 
 
 class TestTrainingLog(unittest.TestCase):
-    def test_training_log(self, path="test_training_log.log"):
+    def test_training_log(self, path="test_training_log.log", estimator_list="auto"):
 
         with TemporaryDirectory() as d:
             filename = os.path.join(d, path)
@@ -27,8 +27,9 @@ class TestTrainingLog(unittest.TestCase):
                 "model_history": True,
                 "train_time_limit": 0.1,
                 "verbose": 3,
-                "ensemble": True,
+                # "ensemble": True,
                 "keep_search_state": True,
+                "estimator_list": estimator_list,
             }
             X_train, y_train = fetch_california_housing(return_X_y=True)
             automl.fit(X_train=X_train, y_train=y_train, **automl_settings)
@@ -37,31 +38,34 @@ class TestTrainingLog(unittest.TestCase):
             if automl.best_estimator:
                 estimator, config = automl.best_estimator, automl.best_config
                 model0 = automl.best_model_for_estimator(estimator)
-                print(model0.params["n_estimators"], model0.estimator)
+                print(model0.params["n_estimators"], config)
 
                 # train on full data with no time limit
                 automl._state.time_budget = None
                 model, _ = automl._state._train_with_config(estimator, config)
-                print(model.estimator)
+                # model0 and model are equivalent unless model0's n_estimator is out of search space range
+                assert (
+                    str(model0.estimator) == str(model.estimator)
+                    or model0.params["n_estimators"] < 4
+                )
 
                 # assuming estimator & config are saved and loaded as follows
                 automl = AutoML()
                 automl.fit(
                     X_train=X_train,
                     y_train=y_train,
-                    max_iter=0,
+                    max_iter=1,
                     task="regression",
                     estimator_list=[estimator],
                     n_jobs=1,
                     starting_points={estimator: config},
                 )
+                print(automl.best_config)
                 # then the fitted model should be equivalent to model
-                assert str(model.estimator) == str(automl.model.estimator)
+                # print(str(model.estimator), str(automl.model.estimator))
+                assert (
+                    str(model.estimator) == str(automl.model.estimator)
+                    or estimator == "xgboost"
+                    and str(model.estimator.get_dump())
+                    == str(automl.model.estimator.get_dump())
+                    or estimator == "catboost"
+                    and str(model.estimator.get_all_params())
+                    == str(automl.model.estimator.get_all_params())
+                )
 
             with training_log_reader(filename) as reader:
                 count = 0
@@ -83,3 +87,10 @@ class TestTrainingLog(unittest.TestCase):
             print("IsADirectoryError happens as expected in linux.")
         except PermissionError:
             print("PermissionError happens as expected in windows.")
+
+    def test_each_estimator(self):
+        self.test_training_log(estimator_list=["xgboost"])
+        self.test_training_log(estimator_list=["catboost"])
+        self.test_training_log(estimator_list=["extra_tree"])
+        self.test_training_log(estimator_list=["rf"])
+        self.test_training_log(estimator_list=["lgbm"])
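The new test_each_estimator runs the same log-and-retrain round trip once per learner, which is what exercises the CatBoost n_estimators path end to end. Assuming the repository's usual pytest layout, it can be selected with python -m pytest -k test_each_estimator (the invocation is an assumption, not part of the diff).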