mirror of
https://github.com/microsoft/autogen.git
synced 2026-01-06 12:10:58 +00:00
use_best_model for catboost (#679)
* use_best_model for catboost
* bump version to 1.0.11
This commit is contained in:
parent
3d1a28bfc0
commit
dffa802b3e
@@ -1626,15 +1626,26 @@ class CatBoostEstimator(BaseEstimator):
|
||||
cat_features = list(X_train.select_dtypes(include="category").columns)
|
||||
else:
|
||||
cat_features = []
|
||||
n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
|
||||
use_best_model = kwargs.get("use_best_model", True)
|
||||
n = (
|
||||
max(int(len(y_train) * 0.9), len(y_train) - 1000)
|
||||
if use_best_model
|
||||
else len(y_train)
|
||||
)
|
||||
X_tr, y_tr = X_train[:n], y_train[:n]
|
||||
from catboost import Pool, __version__
|
||||
|
||||
eval_set = (
|
||||
Pool(data=X_train[n:], label=y_train[n:], cat_features=cat_features)
|
||||
if use_best_model
|
||||
else None
|
||||
)
|
||||
if "sample_weight" in kwargs:
|
||||
weight = kwargs["sample_weight"]
|
||||
if weight is not None:
|
||||
kwargs["sample_weight"] = weight[:n]
|
||||
else:
|
||||
weight = None
|
||||
from catboost import Pool, __version__
|
||||
|
||||
model = self.estimator_class(train_dir=train_dir, **self.params)
|
||||
if __version__ >= "0.26":
|
||||
@@ -1642,10 +1653,10 @@ class CatBoostEstimator(BaseEstimator):
|
||||
X_tr,
|
||||
y_tr,
|
||||
cat_features=cat_features,
|
||||
eval_set=Pool(
|
||||
data=X_train[n:], label=y_train[n:], cat_features=cat_features
|
||||
eval_set=eval_set,
|
||||
callbacks=CatBoostEstimator._callbacks(
|
||||
start_time, deadline, FREE_MEM_RATIO if use_best_model else None
|
||||
),
|
||||
callbacks=CatBoostEstimator._callbacks(start_time, deadline),
|
||||
**kwargs,
|
||||
)
|
||||
else:
|
||||
@@ -1653,9 +1664,7 @@ class CatBoostEstimator(BaseEstimator):
|
||||
X_tr,
|
||||
y_tr,
|
||||
cat_features=cat_features,
|
||||
eval_set=Pool(
|
||||
data=X_train[n:], label=y_train[n:], cat_features=cat_features
|
||||
),
|
||||
eval_set=eval_set,
|
||||
**kwargs,
|
||||
)
|
||||
shutil.rmtree(train_dir, ignore_errors=True)
|
||||
@@ -1667,7 +1676,7 @@ class CatBoostEstimator(BaseEstimator):
|
||||
return train_time
|
||||
|
||||
@classmethod
|
||||
def _callbacks(cls, start_time, deadline):
|
||||
def _callbacks(cls, start_time, deadline, free_mem_ratio):
|
||||
class ResourceLimit:
|
||||
def after_iteration(self, info) -> bool:
|
||||
now = time.time()
|
||||
@@ -1675,9 +1684,9 @@ class CatBoostEstimator(BaseEstimator):
|
||||
self._time_per_iter = now - start_time
|
||||
if now + self._time_per_iter > deadline:
|
||||
return False
|
||||
if psutil is not None:
|
||||
if psutil is not None and free_mem_ratio is not None:
|
||||
mem = psutil.virtual_memory()
|
||||
if mem.available / mem.total < FREE_MEM_RATIO:
|
||||
if mem.available / mem.total < free_mem_ratio:
|
||||
return False
|
||||
return True # can continue
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = "1.0.10"
|
||||
__version__ = "1.0.11"
|
||||
|
||||
@@ -98,8 +98,8 @@ class TestRegression(unittest.TestCase):
|
||||
y_train = np.random.uniform(size=300)
|
||||
X_val = scipy.sparse.random(100, 900, density=0.0001)
|
||||
y_val = np.random.uniform(size=100)
|
||||
automl_experiment = AutoML()
|
||||
automl_settings = {
|
||||
automl = AutoML()
|
||||
settings = {
|
||||
"time_budget": 2,
|
||||
"metric": "mae",
|
||||
"task": "regression",
|
||||
@@ -110,23 +110,34 @@ class TestRegression(unittest.TestCase):
|
||||
"verbose": 0,
|
||||
"early_stop": True,
|
||||
}
|
||||
automl_experiment.fit(
|
||||
X_train=X_train,
|
||||
y_train=y_train,
|
||||
X_val=X_val,
|
||||
y_val=y_val,
|
||||
**automl_settings
|
||||
automl.fit(
|
||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
|
||||
)
|
||||
assert automl._state.X_val.shape == X_val.shape
|
||||
print(automl.predict(X_train))
|
||||
print(automl.model)
|
||||
print(automl.config_history)
|
||||
print(automl.best_model_for_estimator("rf"))
|
||||
print(automl.best_iteration)
|
||||
print(automl.best_estimator)
|
||||
print(automl.best_config)
|
||||
print(automl.best_loss)
|
||||
print(automl.best_config_train_time)
|
||||
|
||||
settings.update(
|
||||
{
|
||||
"estimator_list": ["catboost"],
|
||||
"keep_search_state": False,
|
||||
"model_history": False,
|
||||
"use_best_model": False,
|
||||
"time_budget": None,
|
||||
"max_iter": 2,
|
||||
"custom_hp": {"catboost": {"n_estimators": {"domain": 100}}},
|
||||
}
|
||||
)
|
||||
automl.fit(
|
||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **settings
|
||||
)
|
||||
assert automl_experiment._state.X_val.shape == X_val.shape
|
||||
print(automl_experiment.predict(X_train))
|
||||
print(automl_experiment.model)
|
||||
print(automl_experiment.config_history)
|
||||
print(automl_experiment.best_model_for_estimator("rf"))
|
||||
print(automl_experiment.best_iteration)
|
||||
print(automl_experiment.best_estimator)
|
||||
print(automl_experiment.best_config)
|
||||
print(automl_experiment.best_loss)
|
||||
print(automl_experiment.best_config_train_time)
|
||||
|
||||
def test_parallel(self, hpo_method=None):
|
||||
automl_experiment = AutoML()
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user