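"""Tune XGBoost for breast-cancer classification with flaml.tune
(BlendSearch, CFO) and a range of ray.tune search algorithms."""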
import unittest
import os
import time
from sklearn.model_selection import train_test_split
import sklearn.metrics
import sklearn.datasets
try:
    from ray.tune.integration.xgboost import TuneReportCheckpointCallback
except ImportError:
    print("skip test_tune because ray tune cannot be imported.")
import xgboost as xgb

import logging
logger = logging.getLogger(__name__)
logger.addHandler(logging.FileHandler('test/tune_xgboost.log'))

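
# Trainable passed to the tuner: trains an XGBoost binary classifier on the
# sklearn breast-cancer dataset; TuneReportCheckpointCallback reports eval
# logloss/error after each boosting round and checkpoints the model.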
def train_breast_cancer(config: dict):
    # This is a simple training function to be passed into Tune
    # Load dataset
    data, labels = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Split into train and test set
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.25)
    # Build input matrices for XGBoost
    train_set = xgb.DMatrix(train_x, label=train_y)
    test_set = xgb.DMatrix(test_x, label=test_y)
    # Copy the config so the tuner's search-space dict is not mutated
    config = config.copy()
    config["eval_metric"] = ["logloss", "error"]
    config["objective"] = "binary:logistic"
    # Train the classifier, using the Tune callback
    xgb.train(
        config,
        train_set,
        evals=[(test_set, "eval")],
        verbose_eval=False,
        callbacks=[TuneReportCheckpointCallback(filename="model.xgb")])

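
# Run hyperparameter search with the chosen method: 'BlendSearch' uses
# flaml.tune directly; every other method runs through ray.tune with the
# matching search algorithm, plus an ASHA scheduler for early stopping
# (except BOHB, which brings its own HyperBand scheduler).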
def _test_xgboost(method='BlendSearch'):
    try:
        import ray
    except ImportError:
        return
    if method == 'BlendSearch':
        from flaml import tune
    else:
        from ray import tune
    search_space = {
        # You can mix constants with search space objects.
        "max_depth": tune.randint(1, 8) if method in [
            "BlendSearch", "BOHB", "Optuna"] else tune.randint(1, 9),
        "min_child_weight": tune.choice([1, 2, 3]),
        "subsample": tune.uniform(0.5, 1.0),
        "eta": tune.loguniform(1e-4, 1e-1)
    }
    max_iter = 10
    for num_samples in [256]:
        time_budget_s = None
        for n_cpu in [8]:
            start_time = time.time()
            ray.init(num_cpus=n_cpu, num_gpus=0)
            if method == 'BlendSearch':
                analysis = tune.run(
                    train_breast_cancer,
                    init_config={
                        "max_depth": 1,
                        "min_child_weight": 3,
                    },
                    cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    },
                    metric="eval-logloss",
                    mode="min",
                    max_resource=max_iter,
                    min_resource=1,
                    report_intermediate_result=True,
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space,
                    local_dir='logs/',
                    num_samples=num_samples * n_cpu,
                    time_budget_s=time_budget_s,
                    use_ray=True)
            else:
                if 'ASHA' == method:
                    algo = None
                elif 'BOHB' == method:
                    from ray.tune.schedulers import HyperBandForBOHB
                    from ray.tune.suggest.bohb import TuneBOHB
                    algo = TuneBOHB(max_concurrent=n_cpu)
                    scheduler = HyperBandForBOHB(max_t=max_iter)
                elif 'Optuna' == method:
                    from ray.tune.suggest.optuna import OptunaSearch
                    algo = OptunaSearch()
                elif 'CFO' == method:
                    from flaml import CFO
                    algo = CFO(points_to_evaluate=[{
                        "max_depth": 1,
                        "min_child_weight": 3,
                    }], cat_hp_cost={
                        "min_child_weight": [6, 3, 2],
                    })
                elif 'Dragonfly' == method:
                    from ray.tune.suggest.dragonfly import DragonflySearch
                    algo = DragonflySearch()
                elif 'SkOpt' == method:
                    from ray.tune.suggest.skopt import SkOptSearch
                    algo = SkOptSearch()
                elif 'Nevergrad' == method:
                    from ray.tune.suggest.nevergrad import NevergradSearch
                    import nevergrad as ng
                    algo = NevergradSearch(optimizer=ng.optimizers.OnePlusOne)
                elif 'ZOOpt' == method:
                    from ray.tune.suggest.zoopt import ZOOptSearch
                    algo = ZOOptSearch(budget=num_samples * n_cpu)
                elif 'Ax' == method:
                    from ray.tune.suggest.ax import AxSearch
                    algo = AxSearch()
                elif 'HyperOpt' == method:
                    from ray.tune.suggest.hyperopt import HyperOptSearch
                    algo = HyperOptSearch()
                    scheduler = None
                if method != 'BOHB':
                    from ray.tune.schedulers import ASHAScheduler
                    scheduler = ASHAScheduler(
                        max_t=max_iter,
                        grace_period=1)
                analysis = tune.run(
                    train_breast_cancer,
                    metric="eval-logloss",
                    mode="min",
                    # You can add "gpu": 0.1 to allocate GPUs
                    resources_per_trial={"cpu": 1},
                    config=search_space, local_dir='logs/',
                    num_samples=num_samples * n_cpu, time_budget_s=time_budget_s,
                    scheduler=scheduler, search_alg=algo)
            ray.shutdown()
            # # Load the best model checkpoint
            # best_bst = xgb.Booster()
            # best_bst.load_model(os.path.join(analysis.best_checkpoint,
            #                                  "model.xgb"))
            best_trial = analysis.get_best_trial("eval-logloss", "min", "all")
            accuracy = 1. - best_trial.metric_analysis["eval-error"]["min"]
            logloss = best_trial.metric_analysis["eval-logloss"]["min"]
            logger.info(f"method={method}")
            logger.info(f"n_samples={num_samples * n_cpu}")
            logger.info(f"time={time.time() - start_time}")
            logger.info(f"Best model eval loss: {logloss:.4f}")
            logger.info(f"Best model total accuracy: {accuracy:.4f}")
            logger.info(f"Best model parameters: {best_trial.config}")

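
# pytest collects only the two `test_*` functions below; the underscore-
# prefixed wrappers are kept for manually exercising the other searchers.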
def test_xgboost_bs():
    _test_xgboost()


def test_xgboost_cfo():
    _test_xgboost('CFO')


def _test_xgboost_dragonfly():
    _test_xgboost('Dragonfly')


def _test_xgboost_skopt():
    _test_xgboost('SkOpt')


def _test_xgboost_nevergrad():
    _test_xgboost('Nevergrad')


def _test_xgboost_zoopt():
    _test_xgboost('ZOOpt')


def _test_xgboost_ax():
    _test_xgboost('Ax')


def _test_xgboost_hyperopt():
    _test_xgboost('HyperOpt')


def _test_xgboost_optuna():
    _test_xgboost('Optuna')


def _test_xgboost_asha():
    _test_xgboost('ASHA')


def _test_xgboost_bohb():
    _test_xgboost('BOHB')


if __name__ == "__main__":
    unittest.main()