autogen/test/tune.py

from flaml import tune
from flaml.model import LGBMEstimator
import lightgbm
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error

X, y = fetch_california_housing(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)


def train_lgbm(config: dict) -> dict:
    # convert config dict to lgbm params
    params = LGBMEstimator(**config).params
    # train the model
    train_set = lightgbm.Dataset(X_train, y_train)
    model = lightgbm.train(params, train_set)
    # evaluate the model on the held-out test set
    pred = model.predict(X_test)
    mse = mean_squared_error(y_test, pred)
    # return eval results as a dictionary
    return {"mse": mse}
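
# a quick sanity check of the objective, kept commented out so it does not eat
# into the tuning budget; the config values below are hypothetical low-cost
# settings, not tuned ones
# print(train_lgbm({"n_estimators": 4, "num_leaves": 4}))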


# load a built-in search space from flaml
flaml_lgbm_search_space = LGBMEstimator.search_space(X_train.shape)
# specify the search space as a dict from hp name to domain;
# you can define your own search space the same way (see the sketch below)
config_search_space = {
    hp: space["domain"] for hp, space in flaml_lgbm_search_space.items()
}
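
# a minimal sketch of a hand-written search space, assuming flaml.tune's
# sampling functions tune.randint and tune.loguniform; the hyperparameter
# names and ranges below are illustrative, not FLAML's built-in defaults
custom_search_space = {
    "n_estimators": tune.randint(4, 1024),
    "num_leaves": tune.randint(4, 1024),
    "learning_rate": tune.loguniform(1 / 1024, 1.0),
}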

# give guidance about hp values corresponding to low training cost,
# e.g., {"n_estimators": 4, "num_leaves": 4}
low_cost_partial_config = {
    hp: space["low_cost_init_value"]
    for hp, space in flaml_lgbm_search_space.items()
    if "low_cost_init_value" in space
}

# run the tuning, minimizing mse, with a total time budget of 3 seconds
analysis = tune.run(
    train_lgbm,
    metric="mse",
    mode="min",
    config=config_search_space,
    low_cost_partial_config=low_cost_partial_config,
    time_budget_s=3,
    num_samples=-1,  # no cap on the number of trials; stop on the time budget
)
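
# a minimal sketch of inspecting the outcome; flaml's tune.run returns an
# analysis object exposing the best trial found within the budget
print(analysis.best_config)  # best hyperparameter configuration
print(analysis.best_result)  # metrics of the best trial, including "mse"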