# Mirror of https://github.com/microsoft/autogen.git (synced 2025-09-21 14:13:58 +00:00).
# File listing: 125 lines, 3.6 KiB, Python.
from flaml.tune.spark.utils import broadcast_code
# Python source text that is passed, as a string, to `broadcast_code` at the
# bottom of this file.  It must be valid, correctly indented Python on its
# own, because it is consumed as source rather than executed inline here.
#
# The text defines:
#   * MyRegularizedGreedyForest - an SKLearnEstimator subclass that selects
#     RGFClassifier when `task` is in CLASSIFICATION and RGFRegressor
#     otherwise, with its own search space, size estimate and relative cost.
#   * MyLargeXGB / MyLargeLGBM - subclasses that only override
#     `search_space` (n_estimators plus max_leaves / num_leaves), starting at
#     the upper bound with a low-cost initial value of 4.
#   * custom_metric - returns `val_loss * (1 + alpha) - alpha * train_loss`
#     with alpha = 0.5, plus a dict holding val_loss, train_loss and the
#     per-row prediction time.
custom_code = """
from flaml import tune
from flaml.automl.model import LGBMEstimator, XGBoostSklearnEstimator, SKLearnEstimator
from flaml.automl.data import CLASSIFICATION, get_output_from_log

class MyRegularizedGreedyForest(SKLearnEstimator):
    def __init__(self, task="binary", **config):

        super().__init__(task, **config)

        if task in CLASSIFICATION:
            from rgf.sklearn import RGFClassifier

            self.estimator_class = RGFClassifier
        else:
            from rgf.sklearn import RGFRegressor

            self.estimator_class = RGFRegressor

    @classmethod
    def search_space(cls, data_size, task):
        space = {
            "max_leaf": {
                "domain": tune.lograndint(lower=4, upper=data_size[0]),
                "init_value": 4,
            },
            "n_iter": {
                "domain": tune.lograndint(lower=1, upper=data_size[0]),
                "init_value": 1,
            },
            "n_tree_search": {
                "domain": tune.lograndint(lower=1, upper=32768),
                "init_value": 1,
            },
            "opt_interval": {
                "domain": tune.lograndint(lower=1, upper=10000),
                "init_value": 100,
            },
            "learning_rate": {"domain": tune.loguniform(lower=0.01, upper=20.0)},
            "min_samples_leaf": {
                "domain": tune.lograndint(lower=1, upper=20),
                "init_value": 20,
            },
        }
        return space

    @classmethod
    def size(cls, config):
        max_leaves = int(round(config.get("max_leaf", 1)))
        n_estimators = int(round(config.get("n_iter", 1)))
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

    @classmethod
    def cost_relative2lgbm(cls):
        return 1.0


class MyLargeXGB(XGBoostSklearnEstimator):
    @classmethod
    def search_space(cls, **params):
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "max_leaves": {
                "domain": tune.lograndint(lower=4, upper=3276),
                "init_value": 3276,
                "low_cost_init_value": 4,
            },
        }


class MyLargeLGBM(LGBMEstimator):
    @classmethod
    def search_space(cls, **params):
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=4, upper=3276),
                "init_value": 3276,
                "low_cost_init_value": 4,
            },
        }


def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    from sklearn.metrics import log_loss
    import time

    start = time.time()
    y_pred = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
    alpha = 0.5
    return val_loss * (1 + alpha) - alpha * train_loss, {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }
"""
# Hand the source text held in `custom_code` to flaml's Spark helper.  The
# return value is not used in this file, so it is bound to `_`.
# NOTE(review): presumably this makes the custom learners and metric available
# to Spark workers - confirm against flaml.tune.spark.utils.broadcast_code.
_ = broadcast_code(custom_code=custom_code)