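# Test helper for FLAML on Spark: the custom learners and metrics below are
# defined inside a source string so they can be shipped to the cluster with
# broadcast_code and imported by name on every node.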
from flaml.tune.spark.utils import broadcast_code

custom_code = """
from flaml import tune
import time

from flaml.automl.model import LGBMEstimator, XGBoostSklearnEstimator, SKLearnEstimator
from flaml.automl.data import CLASSIFICATION, get_output_from_log


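# Custom estimator wrapping the Regularized Greedy Forest models from
# rgf.sklearn, exposed to FLAML through the SKLearnEstimator interface.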
class MyRegularizedGreedyForest(SKLearnEstimator):
    def __init__(self, task="binary", **config):
        super().__init__(task, **config)

        if task in CLASSIFICATION:
            from rgf.sklearn import RGFClassifier

            self.estimator_class = RGFClassifier
        else:
            from rgf.sklearn import RGFRegressor

            self.estimator_class = RGFRegressor

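    # Hyperparameter search space in FLAML's format: each entry gives a sampling
    # "domain" and optionally an "init_value" to start the search from.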
    @classmethod
    def search_space(cls, data_size, task):
        space = {
            "max_leaf": {
                "domain": tune.lograndint(lower=4, upper=data_size[0]),
                "init_value": 4,
            },
            "n_iter": {
                "domain": tune.lograndint(lower=1, upper=data_size[0]),
                "init_value": 1,
            },
            "n_tree_search": {
                "domain": tune.lograndint(lower=1, upper=32768),
                "init_value": 1,
            },
            "opt_interval": {
                "domain": tune.lograndint(lower=1, upper=10000),
                "init_value": 100,
            },
            "learning_rate": {"domain": tune.loguniform(lower=0.01, upper=20.0)},
            "min_samples_leaf": {
                "domain": tune.lograndint(lower=1, upper=20),
                "init_value": 20,
            },
        }
        return space

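    # Estimated memory size (in bytes) of the trained model; FLAML consults
    # this when a memory constraint (mem_thres) is set.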
    @classmethod
    def size(cls, config):
        max_leaves = int(round(config.get("max_leaf", 1)))
        n_estimators = int(round(config.get("n_iter", 1)))
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

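    # Relative training cost compared with LGBMEstimator (1.0 = same cost).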
    @classmethod
    def cost_relative2lgbm(cls):
        return 1.0


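# XGBoost estimator whose search space starts at the expensive end
# (init_value at the upper bound), with low_cost_init_value marking a cheap start.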
class MyLargeXGB(XGBoostSklearnEstimator):
    @classmethod
    def search_space(cls, **params):
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "max_leaves": {
                "domain": tune.lograndint(lower=4, upper=3276),
                "init_value": 3276,
                "low_cost_init_value": 4,
            },
        }


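# LightGBM counterpart of MyLargeXGB with the same expensive-by-default space.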
class MyLargeLGBM(LGBMEstimator):
    @classmethod
    def search_space(cls, **params):
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=4, upper=3276),
                "init_value": 3276,
                "low_cost_init_value": 4,
            },
        }


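# Custom metric in FLAML's format: returns (loss_to_minimize, info_dict).
# The combined loss equals val_loss + alpha * (val_loss - train_loss), which
# penalizes the gap between validation and training loss to discourage overfitting.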
def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    from sklearn.metrics import log_loss
    import time

    start = time.time()
    y_pred = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
    alpha = 0.5
    return val_loss * (1 + alpha) - alpha * train_loss, {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }


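# Same metric as custom_metric, but with a 2-second sleep to simulate a slow
# evaluation (e.g., for exercising time budgets and trial cancellation on Spark).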
def lazy_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    from sklearn.metrics import log_loss

    time.sleep(2)
    start = time.time()
    y_pred = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
    alpha = 0.5
    return val_loss * (1 + alpha) - alpha * train_loss, {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }
"""
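# Write the code string out as an importable module and broadcast it to the
# Spark cluster, so both the driver and the executors can load these
# learners and metrics.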
_ = broadcast_code(custom_code=custom_code)
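# A minimal usage sketch (assumption, not part of this helper): after the
# broadcast, tests typically import the learners from the generated module and
# register them with AutoML, e.g.
#
#   from flaml import AutoML
#   from flaml.tune.spark.mylearner import MyRegularizedGreedyForest, custom_metric
#
#   automl = AutoML()
#   automl.add_learner(learner_name="rgf", learner_class=MyRegularizedGreedyForest)
#   automl.fit(X_train, y_train, task="classification", estimator_list=["rgf"],
#              metric=custom_metric, time_budget=10, use_spark=True)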