mirror of
https://github.com/microsoft/autogen.git
synced 2025-09-19 21:24:10 +00:00

* Added spark support for parallel training. * Added tests and fixed a bug * Added more tests and updated docs * Updated setup.py and docs * Added customize_learner and tests * Update spark tests and setup.py * Update docs and verbose * Update logging, fix issue in cloud notebook * Update github workflow for spark tests * Update github workflow * Remove hack of handling _choice_ * Allow for failures * Fix tests, update docs * Update setup.py * Update Dockerfile for Spark * Update tests, remove some warnings * Add test for notebooks, update utils * Add performance test for Spark * Fix lru_cache maxsize * Fix test failures on some platforms * Fix coverage report failure * resolve PR comments * resolve PR comments 2nd round * resolve PR comments 3rd round * fix lint and rename test class * resolve PR comments 4th round * refactor customize_learner to broadcast_code
125 lines
3.6 KiB
Python
125 lines
3.6 KiB
Python
from flaml.tune.spark.utils import broadcast_code
|
|
|
|
custom_code = """
|
|
from flaml import tune
|
|
from flaml.automl.model import LGBMEstimator, XGBoostSklearnEstimator, SKLearnEstimator
|
|
from flaml.automl.data import CLASSIFICATION, get_output_from_log
|
|
|
|
class MyRegularizedGreedyForest(SKLearnEstimator):
    # Estimator backed by the rgf.sklearn wrappers: RGFClassifier when the
    # task is listed in CLASSIFICATION, RGFRegressor for any other task.

    def __init__(self, task="binary", **config):
        # The base class handles the task and config first.
        super().__init__(task, **config)

        # rgf is imported lazily, and only the variant that is actually used.
        if task not in CLASSIFICATION:
            from rgf.sklearn import RGFRegressor

            self.estimator_class = RGFRegressor
            return

        from rgf.sklearn import RGFClassifier

        self.estimator_class = RGFClassifier

    @classmethod
    def search_space(cls, data_size, task):
        # Returns a dict mapping each hyperparameter name to its sampling
        # "domain" and, where one is given, its "init_value". `task` is
        # accepted but not used.
        # data_size[0] is the upper bound for max_leaf and n_iter
        # (presumably the number of training rows -- confirm with the caller).
        upper_bound = data_size[0]
        return {
            "max_leaf": {
                "domain": tune.lograndint(lower=4, upper=upper_bound),
                "init_value": 4,
            },
            "n_iter": {
                "domain": tune.lograndint(lower=1, upper=upper_bound),
                "init_value": 1,
            },
            "n_tree_search": {
                "domain": tune.lograndint(lower=1, upper=32768),
                "init_value": 1,
            },
            "opt_interval": {
                "domain": tune.lograndint(lower=1, upper=10000),
                "init_value": 100,
            },
            "learning_rate": {"domain": tune.loguniform(lower=0.01, upper=20.0)},
            "min_samples_leaf": {
                "domain": tune.lograndint(lower=1, upper=20),
                "init_value": 20,
            },
        }

    @classmethod
    def size(cls, config):
        # Size figure derived from the rounded "max_leaf" and "n_iter" config
        # values; each defaults to 1 when missing from config.
        # NOTE(review): presumably an estimate in bytes -- confirm.
        leaf_count = int(round(config.get("max_leaf", 1)))
        iteration_count = int(round(config.get("n_iter", 1)))
        per_iteration = leaf_count * 3 + (leaf_count - 1) * 4 + 1.0
        return per_iteration * iteration_count * 8

    @classmethod
    def cost_relative2lgbm(cls):
        # Constant 1.0.
        # NOTE(review): going by the name, a cost ratio versus LightGBM -- confirm.
        return 1.0
|
|
|
|
|
|
class MyLargeXGB(XGBoostSklearnEstimator):
    # XGBoost estimator whose search_space returns only "n_estimators" and
    # "max_leaves".

    @classmethod
    def search_space(cls, **params):
        # Extra keyword arguments are accepted and ignored. Both
        # hyperparameters use the upper end of their range as "init_value"
        # and the lower end as "low_cost_init_value".
        n_estimators_spec = {
            "domain": tune.lograndint(lower=4, upper=32768),
            "init_value": 32768,
            "low_cost_init_value": 4,
        }
        max_leaves_spec = {
            "domain": tune.lograndint(lower=4, upper=3276),
            "init_value": 3276,
            "low_cost_init_value": 4,
        }
        return {
            "n_estimators": n_estimators_spec,
            "max_leaves": max_leaves_spec,
        }
|
|
|
|
|
|
class MyLargeLGBM(LGBMEstimator):
    # LightGBM estimator whose search_space returns only "n_estimators" and
    # "num_leaves".

    @classmethod
    def search_space(cls, **params):
        # Extra keyword arguments are accepted and ignored. Each entry uses
        # the top of its range as "init_value" and the bottom as
        # "low_cost_init_value".
        space = {}
        space["n_estimators"] = {
            "domain": tune.lograndint(lower=4, upper=32768),
            "init_value": 32768,
            "low_cost_init_value": 4,
        }
        space["num_leaves"] = {
            "domain": tune.lograndint(lower=4, upper=3276),
            "init_value": 3276,
            "low_cost_init_value": 4,
        }
        return space
|
|
|
|
|
|
def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    # Scores `estimator` with log loss on both the validation and training
    # data and returns a 2-tuple: (value to minimize, dict of logged metrics).
    # config, groups_val and groups_train are accepted but not used here.
    import time

    from sklearn.metrics import log_loss

    # Only the validation prediction is timed; the elapsed time is divided by
    # len(X_val) to give a per-row figure.
    started_at = time.time()
    val_proba = estimator.predict_proba(X_val)
    pred_time = (time.time() - started_at) / len(X_val)
    val_loss = log_loss(y_val, val_proba, labels=labels, sample_weight=weight_val)

    train_proba = estimator.predict_proba(X_train)
    train_loss = log_loss(
        y_train, train_proba, labels=labels, sample_weight=weight_train
    )

    # Algebraically val_loss + alpha * (val_loss - train_loss): the validation
    # loss plus alpha times the gap between validation and training loss.
    alpha = 0.5
    objective = val_loss * (1 + alpha) - alpha * train_loss
    logged_metrics = {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }
    return objective, logged_metrics
|
|
"""
|
|
|
|
# Pass the source text held in custom_code to broadcast_code; the return value
# is bound to `_` and not used afterwards.
# NOTE(review): presumably this makes the definitions in custom_code available
# to Spark workers -- confirm against flaml.tune.spark.utils.broadcast_code.
_ = broadcast_code(custom_code=custom_code)
|