from flaml.tune.spark.utils import broadcast_code

custom_code = """
from flaml import tune
import time
from flaml.automl.model import LGBMEstimator, XGBoostSklearnEstimator, SKLearnEstimator
from flaml.automl.data import get_output_from_log
from flaml.automl.task.task import CLASSIFICATION


class MyRegularizedGreedyForest(SKLearnEstimator):
    # Custom learner: Regularized Greedy Forest via the `rgf` package.
    def __init__(self, task="binary", **config):
        super().__init__(task, **config)

        # Resolve a string task name (e.g., "binary") into a Task object.
        if isinstance(task, str):
            from flaml.automl.task.factory import task_factory

            task = task_factory(task)
        if task.is_classification():
            from rgf.sklearn import RGFClassifier

            self.estimator_class = RGFClassifier
        else:
            from rgf.sklearn import RGFRegressor

            self.estimator_class = RGFRegressor

    @classmethod
    def search_space(cls, data_size, task):
        # Hyperparameter search space; data_size[0] is the number of training rows.
        space = {
            "max_leaf": {
                "domain": tune.lograndint(lower=4, upper=data_size[0]),
                "init_value": 4,
            },
            "n_iter": {
                "domain": tune.lograndint(lower=1, upper=data_size[0]),
                "init_value": 1,
            },
            "n_tree_search": {
                "domain": tune.lograndint(lower=1, upper=32768),
                "init_value": 1,
            },
            "opt_interval": {
                "domain": tune.lograndint(lower=1, upper=10000),
                "init_value": 100,
            },
            "learning_rate": {"domain": tune.loguniform(lower=0.01, upper=20.0)},
            "min_samples_leaf": {
                "domain": tune.lograndint(lower=1, upper=20),
                "init_value": 20,
            },
        }
        return space

    @classmethod
    def size(cls, config):
        # Rough memory estimate (in bytes) of a trained model for this config.
        max_leaves = int(round(config.get("max_leaf", 1)))
        n_estimators = int(round(config.get("n_iter", 1)))
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

    @classmethod
    def cost_relative2lgbm(cls):
        # Expected training cost relative to LGBM on the same data size.
        return 1.0


class MyLargeXGB(XGBoostSklearnEstimator):
    # XGBoost variant with an enlarged search space; low_cost_init_value
    # lets the search start from a cheap configuration.
    @classmethod
    def search_space(cls, **params):
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "max_leaves": {
                "domain": tune.lograndint(lower=4, upper=3276),
                "init_value": 3276,
                "low_cost_init_value": 4,
            },
        }


class MyLargeLGBM(LGBMEstimator):
    # LightGBM variant with the same enlarged search space.
    @classmethod
    def search_space(cls, **params):
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=32768),
                "init_value": 32768,
                "low_cost_init_value": 4,
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=4, upper=3276),
                "init_value": 3276,
                "low_cost_init_value": 4,
            },
        }


def custom_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    # Custom metric that penalizes the train/validation gap to discourage
    # overfitting. Returns (loss to minimize, dict of metrics to log).
    from sklearn.metrics import log_loss
    import time

    start = time.time()
    y_pred = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
    alpha = 0.5
    return val_loss * (1 + alpha) - alpha * train_loss, {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }


def lazy_metric(
    X_val,
    y_val,
    estimator,
    labels,
    X_train,
    y_train,
    weight_val=None,
    weight_train=None,
    config=None,
    groups_val=None,
    groups_train=None,
):
    # Same metric as custom_metric, plus a 2-second sleep to emulate an
    # expensive evaluation (useful for exercising parallel trials).
    from sklearn.metrics import log_loss

    time.sleep(2)
    start = time.time()
    y_pred = estimator.predict_proba(X_val)
    pred_time = (time.time() - start) / len(X_val)
    val_loss = log_loss(y_val, y_pred, labels=labels, sample_weight=weight_val)
    y_pred = estimator.predict_proba(X_train)
    train_loss = log_loss(y_train, y_pred, labels=labels, sample_weight=weight_train)
    alpha = 0.5
    return val_loss * (1 + alpha) - alpha * train_loss, {
        "val_loss": val_loss,
        "train_loss": train_loss,
        "pred_time": pred_time,
    }
"""

# Persist the custom code as an importable module so Spark trial workers can use it.
_ = broadcast_code(custom_code=custom_code)
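
# --- Usage sketch (not part of the original snippet; the fit settings below are
# illustrative assumptions). broadcast_code writes custom_code into the flaml
# package as flaml/tune/spark/<file_name>.py (file_name defaults to "mylearner"),
# so the learners and metrics defined above become importable on the driver and,
# with use_spark=True, on the executors. Requires the `rgf` package on all nodes.

from flaml import AutoML
from flaml.tune.spark.mylearner import MyRegularizedGreedyForest, custom_metric
from sklearn.datasets import load_iris

X_train, y_train = load_iris(return_X_y=True)

automl = AutoML()
# Register the broadcast learner under a short name usable in estimator_list.
automl.add_learner(learner_name="rgf", learner_class=MyRegularizedGreedyForest)
automl.fit(
    X_train,
    y_train,
    task="classification",
    metric=custom_metric,       # the broadcast custom metric
    estimator_list=["rgf"],     # search only over the custom learner
    time_budget=10,             # seconds
    use_spark=True,             # run trials as Spark jobs
    n_concurrent_trials=2,      # number of parallel trials
)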