mirror of
				https://github.com/microsoft/autogen.git
				synced 2025-11-04 11:49:45 +00:00 
			
		
		
		
	add max_depth to xgboost search space (#282)
* add max_depth to xgboost search space
* notebook update
* two learners for xgboost (max_depth or max_leaves)
This commit is contained in:
		
							parent
							
								
									d937b03e42
								
							
						
					
					
						commit
						ea6d28d7bd
					
				@ -79,7 +79,9 @@ class SearchState:
 | 
			
		||||
        self.learner_class = learner_class
 | 
			
		||||
        search_space = learner_class.search_space(data_size=data_size, task=task)
 | 
			
		||||
        for name, space in search_space.items():
 | 
			
		||||
            assert "domain" in space
 | 
			
		||||
            assert (
 | 
			
		||||
                "domain" in space
 | 
			
		||||
            ), f"{name}'s domain is missing in the search space spec {space}"
 | 
			
		||||
            self._search_space_domain[name] = space["domain"]
 | 
			
		||||
            if "init_value" in space:
 | 
			
		||||
                self.init_config[name] = space["init_value"]
 | 
			
		||||
@ -434,7 +436,7 @@ class AutoML(BaseEstimator):
 | 
			
		||||
 | 
			
		||||
                .. code-block:: python
 | 
			
		||||
 | 
			
		||||
                    ['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
 | 
			
		||||
                    ['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']
 | 
			
		||||
 | 
			
		||||
            time_budget: A float number of the time budget in seconds.
 | 
			
		||||
                Use -1 if no time limit.
 | 
			
		||||
@ -1659,7 +1661,7 @@ class AutoML(BaseEstimator):
 | 
			
		||||
 | 
			
		||||
                .. code-block:: python
 | 
			
		||||
 | 
			
		||||
                    ['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
 | 
			
		||||
                    ['lgbm', 'xgboost', 'xgb_limitdepth', 'catboost', 'rf', 'extra_tree']
 | 
			
		||||
 | 
			
		||||
            time_budget: A float number of the time budget in seconds.
 | 
			
		||||
                Use -1 if no time limit.
 | 
			
		||||
@ -1939,16 +1941,29 @@ class AutoML(BaseEstimator):
 | 
			
		||||
                except ImportError:
 | 
			
		||||
                    estimator_list = ["arima", "sarimax"]
 | 
			
		||||
            elif self._state.task == "rank":
 | 
			
		||||
                estimator_list = ["lgbm", "xgboost"]
 | 
			
		||||
                estimator_list = ["lgbm", "xgboost", "xgb_limitdepth"]
 | 
			
		||||
            elif _is_nlp_task(self._state.task):
 | 
			
		||||
                estimator_list = ["transformer"]
 | 
			
		||||
            else:
 | 
			
		||||
                try:
 | 
			
		||||
                    import catboost
 | 
			
		||||
 | 
			
		||||
                    estimator_list = ["lgbm", "rf", "catboost", "xgboost", "extra_tree"]
 | 
			
		||||
                    estimator_list = [
 | 
			
		||||
                        "lgbm",
 | 
			
		||||
                        "rf",
 | 
			
		||||
                        "catboost",
 | 
			
		||||
                        "xgboost",
 | 
			
		||||
                        "extra_tree",
 | 
			
		||||
                        "xgb_limitdepth",
 | 
			
		||||
                    ]
 | 
			
		||||
                except ImportError:
 | 
			
		||||
                    estimator_list = ["lgbm", "rf", "xgboost", "extra_tree"]
 | 
			
		||||
                    estimator_list = [
 | 
			
		||||
                        "lgbm",
 | 
			
		||||
                        "rf",
 | 
			
		||||
                        "xgboost",
 | 
			
		||||
                        "extra_tree",
 | 
			
		||||
                        "xgb_limitdepth",
 | 
			
		||||
                    ]
 | 
			
		||||
                if "regression" != self._state.task:
 | 
			
		||||
                    estimator_list += ["lrl1"]
 | 
			
		||||
        for estimator_name in estimator_list:
 | 
			
		||||
 | 
			
		||||
@ -20,6 +20,7 @@ from sklearn.metrics import (
 | 
			
		||||
from sklearn.model_selection import RepeatedStratifiedKFold, GroupKFold, TimeSeriesSplit
 | 
			
		||||
from .model import (
 | 
			
		||||
    XGBoostSklearnEstimator,
 | 
			
		||||
    XGBoostLimitDepthEstimator,
 | 
			
		||||
    RandomForestEstimator,
 | 
			
		||||
    LGBMEstimator,
 | 
			
		||||
    LRL1Classifier,
 | 
			
		||||
@ -42,6 +43,8 @@ def get_estimator_class(task, estimator_name):
 | 
			
		||||
    # when adding a new learner, need to add an elif branch
 | 
			
		||||
    if "xgboost" == estimator_name:
 | 
			
		||||
        estimator_class = XGBoostSklearnEstimator
 | 
			
		||||
    elif "xgb_limitdepth" == estimator_name:
 | 
			
		||||
        estimator_class = XGBoostLimitDepthEstimator
 | 
			
		||||
    elif "rf" == estimator_name:
 | 
			
		||||
        estimator_class = RandomForestEstimator
 | 
			
		||||
    elif "lgbm" == estimator_name:
 | 
			
		||||
 | 
			
		||||
@ -625,7 +625,13 @@ class LGBMEstimator(BaseEstimator):
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def size(cls, config):
 | 
			
		||||
        num_leaves = int(round(config.get("num_leaves") or config["max_leaves"]))
 | 
			
		||||
        num_leaves = int(
 | 
			
		||||
            round(
 | 
			
		||||
                config.get("num_leaves")
 | 
			
		||||
                or config.get("max_leaves")
 | 
			
		||||
                or 1 << config["max_depth"]
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
        n_estimators = int(round(config["n_estimators"]))
 | 
			
		||||
        return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
 | 
			
		||||
 | 
			
		||||
@ -794,6 +800,10 @@ class XGBoostEstimator(SKLearnEstimator):
 | 
			
		||||
                "init_value": 4,
 | 
			
		||||
                "low_cost_init_value": 4,
 | 
			
		||||
            },
 | 
			
		||||
            "max_depth": {
 | 
			
		||||
                "domain": tune.choice([0, 6, 12]),
 | 
			
		||||
                "init_value": 0,
 | 
			
		||||
            },
 | 
			
		||||
            "min_child_weight": {
 | 
			
		||||
                "domain": tune.loguniform(lower=0.001, upper=128),
 | 
			
		||||
                "init_value": 1,
 | 
			
		||||
@ -834,11 +844,12 @@ class XGBoostEstimator(SKLearnEstimator):
 | 
			
		||||
 | 
			
		||||
    def config2params(cls, config: dict) -> dict:
 | 
			
		||||
        params = config.copy()
 | 
			
		||||
        params["max_depth"] = params.get("max_depth", 0)
 | 
			
		||||
        params["grow_policy"] = params.get("grow_policy", "lossguide")
 | 
			
		||||
        params["booster"] = params.get("booster", "gbtree")
 | 
			
		||||
        max_depth = params["max_depth"] = params.get("max_depth", 0)
 | 
			
		||||
        if max_depth == 0:
 | 
			
		||||
            params["grow_policy"] = params.get("grow_policy", "lossguide")
 | 
			
		||||
            params["tree_method"] = params.get("tree_method", "hist")
 | 
			
		||||
        # params["booster"] = params.get("booster", "gbtree")
 | 
			
		||||
        params["use_label_encoder"] = params.get("use_label_encoder", False)
 | 
			
		||||
        params["tree_method"] = params.get("tree_method", "hist")
 | 
			
		||||
        if "n_jobs" in config:
 | 
			
		||||
            params["nthread"] = params.pop("n_jobs")
 | 
			
		||||
        return params
 | 
			
		||||
@ -923,24 +934,25 @@ class XGBoostEstimator(SKLearnEstimator):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
 | 
			
		||||
    """The class for tuning XGBoost (for classification), using sklearn API."""
 | 
			
		||||
    """The class for tuning XGBoost with unlimited depth, using sklearn API."""
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def search_space(cls, data_size, **params):
 | 
			
		||||
        return XGBoostEstimator.search_space(data_size)
 | 
			
		||||
        space = XGBoostEstimator.search_space(data_size)
 | 
			
		||||
        space.pop("max_depth")
 | 
			
		||||
        return space
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def cost_relative2lgbm(cls):
 | 
			
		||||
        return XGBoostEstimator.cost_relative2lgbm()
 | 
			
		||||
 | 
			
		||||
    def config2params(cls, config: dict) -> dict:
 | 
			
		||||
        # TODO: test
 | 
			
		||||
        params = config.copy()
 | 
			
		||||
        params["max_depth"] = 0
 | 
			
		||||
        params["grow_policy"] = params.get("grow_policy", "lossguide")
 | 
			
		||||
        params["booster"] = params.get("booster", "gbtree")
 | 
			
		||||
        max_depth = params["max_depth"] = params.get("max_depth", 0)
 | 
			
		||||
        if max_depth == 0:
 | 
			
		||||
            params["grow_policy"] = params.get("grow_policy", "lossguide")
 | 
			
		||||
            params["tree_method"] = params.get("tree_method", "hist")
 | 
			
		||||
        params["use_label_encoder"] = params.get("use_label_encoder", False)
 | 
			
		||||
        params["tree_method"] = params.get("tree_method", "hist")
 | 
			
		||||
        return params
 | 
			
		||||
 | 
			
		||||
    def __init__(
 | 
			
		||||
@ -968,6 +980,28 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
 | 
			
		||||
        return XGBoostEstimator._callbacks(start_time, deadline)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class XGBoostLimitDepthEstimator(XGBoostSklearnEstimator):
 | 
			
		||||
    """The class for tuning XGBoost with limited depth, using sklearn API."""
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def search_space(cls, data_size, **params):
 | 
			
		||||
        space = XGBoostEstimator.search_space(data_size)
 | 
			
		||||
        space.pop("max_leaves")
 | 
			
		||||
        upper = max(6, int(np.log2(data_size)))
 | 
			
		||||
        space["max_depth"] = {
 | 
			
		||||
            "domain": tune.randint(lower=1, upper=min(upper, 16)),
 | 
			
		||||
            "init_value": 6,
 | 
			
		||||
            "low_cost_init_value": 1,
 | 
			
		||||
        }
 | 
			
		||||
        space["learning_rate"]["init_value"] = 0.3
 | 
			
		||||
        space["n_estimators"]["init_value"] = 10
 | 
			
		||||
        return space
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def cost_relative2lgbm(cls):
 | 
			
		||||
        return 64
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
 | 
			
		||||
    """The class for tuning Random Forest."""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -129,11 +129,11 @@ class FLOW2(Searcher):
 | 
			
		||||
                    sampler = sampler.get_sampler()
 | 
			
		||||
                    if str(sampler) == "Uniform":
 | 
			
		||||
                        self._step_lb = min(
 | 
			
		||||
                            self._step_lb, q / (domain.upper - domain.lower)
 | 
			
		||||
                            self._step_lb, q / (domain.upper - domain.lower + 1)
 | 
			
		||||
                        )
 | 
			
		||||
                elif isinstance(domain, sample.Integer) and str(sampler) == "Uniform":
 | 
			
		||||
                    self._step_lb = min(
 | 
			
		||||
                        self._step_lb, 1.0 / (domain.upper - 1 - domain.lower)
 | 
			
		||||
                        self._step_lb, 1.0 / (domain.upper - domain.lower)
 | 
			
		||||
                    )
 | 
			
		||||
                if isinstance(domain, sample.Categorical):
 | 
			
		||||
                    if not domain.ordered:
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user