autogen/flaml/default/estimator.py

193 lines
6.8 KiB
Python
Raw Normal View History

import sklearn.ensemble as ensemble
from functools import wraps
Refactor into automl subpackage (#809) * Refactor into automl subpackage Moved some of the packages into an automl subpackage to tidy before the task-based refactor. This is in response to discussions with the group and a comment on the first task-based PR. Only changes here are moving subpackages and modules into the new automl, fixing imports to work with this structure and fixing some dependencies in setup.py. * Fix doc building post automl subpackage refactor * Fix broken links in website post automl subpackage refactor * Fix broken links in website post automl subpackage refactor * Remove vw from test deps as this is breaking the build * Move default back to the top-level I'd moved this to automl as that's where it's used internally, but had missed that this is actually part of the public interface so makes sense to live where it was. * Re-add top level modules with deprecation warnings flaml.data, flaml.ml and flaml.model are re-added to the top level, being re-exported from flaml.automl for backwards compatability. Adding a deprecation warning so that we can have a planned removal later. * Fix model.py line-endings * Pin pytorch-lightning to less than 1.8.0 We're seeing strange lightning related bugs from pytorch-forecasting since the release of lightning 1.8.0. Going to try constraining this to see if we have a fix. * Fix the lightning version pin Was optimistic with setting it in the 1.7.x range, but that isn't compatible with python 3.6 * Remove lightning version pin * Revert dependency version changes * Minor change to retrigger the build * Fix line endings in ml.py and model.py Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu> Co-authored-by: EgorKraevTransferwise <egor.kraev@transferwise.com>
2022-12-06 20:46:08 +00:00
from flaml.automl.data import CLASSIFICATION
from .suggest import preprocess_and_suggest_hyperparams
DEFAULT_LOCATION = "default_location"
def flamlize_estimator(super_class, name: str, task: str, alternatives=None):
    """Enhance an estimator class with flaml's data-dependent default hyperparameter settings.

    Example:

    ```python
    import sklearn.ensemble as ensemble
    RandomForestRegressor = flamlize_estimator(
        ensemble.RandomForestRegressor, "rf", "regression"
    )
    ```

    Args:
        super_class: an scikit-learn compatible estimator class.
        name: a str of the estimator's name.
        task: a str of the task type.
        alternatives: (Optional) a list for alternative estimator names. For example,
            ```[("max_depth", 0, "xgboost")]``` means if the "max_depth" is set to 0
            in the constructor, then look for the learned defaults for estimator "xgboost".

    Returns:
        A subclass of `super_class` whose `fit` looks up data-dependent default
        hyperparameters (via `preprocess_and_suggest_hyperparams`) before
        delegating to the parent class's `fit`.
    """

    class EstimatorClass(super_class):
        """**Enhanced with flaml's data-dependent default hyperparameter settings.**"""

        @wraps(super_class.__init__)
        def __init__(self, **params):
            # "default_location" is a flaml-specific kwarg (a location to load
            # learned defaults from); pop it so the wrapped estimator's
            # constructor never sees an unknown parameter.
            if DEFAULT_LOCATION in params:
                self._default_location = params.pop(DEFAULT_LOCATION)
            else:
                self._default_location = None
            # Remember the user-provided params; they take precedence over the
            # suggested defaults later (see suggest_hyperparams).
            self._params = params
            super().__init__(**params)

        @classmethod
        @wraps(super_class._get_param_names)
        def _get_param_names(cls):
            # Delegate to the wrapped class so sklearn's get_params/set_params
            # introspect the original constructor signature, not __init__ above.
            return super_class._get_param_names()

        def suggest_hyperparams(self, X, y):
            """Suggest hyperparameters.

            Example:

            ```python
            from flaml.default import LGBMRegressor

            estimator = LGBMRegressor()
            hyperparams, estimator_name, X_transformed, y_transformed = estimator.suggest_hyperparams(X_train, y_train)
            print(hyperparams)
            ```

            Args:
                X: A dataframe of training data in shape n*m.
                y: A series of labels in shape n*1.

            Returns:
                hyperparams: A dict of the hyperparameter configurations.
                estimator_name: A str of the underlying estimator name, e.g., 'xgb_limitdepth'.
                X_transformed: the preprocessed X.
                y_transformed: the preprocessed y.
            """
            estimator_name = name
            if alternatives:
                # Each alternative is (param_name, trigger_value, alt_estimator):
                # if the user set param_name to trigger_value in the constructor,
                # look up the learned defaults for alt_estimator instead.
                for alternative in alternatives:
                    if self._params.get(alternative[0]) == alternative[1]:
                        estimator_name = alternative[2]
                        break
            # If the user did not constrain max_depth, let the portfolio choose
            # between depth-limited and unlimited xgboost defaults.
            estimator_name = (
                "choose_xgb"
                if (
                    estimator_name == "xgb_limitdepth"
                    and "max_depth" not in self._params
                )
                else estimator_name
            )
            (
                hyperparams,
                estimator_class,
                X_transformed,
                y_transformed,
                self._feature_transformer,
                self._label_transformer,
            ) = preprocess_and_suggest_hyperparams(
                task, X, y, estimator_name, self._default_location
            )
            # Sanity check: the learned defaults must target the wrapped class.
            assert estimator_class == super_class
            # User-provided constructor params override the suggested defaults.
            hyperparams.update(self._params)
            return hyperparams, estimator_name, X_transformed, y_transformed

        @wraps(super_class.fit)
        def fit(self, X, y, *args, **params):
            # Look up data-dependent defaults, apply them, then delegate to the
            # parent fit with appropriately transformed labels.
            hyperparams, estimator_name, X, y_transformed = self.suggest_hyperparams(
                X, y
            )
            self.set_params(**hyperparams)
            if self._label_transformer and estimator_name in [
                "rf",
                "extra_tree",
                "xgboost",
                "xgb_limitdepth",
                "choose_xgb",
            ]:
                # rf and et have trouble in handling boolean labels; xgboost requires integer labels
                fitted = super().fit(X, y_transformed, *args, **params)
                # if hasattr(self, "_classes"):
                #     self._classes = self._label_transformer.classes_
                # else:
                self.classes_ = self._label_transformer.classes_
                if "xgb" not in estimator_name:
                    # rf and et would do inverse transform automatically; xgb doesn't
                    self._label_transformer = None
            else:
                # lgbm doesn't need label transformation except for non-str/num labels
                try:
                    fitted = super().fit(X, y, *args, **params)
                    self._label_transformer = None
                except ValueError:
                    # Unknown label type: 'unknown'
                    fitted = super().fit(X, y_transformed, *args, **params)
                    self._classes = self._label_transformer.classes_
            return fitted

        @wraps(super_class.predict)
        def predict(self, X, *args, **params):
            # lgbm classification skips feature transformation (mirrors the
            # special-casing in fit); everyone else predicts on transformed X.
            if name != "lgbm" or task not in CLASSIFICATION:
                X = self._feature_transformer.transform(X)
            y_pred = super().predict(X, *args, **params)
            # _label_transformer is non-None only when fit left labels encoded
            # (the xgb case above), so map predictions back to original labels.
            if self._label_transformer and y_pred.ndim == 1:
                y_pred = self._label_transformer.inverse_transform(y_pred)
            return y_pred

        # Only classifiers have predict_proba; add the wrapper conditionally so
        # regressors keep the standard sklearn interface.
        if hasattr(super_class, "predict_proba"):

            @wraps(super_class.predict_proba)
            def predict_proba(self, X, *args, **params):
                X_test = self._feature_transformer.transform(X)
                y_pred = super().predict_proba(X_test, *args, **params)
                return y_pred

    # Make the enhanced class masquerade as the original for docs and repr.
    EstimatorClass.__doc__ += " " + super_class.__doc__
    EstimatorClass.__name__ = super_class.__name__
    return EstimatorClass
# Flamlized scikit-learn ensemble estimators. sklearn is a hard dependency of
# flaml, so these are defined unconditionally (unlike lightgbm/xgboost below).
RandomForestRegressor = flamlize_estimator(ensemble.RandomForestRegressor, "rf", "regression")
RandomForestClassifier = flamlize_estimator(ensemble.RandomForestClassifier, "rf", "classification")
ExtraTreesRegressor = flamlize_estimator(ensemble.ExtraTreesRegressor, "extra_tree", "regression")
ExtraTreesClassifier = flamlize_estimator(ensemble.ExtraTreesClassifier, "extra_tree", "classification")
# lightgbm is optional: expose the flamlized LGBM estimators only when the
# package is installed; otherwise silently omit them.
try:
    import lightgbm
except ImportError:
    pass
else:
    LGBMRegressor = flamlize_estimator(lightgbm.LGBMRegressor, "lgbm", "regression")
    LGBMClassifier = flamlize_estimator(lightgbm.LGBMClassifier, "lgbm", "classification")
# xgboost is optional as well. The alternatives entry maps max_depth == 0
# (xgboost's "unlimited depth" setting) to the defaults learned for the
# depth-unlimited "xgboost" portfolio entry.
try:
    import xgboost
except ImportError:
    pass
else:
    XGBRegressor = flamlize_estimator(
        xgboost.XGBRegressor,
        "xgb_limitdepth",
        "regression",
        [("max_depth", 0, "xgboost")],
    )
    XGBClassifier = flamlize_estimator(
        xgboost.XGBClassifier,
        "xgb_limitdepth",
        "classification",
        [("max_depth", 0, "xgboost")],
    )