autogen/flaml/model.py

"""!
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
"""

import numpy as np
import xgboost as xgb
import time
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier, LGBMRegressor, LGBMRanker
from scipy.sparse import issparse
import pandas as pd
from . import tune
from .data import group_counts

import logging

logger = logging.getLogger(__name__)


class BaseEstimator:
    """The abstract class for all learners

    A subclass assigns ``estimator_class`` in its constructor; ``_fit``
    instantiates that class with ``self.params`` and keeps the trained
    object in ``self._model``.

    Typical example:
        XGBoostEstimator: for regression
        XGBoostSklearnEstimator: for classification
        LGBMEstimator, RandomForestEstimator, LRL1Classifier, LRL2Classifier:
            for both regression and classification
    """

    def __init__(self, task="binary", **params):
        """Constructor

        Args:
            task: A string of the task type, one of
                'binary', 'multi', 'regression', 'rank', 'forecast'
            params: A dictionary of the hyperparameter names and values.
                An optional '_estimator_type' entry is taken out of the
                hyperparameters and stored on the instance instead.
        """
        if "_estimator_type" in params:
            # An explicit estimator type overrides the one derived from task.
            self._estimator_type = params.pop("_estimator_type")
        elif task in ("binary", "multi"):
            self._estimator_type = "classifier"
        else:
            self._estimator_type = "regressor"
        self.params = params
        self._task = task
        self._model = None
        self.estimator_class = None

    def get_params(self, deep=False):
        """Return a copy of the hyperparameters plus 'task' and
        '_estimator_type'. ``deep`` is accepted but not used."""
        snapshot = {**self.params, "task": self._task}
        if hasattr(self, "_estimator_type"):
            snapshot["_estimator_type"] = self._estimator_type
        return snapshot

    @property
    def classes_(self):
        """The ``classes_`` attribute of the trained model"""
        return self._model.classes_

    @property
    def n_features_in_(self):
        """The ``n_features_in_`` attribute of the trained model"""
        return self.model.n_features_in_

    @property
    def model(self):
        """Trained model after fit() is called, or None before fit() is called"""
        return self._model

    @property
    def estimator(self):
        """Trained model after fit() is called, or None before fit() is called"""
        return self._model

    def _preprocess(self, X):
        """Hook for subclasses to transform features; identity here"""
        return X

    def _fit(self, X_train, y_train, **kwargs):
        """Instantiate ``estimator_class`` with ``self.params`` and fit it.

        A 'groups' keyword is never forwarded to the underlying ``fit``.
        For the 'rank' task it is converted with ``group_counts`` and
        forwarded as 'group' instead.

        Returns:
            A float of the elapsed training time in seconds
        """
        started = time.time()
        fit_kwargs = dict(kwargs)
        if "groups" in fit_kwargs:
            groups = fit_kwargs.pop("groups")
            if self._task == "rank":
                fit_kwargs["group"] = group_counts(groups)
        features = self._preprocess(X_train)
        learner = self.estimator_class(**self.params)
        learner.fit(features, y_train, **fit_kwargs)
        elapsed = time.time() - started
        self._model = learner
        return elapsed

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train the model from given training data

        Args:
            X_train: A numpy array of training data in shape n*m
            y_train: A numpy array of labels in shape n*1
            budget: A float of the time budget in seconds
                (not used by this base implementation)

        Returns:
            train_time: A float of the training time in seconds
        """
        return self._fit(X_train, y_train, **kwargs)

    def predict(self, X_test):
        """Predict label from features

        Args:
            X_test: A numpy array of featurized instances, shape n*m

        Returns:
            A numpy array of shape n*1.
            Each element is the label for a instance.
            When no model has been trained yet, an array of ones with
            one entry per row of X_test is returned.
        """
        if self._model is None:
            return np.ones(X_test.shape[0])
        return self._model.predict(self._preprocess(X_test))

    def predict_proba(self, X_test):
        """Predict the probability of each class from features

        Only works for classification problems

        Args:
            X_test: A numpy array of featurized instances, shape n*m

        Returns:
            A numpy array of shape n*c. c is the # classes
            Each element at (i,j) is the probability for instance i to be in
                class j
        """
        is_classification = self._task in ("binary", "multi")
        assert is_classification, "predict_prob() only for classification task."
        return self._model.predict_proba(self._preprocess(X_test))

    def cleanup(self):
        """Release resources held by the learner; nothing to do here"""
        return None

    @classmethod
    def search_space(cls, **params):
        """[required method] search space

        Returns:
            A dictionary of the search space.
            Each key is the name of a hyperparameter, and value is a dict with
                its domain and init_value (optional), cat_hp_cost (optional)
                e.g.,
                {'domain': tune.randint(lower=1, upper=10), 'init_value': 1}
        """
        return dict()

    @classmethod
    def size(cls, config: dict) -> float:
        """[optional method] memory size of the estimator in bytes

        Args:
            config - the dict of the hyperparameter config

        Returns:
            A float of the memory size required by the estimator to train the
            given config
        """
        return 1.0

    @classmethod
    def cost_relative2lgbm(cls) -> float:
        """[optional method] relative cost compared to lightgbm"""
        return 1.0

    @classmethod
    def init(cls):
        """[optional method] initialize the class"""
        return None


class SKLearnEstimator(BaseEstimator):
    """Base class for learners that need categorical features encoded as
    integer codes before training or prediction."""

    def __init__(self, task="binary", **params):
        super().__init__(task, **params)

    def _preprocess(self, X):
        """Replace categorical / string columns with their category codes.

        DataFrames are copied before modification, so the caller's frame
        is left untouched. Non-numeric numpy arrays are converted through
        a temporary DataFrame and returned as a numpy array. Any other
        input is returned unchanged.
        """
        if isinstance(X, pd.DataFrame):
            categorical = X.select_dtypes(include=["category"]).columns
            if len(categorical) > 0:
                X = X.copy()
                X[categorical] = X[categorical].apply(lambda x: x.cat.codes)
            return X
        if isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
            # numpy array is not of numeric dtype
            frame = pd.DataFrame(X)
            for column in frame.columns:
                # Only the first row is inspected to decide whether the
                # column holds strings.
                if isinstance(frame[column][0], str):
                    frame[column] = frame[column].astype("category").cat.codes
            return frame.to_numpy()
        return X


class LGBMEstimator(BaseEstimator):
    """LightGBM learner with budget-aware training.

    Depending on the task the wrapped class is LGBMRegressor ('regression'),
    LGBMRanker ('rank') or LGBMClassifier (anything else).
    """

    @classmethod
    def search_space(cls, data_size, **params):
        """Return the hyperparameter search space.

        Args:
            data_size: The number of training instances; caps the upper
                bound of 'n_estimators' and 'num_leaves' (at most 32768)

        Returns:
            A dictionary mapping each hyperparameter name to its domain
            and initial value(s)
        """
        upper = min(32768, int(data_size))
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": 4,
                "low_cost_init_value": 4,
            },
            "num_leaves": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": 4,
                "low_cost_init_value": 4,
            },
            "min_child_samples": {
                "domain": tune.lograndint(lower=2, upper=2 ** 7 + 1),
                "init_value": 20,
            },
            "learning_rate": {
                "domain": tune.loguniform(lower=1 / 1024, upper=1.0),
                "init_value": 0.1,
            },
            # 'subsample': {
            #     'domain': tune.uniform(lower=0.1, upper=1.0),
            #     'init_value': 1.0,
            # },
            "log_max_bin": {  # log transformed with base 2
                "domain": tune.lograndint(lower=3, upper=11),
                "init_value": 8,
            },
            "colsample_bytree": {
                "domain": tune.uniform(lower=0.01, upper=1.0),
                "init_value": 1.0,
            },
            "reg_alpha": {
                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
                "init_value": 1 / 1024,
            },
            "reg_lambda": {
                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
                "init_value": 1.0,
            },
        }

    @classmethod
    def size(cls, config):
        """Estimate the memory size in bytes of a model trained with config.

        Args:
            config: A dict containing 'n_estimators' and either
                'num_leaves' or 'max_leaves'

        Returns:
            A float of the estimated size in bytes
        """
        num_leaves = int(round(config.get("num_leaves") or config["max_leaves"]))
        n_estimators = int(round(config["n_estimators"]))
        return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8

    def __init__(self, task="binary", log_max_bin=8, **params):
        """Constructor

        Args:
            task: A string of the task type
            log_max_bin: Base-2 logarithm of (max_bin + 1); used only when
                'max_bin' is not given explicitly in params
            params: The remaining hyperparameters. Integer-valued ones
                ('n_estimators', 'num_leaves', 'min_child_samples') are
                rounded to int
        """
        super().__init__(task, **params)
        if "objective" not in self.params:
            # Default: ‘regression’ for LGBMRegressor,
            # ‘binary’ or ‘multiclass’ for LGBMClassifier
            objective = "regression"
            if "binary" in task:
                objective = "binary"
            elif "multi" in task:
                objective = "multiclass"
            elif "rank" == task:
                objective = "lambdarank"
            self.params["objective"] = objective
        if "n_estimators" in self.params:
            self.params["n_estimators"] = int(round(self.params["n_estimators"]))
        if "num_leaves" in self.params:
            self.params["num_leaves"] = int(round(self.params["num_leaves"]))
        if "min_child_samples" in self.params:
            self.params["min_child_samples"] = int(
                round(self.params["min_child_samples"])
            )
        if "max_bin" not in self.params:
            # Parentheses are required: '-' binds tighter than '<<', so the
            # unparenthesized form computes 1 << (n - 1) instead of 2**n - 1.
            self.params["max_bin"] = (1 << int(round(log_max_bin))) - 1
        if "verbose" not in self.params:
            self.params["verbose"] = -1
        # if "subsample_freq" not in self.params:
        #     self.params['subsample_freq'] = 1
        if "regression" == task:
            self.estimator_class = LGBMRegressor
        elif "rank" == task:
            self.estimator_class = LGBMRanker
        else:
            self.estimator_class = LGBMClassifier
        # Cached estimate of seconds per boosting iteration and the training
        # size it was measured on; both are refreshed in fit().
        self._time_per_iter = None
        self._train_size = 0

    def _preprocess(self, X):
        """Make the features acceptable to the learner.

        Integer-typed sparse matrices are cast to float; non-numeric numpy
        arrays get their string columns replaced by category codes. Any
        other input is returned unchanged.
        """
        if (
            not isinstance(X, pd.DataFrame)
            and issparse(X)
            and np.issubdtype(X.dtype, np.integer)
        ):
            X = X.astype(float)
        elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
            # numpy array is not of numeric dtype
            X = pd.DataFrame(X)
            for col in X.columns:
                if isinstance(X[col][0], str):
                    X[col] = X[col].astype("category").cat.codes
            X = X.to_numpy()
        return X

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train the model, limiting the number of iterations to the budget.

        When a budget is given and no per-iteration time estimate exists
        (or the training size changed by more than 4 rows), the model is
        first fitted with 1 and then 4 iterations to estimate the time per
        iteration. The number of iterations of the final fit is then capped
        so the predicted training time stays within the remaining budget.
        ``self.params['n_estimators']`` is restored before returning.

        Args:
            X_train: Training features
            y_train: Training labels
            budget: A float of the time budget in seconds, or None for
                no limit
            kwargs: Forwarded to the underlying fit()

        Returns:
            train_time: A float of the training time in seconds
        """
        start_time = time.time()
        n_iter = self.params["n_estimators"]
        if (
            not self._time_per_iter or abs(self._train_size - X_train.shape[0]) > 4
        ) and budget is not None:
            # measure the time of a single iteration
            self.params["n_estimators"] = 1
            self._t1 = self._fit(X_train, y_train, **kwargs)
            if self._t1 >= budget:
                self.params["n_estimators"] = n_iter
                return self._t1
            self.params["n_estimators"] = 4
            self._t2 = self._fit(X_train, y_train, **kwargs)
            # Marginal cost of the 3 extra iterations; fall back to the
            # single-iteration time, or 1ms, when the timing is degenerate.
            self._time_per_iter = (
                (self._t2 - self._t1) / (self.params["n_estimators"] - 1)
                if self._t2 > self._t1
                else self._t1
                if self._t1
                else 0.001
            )
            self._train_size = X_train.shape[0]
            if self._t1 + self._t2 >= budget or n_iter == self.params["n_estimators"]:
                self.params["n_estimators"] = n_iter
                return time.time() - start_time
        if budget is not None:
            self.params["n_estimators"] = min(
                n_iter,
                int(
                    (budget - time.time() + start_time - self._t1) / self._time_per_iter
                    + 1
                ),
            )
        if self.params["n_estimators"] > 0:
            self._fit(X_train, y_train, **kwargs)
        self.params["n_estimators"] = n_iter
        train_time = time.time() - start_time
        return train_time


class XGBoostEstimator(SKLearnEstimator):
    """not using sklearn API, used for regression"""

    @classmethod
    def search_space(cls, data_size, **params):
        """Return the hyperparameter search space.

        Args:
            data_size: The number of training instances; caps the upper
                bound of 'n_estimators' and 'max_leaves' (at most 32768)

        Returns:
            A dictionary mapping each hyperparameter name to its domain
            and initial value(s)
        """
        upper = min(32768, int(data_size))
        return {
            "n_estimators": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": 4,
                "low_cost_init_value": 4,
            },
            "max_leaves": {
                "domain": tune.lograndint(lower=4, upper=upper),
                "init_value": 4,
                "low_cost_init_value": 4,
            },
            "min_child_weight": {
                "domain": tune.loguniform(lower=0.001, upper=128),
                "init_value": 1,
            },
            "learning_rate": {
                "domain": tune.loguniform(lower=1 / 1024, upper=1.0),
                "init_value": 0.1,
            },
            "subsample": {
                "domain": tune.uniform(lower=0.1, upper=1.0),
                "init_value": 1.0,
            },
            "colsample_bylevel": {
                "domain": tune.uniform(lower=0.01, upper=1.0),
                "init_value": 1.0,
            },
            "colsample_bytree": {
                "domain": tune.uniform(lower=0.01, upper=1.0),
                "init_value": 1.0,
            },
            "reg_alpha": {
                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
                "init_value": 1 / 1024,
            },
            "reg_lambda": {
                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
                "init_value": 1.0,
            },
        }

    @classmethod
    def size(cls, config):
        """Memory size estimate; same formula as LGBMEstimator.size"""
        return LGBMEstimator.size(config)

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative cost compared to lightgbm"""
        return 1.6

    def __init__(
        self,
        task="regression",
        all_thread=False,
        n_jobs=1,
        n_estimators=4,
        max_leaves=4,
        subsample=1.0,
        min_child_weight=1,
        learning_rate=0.1,
        reg_lambda=1.0,
        reg_alpha=0.0,
        colsample_bylevel=1.0,
        colsample_bytree=1.0,
        tree_method="auto",
        **params,
    ):
        """Constructor

        Args:
            task: A string of the task type
            all_thread: When True, 'nthread' is left out of the booster
                parameters instead of being set to n_jobs
            n_jobs: An integer stored as the 'nthread' booster parameter
            n_estimators: The number of boosting rounds (rounded to int)
            params: Extra booster parameters; 'max_depth', 'grow_policy',
                'verbosity', 'booster' and 'objective' are read from here
        """
        super().__init__(task, **params)
        # The round count is passed to xgb.train separately, so it is kept
        # outside self.params.
        self._n_estimators = int(round(n_estimators))
        self.params.update(
            {
                "max_leaves": int(round(max_leaves)),
                "max_depth": params.get("max_depth", 0),
                "grow_policy": params.get("grow_policy", "lossguide"),
                "tree_method": tree_method,
                "verbosity": params.get("verbosity", 0),
                "nthread": n_jobs,
                "learning_rate": float(learning_rate),
                "subsample": float(subsample),
                "reg_alpha": float(reg_alpha),
                "reg_lambda": float(reg_lambda),
                "min_child_weight": float(min_child_weight),
                "booster": params.get("booster", "gbtree"),
                "colsample_bylevel": float(colsample_bylevel),
                "colsample_bytree": float(colsample_bytree),
                "objective": params.get("objective"),
            }
        )
        if all_thread:
            del self.params["nthread"]

    def get_params(self, deep=False):
        """Return the parameters, exposing 'nthread' also as 'n_jobs'.

        When the estimator was built with all_thread=True there is no
        'nthread' entry, and 'n_jobs' is omitted as well.
        """
        params = super().get_params()
        if "nthread" in params:
            params["n_jobs"] = params["nthread"]
        return params

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train a booster with xgb.train.

        Dense input is preprocessed and switches 'tree_method' to 'hist'.
        A non-string objective is passed to xgb.train as the custom ``obj``
        argument rather than as a booster parameter.

        Args:
            X_train: Training features
            y_train: Training labels
            budget: Accepted but not used by this implementation
            kwargs: Only 'sample_weight' is read

        Returns:
            train_time: A float of the training time in seconds
        """
        start_time = time.time()
        if not issparse(X_train):
            self.params["tree_method"] = "hist"
            X_train = self._preprocess(X_train)
        if "sample_weight" in kwargs:
            dtrain = xgb.DMatrix(X_train, label=y_train, weight=kwargs["sample_weight"])
        else:
            dtrain = xgb.DMatrix(X_train, label=y_train)

        objective = self.params.get("objective")
        if isinstance(objective, str):
            obj = None
        else:
            obj = objective
            # Temporarily remove the non-string objective from the booster
            # parameters; it is restored right after training.
            if "objective" in self.params:
                del self.params["objective"]
        self._model = xgb.train(self.params, dtrain, self._n_estimators, obj=obj)
        self.params["objective"] = objective
        del dtrain
        train_time = time.time() - start_time
        return train_time

    def predict(self, X_test):
        """Predict from features.

        Before fit() has been called an array of ones with one entry per
        row of X_test is returned, matching BaseEstimator.predict.
        """
        if self._model is None:
            # Short-circuit before the DMatrix conversion: the fallback in
            # the base class reads X_test.shape, which is taken here from
            # the original input rather than from the DMatrix.
            return np.ones(X_test.shape[0])
        if not issparse(X_test):
            X_test = self._preprocess(X_test)
        dtest = xgb.DMatrix(X_test)
        return super().predict(dtest)


class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
    """using sklearn API, used for classification"""

    @classmethod
    def search_space(cls, data_size, **params):
        """Same search space as XGBoostEstimator"""
        return XGBoostEstimator.search_space(data_size)

    @classmethod
    def cost_relative2lgbm(cls):
        """Same relative cost as XGBoostEstimator"""
        return XGBoostEstimator.cost_relative2lgbm()

    def __init__(
        self,
        task="binary",
        n_jobs=1,
        n_estimators=4,
        max_leaves=4,
        subsample=1.0,
        min_child_weight=1,
        learning_rate=0.1,
        reg_lambda=1.0,
        reg_alpha=0.0,
        colsample_bylevel=1.0,
        colsample_bytree=1.0,
        tree_method="hist",
        **params,
    ):
        """Constructor

        The parent constructors run first; the entries they add that do
        not apply here ('objective', 'max_bin', 'verbose') are removed and
        the xgboost hyperparameters are filled in. The wrapped class is
        XGBRanker for 'rank', XGBClassifier for 'binary'/'multi', and
        XGBRegressor otherwise.
        """
        super().__init__(task, **params)
        for unwanted in ("objective", "max_bin", "verbose"):
            del self.params[unwanted]
        grow_policy = params.get("grow_policy", "lossguide")
        booster = params.get("booster", "gbtree")
        use_label_encoder = params.get("use_label_encoder", False)
        self.params.update(
            n_estimators=int(round(n_estimators)),
            max_leaves=int(round(max_leaves)),
            max_depth=0,
            grow_policy=grow_policy,
            tree_method=tree_method,
            n_jobs=n_jobs,
            verbosity=0,
            learning_rate=float(learning_rate),
            subsample=float(subsample),
            reg_alpha=float(reg_alpha),
            reg_lambda=float(reg_lambda),
            min_child_weight=float(min_child_weight),
            booster=booster,
            colsample_bylevel=float(colsample_bylevel),
            colsample_bytree=float(colsample_bytree),
            use_label_encoder=use_label_encoder,
        )

        if task == "rank":
            self.estimator_class = xgb.XGBRanker
        elif task in ("binary", "multi"):
            self.estimator_class = xgb.XGBClassifier
        else:
            self.estimator_class = xgb.XGBRegressor
        self._time_per_iter = None
        self._train_size = 0

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train via the inherited budget-aware fit; sparse input switches
        'tree_method' to 'auto' first."""
        if issparse(X_train):
            self.params["tree_method"] = "auto"
        return super().fit(X_train, y_train, budget, **kwargs)


class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
    """Random forest learner; a classifier for 'binary'/'multi' tasks and a
    regressor for every other task."""

    @classmethod
    def search_space(cls, data_size, task, **params):
        """Return the hyperparameter search space.

        Args:
            data_size: The number of training instances; caps the upper
                bounds of 'n_estimators' (at most 2048) and 'max_leaves'
                (at most 32768)
            task: A string of the task type; 'criterion' is searched only
                for 'binary' and 'multi'
        """
        data_size = int(data_size)
        n_estimators_domain = tune.lograndint(lower=4, upper=min(2048, data_size))
        max_features_domain = tune.loguniform(lower=0.1, upper=1.0)
        max_leaves_domain = tune.lograndint(lower=4, upper=min(32768, data_size))
        space = {
            "n_estimators": {
                "domain": n_estimators_domain,
                "init_value": 4,
                "low_cost_init_value": 4,
            },
            "max_features": {
                "domain": max_features_domain,
                "init_value": 1.0,
            },
            "max_leaves": {
                "domain": max_leaves_domain,
                "init_value": 4,
                "low_cost_init_value": 4,
            },
        }
        if task not in ("binary", "multi"):
            return space
        space["criterion"] = {
            "domain": tune.choice(["gini", "entropy"]),
            # 'init_value': 'gini',
        }
        return space

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative cost compared to lightgbm"""
        return 2.0

    def __init__(
        self,
        task="binary",
        n_jobs=1,
        n_estimators=4,
        max_features=1.0,
        criterion="gini",
        max_leaves=4,
        **params,
    ):
        """Constructor

        The parent constructors run first; their 'objective' and 'max_bin'
        entries are removed. 'max_leaves' is stored as 'max_leaf_nodes'
        unless 'max_leaf_nodes' is given explicitly in params. 'criterion'
        is stored only for classification tasks.
        """
        super().__init__(task, **params)
        for unwanted in ("objective", "max_bin"):
            del self.params[unwanted]
        max_leaf_nodes = params.get("max_leaf_nodes", int(round(max_leaves)))
        self.params.update(
            n_estimators=int(round(n_estimators)),
            n_jobs=n_jobs,
            verbose=0,
            max_features=float(max_features),
            max_leaf_nodes=max_leaf_nodes,
        )
        if task in ("binary", "multi"):
            self.estimator_class = RandomForestClassifier
            self.params["criterion"] = criterion
        else:
            self.estimator_class = RandomForestRegressor

    def get_params(self, deep=False):
        """Return the parameters as reported by the parent class"""
        return super().get_params()


class ExtraTreeEstimator(RandomForestEstimator):
    """Extra-trees learner sharing the search space and hyperparameter
    handling of RandomForestEstimator."""

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative cost compared to lightgbm"""
        return 1.9

    def __init__(self, task="binary", **params):
        """Constructor

        Args:
            task: A string of the task type; 'binary' and 'multi' select
                ExtraTreesClassifier, every other task selects
                ExtraTreesRegressor
            params: Hyperparameters forwarded to RandomForestEstimator
        """
        super().__init__(task, **params)
        # Use the same classification test as the parent class, which only
        # sets 'criterion' for these tasks. Other tasks (e.g. 'rank',
        # 'forecast') must not fall through to a classifier.
        if task in ("binary", "multi"):
            self.estimator_class = ExtraTreesClassifier
        else:
            self.estimator_class = ExtraTreesRegressor


class LRL1Classifier(SKLearnEstimator):
    """Logistic regression with an L1 penalty by default (solver 'saga');
    classification tasks only."""

    @classmethod
    def search_space(cls, **params):
        """Return the search space: only the regularization constant C"""
        c_domain = tune.loguniform(lower=0.03125, upper=32768.0)
        return {"C": {"domain": c_domain, "init_value": 1.0}}

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative cost compared to lightgbm"""
        return 160

    def __init__(self, task="binary", n_jobs=1, tol=0.0001, C=1.0, **params):
        """Constructor

        Args:
            task: 'binary' or 'multi'
            n_jobs: An integer of the number of parallel threads
            tol: Stopping tolerance, stored as float
            C: Regularization constant, stored as float
            params: Extra hyperparameters; 'penalty' and 'solver' given
                here take precedence over the defaults
        """
        super().__init__(task, **params)
        self.params.update(
            penalty=params.get("penalty", "l1"),
            tol=float(tol),
            C=float(C),
            solver=params.get("solver", "saga"),
            n_jobs=n_jobs,
        )
        is_classification = task in ("binary", "multi")
        assert is_classification, "LogisticRegression for classification task only"
        self.estimator_class = LogisticRegression


class LRL2Classifier(SKLearnEstimator):
    """Logistic regression with an L2 penalty by default (solver 'lbfgs');
    classification tasks only."""

    @classmethod
    def search_space(cls, **params):
        """Same search space as LRL1Classifier"""
        return LRL1Classifier.search_space(**params)

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative cost compared to lightgbm"""
        return 25

    def __init__(self, task="binary", n_jobs=1, tol=0.0001, C=1.0, **params):
        """Constructor

        Args:
            task: 'binary' or 'multi'
            n_jobs: An integer of the number of parallel threads
            tol: Stopping tolerance, stored as float
            C: Regularization constant, stored as float
            params: Extra hyperparameters; 'penalty' and 'solver' given
                here take precedence over the defaults
        """
        super().__init__(task, **params)
        self.params.update(
            penalty=params.get("penalty", "l2"),
            tol=float(tol),
            C=float(C),
            solver=params.get("solver", "lbfgs"),
            n_jobs=n_jobs,
        )
        is_classification = task in ("binary", "multi")
        assert is_classification, "LogisticRegression for classification task only"
        self.estimator_class = LogisticRegression


class CatBoostEstimator(BaseEstimator):
    """CatBoost learner with budget-aware training.

    The per-iteration time estimate, the training size it was measured on,
    the timing of the first probe and the last probe model are stored on
    the class, so they are shared by all instances until init() resets the
    estimate.
    """

    _time_per_iter = None
    _train_size = 0

    @classmethod
    def search_space(cls, data_size, **params):
        """Return the hyperparameter search space.

        Args:
            data_size: The number of training instances; the upper bound
                of 'early_stopping_rounds' is round(1500000 / data_size)
                clamped to the range [12, 150]
        """
        upper = max(min(round(1500000 / data_size), 150), 12)
        return {
            "early_stopping_rounds": {
                "domain": tune.lograndint(lower=10, upper=upper),
                "init_value": 10,
                "low_cost_init_value": 10,
            },
            "learning_rate": {
                "domain": tune.loguniform(lower=0.005, upper=0.2),
                "init_value": 0.1,
            },
        }

    @classmethod
    def size(cls, config):
        """Memory size estimate in bytes; computed from fixed values
        (8192 estimators, 64 leaves) and independent of config."""
        n_estimators = 8192
        max_leaves = 64
        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative cost compared to lightgbm"""
        return 15

    @classmethod
    def init(cls):
        """Reset the class-level time-per-iteration estimate"""
        CatBoostEstimator._time_per_iter = None
        CatBoostEstimator._train_size = 0

    def _preprocess(self, X):
        """Make the features acceptable to the learner.

        In DataFrames, float-valued categories of categorical columns are
        renamed to their string form (on a copy). Non-numeric numpy arrays
        get their string columns replaced by category codes. Any other
        input is returned unchanged.
        """
        if isinstance(X, pd.DataFrame):
            cat_columns = X.select_dtypes(include=["category"]).columns
            if not cat_columns.empty:
                X = X.copy()
                X[cat_columns] = X[cat_columns].apply(
                    lambda x: x.cat.rename_categories(
                        [
                            str(c) if isinstance(c, float) else c
                            for c in x.cat.categories
                        ]
                    )
                )
        elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
            # numpy array is not of numeric dtype
            X = pd.DataFrame(X)
            for col in X.columns:
                if isinstance(X[col][0], str):
                    X[col] = X[col].astype("category").cat.codes
            X = X.to_numpy()
        return X

    def __init__(
        self,
        task="binary",
        n_jobs=1,
        n_estimators=8192,
        learning_rate=0.1,
        early_stopping_rounds=4,
        **params,
    ):
        """Constructor

        Args:
            task: A string of the task type; 'binary' and 'multi' select
                CatBoostClassifier, every other task CatBoostRegressor
            n_jobs: An integer stored as the 'thread_count' parameter
            n_estimators: The maximal number of iterations
            learning_rate: The learning rate
            early_stopping_rounds: Rounded to int before being stored
            params: Extra parameters; 'verbose' and 'random_seed' default
                to False and 10242048 when absent
        """
        super().__init__(task, **params)
        self.params.update(
            {
                "early_stopping_rounds": int(round(early_stopping_rounds)),
                "n_estimators": n_estimators,
                "learning_rate": learning_rate,
                "thread_count": n_jobs,
                "verbose": params.get("verbose", False),
                "random_seed": params.get("random_seed", 10242048),
            }
        )
        from catboost import CatBoostRegressor

        self.estimator_class = CatBoostRegressor
        if task in ("binary", "multi"):
            from catboost import CatBoostClassifier

            self.estimator_class = CatBoostClassifier

    def get_params(self, deep=False):
        """Return the parameters, exposing 'thread_count' also as 'n_jobs'"""
        params = super().get_params()
        params["n_jobs"] = params["thread_count"]
        return params

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train the model, limiting the number of iterations to the budget.

        When a budget is given and no per-iteration time estimate exists
        (or the training size changed by more than 4 rows), probe models
        with 1 and then 4 iterations are trained to estimate the time per
        iteration. The final model is trained on the leading part of the
        data, with the trailing part (10% of the rows, at most 1000) passed
        as eval_set.

        The temporary ``train_dir`` is removed and
        ``self.params['n_estimators']`` is restored on every exit path,
        including when training raises.

        Args:
            X_train: Training features
            y_train: Training labels
            budget: A float of the time budget in seconds, or None/0 for
                no limit
            kwargs: Forwarded to the underlying fit(); 'sample_weight' is
                truncated to the rows used for the final training

        Returns:
            train_time: A float of the training time in seconds
        """
        import shutil

        start_time = time.time()
        train_dir = f"catboost_{str(start_time)}"
        n_iter = self.params["n_estimators"]
        X_train = self._preprocess(X_train)
        if isinstance(X_train, pd.DataFrame):
            cat_features = list(X_train.select_dtypes(include="category").columns)
        else:
            cat_features = []
        try:
            if (
                not CatBoostEstimator._time_per_iter
                or abs(CatBoostEstimator._train_size - len(y_train)) > 4
            ) and budget:
                # measure the time per iteration
                self.params["n_estimators"] = 1
                CatBoostEstimator._smallmodel = self.estimator_class(
                    train_dir=train_dir, **self.params
                )
                CatBoostEstimator._smallmodel.fit(
                    X_train, y_train, cat_features=cat_features, **kwargs
                )
                CatBoostEstimator._t1 = time.time() - start_time
                if CatBoostEstimator._t1 >= budget:
                    # Budget exhausted by one iteration: keep that model.
                    self._model = CatBoostEstimator._smallmodel
                    return CatBoostEstimator._t1
                self.params["n_estimators"] = 4
                CatBoostEstimator._smallmodel = self.estimator_class(
                    train_dir=train_dir, **self.params
                )
                CatBoostEstimator._smallmodel.fit(
                    X_train, y_train, cat_features=cat_features, **kwargs
                )
                CatBoostEstimator._time_per_iter = (
                    time.time() - start_time - CatBoostEstimator._t1
                ) / (self.params["n_estimators"] - 1)
                if CatBoostEstimator._time_per_iter <= 0:
                    # Degenerate timing: fall back to the single-iteration
                    # time, or 1ms, so the division below cannot hit zero.
                    CatBoostEstimator._time_per_iter = (
                        CatBoostEstimator._t1 or 0.001
                    )
                CatBoostEstimator._train_size = len(y_train)
                if (
                    time.time() - start_time >= budget
                    or n_iter == self.params["n_estimators"]
                ):
                    self._model = CatBoostEstimator._smallmodel
                    return time.time() - start_time
            if budget:
                train_times = 1
                self.params["n_estimators"] = min(
                    n_iter,
                    int(
                        (budget - time.time() + start_time - CatBoostEstimator._t1)
                        / train_times
                        / CatBoostEstimator._time_per_iter
                        + 1
                    ),
                )
                # Fallback model in case no further training is affordable.
                self._model = CatBoostEstimator._smallmodel
            if self.params["n_estimators"] > 0:
                # Hold out the trailing rows as the early-stopping eval set.
                n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
                X_tr, y_tr = X_train[:n], y_train[:n]
                fit_kwargs = dict(kwargs)
                weight = fit_kwargs.get("sample_weight")
                if weight is not None:
                    fit_kwargs["sample_weight"] = weight[:n]
                from catboost import Pool

                model = self.estimator_class(train_dir=train_dir, **self.params)
                model.fit(
                    X_tr,
                    y_tr,
                    cat_features=cat_features,
                    eval_set=Pool(
                        data=X_train[n:], label=y_train[n:], cat_features=cat_features
                    ),
                    **fit_kwargs,
                )  # model.get_best_iteration()
                self._model = model
            return time.time() - start_time
        finally:
            self.params["n_estimators"] = n_iter
            shutil.rmtree(train_dir, ignore_errors=True)


class KNeighborsEstimator(BaseEstimator):
    """K-nearest-neighbors learner; categorical features are dropped
    before training and prediction."""

    @classmethod
    def search_space(cls, data_size, **params):
        """Return the search space: only 'n_neighbors', whose upper bound
        is half the data size, at most 512."""
        neighbors_domain = tune.lograndint(
            lower=1, upper=min(512, int(data_size / 2))
        )
        return {
            "n_neighbors": {
                "domain": neighbors_domain,
                "init_value": 5,
                "low_cost_init_value": 1,
            },
        }

    @classmethod
    def cost_relative2lgbm(cls):
        """Relative cost compared to lightgbm"""
        return 30

    def __init__(self, task="binary", n_jobs=1, n_neighbors=5, **params):
        """Constructor

        Args:
            task: A string of the task type; 'binary' and 'multi' select
                KNeighborsClassifier, every other task KNeighborsRegressor
            n_jobs: An integer of the number of parallel threads
            n_neighbors: The number of neighbors, rounded to int
            params: Extra hyperparameters; 'weights' defaults to
                'distance' when absent
        """
        super().__init__(task, **params)
        self.params.update(
            n_neighbors=int(round(n_neighbors)),
            weights=params.get("weights", "distance"),
            n_jobs=n_jobs,
        )
        from sklearn.neighbors import KNeighborsRegressor

        if task in ("binary", "multi"):
            from sklearn.neighbors import KNeighborsClassifier

            self.estimator_class = KNeighborsClassifier
        else:
            self.estimator_class = KNeighborsRegressor

    def _preprocess(self, X):
        """Drop the categorical columns from X.

        Raises:
            ValueError: if X is a DataFrame whose columns are all
                categorical
        """
        if isinstance(X, pd.DataFrame):
            categorical = X.select_dtypes(["category"]).columns
            if X.shape[1] == len(categorical):
                raise ValueError("kneighbor requires at least one numeric feature")
            return X.drop(categorical, axis=1)
        if isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
            # drop categorical columns if any; a column counts as
            # categorical when its first value is a string
            frame = pd.DataFrame(X)
            categorical = [
                column for column in frame.columns if isinstance(frame[column][0], str)
            ]
            return frame.drop(categorical, axis=1).to_numpy()
        return X


class Prophet(BaseEstimator):
    """Forecasting learner that fits a ``prophet.Prophet`` model.

    The training features must contain a "ds" column; the labels are
    attached as column "y" before fitting.
    """

    @classmethod
    def search_space(cls, **params):
        """Return the search space of the tuned hyperparameters.

        Args:
            params: Unused.

        Returns:
            A dictionary mapping each hyperparameter name to its domain and
            initial value(s).
        """

        def _prior_scale():
            # seasonality and holidays priors share the same range and start
            return {
                "domain": tune.loguniform(lower=0.01, upper=100),
                "init_value": 1,
            }

        return {
            "changepoint_prior_scale": {
                "domain": tune.loguniform(lower=0.001, upper=1000),
                "init_value": 0.01,
                "low_cost_init_value": 0.001,
            },
            "seasonality_prior_scale": _prior_scale(),
            "holidays_prior_scale": _prior_scale(),
            "seasonality_mode": {
                "domain": tune.choice(["additive", "multiplicative"]),
                "init_value": "multiplicative",
            },
        }

    def __init__(self, task="forecast", **params):
        """Constructor

        Args:
            task: A string of the task type
            params: A dictionary of the hyperparameter names and values;
                an "n_jobs" entry, if present, is discarded
        """
        # n_jobs is removed so it is not forwarded to the model constructor
        params.pop("n_jobs", None)
        super().__init__(task, **params)

    def _join(self, X_train, y_train):
        """Combine features and labels into one training dataframe.

        Args:
            X_train: A dataframe that contains a "ds" column
            y_train: The labels; they become column "y"

        Returns:
            ``X_train`` joined (on index) with the "y" column.
        """
        assert "ds" in X_train, (
            "Dataframe for training forecast model must have column"
            ' "ds" with the dates in X_train.'
        )
        target = pd.DataFrame(y_train, columns=["y"])
        return X_train.join(target)

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train the model from given training data

        Args:
            X_train: A dataframe of training data with a "ds" column
            y_train: The labels
            budget: Unused
            kwargs: Unused

        Returns:
            train_time: A float of the training time in seconds
        """
        from prophet import Prophet as ProphetModel

        started = time.time()
        fitted = ProphetModel(**self.params).fit(self._join(X_train, y_train))
        elapsed = time.time() - started
        self._model = fitted
        return elapsed

    def predict(self, X_test):
        """Predict values for the rows of ``X_test``

        Args:
            X_test: The data passed to the fitted model's ``predict``;
                an int is rejected

        Returns:
            The "yhat" column of the model's forecast, or an array of ones
            with one entry per row when the model has not been fit.

        Raises:
            ValueError: If ``X_test`` is an int.
        """
        if isinstance(X_test, int):
            raise ValueError(
                "predict() with steps is only supported for arima/sarimax."
                " For Prophet, pass a dataframe with a date colum named ds."
            )
        if self._model is None:
            logger.warning(
                "Estimator is not fit yet. Please run fit() before predict()."
            )
            return np.ones(X_test.shape[0])
        return self._model.predict(X_test)["yhat"]


class ARIMA(Prophet):
    """Forecasting learner that fits a statsmodels ``ARIMA`` model.

    The tuned hyperparameters ``p``, ``d`` and ``q`` form the ``order``
    passed to the statsmodels estimator.
    """

    @classmethod
    def search_space(cls, **params):
        """Return the search space over the order terms ``p``, ``d``, ``q``.

        Args:
            params: Unused.

        Returns:
            A dictionary mapping each hyperparameter name to its domain and
            initial values.
        """
        # a fresh domain object is created for every hyperparameter
        return {
            name: {
                "domain": tune.quniform(lower=0, upper=10, q=1),
                "init_value": 2,
                "low_cost_init_value": 0,
            }
            for name in ("p", "d", "q")
        }

    def _join(self, X_train, y_train):
        """Combine features and labels into a dataframe indexed by date.

        Args:
            X_train: A dataframe that contains a "ds" column
            y_train: The labels; they become column "y"

        Returns:
            The joined dataframe with the "ds" column converted to datetime,
            used as the index and removed from the columns.
        """
        train_df = super()._join(X_train, y_train)
        train_df.index = pd.to_datetime(train_df["ds"])
        train_df = train_df.drop("ds", axis=1)
        return train_df

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train the model from given training data

        Args:
            X_train: A dataframe of training data with a "ds" column
            y_train: The labels
            budget: Unused
            kwargs: Unused

        Returns:
            train_time: A float of the training time in seconds
        """
        import warnings

        # Silence warnings only while importing and fitting; the previous
        # filter setting is restored on exit instead of being changed for the
        # whole process.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator

            current_time = time.time()
            train_df = self._join(X_train, y_train)
            # The quantized search space may yield floats such as 2.0
            # (NOTE(review): confirm); pass plain integers as the order.
            order = tuple(int(round(self.params[name])) for name in ("p", "d", "q"))
            model = ARIMA_estimator(
                train_df,
                order=order,
                enforce_stationarity=False,
                enforce_invertibility=False,
            )
            model = model.fit()
            train_time = time.time() - current_time
        self._model = model
        return train_time

    def predict(self, X_test):
        """Forecast future values

        Args:
            X_test: Either an int number of steps to forecast, or a dataframe
                whose first column holds the dates; the first and the last
                entry of that column are used as start and end of the
                prediction range

        Returns:
            The forecast of the fitted model, or an array of ones (one per
            step or per row) when the model has not been fit.

        Raises:
            ValueError: If the model is fit and ``X_test`` is neither an int
                nor a dataframe.
        """
        if self._model is not None:
            if isinstance(X_test, int):
                forecast = self._model.forecast(steps=X_test)
            elif isinstance(X_test, pd.DataFrame):
                start = X_test.iloc[0, 0]
                end = X_test.iloc[-1, 0]
                forecast = self._model.predict(start=start, end=end)
            else:
                raise ValueError(
                    "X_test needs to be either a pd.Dataframe with dates as column ds)"
                    " or an int number of periods for predict()."
                )
            return forecast
        else:
            return np.ones(X_test if isinstance(X_test, int) else X_test.shape[0])


class SARIMAX(ARIMA):
    """Forecasting learner that fits a statsmodels ``SARIMAX`` model.

    ``p``, ``d``, ``q`` form the non-seasonal order and ``P``, ``D``, ``Q``,
    ``s`` the seasonal order of the statsmodels estimator.
    """

    @classmethod
    def search_space(cls, **params):
        """Return the search space over the seasonal and non-seasonal terms.

        Args:
            params: Unused.

        Returns:
            A dictionary mapping each hyperparameter name to its domain and
            initial value(s).
        """
        space = {}
        # non-seasonal terms start at 2, seasonal terms at 1
        for name, init_value in (
            ("p", 2),
            ("d", 2),
            ("q", 2),
            ("P", 1),
            ("D", 1),
            ("Q", 1),
        ):
            space[name] = {
                "domain": tune.quniform(lower=0, upper=10, q=1),
                "init_value": init_value,
                "low_cost_init_value": 0,
            }
        space["s"] = {
            "domain": tune.choice([1, 4, 6, 12]),
            "init_value": 12,
        }
        return space

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train the model from given training data

        Args:
            X_train: A dataframe of training data with a "ds" column
            y_train: The labels
            budget: Unused
            kwargs: Unused

        Returns:
            train_time: A float of the training time in seconds
        """
        from statsmodels.tsa.statespace.sarimax import SARIMAX as SARIMAX_estimator

        current_time = time.time()
        train_df = self._join(X_train, y_train)
        # Pass plain integers: the quantized search space may yield floats
        # (NOTE(review): statsmodels appears to read a non-integer order
        # entry as lag-polynomial coefficients rather than a count -- confirm).
        order = tuple(int(round(self.params[name])) for name in ("p", "d", "q"))
        seasonal_order = tuple(
            int(round(self.params[name])) for name in ("P", "D", "Q", "s")
        )
        if seasonal_order[3] <= 1:
            # A period of 1 (or less) carries no seasonality; fall back to
            # "no seasonal component" (NOTE(review): statsmodels is expected
            # to reject a seasonal period of 1 -- confirm).
            seasonal_order = (0, 0, 0, 0)
        model = SARIMAX_estimator(
            train_df,
            order=order,
            # the statsmodels keyword is `seasonal_order`; the former
            # `seasonality_order` was not a recognized parameter, so the
            # seasonal terms never reached the model
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False,
        )
        model = model.fit()
        train_time = time.time() - current_time
        self._model = model
        return train_time
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								"""!
 								 * Copyright (c) Microsoft Corporation. All rights reserved.
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								 * Licensed under the MIT License.
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								"""
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								import numpy as np
 								import xgboost as xgb
 								import time
 								from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
 								from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
 								from sklearn.linear_model import LogisticRegression
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								from lightgbm import LGBMClassifier, LGBMRegressor, LGBMRanker
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								from scipy.sparse import issparse
 								import pandas as pd
 								from . import tune
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								from .data import group_counts
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								import logging
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								logger = logging.getLogger(__name__)
 								class BaseEstimator:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								    """The abstract class for all learners
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								    Typical example:
 								        XGBoostEstimator: for regression
 								        XGBoostSklearnEstimator: for classification
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								        LGBMEstimator, RandomForestEstimator, LRL1Classifier, LRL2Classifier:
 								            for both regression and classification
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								    """
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								    def __init__(self, task="binary", **params):
 								        """Constructor
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        Args:
 								            task: A string of the task type, one of
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								                'binary', 'multi', 'regression', 'rank', 'forecast'
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            n_jobs: An integer of the number of parallel threads
 								            params: A dictionary of the hyperparameter names and values
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        self.params = params
 								        self.estimator_class = self._model = None
 								        self._task = task
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if "_estimator_type" in params:
 								            self._estimator_type = params["_estimator_type"]
 								            del self.params["_estimator_type"]
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        else:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            self._estimator_type = (
 								                "classifier" if task in ("binary", "multi") else "regressor"
 								            )
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								    def get_params(self, deep=False):
 								        params = self.params.copy()
 								        params["task"] = self._task
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if hasattr(self, "_estimator_type"):
 								            params["_estimator_type"] = self._estimator_type
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return params
 								    @property
 								    def classes_(self):
 								        return self._model.classes_
 								    @property
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def n_features_in_(self):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return self.model.n_features_in_
 								    @property
 								    def model(self):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """Trained model after fit() is called, or None before fit() is called"""
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return self._model
-												Cha cha (#127)

* unordered categorical

* allow cost attribute to be None

* tensorboardX version

* quote

* cfo cat

* trunc

* Update version.py

* incumbent is normalized

* python 3.9

* remove ConcurrencyLimiter

* seed

* estimator

* update autovw notebook

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
											
										
										
											2021-07-05 21:17:26 -04:00
+								    @property
 								    def estimator(self):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """Trained model after fit() is called, or None before fit() is called"""
-												Cha cha (#127)

* unordered categorical

* allow cost attribute to be None

* tensorboardX version

* quote

* cfo cat

* trunc

* Update version.py

* incumbent is normalized

* python 3.9

* remove ConcurrencyLimiter

* seed

* estimator

* update autovw notebook

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
											
										
										
											2021-07-05 21:17:26 -04:00
+								        return self._model
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								    def _preprocess(self, X):
 								        return X
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def _fit(self, X_train, y_train, **kwargs):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
-												v0.2.10 (#51)

* increase search space

* None check

											
										
										
											2021-03-28 17:54:25 -07:00
+								        current_time = time.time()
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if "groups" in kwargs:
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            kwargs = kwargs.copy()
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            if self._task == "rank":
 								                kwargs["group"] = group_counts(kwargs["groups"])
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								                # groups_val = kwargs.get('groups_val')
 								                # if groups_val is not None:
 								                #     kwargs['eval_group'] = [group_counts(groups_val)]
 								                #     kwargs['eval_set'] = [
 								                #         (kwargs['X_val'], kwargs['y_val'])]
 								                #     kwargs['verbose'] = False
 								                #     del kwargs['groups_val'], kwargs['X_val'], kwargs['y_val']
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            del kwargs["groups"]
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        X_train = self._preprocess(X_train)
 								        model = self.estimator_class(**self.params)
 								        model.fit(X_train, y_train, **kwargs)
-												v0.2.10 (#51)

* increase search space

* None check

											
										
										
											2021-03-28 17:54:25 -07:00
+								        train_time = time.time() - current_time
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        self._model = model
 								        return train_time
 								    def fit(self, X_train, y_train, budget=None, **kwargs):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """Train the model from given training data
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        Args:
 								            X_train: A numpy array of training data in shape n*m
 								            y_train: A numpy array of labels in shape n*1
 								            budget: A float of the time budget in seconds
 								        Returns:
 								            train_time: A float of the training time in seconds
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return self._fit(X_train, y_train, **kwargs)
 								    def predict(self, X_test):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """Predict label from features
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        Args:
 								            X_test: A numpy array of featurized instances, shape n*m
 								        Returns:
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								            A numpy array of shape n*1.
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            Each element is the label for a instance
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """
-												update image url (#71)

* update image url

* ArffException

* OpenMLError is ValueError

* CatBoostError

* reduce build on push

Co-authored-by: Chi Wang (MSR) <wang.chi@microsoft.com>
											
										
										
											2021-04-21 04:36:06 -04:00
+								        if self._model is not None:
 								            X_test = self._preprocess(X_test)
 								            return self._model.predict(X_test)
 								        else:
 								            return np.ones(X_test.shape[0])
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								    def predict_proba(self, X_test):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """Predict the probability of each class from features
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								        Only works for classification problems
 								        Args:
 								            model: An object of trained model with method predict_proba()
 								            X_test: A numpy array of featurized instances, shape n*m
 								        Returns:
 								            A numpy array of shape n*c. c is the # classes
 								            Each element at (i,j) is the probability for instance i to be in
 								                class j
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """
 								        assert self._task in (
 								            "binary",
 								            "multi",
 								        ), "predict_prob() only for classification task."
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        X_test = self._preprocess(X_test)
 								        return self._model.predict_proba(X_test)
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def cleanup(self):
 								        pass
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								    @classmethod
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def search_space(cls, **params):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """[required method] search space
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								        Returns:
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								            A dictionary of the search space.
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            Each key is the name of a hyperparameter, and value is a dict with
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								                its domain and init_value (optional), cat_hp_cost (optional)
 								                e.g.,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								                {'domain': tune.randint(lower=1, upper=10), 'init_value': 1}
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return {}
 								    @classmethod
-												constraint (#132)

* constraint

* ensemble
											
										
										
											2021-07-10 09:02:17 -07:00
+								    def size(cls, config: dict) -> float:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """[optional method] memory size of the estimator in bytes
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        Args:
 								            config - the dict of the hyperparameter config
 								        Returns:
 								            A float of the memory size required by the estimator to train the
 								            given config
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return 1.0
 								    @classmethod
-												constraint (#132)

* constraint

* ensemble
											
										
										
											2021-07-10 09:02:17 -07:00
+								    def cost_relative2lgbm(cls) -> float:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """[optional method] relative cost compared to lightgbm"""
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return 1.0
-												metric constraint (#90)

* penalty change

* metric modification

* catboost init

											
										
										
											2021-05-22 08:51:38 -07:00
+								    @classmethod
 								    def init(cls):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        """[optional method] initialize the class"""
-												metric constraint (#90)

* penalty change

* metric modification

* catboost init

											
										
										
											2021-05-22 08:51:38 -07:00
+								        pass
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								class SKLearnEstimator(BaseEstimator):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								    def __init__(self, task="binary", **params):
-												api doc for chacha (#105)

* api doc for chacha

* update params

* link to paper

* update dataset id

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
											
										
										
											2021-06-11 10:25:45 -07:00
+								        super().__init__(task, **params)
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								    def _preprocess(self, X):
 								        if isinstance(X, pd.DataFrame):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            cat_columns = X.select_dtypes(include=["category"]).columns
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            if not cat_columns.empty:
 								                X = X.copy()
 								                X[cat_columns] = X[cat_columns].apply(lambda x: x.cat.codes)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
-												max_leaves (#138)

* max_leaf_nodes in rf and extra_tree

* preprocess numpy str

* free up mem after training

											
										
										
											2021-07-27 18:02:49 -07:00
+								            # numpy array is not of numeric dtype
 								            X = pd.DataFrame(X)
 								            for col in X.columns:
 								                if isinstance(X[col][0], str):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                    X[col] = X[col].astype("category").cat.codes
-												max_leaves (#138)

* max_leaf_nodes in rf and extra_tree

* preprocess numpy str

* free up mem after training

											
										
										
											2021-07-27 18:02:49 -07:00
+								            X = X.to_numpy()
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return X
 								class LGBMEstimator(BaseEstimator):
 								    @classmethod
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def search_space(cls, data_size, **params):
 								        upper = min(32768, int(data_size))
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return {
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "n_estimators": {
 								                "domain": tune.lograndint(lower=4, upper=upper),
 								                "init_value": 4,
 								                "low_cost_init_value": 4,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "num_leaves": {
 								                "domain": tune.lograndint(lower=4, upper=upper),
 								                "init_value": 4,
 								                "low_cost_init_value": 4,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "min_child_samples": {
 								                "domain": tune.lograndint(lower=2, upper=2 ** 7 + 1),
 								                "init_value": 20,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "learning_rate": {
 								                "domain": tune.loguniform(lower=1 / 1024, upper=1.0),
 								                "init_value": 0.1,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            },
-												Support parallel and add random search  (#167)

* non hashable value out of signature

* parallel trials

* add random in _search_parallel

* fix bug in retraining

* check memory constraint before training

* retrain_full

* log custom metric

* retraining budget check

* sample size check before retrain

* remove 'time2eval' from result

* report 'total_search_time' in result

* rename total_search_time to wall_clock_time

* rename train_loss boolean to log_training_metric

* set default train_loss to None

* exclude oom result

* log retrained model

* no subsample

* doc str

* notebook

* predicted value is NaN for sarimax

* version

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
											
										
										
											2021-08-23 19:36:51 -04:00
+								            # 'subsample': {
 								            #     'domain': tune.uniform(lower=0.1, upper=1.0),
 								            #     'init_value': 1.0,
 								            # },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "log_max_bin": {  # log transformed with base 2
 								                "domain": tune.lograndint(lower=3, upper=11),
 								                "init_value": 8,
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "colsample_bytree": {
 								                "domain": tune.uniform(lower=0.01, upper=1.0),
 								                "init_value": 1.0,
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "reg_alpha": {
 								                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
 								                "init_value": 1 / 1024,
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "reg_lambda": {
 								                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
 								                "init_value": 1.0,
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								            },
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        }
 								    @classmethod
 								    def size(cls, config):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        num_leaves = int(round(config.get("num_leaves") or config["max_leaves"]))
 								        n_estimators = int(round(config["n_estimators"]))
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								        return (num_leaves * 3 + (num_leaves - 1) * 4 + 1.0) * n_estimators * 8
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								    def __init__(self, task="binary", log_max_bin=8, **params):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        super().__init__(task, **params)
-												Support parallel and add random search  (#167)

* non hashable value out of signature

* parallel trials

* add random in _search_parallel

* fix bug in retraining

* check memory constraint before training

* retrain_full

* log custom metric

* retraining budget check

* sample size check before retrain

* remove 'time2eval' from result

* report 'total_search_time' in result

* rename total_search_time to wall_clock_time

* rename train_loss boolean to log_training_metric

* set default train_loss to None

* exclude oom result

* log retrained model

* no subsample

* doc str

* notebook

* predicted value is NaN for sarimax

* version

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
											
										
										
											2021-08-23 19:36:51 -04:00
+								        if "objective" not in self.params:
 								            # Default: ‘regression’ for LGBMRegressor,
 								            # ‘binary’ or ‘multiclass’ for LGBMClassifier
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            objective = "regression"
 								            if "binary" in task:
 								                objective = "binary"
 								            elif "multi" in task:
 								                objective = "multiclass"
 								            elif "rank" == task:
 								                objective = "lambdarank"
-												Support parallel and add random search  (#167)

* non hashable value out of signature

* parallel trials

* add random in _search_parallel

* fix bug in retraining

* check memory constraint before training

* retrain_full

* log custom metric

* retraining budget check

* sample size check before retrain

* remove 'time2eval' from result

* report 'total_search_time' in result

* rename total_search_time to wall_clock_time

* rename train_loss boolean to log_training_metric

* set default train_loss to None

* exclude oom result

* log retrained model

* no subsample

* doc str

* notebook

* predicted value is NaN for sarimax

* version

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
											
										
										
											2021-08-23 19:36:51 -04:00
+								            self.params["objective"] = objective
-												api doc for chacha (#105)

* api doc for chacha

* update params

* link to paper

* update dataset id

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
											
										
										
											2021-06-11 10:25:45 -07:00
+								        if "n_estimators" in self.params:
 								            self.params["n_estimators"] = int(round(self.params["n_estimators"]))
 								        if "num_leaves" in self.params:
 								            self.params["num_leaves"] = int(round(self.params["num_leaves"]))
 								        if "min_child_samples" in self.params:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            self.params["min_child_samples"] = int(
 								                round(self.params["min_child_samples"])
 								            )
-												api doc for chacha (#105)

* api doc for chacha

* update params

* link to paper

* update dataset id

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
											
										
										
											2021-06-11 10:25:45 -07:00
+								        if "max_bin" not in self.params:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            self.params["max_bin"] = 1 << int(round(log_max_bin)) - 1
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								        if "verbose" not in self.params:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            self.params["verbose"] = -1
-												Support parallel and add random search  (#167)

* non hashable value out of signature

* parallel trials

* add random in _search_parallel

* fix bug in retraining

* check memory constraint before training

* retrain_full

* log custom metric

* retraining budget check

* sample size check before retrain

* remove 'time2eval' from result

* report 'total_search_time' in result

* rename total_search_time to wall_clock_time

* rename train_loss boolean to log_training_metric

* set default train_loss to None

* exclude oom result

* log retrained model

* no subsample

* doc str

* notebook

* predicted value is NaN for sarimax

* version

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qxw5138@psu.edu>
											
										
										
											2021-08-23 19:36:51 -04:00
+								        # if "subsample_freq" not in self.params:
 								        #     self.params['subsample_freq'] = 1
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if "regression" == task:
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            self.estimator_class = LGBMRegressor
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        elif "rank" == task:
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            self.estimator_class = LGBMRanker
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        else:
 								            self.estimator_class = LGBMClassifier
 								        self._time_per_iter = None
 								        self._train_size = 0
 								    def _preprocess(self, X):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if (
 								            not isinstance(X, pd.DataFrame)
 								            and issparse(X)
 								            and np.issubdtype(X.dtype, np.integer)
 								        ):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            X = X.astype(float)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            # numpy array is not of numeric dtype
 								            X = pd.DataFrame(X)
 								            for col in X.columns:
 								                if isinstance(X[col][0], str):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                    X[col] = X[col].astype("category").cat.codes
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            X = X.to_numpy()
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return X
 								    def fit(self, X_train, y_train, budget=None, **kwargs):
 								        """Train the model, shrinking ``n_estimators`` to fit a time budget.

 								        When ``budget`` is given and no usable per-iteration timing exists
 								        (none recorded yet, or the number of training rows differs by more
 								        than 4 from the one it was measured on), the model is first fitted
 								        with 1 and then with 4 estimators to estimate the time per iteration.
 								        The final fit uses at most the configured ``n_estimators`` and at most
 								        the number of iterations estimated to fit in the remaining budget.

 								        The configured ``n_estimators`` is always written back to
 								        ``self.params`` before this method exits, including when a fit raises.

 								        Args:
 								            X_train: Training features; only ``X_train.shape[0]`` is read
 								                here, the object itself is forwarded to ``self._fit``.
 								            y_train: Training labels, forwarded to ``self._fit``.
 								            budget: Time budget in seconds, or None for no limit.
 								            **kwargs: Extra arguments forwarded to ``self._fit``.

 								        Returns:
 								            The value returned by the first probe fit if that alone reached
 								            the budget; otherwise the wall-clock seconds spent in this call.
 								        """
 								        start_time = time.time()
 								        n_iter = self.params["n_estimators"]
 								        try:
 								            needs_probe = (
 								                not self._time_per_iter
 								                or abs(self._train_size - X_train.shape[0]) > 4
 								            )
 								            if needs_probe and budget is not None:
 								                # Probe 1: a single estimator.
 								                self.params["n_estimators"] = 1
 								                self._t1 = self._fit(X_train, y_train, **kwargs)
 								                if self._t1 >= budget:
 								                    return self._t1
 								                # Probe 2: four estimators; the difference to probe 1 gives
 								                # the cost of the three additional iterations.
 								                self.params["n_estimators"] = 4
 								                self._t2 = self._fit(X_train, y_train, **kwargs)
 								                if self._t2 > self._t1:
 								                    self._time_per_iter = (self._t2 - self._t1) / (
 								                        self.params["n_estimators"] - 1
 								                    )
 								                elif self._t1:
 								                    self._time_per_iter = self._t1
 								                else:
 								                    # Both timings were zero; use a small positive value so
 								                    # the division below is well defined.
 								                    self._time_per_iter = 0.001
 								                self._train_size = X_train.shape[0]
 								                if (
 								                    self._t1 + self._t2 >= budget
 								                    or n_iter == self.params["n_estimators"]
 								                ):
 								                    # Budget used up, or the 4-estimator probe already is the
 								                    # requested model.
 								                    return time.time() - start_time
 								            if budget is not None:
 								                self.params["n_estimators"] = min(
 								                    n_iter,
 								                    int(
 								                        (budget - time.time() + start_time - self._t1)
 								                        / self._time_per_iter
 								                        + 1
 								                    ),
 								                )
 								            if self.params["n_estimators"] > 0:
 								                self._fit(X_train, y_train, **kwargs)
 								            return time.time() - start_time
 								        finally:
 								            # Never leave a probe value (1 or 4) or a budget-capped value in
 								            # the configuration, even if a fit raised.
 								            self.params["n_estimators"] = n_iter
 								class XGBoostEstimator(SKLearnEstimator):
 								    """XGBoost learner trained through ``xgb.train`` (not the sklearn API).

 								    Used for regression (the default ``task``).
 								    """

 								    @classmethod
 								    def search_space(cls, data_size, **params):
 								        """Return the hyperparameter search space.

 								        Args:
 								            data_size: Size of the training data; the upper bound of
 								                ``n_estimators`` and ``max_leaves`` is
 								                ``min(32768, int(data_size))``.
 								            **params: Accepted for interface compatibility; not used.

 								        Returns:
 								            A dict mapping each hyperparameter name to a dict with its
 								            ``domain``, ``init_value`` and, for some, ``low_cost_init_value``.
 								        """
 								        upper = min(32768, int(data_size))
 								        return {
 								            "n_estimators": {
 								                "domain": tune.lograndint(lower=4, upper=upper),
 								                "init_value": 4,
 								                "low_cost_init_value": 4,
 								            },
 								            "max_leaves": {
 								                "domain": tune.lograndint(lower=4, upper=upper),
 								                "init_value": 4,
 								                "low_cost_init_value": 4,
 								            },
 								            "min_child_weight": {
 								                "domain": tune.loguniform(lower=0.001, upper=128),
 								                "init_value": 1,
 								            },
 								            "learning_rate": {
 								                "domain": tune.loguniform(lower=1 / 1024, upper=1.0),
 								                "init_value": 0.1,
 								            },
 								            "subsample": {
 								                "domain": tune.uniform(lower=0.1, upper=1.0),
 								                "init_value": 1.0,
 								            },
 								            "colsample_bylevel": {
 								                "domain": tune.uniform(lower=0.01, upper=1.0),
 								                "init_value": 1.0,
 								            },
 								            "colsample_bytree": {
 								                "domain": tune.uniform(lower=0.01, upper=1.0),
 								                "init_value": 1.0,
 								            },
 								            "reg_alpha": {
 								                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
 								                "init_value": 1 / 1024,
 								            },
 								            "reg_lambda": {
 								                "domain": tune.loguniform(lower=1 / 1024, upper=1024),
 								                "init_value": 1.0,
 								            },
 								        }

 								    @classmethod
 								    def size(cls, config):
 								        """Return the model size for ``config``, as computed by LGBMEstimator."""
 								        return LGBMEstimator.size(config)

 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        """Return this learner's cost relative to LGBM (the constant 1.6)."""
 								        return 1.6

 								    def __init__(
 								        self,
 								        task="regression",
 								        all_thread=False,
 								        n_jobs=1,
 								        n_estimators=4,
 								        max_leaves=4,
 								        subsample=1.0,
 								        min_child_weight=1,
 								        learning_rate=0.1,
 								        reg_lambda=1.0,
 								        reg_alpha=0.0,
 								        colsample_bylevel=1.0,
 								        colsample_bytree=1.0,
 								        tree_method="auto",
 								        **params,
 								    ):
 								        """Constructor

 								        Args:
 								            task: A string of the task type.
 								            all_thread: If true, ``nthread`` is left out of the XGBoost
 								                parameters instead of being set to ``n_jobs``.
 								            n_jobs: Stored as the ``nthread`` XGBoost parameter.
 								            n_estimators: Number of boosting rounds (rounded to an int).
 								            max_leaves: Rounded to an int.
 								            subsample, min_child_weight, learning_rate, reg_lambda,
 								            reg_alpha, colsample_bylevel, colsample_bytree: Cast to float.
 								            tree_method: Stored as is; ``fit`` overrides it with ``"hist"``
 								                for non-sparse training data.
 								            **params: Further parameters; ``max_depth``, ``grow_policy``,
 								                ``verbosity``, ``booster`` and ``objective`` are read from
 								                here, with defaults for all but ``objective``.
 								        """
 								        super().__init__(task, **params)
 								        # The number of rounds is an argument of xgb.train, not a booster
 								        # parameter, so it is kept outside self.params.
 								        self._n_estimators = int(round(n_estimators))
 								        self.params.update(
 								            {
 								                "max_leaves": int(round(max_leaves)),
 								                "max_depth": params.get("max_depth", 0),
 								                "grow_policy": params.get("grow_policy", "lossguide"),
 								                "tree_method": tree_method,
 								                "verbosity": params.get("verbosity", 0),
 								                "nthread": n_jobs,
 								                "learning_rate": float(learning_rate),
 								                "subsample": float(subsample),
 								                "reg_alpha": float(reg_alpha),
 								                "reg_lambda": float(reg_lambda),
 								                "min_child_weight": float(min_child_weight),
 								                "booster": params.get("booster", "gbtree"),
 								                "colsample_bylevel": float(colsample_bylevel),
 								                "colsample_bytree": float(colsample_bytree),
 								                "objective": params.get("objective"),
 								            }
 								        )
 								        if all_thread:
 								            del self.params["nthread"]

 								    def get_params(self, deep=False):
 								        """Return the parameters, exposing ``nthread`` as ``n_jobs`` too.

 								        ``nthread`` is absent when the estimator was built with
 								        ``all_thread=True``; in that case no ``n_jobs`` entry is added.
 								        """
 								        params = super().get_params()
 								        if "nthread" in params:
 								            params["n_jobs"] = params["nthread"]
 								        return params

 								    def fit(self, X_train, y_train, budget=None, **kwargs):
 								        """Train a booster with ``xgb.train``.

 								        Args:
 								            X_train: Training features. Non-sparse input is preprocessed
 								                and switches ``tree_method`` to ``"hist"``.
 								            y_train: Training labels.
 								            budget: Not used by this method.
 								            **kwargs: Only ``sample_weight`` is read; it is passed to the
 								                ``DMatrix`` as ``weight``.

 								        Returns:
 								            The wall-clock training time in seconds.
 								        """
 								        start_time = time.time()
 								        if not issparse(X_train):
 								            self.params["tree_method"] = "hist"
 								            X_train = self._preprocess(X_train)
 								        if "sample_weight" in kwargs:
 								            dtrain = xgb.DMatrix(X_train, label=y_train, weight=kwargs["sample_weight"])
 								        else:
 								            dtrain = xgb.DMatrix(X_train, label=y_train)

 								        objective = self.params.get("objective")
 								        if isinstance(objective, str):
 								            obj = None
 								        else:
 								            # A non-string objective is handed to xgb.train through ``obj``
 								            # and must not also appear among the booster parameters.
 								            obj = objective
 								            if "objective" in self.params:
 								                del self.params["objective"]
 								        try:
 								            self._model = xgb.train(self.params, dtrain, self._n_estimators, obj=obj)
 								        finally:
 								            # Put the objective back even if training raised, so the
 								            # estimator's configuration is not silently altered.
 								            self.params["objective"] = objective
 								        del dtrain
 								        train_time = time.time() - start_time
 								        return train_time

 								    def predict(self, X_test):
 								        """Predict for ``X_test`` after wrapping it in an ``xgb.DMatrix``.

 								        Non-sparse input is preprocessed first.
 								        """
 								        if not issparse(X_test):
 								            X_test = self._preprocess(X_test)
 								        dtest = xgb.DMatrix(X_test)
 								        return super().predict(dtest)
 								class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								    """using sklearn API, used for classification"""
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								    @classmethod
 								    def search_space(cls, data_size, **params):
 								        """Return the search space of XGBoostEstimator for ``data_size``.

 								        ``**params`` is accepted but not forwarded.
 								        """
 								        space = XGBoostEstimator.search_space(data_size)
 								        return space
 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        """Return the same relative cost as XGBoostEstimator reports."""
 								        relative_cost = XGBoostEstimator.cost_relative2lgbm()
 								        return relative_cost
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def __init__(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        self,
 								        task="binary",
 								        n_jobs=1,
 								        n_estimators=4,
 								        max_leaves=4,
 								        subsample=1.0,
 								        min_child_weight=1,
 								        learning_rate=0.1,
 								        reg_lambda=1.0,
 								        reg_alpha=0.0,
 								        colsample_bylevel=1.0,
 								        colsample_bytree=1.0,
 								        tree_method="hist",
 								        **params,
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    ):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        super().__init__(task, **params)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        del self.params["objective"]
 								        del self.params["max_bin"]
 								        del self.params["verbose"]
 								        self.params.update(
 								            {
 								                "n_estimators": int(round(n_estimators)),
 								                "max_leaves": int(round(max_leaves)),
 								                "max_depth": 0,
 								                "grow_policy": params.get("grow_policy", "lossguide"),
 								                "tree_method": tree_method,
 								                "n_jobs": n_jobs,
 								                "verbosity": 0,
 								                "learning_rate": float(learning_rate),
 								                "subsample": float(subsample),
 								                "reg_alpha": float(reg_alpha),
 								                "reg_lambda": float(reg_lambda),
 								                "min_child_weight": float(min_child_weight),
 								                "booster": params.get("booster", "gbtree"),
 								                "colsample_bylevel": float(colsample_bylevel),
 								                "colsample_bytree": float(colsample_bytree),
 								                "use_label_encoder": params.get("use_label_encoder", False),
 								            }
 								        )
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        self.estimator_class = xgb.XGBRegressor
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if "rank" == task:
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            self.estimator_class = xgb.XGBRanker
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        elif task in ("binary", "multi"):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            self.estimator_class = xgb.XGBClassifier
 								        self._time_per_iter = None
 								        self._train_size = 0
 								    def fit(self, X_train, y_train, budget=None, **kwargs):
 								        """Fit the model, setting ``tree_method`` to "auto" for sparse input.

 								        Args:
 								            X_train: Training features; tested with ``scipy.sparse.issparse``.
 								            y_train: Training labels.
 								            budget: Forwarded unchanged to the parent ``fit`` (default None).
 								            **kwargs: Extra fit arguments forwarded to the parent ``fit``.

 								        Returns:
 								            Whatever the parent class's ``fit`` returns.
 								        """
 								        sparse_input = issparse(X_train)
 								        if sparse_input:
 								            # Overrides whatever tree method was configured at construction.
 								            self.params["tree_method"] = "auto"
 								        fit_result = super().fit(X_train, y_train, budget, **kwargs)
 								        return fit_result
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								    @classmethod
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def search_space(cls, data_size, task, **params):
-												max_leaves (#138)

* max_leaf_nodes in rf and extra_tree

* preprocess numpy str

* free up mem after training

											
										
										
											2021-07-27 18:02:49 -07:00
+								        data_size = int(data_size)
 								        upper = min(2048, data_size)
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        space = {
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "n_estimators": {
 								                "domain": tune.lograndint(lower=4, upper=upper),
 								                "init_value": 4,
 								                "low_cost_init_value": 4,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "max_features": {
 								                "domain": tune.loguniform(lower=0.1, upper=1.0),
 								                "init_value": 1.0,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "max_leaves": {
 								                "domain": tune.lograndint(lower=4, upper=min(32768, data_size)),
 								                "init_value": 4,
 								                "low_cost_init_value": 4,
-												max_leaves (#138)

* max_leaf_nodes in rf and extra_tree

* preprocess numpy str

* free up mem after training

											
										
										
											2021-07-27 18:02:49 -07:00
+								            },
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        }
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if task in ("binary", "multi"):
 								            space["criterion"] = {
 								                "domain": tune.choice(["gini", "entropy"]),
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								                # 'init_value': 'gini',
 								            }
 								        return space
 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        return 2.0
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def __init__(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        self,
 								        task="binary",
 								        n_jobs=1,
 								        n_estimators=4,
 								        max_features=1.0,
 								        criterion="gini",
 								        max_leaves=4,
 								        **params,
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    ):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        super().__init__(task, **params)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        del self.params["objective"]
 								        del self.params["max_bin"]
 								        self.params.update(
 								            {
 								                "n_estimators": int(round(n_estimators)),
 								                "n_jobs": n_jobs,
 								                "verbose": 0,
 								                "max_features": float(max_features),
 								                "max_leaf_nodes": params.get("max_leaf_nodes", int(round(max_leaves))),
 								            }
 								        )
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        self.estimator_class = RandomForestRegressor
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if task in ("binary", "multi"):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            self.estimator_class = RandomForestClassifier
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            self.params["criterion"] = criterion
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								    def get_params(self, deep=False):
 								        params = super().get_params()
 								        return params
 								class ExtraTreeEstimator(RandomForestEstimator):
 								    """Extra-trees learner sharing RandomForestEstimator's setup.

 								    Uses ``ExtraTreesClassifier`` for the 'binary' and 'multi' tasks and
 								    ``ExtraTreesRegressor`` for every other task.
 								    """

 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        """Return this learner's relative-cost constant (1.9).

 								        Presumably the cost relative to LightGBM, going by the method name.
 								        """
 								        return 1.9

 								    def __init__(self, task="binary", **params):
 								        """Constructor

 								        Args:
 								            task: Task name; 'binary' and 'multi' select the classifier,
 								                anything else selects the regressor.
 								            **params: Hyperparameters forwarded to RandomForestEstimator.
 								        """
 								        super().__init__(task, **params)
 								        # Use the same classification test as the parent class. The
 								        # previous `"regression" in task` check sent every other task
 								        # (e.g. 'rank', 'forecast') to the classifier, even though the
 								        # parent only stores ``criterion`` for 'binary'/'multi'.
 								        if task in ("binary", "multi"):
 								            self.estimator_class = ExtraTreesClassifier
 								        else:
 								            self.estimator_class = ExtraTreesRegressor
 								class LRL1Classifier(SKLearnEstimator):
 								    """Logistic regression learner defaulting to an L1 penalty.

 								    Defaults to the "saga" solver; classification tasks only.
 								    """

 								    @classmethod
 								    def search_space(cls, **params):
 								        """Return the search space: a log-uniform range for ``C``.

 								        Args:
 								            **params: Accepted for interface compatibility; unused here.

 								        Returns:
 								            A dict with the single key "C" holding its ``domain`` and
 								            ``init_value``.
 								        """
 								        c_spec = {
 								            "domain": tune.loguniform(lower=0.03125, upper=32768.0),
 								            "init_value": 1.0,
 								        }
 								        return {"C": c_spec}

 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        """Return this learner's relative-cost constant (160).

 								        Presumably the cost relative to LightGBM, going by the method name.
 								        """
 								        relative_cost = 160
 								        return relative_cost

 								    def __init__(self, task="binary", n_jobs=1, tol=0.0001, C=1.0, **params):
 								        """Constructor

 								        Args:
 								            task: Task name; must be 'binary' or 'multi'.
 								            n_jobs: Number of parallel jobs passed to the estimator.
 								            tol: Tolerance value, stored as a float.
 								            C: Value stored as a float under ``C``.
 								            **params: Further hyperparameters; ``penalty`` and ``solver``
 								                override the "l1" / "saga" defaults when present.

 								        Raises:
 								            AssertionError: If ``task`` is not 'binary' or 'multi'.
 								        """
 								        super().__init__(task, **params)
 								        lr_settings = dict(
 								            penalty=params.get("penalty", "l1"),
 								            tol=float(tol),
 								            C=float(C),
 								            solver=params.get("solver", "saga"),
 								            n_jobs=n_jobs,
 								        )
 								        self.params.update(lr_settings)
 								        is_classification = task in ("binary", "multi")
 								        assert is_classification, "LogisticRegression for classification task only"
 								        self.estimator_class = LogisticRegression
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								class LRL2Classifier(SKLearnEstimator):
 								    """Logistic regression learner defaulting to an L2 penalty.

 								    Defaults to the "lbfgs" solver; classification tasks only.
 								    """

 								    @classmethod
 								    def search_space(cls, **params):
 								        """Return the same search space as ``LRL1Classifier.search_space``."""
 								        shared_space = LRL1Classifier.search_space(**params)
 								        return shared_space

 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        """Return this learner's relative-cost constant (25).

 								        Presumably the cost relative to LightGBM, going by the method name.
 								        """
 								        relative_cost = 25
 								        return relative_cost

 								    def __init__(self, task="binary", n_jobs=1, tol=0.0001, C=1.0, **params):
 								        """Constructor

 								        Args:
 								            task: Task name; must be 'binary' or 'multi'.
 								            n_jobs: Number of parallel jobs passed to the estimator.
 								            tol: Tolerance value, stored as a float.
 								            C: Value stored as a float under ``C``.
 								            **params: Further hyperparameters; ``penalty`` and ``solver``
 								                override the "l2" / "lbfgs" defaults when present.

 								        Raises:
 								            AssertionError: If ``task`` is not 'binary' or 'multi'.
 								        """
 								        super().__init__(task, **params)
 								        lr_settings = dict(
 								            penalty=params.get("penalty", "l2"),
 								            tol=float(tol),
 								            C=float(C),
 								            solver=params.get("solver", "lbfgs"),
 								            n_jobs=n_jobs,
 								        )
 								        self.params.update(lr_settings)
 								        is_classification = task in ("binary", "multi")
 								        assert is_classification, "LogisticRegression for classification task only"
 								        self.estimator_class = LogisticRegression
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								class CatBoostEstimator(BaseEstimator):
 								    _time_per_iter = None
 								    _train_size = 0
 								    @classmethod
 								    def search_space(cls, data_size, **params):
 								        """Build the hyperparameter search space.

 								        Args:
 								            data_size: Size of the training data; the upper bound for
 								                ``early_stopping_rounds`` is ``round(1500000 / data_size)``
 								                clamped to the range [12, 150].
 								            **params: Accepted for interface compatibility; unused here.

 								        Returns:
 								            A dict with entries for ``early_stopping_rounds`` and
 								            ``learning_rate``, each holding its ``domain`` and initial
 								            value(s).
 								        """
 								        rounds_cap = round(1500000 / data_size)
 								        rounds_cap = min(rounds_cap, 150)
 								        rounds_cap = max(rounds_cap, 12)
 								        stopping_spec = {
 								            "domain": tune.lograndint(lower=10, upper=rounds_cap),
 								            "init_value": 10,
 								            "low_cost_init_value": 10,
 								        }
 								        rate_spec = {
 								            "domain": tune.loguniform(lower=0.005, upper=0.2),
 								            "init_value": 0.1,
 								        }
 								        return {
 								            "early_stopping_rounds": stopping_spec,
 								            "learning_rate": rate_spec,
 								        }
 								    @classmethod
 								    def size(cls, config):
 								        n_estimators = 8192
 								        max_leaves = 64
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								        return (max_leaves * 3 + (max_leaves - 1) * 4 + 1.0) * n_estimators * 8
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        return 15
-												metric constraint (#90)

* penalty change

* metric modification

* catboost init

											
										
										
											2021-05-22 08:51:38 -07:00
+								    @classmethod
 								    def init(cls):
 								        CatBoostEstimator._time_per_iter = None
 								        CatBoostEstimator._train_size = 0
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								    def _preprocess(self, X):
 								        if isinstance(X, pd.DataFrame):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            cat_columns = X.select_dtypes(include=["category"]).columns
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            if not cat_columns.empty:
 								                X = X.copy()
 								                X[cat_columns] = X[cat_columns].apply(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                    lambda x: x.cat.rename_categories(
 								                        [
 								                            str(c) if isinstance(c, float) else c
 								                            for c in x.cat.categories
 								                        ]
 								                    )
 								                )
 								        elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            # numpy array is not of numeric dtype
 								            X = pd.DataFrame(X)
 								            for col in X.columns:
 								                if isinstance(X[col][0], str):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                    X[col] = X[col].astype("category").cat.codes
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            X = X.to_numpy()
 								        return X
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def __init__(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        self,
 								        task="binary",
 								        n_jobs=1,
 								        n_estimators=8192,
 								        learning_rate=0.1,
 								        early_stopping_rounds=4,
 								        **params,
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    ):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        super().__init__(task, **params)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        self.params.update(
 								            {
 								                "early_stopping_rounds": int(round(early_stopping_rounds)),
 								                "n_estimators": n_estimators,
 								                "learning_rate": learning_rate,
 								                "thread_count": n_jobs,
 								                "verbose": params.get("verbose", False),
 								                "random_seed": params.get("random_seed", 10242048),
 								            }
 								        )
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        from catboost import CatBoostRegressor
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        self.estimator_class = CatBoostRegressor
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if task in ("binary", "multi"):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            from catboost import CatBoostClassifier
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            self.estimator_class = CatBoostClassifier
 								    def get_params(self, deep=False):
 								        params = super().get_params()
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        params["n_jobs"] = params["thread_count"]
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return params
 								    def fit(self, X_train, y_train, budget=None, **kwargs):
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								        import shutil
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        start_time = time.time()
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        train_dir = f"catboost_{str(start_time)}"
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        n_iter = self.params["n_estimators"]
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								        X_train = self._preprocess(X_train)
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        if isinstance(X_train, pd.DataFrame):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            cat_features = list(X_train.select_dtypes(include="category").columns)
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        else:
 								            cat_features = []
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								        # from catboost import CatBoostError
 								        # try:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if (
 								            not CatBoostEstimator._time_per_iter
 								            or abs(CatBoostEstimator._train_size - len(y_train)) > 4
 								        ) and budget:
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            # measure the time per iteration
 								            self.params["n_estimators"] = 1
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            CatBoostEstimator._smallmodel = self.estimator_class(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                train_dir=train_dir, **self.params
 								            )
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            CatBoostEstimator._smallmodel.fit(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                X_train, y_train, cat_features=cat_features, **kwargs
 								            )
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            CatBoostEstimator._t1 = time.time() - start_time
 								            if CatBoostEstimator._t1 >= budget:
 								                self.params["n_estimators"] = n_iter
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								                self._model = CatBoostEstimator._smallmodel
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								                shutil.rmtree(train_dir, ignore_errors=True)
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								                return CatBoostEstimator._t1
 								            self.params["n_estimators"] = 4
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            CatBoostEstimator._smallmodel = self.estimator_class(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                train_dir=train_dir, **self.params
 								            )
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            CatBoostEstimator._smallmodel.fit(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                X_train, y_train, cat_features=cat_features, **kwargs
 								            )
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            CatBoostEstimator._time_per_iter = (
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                time.time() - start_time - CatBoostEstimator._t1
 								            ) / (self.params["n_estimators"] - 1)
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            if CatBoostEstimator._time_per_iter <= 0:
 								                CatBoostEstimator._time_per_iter = CatBoostEstimator._t1
 								            CatBoostEstimator._train_size = len(y_train)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            if (
 								                time.time() - start_time >= budget
 								                or n_iter == self.params["n_estimators"]
 								            ):
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								                self.params["n_estimators"] = n_iter
 								                self._model = CatBoostEstimator._smallmodel
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								                shutil.rmtree(train_dir, ignore_errors=True)
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								                return time.time() - start_time
 								        if budget:
 								            train_times = 1
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            self.params["n_estimators"] = min(
 								                n_iter,
 								                int(
 								                    (budget - time.time() + start_time - CatBoostEstimator._t1)
 								                    / train_times
 								                    / CatBoostEstimator._time_per_iter
 								                    + 1
 								                ),
 								            )
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            self._model = CatBoostEstimator._smallmodel
 								        if self.params["n_estimators"] > 0:
 								            n = max(int(len(y_train) * 0.9), len(y_train) - 1000)
 								            X_tr, y_tr = X_train[:n], y_train[:n]
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            if "sample_weight" in kwargs:
 								                weight = kwargs["sample_weight"]
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								                if weight is not None:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                    kwargs["sample_weight"] = weight[:n]
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            else:
 								                weight = None
 								            from catboost import Pool
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            model = self.estimator_class(train_dir=train_dir, **self.params)
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            model.fit(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                X_tr,
 								                y_tr,
 								                cat_features=cat_features,
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								                eval_set=Pool(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                    data=X_train[n:], label=y_train[n:], cat_features=cat_features
 								                ),
 								                **kwargs,
 								            )  # model.get_best_iteration()
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            shutil.rmtree(train_dir, ignore_errors=True)
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            if weight is not None:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                kwargs["sample_weight"] = weight
-												v0.5.12 (#150)

* remove extra comma

* exclusive bound

* log file name

* add cost to space

* dataset_format

* add load_openml_dataset test

* docstr

* revise test format

* simplify restore

* order categories

* openml server exception in test

* process space

* add warning

* log format

* reduce n_cpu

* nested space

* hierarchical search space for CFO

* non hierarchical for bs

* unflatten hierarchical config

* connection error

* random sample

* config signature

* check ray version

* preprocess numpy array

* catboost preprocess

* time budget

* seed, verbose, hpo_method

* test cfocat

* shallow copy in flatten_dict
prevent lgbm model duplication

* match estimator name

* quantize and log

* test qloguniform and qrandint

* test qlograndint

* thread.running

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyunwu@Qingyuns-MacBook-Pro-2.local>
											
										
										
											2021-08-12 02:02:22 -04:00
+								            self._model = model
 								        # except CatBoostError:
 								        #     self._model = None
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        self.params["n_estimators"] = n_iter
 								        train_time = time.time() - start_time
 								        return train_time
 								class KNeighborsEstimator(BaseEstimator):
 								    @classmethod
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								    def search_space(cls, data_size, **params):
 								        upper = min(512, int(data_size / 2))
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return {
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "n_neighbors": {
 								                "domain": tune.lograndint(lower=1, upper=upper),
 								                "init_value": 5,
 								                "low_cost_init_value": 1,
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            },
 								        }
 								    @classmethod
 								    def cost_relative2lgbm(cls):
 								        return 30
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								    def __init__(self, task="binary", n_jobs=1, n_neighbors=5, **params):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        super().__init__(task, **params)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        self.params.update(
 								            {
 								                "n_neighbors": int(round(n_neighbors)),
 								                "weights": params.get("weights", "distance"),
 								                "n_jobs": n_jobs,
 								            }
 								        )
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        from sklearn.neighbors import KNeighborsRegressor
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        self.estimator_class = KNeighborsRegressor
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        if task in ("binary", "multi"):
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            from sklearn.neighbors import KNeighborsClassifier
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            self.estimator_class = KNeighborsClassifier
 								    def _preprocess(self, X):
 								        if isinstance(X, pd.DataFrame):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            cat_columns = X.select_dtypes(["category"]).columns
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								            if X.shape[1] == len(cat_columns):
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                raise ValueError("kneighbor requires at least one numeric feature")
-												Issue58 (#59)

* iter per learner

* code cleanup

											
										
										
											2021-04-08 09:29:55 -07:00
+								            X = X.drop(cat_columns, axis=1)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        elif isinstance(X, np.ndarray) and X.dtype.kind not in "buif":
-												max_leaves (#138)

* max_leaf_nodes in rf and extra_tree

* preprocess numpy str

* free up mem after training

											
										
										
											2021-07-27 18:02:49 -07:00
+								            # drop categocial columns if any
 								            X = pd.DataFrame(X)
 								            cat_columns = []
 								            for col in X.columns:
 								                if isinstance(X[col][0], str):
 								                    cat_columns.append(col)
 								            X = X.drop(cat_columns, axis=1)
 								            X = X.to_numpy()
-												V0.2.2 (#19)

* v0.2.2

separate the HPO part into the module flaml.tune
enhanced implementation of FLOW^2, CFO and BlendSearch
support parallel tuning using ray tune
add support for sample_weight and generic fit arguments
enable mlflow logging

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: qingyun-wu <qw2ky@virginia.edu>
											
										
										
											2021-02-05 21:41:14 -08:00
+								        return X
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
class Prophet(BaseEstimator):
    """Forecast learner backed by the ``prophet`` package."""

    @classmethod
    def search_space(cls, **params):
        """Return the hyperparameter search space.

        Args:
            params: Unused extra keyword arguments.

        Returns:
            A dict mapping each tuned hyperparameter name to its domain and
            initial value(s).
        """
        return {
            "changepoint_prior_scale": {
                "domain": tune.loguniform(lower=0.001, upper=1000),
                "init_value": 0.01,
                "low_cost_init_value": 0.001,
            },
            "seasonality_prior_scale": {
                "domain": tune.loguniform(lower=0.01, upper=100),
                "init_value": 1,
            },
            "holidays_prior_scale": {
                "domain": tune.loguniform(lower=0.01, upper=100),
                "init_value": 1,
            },
            "seasonality_mode": {
                "domain": tune.choice(["additive", "multiplicative"]),
                "init_value": "multiplicative",
            },
        }

    def __init__(self, task="forecast", **params):
        """Constructor

        Args:
            task: A string of the task type.
            params: A dictionary of the hyperparameter names and values;
                an "n_jobs" entry, if present, is discarded.
        """
        # n_jobs is removed so that it is not forwarded with the other params.
        params.pop("n_jobs", None)
        super().__init__(task, **params)

    def _join(self, X_train, y_train):
        """Combine the features and the target into one training dataframe.

        Args:
            X_train: A dataframe that contains a "ds" column.
            y_train: The target values, stored under the column "y".

        Returns:
            X_train joined with the "y" column.
        """
        assert "ds" in X_train, (
            "Dataframe for training forecast model must have column"
            ' "ds" with the dates in X_train.'
        )
        target = pd.DataFrame(y_train, columns=["y"])
        return X_train.join(target)

    def fit(self, X_train, y_train, budget=None, **kwargs):
        """Train the model and return the elapsed training time in seconds.

        Args:
            X_train: A dataframe that contains a "ds" column.
            y_train: The target values.
            budget: Unused in this method.
            kwargs: Unused in this method.

        Returns:
            The wall-clock time spent joining the data and fitting.
        """
        # Aliased so the library class does not shadow this estimator's name.
        from prophet import Prophet as _ProphetModel

        started = time.time()
        frame = self._join(X_train, y_train)
        fitted = _ProphetModel(**self.params).fit(frame)
        elapsed = time.time() - started
        self._model = fitted
        return elapsed

    def predict(self, X_test):
        """Predict the "yhat" values for X_test.

        Args:
            X_test: The input passed to the fitted model's predict();
                an int (number of steps) is rejected.

        Returns:
            The "yhat" column of the forecast, or an array of ones of length
            X_test.shape[0] when the estimator has not been fit.

        Raises:
            ValueError: If X_test is an int.
        """
        if isinstance(X_test, int):
            raise ValueError(
                "predict() with steps is only supported for arima/sarimax."
                " For Prophet, pass a dataframe with a date colum named ds."
            )
        if self._model is None:
            logger.warning(
                "Estimator is not fit yet. Please run fit() before predict()."
            )
            return np.ones(X_test.shape[0])
        return self._model.predict(X_test)["yhat"]
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								class ARIMA(Prophet):
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								    @classmethod
 								    def search_space(cls, **params):
 								        space = {
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "p": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 2,
 								                "low_cost_init_value": 0,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "d": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 2,
 								                "low_cost_init_value": 0,
 								            },
 								            "q": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 2,
 								                "low_cost_init_value": 0,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
 								        }
 								        return space
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								    def _join(self, X_train, y_train):
 								        train_df = super()._join(X_train, y_train)
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								        train_df.index = pd.to_datetime(train_df["ds"])
 								        train_df = train_df.drop("ds", axis=1)
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								        return train_df
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								    def fit(self, X_train, y_train, budget=None, **kwargs):
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        import warnings
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								        warnings.filterwarnings("ignore")
-												warmstart blendsearch (#186)

* increase test coverage

* use define by run only when needed

* warmstart bs

* classification -> binary, multi

* warm start with evaluated rewards

* data transformer; resource attr for gs

* BlendSearchTuner bug fix and unittest

* bug fix

* docstr and import

* task type

											
										
										
											2021-09-04 01:42:21 -07:00
+								        from statsmodels.tsa.arima.model import ARIMA as ARIMA_estimator
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								        current_time = time.time()
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								        train_df = self._join(X_train, y_train)
 								        model = ARIMA_estimator(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            train_df,
 								            order=(self.params["p"], self.params["d"], self.params["q"]),
 								            enforce_stationarity=False,
 								            enforce_invertibility=False,
 								        )
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								        model = model.fit()
 								        train_time = time.time() - current_time
 								        self._model = model
 								        return train_time
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								    def predict(self, X_test):
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								        if self._model is not None:
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            if isinstance(X_test, int):
 								                forecast = self._model.forecast(steps=X_test)
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            elif isinstance(X_test, pd.DataFrame):
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								                start = X_test.iloc[0, 0]
 								                end = X_test.iloc[-1, 0]
 								                forecast = self._model.predict(start=start, end=end)
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            else:
 								                raise ValueError(
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								                    "X_test needs to be either a pd.Dataframe with dates as column ds)"
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                    " or an int number of periods for predict()."
 								                )
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            return forecast
 								        else:
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            return np.ones(X_test if isinstance(X_test, int) else X_test.shape[0])
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								class SARIMAX(ARIMA):
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								    @classmethod
 								    def search_space(cls, **params):
 								        space = {
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "p": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 2,
 								                "low_cost_init_value": 0,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "d": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 2,
 								                "low_cost_init_value": 0,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "q": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 2,
 								                "low_cost_init_value": 0,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "P": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 1,
 								                "low_cost_init_value": 0,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "D": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 1,
 								                "low_cost_init_value": 0,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            "Q": {
 								                "domain": tune.quniform(lower=0, upper=10, q=1),
 								                "init_value": 1,
 								                "low_cost_init_value": 0,
 								            },
 								            "s": {
 								                "domain": tune.choice([1, 4, 6, 12]),
 								                "init_value": 12,
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								            },
 								        }
 								        return space
 								    def fit(self, X_train, y_train, budget=None, **kwargs):
 								        from statsmodels.tsa.statespace.sarimax import SARIMAX as SARIMAX_estimator
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								        current_time = time.time()
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								        train_df = self._join(X_train, y_train)
 								        model = SARIMAX_estimator(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								            train_df,
 								            order=(self.params["p"], self.params["d"], self.params["q"]),
-												remove catboost training dir; ensemble api; blendsearch for hierarchical space; ranking task; forecast improvement (#178)

* remove catboost training dir

* close #48

* bs for hierarchical space. close #85

* retrain for hierarchical space

* clean ml (#180)

Co-authored-by: Qingyun Wu <qxw5138@psu.edu>

* support ranking task

* examples

* cv shuffle

* forecast api and implementation cleaner

* period constraints

* delete groups after fit

											
										
										
											2021-09-01 16:25:04 -07:00
+								            seasonality_order=(
-												package name in setup (#198)

* package name

* learning to rank example: close #200

* try import prophet #201 
											
										
										
											2021-09-11 21:19:18 -07:00
+								                self.params["P"],
 								                self.params["D"],
 								                self.params["Q"],
 								                self.params["s"],
 								            ),
 								            enforce_stationarity=False,
 								            enforce_invertibility=False,
 								        )
-												Forecast (#162)

* added 'forecast' task with estimators ['fbprophet', 'arima', 'sarimax']

* update setup.py

* add TimeSeriesSplit to 'regression' and 'classification' task

* add 'time' split_type for 'classification' and 'regression' task

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>

* feature importance

* variable name

* Update test/test_split.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* Update test/test_forecast.py

Co-authored-by: Chi Wang <wang.chi@microsoft.com>

* prophet installation fail in windows

* upload flaml_forecast.ipynb

Signed-off-by: Kevin Chen <chenkevin.8787@gmail.com>


											
										
										
											2021-08-23 16:26:46 -04:00
+								        model = model.fit()
 								        train_time = time.time() - current_time
 								        self._model = model
 								        return train_time