Mirror of https://github.com/microsoft/autogen.git, synced 2025-09-03 21:37:17 +00:00
package name in setup (#198)
* package name
* learning to rank example: close #200
* try import prophet #201
This commit is contained in: parent 8f9f08cebc, commit f4529dfe89
README.md (21 lines changed)
@@ -75,7 +75,7 @@ And they can be used in distributed HPO frameworks such as ray tune or nni.

 ## Examples

-- A basic classification example.
+* A basic classification example.

 ```python
 from flaml import AutoML
@@ -99,7 +99,7 @@ print(automl.predict_proba(X_train))
 print(automl.model)
 ```

-- A basic regression example.
+* A basic regression example.

 ```python
 from flaml import AutoML
@@ -123,7 +123,7 @@ print(automl.predict(X_train))
 print(automl.model)
 ```

-- Time series forecasting.
+* Time series forecasting.

 ```python
 # pip install flaml[forecast]
@@ -141,14 +141,15 @@ automl.fit(X_train=X_train[:72],  # a single column of timestamp
 print(automl.predict(X_train[72:]))
 ```

-- Learning to rank.
+* Learning to rank.

 ```python
 from sklearn.datasets import fetch_openml
 from flaml import AutoML
-X, y = fetch_openml(name="credit-g", return_X_y=True)
+X_train, y_train = fetch_openml(name="credit-g", return_X_y=True, as_frame=False)
+y_train = y_train.cat.codes
 # not a real learning to rank dataaset
-groups = [200] * 4 + [100] * 2,  # group counts
+groups = [200] * 4 + [100] * 2  # group counts
 automl = AutoML()
 automl.fit(
     X_train, y_train, groups=groups,
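For context, the corrected README snippet assembles into a script like the following. This is a hedged, runnable sketch rather than the commit's exact example: the `pd.Series(...).cat.codes` conversion and the `task="rank"`/`time_budget` arguments are illustrative additions, and, as the README's own comment says, credit-g is not a real ranking dataset and the group sizes are made up (they sum to its 1000 rows).

```python
from sklearn.datasets import fetch_openml
import pandas as pd
from flaml import AutoML

X_train, y_train = fetch_openml(name="credit-g", return_X_y=True, as_frame=False)
# labels arrive as strings; map them to integer relevance codes for the ranker
y_train = pd.Series(y_train).astype("category").cat.codes

groups = [200] * 4 + [100] * 2  # query group sizes; must sum to len(X_train)
automl = AutoML()
automl.fit(
    X_train, y_train, groups=groups,
    task="rank", time_budget=10,  # seconds; illustrative budget
)
```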
@@ -207,17 +208,21 @@ pip install -e .[test,notebook]
 ```

 ### Docker

 We provide a simple [Dockerfile](https://github.com/microsoft/FLAML/blob/main/Dockerfile).
-```
+
+```bash
 docker build git://github.com/microsoft/FLAML -t flaml-dev
 docker run -it flaml-dev
 ```

 ### Develop in Remote Container

 If you use vscode, you can open the FLAML folder in a [Container](https://code.visualstudio.com/docs/remote/containers).
-We have provided the configuration in (.devcontainer)[(https://github.com/microsoft/FLAML/blob/main/.devcontainer)].
+We have provided the configuration in [.devcontainer]((https://github.com/microsoft/FLAML/blob/main/.devcontainer)).

 ### Pre-commit

 Run `pre-commit install` to install pre-commit into your git hooks. Before you commit, run
 `pre-commit run` to check if you meet the pre-commit requirements. If you use Windows (without WSL) and can't commit after installing pre-commit, you can run `pre-commit uninstall` to uninstall the hook. In WSL or Linux this is supposed to work.

flaml/automl.py

@@ -1474,7 +1474,12 @@ class AutoML:

         if "auto" == estimator_list:
             if self._state.task == "forecast":
-                estimator_list = ["fbprophet", "arima", "sarimax"]
+                try:
+                    import prophet
+
+                    estimator_list = ["prophet", "arima", "sarimax"]
+                except ImportError:
+                    estimator_list = ["arima", "sarimax"]
             elif self._state.task == "rank":
                 estimator_list = ["lgbm", "xgboost"]
             else:
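This hunk is the "#201 try import prophet" change: instead of unconditionally listing a learner whose package may not be installed, the default estimator list is chosen behind a guarded import. A minimal sketch of that pattern (the function name below is hypothetical, not FLAML's code):

```python
def default_forecast_estimators():
    """Pick forecasting learners based on which optional packages import.

    A sketch of the guarded-import pattern used in the hunk above.
    """
    try:
        import prophet  # optional dependency: pip install flaml[forecast]

        return ["prophet", "arima", "sarimax"]
    except ImportError:
        # degrade gracefully instead of failing later at fit() time
        return ["arima", "sarimax"]
```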
234
flaml/data.py
234
flaml/data.py
@@ -1,7 +1,7 @@
-'''!
+"""!
  * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
-'''
+"""

 import numpy as np
 from scipy.sparse import vstack, issparse
@@ -11,9 +11,10 @@ from .training_log import training_log_reader
 from datetime import datetime


-def load_openml_dataset(dataset_id, data_dir=None, random_state=0,
-                        dataset_format='dataframe'):
-    '''Load dataset from open ML.
+def load_openml_dataset(
+    dataset_id, data_dir=None, random_state=0, dataset_format="dataframe"
+):
+    """Load dataset from open ML.

     If the file is not cached locally, download it from open ML.

@@ -30,41 +31,43 @@ def load_openml_dataset(dataset_id, data_dir=None, random_state=0,
         X_test: Test data
         y_train: A series or array of labels for training data
         y_test: A series or array of labels for test data
-    '''
+    """
     import os
     import openml
     import pickle
     from sklearn.model_selection import train_test_split

-    filename = 'openml_ds' + str(dataset_id) + '.pkl'
+    filename = "openml_ds" + str(dataset_id) + ".pkl"
     filepath = os.path.join(data_dir, filename)
     if os.path.isfile(filepath):
-        print('load dataset from', filepath)
-        with open(filepath, 'rb') as f:
+        print("load dataset from", filepath)
+        with open(filepath, "rb") as f:
             dataset = pickle.load(f)
     else:
-        print('download dataset from openml')
+        print("download dataset from openml")
         dataset = openml.datasets.get_dataset(dataset_id)
         if not os.path.exists(data_dir):
             os.makedirs(data_dir)
-        with open(filepath, 'wb') as f:
+        with open(filepath, "wb") as f:
             pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
-    print('Dataset name:', dataset.name)
-    X, y, * \
-        __ = dataset.get_data(
-            target=dataset.default_target_attribute, dataset_format=dataset_format)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, random_state=random_state)
+    print("Dataset name:", dataset.name)
+    X, y, *__ = dataset.get_data(
+        target=dataset.default_target_attribute, dataset_format=dataset_format
+    )
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=random_state)
     print(
-        'X_train.shape: {}, y_train.shape: {};\nX_test.shape: {}, y_test.shape: {}'.format(
-            X_train.shape, y_train.shape, X_test.shape, y_test.shape,
+        "X_train.shape: {}, y_train.shape: {};\nX_test.shape: {}, y_test.shape: {}".format(
+            X_train.shape,
+            y_train.shape,
+            X_test.shape,
+            y_test.shape,
         )
     )
     return X_train, X_test, y_train, y_test


 def load_openml_task(task_id, data_dir):
-    '''Load task from open ML.
+    """Load task from open ML.

     Use the first fold of the task.
     If the file is not cached locally, download it from open ML.
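For orientation, this helper only changed formatting here, not behavior. A typical call looks like the sketch below; the dataset id and cache directory are arbitrary examples, not part of the commit:

```python
from flaml.data import load_openml_dataset

# downloads once from OpenML, then reuses the local pickle cache under ./data/
X_train, X_test, y_train, y_test = load_openml_dataset(
    dataset_id=1169, data_dir="./data/"
)
print(X_train.shape, y_train.shape)
```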
@@ -78,21 +81,22 @@ def load_openml_task(task_id, data_dir):
         X_test: A dataframe of test data
         y_train: A series of labels for training data
         y_test: A series of labels for test data
-    '''
+    """
     import os
     import openml
     import pickle

     task = openml.tasks.get_task(task_id)
-    filename = 'openml_task' + str(task_id) + '.pkl'
+    filename = "openml_task" + str(task_id) + ".pkl"
     filepath = os.path.join(data_dir, filename)
     if os.path.isfile(filepath):
-        print('load dataset from', filepath)
-        with open(filepath, 'rb') as f:
+        print("load dataset from", filepath)
+        with open(filepath, "rb") as f:
             dataset = pickle.load(f)
     else:
-        print('download dataset from openml')
+        print("download dataset from openml")
         dataset = task.get_dataset()
-        with open(filepath, 'wb') as f:
+        with open(filepath, "wb") as f:
             pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
     X, y, _, _ = dataset.get_data(task.target_name)
     train_indices, test_indices = task.get_train_test_split_indices(
@@ -105,15 +109,18 @@ def load_openml_task(task_id, data_dir):
     X_test = X.iloc[test_indices]
     y_test = y[test_indices]
     print(
-        'X_train.shape: {}, y_train.shape: {},\nX_test.shape: {}, y_test.shape: {}'.format(
-            X_train.shape, y_train.shape, X_test.shape, y_test.shape,
+        "X_train.shape: {}, y_train.shape: {},\nX_test.shape: {}, y_test.shape: {}".format(
+            X_train.shape,
+            y_train.shape,
+            X_test.shape,
+            y_test.shape,
         )
     )
     return X_train, X_test, y_train, y_test


 def get_output_from_log(filename, time_budget):
-    '''Get output from log file
+    """Get output from log file

     Args:
         filename: A string of the log file name
@@ -127,11 +134,11 @@ def get_output_from_log(filename, time_budget):
         config_list:
             A list of the estimator, sample size and config of each logged iter
         logged_metric_list: A list of the logged metric of each logged iter
-    '''
+    """

     best_config = None
     best_learner = None
-    best_val_loss = float('+inf')
+    best_val_loss = float("+inf")

     search_time_list = []
     config_list = []
@@ -144,7 +151,7 @@ def get_output_from_log(filename, time_budget):
             time_used = record.wall_clock_time
             val_loss = record.validation_loss
             config = record.config
-            learner = record.learner.split('_')[0]
+            learner = record.learner.split("_")[0]
             sample_size = record.sample_size
             metric = record.logged_metric

@@ -158,27 +165,34 @@ def get_output_from_log(filename, time_budget):
                 best_error_list.append(best_val_loss)
             logged_metric_list.append(metric)
             error_list.append(val_loss)
-            config_list.append({"Current Learner": learner,
-                                "Current Sample": sample_size,
-                                "Current Hyper-parameters": record.config,
-                                "Best Learner": best_learner,
-                                "Best Hyper-parameters": best_config})
+            config_list.append(
+                {
+                    "Current Learner": learner,
+                    "Current Sample": sample_size,
+                    "Current Hyper-parameters": record.config,
+                    "Best Learner": best_learner,
+                    "Best Hyper-parameters": best_config,
+                }
+            )

-    return (search_time_list, best_error_list, error_list, config_list,
-            logged_metric_list)
+    return (
+        search_time_list,
+        best_error_list,
+        error_list,
+        config_list,
+        logged_metric_list,
+    )


 def concat(X1, X2):
-    '''concatenate two matrices vertically
-    '''
+    """concatenate two matrices vertically"""
     if isinstance(X1, pd.DataFrame) or isinstance(X1, pd.Series):
         df = pd.concat([X1, X2], sort=False)
         df.reset_index(drop=True, inplace=True)
         if isinstance(X1, pd.DataFrame):
-            cat_columns = X1.select_dtypes(
-                include='category').columns
+            cat_columns = X1.select_dtypes(include="category").columns
             if len(cat_columns):
-                df[cat_columns] = df[cat_columns].astype('category')
+                df[cat_columns] = df[cat_columns].astype("category")
         return df
     if issparse(X1):
         return vstack((X1, X2))
@@ -187,8 +201,7 @@ def concat(X1, X2):


 class DataTransformer:
-    '''transform X, y
-    '''
+    """transform X, y"""

     def fit_transform(self, X, y, task):
         if isinstance(X, pd.DataFrame):
@@ -198,19 +211,25 @@ class DataTransformer:
             drop = False
             for column in X.columns:
                 # sklearn\utils\validation.py needs int/float values
-                if X[column].dtype.name in ('object', 'category'):
-                    if X[column].nunique() == 1 or X[column].nunique(
-                            dropna=True) == n - X[column].isnull().sum():
+                if X[column].dtype.name in ("object", "category"):
+                    if (
+                        X[column].nunique() == 1
+                        or X[column].nunique(dropna=True)
+                        == n - X[column].isnull().sum()
+                    ):
                         X.drop(columns=column, inplace=True)
                         drop = True
-                    elif X[column].dtype.name == 'category':
+                    elif X[column].dtype.name == "category":
                         current_categories = X[column].cat.categories
-                        if '__NAN__' not in current_categories:
-                            X[column] = X[column].cat.add_categories(
-                                '__NAN__').fillna('__NAN__')
+                        if "__NAN__" not in current_categories:
+                            X[column] = (
+                                X[column]
+                                .cat.add_categories("__NAN__")
+                                .fillna("__NAN__")
+                            )
                         cat_columns.append(column)
                     else:
-                        X[column] = X[column].fillna('__NAN__')
+                        X[column] = X[column].fillna("__NAN__")
                         cat_columns.append(column)
                 else:
                     # print(X[column].dtype.name)
@@ -218,17 +237,27 @@ class DataTransformer:
                     X.drop(columns=column, inplace=True)
                     drop = True
                 else:
-                    if X[column].dtype.name == 'datetime64[ns]':
+                    if X[column].dtype.name == "datetime64[ns]":
                         tmp_dt = X[column].dt
-                        new_columns_dict = {f'year_{column}': tmp_dt.year, f'month_{column}': tmp_dt.month,
-                                            f'day_{column}': tmp_dt.day, f'hour_{column}': tmp_dt.hour,
-                                            f'minute_{column}': tmp_dt.minute, f'second_{column}': tmp_dt.second,
-                                            f'dayofweek_{column}': tmp_dt.dayofweek,
-                                            f'dayofyear_{column}': tmp_dt.dayofyear,
-                                            f'quarter_{column}': tmp_dt.quarter}
+                        new_columns_dict = {
+                            f"year_{column}": tmp_dt.year,
+                            f"month_{column}": tmp_dt.month,
+                            f"day_{column}": tmp_dt.day,
+                            f"hour_{column}": tmp_dt.hour,
+                            f"minute_{column}": tmp_dt.minute,
+                            f"second_{column}": tmp_dt.second,
+                            f"dayofweek_{column}": tmp_dt.dayofweek,
+                            f"dayofyear_{column}": tmp_dt.dayofyear,
+                            f"quarter_{column}": tmp_dt.quarter,
+                        }
                         for new_col_name in new_columns_dict.keys():
-                            if new_col_name not in X.columns and \
-                                    new_columns_dict.get(new_col_name).nunique(dropna=False) >= 2:
+                            if (
+                                new_col_name not in X.columns
+                                and new_columns_dict.get(new_col_name).nunique(
+                                    dropna=False
+                                )
+                                >= 2
+                            ):
                                 X[new_col_name] = new_columns_dict.get(new_col_name)
                                 num_columns.append(new_col_name)
                         X[column] = X[column].map(datetime.toordinal)
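The reformatted block above expands each datetime column into calendar features and then ordinal-encodes the original column. A standalone sketch of the same idea, on toy data with pandas only (the three features shown are a subset of the nine the transformer generates):

```python
import pandas as pd
from datetime import datetime

df = pd.DataFrame({"ts": pd.date_range("2021-01-01", periods=4, freq="12H")})
tmp_dt = df["ts"].dt
# same naming scheme the transformer uses for a column called "ts"
df["year_ts"], df["month_ts"], df["hour_ts"] = tmp_dt.year, tmp_dt.month, tmp_dt.hour
df["ts"] = df["ts"].map(datetime.toordinal)  # original column becomes an ordinal int
print(df)
```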
@@ -239,11 +268,12 @@ class DataTransformer:
                             num_columns.append(column)
             X = X[cat_columns + num_columns]
             if cat_columns:
-                X[cat_columns] = X[cat_columns].astype('category')
+                X[cat_columns] = X[cat_columns].astype("category")
             if num_columns:
                 X_num = X[num_columns]
                 if np.issubdtype(X_num.columns.dtype, np.integer) and (
-                    drop or min(X_num.columns) != 0
+                    drop
+                    or min(X_num.columns) != 0
                     or max(X_num.columns) != X_num.shape[1] - 1
                 ):
                     X_num.columns = range(X_num.shape[1])
@@ -252,17 +282,31 @@ class DataTransformer:
                     drop = False
                 from sklearn.impute import SimpleImputer
                 from sklearn.compose import ColumnTransformer
-                self.transformer = ColumnTransformer([(
-                    'continuous',
-                    SimpleImputer(missing_values=np.nan, strategy='median'),
-                    X_num.columns)])
+
+                self.transformer = ColumnTransformer(
+                    [
+                        (
+                            "continuous",
+                            SimpleImputer(missing_values=np.nan, strategy="median"),
+                            X_num.columns,
+                        )
+                    ]
+                )
                 X[num_columns] = self.transformer.fit_transform(X_num)
-            self._cat_columns, self._num_columns, self._datetime_columns = \
-                cat_columns, num_columns, datetime_columns
+            self._cat_columns, self._num_columns, self._datetime_columns = (
+                cat_columns,
+                num_columns,
+                datetime_columns,
+            )
             self._drop = drop

-        if task in ('binary', 'multi', 'classification'):
+        if task in (
+            "binary",
+            "multi",
+            "classification",
+        ) or not pd.api.types.is_numeric_dtype(y):
            from sklearn.preprocessing import LabelEncoder
+
            self.label_transformer = LabelEncoder()
            y = self.label_transformer.fit_transform(y)
        else:
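Beyond the Black reformatting, this hunk carries a behavior change: the added `or not pd.api.types.is_numeric_dtype(y)` label-encodes string targets even outside classification tasks (e.g. for "rank"). A minimal illustration of what that branch does:

```python
import pandas as pd
from sklearn.preprocessing import LabelEncoder

y = pd.Series(["bad", "good", "good", "bad"])
if not pd.api.types.is_numeric_dtype(y):  # the condition added in this hunk
    y = LabelEncoder().fit_transform(y)
print(y)  # [0 1 1 0]: integer codes a ranker or regressor can consume
```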
@@ -272,34 +316,46 @@ class DataTransformer:
     def transform(self, X):
         X = X.copy()
         if isinstance(X, pd.DataFrame):
-            cat_columns, num_columns, datetime_columns = self._cat_columns, \
-                self._num_columns, self._datetime_columns
+            cat_columns, num_columns, datetime_columns = (
+                self._cat_columns,
+                self._num_columns,
+                self._datetime_columns,
+            )
             if datetime_columns:
                 for column in datetime_columns:
                     tmp_dt = X[column].dt
-                    new_columns_dict = {f'year_{column}': tmp_dt.year, f'month_{column}': tmp_dt.month,
-                                        f'day_{column}': tmp_dt.day, f'hour_{column}': tmp_dt.hour,
-                                        f'minute_{column}': tmp_dt.minute, f'second_{column}': tmp_dt.second,
-                                        f'dayofweek_{column}': tmp_dt.dayofweek,
-                                        f'dayofyear_{column}': tmp_dt.dayofyear,
-                                        f'quarter_{column}': tmp_dt.quarter}
+                    new_columns_dict = {
+                        f"year_{column}": tmp_dt.year,
+                        f"month_{column}": tmp_dt.month,
+                        f"day_{column}": tmp_dt.day,
+                        f"hour_{column}": tmp_dt.hour,
+                        f"minute_{column}": tmp_dt.minute,
+                        f"second_{column}": tmp_dt.second,
+                        f"dayofweek_{column}": tmp_dt.dayofweek,
+                        f"dayofyear_{column}": tmp_dt.dayofyear,
+                        f"quarter_{column}": tmp_dt.quarter,
+                    }
                     for new_col_name in new_columns_dict.keys():
-                        if new_col_name not in X.columns and \
-                                new_columns_dict.get(new_col_name).nunique(dropna=False) >= 2:
+                        if (
+                            new_col_name not in X.columns
+                            and new_columns_dict.get(new_col_name).nunique(dropna=False)
+                            >= 2
+                        ):
                             X[new_col_name] = new_columns_dict.get(new_col_name)
                     X[column] = X[column].map(datetime.toordinal)
                     del tmp_dt
             X = X[cat_columns + num_columns].copy()
             for column in cat_columns:
-                if X[column].dtype.name == 'object':
-                    X[column] = X[column].fillna('__NAN__')
-                elif X[column].dtype.name == 'category':
+                if X[column].dtype.name == "object":
+                    X[column] = X[column].fillna("__NAN__")
+                elif X[column].dtype.name == "category":
                     current_categories = X[column].cat.categories
-                    if '__NAN__' not in current_categories:
-                        X[column] = X[column].cat.add_categories(
-                            '__NAN__').fillna('__NAN__')
+                    if "__NAN__" not in current_categories:
+                        X[column] = (
+                            X[column].cat.add_categories("__NAN__").fillna("__NAN__")
+                        )
             if cat_columns:
-                X[cat_columns] = X[cat_columns].astype('category')
+                X[cat_columns] = X[cat_columns].astype("category")
             if num_columns:
                 X_num = X[num_columns].fillna(np.nan)
                 if self._drop:
flaml/ml.py (424 lines changed)
@@ -1,65 +1,90 @@
-'''!
- * Copyright (c) 2020-2021 Microsoft Corporation. All rights reserved.
+"""!
+ * Copyright (c) Microsoft Corporation. All rights reserved.
  * Licensed under the MIT License.
-'''
+"""

 import time
 import numpy as np
 import pandas as pd
-from sklearn.metrics import mean_squared_error, r2_score, roc_auc_score, \
-    accuracy_score, mean_absolute_error, log_loss, average_precision_score, \
-    f1_score, mean_absolute_percentage_error, ndcg_score
+from sklearn.metrics import (
+    mean_squared_error,
+    r2_score,
+    roc_auc_score,
+    accuracy_score,
+    mean_absolute_error,
+    log_loss,
+    average_precision_score,
+    f1_score,
+    mean_absolute_percentage_error,
+    ndcg_score,
+)
 from sklearn.model_selection import RepeatedStratifiedKFold, GroupKFold, TimeSeriesSplit
 from .model import (
-    XGBoostEstimator, XGBoostSklearnEstimator, RandomForestEstimator,
-    LGBMEstimator, LRL1Classifier, LRL2Classifier, CatBoostEstimator,
-    ExtraTreeEstimator, KNeighborsEstimator, FBProphet, ARIMA, SARIMAX)
+    XGBoostEstimator,
+    XGBoostSklearnEstimator,
+    RandomForestEstimator,
+    LGBMEstimator,
+    LRL1Classifier,
+    LRL2Classifier,
+    CatBoostEstimator,
+    ExtraTreeEstimator,
+    KNeighborsEstimator,
+    Prophet,
+    ARIMA,
+    SARIMAX,
+)
 from .data import group_counts

 import logging

 logger = logging.getLogger(__name__)


 def get_estimator_class(task, estimator_name):
-    ''' when adding a new learner, need to add an elif branch '''
+    """when adding a new learner, need to add an elif branch"""

-    if 'xgboost' == estimator_name:
-        if 'regression' == task:
+    if "xgboost" == estimator_name:
+        if "regression" == task:
             estimator_class = XGBoostEstimator
         else:
             estimator_class = XGBoostSklearnEstimator
-    elif 'rf' == estimator_name:
+    elif "rf" == estimator_name:
         estimator_class = RandomForestEstimator
-    elif 'lgbm' == estimator_name:
+    elif "lgbm" == estimator_name:
         estimator_class = LGBMEstimator
-    elif 'lrl1' == estimator_name:
+    elif "lrl1" == estimator_name:
         estimator_class = LRL1Classifier
-    elif 'lrl2' == estimator_name:
+    elif "lrl2" == estimator_name:
         estimator_class = LRL2Classifier
-    elif 'catboost' == estimator_name:
+    elif "catboost" == estimator_name:
         estimator_class = CatBoostEstimator
-    elif 'extra_tree' == estimator_name:
+    elif "extra_tree" == estimator_name:
         estimator_class = ExtraTreeEstimator
-    elif 'kneighbor' == estimator_name:
+    elif "kneighbor" == estimator_name:
         estimator_class = KNeighborsEstimator
-    elif 'prophet' in estimator_name:
-        estimator_class = FBProphet
-    elif estimator_name == 'arima':
+    elif "prophet" in estimator_name:
+        estimator_class = Prophet
+    elif estimator_name == "arima":
         estimator_class = ARIMA
-    elif estimator_name == 'sarimax':
+    elif estimator_name == "sarimax":
         estimator_class = SARIMAX
     else:
         raise ValueError(
-            estimator_name + ' is not a built-in learner. '
-            'Please use AutoML.add_learner() to add a customized learner.')
+            estimator_name + " is not a built-in learner. "
+            "Please use AutoML.add_learner() to add a customized learner."
+        )
     return estimator_class


 def sklearn_metric_loss_score(
-    metric_name, y_predict, y_true, labels=None, sample_weight=None,
+    metric_name,
+    y_predict,
+    y_true,
+    labels=None,
+    sample_weight=None,
     groups=None,
 ):
-    '''Loss using the specified metric
+    """Loss using the specified metric

     Args:
         metric_name: A string of the metric name, one of
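A small usage sketch of the function whose signature was just reflowed: every built-in metric is reported as a loss, so score-type metrics come back as `1 - score`. The toy labels below are illustrative:

```python
from flaml.ml import sklearn_metric_loss_score

y_true = [0, 1, 1, 0]
y_pred = [0, 1, 0, 0]
# accuracy is 0.75, so the reported loss is 1 - 0.75 = 0.25
print(sklearn_metric_loss_score("accuracy", y_pred, y_true))  # 0.25
```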
@@ -76,60 +101,63 @@ def sklearn_metric_loss_score(

     Returns:
         score: A float number of the loss, the lower the better.
-    '''
+    """
     metric_name = metric_name.lower()
-    if 'r2' == metric_name:
+    if "r2" == metric_name:
         score = 1.0 - r2_score(y_true, y_predict, sample_weight=sample_weight)
-    elif metric_name == 'rmse':
-        score = np.sqrt(mean_squared_error(
-            y_true, y_predict, sample_weight=sample_weight))
-    elif metric_name == 'mae':
-        score = mean_absolute_error(
-            y_true, y_predict, sample_weight=sample_weight)
-    elif metric_name == 'mse':
-        score = mean_squared_error(
-            y_true, y_predict, sample_weight=sample_weight)
-    elif metric_name == 'accuracy':
-        score = 1.0 - accuracy_score(
-            y_true, y_predict, sample_weight=sample_weight)
-    elif metric_name == 'roc_auc':
-        score = 1.0 - roc_auc_score(
-            y_true, y_predict, sample_weight=sample_weight)
-    elif metric_name == 'roc_auc_ovr':
-        score = 1.0 - roc_auc_score(
-            y_true, y_predict, sample_weight=sample_weight, multi_class='ovr')
-    elif metric_name == 'roc_auc_ovo':
-        score = 1.0 - roc_auc_score(
-            y_true, y_predict, sample_weight=sample_weight, multi_class='ovo')
-    elif 'log_loss' == metric_name:
-        score = log_loss(
-            y_true, y_predict, labels=labels, sample_weight=sample_weight)
-    elif 'mape' == metric_name:
+    elif metric_name == "rmse":
+        score = np.sqrt(
+            mean_squared_error(y_true, y_predict, sample_weight=sample_weight)
+        )
+    elif metric_name == "mae":
+        score = mean_absolute_error(y_true, y_predict, sample_weight=sample_weight)
+    elif metric_name == "mse":
+        score = mean_squared_error(y_true, y_predict, sample_weight=sample_weight)
+    elif metric_name == "accuracy":
+        score = 1.0 - accuracy_score(y_true, y_predict, sample_weight=sample_weight)
+    elif metric_name == "roc_auc":
+        score = 1.0 - roc_auc_score(y_true, y_predict, sample_weight=sample_weight)
+    elif metric_name == "roc_auc_ovr":
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight, multi_class="ovr"
+        )
+    elif metric_name == "roc_auc_ovo":
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight, multi_class="ovo"
+        )
+    elif "log_loss" == metric_name:
+        score = log_loss(y_true, y_predict, labels=labels, sample_weight=sample_weight)
+    elif "mape" == metric_name:
         try:
-            score = mean_absolute_percentage_error(
-                y_true, y_predict)
+            score = mean_absolute_percentage_error(y_true, y_predict)
         except ValueError:
             return np.inf
-    elif 'micro_f1' == metric_name:
+    elif "micro_f1" == metric_name:
         score = 1 - f1_score(
-            y_true, y_predict, sample_weight=sample_weight, average='micro')
-    elif 'macro_f1' == metric_name:
+            y_true, y_predict, sample_weight=sample_weight, average="micro"
+        )
+    elif "macro_f1" == metric_name:
         score = 1 - f1_score(
-            y_true, y_predict, sample_weight=sample_weight, average='macro')
-    elif 'f1' == metric_name:
+            y_true, y_predict, sample_weight=sample_weight, average="macro"
+        )
+    elif "f1" == metric_name:
         score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
-    elif 'ap' == metric_name:
+    elif "ap" == metric_name:
         score = 1 - average_precision_score(
-            y_true, y_predict, sample_weight=sample_weight)
-    elif 'ndcg' in metric_name:
-        if '@' in metric_name:
-            k = int(metric_name.split('@', 1)[-1])
+            y_true, y_predict, sample_weight=sample_weight
+        )
+    elif "ndcg" in metric_name:
+        if "@" in metric_name:
+            k = int(metric_name.split("@", 1)[-1])
             counts = group_counts(groups)
             score = 0
             psum = 0
             for c in counts:
-                score -= ndcg_score(np.asarray([y_true[psum:psum + c]]),
-                                    np.asarray([y_predict[psum:psum + c]]), k=k)
+                score -= ndcg_score(
+                    np.asarray([y_true[psum : psum + c]]),
+                    np.asarray([y_predict[psum : psum + c]]),
+                    k=k,
+                )
                 psum += c
             score /= len(counts)
             score += 1
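The "ndcg@k" branch above averages per-query NDCG over a flattened group layout and converts it into a loss. A self-contained sketch of that loop, where the explicit `counts` list stands in for `flaml.data.group_counts(groups)` and the relevance values are illustrative:

```python
import numpy as np
from sklearn.metrics import ndcg_score

y_true = np.array([2, 1, 0, 1, 0, 2])            # relevance labels, two queries
y_pred = np.array([0.9, 0.3, 0.1, 0.2, 0.1, 0.8])
counts, k = [3, 3], 2                             # two groups of 3 docs, NDCG@2

score, psum = 0.0, 0
for c in counts:                                  # one ndcg_score call per query group
    score -= ndcg_score(
        np.asarray([y_true[psum : psum + c]]),
        np.asarray([y_pred[psum : psum + c]]),
        k=k,
    )
    psum += c
score /= len(counts)
score += 1                                        # mean NDCG becomes a loss in [0, 1]
print(score)
```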
@@ -137,56 +165,96 @@ def sklearn_metric_loss_score(
             score = 1 - ndcg_score([y_true], [y_predict])
     else:
         raise ValueError(
-            metric_name + ' is not a built-in metric, '
-            'currently built-in metrics are: '
-            'r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,'
-            'log_loss, mape, f1, micro_f1, macro_f1, ap. '
-            'please pass a customized metric function to AutoML.fit(metric=func)')
+            metric_name + " is not a built-in metric, "
+            "currently built-in metrics are: "
+            "r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo,"
+            "log_loss, mape, f1, micro_f1, macro_f1, ap. "
+            "please pass a customized metric function to AutoML.fit(metric=func)"
+        )
     return score


 def get_y_pred(estimator, X, eval_metric, obj):
-    if eval_metric in ['roc_auc', 'ap'] and 'binary' in obj:
+    if eval_metric in ["roc_auc", "ap"] and "binary" in obj:
         y_pred_classes = estimator.predict_proba(X)
-        y_pred = y_pred_classes[
-            :, 1] if y_pred_classes.ndim > 1 else y_pred_classes
-    elif eval_metric in ['log_loss', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo']:
+        y_pred = y_pred_classes[:, 1] if y_pred_classes.ndim > 1 else y_pred_classes
+    elif eval_metric in ["log_loss", "roc_auc", "roc_auc_ovr", "roc_auc_ovo"]:
         y_pred = estimator.predict_proba(X)
     else:
         y_pred = estimator.predict(X)
     return y_pred


-def _eval_estimator(config, estimator, X_train, y_train, X_test, y_test, weight_test,
-                    groups_test, eval_metric, obj, labels=None,
-                    log_training_metric=False, fit_kwargs={}):
+def _eval_estimator(
+    config,
+    estimator,
+    X_train,
+    y_train,
+    X_test,
+    y_test,
+    weight_test,
+    groups_test,
+    eval_metric,
+    obj,
+    labels=None,
+    log_training_metric=False,
+    fit_kwargs={},
+):
     if isinstance(eval_metric, str):
         pred_start = time.time()
         test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
         pred_time = (time.time() - pred_start) / X_test.shape[0]
-        test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y, y_test,
-                                              labels, weight_test, groups_test)
+        test_loss = sklearn_metric_loss_score(
+            eval_metric, test_pred_y, y_test, labels, weight_test, groups_test
+        )
         metric_for_logging = {}
         if log_training_metric:
             train_pred_y = get_y_pred(estimator, X_train, eval_metric, obj)
-            metric_for_logging['train_loss'] = sklearn_metric_loss_score(
-                eval_metric, train_pred_y, y_train, labels,
-                fit_kwargs.get('sample_weight'), fit_kwargs.get('groups'))
+            metric_for_logging["train_loss"] = sklearn_metric_loss_score(
+                eval_metric,
+                train_pred_y,
+                y_train,
+                labels,
+                fit_kwargs.get("sample_weight"),
+                fit_kwargs.get("groups"),
+            )
     else:  # customized metric function
         test_loss, metric_for_logging = eval_metric(
-            X_test, y_test, estimator, labels, X_train, y_train, weight_test,
-            fit_kwargs.get('sample_weight'), config, groups_test,
-            fit_kwargs.get('groups'))
+            X_test,
+            y_test,
+            estimator,
+            labels,
+            X_train,
+            y_train,
+            weight_test,
+            fit_kwargs.get("sample_weight"),
+            config,
+            groups_test,
+            fit_kwargs.get("groups"),
+        )
         if isinstance(metric_for_logging, dict):
-            pred_time = metric_for_logging.get('pred_time', 0)
+            pred_time = metric_for_logging.get("pred_time", 0)
         test_pred_y = None
         # eval_metric may return test_pred_y but not necessarily. Setting None for now.
     return test_loss, metric_for_logging, pred_time, test_pred_y


-def get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_test,
-                  groups_test, eval_metric, obj, labels=None, budget=None,
-                  log_training_metric=False, fit_kwargs={}):
+def get_test_loss(
+    config,
+    estimator,
+    X_train,
+    y_train,
+    X_test,
+    y_test,
+    weight_test,
+    groups_test,
+    eval_metric,
+    obj,
+    labels=None,
+    budget=None,
+    log_training_metric=False,
+    fit_kwargs={},
+):

     start = time.time()
     # if groups_test is not None:
@@ -195,16 +263,37 @@ def get_test_loss(config, estimator, X_train, y_train, X_test, y_test, weight_te
     # fit_kwargs['y_val'] = y_test
     estimator.fit(X_train, y_train, budget, **fit_kwargs)
     test_loss, metric_for_logging, pred_time, _ = _eval_estimator(
-        config, estimator, X_train, y_train, X_test, y_test,
-        weight_test, groups_test, eval_metric, obj,
-        labels, log_training_metric, fit_kwargs)
+        config,
+        estimator,
+        X_train,
+        y_train,
+        X_test,
+        y_test,
+        weight_test,
+        groups_test,
+        eval_metric,
+        obj,
+        labels,
+        log_training_metric,
+        fit_kwargs,
+    )
     train_time = time.time() - start
     return test_loss, metric_for_logging, train_time, pred_time


-def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
-                      task, eval_metric, best_val_loss,
-                      log_training_metric=False, fit_kwargs={}):
+def evaluate_model_CV(
+    config,
+    estimator,
+    X_train_all,
+    y_train_all,
+    budget,
+    kf,
+    task,
+    eval_metric,
+    best_val_loss,
+    log_training_metric=False,
+    fit_kwargs={},
+):
     start_time = time.time()
     total_val_loss = 0
     total_metric = None
@@ -213,7 +302,7 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
     valid_fold_num = total_fold_num = 0
     n = kf.get_n_splits()
     X_train_split, y_train_split = X_train_all, y_train_all
-    if task in ('binary', 'multi'):
+    if task in ("binary", "multi"):
         labels = np.unique(y_train_all)
     else:
         labels = None
@@ -225,8 +314,8 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
         groups = kf.groups
         kf = kf.split(X_train_split, y_train_split, groups)
         shuffle = False
-    elif isinstance(kf, TimeSeriesSplit) and task == 'forecast':
-        y_train_all = pd.DataFrame(y_train_all, columns=['y'])
+    elif isinstance(kf, TimeSeriesSplit) and task == "forecast":
+        y_train_all = pd.DataFrame(y_train_all, columns=["y"])
         train = X_train_all.join(y_train_all)
         kf = kf.split(train)
         shuffle = False
@@ -237,8 +326,8 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
     rng = np.random.RandomState(2020)
     val_loss_list = []
     budget_per_train = budget / n
-    if 'sample_weight' in fit_kwargs:
-        weight = fit_kwargs['sample_weight']
+    if "sample_weight" in fit_kwargs:
+        weight = fit_kwargs["sample_weight"]
         weight_val = None
     else:
         weight = weight_val = None
@@ -246,37 +335,48 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,
         if shuffle:
             train_index = rng.permutation(train_index)
         if isinstance(X_train_all, pd.DataFrame):
-            X_train, X_val = X_train_split.iloc[
-                train_index], X_train_split.iloc[val_index]
+            X_train = X_train_split.iloc[train_index]
+            X_val = X_train_split.iloc[val_index]
         else:
-            X_train, X_val = X_train_split[
-                train_index], X_train_split[val_index]
+            X_train, X_val = X_train_split[train_index], X_train_split[val_index]
         y_train, y_val = y_train_split[train_index], y_train_split[val_index]
         estimator.cleanup()
         if weight is not None:
-            fit_kwargs['sample_weight'], weight_val = weight[
-                train_index], weight[val_index]
+            fit_kwargs["sample_weight"], weight_val = (
+                weight[train_index],
+                weight[val_index],
+            )
         if groups is not None:
-            fit_kwargs['groups'] = groups[train_index]
+            fit_kwargs["groups"] = groups[train_index]
             groups_val = groups[val_index]
         else:
             groups_val = None
         val_loss_i, metric_i, train_time_i, pred_time_i = get_test_loss(
-            config, estimator, X_train, y_train, X_val, y_val, weight_val,
-            groups_val, eval_metric, task, labels, budget_per_train,
-            log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
+            config,
+            estimator,
+            X_train,
+            y_train,
+            X_val,
+            y_val,
+            weight_val,
+            groups_val,
+            eval_metric,
+            task,
+            labels,
+            budget_per_train,
+            log_training_metric=log_training_metric,
+            fit_kwargs=fit_kwargs,
+        )
         if weight is not None:
-            fit_kwargs['sample_weight'] = weight
+            fit_kwargs["sample_weight"] = weight
         valid_fold_num += 1
         total_fold_num += 1
         total_val_loss += val_loss_i
         if log_training_metric or not isinstance(eval_metric, str):
             if isinstance(total_metric, list):
-                total_metric = [
-                    total_metric[i] + v for i, v in enumerate(metric_i)]
+                total_metric = [total_metric[i] + v for i, v in enumerate(metric_i)]
             elif isinstance(total_metric, dict):
-                total_metric = {
-                    k: total_metric[k] + v for k, v in metric_i.items()}
+                total_metric = {k: total_metric[k] + v for k, v in metric_i.items()}
             elif total_metric is not None:
                 total_metric += metric_i
             else:
@@ -307,35 +407,73 @@ def evaluate_model_CV(config, estimator, X_train_all, y_train_all, budget, kf,


 def compute_estimator(
-    X_train, y_train, X_val, y_val, weight_val, groups_val, budget, kf,
-    config_dic, task, estimator_name, eval_method, eval_metric,
-    best_val_loss=np.Inf, n_jobs=1, estimator_class=None, log_training_metric=False,
-    fit_kwargs={}
+    X_train,
+    y_train,
+    X_val,
+    y_val,
+    weight_val,
+    groups_val,
+    budget,
+    kf,
+    config_dic,
+    task,
+    estimator_name,
+    eval_method,
+    eval_metric,
+    best_val_loss=np.Inf,
+    n_jobs=1,
+    estimator_class=None,
+    log_training_metric=False,
+    fit_kwargs={},
 ):
-    estimator_class = estimator_class or get_estimator_class(
-        task, estimator_name)
-    estimator = estimator_class(
-        **config_dic, task=task, n_jobs=n_jobs)
-    if 'holdout' in eval_method:
+    estimator_class = estimator_class or get_estimator_class(task, estimator_name)
+    estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
+    if "holdout" in eval_method:
         val_loss, metric_for_logging, train_time, pred_time = get_test_loss(
-            config_dic, estimator, X_train, y_train, X_val, y_val, weight_val,
-            groups_val, eval_metric, task, budget=budget,
-            log_training_metric=log_training_metric, fit_kwargs=fit_kwargs)
+            config_dic,
+            estimator,
+            X_train,
+            y_train,
+            X_val,
+            y_val,
+            weight_val,
+            groups_val,
+            eval_metric,
+            task,
+            budget=budget,
+            log_training_metric=log_training_metric,
+            fit_kwargs=fit_kwargs,
+        )
     else:
         val_loss, metric_for_logging, train_time, pred_time = evaluate_model_CV(
-            config_dic, estimator, X_train, y_train, budget, kf, task,
-            eval_metric, best_val_loss, log_training_metric=log_training_metric,
-            fit_kwargs=fit_kwargs)
+            config_dic,
+            estimator,
+            X_train,
+            y_train,
+            budget,
+            kf,
+            task,
+            eval_metric,
+            best_val_loss,
+            log_training_metric=log_training_metric,
+            fit_kwargs=fit_kwargs,
+        )
     return estimator, val_loss, metric_for_logging, train_time, pred_time


 def train_estimator(
-    X_train, y_train, config_dic, task,
-    estimator_name, n_jobs=1, estimator_class=None, budget=None, fit_kwargs={}
+    X_train,
+    y_train,
+    config_dic,
+    task,
+    estimator_name,
+    n_jobs=1,
+    estimator_class=None,
+    budget=None,
+    fit_kwargs={},
 ):
     start_time = time.time()
-    estimator_class = estimator_class or get_estimator_class(
-        task, estimator_name)
+    estimator_class = estimator_class or get_estimator_class(task, estimator_name)
     estimator = estimator_class(**config_dic, task=task, n_jobs=n_jobs)
     if X_train is not None:
         train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
@@ -347,14 +485,14 @@ def train_estimator(

 def get_classification_objective(num_labels: int) -> str:
     if num_labels == 2:
-        objective_name = 'binary'
+        objective_name = "binary"
     else:
-        objective_name = 'multi'
+        objective_name = "multi"
     return objective_name


 def norm_confusion_matrix(y_true, y_pred):
-    '''normalized confusion matrix
+    """normalized confusion matrix

     Args:
         estimator: A multi-class classification estimator
@@ -363,15 +501,16 @@ def norm_confusion_matrix(y_true, y_pred):

     Returns:
         A normalized confusion matrix
-    '''
+    """
     from sklearn.metrics import confusion_matrix

     conf_mat = confusion_matrix(y_true, y_pred)
-    norm_conf_mat = conf_mat.astype('float') / conf_mat.sum(axis=1)[:, np.newaxis]
+    norm_conf_mat = conf_mat.astype("float") / conf_mat.sum(axis=1)[:, np.newaxis]
     return norm_conf_mat


 def multi_class_curves(y_true, y_pred_proba, curve_func):
-    '''Binarize the data for multi-class tasks and produce ROC or precision-recall curves
+    """Binarize the data for multi-class tasks and produce ROC or precision-recall curves

     Args:
         y_true: A numpy array or a pandas series of true labels
@@ -384,8 +523,9 @@ def multi_class_curves(y_true, y_pred_proba, curve_func):
         curve_x[0] is an 1D array of the x coordinates of class 0
         The second dictionary curve_y stores the y coordinates of each curve, e.g.,
         curve_y[0] is an 1D array of the y coordinates of class 0
-    '''
+    """
     from sklearn.preprocessing import label_binarize
+
     classes = np.unique(y_true)
     y_true_binary = label_binarize(y_true, classes=classes)

flaml/model.py (887 lines changed): file diff suppressed because it is too large.
flaml/version.py

@@ -1 +1 @@
-__version__ = "0.6.3"
+__version__ = "0.6.4"
setup.py (2 lines changed)
@@ -32,7 +32,7 @@ setuptools.setup(
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://github.com/microsoft/FLAML",
-    packages=setuptools.find_packages(),
+    packages=setuptools.find_packages(include=["flaml*"]),
     install_requires=install_requires,
     extras_require={
         "notebook": [
test/test_forecast.py

@@ -30,9 +30,11 @@ def test_forecast_automl(budget=5):
     }
     """The main flaml automl API"""
     try:
+        import prophet
+
         automl.fit(dataframe=df, **settings, period=time_horizon)
     except ImportError:
-        print("not using FBProphet due to ImportError")
+        print("not using prophet due to ImportError")
         automl.fit(
             dataframe=df,
             **settings,
@@ -79,7 +81,7 @@ def test_forecast_automl(budget=5):
     try:
         automl.fit(X_train=X_train, y_train=y_train, **settings, period=time_horizon)
     except ImportError:
-        print("not using FBProphet due to ImportError")
+        print("not using prophet due to ImportError")
         automl.fit(
             X_train=X_train,
             y_train=y_train,
@@ -94,6 +96,8 @@ def test_numpy():
     y_train = np.random.random(size=72)
     automl = AutoML()
     try:
+        import prophet
+
         automl.fit(
             X_train=X_train[:60],  # a single column of timestamp
             y_train=y_train,  # value for each timestamp
@@ -105,9 +109,9 @@ def test_numpy():
         print(automl.predict(X_train[60:]))
         print(automl.predict(12))
     except ValueError:
-        print("ValueError for FBProphet is raised as expected.")
+        print("ValueError for prophet is raised as expected.")
     except ImportError:
-        print("not using FBProphet due to ImportError")
+        print("not using prophet due to ImportError")
         automl = AutoML()
         automl.fit(
             X_train=X_train[:72],  # a single column of timestamp