diff --git a/flaml/automl.py b/flaml/automl.py
index 782af015b..e682bfb5c 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -162,7 +162,6 @@ class AutoMLState:
     def _compute_with_config_base(self,
                                   estimator,
                                   config_w_resource):
-        compute_start_time = time.time()
         if 'FLAML_sample_size' in config_w_resource:
             sample_size = int(config_w_resource['FLAML_sample_size'])
         else:
@@ -181,14 +180,14 @@ class AutoMLState:
 
             budget = time_left if sample_size == self.data_size else \
                 time_left / 2 * sample_size / self.data_size
-        trained_estimator, val_loss, train_loss, time2eval, _ = \
+        trained_estimator, val_loss, train_loss, time2eval, pred_time = \
             compute_estimator(
                 sampled_X_train,
                 sampled_y_train,
                 self.X_val,
                 self.y_val,
                 self.weight_val,
-                budget,
+                min(budget, self.train_time_limit),
                 self.kf,
                 config,
                 self.task,
@@ -201,7 +200,7 @@ class AutoMLState:
                 self.log_training_metric,
                 self.fit_kwargs)
         result = {
-            'total_time': time.time() - compute_start_time,
+            'pred_time': pred_time,
             'time2eval': time2eval,
             'train_loss': train_loss,
             'val_loss': val_loss,
@@ -799,6 +798,8 @@ class AutoML:
             n_splits=N_SPLITS,
             log_training_metric=False,
             mem_thres=MEM_THRES,
+            pred_time_limit=np.inf,
+            train_time_limit=np.inf,
             X_val=None,
             y_val=None,
             sample_weight_val=None,
@@ -813,7 +814,7 @@ class AutoML:
 
         Args:
             X_train: A numpy array or a pandas dataframe of training data in
-             shape (n, m)
+                shape (n, m)
             y_train: A numpy array or a pandas series of labels in shape (n,)
             dataframe: A dataframe of training data including label column
             label: A str of the label column name
@@ -835,7 +836,7 @@ class AutoML:
                 return metric_to_minimize, metrics_to_log
 
                 which returns a float number as the minimization objective,
-                and a tuple of floats as the metrics to log
+                and a tuple of floats or a dictionary as the metrics to log
             task: A string of the task type, e.g.,
                 'classification', 'regression'
             n_jobs: An integer of the number of threads for training
@@ -865,6 +866,8 @@ class AutoML:
             log_training_metric: A boolean of whether to log the
                 training metric for each model.
             mem_thres: A float of the memory size constraint in bytes
+            pred_time_limit: A float of the prediction latency constraint per sample, in seconds
+            train_time_limit: A float of the training time constraint in seconds
             X_val: None or a numpy array or a pandas dataframe of validation data
             y_val: None or a numpy array or a pandas series of validation labels
             sample_weight_val: None or a numpy array of the sample weight of
@@ -955,6 +958,8 @@ class AutoML:
         self._ensemble = ensemble
         self._max_iter = max_iter
         self._mem_thres = mem_thres
+        self._pred_time_limit = pred_time_limit
+        self._state.train_time_limit = train_time_limit
         self._log_type = log_type
         self.split_ratio = split_ratio
         self._save_model_history = model_history
@@ -1047,6 +1052,10 @@ class AutoML:
             points_to_evaluate = [search_state.init_config]
             low_cost_partial_config = search_state.low_cost_partial_config
             if self._hpo_method in ('bs', 'cfo', 'grid'):
+                metric_constraints = []
+                if np.isfinite(self._pred_time_limit):
+                    metric_constraints.append(
+                        ('pred_time', '<=', self._pred_time_limit))
                 algo = SearchAlgo(
                     metric='val_loss', mode='min', space=search_space,
                     points_to_evaluate=points_to_evaluate,
@@ -1055,7 +1064,10 @@ class AutoML:
                     prune_attr=prune_attr,
                     min_resource=min_resource,
                     max_resource=max_resource,
-                    config_constraints=[(learner_class.size, '<=', self._mem_thres)]
+                    config_constraints=[
+                        (learner_class.size, '<=', self._mem_thres)
+                    ],
+                    metric_constraints=metric_constraints,
                 )
             else:
                 algo = SearchAlgo(
@@ -1077,7 +1089,7 @@ class AutoML:
             analysis = tune.run(
                 search_state.training_function,
                 search_alg=search_state.search_alg,
-                time_budget_s=budget_left,
+                time_budget_s=min(budget_left, self._state.train_time_limit),
                 verbose=max(self.verbose - 1, 0),
                 use_ray=False)
             time_used = time.time() - start_run_time
diff --git a/flaml/ml.py b/flaml/ml.py
index c9606e62d..8847f407c 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -127,7 +127,10 @@ def get_test_loss(
     start = time.time()
     train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
+    pred_time = 0  # default when the metric does not measure prediction time
     if isinstance(eval_metric, str):
+        pred_start = time.time()
         test_pred_y = get_y_pred(estimator, X_test, eval_metric, obj)
+        pred_time = (time.time() - pred_start) / X_test.shape[0]
         test_loss = sklearn_metric_loss_score(eval_metric, test_pred_y,
                                               y_test, labels, weight_test)
         if train_loss is not False:
@@ -136,11 +139,14 @@ def get_test_loss(
                 eval_metric, test_pred_y, y_train, labels,
                 fit_kwargs.get('sample_weight'))
     else:  # customized metric function
-        test_loss, train_loss = eval_metric(
+        test_loss, metrics = eval_metric(
             X_test, y_test, estimator, labels, X_train, y_train,
             weight_test, fit_kwargs.get('sample_weight'))
+        if isinstance(metrics, dict):
+            pred_time = metrics.get('pred_time', 0)
+        train_loss = metrics
     train_time = time.time() - start
-    return test_loss, train_time, train_loss
+    return test_loss, train_time, train_loss, pred_time
 
 
 def train_model(estimator, X_train, y_train, budget, fit_kwargs={}):
@@ -154,27 +160,27 @@ def evaluate_model(
     fit_kwargs={}
 ):
     if 'holdout' in eval_method:
-        val_loss, train_loss, train_time = evaluate_model_holdout(
+        val_loss, train_loss, train_time, pred_time = evaluate_model_holdout(
             estimator, X_train, y_train, X_val, y_val, weight_val, budget,
-            task, eval_metric, best_val_loss, train_loss=train_loss,
+            task, eval_metric, train_loss=train_loss,
             fit_kwargs=fit_kwargs)
     else:
-        val_loss, train_loss, train_time = evaluate_model_CV(
+        val_loss, train_loss, train_time, pred_time = evaluate_model_CV(
             estimator, X_train, y_train, budget, kf, task, eval_metric,
             best_val_loss, train_loss=train_loss, fit_kwargs=fit_kwargs)
-    return val_loss, train_loss, train_time
+    return val_loss, train_loss, train_time, pred_time
 
 
 def evaluate_model_holdout(
     estimator, X_train, y_train, X_val, y_val,
-    weight_val, budget, task, eval_metric, best_val_loss, train_loss=False,
+    weight_val, budget, task, eval_metric, train_loss=False,
     fit_kwargs={}
 ):
-    val_loss, train_time, train_loss = get_test_loss(
+    val_loss, train_time, train_loss, pred_time = get_test_loss(
         estimator, X_train, y_train, X_val, y_val, weight_val,
         eval_metric, task, budget=budget, train_loss=train_loss,
         fit_kwargs=fit_kwargs)
-    return val_loss, train_loss, train_time
+    return val_loss, train_loss, train_time, pred_time
 
 
 def evaluate_model_CV(
@@ -182,9 +188,10 @@ def evaluate_model_CV(
     task, eval_metric, best_val_loss, train_loss=False, fit_kwargs={}
 ):
     start_time = time.time()
-    total_val_loss = total_train_loss = 0
-    train_time = 0
-    valid_fold_num = 0
+    total_val_loss = 0
+    total_train_loss = None
+    train_time = pred_time = 0
+    valid_fold_num = total_fold_num = 0
     n = kf.get_n_splits()
     X_train_split, y_train_split = X_train_all, y_train_all
     if task == 'regression':
@@ -224,23 +231,28 @@ def evaluate_model_CV(
         if weight is not None:
             fit_kwargs['sample_weight'], weight_val = weight[
                 train_index], weight[val_index]
-        val_loss_i, train_time_i, train_loss_i = get_test_loss(
+        val_loss_i, train_time_i, train_loss_i, pred_time_i = get_test_loss(
             estimator, X_train, y_train, X_val, y_val, weight_val,
             eval_metric, task, labels, budget_per_train,
             train_loss=train_loss, fit_kwargs=fit_kwargs)
         if weight is not None:
             fit_kwargs['sample_weight'] = weight
         valid_fold_num += 1
+        total_fold_num += 1
         total_val_loss += val_loss_i
         if train_loss is not False:
             if isinstance(total_train_loss, list):
                 total_train_loss = [
                     total_train_loss[i] + v for i, v in enumerate(train_loss_i)]
-            elif total_train_loss != 0:
+            elif isinstance(total_train_loss, dict):
+                total_train_loss = {
+                    k: total_train_loss[k] + v for k, v in train_loss_i.items()}
+            elif total_train_loss is not None:
                 total_train_loss += train_loss_i
             else:
                 total_train_loss = train_loss_i
         train_time += train_time_i
+        pred_time += pred_time_i
         if valid_fold_num == n:
             val_loss_list.append(total_val_loss / valid_fold_num)
             total_val_loss = valid_fold_num = 0
@@ -248,16 +260,20 @@ def evaluate_model_CV(
             val_loss_list.append(total_val_loss / valid_fold_num)
             break
     val_loss = np.max(val_loss_list)
+    n = total_fold_num
     if train_loss is not False:
         if isinstance(total_train_loss, list):
             train_loss = [v / n for v in total_train_loss]
+        elif isinstance(total_train_loss, dict):
+            train_loss = {k: v / n for k, v in total_train_loss.items()}
         else:
             train_loss = total_train_loss / n
+    pred_time /= n
     budget -= time.time() - start_time
     if val_loss < best_val_loss and budget > budget_per_train:
         estimator.cleanup()
         estimator.fit(X_train_all, y_train_all, budget, **fit_kwargs)
-    return val_loss, train_loss, train_time
+    return val_loss, train_loss, train_time, pred_time
 
 
 def compute_estimator(
@@ -266,17 +282,15 @@ def compute_estimator(
     best_val_loss=np.Inf, n_jobs=1, estimator_class=None, train_loss=False,
     fit_kwargs={}
 ):
-    start_time = time.time()
     estimator_class = estimator_class or get_estimator_class(
         task, estimator_name)
     estimator = estimator_class(
         **config_dic, task=task,
         n_jobs=n_jobs)
-    val_loss, train_loss, train_time = evaluate_model(
+    val_loss, train_loss, train_time, pred_time = evaluate_model(
         estimator, X_train, y_train, X_val, y_val, weight_val, budget, kf,
         task, eval_method, eval_metric, best_val_loss,
         train_loss=train_loss, fit_kwargs=fit_kwargs)
-    all_time = time.time() - start_time
-    return estimator, val_loss, train_loss, train_time, all_time
+    return estimator, val_loss, train_loss, train_time, pred_time
 
 
 def train_estimator(
diff --git a/flaml/model.py b/flaml/model.py
index 1363816cc..2ba3dae89 100644
--- a/flaml/model.py
+++ b/flaml/model.py
@@ -42,6 +42,7 @@ class BaseEstimator:
         self._task = task
         if '_estimator_type' in params:
             self._estimator_type = params['_estimator_type']
+            del self.params['_estimator_type']
         else:
             self._estimator_type = "regressor" if task == 'regression' \
                 else "classifier"
@@ -152,7 +153,7 @@ class BaseEstimator:
         return {}
 
     @classmethod
-    def size(cls, config):
+    def size(cls, config: dict) -> float:
         '''[optional method] memory size of the estimator in bytes
 
         Args:
@@ -165,7 +166,7 @@ class BaseEstimator:
         return 1.0
 
     @classmethod
-    def cost_relative2lgbm(cls):
+    def cost_relative2lgbm(cls) -> float:
         '''[optional method] relative cost compared to lightgbm'''
         return 1.0
 
@@ -445,7 +446,8 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
         **params
     ):
         super().__init__(task, **params)
-        self.params = params
+        del self.params['objective']
+        del self.params['max_bin']
         self.params.update({
             "n_estimators": int(round(n_estimators)),
             'max_leaves': int(round(max_leaves)),
@@ -514,7 +516,8 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
         n_estimators=4, max_features=1.0, criterion='gini', **params
     ):
         super().__init__(task, **params)
-        self.params = params
+        del self.params['objective']
+        del self.params['max_bin']
         self.params.update({
             "n_estimators": int(round(n_estimators)),
             "n_jobs": n_jobs,
@@ -525,8 +528,6 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
         else:
             self.estimator_class = RandomForestClassifier
             self.params['criterion'] = criterion
-        self._time_per_iter = None
-        self._train_size = 0
 
     def get_params(self, deep=False):
         params = super().get_params()
@@ -761,7 +762,6 @@ class KNeighborsEstimator(BaseEstimator):
         self, task='binary:logistic', n_jobs=1, n_neighbors=5, **params
     ):
         super().__init__(task, **params)
-        self.params = params
         self.params.update({
             'n_neighbors': int(round(n_neighbors)),
             'weights': params.get('weights', 'distance'),
diff --git a/flaml/version.py b/flaml/version.py
index a779a4426..1cc82e6b8 100644
--- a/flaml/version.py
+++ b/flaml/version.py
@@ -1 +1 @@
-__version__ = "0.5.6"
+__version__ = "0.5.7"
diff --git a/test/test_automl.py b/test/test_automl.py
index e8b7fece6..3bc977838 100644
--- a/test/test_automl.py
+++ b/test/test_automl.py
@@ -95,14 +95,19 @@ class MyXGB2(XGBoostEstimator):
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
+    import time
+    start = time.time()
     y_pred = estimator.predict_proba(X_test)
+    pred_time = (time.time() - start) / len(X_test)
     test_loss = log_loss(y_test, y_pred, labels=labels,
                          sample_weight=weight_test)
     y_pred = estimator.predict_proba(X_train)
     train_loss = log_loss(y_train, y_pred, labels=labels,
                           sample_weight=weight_train)
     alpha = 0.5
-    return test_loss * (1 + alpha) - alpha * train_loss, [test_loss, train_loss]
+    return test_loss * (1 + alpha) - alpha * train_loss, {
+        "test_loss": test_loss, "train_loss": train_loss,
"pred_time": pred_time + } class TestAutoML(unittest.TestCase): @@ -133,8 +138,8 @@ class TestAutoML(unittest.TestCase): learner_class=MyRegularizedGreedyForest) X_train, y_train = load_wine(return_X_y=True) settings = { - "time_budget": 10, # total running time in seconds - "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'], + "time_budget": 5, # total running time in seconds + "estimator_list": ['rf', 'xgboost', 'catboost'], "task": 'classification', # task type "sample": True, # whether to subsample training data "log_file_name": "test/wine.log", @@ -163,6 +168,7 @@ class TestAutoML(unittest.TestCase): "n_jobs": 1, "model_history": True, "sample_weight": np.ones(len(y_train)), + "pred_time_limit": 1e-5, } automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings) diff --git a/test/test_training_log.py b/test/test_training_log.py index 61dc9c87c..c07eea1f6 100644 --- a/test/test_training_log.py +++ b/test/test_training_log.py @@ -26,7 +26,8 @@ class TestTrainingLog(unittest.TestCase): "mem_thres": 1024 * 1024, "n_jobs": 1, "model_history": True, - "verbose": 2, + "train_time_limit": 0.01, + "verbose": 3, } X_train, y_train = load_boston(return_X_y=True) automl_experiment.fit(X_train=X_train, y_train=y_train,