Merge branch 'microsoft:main' into main

2025-12-26 14:38:50 +00:00 · 2022-11-07 01:34:15 +05:30 · 2022-11-07 01:34:15 +05:30 · 289f4d61d3
commit 289f4d61d3
parent f1d4adf4ff 1ed342617e
33 changed files with 1064 additions and 100 deletions
--- a/.github/workflows/deploy-website.yml
+++ b/.github/workflows/deploy-website.yml
@ -22,7 +22,7 @@ jobs:
          # cache: yarn
          # cache-dependency-path: '**/yarn.lock'
      - name: setup python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: pydoc-markdown install
@ -58,7 +58,7 @@ jobs:
          # cache: yarn
          # cache-dependency-path: '**/yarn.lock'
      - name: setup python
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: "3.8"
      - name: pydoc-markdown install
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@ -21,7 +21,7 @@ jobs:
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
+        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: On mac + python 3.10, install libomp to facilitate lgbm and xgboost install
@ -77,7 +77,7 @@ jobs:
          coverage xml
      - name: Upload coverage to Codecov
        if: (matrix.python-version == '3.7') && matrix.os != 'macos-latest' || matrix.python-version == '3.10'
-        uses: codecov/codecov-action@v1
+        uses: codecov/codecov-action@v3
        with:
          file: ./coverage.xml
          flags: unittests
@ -89,7 +89,7 @@ jobs:
  #   steps:
  #     - uses: actions/checkout@v2
  #     - name: Setup Python
-  #       uses: actions/setup-python@v2
+  #       uses: actions/setup-python@v4
  #       with:
  #         python-version: '3.8'
  #     - name: Compile documentation
--- a/flaml/automl.py
+++ b/flaml/automl.py
@ -498,7 +498,7 @@ class AutoML(BaseEstimator):
                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
                'mape'. Default is 'auto'.
                If passing a customized metric function, the function needs to
-                have the follwing signature:
+                have the following input arguments:

        ```python
        def custom_metric(
@ -2175,7 +2175,7 @@ class AutoML(BaseEstimator):
                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2',
                'mape'. Default is 'auto'.
                If passing a customized metric function, the function needs to
-                have the following signature:
+                have the following input arguments:

        ```python
        def custom_metric(
@ -2370,15 +2370,13 @@ class AutoML(BaseEstimator):
        ```

            cv_score_agg_func: customized cross-validation scores aggregate function. Default to average metrics across folds. If specified, this function needs to
-                have the following signature:
+                have the following input arguments:
+
+                * val_loss_folds: list of floats, the loss scores of each fold;
+                * log_metrics_folds: list of dicts/floats, the metrics of each fold to log.

-        ```python
-        def cv_score_agg_func(val_loss_folds, log_metrics_folds):
-            return metric_to_minimize, metrics_to_log
-        ```
-                “val_loss_folds” - list of floats, the loss scores of each fold; “log_metrics_folds” - list of dicts/floats, the metrics of each fold to log.
                This function should return the final aggregate result of all folds. A float number of the minimization objective, and a dictionary as the metrics to log or None.
-                E.g.,
+                    E.g.,

        ```python
        def cv_score_agg_func(val_loss_folds, log_metrics_folds):
@ -2393,16 +2391,16 @@ class AutoML(BaseEstimator):
                    metrics_to_log += single_fold
            if metrics_to_log:
                n = len(val_loss_folds)
-                metrics_to_log = {k: v / n for k, v in metrics_to_log.items()} if isinstance(metrics_to_log, dict) else metrics_to_log / n
+                metrics_to_log = (
+                    {k: v / n for k, v in metrics_to_log.items()}
+                    if isinstance(metrics_to_log, dict)
+                    else metrics_to_log / n
+                )
            return metric_to_minimize, metrics_to_log
        ```

+            skip_transform: boolean, default=False | Whether to pre-process data prior to modeling.
            fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
-                    For TransformersEstimator, available fit_kwargs can be found from
-                    [TrainingArgumentsForAuto](nlp/huggingface/training_args).
-                    e.g.,
-        skip_transform: boolean, default=False | Whether to pre-process data prior to modeling.
-        fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by estimator name.
                For TransformersEstimator, available fit_kwargs can be found from
                [TrainingArgumentsForAuto](nlp/huggingface/training_args).
                e.g.,
--- a/flaml/model.py
+++ b/flaml/model.py
@ -921,7 +921,16 @@ class TransformersEstimatorModelSelection(TransformersEstimator):


 class SKLearnEstimator(BaseEstimator):
-    """The base class for tuning scikit-learn estimators."""
+    """
+    The base class for tuning scikit-learn estimators.
+
+    Subclasses can modify the function signature of ``__init__`` to
+    ignore the values in ``config`` that are not relevant to the constructor
+    of their underlying estimator. For example, some regressors in ``scikit-learn``
+    don't accept the ``n_jobs`` parameter contained in ``config``. For these,
+    one can add ``n_jobs=None,`` before ``**config`` to make sure ``config`` doesn't
+    contain an ``n_jobs`` key.
+    """

    def __init__(self, task="binary", **config):
        super().__init__(task, **config)
--- a/flaml/nlp/huggingface/data_collator.py
+++ b/flaml/nlp/huggingface/data_collator.py
@ -11,7 +11,7 @@ from flaml.data import (
    MULTICHOICECLASSIFICATION,
    SUMMARIZATION,
    SEQCLASSIFICATION,
-    SEQREGRESSION
+    SEQREGRESSION,
 )


--- a/flaml/onlineml/README.md
+++ b/flaml/onlineml/README.md
@ -1,6 +1,6 @@
 # ChaCha for Online AutoML

-FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time;  and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved.  
+FLAML includes *ChaCha* which is an automatic hyperparameter tuning solution for online machine learning. Online machine learning has the following properties: (1) data comes in sequential order; and (2) the performance of the machine learning model is evaluated online, i.e., at every iteration. *ChaCha* performs online AutoML respecting the aforementioned properties of online learning, and at the same time respecting the following constraints: (1) only a small constant number of 'live' models are allowed to perform online learning at the same time;  and (2) no model persistence or offline training is allowed, which means that once we decide to replace a 'live' model with a new one, the replaced model can no longer be retrieved.

 For more technical details about *ChaCha*, please check our paper.

--- a/flaml/tune/README.md
+++ b/flaml/tune/README.md
@ -20,7 +20,7 @@ def evaluate_config(config):
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config['x']/100000)
-    # use tune.report to report the metric to optimize  
+    # use tune.report to report the metric to optimize
    tune.report(metric=metric)

 analysis = tune.run(
@ -35,7 +35,7 @@ analysis = tune.run(
    num_samples=-1,    # the maximal number of configs to try, -1 means infinite
    time_budget_s=60,   # the time budget in seconds
    local_dir='logs/',  # the local directory to store logs
-    # verbose=0,          # verbosity  
+    # verbose=0,          # verbosity
    # use_ray=True, # uncomment when performing parallel tuning using ray
    )

@ -59,7 +59,7 @@ def evaluate_config(config):
    # and the cost could be related to certain hyperparameters
    # in this example, we assume it's proportional to x
    time.sleep(config['x']/100000)
-    # use tune.report to report the metric to optimize  
+    # use tune.report to report the metric to optimize
    tune.report(metric=metric)

 # provide a time budget (in seconds) for the tuning process
--- a/flaml/tune/analysis.py
+++ b/flaml/tune/analysis.py
@ -18,7 +18,7 @@
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial
-
+from collections import defaultdict
 import logging

 logger = logging.getLogger(__name__)
@ -68,7 +68,6 @@ class ExperimentAnalysis:
    @property
    def results(self) -> Dict[str, Dict]:
        """Get the last result of all the trials of the experiment"""
-
        return {trial.trial_id: trial.last_result for trial in self.trials}

    def _validate_metric(self, metric: str) -> str:
@ -122,7 +121,6 @@ class ExperimentAnalysis:
        """
        metric = self._validate_metric(metric)
        mode = self._validate_mode(mode)
-
        if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
            raise ValueError(
                "ExperimentAnalysis: attempting to get best trial for "
@ -138,7 +136,6 @@ class ExperimentAnalysis:
        for trial in self.trials:
            if metric not in trial.metric_analysis:
                continue
-
            if scope in ["last", "avg", "last-5-avg", "last-10-avg"]:
                metric_score = trial.metric_analysis[metric][scope]
            else:
@ -158,7 +155,6 @@ class ExperimentAnalysis:
            elif (mode == "min") and (best_metric_score > metric_score):
                best_metric_score = metric_score
                best_trial = trial
-
        if not best_trial:
            logger.warning(
                "Could not find best trial. Did you pass the correct `metric` "
--- a/flaml/tune/searcher/blendsearch.py
+++ b/flaml/tune/searcher/blendsearch.py
@ -63,6 +63,7 @@ class BlendSearch(Searcher):
        seed: Optional[int] = 20,
        cost_attr: Optional[str] = "auto",
        experimental: Optional[bool] = False,
+        lexico_objectives: Optional[dict] = None,
        use_incumbent_result_in_evaluation=False,
    ):
        """Constructor.
@ -112,6 +113,27 @@ class BlendSearch(Searcher):
                Default is "auto", which means that we will automatically chose the cost attribute to use (depending
                on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
                in our search algorithm.
+            lexico_objectives: dict, default=None | It specifies the information needed to perform multi-objective
+                optimization with lexicographic preferences. This is only supported in CFO currently.
+                When lexico_objectives is not None, the arguments metric and mode will be invalid.
+                This dictionary shall contain the following fields of key-value pairs:
+                - "metrics": a list of optimization objectives with the orders reflecting the priorities/preferences of the
+                objectives.
+                - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+                objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+                - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical target values.
+                - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical tolerance values.
+                E.g.,
+                ```python
+                lexico_objectives = {
+                    "metrics": ["error_rate", "pred_time"],
+                    "modes": ["min", "min"],
+                    "tolerances": {"error_rate": 0.01, "pred_time": 0.0},
+                    "targets": {"error_rate": 0.0},
+                }
+                ```
            experimental: A bool of whether to use experimental features.
        """
        self._eps = SEARCH_THREAD_EPS
@ -127,6 +149,7 @@ class BlendSearch(Searcher):
        self.penalty = PENALTY  # penalty term for constraints
        self._metric, self._mode = metric, mode
        self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation
+        self.lexico_objectives = lexico_objectives
        init_config = low_cost_partial_config or {}
        if not init_config:
            logger.info(
@ -177,6 +200,7 @@ class BlendSearch(Searcher):
            reduction_factor,
            self.cost_attr,
            seed,
+            self.lexico_objectives,
        )
        if global_search_alg is not None:
            self._gs = global_search_alg
@ -480,11 +504,15 @@ class BlendSearch(Searcher):
            del self._subspace[trial_id]

    def _create_thread(self, config, result, space):
+        if self.lexico_objectives is None:
+            obj = result[self._ls.metric]
+        else:
+            obj = {k: result[k] for k in self.lexico_objectives["metrics"]}
        self._search_thread_pool[self._thread_count] = SearchThread(
            self._ls.mode,
            self._ls.create(
                config,
-                result[self._ls.metric],
+                obj,
                cost=result.get(self.cost_attr, 1),
                space=space,
            ),
@ -1045,6 +1073,7 @@ class BlendSearchTuner(BlendSearch, NNITuner):
            self._ls.resource_multiple_factor,
            cost_attr=self.cost_attr,
            seed=self._ls.seed,
+            lexico_objectives=self.lexico_objectives,
        )
        if self._gs is not None:
            self._gs = GlobalSearch(
--- a/flaml/tune/searcher/flow2.py
+++ b/flaml/tune/searcher/flow2.py
@ -5,6 +5,7 @@
 from typing import Dict, Optional, Tuple
 import numpy as np
 import logging
+from collections import defaultdict

 try:
    from ray import __version__ as ray_version
@ -49,6 +50,7 @@ class FLOW2(Searcher):
        resource_multiple_factor: Optional[float] = None,
        cost_attr: Optional[str] = "time_total_s",
        seed: Optional[int] = 20,
+        lexico_objectives=None,
    ):
        """Constructor.

@ -69,6 +71,26 @@ class FLOW2(Searcher):
                used for increasing resource.
            cost_attr: A string of the attribute used for cost.
            seed: An integer of the random seed.
+            lexico_objectives: dict, default=None | It specifies the information needed to perform multi-objective
+                optimization with lexicographic preferences. When lexico_objectives is not None, the arguments metric
+                and mode will be invalid. This dictionary shall contain the following fields of key-value pairs:
+                - "metrics": a list of optimization objectives with the orders reflecting the priorities/preferences of the
+                objectives.
+                - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+                objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+                - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical target values.
+                - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical tolerance values.
+                E.g.,
+                ```python
+                lexico_objectives = {
+                    "metrics": ["error_rate", "pred_time"],
+                    "modes": ["min", "min"],
+                    "tolerances": {"error_rate": 0.01, "pred_time": 0.0},
+                    "targets": {"error_rate": 0.0},
+                }
+                ```
        """
        if mode:
            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
@ -90,13 +112,30 @@ class FLOW2(Searcher):
        self.best_config = flatten_dict(init_config)
        self.resource_attr = resource_attr
        self.min_resource = min_resource
+        self.lexico_objectives = lexico_objectives
+        if self.lexico_objectives is not None:
+            if "modes" not in self.lexico_objectives.keys():
+                self.lexico_objectives["modes"] = ["min"] * len(
+                    self.lexico_objectives["metrics"]
+                )
+            for t_metric, t_mode in zip(
+                self.lexico_objectives["metrics"], self.lexico_objectives["modes"]
+            ):
+                if t_metric not in self.lexico_objectives["tolerances"].keys():
+                    self.lexico_objectives["tolerances"][t_metric] = 0
+                if t_metric not in self.lexico_objectives["targets"].keys():
+                    self.lexico_objectives["targets"][t_metric] = (
+                        -float("inf") if t_mode == "min" else float("inf")
+                    )
        self.resource_multiple_factor = (
            resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR
        )
        self.cost_attr = cost_attr
        self.max_resource = max_resource
        self._resource = None
+        self._f_best = None  # only used by lexico_compare. It represents the best value achieved by lexico_flow.
        self._step_lb = np.Inf
+        self._histories = None  # only used by lexico_compare. It records the results of historical configurations.
        if space is not None:
            self._init_search()

@ -264,8 +303,21 @@ class FLOW2(Searcher):
            self.resource_multiple_factor,
            self.cost_attr,
            self.seed + 1,
+            self.lexico_objectives,
        )
-        flow2.best_obj = obj * self.metric_op  # minimize internally
+        if self.lexico_objectives is not None:
+            flow2.best_obj = {}
+            for k, v in obj.items():
+                flow2.best_obj[k] = (
+                    -v
+                    if self.lexico_objectives["modes"][
+                        self.lexico_objectives["metrics"].index(k)
+                    ]
+                    == "max"
+                    else v
+                )
+        else:
+            flow2.best_obj = obj * self.metric_op  # minimize internally
        flow2.cost_incumbent = cost
        self.seed += 1
        return flow2
@ -303,6 +355,80 @@ class FLOW2(Searcher):
            self._init_search()
        return True

+    def update_fbest(self):
+        """Refresh `self._f_best` from the values recorded in `self._histories`.
+
+        Objectives are visited in the order given by
+        `lexico_objectives["metrics"]`. For each objective, the minimum value
+        among the still-feasible history entries is stored in `self._f_best`,
+        and the feasible set is then narrowed to the entries whose value does
+        not exceed `max(best + tolerance, target)`.
+        """
+        # TODO: Improve the efficiency
+        metrics = self.lexico_objectives["metrics"]
+        modes = self.lexico_objectives["modes"]
+        tolerances = self.lexico_objectives["tolerances"]
+        targets = self.lexico_objectives["targets"]
+        feasible_index = [*range(len(self._histories[metrics[0]]))]
+        for k_metric, k_mode in zip(metrics, modes):
+            k_values = np.array(self._histories[k_metric])
+            self._f_best[k_metric] = np.min(k_values.take(feasible_index))
+            # `lexico_compare` negates the target of a "max" objective before
+            # comparing it with the recorded values; use the same sign here so
+            # both methods apply the same bound.
+            k_target = targets[k_metric] if k_mode == "min" else -targets[k_metric]
+            bound = max(self._f_best[k_metric] + tolerances[k_metric], k_target)
+            # A set keeps the membership test below O(1) per entry.
+            within_bound = set(np.where(k_values <= bound)[0].tolist())
+            feasible_index = [val for val in feasible_index if val in within_bound]
+
+    def lexico_compare(self, result) -> bool:
+        """Decide whether `result` beats `self.best_obj` lexicographically.
+
+        `result` is first appended to `self._histories` and `self._f_best` is
+        refreshed through `update_fbest`. The very first call only initializes
+        the histories and returns True.
+
+        Args:
+            result: mapping from each metric in `lexico_objectives["metrics"]`
+                to its value.
+
+        Returns:
+            True if `result` is considered better than `self.best_obj`,
+            False otherwise (including when all objectives are tied).
+        """
+        metrics = self.lexico_objectives["metrics"]
+        tolerances = self.lexico_objectives["tolerances"]
+        targets = self.lexico_objectives["targets"]
+        first_call = self._histories is None
+        if first_call:
+            self._histories, self._f_best = defaultdict(list), {}
+        for k in metrics:
+            self._histories[k].append(result[k])
+        self.update_fbest()
+        if first_call:
+            return True
+        for k_metric, k_mode in zip(metrics, self.lexico_objectives["modes"]):
+            # The target of a "max" objective is negated before comparison.
+            k_target = targets[k_metric] if k_mode == "min" else -targets[k_metric]
+            bound = max(self._f_best[k_metric] + tolerances[k_metric], k_target)
+            if result[k_metric] < bound and self.best_obj[k_metric] < bound:
+                # Both are strictly below the bound for this objective;
+                # defer the decision to the next objective.
+                continue
+            return bool(result[k_metric] < self.best_obj[k_metric])
+        # Every objective was deferred: break the tie on the first
+        # objective whose values differ.
+        for k_metr in metrics:
+            if result[k_metr] == self.best_obj[k_metr]:
+                continue
+            return bool(result[k_metr] < self.best_obj[k_metr])
+        # Identical on every objective: not an improvement.
+        return False
+
    def on_trial_complete(
        self, trial_id: str, result: Optional[Dict] = None, error: bool = False
    ):
@ -313,10 +439,28 @@ class FLOW2(Searcher):
        """
        self.trial_count_complete += 1
        if not error and result:
-            obj = result.get(self._metric)
+            obj = (
+                result.get(self._metric)
+                if self.lexico_objectives is None
+                else {k: result[k] for k in self.lexico_objectives["metrics"]}
+            )
            if obj:
-                obj *= self.metric_op
-                if self.best_obj is None or obj < self.best_obj:
+                obj = (
+                    {
+                        k: -obj[k] if m == "max" else obj[k]
+                        for k, m in zip(
+                            self.lexico_objectives["metrics"],
+                            self.lexico_objectives["modes"],
+                        )
+                    }
+                    if isinstance(obj, dict)
+                    else obj * self.metric_op
+                )
+                if (
+                    self.best_obj is None
+                    or (self.lexico_objectives is None and obj < self.best_obj)
+                    or (self.lexico_objectives is not None and self.lexico_compare(obj))
+                ):
                    self.best_obj = obj
                    self.best_config, self.step = self._configs[trial_id]
                    self.incumbent = self.normalize(self.best_config)
@ -329,7 +473,6 @@ class FLOW2(Searcher):
                    self._num_allowed4incumbent = 2 * self.dim
                    self._proposed_by.clear()
                    if self._K > 0:
-                        # self._oldK must have been set when self._K>0
                        self.step *= np.sqrt(self._K / self._oldK)
                    self.step = min(self.step, self.step_ub)
                    self._iter_best_config = self.trial_count_complete
@ -340,7 +483,6 @@ class FLOW2(Searcher):
                    self._trunc = max(self._trunc >> 1, 1)
        proposed_by = self._proposed_by.get(trial_id)
        if proposed_by == self.incumbent:
-            # proposed by current incumbent and no better
            self._num_complete4incumbent += 1
            cost = (
                result.get(self.cost_attr, 1)
@ -357,17 +499,34 @@ class FLOW2(Searcher):
            if self._num_complete4incumbent == self.dir and (
                not self._resource or self._resource == self.max_resource
            ):
-                # check stuck condition if using max resource
                self._num_complete4incumbent -= 2
                self._num_allowed4incumbent = max(self._num_allowed4incumbent, 2)

    def on_trial_result(self, trial_id: str, result: Dict):
        """Early update of incumbent."""
        if result:
-            obj = result.get(self._metric)
+            obj = (
+                result.get(self._metric)
+                if self.lexico_objectives is None
+                else {k: result[k] for k in self.lexico_objectives["metrics"]}
+            )
            if obj:
-                obj *= self.metric_op
-                if self.best_obj is None or obj < self.best_obj:
+                obj = (
+                    {
+                        k: -obj[k] if m == "max" else obj[k]
+                        for k, m in zip(
+                            self.lexico_objectives["metrics"],
+                            self.lexico_objectives["modes"],
+                        )
+                    }
+                    if isinstance(obj, dict)
+                    else obj * self.metric_op
+                )
+                if (
+                    self.best_obj is None
+                    or (self.lexico_objectives is None and obj < self.best_obj)
+                    or (self.lexico_objectives is not None and self.lexico_compare(obj))
+                ):
                    self.best_obj = obj
                    config = self._configs[trial_id][0]
                    if self.best_config != config:
--- a/flaml/tune/searcher/search_thread.py
+++ b/flaml/tune/searcher/search_thread.py
@ -137,7 +137,11 @@ class SearchThread:
        if result:
            self.cost_last = result.get(self.cost_attr, 1)
            self.cost_total += self.cost_last
-            if self._search_alg.metric in result:
+            if self._search_alg.metric in result and (
+                getattr(self._search_alg, "lexico_objectives", None) is None
+            ):
+                # TODO: Improve this behavior. When lexico_objectives is provided to CFO,
+                # related variables are not callable.
                obj = result[self._search_alg.metric] * self._metric_op
                if obj < self.obj_best1 or self.best_result is None:
                    self.cost_best2 = self.cost_best1
@ -146,7 +150,10 @@ class SearchThread:
                    self.obj_best1 = obj
                    self.cost_best = self.cost_last
                    self.best_result = result
-            self._update_speed()
+            if getattr(self._search_alg, "lexico_objectives", None) is None:
+                # TODO: Improve this behavior. When lexico_objectives is provided to CFO,
+                # related variables are not callable.
+                self._update_speed()
        self.running -= 1
        assert self.running >= 0

--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@ -2,11 +2,12 @@
 #  * Copyright (c) FLAML authors. All rights reserved.
 #  * Licensed under the MIT License. See LICENSE file in the
 #  * project root for license information.
-from typing import Optional, Union, List, Callable, Tuple
+from typing import Optional, Union, List, Callable, Tuple, Dict
 import numpy as np
 import datetime
 import time
 import os
+from collections import defaultdict

 try:
    from ray import __version__ as ray_version
@ -36,13 +37,92 @@ INCUMBENT_RESULT = "__incumbent_result__"
 class ExperimentAnalysis(EA):
    """Class for storing the experiment results."""

-    def __init__(self, trials, metric, mode):
+    def __init__(self, trials, metric, mode, lexico_objectives=None):
        try:
            super().__init__(self, None, trials, metric, mode)
+            self.lexico_objectives = lexico_objectives
        except (TypeError, ValueError):
            self.trials = trials
            self.default_metric = metric or DEFAULT_METRIC
            self.default_mode = mode
+            self.lexico_objectives = lexico_objectives
+
+    @property
+    def best_trial(self) -> Trial:
+        """Best trial of the experiment.
+
+        With `lexico_objectives` set, the lookup goes through
+        `get_best_trial` using the default metric and mode; otherwise the
+        parent class property is used.
+        """
+        if self.lexico_objectives is not None:
+            return self.get_best_trial(self.default_metric, self.default_mode)
+        return super().best_trial
+
+    @property
+    def best_config(self) -> Dict:
+        """Best configuration of the experiment.
+
+        With `lexico_objectives` set, the lookup goes through
+        `get_best_config` using the default metric and mode; otherwise the
+        parent class property is used.
+        """
+        if self.lexico_objectives is not None:
+            return self.get_best_config(self.default_metric, self.default_mode)
+        return super().best_config
+
+    def lexico_best(self, trials):
+        """Pick the best trial according to the lexicographic objectives.
+
+        The last result of every trial is collected per objective, with the
+        values of "max" objectives negated so that smaller is always better.
+        Objectives are then processed in the order of
+        `lexico_objectives["metrics"]`: for each one, only the trials whose
+        value does not exceed `max(best + tolerance, target)` remain feasible.
+
+        Args:
+            trials: list of trials; each must expose `last_result`, a mapping
+                that contains every metric in `lexico_objectives["metrics"]`.
+
+        Returns:
+            The last trial (in list order) that is still feasible after all
+            objectives have been processed.
+        """
+        metrics = self.lexico_objectives["metrics"]
+        modes = self.lexico_objectives["modes"]
+        tolerances = self.lexico_objectives["tolerances"]
+        targets = self.lexico_objectives["targets"]
+        results = [trial.last_result for trial in trials]
+        histories = defaultdict(list)
+        for result in results:
+            for objective, mode in zip(metrics, modes):
+                # Read the value from the trial's result (not from the trial
+                # object itself) and negate it for "max" objectives.
+                histories[objective].append(
+                    result[objective] if mode == "min" else -result[objective]
+                )
+        f_best = {}
+        feasible_index = [*range(len(histories[metrics[0]]))]
+        for k_metric, k_mode in zip(metrics, modes):
+            k_values = np.array(histories[k_metric])
+            k_target = -targets[k_metric] if k_mode == "max" else targets[k_metric]
+            f_best[k_metric] = np.min(k_values.take(feasible_index))
+            bound = max(f_best[k_metric] + tolerances[k_metric], k_target)
+            # A set keeps the membership test below O(1) per entry.
+            within_bound = set(np.where(k_values <= bound)[0].tolist())
+            feasible_index = [val for val in feasible_index if val in within_bound]
+        best_trial = trials[feasible_index[-1]]
+        return best_trial
+
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
+        """Return the best trial.
+
+        With `lexico_objectives` set, the trial is chosen by `lexico_best`
+        over `self.trials` and the arguments are not used; otherwise all
+        arguments are forwarded to the parent implementation.
+        """
+        if self.lexico_objectives is None:
+            return super().get_best_trial(metric, mode, scope, filter_nan_and_inf)
+        return self.lexico_best(self.trials)
+
+    @property
+    def best_result(self) -> Dict:
+        """Last result of the best trial.
+
+        Without `lexico_objectives` the parent class property is used;
+        otherwise the last result of `self.best_trial` is returned.
+        """
+        # Test the configuration attribute: `self.lexico_best` is a bound
+        # method and is never None, so checking it would make the parent
+        # branch unreachable.
+        if self.lexico_objectives is None:
+            return super().best_result
+        else:
+            return self.best_trial.last_result


 def report(_metric=None, **kwargs):
@ -149,6 +229,7 @@ def run(
    use_ray: Optional[bool] = False,
    use_incumbent_result_in_evaluation: Optional[bool] = None,
    log_file_name: Optional[str] = None,
+    lexico_objectives: Optional[dict] = None,
    **ray_args,
 ):
    """The trigger for HPO.
@ -305,6 +386,28 @@ def run(
                if local_dir is not given, no log file is created;
                if local_dir is given, the log file name will be autogenerated under local_dir.
            Only valid when verbose > 0 or use_ray is True.
+        lexico_objectives: dict, default=None | It specifies the information needed to perform multi-objective
+            optimization with lexicographic preferences. When lexico_objectives is not None, the arguments metric
+            and mode will be invalid, and flaml's tune uses CFO
+            as the `search_alg`, which makes the input (if provided) `search_alg` invalid.
+            This dictionary shall contain the following fields of key-value pairs:
+            - "metrics": a list of optimization objectives, ordered by the priorities/preferences of the
+            objectives.
+            - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+            objectives in the "metrics" list. If not provided, we use "min" as the default mode for all the objectives.
+            - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical target values.
+            - "tolerances" (optional): a dictionary to specify the optimality tolerances on the objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical tolerance values.
+            E.g.,
+            ```python
+            lexico_objectives = {
+                "metrics": ["error_rate", "pred_time"],
+                "modes": ["min", "min"],
+                "tolerances": {"error_rate": 0.01, "pred_time": 0.0},
+                "targets": {"error_rate": 0.0},
+            }
+            ```
        **ray_args: keyword arguments to pass to ray.tune.run().
            Only valid when use_ray=True.
    """
@ -357,6 +460,11 @@ def run(

    from .searcher.blendsearch import BlendSearch, CFO

+    if lexico_objectives is not None:
+        logger.warning(
+            "If lexico_objectives is not None, search_alg is forced to be CFO"
+        )
+        search_alg = None
    if search_alg is None:
        flaml_scheduler_resource_attr = (
            flaml_scheduler_min_resource
@ -371,18 +479,30 @@ def run(
            flaml_scheduler_max_resource = max_resource
            flaml_scheduler_reduction_factor = reduction_factor
            scheduler = None
-        try:
-            import optuna as _
+        if lexico_objectives is None:
+            try:
+                import optuna as _

-            SearchAlgorithm = BlendSearch
-        except ImportError:
+                SearchAlgorithm = BlendSearch
+                logger.info(
+                    "Using search algorithm {}.".format(
+                        SearchAlgorithm.__class__.__name__
+                    )
+                )
+            except ImportError:
+                SearchAlgorithm = CFO
+                logger.warning(
+                    "Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]"
+                )
+            metric = metric or DEFAULT_METRIC
+        else:
            SearchAlgorithm = CFO
-            logger.warning(
-                "Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]"
+            logger.info(
+                "Using search algorithm {}.".format(SearchAlgorithm.__class__.__name__)
            )
-
+            metric = lexico_objectives["metrics"][0] or DEFAULT_METRIC
        search_alg = SearchAlgorithm(
-            metric=metric or DEFAULT_METRIC,
+            metric=metric,
            mode=mode,
            space=config,
            points_to_evaluate=points_to_evaluate,
@ -398,6 +518,7 @@ def run(
            config_constraints=config_constraints,
            metric_constraints=metric_constraints,
            use_incumbent_result_in_evaluation=use_incumbent_result_in_evaluation,
+            lexico_objectives=lexico_objectives,
        )
    else:
        if metric is None or mode is None:
@ -532,7 +653,12 @@ def run(
            logger.warning(
                f"fail to sample a trial for {max_failure} times in a row, stopping."
            )
-        analysis = ExperimentAnalysis(_runner.get_trials(), metric=metric, mode=mode)
+        analysis = ExperimentAnalysis(
+            _runner.get_trials(),
+            metric=metric,
+            mode=mode,
+            lexico_objectives=lexico_objectives,
+        )
        return analysis
    finally:
        # recover the global variables in case of nested run
--- a/notebook/tune_lexicographic.ipynb
+++ b/notebook/tune_lexicographic.ipynb
@ -0,0 +1,285 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tune neural networks with lexicographic preference across objectives\n",
+    "This example tunes a neural network model with two objectives, \"error_rate\" and \"flops\", on the FashionMNIST dataset.\n",
+    "\n",
+    "**Requirements.** This notebook requires:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# %pip install torch torchvision flaml[blendsearch,ray] thop"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import thop\n",
+    "import torch.nn as nn\n",
+    "from flaml import tune\n",
+    "import torch.nn.functional as F\n",
+    "import torchvision\n",
+    "import numpy as np\n",
+    "import os\n",
+    "\n",
+    "DEVICE = torch.device(\"cpu\")\n",
+    "BATCHSIZE = 128\n",
+    "N_TRAIN_EXAMPLES = BATCHSIZE * 30\n",
+    "N_VALID_EXAMPLES = BATCHSIZE * 10\n",
+    "data_dir = os.path.abspath(\"data\")\n",
+    "\n",
+    "train_dataset = torchvision.datasets.FashionMNIST(\n",
+    "    data_dir,\n",
+    "    train=True,\n",
+    "    download=True,\n",
+    "    transform=torchvision.transforms.ToTensor(),\n",
+    ")\n",
+    "\n",
+    "train_loader = torch.utils.data.DataLoader(\n",
+    "    torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),\n",
+    "    batch_size=BATCHSIZE,\n",
+    "    shuffle=True,\n",
+    ")\n",
+    "\n",
+    "val_dataset = torchvision.datasets.FashionMNIST(\n",
+    "    data_dir, train=False, transform=torchvision.transforms.ToTensor()\n",
+    ")\n",
+    "\n",
+    "val_loader = torch.utils.data.DataLoader(\n",
+    "    torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),\n",
+    "    batch_size=BATCHSIZE,\n",
+    "    shuffle=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Specify the model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def define_model(configuration):\n",
+    "    n_layers = configuration[\"n_layers\"]\n",
+    "    layers = []\n",
+    "    in_features = 28 * 28\n",
+    "    for i in range(n_layers):\n",
+    "        out_features = configuration[\"n_units_l{}\".format(i)]\n",
+    "        layers.append(nn.Linear(in_features, out_features))\n",
+    "        layers.append(nn.ReLU())\n",
+    "        p = configuration[\"dropout_{}\".format(i)]\n",
+    "        layers.append(nn.Dropout(p))\n",
+    "        in_features = out_features\n",
+    "    layers.append(nn.Linear(in_features, 10))\n",
+    "    layers.append(nn.LogSoftmax(dim=1))\n",
+    "    return nn.Sequential(*layers)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_model(model, optimizer, train_loader):\n",
+    "    model.train()\n",
+    "    for batch_idx, (data, target) in enumerate(train_loader):\n",
+    "        data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)\n",
+    "        optimizer.zero_grad()\n",
+    "        F.nll_loss(model(data), target).backward()\n",
+    "        optimizer.step()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Metrics "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def eval_model(model, valid_loader):\n",
+    "    model.eval()\n",
+    "    correct = 0\n",
+    "    with torch.no_grad():\n",
+    "        for batch_idx, (data, target) in enumerate(valid_loader):\n",
+    "            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)\n",
+    "            pred = model(data).argmax(dim=1, keepdim=True)\n",
+    "            correct += pred.eq(target.view_as(pred)).sum().item()\n",
+    "\n",
+    "    accuracy = correct / N_VALID_EXAMPLES\n",
+    "    flops, params = thop.profile(\n",
+    "        model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False\n",
+    "    )\n",
+    "    return np.log2(flops), 1 - accuracy, params"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Evaluate function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate_function(configuration):\n",
+    "    model = define_model(configuration).to(DEVICE)\n",
+    "    optimizer = torch.optim.Adam(model.parameters(), configuration[\"lr\"])\n",
+    "    n_epoch = configuration[\"n_epoch\"]\n",
+    "    for epoch in range(n_epoch):\n",
+    "        train_model(model, optimizer, train_loader)\n",
+    "    flops, error_rate, params = eval_model(model, val_loader)\n",
+    "    return {\"error_rate\": error_rate, \"flops\": flops, \"params\": params}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Lexicographic information across objectives"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "lexico_objectives = {}\n",
+    "lexico_objectives[\"metrics\"] = [\"error_rate\", \"flops\"]\n",
+    "lexico_objectives[\"tolerances\"] = {\"error_rate\": 0.02, \"flops\": 0.0}\n",
+    "lexico_objectives[\"targets\"] = {\"error_rate\": 0.0, \"flops\": 0.0}\n",
+    "lexico_objectives[\"modes\"] = [\"min\", \"min\"]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Search space"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "search_space = {\n",
+    "    \"n_layers\": tune.randint(lower=1, upper=3),\n",
+    "    \"n_units_l0\": tune.randint(lower=4, upper=128),\n",
+    "    \"n_units_l1\": tune.randint(lower=4, upper=128),\n",
+    "    \"n_units_l2\": tune.randint(lower=4, upper=128),\n",
+    "    \"dropout_0\": tune.uniform(lower=0.2, upper=0.5),\n",
+    "    \"dropout_1\": tune.uniform(lower=0.2, upper=0.5),\n",
+    "    \"dropout_2\": tune.uniform(lower=0.2, upper=0.5),\n",
+    "    \"lr\": tune.loguniform(lower=1e-5, upper=1e-1),\n",
+    "    \"n_epoch\": tune.randint(lower=1, upper=20),\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Launch the tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "low_cost_partial_config = {\n",
+    "    \"n_layers\": 1,\n",
+    "    \"n_units_l0\": 4,\n",
+    "    \"n_units_l1\": 4,\n",
+    "    \"n_units_l2\": 4,\n",
+    "    \"n_epoch\": 1,\n",
+    "}\n",
+    "\n",
+    "analysis = tune.run(\n",
+    "    evaluate_function,\n",
+    "    num_samples=-1,\n",
+    "    time_budget_s=100,\n",
+    "    config=search_space,\n",
+    "    use_ray=False,\n",
+    "    lexico_objectives=lexico_objectives,\n",
+    "    low_cost_partial_config=low_cost_partial_config,\n",
+    ")\n",
+    "result = analysis.best_result\n",
+    "print(result)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.14 64-bit",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.14"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/setup.py
+++ b/setup.py
@ -48,9 +48,12 @@ setuptools.setup(
        ],
        "test": [
            "flake8>=3.8.4",
+            "thop",
            "pytest>=6.1.1",
            "coverage>=5.3",
            "pre-commit",
+            "torch",
+            "torchvision",
            "catboost>=0.26",
            "rgf-python",
            "optuna==2.8.0",
--- a/test/pipeline_tuning_example/data_prep/data_prep.py
+++ b/test/pipeline_tuning_example/data_prep/data_prep.py
@ -20,7 +20,7 @@ def main():

    logger.info(" ".join(f"{k}={v}" for k, v in vars(args).items()))

-    data_path = os.path.join(args.data, 'data.csv')
+    data_path = os.path.join(args.data, "data.csv")
    df = pd.read_csv(data_path)

    train_df, test_df = train_test_split(
--- a/test/pipeline_tuning_example/data_prep/data_prep.yaml
+++ b/test/pipeline_tuning_example/data_prep/data_prep.yaml
@ -19,7 +19,7 @@ environment:
  os: Linux

 command: >-
-  python data_prep.py 
+  python data_prep.py
  --data {inputs.data}
  --test_train_ratio {inputs.test_train_ratio}
  --train_data {outputs.train_data}
--- a/test/pipeline_tuning_example/submit_train_pipeline.py
+++ b/test/pipeline_tuning_example/submit_train_pipeline.py
@ -83,10 +83,10 @@ def build_and_submit_aml_pipeline(config):
    ################################################
    # load component functions
    ################################################
-    data_prep_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                              / "data_prep/data_prep.yaml")
-    train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                          / "train/train.yaml")
+    data_prep_component = Component.from_yaml(
+        ws, yaml_file=LOCAL_DIR / "data_prep/data_prep.yaml"
+    )
+    train_component = Component.from_yaml(ws, yaml_file=LOCAL_DIR / "train/train.yaml")

    ################################################
    # build pipeline
--- a/test/pipeline_tuning_example/submit_tuner_pipeline.py
+++ b/test/pipeline_tuning_example/submit_tuner_pipeline.py
@ -14,16 +14,19 @@ def remote_run():
    ################################################
    # connect to your Azure ML workspace
    ################################################
-    ws = Workspace(subscription_id=args.subscription_id,
-                   resource_group=args.resource_group,
-                   workspace_name=args.workspace)
+    ws = Workspace(
+        subscription_id=args.subscription_id,
+        resource_group=args.resource_group,
+        workspace_name=args.workspace,
+    )

    ################################################
    # load component functions
    ################################################

-    pipeline_tuning_func = Component.from_yaml(ws, yaml_file=LOCAL_DIR
-                                               / "tuner/component_spec.yaml")
+    pipeline_tuning_func = Component.from_yaml(
+        ws, yaml_file=LOCAL_DIR / "tuner/component_spec.yaml"
+    )

    ################################################
    # build pipeline
@ -44,6 +47,7 @@ def remote_run():
 def local_run():
    logger.info("Run tuner locally.")
    from tuner import tuner_func
+
    tuner_func.tune_pipeline(concurrent_run=2)


@ -52,15 +56,18 @@ if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_mutually_exclusive_group(required=False)
    parser.add_argument(
-        "--subscription_id", type=str, help="your_subscription_id", required=False,
+        "--subscription_id",
+        type=str,
+        help="your_subscription_id",
+        required=False,
    )
    parser.add_argument(
-        "--resource_group", type=str, help="your_resource_group", required=False)
-    parser.add_argument(
-        "--workspace", type=str, help="your_workspace", required=False)
+        "--resource_group", type=str, help="your_resource_group", required=False
+    )
+    parser.add_argument("--workspace", type=str, help="your_workspace", required=False)

-    parser.add_argument('--remote', dest='remote', action='store_true')
-    parser.add_argument('--local', dest='remote', action='store_false')
+    parser.add_argument("--remote", dest="remote", action="store_true")
+    parser.add_argument("--local", dest="remote", action="store_false")
    parser.set_defaults(remote=True)
    args = parser.parse_args()

--- a/test/pipeline_tuning_example/train/train.py
+++ b/test/pipeline_tuning_example/train/train.py
@ -5,7 +5,7 @@ import pandas as pd
 from azureml.core import Run


-class LightGBMCallbackHandler():
+class LightGBMCallbackHandler:
    def __init__(self):
        pass

@ -24,16 +24,22 @@ class LightGBMCallbackHandler():
 def main(args):
    """Main function of the script."""

-    train_path = os.path.join(args.train_data, 'data.csv')
+    train_path = os.path.join(args.train_data, "data.csv")
    print("traning_path:", train_path)

-    test_path = os.path.join(args.test_data, 'data.csv')
+    test_path = os.path.join(args.test_data, "data.csv")

    train_set = lgb.Dataset(train_path)
    test_set = lgb.Dataset(test_path)
    callbacks_handler = LightGBMCallbackHandler()
-    config = {"header": True, "objective": "binary", "label_column": 30, "metric": "binary_error",
-              "n_estimators": args.n_estimators, "learning_rate": args.learning_rate}
+    config = {
+        "header": True,
+        "objective": "binary",
+        "label_column": 30,
+        "metric": "binary_error",
+        "n_estimators": args.n_estimators,
+        "learning_rate": args.learning_rate,
+    }
    gbm = lgb.train(
        config,
        train_set,
@ -44,9 +50,9 @@ def main(args):
        ],
    )

-    print('Saving model...')
+    print("Saving model...")
    # save model to file
-    gbm.save_model(os.path.join(args.model, 'model.txt'))
+    gbm.save_model(os.path.join(args.model, "model.txt"))


 if __name__ == "__main__":
--- a/test/pipeline_tuning_example/train/train.yaml
+++ b/test/pipeline_tuning_example/train/train.yaml
@ -4,9 +4,9 @@ name: classifier
 version: 0.0.1
 display_name: Train lgbm classifier
 inputs:
-  train_data: 
+  train_data:
    type: path
-  test_data: 
+  test_data:
    type: path
  learning_rate:
    type: float
@ -20,8 +20,8 @@ environment:
  conda_dependencies_file: env.yaml
 os: Linux
 command: >-
-  python train.py 
-  --train_data {inputs.train_data} 
+  python train.py
+  --train_data {inputs.train_data}
  --test_data {inputs.test_data}
  --learning_rate {inputs.learning_rate}
  --n_estimators {inputs.n_estimators}
--- a/test/pipeline_tuning_example/tuner/component_spec.yaml
+++ b/test/pipeline_tuning_example/tuner/component_spec.yaml
@ -9,4 +9,4 @@ environment:
  conda_dependencies_file: env.yaml
 os: Linux
 command: >-
-  python tuner/tuner_func.py 
+  python tuner/tuner_func.py
--- a/test/pipeline_tuning_example/tuner/tuner_func.py
+++ b/test/pipeline_tuning_example/tuner/tuner_func.py
@ -8,8 +8,7 @@ logger = logging.getLogger(__name__)


 def run_with_config(config: dict):
-    """Run the pipeline with a given config dict
-    """
+    """Run the pipeline with a given config dict"""

    # pass the hyperparameters to AzureML jobs by overwriting the config file.
    overrides = [f"{key}={value}" for key, value in config.items()]
@ -24,25 +23,25 @@ def run_with_config(config: dict):
    while not stop:
        # get status
        status = run._core_run.get_status()
-        print(f'status: {status}')
+        print(f"status: {status}")

        # get metrics
        metrics = run._core_run.get_metrics(recursive=True)
        if metrics:
            run_metrics = list(metrics.values())

-            new_metric = run_metrics[0]['eval_binary_error']
+            new_metric = run_metrics[0]["eval_binary_error"]

            if type(new_metric) == list:
                new_metric = new_metric[-1]

-            print(f'eval_binary_error: {new_metric}')
+            print(f"eval_binary_error: {new_metric}")

            tune.report(eval_binary_error=new_metric)

        time.sleep(5)

-        if status == 'FAILED' or status == 'Completed':
+        if status == "FAILED" or status == "Completed":
            stop = True

    print("The run is terminated.")
--- a/test/tune/test_lexiflow.py
+++ b/test/tune/test_lexiflow.py
@ -0,0 +1,142 @@
+import torch
+import thop
+import torch.nn as nn
+from flaml import tune
+import torch.nn.functional as F
+import torchvision
+import numpy as np
+
+# Device every tensor and model in this test is moved to.
+DEVICE = torch.device("cpu")
+# Mini-batch size shared by the training and validation data loaders.
+BATCHSIZE = 128
+# Number of examples kept in the training / validation subsets
+# (30 and 10 mini-batches respectively).
+N_TRAIN_EXAMPLES = BATCHSIZE * 30
+N_VALID_EXAMPLES = BATCHSIZE * 10
+
+
+def test_lexiflow():
+    train_dataset = torchvision.datasets.FashionMNIST(
+        "test/data",
+        train=True,
+        download=True,
+        transform=torchvision.transforms.ToTensor(),
+    )
+
+    train_loader = torch.utils.data.DataLoader(
+        torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),
+        batch_size=BATCHSIZE,
+        shuffle=True,
+    )
+
+    val_dataset = torchvision.datasets.FashionMNIST(
+        "test/data", train=False, transform=torchvision.transforms.ToTensor()
+    )
+
+    val_loader = torch.utils.data.DataLoader(
+        torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),
+        batch_size=BATCHSIZE,
+        shuffle=True,
+    )
+
+    def define_model(configuration):
+        n_layers = configuration["n_layers"]
+        layers = []
+        in_features = 28 * 28
+        for i in range(n_layers):
+            out_features = configuration["n_units_l{}".format(i)]
+            layers.append(nn.Linear(in_features, out_features))
+            layers.append(nn.ReLU())
+            p = configuration["dropout_{}".format(i)]
+            layers.append(nn.Dropout(p))
+            in_features = out_features
+        layers.append(nn.Linear(in_features, 10))
+        layers.append(nn.LogSoftmax(dim=1))
+        return nn.Sequential(*layers)
+
+    def train_model(model, optimizer, train_loader):
+        model.train()
+        for batch_idx, (data, target) in enumerate(train_loader):
+            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+            optimizer.zero_grad()
+            F.nll_loss(model(data), target).backward()
+            optimizer.step()
+
+    def eval_model(model, valid_loader):
+        model.eval()
+        correct = 0
+        with torch.no_grad():
+            for batch_idx, (data, target) in enumerate(valid_loader):
+                data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+                pred = model(data).argmax(dim=1, keepdim=True)
+                correct += pred.eq(target.view_as(pred)).sum().item()
+
+        accuracy = correct / N_VALID_EXAMPLES
+        flops, params = thop.profile(
+            model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
+        )
+        return np.log2(flops), 1 - accuracy, params
+
+    def evaluate_function(configuration):
+        model = define_model(configuration).to(DEVICE)
+        optimizer = torch.optim.Adam(model.parameters(), configuration["lr"])
+        n_epoch = configuration["n_epoch"]
+        for epoch in range(n_epoch):
+            train_model(model, optimizer, train_loader)
+        flops, error_rate, params = eval_model(model, val_loader)
+        return {"error_rate": error_rate, "flops": flops, "params": params}
+
+    lexico_objectives = {}
+    lexico_objectives["metrics"] = ["error_rate", "flops"]
+    lexico_objectives["tolerances"] = {"error_rate": 0.02, "flops": 0.0}
+    lexico_objectives["targets"] = {"error_rate": 0.0, "flops": 0.0}
+    lexico_objectives["modes"] = ["min", "min"]
+
+    search_space = {
+        "n_layers": tune.randint(lower=1, upper=3),
+        "n_units_l0": tune.randint(lower=4, upper=128),
+        "n_units_l1": tune.randint(lower=4, upper=128),
+        "n_units_l2": tune.randint(lower=4, upper=128),
+        "dropout_0": tune.uniform(lower=0.2, upper=0.5),
+        "dropout_1": tune.uniform(lower=0.2, upper=0.5),
+        "dropout_2": tune.uniform(lower=0.2, upper=0.5),
+        "lr": tune.loguniform(lower=1e-5, upper=1e-1),
+        "n_epoch": tune.randint(lower=1, upper=20),
+    }
+
+    low_cost_partial_config = {
+        "n_layers": 1,
+        "n_units_l0": 4,
+        "n_units_l1": 4,
+        "n_units_l2": 4,
+        "n_epoch": 1,
+    }
+
+    # lexico tune
+    analysis = tune.run(
+        evaluate_function,
+        num_samples=5,
+        config=search_space,
+        use_ray=False,
+        lexico_objectives=lexico_objectives,
+        low_cost_partial_config=low_cost_partial_config,
+    )
+    print(analysis.best_trial)
+    print(analysis.best_config)
+    print(analysis.best_result)
+
+    # Non lexico tune
+    analysis = tune.run(
+        evaluate_function,
+        metric="error_rate",
+        mode="min",
+        num_samples=5,
+        config=search_space,
+        use_ray=False,
+        lexico_objectives=None,
+        low_cost_partial_config=low_cost_partial_config,
+    )
+    print(analysis.best_trial)
+    print(analysis.best_config)
+    print(analysis.best_result)
+
+
+# Allow running this test file directly as a script.
+if __name__ == "__main__":
+    test_lexiflow()
--- a/website/docs/Contribute.md
+++ b/website/docs/Contribute.md
@ -64,6 +64,8 @@ git clone https://github.com/microsoft/FLAML.git
 pip install -e FLAML[test,notebook]
 ```

+In case the `pip install` command fails, try escaping the brackets such as `pip install -e FLAML\[test,notebook\]`
+
 ### Docker

 We provide a simple [Dockerfile](https://github.com/microsoft/FLAML/blob/main/Dockerfile).
--- a/website/docs/Examples/Integrate
+++ b/website/docs/Examples/Integrate
@ -37,7 +37,7 @@ automl = AutoML()
 settings = {
    "time_budget": 60,  # total running time in seconds
    "metric": "accuracy",  # metric to optimize
-    "task": "classification",  # task type  
+    "task": "classification",  # task type
    "log_file_name": "airlines_experiment.log",  # flaml log file
 }
 experiment = mlflow.set_experiment("flaml")  # the experiment name in AzureML workspace
--- a/website/docs/Examples/Tune-AzureML-pipeline.md
+++ b/website/docs/Examples/Tune-AzureML-pipeline.md
@ -205,7 +205,7 @@ Overall, to tune the hyperparameters of the AzureML pipeline, run:

 ```bash
 # the training job will run remotely as an AzureML job in both choices
-# run the tuning job locally 
+# run the tuning job locally
 python submit_tune.py --local
 # run the tuning job remotely
 python submit_tune.py --remote --subscription_id <your subscription_id> --resource_group <your resource_group> --workspace <your workspace>
--- a/website/docs/Examples/Tune-Lexicographic-objectives.md
+++ b/website/docs/Examples/Tune-Lexicographic-objectives.md
@ -0,0 +1,165 @@
+# Tune - Lexicographic Objectives
+
+## Requirements
+
+```bash
+pip install flaml thop torchvision torch
+```
+
+## Tuning accurate and efficient neural networks with lexicographic preference
+
+### Data
+
+```python
+import torch
+import thop
+import torch.nn as nn
+from flaml import tune
+import torch.nn.functional as F
+import torchvision
+import numpy as np
+import os
+
+DEVICE = torch.device("cpu")
+BATCHSIZE = 128
+N_TRAIN_EXAMPLES = BATCHSIZE * 30
+N_VALID_EXAMPLES = BATCHSIZE * 10
+data_dir = os.path.abspath("data")
+
+train_dataset = torchvision.datasets.FashionMNIST(
+    data_dir,
+    train=True,
+    download=True,
+    transform=torchvision.transforms.ToTensor(),
+)
+
+train_loader = torch.utils.data.DataLoader(
+    torch.utils.data.Subset(train_dataset, list(range(N_TRAIN_EXAMPLES))),
+    batch_size=BATCHSIZE,
+    shuffle=True,
+)
+
+val_dataset = torchvision.datasets.FashionMNIST(
+    data_dir, train=False, transform=torchvision.transforms.ToTensor()
+)
+
+val_loader = torch.utils.data.DataLoader(
+    torch.utils.data.Subset(val_dataset, list(range(N_VALID_EXAMPLES))),
+    batch_size=BATCHSIZE,
+    shuffle=True,
+)
+```
+
+### Specify the model
+
+```python
+def define_model(configuration):
+    n_layers = configuration["n_layers"]
+    layers = []
+    in_features = 28 * 28
+    for i in range(n_layers):
+        out_features = configuration["n_units_l{}".format(i)]
+        layers.append(nn.Linear(in_features, out_features))
+        layers.append(nn.ReLU())
+        p = configuration["dropout_{}".format(i)]
+        layers.append(nn.Dropout(p))
+        in_features = out_features
+    layers.append(nn.Linear(in_features, 10))
+    layers.append(nn.LogSoftmax(dim=1))
+    return nn.Sequential(*layers)
+```
+
+### Train
+
+```python
+def train_model(model, optimizer, train_loader):
+    model.train()
+    for batch_idx, (data, target) in enumerate(train_loader):
+        data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+        optimizer.zero_grad()
+        F.nll_loss(model(data), target).backward()
+        optimizer.step()
+```
+
+### Metrics
+
+```python
+def eval_model(model, valid_loader):
+    model.eval()
+    correct = 0
+    with torch.no_grad():
+        for batch_idx, (data, target) in enumerate(valid_loader):
+            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
+            pred = model(data).argmax(dim=1, keepdim=True)
+            correct += pred.eq(target.view_as(pred)).sum().item()
+
+    accuracy = correct / N_VALID_EXAMPLES
+    flops, params = thop.profile(
+        model, inputs=(torch.randn(1, 28 * 28).to(DEVICE),), verbose=False
+    )
+    return np.log2(flops), 1 - accuracy, params
+```
+
+
+
+### Evaluation function
+
+```python
+def evaluate_function(configuration):
+    model = define_model(configuration).to(DEVICE)
+    optimizer = torch.optim.Adam(model.parameters(), configuration["lr"])
+    n_epoch = configuration["n_epoch"]
+    for epoch in range(n_epoch):
+        train_model(model, optimizer, train_loader)
+    flops, error_rate, params = eval_model(model, val_loader)
+    return {"error_rate": error_rate, "flops": flops, "params": params}
+```
+
+### Search space
+```python
+search_space = {
+    "n_layers": tune.randint(lower=1, upper=3),
+    "n_units_l0": tune.randint(lower=4, upper=128),
+    "n_units_l1": tune.randint(lower=4, upper=128),
+    "n_units_l2": tune.randint(lower=4, upper=128),
+    "dropout_0": tune.uniform(lower=0.2, upper=0.5),
+    "dropout_1": tune.uniform(lower=0.2, upper=0.5),
+    "dropout_2": tune.uniform(lower=0.2, upper=0.5),
+    "lr": tune.loguniform(lower=1e-5, upper=1e-1),
+    "n_epoch": tune.randint(lower=1, upper=20),
+}
+```
+
+### Launch the tuning process
+
+```python
+
+# Low cost initial point
+low_cost_partial_config = {
+    "n_layers": 1,
+    "n_units_l0": 4,
+    "n_units_l1": 4,
+    "n_units_l2": 4,
+    "n_epoch": 1,
+}
+
+# Specify lexicographic preference
+lexico_objectives = {}
+lexico_objectives["metrics"] = ["error_rate", "flops"]
+lexico_objectives["tolerances"] = {"error_rate": 0.02, "flops": 0.0}
+lexico_objectives["targets"] = {"error_rate": 0.0, "flops": 0.0}
+lexico_objectives["modes"] = ["min", "min"]
+
+# launch the tuning process
+analysis = tune.run(
+    evaluate_function,
+    num_samples=-1,
+    time_budget_s=100,
+    config=search_space, # search space of NN
+    use_ray=False,
+    lexico_objectives=lexico_objectives,
+    low_cost_partial_config=low_cost_partial_config, # low cost initial point
+)
+```
+
+
+[Link to notebook](https://github.com/microsoft/FLAML/blob/main/notebook/tune_lexicographic.ipynb) | [Open in colab](https://colab.research.google.com/github/microsoft/FLAML/blob/main/notebook/tune_lexicographic.ipynb)
--- a/website/docs/Getting-Started.md
+++ b/website/docs/Getting-Started.md
@ -78,7 +78,7 @@ Please see this [script](https://github.com/microsoft/FLAML/blob/main/test/tune_

 #### [Zero-shot AutoML](Use-Cases/Zero-Shot-AutoML)

-FLAML offers a unique, seamless and effortless way to leverage AutoML for the commonly used classifiers and regressors such as LightGBM and XGBoost. For example, if you are using `lightgbm.LGBMClassifier` as your current learner, all you need to do is to replace `from ligthgbm import LGBMClassifier` by:
+FLAML offers a unique, seamless and effortless way to leverage AutoML for the commonly used classifiers and regressors such as LightGBM and XGBoost. For example, if you are using `lightgbm.LGBMClassifier` as your current learner, all you need to do is to replace `from lightgbm import LGBMClassifier` by:

 ```python
 from flaml.default import LGBMClassifier
--- a/website/docs/Installation.md
+++ b/website/docs/Installation.md
@ -79,10 +79,10 @@ You can use FLAML in .NET in the following ways:
 **Low-code**

 - [*Model Builder*](https://dotnet.microsoft.com/apps/machinelearning-ai/ml-dotnet/model-builder) - A Visual Studio extension for training ML models using FLAML. For more information on how to install the, see the [install Model Builder](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-model-builder?tabs=visual-studio-2022) guide.
- [*ML.NET CLI*](https://docs.microsoft.com/dotnet/machine-learning/automate-training-with-cli) - A dotnet CLI tool for training machine learning models using FLAML on Windows, MacOS, and Linux. For more information on how to install the ML.NET CLI, see the [install the ML.NET CLI](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-ml-net-cli?tabs=windows) guide. 
+- [*ML.NET CLI*](https://docs.microsoft.com/dotnet/machine-learning/automate-training-with-cli) - A dotnet CLI tool for training machine learning models using FLAML on Windows, MacOS, and Linux. For more information on how to install the ML.NET CLI, see the [install the ML.NET CLI](https://docs.microsoft.com/dotnet/machine-learning/how-to-guides/install-ml-net-cli?tabs=windows) guide.

 **Code-first**

- [*Microsoft.ML.AutoML*](https://www.nuget.org/packages/Microsoft.ML.AutoML/0.20.0-preview.22313.1) - NuGet package that provides direct access to the FLAML AutoML APIs that power low-code solutions like Model Builder and the ML.NET CLI. For more information on installing NuGet packages, see the install and use a NuGet package in [Visual Studio](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-in-visual-studio) or [dotnet CLI](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-using-the-dotnet-cli) guides. 
+- [*Microsoft.ML.AutoML*](https://www.nuget.org/packages/Microsoft.ML.AutoML/0.20.0-preview.22313.1) - NuGet package that provides direct access to the FLAML AutoML APIs that power low-code solutions like Model Builder and the ML.NET CLI. For more information on installing NuGet packages, see the install and use a NuGet package in [Visual Studio](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-in-visual-studio) or [dotnet CLI](https://docs.microsoft.com/nuget/quickstart/install-and-use-a-package-using-the-dotnet-cli) guides.

 To get started with the ML.NET API and AutoML, see the [csharp-notebooks](https://github.com/dotnet/csharp-notebooks#machine-learning).
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@ -169,7 +169,7 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
        return space
 ```

-In the constructor, we set `self.estimator_class` as `RGFClassifier` or `RGFRegressor` according to the task type. If the estimator you want to tune does not have a scikit-learn style `fit()` and `predict()` API, you can override the `fit()` and `predict()` function of `flaml.model.BaseEstimator`, like [XGBoostEstimator](../reference/model#xgboostestimator-objects).
+In the constructor, we set `self.estimator_class` as `RGFClassifier` or `RGFRegressor` according to the task type. If the estimator you want to tune does not have a scikit-learn style `fit()` and `predict()` API, you can override the `fit()` and `predict()` function of `flaml.model.BaseEstimator`, like [XGBoostEstimator](../reference/model#xgboostestimator-objects). Importantly, we also add the `task="binary"` parameter in the signature of `__init__` so that it doesn't get grouped together with the `**config` kwargs that determine the parameters with which the underlying estimator (`self.estimator_class`) is constructed. If your estimator doesn't use one of the parameters that it is passed (for example, some regressors in `scikit-learn` don't use the `n_jobs` parameter), it is enough to add `n_jobs=None` to the signature so that it is excluded from the `**config` dict.

 2. Give the custom estimator a name and add it in AutoML. E.g.,

@ -422,7 +422,6 @@ automl2.fit(X_train, y_train, time_budget=7200, starting_points=automl1.best_con

 `starting_points` is a dictionary or a str to specify the starting hyperparameter config. (1) When it is a dictionary, the keys are the estimator names. If you do not need to specify starting points for an estimator, exclude its name from the dictionary. The value for each key can be either a dictionary of a list of dictionaries, corresponding to one hyperparameter configuration, or multiple hyperparameter configurations, respectively. (2) When it is a str: if "data", use data-dependent defaults; if "data:path", use data-dependent defaults which are stored at path; if "static", use data-independent defaults. Please find more details about data-dependent defaults in [zero shot AutoML](Zero-Shot-AutoML#combine-zero-shot-automl-and-hyperparameter-tuning).

-
 ### Log the trials

 The trials are logged in a file if a `log_file_name` is passed.
@ -574,7 +573,7 @@ The curve suggests that increasing the time budget may further improve the accur
 2. set t2 as the time budget, and also set `early_stop=True`. If the early stopping is triggered, you will see a warning like
 > WARNING - All estimator hyperparameters local search has converged at least once, and the total search time exceeds 10 times the time taken to find the best model.

-> WARNING - Stopping search as early_stop is set to True.
+ > WARNING - Stopping search as early_stop is set to True.

 ### How much time is needed to find the best model

--- a/website/docs/Use-Cases/Tune-User-Defined-Function.md
+++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md
@ -130,7 +130,6 @@ You can find the corresponding search space choice in the table below once you h
 | log scale      | tune.lograndint(lower: int, upper: int, base: float = 10 | tune.loguniform(lower: float, upper: float, base: float = 10)|
 | linear scale with quantization| tune.qrandint(lower: int, upper: int, q: int = 1)| tune.quniform(lower: float, upper: float, q: float = 1)|
 log scale with quantization  | tune.qlograndint(lower: int, upper, q: int = 1, base: float = 10)| tune.qloguniform(lower: float, upper, q: float = 1, base: float = 10)
-|


 See the example below for the commonly used types of domains.
@ -515,6 +514,31 @@ analysis = tune.run(
 )
 ```

+### Lexicographic Objectives
+We support tuning multiple objectives with lexicographic preference by providing the argument `lexico_objectives` to `tune.run()`.
+`lexico_objectives` is a dictionary that contains the following fields of key-value pairs:
+ - `metrics`: a list of optimization objectives with the orders reflecting the priorities/preferences of the objectives.
+ - `modes`: (optional) a list of optimization modes (each mode either "min" or "max") corresponding to the objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+ - `tolerances`: (optional) a dictionary to specify the optimality tolerances on objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical tolerance values.
+ - `targets`: (optional) a dictionary to specify the optimization targets on the objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical target values.
+
+In the following example, we want to minimize `val_loss` and `pred_time` of the model, where `val_loss` has higher priority. The tolerances for `val_loss` and `pred_time` are 0.02 and 0, respectively. We do not have specific targets for these two objectives, so we set the targets to -inf for both.
+
+```python
+lexico_objectives = {}
+lexico_objectives["metrics"] = ["val_loss", "pred_time"]
+lexico_objectives["modes"] = ["min", "min"]
+lexico_objectives["tolerances"] = {"val_loss": 0.02, "pred_time": 0.0}
+lexico_objectives["targets"] = {"val_loss": -float('inf'), "pred_time": -float('inf')}
+
+# provide the lexico_objectives to tune.run
+tune.run(..., search_alg=None, lexico_objectives=lexico_objectives)
+```
+NOTE:
+
+1. When `lexico_objectives` is not None, the arguments `metric` and `mode` become invalid, and flaml's tune uses CFO as the `search_alg`, which also makes any user-provided `search_alg` invalid.
+
+2. This is a new feature that will be released in version 1.1.0 and is subject to change in future versions.

 ## Hyperparameter Optimization Algorithm

--- a/website/package.json
+++ b/website/package.json
@ -33,7 +33,8 @@
    "rehype-katex": "4",
    "remark-math": "3",
    "trim": "^0.0.3",
-    "url-loader": "^4.1.1"
+    "url-loader": "^4.1.1",
+    "minimatch": "3.0.5"
  },
  "browserslist": {
    "production": [
--- a/website/yarn.lock
+++ b/website/yarn.lock
@ -5462,6 +5462,13 @@ minimatch@3.0.4:
  dependencies:
    brace-expansion "^1.1.7"

+minimatch@3.0.5:
+  version "3.0.5"
+  resolved "https://registry.npmmirror.com/minimatch/-/minimatch-3.0.5.tgz#4da8f1290ee0f0f8e83d60ca69f8f134068604a3"
+  integrity sha512-tUpxzX0VAzJHjLu0xUfFv1gwVp9ba3IOuRAVH2EGuRW8a5emA2FlACLqiT/lDVtS1W+TGNwqz3sWaNyLgDJWuw==
+  dependencies:
+    brace-expansion "^1.1.7"
+
 minimatch@^3.0.4, minimatch@^3.1.1:
  version "3.1.2"
  resolved "https://registry.npmmirror.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b"