From b1115d5347e00326d10e5d20836b912bb2cc95e9 Mon Sep 17 00:00:00 2001
From: Qingyun Wu
Date: Sun, 19 Sep 2021 20:44:25 -0400
Subject: [PATCH] add consistency test (#216)

* add consistency test

* test_consistency and format

* add results attribute

* skip when ray is not installed

* Update flaml/tune/analysis.py

Co-authored-by: Chi Wang
Co-authored-by: Qingyun Wu
Co-authored-by: Chi Wang
---
 flaml/tune/analysis.py                      |  59 ++++++----
 test/tune/test_flaml_raytune_consistency.py | 123 ++++++++++++++++++++
 2 files changed, 161 insertions(+), 21 deletions(-)
 create mode 100644 test/tune/test_flaml_raytune_consistency.py

diff --git a/flaml/tune/analysis.py b/flaml/tune/analysis.py
index bd8135b94..320b0d279 100644
--- a/flaml/tune/analysis.py
+++ b/flaml/tune/analysis.py
@@ -1,4 +1,4 @@
-'''
+"""
 Copyright 2020 The Ray Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
@@ -16,12 +16,13 @@ limitations under the License.
 
 This source file is adapted here because ray does not fully support Windows.
 
 Copyright (c) Microsoft Corporation.
-'''
+"""
 from typing import Dict, Optional
 import numpy as np
 from .trial import Trial
 import logging
+
 logger = logging.getLogger(__name__)
 
 
@@ -30,8 +31,7 @@ def is_nan_or_inf(value):
 
 
 class ExperimentAnalysis:
-    """Analyze results from a Tune experiment.
-    """
+    """Analyze results from a Tune experiment."""
 
     @property
     def best_trial(self) -> Trial:
@@ -46,7 +46,8 @@ class ExperimentAnalysis:
                 "To fetch the `best_trial`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_trial(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_trial(self.default_metric, self.default_mode)
 
     @property
@@ -62,30 +63,41 @@ class ExperimentAnalysis:
                 "To fetch the `best_config`, pass a `metric` and `mode` "
                 "parameter to `tune.run()`. Alternatively, use the "
                 "`get_best_config(metric, mode)` method to set the metric "
-                "and mode explicitly.")
+                "and mode explicitly."
+            )
         return self.get_best_config(self.default_metric, self.default_mode)
 
+    @property
+    def results(self) -> Dict[str, Dict]:
+        """Get the last result of all the trials of the experiment"""
+
+        return {trial.trial_id: trial.last_result for trial in self.trials}
+
     def _validate_metric(self, metric: str) -> str:
         if not metric and not self.default_metric:
             raise ValueError(
                 "No `metric` has been passed and `default_metric` has "
-                "not been set. Please specify the `metric` parameter.")
+                "not been set. Please specify the `metric` parameter."
+            )
         return metric or self.default_metric
 
     def _validate_mode(self, mode: str) -> str:
         if not mode and not self.default_mode:
             raise ValueError(
                 "No `mode` has been passed and `default_mode` has "
-                "not been set. Please specify the `mode` parameter.")
+                "not been set. Please specify the `mode` parameter."
+            )
         if mode and mode not in ["min", "max"]:
             raise ValueError("If set, `mode` has to be one of [min, max]")
         return mode or self.default_mode
 
-    def get_best_trial(self,
-                       metric: Optional[str] = None,
-                       mode: Optional[str] = None,
-                       scope: str = "last",
-                       filter_nan_and_inf: bool = True) -> Optional[Trial]:
+    def get_best_trial(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+        filter_nan_and_inf: bool = True,
+    ) -> Optional[Trial]:
         """Retrieve the best trial object.
         Compares all trials' scores on ``metric``.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
@@ -116,11 +128,13 @@ class ExperimentAnalysis:
         if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
-                "\"last-5-avg\", \"last-10-avg\"]. "
+                'metric {} for scope {} not in ["all", "last", "avg", '
+                '"last-5-avg", "last-10-avg"]. '
                 "If you didn't pass a `metric` parameter to `tune.run()`, "
                 "you have to pass one when fetching the best trial.".format(
-                    metric, scope))
+                    metric, scope
+                )
+            )
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
@@ -150,13 +164,16 @@ class ExperimentAnalysis:
         if not best_trial:
             logger.warning(
                 "Could not find best trial. Did you pass the correct `metric` "
-                "parameter?")
+                "parameter?"
+            )
         return best_trial
 
-    def get_best_config(self,
-                        metric: Optional[str] = None,
-                        mode: Optional[str] = None,
-                        scope: str = "last") -> Optional[Dict]:
+    def get_best_config(
+        self,
+        metric: Optional[str] = None,
+        mode: Optional[str] = None,
+        scope: str = "last",
+    ) -> Optional[Dict]:
         """Retrieve the best config corresponding to the trial.
         Compares all trials' scores on `metric`.
         If ``metric`` is not specified, ``self.default_metric`` will be used.
diff --git a/test/tune/test_flaml_raytune_consistency.py b/test/tune/test_flaml_raytune_consistency.py
new file mode 100644
index 000000000..dee393c3a
--- /dev/null
+++ b/test/tune/test_flaml_raytune_consistency.py
@@ -0,0 +1,123 @@
+# import unittest
+import numpy as np
+
+# require: pip install flaml[blendsearch, ray]
+# require: pip install flaml[ray]
+import time
+from flaml import tune
+
+
+def evaluate_config(config):
+    """evaluate a hyperparameter configuration"""
+    # we use a toy example with 2 hyperparameters
+    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
+    # usually the evaluation takes a non-negligible cost
+    # and the cost could be related to certain hyperparameters
+    # in this example, we assume it's proportional to x
+    time.sleep(config["x"] / 100000)
+    # use tune.report to report the metric to optimize
+    tune.report(metric=metric)
+
+
+config_search_space = {
+    "x": tune.lograndint(lower=1, upper=100000),
+    "y": tune.randint(lower=1, upper=100000),
+}
+
+low_cost_partial_config = {"x": 1}
+
+
+def setup_searcher(searcher_name):
+    from flaml.searcher.blendsearch import BlendSearch, CFO, RandomSearch
+
+    if "cfo" in searcher_name:
+        searcher = CFO(
+            space=config_search_space, low_cost_partial_config=low_cost_partial_config
+        )
+    elif searcher_name == "bs":
+        searcher = BlendSearch(
+            metric="metric",
+            mode="min",
+            space=config_search_space,
+            low_cost_partial_config=low_cost_partial_config,
+        )
+    elif searcher_name == "random":
+        searcher = RandomSearch(space=config_search_space)
+    else:
+        return None
+    return searcher
+
+
+def _test_flaml_raytune_consistency(
+    num_samples=-1, max_concurrent_trials=1, searcher_name="cfo"
+):
+    try:
+        from ray import tune as raytune
+    except ImportError:
+        print(
+            "skip _test_flaml_raytune_consistency because ray tune cannot be imported."
+        )
+        return
+    np.random.seed(100)
+    searcher = setup_searcher(searcher_name)
+    analysis = tune.run(
+        evaluate_config,  # the function to evaluate a config
+        config=config_search_space,  # the search space
+        low_cost_partial_config=low_cost_partial_config,  # an initial (partial) config with low cost
+        metric="metric",  # the name of the metric used for optimization
+        mode="min",  # the optimization mode, 'min' or 'max'
+        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
+        time_budget_s=None,  # the time budget in seconds
+        local_dir="logs/",  # the local directory to store logs
+        search_alg=searcher,
+        # verbose=0,  # verbosity
+        # use_ray=True,  # uncomment when performing parallel tuning using ray
+    )
+    flaml_best_config = analysis.best_config
+    flaml_config_in_results = [v["config"] for v in analysis.results.values()]
+    print(analysis.best_trial.last_result)  # the best trial's result
+    print("best flaml", searcher_name, flaml_best_config)  # the best config
+    print("flaml config in results", searcher_name, flaml_config_in_results)
+
+    np.random.seed(100)
+    searcher = setup_searcher(searcher_name)
+    from ray.tune.suggest import ConcurrencyLimiter
+
+    search_alg = ConcurrencyLimiter(searcher, max_concurrent_trials)
+    analysis = raytune.run(
+        evaluate_config,  # the function to evaluate a config
+        config=config_search_space,
+        metric="metric",  # the name of the metric used for optimization
+        mode="min",  # the optimization mode, 'min' or 'max'
+        num_samples=num_samples,  # the maximal number of configs to try, -1 means infinite
+        local_dir="logs/",  # the local directory to store logs
+        # max_concurrent_trials=max_concurrent_trials,
+        # resources_per_trial={"cpu": max_concurrent_trials, "gpu": 0},
+        search_alg=search_alg,
+    )
+    ray_best_config = analysis.best_config
+    ray_config_in_results = [v["config"] for v in analysis.results.values()]
+    print(analysis.best_trial.last_result)  # the best trial's result
+    print("ray best", searcher_name, analysis.best_config)  # the best config
+    print("ray config in results", searcher_name, ray_config_in_results)
+    assert ray_best_config == flaml_best_config, "best config should be the same"
+    assert (
+        flaml_config_in_results == ray_config_in_results
+    ), "results from raytune and flaml should be the same"
+
+
+def test_consistency():
+    _test_flaml_raytune_consistency(
+        num_samples=5, max_concurrent_trials=1, searcher_name="random"
+    )
+    _test_flaml_raytune_consistency(
+        num_samples=5, max_concurrent_trials=1, searcher_name="cfo"
+    )
+    _test_flaml_raytune_consistency(
+        num_samples=5, max_concurrent_trials=1, searcher_name="bs"
+    )
+
+
+if __name__ == "__main__":
+    # unittest.main()
+    test_consistency()
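For reviewers who want to exercise the new `ExperimentAnalysis.results` property outside the test, the sketch below is not part of the patch; it reuses the toy objective and search space from test_flaml_raytune_consistency.py and only assumes `flaml` is installed (ray is not needed for this flaml-only path). The per-trial access pattern mirrors what the consistency assertions compare.

# Minimal usage sketch (not part of the patch): exercise the `results`
# property added to flaml/tune/analysis.py in this commit.
import time

from flaml import tune


def evaluate_config(config):
    """Toy objective with two hyperparameters, as in the test above."""
    metric = (round(config["x"]) - 85000) ** 2 - config["x"] / config["y"]
    time.sleep(config["x"] / 100000)  # simulated, x-dependent evaluation cost
    tune.report(metric=metric)


analysis = tune.run(
    evaluate_config,
    config={
        "x": tune.lograndint(lower=1, upper=100000),
        "y": tune.randint(lower=1, upper=100000),
    },
    low_cost_partial_config={"x": 1},
    metric="metric",
    mode="min",
    num_samples=5,
    local_dir="logs/",
)

# `results` maps trial_id -> the trial's last reported result dict, so each
# trial's sampled config and reported metric can be inspected directly.
for trial_id, result in analysis.results.items():
    print(trial_id, result["config"], result["metric"])
print("best config:", analysis.best_config)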