From b7a91e0385c83c9f59a75dba460776a795047826 Mon Sep 17 00:00:00 2001 From: Chi Wang Date: Tue, 6 Apr 2021 11:37:52 -0700 Subject: [PATCH] V0.3.0 (#55) * flaml v0.3 * low cost partial config --- .github/workflows/python-package.yml | 2 +- README.md | 2 +- flaml/automl.py | 8 ++- flaml/model.py | 7 ++ flaml/searcher/blendsearch.py | 24 +++---- flaml/searcher/flow2.py | 6 +- flaml/tune/README.md | 8 +-- flaml/tune/tune.py | 30 +++++---- flaml/version.py | 2 +- notebook/flaml_finetune_transformer.ipynb | 2 +- setup.py | 2 +- test/hf/test_deberta.py | 8 +-- test/hf/test_distillbert.py | 8 +-- test/hf/test_electra.py | 8 +-- test/hf/test_roberta.py | 8 +-- test/nni/flaml_nni_wrap.py | 4 +- test/test_pytorch_cifar10.py | 8 +-- test/test_tune.py | 14 ++-- test/test_xgboost2d_sample_size.py | 78 +++++++++++++++++++++++ 19 files changed, 161 insertions(+), 68 deletions(-) create mode 100644 test/test_xgboost2d_sample_size.py diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 79b56f42e..22ec3d7eb 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -38,7 +38,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -e .[test] - - name: If linux or max, install ray + - name: If linux or mac, install ray if: matrix.os == 'macOS-latest' || matrix.os == 'ubuntu-latest' run: | pip install -e .[ray] diff --git a/README.md b/README.md index f2aebd0ae..8f3c58b5d 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ automl.fit(X_train, y_train, task="classification", estimator_list=["lgbm"]) * You can also run generic ray-tune style hyperparameter tuning for a custom function. ```python from flaml import tune -tune.run(train_with_config, config={…}, init_config={…}, time_budget_s=3600) +tune.run(train_with_config, config={…}, low_cost_partial_config={…}, time_budget_s=3600) ``` ## Installation diff --git a/flaml/automl.py b/flaml/automl.py index 2e24c57dd..db9a76f26 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -51,6 +51,7 @@ class SearchState: self.init_eci = learner_class.cost_relative2lgbm() self._search_space_domain = {} self.init_config = {} + self.low_cost_partial_config = {} self.cat_hp_cost = {} self.data_size = data_size search_space = learner_class.search_space( @@ -60,6 +61,9 @@ class SearchState: self._search_space_domain[name] = space['domain'] if 'init_value' in space: self.init_config[name] = space['init_value'] + if 'low_cost_init_value' in space: + self.low_cost_partial_config[name] = space[ + 'low_cost_init_value'] if 'cat_hp_cost' in space: self.cat_hp_cost[name] = space['cat_hp_cost'] self._hp_names = list(self._search_space_domain.keys()) @@ -1017,12 +1021,15 @@ class AutoML: keys[1]: x2, }) self._max_iter_per_learner = len(points_to_evaluate) + low_cost_partial_config = None else: points_to_evaluate=[search_state.init_config] + low_cost_partial_config = search_state.low_cost_partial_config if self._hpo_method in ('bs', 'cfo', 'grid'): algo = SearchAlgo(metric='val_loss', mode='min', space=search_space, points_to_evaluate=points_to_evaluate, + low_cost_partial_config=low_cost_partial_config, cat_hp_cost=search_state.cat_hp_cost, prune_attr=prune_attr, min_resource=min_resource, @@ -1048,7 +1055,6 @@ class AutoML: start_run_time = time.time() # warnings.filterwarnings("ignore") analysis = tune.run(search_state.training_function, - init_config=None, search_alg=search_state.search_alg, time_budget_s=budget_left, verbose=max(self.verbose-1,0), #local_dir='logs/tune_results', diff --git 
a/flaml/model.py b/flaml/model.py index 70b0c816e..4a15c1fa1 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -182,10 +182,12 @@ class LGBMEstimator(BaseEstimator): 'n_estimators': { 'domain': tune.qloguniform(lower=4, upper=upper, q=1), 'init_value': 4, + 'low_cost_init_value': 4, }, 'max_leaves': { 'domain': tune.qloguniform(lower=4, upper=upper, q=1), 'init_value': 4, + 'low_cost_init_value': 4, }, 'min_data_in_leaf': { 'domain': tune.qloguniform(lower=2, upper=2**7, q=1), @@ -304,10 +306,12 @@ class XGBoostEstimator(SKLearnEstimator): 'n_estimators': { 'domain': tune.qloguniform(lower=4, upper=upper, q=1), 'init_value': 4, + 'low_cost_init_value': 4, }, 'max_leaves': { 'domain': tune.qloguniform(lower=4, upper=upper, q=1), 'init_value': 4, + 'low_cost_init_value': 4, }, 'min_child_weight': { 'domain': tune.loguniform(lower=0.001, upper=128), @@ -463,6 +467,7 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator): 'n_estimators': { 'domain': tune.qloguniform(lower=4, upper=upper, q=1), 'init_value': 4, + 'low_cost_init_value': 4, }, 'max_features': { 'domain': tune.loguniform(lower=0.1, upper=1.0), @@ -596,6 +601,7 @@ class CatBoostEstimator(BaseEstimator): 'early_stopping_rounds': { 'domain': tune.qloguniform(lower=10, upper=upper, q=1), 'init_value': 10, + 'low_cost_init_value': 10, }, 'learning_rate': { 'domain': tune.loguniform(lower=.005, upper=.2), @@ -707,6 +713,7 @@ class KNeighborsEstimator(BaseEstimator): 'n_neighbors': { 'domain': tune.qloguniform(lower=1, upper=upper, q=1), 'init_value': 5, + 'low_cost_init_value': 1, }, } diff --git a/flaml/searcher/blendsearch.py b/flaml/searcher/blendsearch.py index 1afe54829..2ace727bd 100644 --- a/flaml/searcher/blendsearch.py +++ b/flaml/searcher/blendsearch.py @@ -32,7 +32,8 @@ class BlendSearch(Searcher): metric: Optional[str] = None, mode: Optional[str] = None, space: Optional[dict] = None, - points_to_evaluate: Optional[List[Dict]] = None, + points_to_evaluate: Optional[List[dict]] = None, + low_cost_partial_config: Optional[dict] = None, cat_hp_cost: Optional[dict] = None, prune_attr: Optional[str] = None, min_resource: Optional[float] = None, @@ -50,14 +51,14 @@ class BlendSearch(Searcher): mode: A string in ['min', 'max'] to specify the objective as minimization or maximization. space: A dictionary to specify the search space. points_to_evaluate: Initial parameter suggestions to be run first. - The first element needs to be a dictionary from a subset of - controlled dimensions to the initial low-cost values. - e.g., - - .. code-block:: python - - [{'epochs': 1}] + low_cost_partial_config: A dictionary from a subset of + controlled dimensions to the initial low-cost values. + e.g., + .. code-block:: python + + {'n_estimators': 4, 'max_leaves': 4} + cat_hp_cost: A dictionary from a subset of categorical dimensions to the relative cost of each choice. e.g., @@ -92,9 +93,8 @@ class BlendSearch(Searcher): seed: An integer of the random seed.
''' self._metric, self._mode = metric, mode - if points_to_evaluate: init_config = points_to_evaluate[0] - else: init_config = {} - self._points_to_evaluate = points_to_evaluate + init_config = low_cost_partial_config or {} + self._points_to_evaluate = points_to_evaluate or [] if global_search_alg is not None: self._gs = global_search_alg elif getattr(self, '__name__', None) != 'CFO': @@ -301,10 +301,10 @@ class BlendSearch(Searcher): # logger.debug(f"random config {config}") skip = self._should_skip(choice, trial_id, config) if skip: return None - # if not choice: print(config) if choice or self._valid(config): # LS or valid or no backup choice self._trial_proposed_by[trial_id] = choice + if not choice: print(config) else: # invalid config proposed by GS # if not self._use_rs: # self._search_thread_pool[choice].on_trial_complete( diff --git a/flaml/searcher/flow2.py b/flaml/searcher/flow2.py index b3c6f60a3..e20274555 100644 --- a/flaml/searcher/flow2.py +++ b/flaml/searcher/flow2.py @@ -42,8 +42,10 @@ class FLOW2(Searcher): '''Constructor Args: - init_config: a dictionary from a subset of controlled dimensions - to the initial low-cost values. e.g. {'epochs':1} + init_config: a dictionary of a partial or full initial config, + e.g., from a subset of controlled dimensions + to the initial low-cost values, + e.g. {'epochs':1} metric: A string of the metric name to optimize for. mode: A string in ['min', 'max'] to specify the objective as minimization or maximization. diff --git a/flaml/tune/README.md b/flaml/tune/README.md index c2b3fced3..759c42603 100644 --- a/flaml/tune/README.md +++ b/flaml/tune/README.md @@ -27,7 +27,7 @@ analysis = tune.run( 'x': tune.qloguniform(lower=1, upper=100000, q=1), 'y': tune.randint(lower=1, upper=100000) }, # the search space - init_config={'x':1}, # a initial (partial) config with low cost + low_cost_partial_config={'x':1}, # an initial (partial) config with low cost metric='metric', # the name of the metric used for optimization mode='min', # the optimization mode, 'min' or 'max' num_samples=-1, # the maximal number of configs to try, -1 means infinite @@ -71,7 +71,7 @@ analysis = raytune.run( num_samples=-1, # the maximal number of configs to try, -1 means infinite time_budget_s=60, # the time budget in seconds local_dir='logs/', # the local directory to store logs - search_alg=CFO(points_to_evaluate=[{'x':1}]) # or BlendSearch + search_alg=CFO(low_cost_partial_config={'x':1}) # or BlendSearch ) print(analysis.best_trial.last_result) # the best trial's result @@ -124,7 +124,7 @@ Example: ```python from flaml import CFO tune.run(... - search_alg = CFO(points_to_evaluate=[init_config]), + search_alg = CFO(low_cost_partial_config=low_cost_partial_config), ) ``` @@ -157,7 +157,7 @@ Example: # require: pip install flaml[blendsearch] from flaml import BlendSearch tune.run(... - search_alg = BlendSearch(points_to_evaluate=[init_config]), + search_alg = BlendSearch(low_cost_partial_config=low_cost_partial_config), ) ``` diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py index ae00d741c..c77c41efa 100644 --- a/flaml/tune/tune.py +++ b/flaml/tune/tune.py @@ -3,7 +3,7 @@ * Licensed under the MIT License. See LICENSE file in the * project root for license information.
''' -from typing import Optional, Union +from typing import Optional, Union, List import datetime, time try: from ray.tune.analysis import ExperimentAnalysis as EA @@ -53,7 +53,6 @@ def report(_metric=None, **kwargs): analysis = tune.run( compute_with_config, - init_config={}, config={ 'x': tune.qloguniform(lower=1, upper=1000000, q=1), 'y': tune.randint(lower=1, upper=1000000) @@ -100,8 +99,9 @@ def report(_metric=None, **kwargs): def run(training_function, - init_config: dict, config: Optional[dict] = None, + points_to_evaluate: Optional[List[dict]] = None, + low_cost_partial_config: Optional[dict] = None, cat_hp_cost: Optional[dict] = None, metric: Optional[str] = None, mode: Optional[str] = None, @@ -136,7 +136,6 @@ def run(training_function, analysis = tune.run( compute_with_config, - init_config={}, config={ 'x': tune.qloguniform(lower=1, upper=1000000, q=1), 'y': tune.randint(lower=1, upper=1000000) @@ -148,15 +147,17 @@ def run(training_function, Args: training_function: A user-defined training function. - init_config: A dictionary from a subset of controlled dimensions - to the initial low-cost values. e.g., + config: A dictionary to specify the search space. + points_to_evaluate: A list of initial hyperparameter + configurations to run first. + low_cost_partial_config: A dictionary from a subset of + controlled dimensions to the initial low-cost values. + e.g., .. code-block:: python - {'epochs': 1} - - If no such dimension, pass an empty dict {}. - config: A dictionary to specify the search space. + {'n_estimators': 4, 'max_leaves': 4} + cat_hp_cost: A dictionary from a subset of categorical dimensions to the relative cost of each choice. e.g., @@ -195,9 +196,9 @@ def run(training_function, from flaml import BlendSearch algo = BlendSearch(metric='val_loss', mode='min', space=search_space, - points_to_evaluate=points_to_evaluate) + low_cost_partial_config=low_cost_partial_config) for i in range(10): - analysis = tune.run(compute_with_config, init_config=None, + analysis = tune.run(compute_with_config, search_alg=algo, use_ray=False) print(analysis.trials[-1].last_result) @@ -242,8 +243,9 @@ def run(training_function, if search_alg is None: from ..searcher.blendsearch import BlendSearch - search_alg = BlendSearch(points_to_evaluate=[init_config], - metric=metric, mode=mode, + search_alg = BlendSearch(metric=metric, mode=mode, + points_to_evaluate=points_to_evaluate, + low_cost_partial_config=low_cost_partial_config, cat_hp_cost=cat_hp_cost, space=config, prune_attr=prune_attr, min_resource=min_resource, diff --git a/flaml/version.py b/flaml/version.py index 6232f7ab1..493f7415d 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "0.2.10" +__version__ = "0.3.0" diff --git a/notebook/flaml_finetune_transformer.ipynb b/notebook/flaml_finetune_transformer.ipynb index 830bc8401..9d4b532af 100644 --- a/notebook/flaml_finetune_transformer.ipynb +++ b/notebook/flaml_finetune_transformer.ipynb @@ -1139,7 +1139,7 @@ "analysis = flaml.tune.run(\n", " train_distilbert,\n", " config=search_space,\n", - " init_config={\n", + " low_cost_partial_config={\n", " \"num_train_epochs\": 1,\n", " },\n", " metric=HP_METRIC,\n", diff --git a/setup.py b/setup.py index 438580f34..5fb3dcb0e 100644 --- a/setup.py +++ b/setup.py @@ -61,7 +61,7 @@ setuptools.setup( ], "nni": [ "nni", - ] + ], }, classifiers=[ "Programming Language :: Python :: 3", diff --git a/test/hf/test_deberta.py b/test/hf/test_deberta.py index 28ae37a9a..f09c2aee6 100644 --- a/test/hf/test_deberta.py +++ 
b/test/hf/test_deberta.py @@ -148,16 +148,16 @@ def _test_deberta(method='BlendSearch'): algo = OptunaSearch() elif 'CFO' == method: from flaml import CFO - algo = CFO(points_to_evaluate=[{ + algo = CFO(low_cost_partial_config={ "num_train_epochs": 1, "per_device_train_batch_size": 128, - }]) + }) elif 'BlendSearch' == method: from flaml import BlendSearch - algo = BlendSearch(points_to_evaluate=[{ + algo = BlendSearch(low_cost_partial_config={ "num_train_epochs": 1, "per_device_train_batch_size": 128, - }]) + }) elif 'Dragonfly' == method: from ray.tune.suggest.dragonfly import DragonflySearch algo = DragonflySearch() diff --git a/test/hf/test_distillbert.py b/test/hf/test_distillbert.py index 75b52d79f..82ca17263 100644 --- a/test/hf/test_distillbert.py +++ b/test/hf/test_distillbert.py @@ -119,14 +119,14 @@ def _test_distillbert(method='BlendSearch'): algo = OptunaSearch() elif 'CFO' == method: from flaml import CFO - algo = CFO(points_to_evaluate=[{ + algo = CFO(low_cost_partial_config={ "num_train_epochs": 1, - }]) + }) elif 'BlendSearch' == method: from flaml import BlendSearch - algo = BlendSearch(points_to_evaluate=[{ + algo = BlendSearch(low_cost_partial_config={ "num_train_epochs": 1, - }]) + }) elif 'Dragonfly' == method: from ray.tune.suggest.dragonfly import DragonflySearch algo = DragonflySearch() diff --git a/test/hf/test_electra.py b/test/hf/test_electra.py index e0fd8e0da..8258adba3 100644 --- a/test/hf/test_electra.py +++ b/test/hf/test_electra.py @@ -148,16 +148,16 @@ def _test_electra(method='BlendSearch'): algo = OptunaSearch() elif 'CFO' == method: from flaml import CFO - algo = CFO(points_to_evaluate=[{ + algo = CFO(low_cost_partial_config={ "num_train_epochs": 1, "per_device_train_batch_size": 128, - }]) + }) elif 'BlendSearch' == method: from flaml import BlendSearch - algo = BlendSearch(points_to_evaluate=[{ + algo = BlendSearch(low_cost_partial_config={ "num_train_epochs": 1, "per_device_train_batch_size": 128, - }]) + }) elif 'Dragonfly' == method: from ray.tune.suggest.dragonfly import DragonflySearch algo = DragonflySearch() diff --git a/test/hf/test_roberta.py b/test/hf/test_roberta.py index 7cba82957..b04d58cfe 100644 --- a/test/hf/test_roberta.py +++ b/test/hf/test_roberta.py @@ -148,16 +148,16 @@ def _test_roberta(method='BlendSearch'): algo = OptunaSearch() elif 'CFO' == method: from flaml import CFO - algo = CFO(points_to_evaluate=[{ + algo = CFO(low_cost_partial_config={ "num_train_epochs": 1, "per_device_train_batch_size": 128, - }]) + }) elif 'BlendSearch' == method: from flaml import BlendSearch - algo = BlendSearch(points_to_evaluate=[{ + algo = BlendSearch(low_cost_partial_config={ "num_train_epochs": 1, "per_device_train_batch_size": 128, - }]) + }) elif 'Dragonfly' == method: from ray.tune.suggest.dragonfly import DragonflySearch algo = DragonflySearch() diff --git a/test/nni/flaml_nni_wrap.py b/test/nni/flaml_nni_wrap.py index 67a675847..fa2eaecb3 100644 --- a/test/nni/flaml_nni_wrap.py +++ b/test/nni/flaml_nni_wrap.py @@ -3,5 +3,5 @@ from flaml.searcher.blendsearch import BlendSearchTuner as BST class BlendSearchTuner(BST): # for best performance pass low cost initial parameters here - def __init__(self, points_to_evaluate=[{"hidden_size":128}]): - super.__init__(self, points_to_evaluate=points_to_evaluate) + def __init__(self, low_cost_partial_config={"hidden_size":128}): + super().__init__(low_cost_partial_config=low_cost_partial_config) diff --git a/test/test_pytorch_cifar10.py b/test/test_pytorch_cifar10.py index 7bb0e625f..2b38060b7
100644 --- a/test/test_pytorch_cifar10.py +++ b/test/test_pytorch_cifar10.py @@ -230,7 +230,8 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100, if method == 'BlendSearch': result = tune.run( ray.tune.with_parameters(train_cifar, data_dir=data_dir), - init_config={ + config=config, + low_cost_partial_config={ "l1": 2, "l2": 2, "num_epochs": 1, @@ -242,7 +243,6 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100, min_resource=1, report_intermediate_result=True, resources_per_trial={"cpu": 2, "gpu": gpus_per_trial}, - config=config, local_dir='logs/', num_samples=num_samples, time_budget_s=time_budget_s, @@ -260,12 +260,12 @@ def cifar10_main(method='BlendSearch', num_samples=10, max_num_epochs=100, algo = OptunaSearch() elif 'CFO' == method: from flaml import CFO - algo = CFO(points_to_evaluate=[{ + algo = CFO(low_cost_partial_config={ "l1": 2, "l2": 2, "num_epochs": 1, "batch_size": 4, - }]) + }) elif 'Nevergrad' == method: from ray.tune.suggest.nevergrad import NevergradSearch import nevergrad as ng diff --git a/test/test_tune.py b/test/test_tune.py index e959daa01..5de27caa4 100644 --- a/test/test_tune.py +++ b/test/test_tune.py @@ -64,9 +64,9 @@ def _test_xgboost(method='BlendSearch'): if method == 'BlendSearch': analysis = tune.run( train_breast_cancer, - init_config={ + config=search_space, + low_cost_partial_config={ "max_depth": 1, - "min_child_weight": 3, }, cat_hp_cost={ "min_child_weight": [6, 3, 2], @@ -78,7 +78,6 @@ def _test_xgboost(method='BlendSearch'): report_intermediate_result=True, # You can add "gpu": 0.1 to allocate GPUs resources_per_trial={"cpu": 1}, - config=search_space, local_dir='logs/', num_samples=num_samples*n_cpu, time_budget_s=time_budget_s, @@ -96,10 +95,9 @@ def _test_xgboost(method='BlendSearch'): algo = OptunaSearch() elif 'CFO' == method: from flaml import CFO - algo = CFO(points_to_evaluate=[{ + algo = CFO(low_cost_partial_config={ "max_depth": 1, - "min_child_weight": 3, - }], cat_hp_cost={ + }, cat_hp_cost={ "min_child_weight": [6, 3, 2], }) elif 'Dragonfly' == method: @@ -169,12 +167,12 @@ def test_nested(): analysis = tune.run( simple_func, - init_config={ + config=search_space, + low_cost_partial_config={ "cost_related": {"a": 1,} }, metric="metric", mode="min", - config=search_space, local_dir='logs/', num_samples=-1, time_budget_s=1) diff --git a/test/test_xgboost2d_sample_size.py b/test/test_xgboost2d_sample_size.py new file mode 100644 index 000000000..266098b1b --- /dev/null +++ b/test/test_xgboost2d_sample_size.py @@ -0,0 +1,78 @@ +import unittest + +from sklearn.datasets import fetch_openml +from sklearn.model_selection import train_test_split +import numpy as np +from flaml.automl import AutoML +from flaml.model import XGBoostSklearnEstimator +from flaml import tune + + +dataset = "credit-g" + + +class XGBoost2D(XGBoostSklearnEstimator): + + @classmethod + def search_space(cls, data_size, task): + upper = min(32768, int(data_size)) + return { + 'n_estimators': { + 'domain': tune.qloguniform(lower=4, upper=upper, q=1), + 'init_value': 4, + }, + 'max_leaves': { + 'domain': tune.qloguniform(lower=4, upper=upper, q=1), + 'init_value': 4, + }, + } + + +def _test_simple(method=None, size_ratio=1.0): + automl = AutoML() + automl.add_learner(learner_name='XGBoost2D', + learner_class=XGBoost2D) + + + try: + X, y = fetch_openml(name=dataset, return_X_y=True) + except Exception: + from sklearn.datasets import load_wine + X, y = load_wine(return_X_y=True) + X_train, X_test, y_train, y_test =
train_test_split(X, y, test_size=0.33, + random_state=42) + + final_size = int(len(y_train)*size_ratio) + X_train = X_train[:final_size] + y_train = y_train[:final_size] + automl_settings = { + "estimator_list": ['XGBoost2D'], + # "metric": 'accuracy', + "task": 'classification', + "log_file_name": f"test/xgboost2d_{dataset}_{method}_{final_size}.log", + # "model_history": True, + # "log_training_metric": True, + # "split_type": split_type, + "n_jobs": 1, + "hpo_method": method, + "log_type": "all", + "time_budget": 3600, + } + automl.fit(X_train=X_train, y_train=y_train, **automl_settings) + + +def _test_grid_1(): + _test_simple(method="grid", size_ratio=1.0/3.0) + +def _test_grid_2(): + _test_simple(method="grid", size_ratio=2.0/3.0) + +def _test_grid_4(): + _test_simple(method="grid", size_ratio=0.5) + +def _test_grid_3(): + _test_simple(method="grid", size_ratio=1.0) + + +if __name__ == "__main__": + unittest.main()
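
For reviewers who want to try the renamed argument, here is a minimal, self-contained sketch of the v0.3.0 API this patch introduces, adapted from the `flaml/tune/README.md` example above; the toy objective `evaluate_config` and its sleep-based cost model are hypothetical stand-ins for a real training function:

```python
import time

from flaml import tune


def evaluate_config(config):
    """Hypothetical objective: larger 'x' is more expensive to evaluate."""
    score = (config['x'] - 85000) ** 2 / 1e10 + config['y']
    time.sleep(config['x'] / 100000)  # simulate cost growing with 'x'
    tune.report(metric=score)  # report the metric back to the searcher


analysis = tune.run(
    evaluate_config,
    config={  # the search space
        'x': tune.qloguniform(lower=1, upper=100000, q=1),
        'y': tune.randint(lower=1, upper=100000),
    },
    # v0.3.0: `init_config` is replaced by `low_cost_partial_config`,
    # a partial config mapping only the cost-related dimensions
    # (here just 'x') to their cheapest values.
    low_cost_partial_config={'x': 1},
    metric='metric',
    mode='min',
    num_samples=-1,    # -1 means no limit on the number of configs
    time_budget_s=10,  # stop after 10 seconds
)
print(analysis.best_trial.last_result)
```

Note the split this patch makes between the two arguments: `points_to_evaluate` now takes a list of full initial configurations to run first, while `low_cost_partial_config` carries only the cost-related dimensions and their low-cost starting values.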