diff --git a/flaml/automl.py b/flaml/automl.py
index a05b5ba3a..e50c796e3 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -2404,37 +2404,23 @@ class AutoML(BaseEstimator):
                 [TrainingArgumentsForAuto](nlp/huggingface/training_args). e.g.,
             skip_transform: boolean, default=False | Whether to pre-process data prior to modeling.
             lexico_objectives: dict, default=None | It specifies information needed to perform multi-objective
-                optimization with lexicographic preferences. When lexico_objectives it not None, flaml's AutoML uses "cfo"
-                as the `hpo_method`, which makes the input (if provided) `hpo_method' invalid. This dictionary shall
-                contain the following fields of key-value pairs:
-                - "metrics": a list of optimization objectives with the orders reflecting the priorities/preferences of the
-                objectives.
-                - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
-                objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives
-                "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
-                metric names (provided in "metric"), and the values are the numerical target values.
-                - "tolerances"(optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
-                metric names (provided in "metrics"), and the values are the numerical tolerances values.
-                E.g.,
-                ```python
-                lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
-                "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0}}
-                ```
-                It specifics the information used for multiple objectives optimization with lexicographic preference.
-                e.g.,
-                ```python
-                lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
-                "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0,"pred_time":0.0}}
-                ```
-                Either "metrics" or "modes" is a list of str.
-                It represents the optimization objectives, the objective as minimization or maximization respectively.
-                Both "metrics" and "modes" are ordered by priorities from high to low.
-                "tolerances" is a dictionary to specify the optimality tolerance of each objective.
-                "targets" is a dictionary to specify the optimization targets for each objective.
-                If providing lexico_objectives, the arguments metric, hpo_method will be invalid.
-
+                optimization with lexicographic preferences. When lexico_objectives is not None, the argument metric will be invalid,
+                and flaml's AutoML uses "cfo" as the `hpo_method`, which makes the input (if provided) `hpo_method` invalid.
+                This dictionary shall contain the following fields of key-value pairs:
+                - "metrics": a list of optimization objectives with the orders reflecting the priorities/preferences of the
+                objectives.
+                - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+                objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+                - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical target values.
+                - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical tolerance values.
+                E.g.,
+                ```python
+                lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
+                "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0}}
+                ```
             fit_kwargs_by_estimator: dict, default=None | The user specified keywords arguments, grouped by
                 estimator name.
                 For TransformersEstimator, available fit_kwargs can be found from
@@ -3065,7 +3051,7 @@ class AutoML(BaseEstimator):
                 search_alg = ConcurrencyLimiter(search_alg, self._n_concurrent_trials)
             resources_per_trial = self._state.resources_per_trial
-            analysis = ray.tune.run(
+            analysis = ray.tune.run(
                 self.trainable,
                 search_alg=search_alg,
                 config=space,
diff --git a/flaml/tune/result.py b/flaml/tune/result.py
index e9f1edc94..461c991f1 100644
--- a/flaml/tune/result.py
+++ b/flaml/tune/result.py
@@ -43,9 +43,6 @@ PID = "pid"
 # (Optional) Default (anonymous) metric when using tune.report(x)
 DEFAULT_METRIC = "_metric"
 
-# (Optional) Default (anonymous) mode when using tune.report(x)
-DEFAULT_MODE = "min"
-
 # (Optional) Mean reward for current training iteration
 EPISODE_REWARD_MEAN = "episode_reward_mean"
 
diff --git a/flaml/tune/searcher/blendsearch.py b/flaml/tune/searcher/blendsearch.py
index 244ac0a8e..3990a317f 100644
--- a/flaml/tune/searcher/blendsearch.py
+++ b/flaml/tune/searcher/blendsearch.py
@@ -113,19 +113,23 @@ class BlendSearch(Searcher):
             Default is "auto", which means that we will automatically choose the cost attribute to use (depending
             on the nature of the resource budget). When cost_attr is set to None, cost differences between different trials will be omitted
             in our search algorithm.
-        lexico_objectives: A dictionary with four elements.
-            It specifics the information used for multiple objectives optimization with lexicographic preference.
-            e.g.,
-            ```python
-            lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
-            "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0,"pred_time":0.0}}
-            ```
-            Either "metrics" or "modes" is a list of str.
-            It represents the optimization objectives, the objective as minimization or maximization respectively.
-            Both "metrics" and "modes" are ordered by priorities from high to low.
-            "tolerances" is a dictionary to specify the optimality tolerance of each objective.
-            "targets" is a dictionary to specify the optimization targets for each objective.
-            If providing lexico_objectives, the arguments metric, mode will be invalid.
+        lexico_objectives: dict, default=None | It specifies information needed to perform multi-objective
+            optimization with lexicographic preferences. This is only supported in CFO.
+            When lexico_objectives is not None, the arguments metric and mode will be invalid.
+            This dictionary shall contain the following fields of key-value pairs:
+            - "metrics": a list of optimization objectives with the orders reflecting the priorities/preferences of the
+            objectives.
+            - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+            objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+            - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical target values.
+            - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical tolerance values.
+            E.g.,
+            ```python
+            lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
+            "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0}}
+            ```
         experimental: A bool of whether to use experimental features.
         """
         self._eps = SEARCH_THREAD_EPS
diff --git a/flaml/tune/searcher/flow2.py b/flaml/tune/searcher/flow2.py
index 5134f3b49..e8ef92da6 100644
--- a/flaml/tune/searcher/flow2.py
+++ b/flaml/tune/searcher/flow2.py
@@ -70,19 +70,22 @@ class FLOW2(Searcher):
             resource_multiple_factor: A float of the multiplicative factor
                 used for increasing resource.
             cost_attr: A string of the attribute used for cost.
-            lexico_objectives: A dictionary with four elements.
-                It specifics the information used for multiple objectives optimization with lexicographic preference.
-                e.g.,
-                ```python
-                lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
-                "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0,"pred_time":0.0}}
-                ```
-                Either "metrics" or "modes" is a list of str.
-                It represents the optimization objectives, the objective as minimization or maximization respectively.
-                Both "metrics" and "modes" are ordered by priorities from high to low.
-                "tolerances" is a dictionary to specify the optimality tolerance of each objective.
-                "targets" is a dictionary to specify the optimization targets for each objective.
-                If providing lexico_objectives, the arguments metric, mode will be invalid.
+            lexico_objectives: dict, default=None | It specifies information needed to perform multi-objective
+                optimization with lexicographic preferences. When lexico_objectives is not None, the arguments metric
+                and mode will be invalid. This dictionary shall contain the following fields of key-value pairs:
+                - "metrics": a list of optimization objectives with the orders reflecting the priorities/preferences of the
+                objectives.
+                - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+                objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+                - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical target values.
+                - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+                metric names (provided in "metrics"), and the values are the numerical tolerance values.
+                E.g.,
+                ```python
+                lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
+                "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0}}
+                ```
             seed: An integer of the random seed.
""" if mode: @@ -106,6 +109,14 @@ class FLOW2(Searcher): self.resource_attr = resource_attr self.min_resource = min_resource self.lexico_objectives = lexico_objectives + if self.lexico_objectives is not None: + if "modes" not in self.lexico_objectives.keys(): + self.lexico_objectives["modes"] = ["min"]*len(self.lexico_objectives["metrics"]) + for t_metric, t_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]): + if t_metric not in self.lexico_objectives["tolerances"].keys(): + self.lexico_objectives["tolerances"][t_metric] = 0 + if t_metric not in self.lexico_objectives["targets"].keys(): + self.lexico_objectives["targets"][t_metric] = -float("inf") if t_mode == "min" else float("inf") self.resource_multiple_factor = ( resource_multiple_factor or SAMPLE_MULTIPLY_FACTOR ) @@ -365,11 +376,10 @@ class FLOW2(Searcher): for k in self.lexico_objectives["metrics"]: self._histories[k].append(result[k]) update_fbest() - for k_metric in self.lexico_objectives["metrics"]: - k_T = self.lexico_objectives["tolerances"][k_metric] - k_c = self.lexico_objectives["targets"][k_metric] - if (result[k_metric] < max([self._f_best[k_metric] + k_T, k_c])) and ( - self.best_obj[k_metric] < max([self._f_best[k_metric] + k_T, k_c]) + for k_metric, k_mode in zip(self.lexico_objectives["metrics"],self.lexico_objectives["modes"]): + k_c = self.lexico_objectives["targets"][k_metric] if k_mode == "min" else -1*self.lexico_objectives["targets"][k_metric] + if (result[k_metric] < max([self._f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric], k_c])) and ( + self.best_obj[k_metric] < max([self._f_best[k_metric] + self.lexico_objectives["tolerances"][k_metric], k_c]) ): continue elif result[k_metric] < self.best_obj[k_metric]: diff --git a/flaml/tune/searcher/search_thread.py b/flaml/tune/searcher/search_thread.py index 8f0c160f7..6550aa0ba 100644 --- a/flaml/tune/searcher/search_thread.py +++ b/flaml/tune/searcher/search_thread.py @@ -151,8 +151,7 @@ class SearchThread: self.cost_best = self.cost_last self.best_result = result if ( - not hasattr(self._search_alg, "lexico_objectives") - or self._search_alg.lexico_objectives is None + getattr(self._search_alg, "lexico_objectives", None) is None ): # TODO: Improve this behavior. When lexico_objectives is provided to CFO, # related variables are not callable. 
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index fc06d2f3e..0bea6f6bc 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -21,7 +21,7 @@ except (ImportError, AssertionError):
 
 from .analysis import ExperimentAnalysis as EA
 from .trial import Trial
-from .result import DEFAULT_METRIC, DEFAULT_MODE
+from .result import DEFAULT_METRIC
 import logging
 
 logger = logging.getLogger(__name__)
@@ -40,6 +40,8 @@ class ExperimentAnalysis(EA):
     def __init__(self, trials, metric, mode, lexico_objectives=None):
+        # Assign before the try block so the attribute also exists on the fallback path.
+        self.lexico_objectives = lexico_objectives
         try:
             super().__init__(self, None, trials, metric, mode)
         except (TypeError, ValueError):
             self.trials = trials
             self.default_metric = metric or DEFAULT_METRIC
@@ -77,8 +78,9 @@ class ExperimentAnalysis(EA):
         )
         obj_initial = self.lexico_objectives["metrics"][0]
         feasible_index = [*range(len(histories[obj_initial]))]
-        for k_metric in self.lexico_objectives["metrics"]:
+        for k_metric, k_mode in zip(self.lexico_objectives["metrics"], self.lexico_objectives["modes"]):
             k_values = np.array(histories[k_metric])
+            k_c = -self.lexico_objectives["targets"][k_metric] if k_mode == "max" else self.lexico_objectives["targets"][k_metric]
             f_best[k_metric] = np.min(k_values.take(feasible_index))
             feasible_index_prior = np.where(
                 k_values
@@ -86,7 +88,7 @@ class ExperimentAnalysis(EA):
                 <= max(
                     [
                         f_best[k_metric]
                         + self.lexico_objectives["tolerances"][k_metric],
-                        self.lexico_objectives["targets"][k_metric],
+                        k_c,
                     ]
                 )
             )[0].tolist()
@@ -373,20 +375,24 @@ def run(
         max_failure: int | the maximal consecutive number of failures to sample a trial
             before the tuning is terminated.
         use_ray: A boolean of whether to use ray as the backend.
-        lexico_objectives: A dictionary with four elements.
-            It specifics the information used for multiple objectives optimization with lexicographic preference.
-            e.g.,
-            ```python
-            lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
-            "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0,"pred_time":0.0}}
-            ```
-            Either "metrics" or "modes" is a list of str.
-            It represents the optimization objectives, the objective as minimization or maximization respectively.
-            Both "metrics" and "modes" are ordered by priorities from high to low.
-            "tolerances" is a dictionary to specify the optimality tolerance of each objective.
-            "targets" is a dictionary to specify the optimization targets for each objective.
-            If providing lexico_objectives, the arguments metric, mode, and search_alg will be invalid.
-
+        lexico_objectives: dict, default=None | It specifies information needed to perform multi-objective
+            optimization with lexicographic preferences. When lexico_objectives is not None, the arguments metric
+            and mode will be invalid, and flaml's tune uses CFO
+            as the `search_alg`, which makes the input (if provided) `search_alg` invalid.
+            This dictionary shall contain the following fields of key-value pairs:
+            - "metrics": a list of optimization objectives with the orders reflecting the priorities/preferences of the
+            objectives.
+            - "modes" (optional): a list of optimization modes (each mode either "min" or "max") corresponding to the
+            objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+            - "targets" (optional): a dictionary to specify the optimization targets on the objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical target values.
+            - "tolerances" (optional): a dictionary to specify the optimality tolerances on objectives. The keys are the
+            metric names (provided in "metrics"), and the values are the numerical tolerance values.
+            E.g.,
+            ```python
+            lexico_objectives = {"metrics":["error_rate","pred_time"], "modes":["min","min"],
+            "tolerances":{"error_rate":0.01,"pred_time":0.0}, "targets":{"error_rate":0.0}}
+            ```
         log_file_name: A string of the log file name. Default to None.
             When set to None:
                 if local_dir is not given, no log file is created;
@@ -443,7 +449,8 @@ def run(
         logger.setLevel(logging.CRITICAL)
 
     from .searcher.blendsearch import BlendSearch, CFO
-
+    if lexico_objectives is not None:
+        search_alg = None
     if search_alg is None:
         flaml_scheduler_resource_attr = (
             flaml_scheduler_min_resource
@@ -465,6 +472,9 @@ def run(
                 SearchAlgorithm = BlendSearch
             else:
                 SearchAlgorithm = CFO
+            logger.info(
+                "Using search algorithm {}.".format(SearchAlgorithm.__name__)
+            )
         except ImportError:
             SearchAlgorithm = CFO
             logger.warning(
diff --git a/test/automl/test_lexiflow.py b/test/automl/test_lexiflow.py
index c6d5afb56..67052feab 100644
--- a/test/automl/test_lexiflow.py
+++ b/test/automl/test_lexiflow.py
@@ -24,7 +24,12 @@ def test_lexiflow():
         "mem_thres": 128 * (1024**3),
     }
     automl.fit(X_train=X_train, y_train=y_train, X_val=X_test, y_val=y_test, **settings)
+    print(automl.predict(X_train))
+    print(automl.model)
+    print(automl.config_history)
+    print(automl.best_iteration)
+    print(automl.best_estimator)
 
 
 if __name__ == "__main__":
     test_lexiflow()
diff --git a/test/tune/test_lexiflow.py b/test/tune/test_lexiflow.py
index 056750528..ba6526f1b 100644
--- a/test/tune/test_lexiflow.py
+++ b/test/tune/test_lexiflow.py
@@ -109,15 +109,35 @@ def test_lexiflow():
         "n_epoch": 1,
     }
 
-    tune.run(
+    # Lexicographic tuning.
+    analysis = tune.run(
         evaluate_function,
         num_samples=-1,
-        time_budget_s=100,
+        time_budget_s=50,
         config=search_space,
         use_ray=False,
         lexico_objectives=lexico_objectives,
         low_cost_partial_config=low_cost_partial_config,
     )
+    print(analysis.best_trial)
+    print(analysis.best_config)
+    print(analysis.best_result)
+
+    # Non-lexicographic tuning.
+    analysis = tune.run(
+        evaluate_function,
+        metric="error_rate",
+        mode="min",
+        num_samples=-1,
+        time_budget_s=50,
+        config=search_space,
+        use_ray=False,
+        lexico_objectives=None,
+        low_cost_partial_config=low_cost_partial_config,
+    )
+    print(analysis.best_trial)
+    print(analysis.best_config)
+    print(analysis.best_result)
 
 
 if __name__ == "__main__":
diff --git a/website/docs/Examples/Tune-Lexicographic-objectives.md b/website/docs/Examples/Tune-Lexicographic-objectives.md
index eef68088a..8da152863 100644
--- a/website/docs/Examples/Tune-Lexicographic-objectives.md
+++ b/website/docs/Examples/Tune-Lexicographic-objectives.md
@@ -3,7 +3,7 @@
 ## Requirements
 
 ```python
-pip install thop torchvision torch
+pip install flaml thop torchvision torch
 ```
 
 ## Tuning accurate and efficient neural networks with lexicographic preference
diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md
index dfce055d3..3f0d833c0 100644
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@@ -424,11 +424,11 @@ automl2.fit(X_train, y_train, time_budget=7200, starting_points=automl1.best_con
 ### Lexicographic objectives
 We support automl for multiple objectives with lexicographic preference by providing argument `lexico_objectives` for `automl.fit()`.
-`lexico_objectives` is a dictionary with four mandatory elements:
- - `metrics`: A list of optimization objectives. The objectives are ordered by their priority from high to low.
- - `modes`: A list to specify each objective as minimization or maximization in `metrics` correspondingly.
- - `tolerances`: A dictionary to specify the "tolerance" for each objective. "tolerance" is the amount of performance degradation the user is willing to compromise in order to find choices with better performance on the objectives of lower priorities.
- - `targets`: A dictionary to specify the "goals" for each objective. When the objective is better than or equal to the "goals", further minimization is no longer needed.
+`lexico_objectives` is a dictionary with four elements:
+ - `metrics`: a list of optimization objectives with the orders reflecting the priorities/preferences of the objectives.
+ - `modes`: (optional) a list of optimization modes (each mode either "min" or "max") corresponding to the objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+ - `tolerances`: (optional) a dictionary to specify the optimality tolerances on objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical tolerance values.
+ - `targets`: (optional) a dictionary to specify the optimization targets on the objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical target values.
 
 In the following example, we want to minimize `val_loss` and `pred_time` of the model, where `val_loss` has the higher priority. The tolerances for `val_loss` and `pred_time` are 0.02 and 0 respectively. We do not have real targets for these two objectives, so we set both targets to -inf.
 
@@ -440,16 +440,12 @@
 lexico_objectives["tolerances"] = {"val_loss": 0.02, "pred_time":0.0}
 lexico_objectives["targets"] = {"val_loss": -float('inf'), "pred_time": -float('inf')}
 
 # provide the lexico_objectives to automl.fit
-automl.fit(..., lexico_objectives=lexico_objectives, ...)
+automl.fit(..., hpo_method="cfo", lexico_objectives=lexico_objectives, ...)
 ```
 
 *Please note that this is a new feature in version 1.1.0 and subject to change in future versions*
 
-
-
-
-
 ### Log the trials
 
 The trials are logged in a file if a `log_file_name` is passed.
diff --git a/website/docs/Use-Cases/Tune-User-Defined-Function.md b/website/docs/Use-Cases/Tune-User-Defined-Function.md
index 5ded45b93..6a870a3a9 100644
--- a/website/docs/Use-Cases/Tune-User-Defined-Function.md
+++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md
@@ -517,11 +517,11 @@ analysis = tune.run(
 ### Lexicographic Objectives
 We support tuning multiple objectives with lexicographic preference by providing argument `lexico_objectives` for `tune.run()`.
-`lexico_objectives` is a dictionary with four mandatory elements:
- - `metrics`: A list of optimization objectives. The objectives are ordered by their priority from high to low.
- - `modes`: A list to specify each objective as minimization or maximization in `metrics` correspondingly.
- - `tolerances`: A dictionary to specify the "tolerance" for each objective. "tolerance" is the amount of performance degradation the user is willing to compromise in order to find choices with better performance on the objectives of lower priorities.
- - `targets`: A dictionary to specify the "goals" for each objective. When the objective is better than or equal to the "goals", further minimization is no longer needed.
+`lexico_objectives` is a dictionary with four elements:
+ - `metrics`: a list of optimization objectives with the orders reflecting the priorities/preferences of the objectives.
+ - `modes`: (optional) a list of optimization modes (each mode either "min" or "max") corresponding to the objectives in the metric list. If not provided, we use "min" as the default mode for all the objectives.
+ - `tolerances`: (optional) a dictionary to specify the optimality tolerances on objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical tolerance values.
+ - `targets`: (optional) a dictionary to specify the optimization targets on the objectives. The keys are the metric names (provided in "metrics"), and the values are the numerical target values.
 
 In the following example, we want to minimize `val_loss` and `pred_time` of the model, where `val_loss` has the higher priority. The tolerances for `val_loss` and `pred_time` are 0.02 and 0 respectively. We do not have real targets for these two objectives, so we set both targets to -inf.
 
@@ -533,12 +533,10 @@
 lexico_objectives["tolerances"] = {"val_loss": 0.02, "pred_time":0.0}
 lexico_objectives["targets"] = {"val_loss": -float('inf'), "pred_time": -float('inf')}
 
 # provide the lexico_objectives to tune.run
-tune.run(..., lexico_objectives=lexico_objectives, ...)
+tune.run(..., search_alg=None, lexico_objectives=lexico_objectives, ...)
 ```
 
 *Please note that this is a new feature in version 1.1.0 and subject to change in future versions*
 
-
-
 ## Hyperparameter Optimization Algorithm
 
 To tune the hyperparameters toward your objective, you will want to use a hyperparameter optimization algorithm which can help suggest hyperparameters with better performance (regarding your objective). `flaml` offers two HPO methods: CFO and BlendSearch. `flaml.tune` uses BlendSearch by default when the option [blendsearch] is installed.
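Putting the documented pieces together, end-to-end usage of `tune.run` with `lexico_objectives` would look roughly like the sketch below. The `evaluate` function, the toy search space, and the 5-second budget are illustrative stand-ins, not taken from the repository; the `lexico_objectives` fields follow the documentation above.

```python
# A minimal sketch, assuming the lexico_objectives API introduced by this patch.
from flaml import tune


def evaluate(config):
    # Toy objectives: a quadratic "error_rate" and a size-driven "pred_time".
    error_rate = (config["x"] - 0.5) ** 2
    pred_time = 0.01 * config["n_estimators"]
    return {"error_rate": error_rate, "pred_time": pred_time}


lexico_objectives = {
    "metrics": ["error_rate", "pred_time"],
    "modes": ["min", "min"],
    "tolerances": {"error_rate": 0.02, "pred_time": 0.0},
    "targets": {"error_rate": 0.0, "pred_time": 0.0},
}

analysis = tune.run(
    evaluate,
    config={
        "x": tune.uniform(0, 1),
        "n_estimators": tune.randint(1, 32),
    },
    lexico_objectives=lexico_objectives,
    time_budget_s=5,  # arbitrary small budget for illustration
    num_samples=-1,
    use_ray=False,
)
print(analysis.best_config)
```

Note that `metric`, `mode`, and `search_alg` are intentionally omitted: per the docstring, they are invalid when `lexico_objectives` is provided, and CFO is used as the search algorithm.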