From 9128c8811ad7bccde6cf2fb68c89ec4d9a0ec219 Mon Sep 17 00:00:00 2001
From: Chi Wang
Date: Mon, 28 Mar 2022 16:57:52 -0700
Subject: [PATCH] handle failing trials (#505)

* handle failing trials

* clarify when to return {}

* skip ensemble in accuracy check
---
 flaml/tune/trial_runner.py           |  4 ++++
 flaml/tune/tune.py                   | 13 ++++++++++++-
 test/automl/test_notebook_example.py |  3 ++-
 test/nlp/test_autohf.py              |  1 -
 test/tune/test_tune.py               | 14 ++++++++++++++
 5 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/flaml/tune/trial_runner.py b/flaml/tune/trial_runner.py
index cb3a894a2..6aa2bcd5b 100644
--- a/flaml/tune/trial_runner.py
+++ b/flaml/tune/trial_runner.py
@@ -103,6 +103,10 @@ class BaseTrialRunner:
             trial.set_status(Trial.TERMINATED)
         elif self._scheduler_alg:
             self._scheduler_alg.on_trial_remove(self, trial)
+        if trial.status == Trial.ERROR:
+            self._search_alg.on_trial_complete(
+                trial.trial_id, trial.last_result, error=True
+            )
 
 
 class SequentialTrialRunner(BaseTrialRunner):
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index be33ae48e..8044f5f51 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -18,6 +18,7 @@ except (ImportError, AssertionError):
     ray_import = False
     from .analysis import ExperimentAnalysis as EA
 
+from .trial import Trial
 from .result import DEFAULT_METRIC
 import logging
 
@@ -154,6 +155,12 @@ def run(
         metric2minimize = (round(config['x'])-95000)**2
         time2eval = time.time() - current_time
         tune.report(metric2minimize=metric2minimize, time2eval=time2eval)
+        # if the evaluation fails unexpectedly and the exception is caught,
+        # and it doesn't inform the goodness of the config,
+        # return {}
+        # if the failure indicates a config is bad,
+        # report a bad metric value like np.inf or -np.inf
+        # depending on metric mode being min or max
 
     analysis = tune.run(
         compute_with_config,
@@ -451,7 +458,11 @@ def run(
             result = evaluation_function(trial_to_run.config)
             if result is not None:
                 if isinstance(result, dict):
-                    report(**result)
+                    if result:
+                        report(**result)
+                    else:
+                        # When the result returned is an empty dict, set the trial status to error
+                        trial_to_run.set_status(Trial.ERROR)
                 else:
                     report(_metric=result)
             _runner.stop_trial(trial_to_run)
diff --git a/test/automl/test_notebook_example.py b/test/automl/test_notebook_example.py
index badea66d9..3b0ce4fdc 100644
--- a/test/automl/test_notebook_example.py
+++ b/test/automl/test_notebook_example.py
@@ -84,7 +84,8 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None):
     print(automl.resource_attr)
     print(automl.max_resource)
     print(automl.min_resource)
-    automl.fit(X_train=X_train, y_train=y_train, ensemble=True, **settings)
+    if budget < performance_check_budget:
+        automl.fit(X_train=X_train, y_train=y_train, ensemble=True, **settings)
 
 
 def test_automl_array():
diff --git a/test/nlp/test_autohf.py b/test/nlp/test_autohf.py
index b4f76aa1e..ca0e397ad 100644
--- a/test/nlp/test_autohf.py
+++ b/test/nlp/test_autohf.py
@@ -1,7 +1,6 @@
 import sys
 import pytest
 import pickle
-import shutil
 import requests
 
 
diff --git a/test/tune/test_tune.py b/test/tune/test_tune.py
index be2c011e7..f816f6adb 100644
--- a/test/tune/test_tune.py
+++ b/test/tune/test_tune.py
@@ -308,6 +308,20 @@ def test_run_training_function_return_value():
         mode="max",
     )
 
+    # Test empty return value
+    def evaluate_config_empty(config):
+        return {}
+
+    tune.run(
+        evaluate_config_empty,
+        config={
+            "x": tune.qloguniform(lower=1, upper=100000, q=1),
+            "y": tune.qlograndint(lower=2, upper=100000, q=2),
+        },
+        num_samples=10,
+        mode="max",
+    )
+
 
 def test_xgboost_bs():
     _test_xgboost()
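
Note (not part of the patch): a minimal sketch of how a user-defined evaluation
function could follow the convention documented in the tune.run docstring above,
assuming flaml is installed. The objective flaky_loss, the wrapper
evaluate_config, and the specific exception types are hypothetical and only
illustrate the two failure cases; returning a metrics dict (or an empty dict)
from the evaluation function is the behavior this patch handles.

import numpy as np
from flaml import tune


def flaky_loss(config):
    # Hypothetical objective: imagine it may raise errors that are either
    # caused by the config (e.g. MemoryError from an oversized model) or
    # unrelated to it (e.g. OSError from a transient I/O failure).
    return (config["x"] - 0.5) ** 2


def evaluate_config(config):
    try:
        loss = flaky_loss(config)
    except MemoryError:
        # The failure indicates a bad config: report a bad metric value
        # (np.inf, because mode is "min" below) so the searcher steers away.
        return {"loss": np.inf}
    except OSError:
        # The failure says nothing about the config: return {} so the trial
        # is marked as an error and does not update the searcher.
        return {}
    return {"loss": loss}


analysis = tune.run(
    evaluate_config,
    config={"x": tune.uniform(0, 1)},
    metric="loss",
    mode="min",
    num_samples=10,
)
print(analysis.best_config)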