stepsize (#86)

* decrease step size in suggest

* initialization of the counters

* increase step size

* init phase

* check convergence in suggest
Chi Wang 2021-05-07 04:29:38 +00:00 committed by GitHub
parent 363197cef8
commit 0b23c3a028
6 changed files with 75 additions and 25 deletions

View File

@@ -89,9 +89,11 @@ def sklearn_metric_loss_score(
score = log_loss(
y_true, y_predict, labels=labels, sample_weight=sample_weight)
elif 'micro_f1' in metric_name:
score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average='micro')
score = 1 - f1_score(
y_true, y_predict, sample_weight=sample_weight, average='micro')
elif 'macro_f1' in metric_name:
score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight, average='macro')
score = 1 - f1_score(
y_true, y_predict, sample_weight=sample_weight, average='macro')
elif 'f1' in metric_name:
score = 1 - f1_score(y_true, y_predict, sample_weight=sample_weight)
elif 'ap' in metric_name:
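For reference, a minimal standalone sketch of the micro/macro F1 losses computed in this branch, using scikit-learn directly (the sample labels are illustrative): FLAML reports 1 - f1_score so that lower is better, consistent with its other loss metrics.

# Sketch: micro/macro F1 as losses, mirroring the branch above.
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 2, 1]
y_predict = [0, 2, 2, 2, 0]
micro_loss = 1 - f1_score(y_true, y_predict, average='micro')
macro_loss = 1 - f1_score(y_true, y_predict, average='macro')
print(micro_loss, macro_loss)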

View File

@@ -124,8 +124,7 @@ class BaseEstimator:
class j
'''
if 'regression' in self._task:
print('Regression tasks do not support predict_proba')
raise ValueError
raise ValueError('Regression tasks do not support predict_proba')
else:
X_test = self._preprocess(X_test)
return self._model.predict_proba(X_test)
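A hypothetical caller-side sketch (estimator and X_test are placeholder names): now that the reason travels with the exception instead of stdout, callers can surface it directly.

try:
    proba = estimator.predict_proba(X_test)
except ValueError as e:
    print(f'predict_proba unavailable: {e}')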

View File

@@ -255,12 +255,15 @@ class BlendSearch(Searcher):
break
if self._search_thread_pool[thread_id].converged:
todelete.add(thread_id)
for key in self._ls_bound_max:
self._ls_bound_max[key] += self._ls.STEPSIZE
self._ls_bound_min[key] -= self._ls.STEPSIZE
self._expand_admissible_region()
for id in todelete:
del self._search_thread_pool[id]
def _expand_admissible_region(self):
for key in self._ls_bound_max:
self._ls_bound_max[key] += self._ls.STEPSIZE
self._ls_bound_min[key] -= self._ls.STEPSIZE
def _inferior(self, id1: int, id2: int) -> bool:
''' whether thread id1 is inferior to id2
'''
@@ -291,6 +294,12 @@ class BlendSearch(Searcher):
return None
self._use_rs = False
config = self._search_thread_pool[choice].suggest(trial_id)
if choice and config is None:
# local search thread finishes
if self._search_thread_pool[choice].converged:
self._expand_admissible_region()
del self._search_thread_pool[choice]
return None
# preliminary check; not checking config validation
skip = self._should_skip(choice, trial_id, config)
if skip:
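A standalone sketch of what the extracted _expand_admissible_region helper does (the STEPSIZE value here is illustrative; BlendSearch reads it from its local searcher as self._ls.STEPSIZE): when a local-search thread converges, every dimension's admissible interval is widened by STEPSIZE.

STEPSIZE = 0.1  # illustrative; the real constant comes from FLOW2
ls_bound_min = {'x': 0.2, 'y': 0.4}
ls_bound_max = {'x': 0.6, 'y': 0.8}
for key in ls_bound_max:
    ls_bound_max[key] += STEPSIZE
    ls_bound_min[key] -= STEPSIZE
# each admissible interval is now 0.1 wider on both sides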

View File

@@ -188,12 +188,15 @@ class FLOW2(Searcher):
self.step = self.step_ub
# maximal # of consecutive proposals without improvement
self.dir = 2**(self.dim)
self._configs = {} # dict from trial_id to config
self._configs = {} # dict from trial_id to (config, stepsize)
self._K = 0
self._iter_best_config = self.trial_count = 1
self._iter_best_config = self.trial_count_proposed = self.trial_count_complete = 1
self._num_proposedby_incumbent = 0
self._reset_times = 0
# record intermediate trial cost
self._trial_cost = {}
self._same = False # whether the proposed config is the same as best_config
self._init_phase = True # initial phase to increase the initial stepsize
@property
def step_lower_bound(self) -> float:
@@ -426,20 +429,21 @@ class FLOW2(Searcher):
'''
# if better, move, reset num_complete and num_proposed
# if not better and num_complete >= 2*dim, num_allowed += 2
self.trial_count += 1
self.trial_count_complete += 1
if not error and result:
obj = result.get(self._metric)
if obj:
obj *= self.metric_op
if self.best_obj is None or obj < self.best_obj:
self.best_obj, self.best_config = obj, self._configs[
trial_id]
self.best_obj = obj
self.best_config, self.step = self._configs[trial_id]
self.incumbent = self.normalize(self.best_config)
self.cost_incumbent = result.get(self.cost_attr)
if self._resource:
self._resource = self.best_config[self.prune_attr]
self._num_complete4incumbent = 0
self._cost_complete4incumbent = 0
self._num_proposedby_incumbent = 0
self._num_allowed4incumbent = 2 * self.dim
self._proposed_by.clear()
if self._K > 0:
@@ -447,7 +451,7 @@ class FLOW2(Searcher):
self.step *= np.sqrt(self._K / self._oldK)
if self.step > self.step_ub:
self.step = self.step_ub
self._iter_best_config = self.trial_count
self._iter_best_config = self.trial_count_complete
return
proposed_by = self._proposed_by.get(trial_id)
if proposed_by == self.incumbent:
@@ -463,11 +467,6 @@ class FLOW2(Searcher):
if self._num_complete4incumbent == self.dir and (
not self._resource or self._resource == self.max_resource):
# check stuck condition if using max resource
if self.step >= self.step_lower_bound:
# decrease step size
self._oldK = self._K if self._K else self._iter_best_config
self._K = self.trial_count + 1
self.step *= np.sqrt(self._oldK / self._K)
self._num_complete4incumbent -= 2
if self._num_allowed4incumbent < 2:
self._num_allowed4incumbent = 2
@@ -482,7 +481,7 @@ class FLOW2(Searcher):
obj *= self.metric_op
if self.best_obj is None or obj < self.best_obj:
self.best_obj = obj
config = self._configs[trial_id]
config = self._configs[trial_id][0]
if self.best_config != config:
self.best_config = config
if self._resource:
@@ -491,9 +490,10 @@ class FLOW2(Searcher):
self.cost_incumbent = result.get(self.cost_attr)
self._cost_complete4incumbent = 0
self._num_complete4incumbent = 0
self._num_proposedby_incumbent = 0
self._num_allowed4incumbent = 2 * self.dim
self._proposed_by.clear()
self._iter_best_config = self.trial_count
self._iter_best_config = self.trial_count_complete
cost = result.get(self.cost_attr)
# record the cost in case it is pruned and cost info is lost
self._trial_cost[trial_id] = cost
@@ -509,18 +509,21 @@ class FLOW2(Searcher):
2. same resource, move from the incumbent to a random direction
3. same resource, move from the incumbent to the opposite direction
'''
self.trial_count_proposed += 1
if self._num_complete4incumbent > 0 and self.cost_incumbent and \
self._resource and self._resource < self.max_resource and (
self._cost_complete4incumbent
>= self.cost_incumbent * self.resource_multiple_factor):
# consider increasing resource using sum eval cost of complete
# configs
old_resource = self._resource
self._resource = self._round(
self._resource * self.resource_multiple_factor)
self.cost_incumbent *= self._resource / old_resource
config = self.best_config.copy()
config[self.prune_attr] = self._resource
self._direction_tried = None
self._configs[trial_id] = config
self._configs[trial_id] = (config, self.step)
return config
self._num_allowed4incumbent -= 1
move = self.incumbent.copy()
@@ -538,7 +541,42 @@ class FLOW2(Searcher):
self._project(move)
config = self.denormalize(move)
self._proposed_by[trial_id] = self.incumbent
self._configs[trial_id] = config
self._configs[trial_id] = (config, self.step)
self._num_proposedby_incumbent += 1
if self._init_phase:
if self._direction_tried is None:
if self._same:
# check if the new config is different from self.best_config
same = True
for key, value in config.items():
if key not in self.best_config or value != self.best_config[key]:
same = False
break
if same:
# increase step size
self.step += self.STEPSIZE
if self.step > self.step_ub:
self.step = self.step_ub
else:
# check if the new config is different from self.best_config
same = True
for key, value in config.items():
if key not in self.best_config or value != self.best_config[key]:
same = False
break
self._same = same
if self._num_proposedby_incumbent == self.dir and (
not self._resource or self._resource == self.max_resource):
# check stuck condition if using max resource
self._num_proposedby_incumbent -= 2
self._init_phase = False
if self.step >= self.step_lower_bound:
# decrease step size
self._oldK = self._K if self._K else self._iter_best_config
self._K = self.trial_count_proposed + 1
self.step *= np.sqrt(self._oldK / self._K)
else:
return None
return unflatten_dict(config)
def _project(self, config):
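A toy illustration of the decrease schedule this hunk moves into suggest (the proposal counts are made up, not from a real run): each time the stuck condition fires, K jumps to the current proposal count plus one and the step contracts by sqrt(oldK / K).

import numpy as np

step, K, iter_best_config = 1.0, 0, 1
for trial_count_proposed in (8, 16, 24):  # proposal counts at which we are "stuck"
    oldK = K if K else iter_best_config
    K = trial_count_proposed + 1
    step *= np.sqrt(oldK / K)
    print(f'K={K}, step={step:.3f}')
# prints: K=9, step=0.333 / K=17, step=0.243 / K=25, step=0.200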

View File

@@ -1 +1 @@
__version__ = "0.3.5"
__version__ = "0.3.6"

View File

@@ -255,8 +255,10 @@ class TestAutoML(unittest.TestCase):
}
X_train, y_train = load_iris(return_X_y=True)
automl_experiment.fit(X_train=X_train, y_train=y_train, metric='micro_f1', **automl_settings)
automl_experiment_macro.fit(X_train=X_train, y_train=y_train, metric='macro_f1', **automl_settings)
automl_experiment.fit(
X_train=X_train, y_train=y_train, metric='micro_f1', **automl_settings)
automl_experiment_macro.fit(
X_train=X_train, y_train=y_train, metric='macro_f1', **automl_settings)
def test_regression(self):