	constraints (#88)
* pre-training constraints
* metric constraints after training
Author: Chi Wang
parent: 3083229e40
commit: 0925e2b308
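In short, this commit replaces the memory-specific arguments (`resources_per_trial['mem']` plus a `mem_size` callable) with two generic constraint lists: `config_constraints`, checked before a trial is proposed, and `metric_constraints`, enforced as a penalty after a trial reports. Below is a minimal usage sketch against the updated `flaml.tune.run` signature shown in the hunks that follow; `train_model`, the `mem_size` estimator, and all thresholds are illustrative, not part of this commit (the search-space helpers mirror those used in FLAML's own tests):

from flaml import tune


def mem_size(config: dict) -> float:
    # Hypothetical estimator of the bytes needed to train with `config`.
    return config["n_estimators"] * config["max_leaves"] * 8.0


def train_model(config: dict):
    # Illustrative objective; a real trainer would fit and score a model.
    val_loss = (config["n_estimators"] - 80) ** 2
    tune.report(val_loss=val_loss, precision=0.92)


analysis = tune.run(
    train_model,
    config={
        "n_estimators": tune.randint(4, 1000),
        "max_leaves": tune.randint(4, 1000),
    },
    metric="val_loss",
    mode="min",
    # pre-training: skip configs whose estimated size exceeds 1GB
    config_constraints=[(mem_size, "<=", 1024 ** 3)],
    # after training: penalize trials whose reported precision is below 0.9
    metric_constraints=[("precision", ">=", 0.9)],
    num_samples=-1,
    time_budget_s=60,
    local_dir="logs/",
)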
@@ -1036,9 +1036,8 @@ class AutoML:
                         prune_attr=prune_attr,
                         min_resource=min_resource,
                         max_resource=max_resource,
-                        resources_per_trial={"cpu": self._state.n_jobs,
-                                             "mem": self._mem_thres},
-                        mem_size=learner_class.size)
+                        config_constraints=[(learner_class.size, '<=', self._mem_thres)]
+                    )
                 else:
                     algo = SearchAlgo(
                         metric='val_loss', mode='min', space=search_space,

@@ -237,8 +237,8 @@ class DataTransformer:
                     SimpleImputer(missing_values=np.nan, strategy='median'),
                     X_num.columns)])
                 X[num_columns] = self.transformer.fit_transform(X_num)
-            self._cat_columns, self._num_columns, self._datetime_columns = cat_columns, \
-                                                                           num_columns, datetime_columns
+            self._cat_columns, self._num_columns, self._datetime_columns = \
+                cat_columns, num_columns, datetime_columns
             self._drop = drop
 
         if task == 'regression':
@@ -275,4 +275,3 @@ class DataTransformer:
                     X_num.columns = range(X_num.shape[1])
                 X[num_columns] = self.transformer.transform(X_num)
         return X
-

@@ -39,9 +39,11 @@ class BlendSearch(Searcher):
                  min_resource: Optional[float] = None,
                  max_resource: Optional[float] = None,
                  reduction_factor: Optional[float] = None,
-                 resources_per_trial: Optional[dict] = None,
                  global_search_alg: Optional[Searcher] = None,
-                 mem_size: Callable[[dict], float] = None,
+                 config_constraints: Optional[
+                     List[Tuple[Callable[[dict], float], str, float]]] = None,
+                 metric_constraints: Optional[
+                     List[Tuple[str, str, float]]] = None,
                  seed: Optional[int] = 20):
         '''Constructor
 
@@ -82,14 +84,23 @@ class BlendSearch(Searcher):
                 prune_attr; only valid if prune_attr is not in space.
             reduction_factor: A float of the reduction factor used for
                 incremental pruning.
-            resources_per_trial: A dictionary of the resources permitted per
-                trial, such as 'mem'.
             global_search_alg: A Searcher instance as the global search
                 instance. If omitted, Optuna is used. The following algos have
                 known issues when used as global_search_alg:
                 - HyperOptSearch raises exception sometimes
                 - TuneBOHB has its own scheduler
-            mem_size: A function to estimate the memory size for a given config.
+            config_constraints: A list of config constraints to be satisfied.
+                e.g.,
+
+                .. code-block:: python
+
+                    config_constraints = [(mem_size, '<=', 1024**3)]
+
+                mem_size is a function which produces a float number for the bytes
+                needed for a config.
+                It is used to skip configs which do not fit in memory.
+            metric_constraints: A list of metric constraints to be satisfied.
+                e.g., `[('precision', '>=', 0.9)]`
             seed: An integer of the random seed.
         '''
         self._metric, self._mode = metric, mode
@@ -104,10 +115,8 @@ class BlendSearch(Searcher):
         self._ls = LocalSearch(
             init_config, metric, mode, cat_hp_cost, space,
             prune_attr, min_resource, max_resource, reduction_factor, seed)
-        self._resources_per_trial = resources_per_trial
-        self._mem_size = mem_size
-        self._mem_threshold = resources_per_trial.get(
-            'mem') if resources_per_trial else None
+        self._config_constraints = config_constraints
+        self._metric_constraints = metric_constraints
         self._init_search()
 
     def set_search_properties(self,
@@ -171,9 +180,8 @@ class BlendSearch(Searcher):
         self._points_to_evaluate = state._points_to_evaluate
         self._gs = state._gs
         self._ls = state._ls
-        self._resources_per_trial = state._resources_per_trial
-        self._mem_size = state._mem_size
-        self._mem_threshold = state._mem_threshold
+        self._config_constraints = state._config_constraints
+        self._metric_constraints = state._metric_constraints
 
     def restore_from_dir(self, checkpoint_dir: str):
         super().restore_from_dir(checkpoint_dir)
@@ -182,6 +190,20 @@ class BlendSearch(Searcher):
                           error: bool = False):
         ''' search thread updater and cleaner
         '''
+        if result and not error and self._metric_constraints:
+            # account for metric constraints if any
+            objective = result[self._metric]
+            for constraint in self._metric_constraints:
+                metric_constraint, sign, threshold = constraint
+                value = result.get(metric_constraint)
+                if value:
+                    # sign is <= or >=
+                    sign_op = 1 if sign == '<=' else -1
+                    violation = (value - threshold) * sign_op
+                    if violation > 0:
+                        # add penalty term to the metric
+                        objective += 1e+10 * violation * self._ls.metric_op
+            result[self._metric] = objective
         thread_id = self._trial_proposed_by.get(trial_id)
         if thread_id in self._search_thread_pool:
             self._search_thread_pool[thread_id].on_trial_complete(
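Read in isolation, the penalty above works as follows: a violated `>=` or `<=` constraint inflates a minimized objective (or deflates a maximized one) by `1e+10` per unit of violation, which sorts infeasible trials behind all feasible ones while leaving feasible objectives untouched. A standalone restatement of that arithmetic (a sketch, not FLAML API; the +1-for-'min' / -1-for-'max' convention for `metric_op` is an assumption inferred from the comparison logic in this file):

def penalized_objective(result: dict, metric: str, metric_op: float,
                        metric_constraints: list) -> float:
    # Mirrors the penalty block above; metric_op is assumed +1 when
    # minimizing and -1 when maximizing.
    objective = result[metric]
    for name, sign, threshold in metric_constraints:
        value = result.get(name)
        if value:
            sign_op = 1 if sign == '<=' else -1
            violation = (value - threshold) * sign_op
            if violation > 0:
                objective += 1e+10 * violation * metric_op
    return objective


# precision 0.85 violates ('precision', '>=', 0.9) by 0.05, so a val_loss
# of 0.2 becomes 0.2 + 1e+10 * 0.05 = 5e+8 under minimization:
print(penalized_objective({'val_loss': 0.2, 'precision': 0.85},
                          'val_loss', 1, [('precision', '>=', 0.9)]))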
@@ -197,14 +219,15 @@ class BlendSearch(Searcher):
             else:  # add to result cache
                 self._result[self._ls.config_signature(config)] = result
                 # update target metric if improved
-            if (result[self._metric] - self._metric_target) * self._ls.metric_op < 0:
-                self._metric_target = result[self._metric]
+                objective = result[self._metric]
+                if (objective - self._metric_target) * self._ls.metric_op < 0:
+                    self._metric_target = objective
                 if not thread_id and self._create_condition(result):
                     # thread creator
                     self._search_thread_pool[self._thread_count] = SearchThread(
                         self._ls.mode,
-                    self._ls.create(config, result[self._metric], cost=result[
-                        self.cost_attr])
+                        self._ls.create(
+                            config, objective, cost=result[self.cost_attr])
+                    )
                     thread_id = self._thread_count
                     self._thread_count += 1
@@ -362,20 +385,26 @@ class BlendSearch(Searcher):
         return config
 
     def _should_skip(self, choice, trial_id, config) -> bool:
-        ''' if config is None or config's result is known or above mem threshold
+        ''' if config is None or config's result is known or constraints are violated
            return True; o.w. return False
        '''
        if config is None:
            return True
        config_signature = self._ls.config_signature(config)
        exists = config_signature in self._result
-        # check mem constraint
-        if not exists and self._mem_threshold and self._mem_size(
-                config) > self._mem_threshold:
-            self._result[config_signature] = {
-                self._metric: np.inf * self._ls.metric_op, 'time_total_s': 1
-            }
-            exists = True
+        # check constraints
+        if not exists and self._config_constraints:
+            for constraint in self._config_constraints:
+                func, sign, threshold = constraint
+                value = func(config)
+                if (sign == '<=' and value > threshold
+                        or sign == '>=' and value < threshold):
+                    self._result[config_signature] = {
+                        self._metric: np.inf * self._ls.metric_op,
+                        'time_total_s': 1,
+                    }
+                    exists = True
+                    break
        if exists:
            if not self._use_rs:
                result = self._result.get(config_signature)
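The pre-training side is simpler: each `(func, sign, threshold)` triple is evaluated on the candidate config, and the first violation causes the config to be cached as an infinitely bad result and skipped. A standalone sketch of that check (the `mem` estimator below is hypothetical):

def violates_config_constraints(config: dict, config_constraints: list) -> bool:
    # The first constraint on the wrong side of its threshold rejects the config.
    for func, sign, threshold in config_constraints:
        value = func(config)
        if (sign == '<=' and value > threshold
                or sign == '>=' and value < threshold):
            return True
    return False


def mem(config: dict) -> float:
    # Hypothetical memory estimate: 1MB per estimator.
    return config['n_estimators'] * 1e6


# 2000 estimators -> 2e9 bytes > 1024**3 (~1.07e9), so the config is skipped.
print(violates_config_constraints({'n_estimators': 2000},
                                  [(mem, '<=', 1024 ** 3)]))  # True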
@@ -3,7 +3,7 @@
  * Licensed under the MIT License. See LICENSE file in the
  * project root for license information.
 '''
-from typing import Optional, Union, List, Callable
+from typing import Optional, Union, List, Callable, Tuple
 import datetime
 import time
 try:
@@ -118,7 +118,10 @@ def run(training_function,
         local_dir: Optional[str] = None,
         num_samples: Optional[int] = 1,
         resources_per_trial: Optional[dict] = None,
-        mem_size: Callable[[dict], float] = None,
+        config_constraints: Optional[
+            List[Tuple[Callable[[dict], float], str, float]]] = None,
+        metric_constraints: Optional[
+            List[Tuple[str, str, float]]] = None,
         use_ray: Optional[bool] = False):
     '''The trigger for HPO.
 
@@ -210,11 +213,19 @@ def run(training_function,
             used; or a local dir to save the tuning log.
         num_samples: An integer of the number of configs to try. Defaults to 1.
         resources_per_trial: A dictionary of the hardware resources to allocate
-            per trial, e.g., `{'mem': 1024**3}`. When not using ray backend,
-            only 'mem' is used as approximate resource constraints
-            (in conjunction with mem_size).
-        mem_size: A function to estimate the memory size for a given config.
+            per trial, e.g., `{'cpu': 1}`. Only valid when using ray backend.
+        config_constraints: A list of config constraints to be satisfied.
+            e.g.,
+
+            .. code-block:: python
+
+                config_constraints = [(mem_size, '<=', 1024**3)]
+
+            mem_size is a function which produces a float number for the bytes
+            needed for a config.
+            It is used to skip configs which do not fit in memory.
+        metric_constraints: A list of metric constraints to be satisfied.
+            e.g., `[('precision', '>=', 0.9)]`
         use_ray: A boolean of whether to use ray as the backend
     '''
     global _use_ray
@@ -252,8 +263,8 @@ def run(training_function,
             prune_attr=prune_attr,
             min_resource=min_resource, max_resource=max_resource,
             reduction_factor=reduction_factor,
-            resources_per_trial=resources_per_trial,
-            mem_size=mem_size)
+            config_constraints=config_constraints,
+            metric_constraints=metric_constraints)
     if time_budget_s:
         search_alg.set_search_properties(metric, mode, config={
             'time_budget_s': time_budget_s})
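For existing callers of `flaml.tune.run`, the migration implied by these hunks is mechanical: the removed memory arguments fold into a single config constraint. Roughly, assuming a `mem_size` estimator was already defined:

# before this commit (flaml 0.3.x):
# tune.run(..., resources_per_trial={'mem': 1024 ** 3}, mem_size=mem_size)

# after this commit (flaml 0.4.0):
# tune.run(..., config_constraints=[(mem_size, '<=', 1024 ** 3)])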
@@ -1 +1 @@
-__version__ = "0.3.6"
+__version__ = "0.4.0"

@@ -237,9 +237,11 @@ class TestAutoML(unittest.TestCase):
 
         fake_df = pd.DataFrame({'A': [datetime(1900, 2, 3), datetime(1900, 3, 4)]})
         y = np.array([0, 1])
-        automl_experiment.fit(X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)
+        automl_experiment.fit(
+            X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)
 
         y_pred = automl_experiment.predict(fake_df)
         print(y_pred)
 
     def test_micro_macro_f1(self):
         automl_experiment = AutoML()

test/tune/__init__.py: new (empty) file
@@ -1,19 +1,21 @@
 '''Require: pip install flaml[test,ray]
 '''
 import unittest
 import time
+import os
 from sklearn.model_selection import train_test_split
 import sklearn.metrics
 import sklearn.datasets
 try:
     from ray.tune.integration.xgboost import TuneReportCheckpointCallback
 except ImportError:
-    print("skip test_tune because ray tune cannot be imported.")
+    print("skip test_xgboost because ray tune cannot be imported.")
 import xgboost as xgb
 
 import logging
 logger = logging.getLogger(__name__)
-logger.addHandler(logging.FileHandler('test/tune_xgboost.log'))
+os.makedirs('logs', exist_ok=True)
+logger.addHandler(logging.FileHandler('logs/tune_xgboost.log'))
 logger.setLevel(logging.INFO)
 
 
@@ -61,6 +63,7 @@ def _test_xgboost(method='BlendSearch'):
         for n_cpu in [8]:
             start_time = time.time()
             ray.init(num_cpus=n_cpu, num_gpus=0)
+            # ray.init(address='auto')
             if method == 'BlendSearch':
                 analysis = tune.run(
                     train_breast_cancer,
@@ -163,21 +166,28 @@ def test_nested():
     }
 
     def simple_func(config):
-        tune.report(metric=(config["cost_related"]["a"] - 4)**2
-                    * (config["b"] - 0.7)**2)
+        obj = (config["cost_related"]["a"] - 4)**2 \
+            + (config["b"] - config["cost_related"]["a"])**2
+        tune.report(obj=obj)
+        tune.report(obj=obj, ab=config["cost_related"]["a"] * config["b"])
 
-    tune.run(
+    analysis = tune.run(
         simple_func,
         config=search_space,
         low_cost_partial_config={
             "cost_related": {"a": 1}
         },
-        metric="metric",
+        metric="obj",
         mode="min",
+        metric_constraints=[("ab", "<=", 4)],
         local_dir='logs/',
         num_samples=-1,
         time_budget_s=1)
 
+    best_trial = analysis.get_best_trial()
+    logger.info(f"Best config: {best_trial.config}")
+    logger.info(f"Best result: {best_trial.last_result}")
+
 
 def test_xgboost_bs():
     _test_xgboost()
@@ -224,4 +234,4 @@
 
 
 if __name__ == "__main__":
-    unittest.main()
+    test_xgboost_bs()