make performance test reproducible (#837)

* make performance test reproducible
* fix test error
* Doc update and disable logging
* document random_state and version
* remove hardcoded budget
* fix test error and dependency; close #777
* iloc
This commit is contained in:
parent 3b3b0bfa8e
commit 92b79221b6
@@ -119,8 +119,9 @@ class SearchState:
         period=None,
         custom_hp=None,
         max_iter=None,
+        budget=None,
     ):
-        self.init_eci = learner_class.cost_relative2lgbm()
+        self.init_eci = learner_class.cost_relative2lgbm() if budget >= 0 else 1
         self._search_space_domain = {}
         self.init_config = None
         self.low_cost_partial_config = {}
@@ -128,6 +129,7 @@ class SearchState:
         self.data_size = data_size
         self.ls_ever_converged = False
         self.learner_class = learner_class
+        self._budget = budget
         if task in TS_FORECAST:
             search_space = learner_class.search_space(
                 data_size=data_size, task=task, pred_horizon=period
@@ -240,7 +242,7 @@ class SearchState:
             obj, time2eval, trained_estimator = np.inf, 0.0, None
             metric_for_logging = config = None
         self.trial_time = time2eval
-        self.total_time_used += time_used
+        self.total_time_used += time_used if self._budget >= 0 else 1
         self.total_iter += 1

         if self.base_eci is None:
@@ -291,14 +293,25 @@ class AutoMLState:
                 sampled_X_train = self.X_train.iloc[:sample_size]
             else:
                 sampled_X_train = self.X_train[:sample_size]
-            sampled_y_train = self.y_train[:sample_size]
+            if isinstance(self.y_train, pd.Series):
+                sampled_y_train = self.y_train.iloc[:sample_size]
+            else:
+                sampled_y_train = self.y_train[:sample_size]
             weight = self.fit_kwargs.get(
                 "sample_weight"
             )  # NOTE: _prepare_sample_train_data is before kwargs is updated to fit_kwargs_by_estimator
             if weight is not None:
-                sampled_weight = weight[:sample_size]
+                sampled_weight = (
+                    weight.iloc[:sample_size]
+                    if isinstance(weight, pd.Series)
+                    else weight[:sample_size]
+                )
             if self.groups is not None:
-                groups = self.groups[:sample_size]
+                groups = (
+                    self.groups.iloc[:sample_size]
+                    if isinstance(self.groups, pd.Series)
+                    else self.groups[:sample_size]
+                )
         else:
             sampled_X_train = self.X_train_all
             sampled_y_train = self.y_train_all
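The `.iloc` changes above make positional slicing explicit. For a pandas Series, plain indexing can be label-based, so after the data is shuffled (which permutes the index) it may silently pick the wrong rows; `.iloc` is always positional. A minimal illustration (toy data, not from this commit):

    import pandas as pd

    s = pd.Series([10, 20, 30], index=[2, 1, 0])
    s[0]       # label-based lookup -> 30
    s.iloc[0]  # position-based -> 10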
@@ -336,7 +349,7 @@ class AutoMLState:
             del config["FLAML_sample_size"]
         budget = (
             None
-            if state.time_budget is None
+            if state.time_budget < 0
             else state.time_budget - state.time_from_start
             if sample_size == state.data_size[0]
             else (state.time_budget - state.time_from_start)
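This hunk is part of the commit-wide switch from `time_budget=None` to a negative sentinel, so the budget can be compared numerically without None checks. A sketch of the convention, with an example elapsed time:

    time_budget = -1        # negative means "no budget given"
    time_from_start = 12.5  # example elapsed seconds
    budget_left = None if time_budget < 0 else time_budget - time_from_start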
@@ -360,7 +373,7 @@ class AutoMLState:
             state.groups_val,
             state.train_time_limit
             if budget is None
-            else min(budget, state.train_time_limit),
+            else min(budget, state.train_time_limit or np.inf),
             state.kf,
             config,
             state.task,
@@ -373,6 +386,7 @@ class AutoMLState:
             state.cv_score_agg_func,
             state.log_training_metric,
             this_estimator_kwargs,
+            state.free_mem_ratio,
         )
         if state.retrain_final and not state.model_history:
             trained_estimator.cleanup()
@@ -432,9 +446,7 @@ class AutoMLState:
             ] = groups  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator

         budget = (
-            None
-            if self.time_budget is None
-            else self.time_budget - self.time_from_start
+            None if self.time_budget < 0 else self.time_budget - self.time_from_start
         )

         estimator, train_time = train_estimator(
@@ -448,6 +460,7 @@ class AutoMLState:
             budget=budget,
             fit_kwargs=this_estimator_kwargs,  # NOTE: _train_with_config is after kwargs is updated to fit_kwargs_by_estimator
             eval_metric=self.metric if hasattr(self, "metric") else "train_time",
+            free_mem_ratio=self.free_mem_ratio,
         )

         if sampled_weight is not None:
@@ -648,6 +661,7 @@ class AutoML(BaseEstimator):
                 datasets, but will incur more overhead in time.
                 If dict: the dict contains the keywords arguments to be passed to
                 [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html).
+            free_mem_ratio: float between 0 and 1, default=0. The free memory ratio to keep during training.
             metric_constraints: list, default=[] | The list of metric constraints.
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
@@ -724,7 +738,7 @@ class AutoML(BaseEstimator):
         settings["log_training_metric"] = settings.get("log_training_metric", False)
         settings["mem_thres"] = settings.get("mem_thres", MEM_THRES)
         settings["pred_time_limit"] = settings.get("pred_time_limit", np.inf)
-        settings["train_time_limit"] = settings.get("train_time_limit", np.inf)
+        settings["train_time_limit"] = settings.get("train_time_limit", None)
         settings["verbose"] = settings.get("verbose", 3)
         settings["retrain_full"] = settings.get("retrain_full", True)
         settings["split_type"] = settings.get("split_type", "auto")
@@ -738,6 +752,7 @@ class AutoML(BaseEstimator):
         settings["append_log"] = settings.get("append_log", False)
         settings["min_sample_size"] = settings.get("min_sample_size", MIN_SAMPLE_TRAIN)
         settings["use_ray"] = settings.get("use_ray", False)
+        settings["free_mem_ratio"] = settings.get("free_mem_ratio", 0)
         settings["metric_constraints"] = settings.get("metric_constraints", [])
         settings["cv_score_agg_func"] = settings.get("cv_score_agg_func", None)
         settings["fit_kwargs_by_estimator"] = settings.get(
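The new `free_mem_ratio` setting flows from here down into every estimator's fit. A hedged usage sketch, assuming the usual flaml entry point and placeholder arrays `X_train`/`y_train`:

    from flaml import AutoML

    automl = AutoML()
    # Keep at least 20% of system memory free while training;
    # the default 0 disables memory-based early stopping.
    automl.fit(X_train, y_train, task="classification",
               time_budget=60, free_mem_ratio=0.2)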
@@ -1271,6 +1286,8 @@ class AutoML(BaseEstimator):
                 ] = (
                     self._state.sample_weight_all
                 )  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
+                if isinstance(self._state.sample_weight_all, pd.Series):
+                    self._state.sample_weight_all.reset_index(drop=True, inplace=True)
             else:
                 X_train_all, y_train_all = shuffle(
                     X_train_all, y_train_all, random_state=RANDOM_SEED
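The `reset_index(drop=True)` matters because `sklearn.utils.shuffle` returns a Series with its index permuted, so later positional slicing would disagree with the label order. Toy example:

    import pandas as pd
    from sklearn.utils import shuffle

    w = pd.Series([0.1, 0.2, 0.3, 0.4])
    w = shuffle(w, random_state=1)          # index is now permuted
    w.reset_index(drop=True, inplace=True)  # back to 0..n-1, matching positions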
@@ -1394,6 +1411,7 @@ class AutoML(BaseEstimator):
                             rest
                         ],  # NOTE: _prepare_data is before kwargs is updated to fit_kwargs_by_estimator
                         test_size=split_ratio,
+                        stratify=stratify,
                         random_state=RANDOM_SEED,
                     )
                     weight1 = self._state.fit_kwargs["sample_weight"][
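Passing `stratify` makes the holdout split preserve class proportions, which together with the fixed `RANDOM_SEED` makes evaluation reproducible. A standalone sketch with placeholder `X`, `y`:

    from sklearn.model_selection import train_test_split

    X_rest, X_val, y_rest, y_val = train_test_split(
        X, y, test_size=0.1, stratify=y, random_state=1234
    )
    # Each class appears in the validation set in the same proportion as in y.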
@@ -1796,7 +1814,8 @@ class AutoML(BaseEstimator):
         self.modelcount = 0
         self._auto_augment = auto_augment
         self._prepare_data(eval_method, split_ratio, n_splits)
-        self._state.time_budget = None
+        self._state.time_budget = -1
+        self._state.free_mem_ratio = 0
         self._state.n_jobs = n_jobs
         import os

@@ -1885,7 +1904,7 @@ class AutoML(BaseEstimator):
             return eval_method
         nrow, dim = self._nrow, self._ndim
         if (
-            time_budget is None
+            time_budget < 0
             or nrow * dim / 0.9 < SMALL_LARGE_THRES * (time_budget / 3600)
             and nrow < CV_HOLDOUT_THRESHOLD
         ):
@@ -2145,6 +2164,7 @@ class AutoML(BaseEstimator):
         auto_augment=None,
         min_sample_size=None,
         use_ray=None,
+        free_mem_ratio=0,
         metric_constraints=None,
         custom_hp=None,
         cv_score_agg_func=None,
@@ -2250,7 +2270,7 @@ class AutoML(BaseEstimator):
             mem_thres: A float of the memory size constraint in bytes.
             pred_time_limit: A float of the prediction latency constraint in seconds.
                 It refers to the average prediction time per row in validation data.
-            train_time_limit: A float of the training time constraint in seconds.
+            train_time_limit: None or a float of the training time constraint in seconds.
             X_val: None or a numpy array or a pandas dataframe of validation data.
             y_val: None or a numpy array or a pandas series of validation labels.
             sample_weight_val: None or a numpy array of the sample weight of
@@ -2337,6 +2357,7 @@ class AutoML(BaseEstimator):
                 datasets, but will incur more overhead in time.
                 If dict: the dict contains the keywords arguments to be passed to
                 [ray.tune.run](https://docs.ray.io/en/latest/tune/api_docs/execution.html).
+            free_mem_ratio: float between 0 and 1, default=0. The free memory ratio to keep during training.
             metric_constraints: list, default=[] | The list of metric constraints.
                 Each element in this list is a 3-tuple, which shall be expressed
                 in the following format: the first element of the 3-tuple is the name of the
@@ -2523,7 +2544,7 @@ class AutoML(BaseEstimator):
             self._settings.get("early_stop") if early_stop is None else early_stop
         )
         # no search budget is provided?
-        no_budget = time_budget == -1 and max_iter is None and not early_stop
+        no_budget = time_budget < 0 and max_iter is None and not early_stop
         append_log = (
             self._settings.get("append_log") if append_log is None else append_log
         )
@@ -2562,7 +2583,11 @@ class AutoML(BaseEstimator):
                 X_train = ray.get(X_train)
             elif isinstance(dataframe, ray.ObjectRef):
                 dataframe = ray.get(dataframe)
-
+        self._state.free_mem_ratio = (
+            self._settings.get("free_mem_ratio")
+            if free_mem_ratio is None
+            else free_mem_ratio
+        )
         self._state.task = task
         self._state.log_training_metric = log_training_metric

@@ -2835,8 +2860,8 @@ class AutoML(BaseEstimator):
             except FileNotFoundError:
                 pass

+        self._state.time_budget = time_budget
         starting_points = {} if starting_points == "static" else starting_points
-
         for estimator_name in estimator_list:
             estimator_class = self._state.learner_classes[estimator_name]
             estimator_class.init()
@@ -2869,10 +2894,10 @@ class AutoML(BaseEstimator):
                 max_iter=max_iter / len(estimator_list)
                 if self._learner_selector == "roundrobin"
                 else max_iter,
+                budget=self._state.time_budget,
             )
         logger.info("List of ML learners in AutoML Run: {}".format(estimator_list))
         self.estimator_list = estimator_list
-        self._state.time_budget = time_budget if time_budget > 0 else 1e10
         self._active_estimators = estimator_list.copy()
         self._ensemble = ensemble
         self._max_iter = max_iter
@@ -2907,6 +2932,7 @@ class AutoML(BaseEstimator):
             )
             if (
                 self._hpo_method in ("cfo", "bs")
+                and self._state.time_budget > 0
                 and (self._time_taken_best_iter >= self._state.time_budget * 0.7)
                 and not all(
                     state.search_alg and state.search_alg.searcher.is_ls_ever_converged
@@ -2973,7 +2999,11 @@ class AutoML(BaseEstimator):
             )
         space = self.search_space
         self._state.time_from_start = time.time() - self._start_time_flag
-        time_left = self._state.time_budget - self._state.time_from_start
+        time_budget_s = (
+            self._state.time_budget - self._state.time_from_start
+            if self._state.time_budget >= 0
+            else None
+        )
         if self._hpo_method != "optuna":
             min_resource = self.min_resource
             if isinstance(min_resource, dict):
@@ -2999,7 +3029,8 @@ class AutoML(BaseEstimator):
                 ],
                 metric_constraints=self.metric_constraints,
                 seed=self._seed,
-                time_budget_s=time_left,
+                time_budget_s=time_budget_s,
+                num_samples=self._max_iter,
                 allow_empty_config=True,
             )
         else:
@@ -3032,7 +3063,7 @@ class AutoML(BaseEstimator):
             metric="val_loss",
             mode="min",
             resources_per_trial=resources_per_trial,
-            time_budget_s=self._state.time_budget,
+            time_budget_s=time_budget_s,
             num_samples=self._max_iter,
             verbose=max(self.verbose - 2, 0),
             raise_on_failed_trial=False,
@@ -3217,6 +3248,11 @@ class AutoML(BaseEstimator):
                     points_to_evaluate = search_state.init_config.copy()

                     low_cost_partial_config = search_state.low_cost_partial_config
+                time_budget_s = (
+                    min(budget_left, self._state.train_time_limit or np.inf)
+                    if self._state.time_budget >= 0
+                    else None
+                )
                 if self._hpo_method in ("bs", "cfo", "grid", "cfocat", "random"):
                     algo = SearchAlgo(
                         metric="val_loss",
@@ -3234,6 +3270,8 @@ class AutoML(BaseEstimator):
                         metric_constraints=self.metric_constraints,
                         seed=self._seed,
                         allow_empty_config=True,
+                        time_budget_s=time_budget_s,
+                        num_samples=self._max_iter,
                     )
                 else:
                     # if self._hpo_method is bo, sometimes the search space and the initial config dimension do not match
@@ -3272,7 +3310,7 @@ class AutoML(BaseEstimator):
             analysis = tune.run(
                 search_state.training_function,
                 search_alg=search_state.search_alg,
-                time_budget_s=min(budget_left, self._state.train_time_limit),
+                time_budget_s=time_budget_s,
                 verbose=max(self.verbose - 3, 0),
                 use_ray=False,
             )
@@ -3408,7 +3446,7 @@ class AutoML(BaseEstimator):
                 est_retrain_time = 0
             self._state.time_from_start = time.time() - self._start_time_flag
             if (
-                self._state.time_from_start >= self._state.time_budget
+                self._state.time_from_start >= self._state.time_budget >= 0
                 or not self._active_estimators
             ):
                 break
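The new condition uses Python's chained comparison: `a >= b >= 0` means `a >= b and b >= 0`, so a negative (unset) budget can never trigger the break. For example:

    time_from_start, time_budget = 5.0, -1
    time_from_start >= time_budget >= 0
    # False: 5.0 >= -1 holds, but -1 >= 0 does not.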
@@ -3581,17 +3619,18 @@ class AutoML(BaseEstimator):
             elif self._state.retrain_final:
                 # reset time budget for retraining
                 if self._max_iter > 1:
-                    self._state.time_from_start -= self._state.time_budget
+                    self._state.time_budget = -1
                 if (
                     self._state.task in TS_FORECAST
                     or self._trained_estimator is None
                     or self._trained_estimator.model is None
                     or (
-                        self._state.time_budget - self._state.time_from_start
+                        self._state.time_budget < 0
+                        or self._state.time_budget - self._state.time_from_start
                         > self._selected.est_retrain_time(self.data_size_full)
+                    )
                     and self._selected.best_config_sample_size
                     == self._state.data_size[0]
-                    )
                 ):
                     state = self._search_states[self._best_estimator]
                     (
@@ -3638,7 +3677,8 @@ class AutoML(BaseEstimator):
             ):  # sample_size=None meaning no result
                 search_state = self._search_states[estimator]
                 if (
-                    self._search_states[estimator].time2eval_best
+                    self._state.time_budget >= 0
+                    and self._search_states[estimator].time2eval_best
                     > self._state.time_budget - self._state.time_from_start
                     or self._iter_per_learner_fullsize[estimator]
                     >= self._max_iter_per_learner
@@ -3646,7 +3686,10 @@ class AutoML(BaseEstimator):
                     inv.append(0)
                     continue
                 estimated_cost = search_state.estimated_cost4improvement
-                if search_state.sample_size < self._state.data_size[0]:
+                if (
+                    search_state.sample_size < self._state.data_size[0]
+                    and self._state.time_budget >= 0
+                ):
                     estimated_cost = min(
                         estimated_cost,
                         search_state.time2eval_best
flaml/ml.py (17 lines changed)
@@ -432,6 +432,7 @@ def get_val_loss(
     budget=None,
     log_training_metric=False,
     fit_kwargs={},
+    free_mem_ratio=0,
 ):

     start = time.time()
@@ -439,7 +440,7 @@ def get_val_loss(
     #     fit_kwargs['groups_val'] = groups_val
     #     fit_kwargs['X_val'] = X_val
     #     fit_kwargs['y_val'] = y_val
-    estimator.fit(X_train, y_train, budget, **fit_kwargs)
+    estimator.fit(X_train, y_train, budget, free_mem_ratio, **fit_kwargs)
     val_loss, metric_for_logging, pred_time, _ = _eval_estimator(
         config,
         estimator,
@@ -494,6 +495,7 @@ def evaluate_model_CV(
     cv_score_agg_func=None,
     log_training_metric=False,
     fit_kwargs={},
+    free_mem_ratio=0,
 ):
     if cv_score_agg_func is None:
         cv_score_agg_func = default_cv_score_agg_func
@@ -524,7 +526,7 @@ def evaluate_model_CV(
     else:
         kf = kf.split(X_train_split)
     rng = np.random.RandomState(2020)
-    budget_per_train = budget / n
+    budget_per_train = budget and budget / n
     if "sample_weight" in fit_kwargs:
         weight = fit_kwargs["sample_weight"]
         weight_val = None
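`budget and budget / n` relies on `and` returning its left operand when it is falsy: a `None` budget propagates as `None` instead of raising a TypeError on division. A sketch with a hypothetical helper:

    def per_fold(budget, n):
        return budget and budget / n

    per_fold(None, 5)  # None (no per-fold budget), not a TypeError
    per_fold(60.0, 5)  # 12.0
    # Caveat of the idiom: a budget of exactly 0 is falsy and stays 0.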
@@ -565,6 +567,7 @@ def evaluate_model_CV(
             budget_per_train,
             log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs,
+            free_mem_ratio=free_mem_ratio,
         )
         if isinstance(metric_i, dict) and "intermediate_results" in metric_i.keys():
             del metric_i["intermediate_results"]
@@ -575,7 +578,7 @@ def evaluate_model_CV(
         log_metric_folds.append(metric_i)
         train_time += train_time_i
         pred_time += pred_time_i
-        if time.time() - start_time >= budget:
+        if budget and time.time() - start_time >= budget:
             break
     val_loss, metric = cv_score_agg_func(val_loss_folds, log_metric_folds)
     n = total_fold_num
@@ -603,6 +606,7 @@ def compute_estimator(
     cv_score_agg_func=None,
     log_training_metric=False,
     fit_kwargs={},
+    free_mem_ratio=0,
 ):
     estimator_class = estimator_class or get_estimator_class(task, estimator_name)
     estimator = estimator_class(
@@ -635,6 +639,7 @@ def compute_estimator(
             budget=budget,
             log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs,
+            free_mem_ratio=0,
         )
     else:
         val_loss, metric_for_logging, train_time, pred_time = evaluate_model_CV(
@@ -650,6 +655,7 @@ def compute_estimator(
             cv_score_agg_func,
             log_training_metric=log_training_metric,
             fit_kwargs=fit_kwargs,
+            free_mem_ratio=0,
         )

     if isinstance(estimator, TransformersEstimator):
@@ -669,6 +675,7 @@ def train_estimator(
     budget=None,
     fit_kwargs={},
     eval_metric=None,
+    free_mem_ratio=0,
 ):
     start_time = time.time()
     estimator_class = estimator_class or get_estimator_class(task, estimator_name)
@@ -681,7 +688,9 @@ def train_estimator(
         fit_kwargs["metric"] = eval_metric

     if X_train is not None:
-        train_time = estimator.fit(X_train, y_train, budget, **fit_kwargs)
+        train_time = estimator.fit(
+            X_train, y_train, budget, free_mem_ratio, **fit_kwargs
+        )
     else:
         estimator = estimator.estimator_class(**estimator.params)
     train_time = time.time() - start_time
@@ -44,7 +44,7 @@ except ImportError:
     resource = None

 logger = logging.getLogger("flaml.automl")
-FREE_MEM_RATIO = 0.2
+# FREE_MEM_RATIO = 0.2


 def TimeoutHandler(sig, frame):
@@ -201,13 +201,14 @@ class BaseEstimator:
         self._model = model
         return train_time

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         """Train the model from given training data.

         Args:
             X_train: A numpy array or a dataframe of training data in shape n*m.
             y_train: A numpy array or a series of labels in shape n*1.
             budget: A float of the time budget in seconds.
+            free_mem_ratio: A float between 0 and 1 for the free memory ratio to keep during training.

         Returns:
             train_time: A float of the training time in seconds.
@@ -221,7 +222,7 @@ class BaseEstimator:
             mem = psutil.virtual_memory() if psutil is not None else None
             try:
                 with limit_resource(
-                    mem.available * (1 - FREE_MEM_RATIO)
+                    mem.available * (1 - free_mem_ratio)
                     + psutil.Process(os.getpid()).memory_info().rss
                     if mem is not None
                     else -1,
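With the module constant gone, the memory cap in `BaseEstimator.fit` is computed from the caller-supplied ratio. A standalone recomputation of the same cap, with an example ratio:

    import os
    import psutil

    free_mem_ratio = 0.2  # example value
    mem = psutil.virtual_memory()
    rss = psutil.Process(os.getpid()).memory_info().rss
    # The process may grow until only free_mem_ratio of the currently
    # available memory would remain.
    cap_bytes = mem.available * (1 - free_mem_ratio) + rss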
@@ -596,6 +597,7 @@ class TransformersEstimator(BaseEstimator):
         X_train: DataFrame,
         y_train: Series,
         budget=None,
+        free_mem_ratio=0,
         X_val=None,
         y_val=None,
         gpu_per_trial=None,
@@ -1036,7 +1038,7 @@ class LGBMEstimator(BaseEstimator):
         self._time_per_iter = None
         self._train_size = 0
         self._mem_per_iter = -1
-        self.HAS_CALLBACK = self.HAS_CALLBACK and self._callbacks(0, 0) is not None
+        self.HAS_CALLBACK = self.HAS_CALLBACK and self._callbacks(0, 0, 0) is not None

     def _preprocess(self, X):
         if (
@@ -1054,7 +1056,7 @@ class LGBMEstimator(BaseEstimator):
             X = X.to_numpy()
         return X

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         start_time = time.time()
         deadline = start_time + budget if budget else np.inf
         n_iter = self.params.get(self.ITER_HP, self.DEFAULT_ITER)
@@ -1118,7 +1120,7 @@ class LGBMEstimator(BaseEstimator):
                     )
                     if budget is not None
                     else n_iter,
-                    int((1 - FREE_MEM_RATIO) * mem0 / self._mem_per_iter)
+                    int((1 - free_mem_ratio) * mem0 / self._mem_per_iter)
                     if psutil is not None and self._mem_per_iter > 0
                     else n_iter,
                 )
@@ -1129,10 +1131,12 @@ class LGBMEstimator(BaseEstimator):
         if self.HAS_CALLBACK:
             kwargs_callbacks = kwargs.get("callbacks")
             if kwargs_callbacks:
-                callbacks = kwargs_callbacks + self._callbacks(start_time, deadline)
+                callbacks = kwargs_callbacks + self._callbacks(
+                    start_time, deadline, free_mem_ratio
+                )
                 kwargs.pop("callbacks")
             else:
-                callbacks = self._callbacks(start_time, deadline)
+                callbacks = self._callbacks(start_time, deadline, free_mem_ratio)
             if isinstance(self, XGBoostSklearnEstimator):
                 from xgboost import __version__

@@ -1162,10 +1166,10 @@ class LGBMEstimator(BaseEstimator):
         train_time = time.time() - start_time
         return train_time

-    def _callbacks(self, start_time, deadline) -> List[Callable]:
-        return [partial(self._callback, start_time, deadline)]
+    def _callbacks(self, start_time, deadline, free_mem_ratio) -> List[Callable]:
+        return [partial(self._callback, start_time, deadline, free_mem_ratio)]

-    def _callback(self, start_time, deadline, env) -> None:
+    def _callback(self, start_time, deadline, free_mem_ratio, env) -> None:
         from lightgbm.callback import EarlyStopException

         now = time.time()
@@ -1175,7 +1179,7 @@ class LGBMEstimator(BaseEstimator):
             raise EarlyStopException(env.iteration, env.evaluation_result_list)
         if psutil is not None:
             mem = psutil.virtual_memory()
-            if mem.available / mem.total < FREE_MEM_RATIO:
+            if mem.available / mem.total < free_mem_ratio:
                 raise EarlyStopException(env.iteration, env.evaluation_result_list)

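The callback's memory test, extracted as a standalone sketch: training stops early once the fraction of free RAM drops below the requested ratio (hypothetical helper name).

    import psutil

    def low_memory(free_mem_ratio=0.2):
        mem = psutil.virtual_memory()
        return mem.available / mem.total < free_mem_ratio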
@@ -1260,7 +1264,7 @@ class XGBoostEstimator(SKLearnEstimator):
         super().__init__(task, **config)
         self.params["verbosity"] = 0

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         import xgboost as xgb

         start_time = time.time()
@@ -1284,7 +1288,7 @@ class XGBoostEstimator(SKLearnEstimator):
             if "objective" in self.params:
                 del self.params["objective"]
         _n_estimators = self.params.pop("n_estimators")
-        callbacks = XGBoostEstimator._callbacks(start_time, deadline)
+        callbacks = XGBoostEstimator._callbacks(start_time, deadline, free_mem_ratio)
         if callbacks:
             self._model = xgb.train(
                 self.params,
@@ -1311,7 +1315,7 @@ class XGBoostEstimator(SKLearnEstimator):
         return super().predict(dtest, **kwargs)

     @classmethod
-    def _callbacks(cls, start_time, deadline):
+    def _callbacks(cls, start_time, deadline, free_mem_ratio):
         try:
             from xgboost.callback import TrainingCallback
         except ImportError:  # for xgboost<1.3
@@ -1326,7 +1330,7 @@ class XGBoostEstimator(SKLearnEstimator):
                     return True
                 if psutil is not None:
                     mem = psutil.virtual_memory()
-                    if mem.available / mem.total < FREE_MEM_RATIO:
+                    if mem.available / mem.total < free_mem_ratio:
                         return True
                 return False

@@ -1374,17 +1378,17 @@ class XGBoostSklearnEstimator(SKLearnEstimator, LGBMEstimator):
             self.estimator_class = xgb.XGBClassifier
         self._xgb_version = xgb.__version__

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         if issparse(X_train) and self._xgb_version < "1.6.0":
             # "auto" fails for sparse input since xgboost 1.6.0
             self.params["tree_method"] = "auto"
         if kwargs.get("gpu_per_trial"):
             self.params["tree_method"] = "gpu_hist"
             kwargs.pop("gpu_per_trial")
-        return super().fit(X_train, y_train, budget, **kwargs)
+        return super().fit(X_train, y_train, budget, free_mem_ratio, **kwargs)

-    def _callbacks(self, start_time, deadline) -> List[Callable]:
-        return XGBoostEstimator._callbacks(start_time, deadline)
+    def _callbacks(self, start_time, deadline, free_mem_ratio) -> List[Callable]:
+        return XGBoostEstimator._callbacks(start_time, deadline, free_mem_ratio)


 class XGBoostLimitDepthEstimator(XGBoostSklearnEstimator):
@@ -1459,6 +1463,8 @@ class RandomForestEstimator(SKLearnEstimator, LGBMEstimator):
             )
         if self._task not in CLASSIFICATION and "criterion" in config:
             params.pop("criterion")
+        if "random_state" not in params:
+            params["random_state"] = 12032022
         return params

     def __init__(
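Defaulting `random_state` is what makes the performance test reproducible: two fits with the same seed produce identical forests. A self-contained check on synthetic data:

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(random_state=0)
    a = RandomForestClassifier(random_state=12032022).fit(X, y).score(X, y)
    b = RandomForestClassifier(random_state=12032022).fit(X, y).score(X, y)
    assert a == b  # same seed -> same model -> same score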
@@ -1627,7 +1633,7 @@ class CatBoostEstimator(BaseEstimator):

             self.estimator_class = CatBoostClassifier

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         start_time = time.time()
         deadline = start_time + budget if budget else np.inf
         train_dir = f"catboost_{str(start_time)}"
@@ -1665,7 +1671,7 @@ class CatBoostEstimator(BaseEstimator):
                 cat_features=cat_features,
                 eval_set=eval_set,
                 callbacks=CatBoostEstimator._callbacks(
-                    start_time, deadline, FREE_MEM_RATIO if use_best_model else None
+                    start_time, deadline, free_mem_ratio if use_best_model else None
                 ),
                 **kwargs,
             )
@@ -1791,7 +1797,7 @@ class Prophet(SKLearnEstimator):
         train_df = X_train.join(y_train)
         return train_df

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         from prophet import Prophet

         current_time = time.time()
@@ -1869,7 +1875,7 @@ class ARIMA(Prophet):
         train_df = train_df.drop(TS_TIMESTAMP_COL, axis=1)
         return train_df

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         import warnings

         warnings.filterwarnings("ignore")
@@ -1969,7 +1975,7 @@ class SARIMAX(ARIMA):
         }
         return space

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         import warnings

         warnings.filterwarnings("ignore")
@@ -2094,7 +2100,7 @@ class TS_SKLearn(SKLearnEstimator):
             model = self.hcrystaball_model.model.fit(X_fit, y_fit)
             self._model = model

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         current_time = time.time()
         self._fit(X_train, y_train, budget=budget, **kwargs)
         train_time = time.time() - current_time
@@ -2266,11 +2272,10 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator):

         return training, train_dataloader, val_dataloader

-    def fit(self, X_train, y_train, budget=None, **kwargs):
+    def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
         import warnings
         import pytorch_lightning as pl
         from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
-        from pytorch_lightning.loggers import TensorBoardLogger
         import torch
         from pytorch_forecasting import TemporalFusionTransformer
         from pytorch_forecasting.metrics import QuantileLoss
| @ -2287,7 +2292,6 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): | |||||||
|         early_stop_callback = EarlyStopping( |         early_stop_callback = EarlyStopping( | ||||||
|             monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min" |             monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min" | ||||||
|         ) |         ) | ||||||
|         lr_logger = LearningRateMonitor()  # log the learning rate |  | ||||||
| 
 | 
 | ||||||
|         def _fit(log): |         def _fit(log): | ||||||
|             default_trainer_kwargs = dict( |             default_trainer_kwargs = dict( | ||||||
| @ -2296,7 +2300,9 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): | |||||||
|                 else None, |                 else None, | ||||||
|                 max_epochs=max_epochs, |                 max_epochs=max_epochs, | ||||||
|                 gradient_clip_val=gradient_clip_val, |                 gradient_clip_val=gradient_clip_val, | ||||||
|                 callbacks=[lr_logger, early_stop_callback] if log else False, |                 callbacks=[LearningRateMonitor(), early_stop_callback] | ||||||
|  |                 if log | ||||||
|  |                 else [early_stop_callback], | ||||||
|                 logger=log, |                 logger=log, | ||||||
|             ) |             ) | ||||||
|             trainer = pl.Trainer( |             trainer = pl.Trainer( | ||||||
| @ -2308,7 +2314,7 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): | |||||||
|                 lstm_layers=2,  # 2 is mostly optimal according to documentation |                 lstm_layers=2,  # 2 is mostly optimal according to documentation | ||||||
|                 output_size=7,  # 7 quantiles by default |                 output_size=7,  # 7 quantiles by default | ||||||
|                 loss=QuantileLoss(), |                 loss=QuantileLoss(), | ||||||
|                 log_interval=10, |                 log_interval=10 if log else 0, | ||||||
|         # set to 0 to disable logging, or e.g. 10 to log every 10 batches |         # set to 0 to disable logging, or e.g. 10 to log every 10 batches | ||||||
|                 reduce_on_plateau_patience=4, |                 reduce_on_plateau_patience=4, | ||||||
|             ) |             ) | ||||||
| @ -2320,12 +2326,14 @@ class TemporalFusionTransformerEstimator(SKLearnEstimator): | |||||||
|             ) |             ) | ||||||
|             return trainer |             return trainer | ||||||
| 
 | 
 | ||||||
|         try: |         # try: | ||||||
|             logger = TensorBoardLogger( |         #     from pytorch_lightning.loggers import TensorBoardLogger | ||||||
|                 kwargs.get("log_dir", "lightning_logs") | 
 | ||||||
|             )  # logging results to a tensorboard |         #     logger = TensorBoardLogger( | ||||||
|             trainer = _fit(log=logger) |         #         kwargs.get("log_dir", "lightning_logs") | ||||||
|         except ValueError: |         #     )  # logging results to a tensorboard | ||||||
|  |         #     trainer = _fit(log=logger) | ||||||
|  |         # except ValueError: | ||||||
|         # issue with pytorch forecasting model log_prediction() function |         # issue with pytorch forecasting model log_prediction() function | ||||||
|         # pytorch-forecasting issue #1145 |         # pytorch-forecasting issue #1145 | ||||||
|         trainer = _fit(log=False) |         trainer = _fit(log=False) | ||||||
|  | |||||||
| @ -146,7 +146,6 @@ class BlendSearch(Searcher): | |||||||
|                 self.cost_attr = None |                 self.cost_attr = None | ||||||
|         else: |         else: | ||||||
|             self.cost_attr = cost_attr |             self.cost_attr = cost_attr | ||||||
| 
 |  | ||||||
|         self.penalty = PENALTY  # penalty term for constraints |         self.penalty = PENALTY  # penalty term for constraints | ||||||
|         self._metric, self._mode = metric, mode |         self._metric, self._mode = metric, mode | ||||||
|         self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation |         self._use_incumbent_result_in_evaluation = use_incumbent_result_in_evaluation | ||||||
| @ -310,7 +309,7 @@ class BlendSearch(Searcher): | |||||||
|                 self._time_used += now - self._start_time |                 self._time_used += now - self._start_time | ||||||
|                 self._start_time = now |                 self._start_time = now | ||||||
|                 self._set_deadline() |                 self._set_deadline() | ||||||
|                 if self._input_cost_attr == "auto": |                 if self._input_cost_attr == "auto" and self._time_budget_s: | ||||||
|                     self.cost_attr = self._ls.cost_attr = TIME_TOTAL_S |                     self.cost_attr = self._ls.cost_attr = TIME_TOTAL_S | ||||||
|             if "metric_target" in spec: |             if "metric_target" in spec: | ||||||
|                 self._metric_target = spec.get("metric_target") |                 self._metric_target = spec.get("metric_target") | ||||||
|  | |||||||
| @ -1 +1 @@ | |||||||
| __version__ = "1.0.14" | __version__ = "1.1.0" | ||||||
|  | |||||||
| @ -38,10 +38,10 @@ | |||||||
|    "metadata": {}, |    "metadata": {}, | ||||||
|    "outputs": [], |    "outputs": [], | ||||||
|    "source": [ |    "source": [ | ||||||
|     "%pip install flaml[notebook] openml==0.10.2\n", |     "%pip install flaml[notebook]\n", | ||||||
|     "# From v0.6.6, catboost is made an optional dependency to build conda package.\n", |     "# From v0.6.6, catboost is made an optional dependency to build conda package.\n", | ||||||
|     "# To install catboost, you can run:\n", |     "# To install catboost, you can run:\n", | ||||||
|     "%pip install flaml[catboost]" |     "# %pip install flaml[catboost]" | ||||||
|    ] |    ] | ||||||
|   }, |   }, | ||||||
|   { |   { | ||||||
| @ -112,7 +112,7 @@ | |||||||
|    "source": [ |    "source": [ | ||||||
|     "settings = {\n", |     "settings = {\n", | ||||||
|     "    \"time_budget\": 600,  # total running time in seconds\n", |     "    \"time_budget\": 600,  # total running time in seconds\n", | ||||||
|     "    \"metric\": 'accuracy', ", |     "    \"metric\": 'accuracy', \n", | ||||||
|     "                        # check the documentation for options of metrics (https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric)\n", |     "                        # check the documentation for options of metrics (https://microsoft.github.io/FLAML/docs/Use-Cases/Task-Oriented-AutoML#optimization-metric)\n", | ||||||
|     "    \"task\": 'classification',  # task type\n", |     "    \"task\": 'classification',  # task type\n", | ||||||
|     "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\n", |     "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\n", | ||||||
| @ -1269,7 +1269,7 @@ | |||||||
|  ], |  ], | ||||||
|  "metadata": { |  "metadata": { | ||||||
|   "kernelspec": { |   "kernelspec": { | ||||||
|    "display_name": "Python 3.9.7 ('base')", |    "display_name": "Python 3.9.15 64-bit", | ||||||
|    "language": "python", |    "language": "python", | ||||||
|    "name": "python3" |    "name": "python3" | ||||||
|   }, |   }, | ||||||
| @ -1283,11 +1283,11 @@ | |||||||
|    "name": "python", |    "name": "python", | ||||||
|    "nbconvert_exporter": "python", |    "nbconvert_exporter": "python", | ||||||
|    "pygments_lexer": "ipython3", |    "pygments_lexer": "ipython3", | ||||||
|    "version": "3.9.7" |    "version": "3.9.15" | ||||||
|   }, |   }, | ||||||
|   "vscode": { |   "vscode": { | ||||||
|    "interpreter": { |    "interpreter": { | ||||||
|     "hash": "e811209110f5aa4d8c2189eeb3ff7b9b4d146931cb9189ef6041ff71605c541d" |     "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1" | ||||||
|    } |    } | ||||||
|   } |   } | ||||||
|  }, |  }, | ||||||
|  | |||||||
setup.py
							| @ -42,6 +42,7 @@ setuptools.setup( | |||||||
|         "notebook": [ |         "notebook": [ | ||||||
|             "jupyter", |             "jupyter", | ||||||
|             "matplotlib", |             "matplotlib", | ||||||
|  |             "openml==0.10.2", | ||||||
|         ], |         ], | ||||||
|         "test": [ |         "test": [ | ||||||
|             "flake8>=3.8.4", |             "flake8>=3.8.4", | ||||||
|  | |||||||
| @ -380,6 +380,7 @@ class TestMultiClass(unittest.TestCase): | |||||||
|             "estimator_list": ["large_lgbm"], |             "estimator_list": ["large_lgbm"], | ||||||
|             "log_type": "all", |             "log_type": "all", | ||||||
|             "hpo_method": "random", |             "hpo_method": "random", | ||||||
|  |             "free_mem_ratio": 0.2, | ||||||
|         } |         } | ||||||
|         X_train, y_train = load_iris(return_X_y=True, as_frame=True) |         X_train, y_train = load_iris(return_X_y=True, as_frame=True) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -17,7 +17,7 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None): | |||||||
|         budget = performance_check_budget  # revise the budget on macOS |         budget = performance_check_budget  # revise the budget on macOS | ||||||
|     if budget == performance_check_budget: |     if budget == performance_check_budget: | ||||||
|         budget = None |         budget = None | ||||||
|         max_iter = 100 |         max_iter = 60 | ||||||
|     else: |     else: | ||||||
|         max_iter = None |         max_iter = None | ||||||
|     try: |     try: | ||||||
| @ -44,6 +44,15 @@ def test_automl(budget=5, dataset_format="dataframe", hpo_method=None): | |||||||
|         "log_file_name": "airlines_experiment.log",  # flaml log file |         "log_file_name": "airlines_experiment.log",  # flaml log file | ||||||
|         "seed": 7654321,  # random seed |         "seed": 7654321,  # random seed | ||||||
|         "hpo_method": hpo_method, |         "hpo_method": hpo_method, | ||||||
|  |         "log_type": "all", | ||||||
|  |         "estimator_list": [ | ||||||
|  |             "lgbm", | ||||||
|  |             "xgboost", | ||||||
|  |             "xgb_limitdepth", | ||||||
|  |             "rf", | ||||||
|  |             "extra_tree", | ||||||
|  |         ],  # list of ML learners | ||||||
|  |         "eval_method": "holdout", | ||||||
|     } |     } | ||||||
|     """The main flaml automl API""" |     """The main flaml automl API""" | ||||||
|     automl.fit(X_train=X_train, y_train=y_train, **settings) |     automl.fit(X_train=X_train, y_train=y_train, **settings) | ||||||
| @ -130,6 +139,7 @@ def test_mlflow(): | |||||||
|         "task": "classification",  # task type |         "task": "classification",  # task type | ||||||
|         "sample": False,  # whether to subsample training data |         "sample": False,  # whether to subsample training data | ||||||
|         "log_file_name": "adult.log",  # flaml log file |         "log_file_name": "adult.log",  # flaml log file | ||||||
|  |         "learner_selector": "roundrobin", | ||||||
|     } |     } | ||||||
|     mlflow.set_experiment("flaml") |     mlflow.set_experiment("flaml") | ||||||
|     with mlflow.start_run() as run: |     with mlflow.start_run() as run: | ||||||
|  | |||||||
| @ -43,7 +43,7 @@ class TestTrainingLog(unittest.TestCase): | |||||||
|                 print(model0.params["n_estimators"], config) |                 print(model0.params["n_estimators"], config) | ||||||
| 
 | 
 | ||||||
|                 # train on full data with no time limit |                 # train on full data with no time limit | ||||||
|                 automl._state.time_budget = None |                 automl._state.time_budget = -1 | ||||||
|                 model, _ = automl._state._train_with_config(estimator, config) |                 model, _ = automl._state._train_with_config(estimator, config) | ||||||
| 
 | 
 | ||||||
|                 # assuming estimator & config are saved and loaded as follows |                 # assuming estimator & config are saved and loaded as follows | ||||||
|  | |||||||
| @ -1,5 +1,14 @@ | |||||||
| # AutoML for LightGBM | # AutoML for LightGBM | ||||||
| 
 | 
 | ||||||
|  | ### Prerequisites for this example | ||||||
|  | 
 | ||||||
|  | Install the [notebook] option. | ||||||
|  | ```bash | ||||||
|  | pip install "flaml[notebook]" | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | This option is only needed to run this example, not for general use of FLAML. | ||||||
|  | 
 | ||||||
| ### Use built-in LGBMEstimator | ### Use built-in LGBMEstimator | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
|  | |||||||
| @ -1,5 +1,14 @@ | |||||||
| # AutoML for XGBoost | # AutoML for XGBoost | ||||||
| 
 | 
 | ||||||
|  | ### Prerequisites for this example | ||||||
|  | 
 | ||||||
|  | Install the [notebook] option. | ||||||
|  | ```bash | ||||||
|  | pip install "flaml[notebook]" | ||||||
|  | ``` | ||||||
|  | 
 | ||||||
|  | This option is only needed to run this example, not for general use of FLAML. | ||||||
|  | 
 | ||||||
| ### Use built-in XGBoostSklearnEstimator | ### Use built-in XGBoostSklearnEstimator | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
|  | |||||||
| @ -1,5 +1,7 @@ | |||||||
| As FLAML's AutoML module can be used as a transformer in the sklearn pipeline, we can get all the benefits of a pipeline. | As FLAML's AutoML module can be used as a transformer in the sklearn pipeline, we can get all the benefits of a pipeline. | ||||||
| 
 | 
 | ||||||
|  | This example requires openml==0.10.2. | ||||||
|  | 
 | ||||||
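Because `AutoML` implements the scikit-learn estimator interface, it can serve as the final step of a `Pipeline`, with its fit parameters routed through sklearn's `step__param` convention. A minimal sketch, with illustrative step names and settings (not the exact code of this example):

```python
from flaml import AutoML
from sklearn.datasets import load_iris
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

X, y = load_iris(return_X_y=True, as_frame=True)
automl = AutoML()
# AutoML is the final (estimator) step; earlier steps transform the data.
pipeline = Pipeline([("imputer", SimpleImputer()), ("automl", automl)])
# sklearn routes "automl__<param>" fit parameters to the "automl" step.
pipeline.fit(
    X,
    y,
    automl__time_budget=10,  # seconds; illustrative
    automl__task="classification",
    automl__metric="accuracy",
)
print(pipeline.predict(X.iloc[:5]))
```

Passing the AutoML settings through `pipeline.fit` keeps preprocessing and model search together in one object that can be pickled and reused.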
| ### Load data | ### Load data | ||||||
| 
 | 
 | ||||||
| ```python | ```python | ||||||
|  | |||||||
| @ -3,7 +3,7 @@ | |||||||
| ## Requirements | ## Requirements | ||||||
| 
 | 
 | ||||||
| ```bash | ```bash | ||||||
| pip install flaml thop torchvision torch | pip install "flaml>=1.1.0" thop torchvision torch | ||||||
| ``` | ``` | ||||||
| 
 | 
 | ||||||
| ## Tuning accurate and efficient neural networks with lexicographic preference | ## Tuning accurate and efficient neural networks with lexicographic preference | ||||||
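Lexicographic preference is expressed through the `lexico_objectives` argument of `flaml.tune.run`. The sketch below substitutes a dummy evaluation function for real training; the objective names, tolerances, targets, and search space are illustrative only:

```python
from flaml import tune

def evaluate_function(config):
    # Dummy objectives standing in for real error rate and FLOPs measurement.
    error_rate = (config["lr"] - 0.01) ** 2
    flops = 1.0 / config["lr"]
    return {"error_rate": error_rate, "flops": flops}

lexico_objectives = {
    "metrics": ["error_rate", "flops"],  # priority order: error_rate first
    "modes": ["min", "min"],
    "tolerances": {"error_rate": 0.02, "flops": 0.0},
    "targets": {"error_rate": 0.0, "flops": 0.0},
}

analysis = tune.run(
    evaluate_function,
    config={"lr": tune.loguniform(1e-4, 1e-1)},  # illustrative search space
    lexico_objectives=lexico_objectives,
    use_ray=False,
    num_samples=-1,
    time_budget_s=10,
)
print(analysis.best_config)
```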
|  | |||||||
| @ -12,7 +12,7 @@ | |||||||
|     - 'regression': regression with tabular data. |     - 'regression': regression with tabular data. | ||||||
|     - 'ts_forecast': time series forecasting. |     - 'ts_forecast': time series forecasting. | ||||||
|     - 'ts_forecast_classification': time series forecasting for classification. |     - 'ts_forecast_classification': time series forecasting for classification. | ||||||
|     <!-- - 'ts_forecast_panel': time series forecasting for panel datasets (multiple time series). --> |     - 'ts_forecast_panel': time series forecasting for panel datasets (multiple time series). | ||||||
|     - 'rank': learning to rank. |     - 'rank': learning to rank. | ||||||
|     - 'seq-classification': sequence classification. |     - 'seq-classification': sequence classification. | ||||||
|     - 'seq-regression': sequence regression. |     - 'seq-regression': sequence regression. | ||||||
| @ -20,7 +20,7 @@ | |||||||
|     - 'token-classification': token classification. |     - 'token-classification': token classification. | ||||||
|     - 'multichoice-classification': multichoice classification. |     - 'multichoice-classification': multichoice classification. | ||||||
| 
 | 
 | ||||||
| Two optional inputs are `time_budget` and `max_iter` for searching models and hyperparameters. When both are unspecified, only one model per estimator will be trained (using our [zero-shot](Zero-Shot-AutoML) technique). | Two optional inputs are `time_budget` and `max_iter` for searching models and hyperparameters. When both are unspecified, only one model per estimator will be trained (using our [zero-shot](Zero-Shot-AutoML) technique). When `time_budget` is provided, there can be randomness in the result due to runtime variance. | ||||||
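For instance, capping the search by `max_iter` with a fixed `seed` gives reproducible results, whereas a wall-clock `time_budget` does not. A minimal sketch, using an illustrative dataset and settings:

```python
from flaml import AutoML
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True, as_frame=True)
automl = AutoML()
# A fixed iteration budget plus a fixed seed avoids the runtime variance
# that a wall-clock time_budget would introduce.
automl.fit(X, y, task="classification", max_iter=60, seed=7654321)
print(automl.best_estimator, automl.best_loss)
```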
| 
 | 
 | ||||||
| A typical way to use `flaml.AutoML`: | A typical way to use `flaml.AutoML`: | ||||||
| 
 | 
 | ||||||
| @ -112,9 +112,12 @@ The estimator list can contain one or more estimator names, each corresponding t | |||||||
| #### Estimator | #### Estimator | ||||||
| * Built-in estimator. | * Built-in estimator. | ||||||
|     - 'lgbm': LGBMEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, num_leaves, min_child_samples, learning_rate, log_max_bin (logarithm of (max_bin + 1) with base 2), colsample_bytree, reg_alpha, reg_lambda. |     - 'lgbm': LGBMEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, num_leaves, min_child_samples, learning_rate, log_max_bin (logarithm of (max_bin + 1) with base 2), colsample_bytree, reg_alpha, reg_lambda. | ||||||
|     - 'xgboost': XGBoostSkLearnEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_leaves, max_depth, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda. |     - 'xgboost': XGBoostSkLearnEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_leaves, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda. | ||||||
|     - 'rf': RandomForestEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). |     - 'xgb_limitdepth': XGBoostLimitDepthEstimator for task "classification", "regression", "rank", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_depth, min_child_weight, learning_rate, subsample, colsample_bylevel, colsample_bytree, reg_alpha, reg_lambda. | ||||||
|     - 'extra_tree': ExtraTreesEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). |     - 'rf': RandomForestEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). Starting from v1.1.0, | ||||||
|  |     it uses a fixed random_state by default. | ||||||
|  |     - 'extra_tree': ExtraTreesEstimator for task "classification", "regression", "ts_forecast" and "ts_forecast_classification". Hyperparameters: n_estimators, max_features, max_leaves, criterion (for classification only). Starting from v1.1.0, | ||||||
|  |     it uses a fixed random_state by default. | ||||||
|     - 'lrl1': LRL1Classifier (sklearn.LogisticRegression with L1 regularization) for task "classification". Hyperparameters: C. |     - 'lrl1': LRL1Classifier (sklearn.LogisticRegression with L1 regularization) for task "classification". Hyperparameters: C. | ||||||
|     - 'lrl2': LRL2Classifier (sklearn.LogisticRegression with L2 regularization) for task "classification". Hyperparameters: C. |     - 'lrl2': LRL2Classifier (sklearn.LogisticRegression with L2 regularization) for task "classification". Hyperparameters: C. | ||||||
|     - 'catboost': CatBoostEstimator for task "classification" and "regression". Hyperparameters: early_stopping_rounds, learning_rate, n_estimators. |     - 'catboost': CatBoostEstimator for task "classification" and "regression". Hyperparameters: early_stopping_rounds, learning_rate, n_estimators. | ||||||
| @ -123,7 +126,7 @@ The estimator list can contain one or more estimator names, each corresponding t | |||||||
|     - 'arima': ARIMA for task "ts_forecast". Hyperparameters: p, d, q. |     - 'arima': ARIMA for task "ts_forecast". Hyperparameters: p, d, q. | ||||||
|     - 'sarimax': SARIMAX for task "ts_forecast". Hyperparameters: p, d, q, P, D, Q, s. |     - 'sarimax': SARIMAX for task "ts_forecast". Hyperparameters: p, d, q, P, D, Q, s. | ||||||
|     - 'transformer': Huggingface transformer models for task "seq-classification", "seq-regression", "multichoice-classification", "token-classification" and "summarization". Hyperparameters: learning_rate, num_train_epochs, per_device_train_batch_size, warmup_ratio, weight_decay, adam_epsilon, seed. |     - 'transformer': Huggingface transformer models for task "seq-classification", "seq-regression", "multichoice-classification", "token-classification" and "summarization". Hyperparameters: learning_rate, num_train_epochs, per_device_train_batch_size, warmup_ratio, weight_decay, adam_epsilon, seed. | ||||||
|     <!-- - 'temporal_fusion_transform': TemporalFusionTransformerEstimator for task "ts_forecast_panel". Hyperparameters: gradient_clip_val, hidden_size, hidden_continuous_size, attention_head_size, dropout, learning_rate. --> |     - 'temporal_fusion_transformer': TemporalFusionTransformerEstimator for task "ts_forecast_panel". Hyperparameters: gradient_clip_val, hidden_size, hidden_continuous_size, attention_head_size, dropout, learning_rate. There is a [known issue](https://github.com/jdb78/pytorch-forecasting/issues/1145) with pytorch-forecasting logging. | ||||||
| * Custom estimator. Use custom estimator for: | * Custom estimator. Use custom estimator for: | ||||||
|     - tuning an estimator that is not built-in; |     - tuning an estimator that is not built-in; | ||||||
|     - customizing search space for a built-in estimator. |     - customizing search space for a built-in estimator. | ||||||
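For the first case, the usual pattern is to subclass `SKLearnEstimator`, declare a `search_space`, and register the class with `add_learner`. The `MyKNN` wrapper below, its search space, and the learner name are illustrative, not part of FLAML:

```python
from flaml import AutoML, tune
from flaml.model import SKLearnEstimator
from sklearn.datasets import load_iris

class MyKNN(SKLearnEstimator):
    """Illustrative wrapper that makes sklearn's KNN tunable by FLAML."""

    def __init__(self, task="classification", **config):
        super().__init__(task, **config)
        from sklearn.neighbors import KNeighborsClassifier

        self.estimator_class = KNeighborsClassifier

    @classmethod
    def search_space(cls, data_size, task):
        # Each hyperparameter declares its domain and initial value(s).
        return {
            "n_neighbors": {
                "domain": tune.randint(lower=1, upper=30),
                "init_value": 5,
                "low_cost_init_value": 1,
            },
        }

X, y = load_iris(return_X_y=True, as_frame=True)
automl = AutoML()
automl.add_learner(learner_name="myknn", learner_class=MyKNN)
automl.fit(X, y, task="classification", estimator_list=["myknn"], max_iter=10)
```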
|  | |||||||