diff --git a/flaml/automl.py b/flaml/automl.py index a0315a625..08cd66519 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -104,6 +104,7 @@ class SearchState: self.trained_estimator = None self.sample_size = None self.trial_time = 0 + self.best_n_iter = None def update(self, result, time_used, save_model_history=False): if result: @@ -430,7 +431,7 @@ class AutoML: @property def time_to_find_best_model(self) -> float: - """time taken to find best model in seconds""" + """Time taken to find best model in seconds""" return self.__dict__.get("_time_taken_best_iter") def predict(self, X_test): @@ -1768,6 +1769,17 @@ class AutoML: better = True # whether we find a better model in one trial if self._ensemble: self.best_model = {} + if self._max_iter < 2 and self.estimator_list: + # when max_iter is 1, no need to search + self._max_iter = 0 + self._best_estimator = estimator = self.estimator_list[0] + self._selected = state = self._search_states[estimator] + state.best_config_sample_size = self._state.data_size + state.best_config = ( + state.init_config + if isinstance(state.init_config, dict) + else state.init_config[0] + ) for self._track_iter in range(self._max_iter): if self._estimator_index is None: estimator = self._active_estimators[0] @@ -1844,9 +1856,9 @@ class AutoML: metric="val_loss", mode="min", space=search_space, - points_to_evaluate=points_to_evaluate - if len(search_state.init_config) == len(search_space) - else None, + points_to_evaluate=[ + p for p in points_to_evaluate if len(p) == len(search_space) + ], ) search_state.search_alg = ConcurrencyLimiter(algo, max_concurrent=1) # search_state.search_alg = algo diff --git a/flaml/model.py b/flaml/model.py index 385bef499..740515507 100644 --- a/flaml/model.py +++ b/flaml/model.py @@ -465,7 +465,7 @@ class XGBoostEstimator(SKLearnEstimator): def predict(self, X_test): import xgboost as xgb - + if not issparse(X_test): X_test = self._preprocess(X_test) dtest = xgb.DMatrix(X_test) diff --git a/test/test_automl.py b/test/test_automl.py index 97297c8b3..7ccbda76b 100644 --- a/test/test_automl.py +++ b/test/test_automl.py @@ -31,7 +31,7 @@ class MyRegularizedGreedyForest(SKLearnEstimator): self.estimator_class = RGFClassifier else: from rgf.sklearn import RGFRegressor - + self.estimator_class = RGFRegressor @classmethod diff --git a/test/test_training_log.py b/test/test_training_log.py index 73de45d25..20c728c01 100644 --- a/test/test_training_log.py +++ b/test/test_training_log.py @@ -25,23 +25,49 @@ class TestTrainingLog(unittest.TestCase): "mem_thres": 1024 * 1024, "n_jobs": 1, "model_history": True, - "train_time_limit": 0.01, + "train_time_limit": 0.1, "verbose": 3, "ensemble": True, "keep_search_state": True, } X_train, y_train = fetch_california_housing(return_X_y=True) automl.fit(X_train=X_train, y_train=y_train, **automl_settings) - automl._state._train_with_config(automl.best_estimator, automl.best_config) - # Check if the training log file is populated. self.assertTrue(os.path.exists(filename)) - with training_log_reader(filename) as reader: - count = 0 - for record in reader.records(): - print(record) - count += 1 - self.assertGreater(count, 0) + if automl.best_estimator: + estimator, config = automl.best_estimator, automl.best_config + model0 = automl.best_model_for_estimator(estimator) + print(model0.estimator) + + automl.time_budget = None + model, _ = automl._state._train_with_config(estimator, config) + # model0 and model are equivalent unless model0's n_estimator is out of search space range + assert ( + str(model0.estimator) == str(model.estimator) + or model0["n_estimators"] < 4 + ) + + # assuming estimator & config are saved and loaded as follows + automl = AutoML() + automl.fit( + X_train=X_train, + y_train=y_train, + max_iter=0, + task="regression", + estimator_list=[estimator], + n_jobs=1, + starting_points={estimator: config}, + ) + # then the fitted model should be equivalent to model + # print(str(model.estimator), str(automl.model.estimator)) + assert str(model.estimator) == str(automl.model.estimator) + + with training_log_reader(filename) as reader: + count = 0 + for record in reader.records(): + print(record) + count += 1 + self.assertGreater(count, 0) automl_settings["log_file_name"] = None automl.fit(X_train=X_train, y_train=y_train, **automl_settings)