Mirror of https://github.com/microsoft/autogen.git, synced 2025-10-31 01:40:58 +00:00
Lgbm w customized obj (#64)

* add customized lgbm learner
* add comments
* fix format issue
* format
* OpenMLError
* add test
* add notebook

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>

parent 72d17b37c2
commit 06045703bf
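Only the XGBoost side of the change survives in the extracted hunks below; the customized LightGBM learner named in the commit message sits in one of the file diffs suppressed further down. As a rough sketch of the same custom-objective pattern using LightGBM's scikit-learn API (the function and estimator below are illustrative assumptions, not the commit's actual code):

    import numpy as np
    from lightgbm import LGBMRegressor

    def my_squared_error_obj(y_true, y_pred):
        # LightGBM custom objective: return the gradient and hessian of the
        # loss with respect to the raw predictions (squared error here).
        grad = y_pred - y_true
        hess = np.ones_like(y_true)
        return grad, hess

    # The sklearn wrapper accepts a callable objective directly.
    model = LGBMRegressor(objective=my_squared_error_obj, n_estimators=50)
    model.fit(np.random.rand(100, 5), np.random.rand(100))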
@@ -362,7 +362,6 @@ class XGBoostEstimator(SKLearnEstimator):
     ):
         super().__init__(task, **params)
         self._n_estimators = int(round(n_estimators))
-        self._max_leaves = int(round(max_leaves))
         self.params = {
             'max_leaves': int(round(max_leaves)),
             'max_depth': params.get('max_depth', 0),
@@ -378,6 +377,7 @@ class XGBoostEstimator(SKLearnEstimator):
             'booster': params.get('booster', 'gbtree'),
             'colsample_bylevel': float(colsample_bylevel),
             'colsample_bytree': float(colsample_bytree),
+            'objective': params.get("objective")
         }
         if all_thread:
             del self.params['nthread']
@@ -398,13 +398,19 @@ class XGBoostEstimator(SKLearnEstimator):
         else:
             dtrain = xgb.DMatrix(X_train, label=y_train)
 
-        if self._max_leaves > 0:
-            self._model = xgb.train(self.params, dtrain, self._n_estimators)
-            del dtrain
-            train_time = time.time() - start_time
-            return train_time
+        objective = self.params.get('objective')
+        if isinstance(objective, str):
+            obj = None
         else:
-            return None
+            obj = objective
+            if 'objective' in self.params:
+                del self.params['objective']
+        self._model = xgb.train(self.params, dtrain, self._n_estimators,
+                                obj=obj)
+        self.params['objective'] = objective
+        del dtrain
+        train_time = time.time() - start_time
+        return train_time
 
     def predict(self, X_test):
         if not issparse(X_test):
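The rewritten fit above dispatches on the type of the configured objective: XGBoost expects built-in objectives as a string inside params, while a custom objective has to be handed to xgb.train as its obj callable, with the key removed from params so the two do not clash (and restored afterwards so the estimator's configuration stays intact). A minimal standalone sketch of that dispatch, with illustrative names:

    import xgboost as xgb

    def train_with_objective(params, dtrain, n_estimators):
        # String objectives stay in params; callables go to xgb.train's obj=.
        objective = params.get('objective')
        obj = None if isinstance(objective, str) else objective
        if obj is not None:
            params = {k: v for k, v in params.items() if k != 'objective'}
        return xgb.train(params, dtrain, n_estimators, obj=obj)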
@@ -1 +1 @@
-__version__ = "0.3.0"
+__version__ = "0.3.1"
										
											
Two file diffs suppressed because one or more lines are too long
											
										
									
								
@@ -7,7 +7,7 @@ from sklearn.datasets import load_boston, load_iris, load_wine
 from flaml import AutoML
 from flaml.data import get_output_from_log
 
-from flaml.model import SKLearnEstimator
+from flaml.model import SKLearnEstimator, XGBoostEstimator
 from rgf.sklearn import RGFClassifier, RGFRegressor
 from flaml import tune
 
@@ -65,6 +65,30 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
         return 1.0
 
 
+def logregobj(preds, dtrain):
+    labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
+    grad = preds - labels
+    hess = preds * (1.0 - preds)
+    return grad, hess
+
+
+class MyXGB1(XGBoostEstimator):
+    '''XGBoostEstimator with logregobj as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective=logregobj, **params)
+
+
+class MyXGB2(XGBoostEstimator):
+    '''XGBoostEstimator with 'reg:squarederror' as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective='reg:squarederror', **params)
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
@@ -345,6 +369,36 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_iteration)
         print(automl_experiment.best_estimator)
 
+    def test_regression_xgboost(self):
+        X_train = scipy.sparse.random(300, 900, density=0.0001)
+        y_train = np.random.uniform(size=300)
+        X_val = scipy.sparse.random(100, 900, density=0.0001)
+        y_val = np.random.uniform(size=100)
+        automl_experiment = AutoML()
+        automl_experiment.add_learner(learner_name='my_xgb1', learner_class=MyXGB1)
+        automl_experiment.add_learner(learner_name='my_xgb2', learner_class=MyXGB2)
+        automl_settings = {
+            "time_budget": 2,
+            "estimator_list": ['my_xgb1', 'my_xgb2'],
+            "task": 'regression',
+            "log_file_name": 'test/regression_xgboost.log',
+            "n_jobs": 1,
+            "model_history": True,
+        }
+        automl_experiment.fit(X_train=X_train, y_train=y_train,
+                              X_val=X_val, y_val=y_val,
+                              **automl_settings)
+        assert automl_experiment._state.X_val.shape == X_val.shape
+        print(automl_experiment.predict(X_train))
+        print(automl_experiment.model)
+        print(automl_experiment.config_history)
+        print(automl_experiment.model_history)
+        print(automl_experiment.best_iteration)
+        print(automl_experiment.best_estimator)
+        print(automl_experiment.best_config)
+        print(automl_experiment.best_loss)
+        print(automl_experiment.best_config_train_time)
+
 
 if __name__ == "__main__":
     unittest.main()
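The logregobj helper added above returns the analytic gradient preds - labels and hessian preds * (1 - preds) of the logistic loss on raw leaf weights. As an illustrative sanity check (not part of the commit), those formulas can be compared against central finite differences of the loss:

    import numpy as np

    def sigmoid(f):
        return 1.0 / (1.0 + np.exp(-f))

    def logloss(f, y):
        p = sigmoid(f)
        return -(y * np.log(p) + (1 - y) * np.log(1 - p))

    rng = np.random.default_rng(0)
    f = rng.normal(size=5)                        # raw leaf weights
    y = rng.integers(0, 2, size=5).astype(float)  # 0/1 labels
    eps = 1e-5

    # Analytic gradient and hessian, exactly as logregobj computes them.
    p = sigmoid(f)
    grad, hess = p - y, p * (1 - p)

    # Central finite differences of the loss agree with the analytic forms.
    num_grad = (logloss(f + eps, y) - logloss(f - eps, y)) / (2 * eps)
    num_hess = (logloss(f + eps, y) - 2 * logloss(f, y)
                + logloss(f - eps, y)) / eps ** 2
    assert np.allclose(grad, num_grad, atol=1e-6)
    assert np.allclose(hess, num_hess, atol=1e-4)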
@@ -44,7 +44,7 @@ def test_simple(method=None):
     }
     try:
         X, y = fetch_openml(name=dataset, return_X_y=True)
-    except FileNotFoundError:
+    except ValueError:
         from sklearn.datasets import load_wine
         X, y = load_wine(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(
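A caveat worth noting when reusing this pattern: when a callable objective is passed to xgb.train, predict() on the resulting booster returns raw margin scores with no link function applied, so a logregobj-style model needs the sigmoid applied by hand to turn margins into probabilities. A hypothetical helper:

    import numpy as np

    def to_probability(raw_margin):
        # Raw margins come straight from the leaf sums when a custom
        # objective was used; apply the sigmoid that logregobj assumed.
        return 1.0 / (1.0 + np.exp(-raw_margin))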
Qingyun Wu