Mirror of https://github.com/microsoft/autogen.git, synced 2025-10-31 17:59:50 +00:00
Lgbm w customized obj (#64)

* add customized lgbm learner
* add comments
* fix format issue
* format
* OpenMLError
* add test
* add notebook

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Author: Qingyun Wu
Parent: 72d17b37c2
Commit: 06045703bf
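The "customized obj" in the title refers to passing a user-defined training objective to the boosting library as a callable that returns the per-example gradient and Hessian of the loss, instead of a built-in objective name. The LightGBM side of the change sits in file diffs that are suppressed below for length; as a minimal sketch of the general mechanism (independent of this commit; the toy data and the squared-error objective are illustrative assumptions), LightGBM's sklearn API accepts such a callable directly:

    import numpy as np
    import lightgbm as lgb

    def my_squared_error(y_true, y_pred):
        # gradient and hessian of 0.5 * (y_pred - y_true)^2 w.r.t. y_pred
        grad = y_pred - y_true
        hess = np.ones_like(y_pred)
        return grad, hess

    X = np.random.rand(200, 4)
    y = np.random.rand(200)
    model = lgb.LGBMRegressor(objective=my_squared_error, n_estimators=10)
    model.fit(X, y)
    print(model.predict(X)[:5])  # raw scores when a custom objective is used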
					
flaml/model.py:
@@ -362,7 +362,6 @@ class XGBoostEstimator(SKLearnEstimator):
     ):
         super().__init__(task, **params)
         self._n_estimators = int(round(n_estimators))
-        self._max_leaves = int(round(max_leaves))
         self.params = {
             'max_leaves': int(round(max_leaves)),
             'max_depth': params.get('max_depth', 0),
@@ -378,6 +377,7 @@ class XGBoostEstimator(SKLearnEstimator):
             'booster': params.get('booster', 'gbtree'),
             'colsample_bylevel': float(colsample_bylevel),
             'colsample_bytree': float(colsample_bytree),
+            'objective': params.get("objective")
         }
         if all_thread:
             del self.params['nthread']
@@ -398,13 +398,19 @@ class XGBoostEstimator(SKLearnEstimator):
         else:
             dtrain = xgb.DMatrix(X_train, label=y_train)
 
-        if self._max_leaves > 0:
-            self._model = xgb.train(self.params, dtrain, self._n_estimators)
+        objective = self.params.get('objective')
+        if isinstance(objective, str):
+            obj = None
+        else:
+            obj = objective
+            if 'objective' in self.params:
+                del self.params['objective']
+        self._model = xgb.train(self.params, dtrain, self._n_estimators,
+                                obj=obj)
+        self.params['objective'] = objective
         del dtrain
         train_time = time.time() - start_time
         return train_time
-        else:
-            return None
 
     def predict(self, X_test):
         if not issparse(X_test):
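The fit logic above routes a callable objective to xgb.train's obj argument, since the native API expects params['objective'] to be a string; the callable is popped from params before training and restored afterward so the estimator's recorded configuration still carries it. A minimal standalone sketch of the same mechanism, reusing the logregobj function added in the test further below (the toy data is an illustrative assumption):

    import numpy as np
    import xgboost as xgb

    def logregobj(preds, dtrain):
        # gradient and hessian of the logistic loss w.r.t. the raw margin
        labels = dtrain.get_label()
        preds = 1.0 / (1.0 + np.exp(-preds))
        return preds - labels, preds * (1.0 - preds)

    X = np.random.rand(200, 4)
    y = (np.random.rand(200) > 0.5).astype(float)
    dtrain = xgb.DMatrix(X, label=y)
    # the callable goes to obj=, not to params['objective']
    booster = xgb.train({'max_depth': 3}, dtrain, num_boost_round=10,
                        obj=logregobj)
    margins = booster.predict(dtrain)  # raw margins under a custom objective
    print(1 / (1 + np.exp(-margins[:5])))  # apply sigmoid for probabilities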
flaml/version.py:
@@ -1 +1 @@
-__version__ = "0.3.0"
+__version__ = "0.3.1"
2 file diffs suppressed because one or more lines are too long
test/test_automl.py:
@@ -7,7 +7,7 @@ from sklearn.datasets import load_boston, load_iris, load_wine
 from flaml import AutoML
 from flaml.data import get_output_from_log
 
-from flaml.model import SKLearnEstimator
+from flaml.model import SKLearnEstimator, XGBoostEstimator
 from rgf.sklearn import RGFClassifier, RGFRegressor
 from flaml import tune
 
@@ -65,6 +65,30 @@ class MyRegularizedGreedyForest(SKLearnEstimator):
         return 1.0
 
 
+def logregobj(preds, dtrain):
+    labels = dtrain.get_label()
+    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
+    grad = preds - labels
+    hess = preds * (1.0 - preds)
+    return grad, hess
+
+
+class MyXGB1(XGBoostEstimator):
+    '''XGBoostEstimator with logregobj as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective=logregobj, **params)
+
+
+class MyXGB2(XGBoostEstimator):
+    '''XGBoostEstimator with 'reg:squarederror' as the objective function
+    '''
+
+    def __init__(self, **params):
+        super().__init__(objective='reg:squarederror', **params)
+
+
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train,
                   weight_test=None, weight_train=None):
     from sklearn.metrics import log_loss
@@ -345,6 +369,36 @@ class TestAutoML(unittest.TestCase):
         print(automl_experiment.best_iteration)
         print(automl_experiment.best_estimator)
 
+    def test_regression_xgboost(self):
+        X_train = scipy.sparse.random(300, 900, density=0.0001)
+        y_train = np.random.uniform(size=300)
+        X_val = scipy.sparse.random(100, 900, density=0.0001)
+        y_val = np.random.uniform(size=100)
+        automl_experiment = AutoML()
+        automl_experiment.add_learner(learner_name='my_xgb1', learner_class=MyXGB1)
+        automl_experiment.add_learner(learner_name='my_xgb2', learner_class=MyXGB2)
+        automl_settings = {
+            "time_budget": 2,
+            "estimator_list": ['my_xgb1', 'my_xgb2'],
+            "task": 'regression',
+            "log_file_name": 'test/regression_xgboost.log',
+            "n_jobs": 1,
+            "model_history": True,
+        }
+        automl_experiment.fit(X_train=X_train, y_train=y_train,
+                              X_val=X_val, y_val=y_val,
+                              **automl_settings)
+        assert automl_experiment._state.X_val.shape == X_val.shape
+        print(automl_experiment.predict(X_train))
+        print(automl_experiment.model)
+        print(automl_experiment.config_history)
+        print(automl_experiment.model_history)
+        print(automl_experiment.best_iteration)
+        print(automl_experiment.best_estimator)
+        print(automl_experiment.best_config)
+        print(automl_experiment.best_loss)
+        print(automl_experiment.best_config_train_time)
+
 
 if __name__ == "__main__":
     unittest.main()
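The new test registers the custom estimators with AutoML via add_learner and then searches over them. A condensed sketch of the same usage pattern outside the test harness (the random data and the class name MyXGB are illustrative assumptions; the FLAML calls mirror the test above):

    import numpy as np
    from flaml import AutoML
    from flaml.model import XGBoostEstimator

    class MyXGB(XGBoostEstimator):
        # same pattern as MyXGB2 in the diff above
        def __init__(self, **params):
            super().__init__(objective='reg:squarederror', **params)

    automl = AutoML()
    automl.add_learner(learner_name='my_xgb', learner_class=MyXGB)
    automl.fit(X_train=np.random.rand(100, 5), y_train=np.random.rand(100),
               task='regression', time_budget=2,
               estimator_list=['my_xgb'], n_jobs=1)
    print(automl.best_estimator)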
@@ -44,7 +44,7 @@ def test_simple(method=None):
     }
     try:
         X, y = fetch_openml(name=dataset, return_X_y=True)
-    except FileNotFoundError:
+    except ValueError:
         from sklearn.datasets import load_wine
         X, y = load_wine(return_X_y=True)
     X_train, X_test, y_train, y_test = train_test_split(
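This last hunk matches the "OpenMLError" item in the commit message: the fallback to sklearn's bundled dataset now catches ValueError, presumably because that is what a failed fetch_openml lookup raises rather than FileNotFoundError. A version-tolerant variant of the pattern (a hedged sketch; the helper name is illustrative) could catch download failures as well:

    from sklearn.datasets import fetch_openml, load_wine

    def load_wine_with_fallback():
        # prefer the OpenML copy; fall back to sklearn's bundled dataset
        try:
            X, y = fetch_openml(name='wine', return_X_y=True)
        except (ValueError, OSError):  # lookup and network failures
            X, y = load_wine(return_X_y=True)
        return X, y

    X, y = load_wine_with_fallback()
    print(X.shape)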