import unittest

import numpy as np
import scipy.sparse
from sklearn.datasets import (
    fetch_california_housing,
)

from flaml import AutoML
from flaml.data import get_output_from_log
from flaml.model import XGBoostEstimator


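# logregobj is a custom objective for XGBoost: it maps the raw margin
# predictions to probabilities with a sigmoid and returns the gradient and
# hessian of the logistic loss, which xgboost uses for its second-order updates.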
def logregobj(preds, dtrain):
    labels = dtrain.get_label()
    preds = 1.0 / (1.0 + np.exp(-preds))  # transform raw leaf weight
    grad = preds - labels  # first-order gradient of the log loss
    hess = preds * (1.0 - preds)  # second-order gradient (hessian)
    return grad, hess


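# Two custom learners: MyXGB1 plugs in the logregobj objective above, while
# MyXGB2 uses the built-in "reg:squarederror" objective. Both are registered
# with AutoML.add_learner() in test_regression_xgboost below.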
class MyXGB1(XGBoostEstimator):
    """XGBoostEstimator with logregobj as the objective function."""

    def __init__(self, **config):
        super().__init__(objective=logregobj, **config)


class MyXGB2(XGBoostEstimator):
    """XGBoostEstimator with 'reg:squarederror' as the objective function."""

    def __init__(self, **config):
        super().__init__(objective="reg:squarederror", **config)


class TestRegression(unittest.TestCase):
    def test_regression(self):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_training_metric": True,
            "n_jobs": 1,
            "model_history": True,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        # Use the first 90% of the data for training and the rest as a holdout set.
        n = int(len(y_train) * 9 // 10)
        automl_experiment.fit(
            X_train=X_train[:n],
            y_train=y_train[:n],
            X_val=X_train[n:],
            y_val=y_train[n:],
            **automl_settings
        )
        assert automl_experiment._state.eval_method == "holdout"
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("xgboost"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(get_output_from_log(automl_settings["log_file_name"], 1))
        # Retrain on the full data from the logged search history, with two
        # different time budgets.
        automl_experiment.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=1,
        )
        automl_experiment.retrain_from_log(
            task="regression",
            log_file_name=automl_settings["log_file_name"],
            X_train=X_train,
            y_train=y_train,
            train_full=True,
            time_budget=0,
        )

    def test_sparse_matrix_regression(self):
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 2,
            "metric": "mae",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "verbose": 0,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("rf"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)

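    # Parallel search (n_concurrent_trials > 1) needs optional dependencies
    # such as ray; the ImportError guard lets the test pass silently when they
    # are not installed.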
    def test_parallel(self, hpo_method=None):
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 10,
            "task": "regression",
            "log_file_name": "test/california.log",
            "log_type": "all",
            "n_jobs": 1,
            "n_concurrent_trials": 10,
            "hpo_method": hpo_method,
        }
        X_train, y_train = fetch_california_housing(return_X_y=True)
        try:
            automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
            print(automl_experiment.predict(X_train))
            print(automl_experiment.model)
            print(automl_experiment.config_history)
            print(automl_experiment.best_model_for_estimator("xgboost"))
            print(automl_experiment.best_iteration)
            print(automl_experiment.best_estimator)
        except ImportError:
            return

    def test_sparse_matrix_regression_holdout(self):
        X_train = scipy.sparse.random(8, 100)
        y_train = np.random.uniform(size=8)
        automl_experiment = AutoML()
        automl_settings = {
            "time_budget": 1,
            "eval_method": "holdout",
            "task": "regression",
            "log_file_name": "test/sparse_regression.log",
            "n_jobs": 1,
            "model_history": True,
            "metric": "mse",
            "sample_weight": np.ones(len(y_train)),
            "early_stop": True,
        }
        automl_experiment.fit(X_train=X_train, y_train=y_train, **automl_settings)
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("rf"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)

    def test_regression_xgboost(self):
        X_train = scipy.sparse.random(300, 900, density=0.0001)
        y_train = np.random.uniform(size=300)
        X_val = scipy.sparse.random(100, 900, density=0.0001)
        y_val = np.random.uniform(size=100)
        automl_experiment = AutoML()
        # Register the custom learners and restrict the search to them via
        # "estimator_list".
        automl_experiment.add_learner(learner_name="my_xgb1", learner_class=MyXGB1)
        automl_experiment.add_learner(learner_name="my_xgb2", learner_class=MyXGB2)
        automl_settings = {
            "time_budget": 2,
            "estimator_list": ["my_xgb1", "my_xgb2"],
            "task": "regression",
            "log_file_name": "test/regression_xgboost.log",
            "n_jobs": 1,
            "model_history": True,
            "keep_search_state": True,
            "early_stop": True,
        }
        automl_experiment.fit(
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            **automl_settings
        )
        assert automl_experiment._state.X_val.shape == X_val.shape
        print(automl_experiment.predict(X_train))
        print(automl_experiment.model)
        print(automl_experiment.config_history)
        print(automl_experiment.best_model_for_estimator("my_xgb2"))
        print(automl_experiment.best_iteration)
        print(automl_experiment.best_estimator)
        print(automl_experiment.best_config)
        print(automl_experiment.best_loss)
        print(automl_experiment.best_config_train_time)


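# AutoML follows the scikit-learn estimator API (fit/predict), so it can be
# wrapped directly in sklearn meta-estimators such as MultiOutputRegressor and
# RegressorChain for multi-target regression.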
def test_multioutput():
    from sklearn.datasets import make_regression
    from sklearn.model_selection import train_test_split
    from sklearn.multioutput import MultiOutputRegressor, RegressorChain

    # create regression data with three targets
    X, y = make_regression(n_targets=3)

    # split into train and test data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.30, random_state=42
    )

    # fit one independent AutoML regressor per target
    model = MultiOutputRegressor(AutoML(task="regression", time_budget=1))
    model.fit(X_train, y_train)

    # predict
    print(model.predict(X_test))

    # fit a chain of AutoML regressors, where each target also sees the
    # predictions for the previous targets as features
    model = RegressorChain(AutoML(task="regression", time_budget=1))
    model.fit(X_train, y_train)

    # predict
    print(model.predict(X_test))


if __name__ == "__main__":
    unittest.main()