mirror of
https://github.com/microsoft/autogen.git
synced 2025-08-02 13:52:39 +00:00

* subspace in flow2 * search space and trainable from AutoML * experimental features: multivariate TPE, grouping, add_evaluated_points * test experimental features * readme * define by run * set time_budget_s for bs Co-authored-by: liususan091219 <Xqq630517> * version * acl * test define_by_run_func * size * constraints Co-authored-by: Chi Wang <wang.chi@microsoft.com>
72 lines
3.3 KiB
Python
72 lines
3.3 KiB
Python
def test_automl(budget=5):
|
|
from flaml.data import load_openml_dataset
|
|
X_train, X_test, y_train, y_test = load_openml_dataset(dataset_id=1169, data_dir='test/')
|
|
''' import AutoML class from flaml package '''
|
|
from flaml import AutoML
|
|
automl = AutoML()
|
|
settings = {
|
|
"time_budget": budget, # total running time in seconds
|
|
"metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']
|
|
"task": 'classification', # task type
|
|
"log_file_name": 'airlines_experiment.log', # flaml log file
|
|
}
|
|
'''The main flaml automl API'''
|
|
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
|
''' retrieve best config and best learner'''
|
|
print('Best ML leaner:', automl.best_estimator)
|
|
print('Best hyperparmeter config:', automl.best_config)
|
|
print('Best accuracy on validation data: {0:.4g}'.format(1 - automl.best_loss))
|
|
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))
|
|
print(automl.model.estimator)
|
|
''' pickle and save the automl object '''
|
|
import pickle
|
|
with open('automl.pkl', 'wb') as f:
|
|
pickle.dump(automl, f, pickle.HIGHEST_PROTOCOL)
|
|
''' compute predictions of testing dataset '''
|
|
y_pred = automl.predict(X_test)
|
|
print('Predicted labels', y_pred)
|
|
print('True labels', y_test)
|
|
y_pred_proba = automl.predict_proba(X_test)[:, 1]
|
|
''' compute different metric values on testing dataset'''
|
|
from flaml.ml import sklearn_metric_loss_score
|
|
print('accuracy', '=', 1 - sklearn_metric_loss_score('accuracy', y_pred, y_test))
|
|
print('roc_auc', '=', 1 - sklearn_metric_loss_score('roc_auc', y_pred_proba, y_test))
|
|
print('log_loss', '=', sklearn_metric_loss_score('log_loss', y_pred_proba, y_test))
|
|
from flaml.data import get_output_from_log
|
|
time_history, best_valid_loss_history, valid_loss_history, config_history, train_loss_history = \
|
|
get_output_from_log(filename=settings['log_file_name'], time_budget=60)
|
|
for config in config_history:
|
|
print(config)
|
|
print(automl.prune_attr)
|
|
print(automl.max_resource)
|
|
print(automl.min_resource)
|
|
|
|
|
|
def test_mlflow():
|
|
import subprocess
|
|
import sys
|
|
subprocess.check_call([sys.executable, "-m", "pip", "install", "mlflow"])
|
|
import mlflow
|
|
from flaml.data import load_openml_task
|
|
X_train, X_test, y_train, y_test = load_openml_task(task_id=7592, data_dir='test/')
|
|
''' import AutoML class from flaml package '''
|
|
from flaml import AutoML
|
|
automl = AutoML()
|
|
settings = {
|
|
"time_budget": 5, # total running time in seconds
|
|
"metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']
|
|
"estimator_list": ['lgbm', 'rf', 'xgboost'], # list of ML learners
|
|
"task": 'classification', # task type
|
|
"sample": False, # whether to subsample training data
|
|
"log_file_name": 'adult.log', # flaml log file
|
|
}
|
|
mlflow.set_experiment("flaml")
|
|
with mlflow.start_run():
|
|
'''The main flaml automl API'''
|
|
automl.fit(X_train=X_train, y_train=y_train, **settings)
|
|
# subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "mlflow"])
|
|
|
|
|
|
if __name__ == "__main__":
|
|
test_automl(300)
|