diff --git a/flaml/automl.py b/flaml/automl.py index 9e3f78169..8cd8f6738 100644 --- a/flaml/automl.py +++ b/flaml/automl.py @@ -1018,8 +1018,8 @@ class AutoML: dataframe and label are ignored; If not, dataframe and label must be provided. metric: A string of the metric name or a function, - e.g., 'accuracy', 'roc_auc', 'f1', 'micro_f1', 'macro_f1', - 'log_loss', 'mae', 'mse', 'r2' + e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', + 'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2' if passing a customized metric function, the function needs to have the follwing signature: @@ -1133,7 +1133,8 @@ class AutoML: else: metric = 'r2' self._state.metric = metric - if metric in ['r2', 'accuracy', 'roc_auc', 'f1', 'ap', 'micro_f1', 'macro_f1']: + if metric in ['r2', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', + 'f1', 'ap', 'micro_f1', 'macro_f1']: error_metric = f"1-{metric}" elif isinstance(metric, str): error_metric = metric diff --git a/flaml/ml.py b/flaml/ml.py index 3fc0bbf73..78f2ce4c7 100644 --- a/flaml/ml.py +++ b/flaml/ml.py @@ -56,8 +56,8 @@ def sklearn_metric_loss_score( Args: metric_name: A string of the metric name, one of - 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss', - 'f1', 'ap', 'micro_f1', 'macro_f1' + 'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr', + 'roc_auc_ovo', 'log_loss', 'f1', 'ap', 'micro_f1', 'macro_f1' y_predict: A 1d or 2d numpy array of the predictions which can be used to calculate the metric. E.g., 2d for log_loss and 1d for others. 
@@ -83,9 +83,15 @@ def sklearn_metric_loss_score( elif metric_name == 'accuracy': score = 1.0 - accuracy_score( y_true, y_predict, sample_weight=sample_weight) - elif 'roc_auc' in metric_name: + elif metric_name == 'roc_auc': score = 1.0 - roc_auc_score( y_true, y_predict, sample_weight=sample_weight) + elif metric_name == 'roc_auc_ovr': + score = 1.0 - roc_auc_score( + y_true, y_predict, sample_weight=sample_weight, multi_class='ovr') + elif metric_name == 'roc_auc_ovo': + score = 1.0 - roc_auc_score( + y_true, y_predict, sample_weight=sample_weight, multi_class='ovo') elif 'log_loss' in metric_name: score = log_loss( y_true, y_predict, labels=labels, sample_weight=sample_weight) @@ -104,7 +110,8 @@ def sklearn_metric_loss_score( raise ValueError( metric_name + ' is not a built-in metric, ' 'currently built-in metrics are: ' - 'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, micro_f1, macro_f1, ap. ' + 'r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo, ' + 'log_loss, f1, micro_f1, macro_f1, ap. 
' 'please pass a customized metric function to AutoML.fit(metric=func)') return score @@ -114,7 +121,7 @@ def get_y_pred(estimator, X, eval_metric, obj): y_pred_classes = estimator.predict_proba(X) y_pred = y_pred_classes[ :, 1] if y_pred_classes.ndim > 1 else y_pred_classes - elif eval_metric in ['log_loss', 'roc_auc']: + elif eval_metric in ['log_loss', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo']: y_pred = estimator.predict_proba(X) else: y_pred = estimator.predict(X) diff --git a/notebook/automl_in_sklearn_pipeline.ipynb b/notebook/automl_in_sklearn_pipeline.ipynb index c38b7ad5e..644ffd04c 100644 --- a/notebook/automl_in_sklearn_pipeline.ipynb +++ b/notebook/automl_in_sklearn_pipeline.ipynb @@ -200,7 +200,7 @@ "source": [ "settings = {\n", " \"time_budget\": 60, # total running time in seconds\n", - " \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n", + " \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'f1','log_loss','mae','mse','r2']\n", " \"task\": 'classification', # task type \n", " \"estimator_list\":['xgboost','catboost','lgbm'],\n", " \"log_file_name\": 'airlines_experiment.log', # flaml log file\n", @@ -962,4 +962,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/notebook/flaml_automl.ipynb b/notebook/flaml_automl.ipynb index 37bf3d8ed..47dc9344b 100644 --- a/notebook/flaml_automl.ipynb +++ b/notebook/flaml_automl.ipynb @@ -121,7 +121,7 @@ "source": [ "settings = {\n", " \"time_budget\": 300, # total running time in seconds\n", - " \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n", + " \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']\n", " \"task\": 'classification', # task type \n", " \"log_file_name\": 
'airlines_experiment.log', # flaml log file\n", "}" diff --git a/notebook/flaml_azureml.ipynb b/notebook/flaml_azureml.ipynb index a7ce27b5c..95ebd0669 100644 --- a/notebook/flaml_azureml.ipynb +++ b/notebook/flaml_azureml.ipynb @@ -141,7 +141,7 @@ "source": [ "settings = {\n", " \"time_budget\": 60, # total running time in seconds\n", - " \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n", + " \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']\n", " \"estimator_list\": ['lgbm', 'rf', 'xgboost'], # list of ML learners\n", " \"task\": 'classification', # task type \n", " \"sample\": False, # whether to subsample training data\n", diff --git a/test/test_automl.py b/test/test_automl.py index 7d5b2df16..a4aa1118a 100644 --- a/test/test_automl.py +++ b/test/test_automl.py @@ -344,6 +344,36 @@ class TestAutoML(unittest.TestCase): print(multi_class_curves(y_train, y_pred_proba, roc_curve)) print(multi_class_curves(y_train, y_pred_proba, precision_recall_curve)) + def test_roc_auc_ovr(self): + automl_experiment = AutoML() + automl_settings = { + "time_budget": 2, + "metric": "roc_auc_ovr", + "task": "classification", + "log_file_name": "test/roc_auc_ovr.log", + "log_training_metric": True, + "n_jobs": 1, + "model_history": True + } + X_train, y_train = load_iris(return_X_y=True) + automl_experiment.fit( + X_train=X_train, y_train=y_train, **automl_settings) + + def test_roc_auc_ovo(self): + automl_experiment = AutoML() + automl_settings = { + "time_budget": 2, + "metric": "roc_auc_ovo", + "task": "classification", + "log_file_name": "test/roc_auc_ovo.log", + "log_training_metric": True, + "n_jobs": 1, + "model_history": True + } + X_train, y_train = load_iris(return_X_y=True) + automl_experiment.fit( + X_train=X_train, y_train=y_train, **automl_settings) + def test_regression(self): automl_experiment = AutoML() 
automl_settings = { diff --git a/test/test_notebook_example.py b/test/test_notebook_example.py index 12056ba26..a3dd39510 100644 --- a/test/test_notebook_example.py +++ b/test/test_notebook_example.py @@ -14,7 +14,7 @@ def test_automl(budget=5, dataset_format='dataframe'): automl = AutoML() settings = { "time_budget": budget, # total running time in seconds - "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2'] + "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2'] "task": 'classification', # task type "log_file_name": 'airlines_experiment.log', # flaml log file } @@ -71,7 +71,7 @@ def test_mlflow(): automl = AutoML() settings = { "time_budget": 5, # total running time in seconds - "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2'] + "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2'] "estimator_list": ['lgbm', 'rf', 'xgboost'], # list of ML learners "task": 'classification', # task type "sample": False, # whether to subsample training data