diff --git a/flaml/automl.py b/flaml/automl.py
index 9e3f78169..8cd8f6738 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -1018,8 +1018,8 @@ class AutoML:
                 dataframe and label are ignored;
                 If not, dataframe and label must be provided.
             metric: A string of the metric name or a function,
-                e.g., 'accuracy', 'roc_auc', 'f1', 'micro_f1', 'macro_f1',
-                'log_loss', 'mae', 'mse', 'r2'
+                e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
+                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2'
                 if passing a customized metric function, the function needs to
                 have the follwing signature:
 
@@ -1133,7 +1133,8 @@ class AutoML:
             else:
                 metric = 'r2'
         self._state.metric = metric
-        if metric in ['r2', 'accuracy', 'roc_auc', 'f1', 'ap', 'micro_f1', 'macro_f1']:
+        if metric in ['r2', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
+                      'f1', 'ap', 'micro_f1', 'macro_f1']:
             error_metric = f"1-{metric}"
         elif isinstance(metric, str):
             error_metric = metric
diff --git a/flaml/ml.py b/flaml/ml.py
index 3fc0bbf73..78f2ce4c7 100644
--- a/flaml/ml.py
+++ b/flaml/ml.py
@@ -56,8 +56,8 @@ def sklearn_metric_loss_score(
 
     Args:
         metric_name: A string of the metric name, one of
-            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
-            'f1', 'ap', 'micro_f1', 'macro_f1'
+            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',
+            'roc_auc_ovo', 'log_loss', 'f1', 'ap', 'micro_f1', 'macro_f1'
         y_predict: A 1d or 2d numpy array of the predictions which can be
             used to calculate the metric. E.g., 2d for log_loss and 1d
             for others.
@@ -83,9 +83,15 @@ def sklearn_metric_loss_score(
     elif metric_name == 'accuracy':
         score = 1.0 - accuracy_score(
             y_true, y_predict, sample_weight=sample_weight)
-    elif 'roc_auc' in metric_name:
+    elif metric_name == 'roc_auc':
         score = 1.0 - roc_auc_score(
             y_true, y_predict, sample_weight=sample_weight)
+    elif metric_name == 'roc_auc_ovr':
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight, multi_class='ovr')
+    elif metric_name == 'roc_auc_ovo':
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight, multi_class='ovo')
     elif 'log_loss' in metric_name:
         score = log_loss(
             y_true, y_predict, labels=labels, sample_weight=sample_weight)
@@ -104,7 +110,8 @@ def sklearn_metric_loss_score(
         raise ValueError(
             metric_name + ' is not a built-in metric, '
             'currently built-in metrics are: '
-            'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, micro_f1, macro_f1, ap. '
+            'r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo, '
+            'log_loss, f1, micro_f1, macro_f1, ap. '
             'please pass a customized metric function to AutoML.fit(metric=func)')
     return score
 
@@ -114,7 +121,7 @@ def get_y_pred(estimator, X, eval_metric, obj):
         y_pred_classes = estimator.predict_proba(X)
         y_pred = y_pred_classes[
             :, 1] if y_pred_classes.ndim > 1 else y_pred_classes
-    elif eval_metric in ['log_loss', 'roc_auc']:
+    elif eval_metric in ['log_loss', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo']:
         y_pred = estimator.predict_proba(X)
     else:
         y_pred = estimator.predict(X)
diff --git a/notebook/automl_in_sklearn_pipeline.ipynb b/notebook/automl_in_sklearn_pipeline.ipynb
index c38b7ad5e..644ffd04c 100644
--- a/notebook/automl_in_sklearn_pipeline.ipynb
+++ b/notebook/automl_in_sklearn_pipeline.ipynb
@@ -200,7 +200,7 @@
    "source": [
     "settings = {\n",
     "    \"time_budget\": 60,  # total running time in seconds\n",
-    "    \"metric\": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n",
+    "    \"metric\": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']\n",
     "    \"task\": 'classification',  # task type   \n",
     "    \"estimator_list\":['xgboost','catboost','lgbm'],\n",
     "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\n",
@@ -962,4 +962,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/notebook/flaml_automl.ipynb b/notebook/flaml_automl.ipynb
index 37bf3d8ed..47dc9344b 100644
--- a/notebook/flaml_automl.ipynb
+++ b/notebook/flaml_automl.ipynb
@@ -121,7 +121,7 @@
    "source": [
     "settings = {\n",
     "    \"time_budget\": 300,  # total running time in seconds\n",
-    "    \"metric\": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n",
+    "    \"metric\": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']\n",
     "    \"task\": 'classification',  # task type    \n",
     "    \"log_file_name\": 'airlines_experiment.log',  # flaml log file\n",
     "}"
diff --git a/notebook/flaml_azureml.ipynb b/notebook/flaml_azureml.ipynb
index a7ce27b5c..95ebd0669 100644
--- a/notebook/flaml_azureml.ipynb
+++ b/notebook/flaml_azureml.ipynb
@@ -141,7 +141,7 @@
    "source": [
     "settings = {\n",
     "    \"time_budget\": 60,  # total running time in seconds\n",
-    "    \"metric\": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n",
+    "    \"metric\": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']\n",
     "    \"estimator_list\": ['lgbm', 'rf', 'xgboost'],  # list of ML learners\n",
     "    \"task\": 'classification',  # task type    \n",
     "    \"sample\": False,  # whether to subsample training data\n",
diff --git a/test/test_automl.py b/test/test_automl.py
index 7d5b2df16..a4aa1118a 100644
--- a/test/test_automl.py
+++ b/test/test_automl.py
@@ -344,6 +344,36 @@ class TestAutoML(unittest.TestCase):
         print(multi_class_curves(y_train, y_pred_proba, roc_curve))
         print(multi_class_curves(y_train, y_pred_proba, precision_recall_curve))
 
+    def test_roc_auc_ovr(self):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 2,
+            "metric": "roc_auc_ovr",
+            "task": "classification",
+            "log_file_name": "test/roc_auc_ovr.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True
+        }
+        X_train, y_train = load_iris(return_X_y=True)
+        automl_experiment.fit(
+            X_train=X_train, y_train=y_train, **automl_settings)
+
+    def test_roc_auc_ovo(self):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 2,
+            "metric": "roc_auc_ovo",
+            "task": "classification",
+            "log_file_name": "test/roc_auc_ovo.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True
+        }
+        X_train, y_train = load_iris(return_X_y=True)
+        automl_experiment.fit(
+            X_train=X_train, y_train=y_train, **automl_settings)
+
     def test_regression(self):
         automl_experiment = AutoML()
         automl_settings = {
diff --git a/test/test_notebook_example.py b/test/test_notebook_example.py
index 12056ba26..a3dd39510 100644
--- a/test/test_notebook_example.py
+++ b/test/test_notebook_example.py
@@ -14,7 +14,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
     automl = AutoML()
     settings = {
         "time_budget": budget,  # total running time in seconds
-        "metric": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']
+        "metric": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
         "task": 'classification',  # task type
         "log_file_name": 'airlines_experiment.log',  # flaml log file
     }
@@ -71,7 +71,7 @@ def test_mlflow():
     automl = AutoML()
     settings = {
         "time_budget": 5,  # total running time in seconds
-        "metric": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']
+        "metric": 'accuracy',  # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
         "estimator_list": ['lgbm', 'rf', 'xgboost'],  # list of ML learners
         "task": 'classification',  # task type
         "sample": False,  # whether to subsample training data