diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 116a70261..f1098eb4d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,19 +1,28 @@
+default_language_version:
+  python: python3
+
+ci:
+  autofix_prs: true
+  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+  autoupdate_schedule: 'quarterly'
 
 repos:
   - repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 23.1.0
     hooks:
     - id: black
-      language_version: python3
-  -   repo: https://github.com/pycqa/flake8
-      rev: 4.0.1
-      hooks:
-      -   id: flake8
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.0.0
+    hooks:
+    - id: flake8
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v4.4.0
     hooks:
     - id: check-added-large-files
     - id: check-ast
+    - id: check-yaml
+    - id: check-toml
+    - id: check-json
     - id: check-byte-order-marker
     - id: check-merge-conflict
     - id: detect-private-key
diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py
index 42576a8a2..a6497e9c7 100644
--- a/flaml/automl/automl.py
+++ b/flaml/automl/automl.py
@@ -1104,7 +1104,6 @@ class AutoML(BaseEstimator):
         groups_val=None,
         groups=None,
     ):
-
         if X_train_all is not None and y_train_all is not None:
             assert (
                 isinstance(X_train_all, np.ndarray)
@@ -1266,7 +1265,6 @@ class AutoML(BaseEstimator):
             self._state.groups = groups
 
     def _prepare_data(self, eval_method, split_ratio, n_splits):
-
         X_val, y_val = self._state.X_val, self._state.y_val
         if issparse(X_val):
             X_val = X_val.tocsr()
diff --git a/flaml/automl/ml.py b/flaml/automl/ml.py
index dd17cffea..4d85679b4 100644
--- a/flaml/automl/ml.py
+++ b/flaml/automl/ml.py
@@ -439,7 +439,6 @@ def get_val_loss(
     fit_kwargs={},
     free_mem_ratio=0,
 ):
-
     start = time.time()
     # if groups_val is not None:
     #     fit_kwargs['groups_val'] = groups_val
diff --git a/flaml/automl/model.py b/flaml/automl/model.py
index bccad6470..0d8b32c7d 100644
--- a/flaml/automl/model.py
+++ b/flaml/automl/model.py
@@ -175,7 +175,6 @@ class BaseEstimator:
         return X
 
     def _fit(self, X_train, y_train, **kwargs):
-
         current_time = time.time()
         if "groups" in kwargs:
             kwargs = kwargs.copy()
@@ -447,7 +446,7 @@ class TransformersEstimator(BaseEstimator):
     def _set_training_args(self, **kwargs):
         from .nlp.utils import date_str, Counter
 
-        for (key, val) in kwargs.items():
+        for key, val in kwargs.items():
             assert key not in self.params, (
                 "Since {} is in the search space, it cannot exist in 'custom_fit_kwargs' at the same time."
                 "If you need to fix the value of {} to {}, the only way is to add a single-value domain in the search "
diff --git a/flaml/automl/nlp/huggingface/training_args.py b/flaml/automl/nlp/huggingface/training_args.py
index b064f51f3..7461b1caa 100644
--- a/flaml/automl/nlp/huggingface/training_args.py
+++ b/flaml/automl/nlp/huggingface/training_args.py
@@ -112,7 +112,6 @@ class TrainingArgumentsForAuto(TrainingArguments):
 
 @dataclass
 class Seq2SeqTrainingArgumentsForAuto(TrainingArgumentsForAuto):
-
     model_path: str = field(
         default="t5-small",
         metadata={
diff --git a/flaml/automl/nlp/huggingface/utils.py b/flaml/automl/nlp/huggingface/utils.py
index ba7558f09..afea65c10 100644
--- a/flaml/automl/nlp/huggingface/utils.py
+++ b/flaml/automl/nlp/huggingface/utils.py
@@ -135,7 +135,6 @@ def tokenize_and_align_labels(
 
 
 def tokenize_text_tokclassification(X, Y, tokenizer, hf_args=None):
-
     # If the label_all_tokens flag is True, prepare two dicts label_to_id and b_to_i_label to convert the B- labels to I- labels
     label_to_id = {i: i for i in range(len(hf_args.label_list))}
     b_to_i_label = []
@@ -275,7 +274,6 @@ def tokenize_row(
 
 
 def tokenize_text_multiplechoice(X, tokenizer, hf_args=None):
-
     t = X[["sent1", "sent2", "ending0", "ending1", "ending2", "ending3"]]
     _, tokenized_column_names = tokenize_swag(
         t.iloc[0],
diff --git a/flaml/automl/nlp/utils.py b/flaml/automl/nlp/utils.py
index 872c1d037..431bf4219 100644
--- a/flaml/automl/nlp/utils.py
+++ b/flaml/automl/nlp/utils.py
@@ -11,7 +11,6 @@ from flaml.automl.data import (
 
 
 def load_default_huggingface_metric_for_task(task):
-
     if task == SEQCLASSIFICATION:
         return "accuracy"
     elif task == SEQREGRESSION:
diff --git a/flaml/tune/sample.py b/flaml/tune/sample.py
index 66223ca1c..b59ced92d 100644
--- a/flaml/tune/sample.py
+++ b/flaml/tune/sample.py
@@ -647,5 +647,4 @@ def qrandn(mean: float, sd: float, q: float):
 def polynomial_expansion_set(
     init_monomials: set, highest_poly_order: int = None, allow_self_inter: bool = False
 ):
-
     return PolynomialExpansionSet(init_monomials, highest_poly_order, allow_self_inter)
diff --git a/flaml/tune/searcher/variant_generator.py b/flaml/tune/searcher/variant_generator.py
index fb0c8ed0e..9d7decc0a 100644
--- a/flaml/tune/searcher/variant_generator.py
+++ b/flaml/tune/searcher/variant_generator.py
@@ -282,9 +282,9 @@ def _split_resolved_unresolved_values(
                 _resolved_children,
                 _unresolved_children,
             ) = _split_resolved_unresolved_values(v)
-            for (path, value) in _resolved_children.items():
+            for path, value in _resolved_children.items():
                 resolved_vars[(k,) + path] = value
-            for (path, value) in _unresolved_children.items():
+            for path, value in _unresolved_children.items():
                 unresolved_vars[(k,) + path] = value
         elif isinstance(v, list):
             # Recurse into a list
@@ -293,9 +293,9 @@ def _split_resolved_unresolved_values(
                     _resolved_children,
                     _unresolved_children,
                 ) = _split_resolved_unresolved_values({i: elem})
-                for (path, value) in _resolved_children.items():
+                for path, value in _resolved_children.items():
                     resolved_vars[(k,) + path] = value
-                for (path, value) in _unresolved_children.items():
+                for path, value in _unresolved_children.items():
                     unresolved_vars[(k,) + path] = value
         else:
             resolved_vars[(k,)] = v
diff --git a/flaml/tune/tune.py b/flaml/tune/tune.py
index a6046cd83..76921f199 100644
--- a/flaml/tune/tune.py
+++ b/flaml/tune/tune.py
@@ -142,7 +142,6 @@ class ExperimentAnalysis(EA):
 
 
 def report(_metric=None, **kwargs):
-
     """A function called by the HPO application to report final or intermediate
     results.
 
diff --git a/test/automl/test_multiclass.py b/test/automl/test_multiclass.py
index b9f7b177a..3b6f70732 100644
--- a/test/automl/test_multiclass.py
+++ b/test/automl/test_multiclass.py
@@ -11,7 +11,6 @@ from flaml.automl.training_log import training_log_reader
 
 class MyRegularizedGreedyForest(SKLearnEstimator):
     def __init__(self, task="binary", **config):
-
         super().__init__(task, **config)
 
         if task in CLASSIFICATION:
diff --git a/test/automl/test_python_log.py b/test/automl/test_python_log.py
index 720e48665..d1e38fa78 100644
--- a/test/automl/test_python_log.py
+++ b/test/automl/test_python_log.py
@@ -10,11 +10,9 @@ import io
 
 class TestLogging(unittest.TestCase):
     def test_logging_level(self):
-
         from flaml import logger, logger_formatter
 
         with tempfile.TemporaryDirectory() as d:
-
             training_log = os.path.join(d, "training.log")
 
             # Configure logging for the FLAML logger
diff --git a/test/automl/test_training_log.py b/test/automl/test_training_log.py
index d8949e6d1..8d15bacf0 100644
--- a/test/automl/test_training_log.py
+++ b/test/automl/test_training_log.py
@@ -12,7 +12,6 @@ class TestTrainingLog(unittest.TestCase):
     def test_training_log(
         self, path="test_training_log.log", estimator_list="auto", use_ray=False
     ):
-
         with TemporaryDirectory() as d:
             filename = os.path.join(d, path)
 
diff --git a/test/nlp/utils.py b/test/nlp/utils.py
index f8536b960..6d133a08c 100644
--- a/test/nlp/utils.py
+++ b/test/nlp/utils.py
@@ -1574,7 +1574,6 @@ def get_toy_data_tokenclassification_tokenlabel():
 
 
 def get_automl_settings(estimator_name="transformer"):
-
     automl_settings = {
         "gpu_per_trial": 0,
         "max_iter": 3,
diff --git a/test/test_autovw.py b/test/test_autovw.py
index f75e527af..59f7b9943 100644
--- a/test/test_autovw.py
+++ b/test/test_autovw.py
@@ -101,7 +101,7 @@ def get_oml_to_vw(did, max_ns_num, ds_dir=VW_DS_DIR):
         target_attribute = ds.default_target_attribute
         # if target_attribute is None and did in OML_target_attribute_dict:
         #     target_attribute = OML_target_attribute_dict[did]
-    except (SSLError) as e:
+    except SSLError as e:
         print(e)
         return
 
diff --git a/test/tune/test_record_incumbent.py b/test/tune/test_record_incumbent.py
index 9cec3c1da..fdf5bb5e7 100644
--- a/test/tune/test_record_incumbent.py
+++ b/test/tune/test_record_incumbent.py
@@ -17,7 +17,6 @@ def rosenbrock_function(config: dict):
 
 
 def test_record_incumbent(method="BlendSearch"):
-
     if method != "CFOCat":
         search_space = {
             "x1": tune.randint(1, 9),
diff --git a/test/tune/test_reproducibility.py b/test/tune/test_reproducibility.py
index 0c68a9136..8b3c867a5 100644
--- a/test/tune/test_reproducibility.py
+++ b/test/tune/test_reproducibility.py
@@ -35,7 +35,6 @@ def test_tune(externally_setup_searcher=False, use_ray=False, use_raytune=False)
         "height": tune.uniform(-100, 100),
     }
     if externally_setup_searcher is True:
-
         searcher = BlendSearch(
             space=search_space,
             time_budget_s=5,