diff --git a/flaml/automl.py b/flaml/automl.py
index 7190cda6b..8df3543eb 100644
--- a/flaml/automl.py
+++ b/flaml/automl.py
@@ -502,12 +502,13 @@ class AutoML(BaseEstimator):
                 'budget' - do best effort to retrain without violating the time
                 budget.
             split_type: str or splitter object, default="auto" | the data split type.
-                A valid splitter object is an instance of a derived class of scikit-learn KFold
-                (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
+                * A valid splitter object is an instance of a derived class of scikit-learn
+                [KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
                 and have ``split`` and ``get_n_splits`` methods with the same signatures.
-                Valid str options depend on different tasks.
-                For classification tasks, valid choices are [
-                "auto", 'stratified', 'uniform', 'time']. "auto" -> stratified.
+                Set eval_method to "cv" to use the splitter object.
+                * Valid str options depend on different tasks.
+                For classification tasks, valid choices are
+                ["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
                 For regression tasks, valid choices are ["auto", 'uniform', 'time'].
                 "auto" -> uniform.
                 For ts_forecast tasks, must be "auto" or 'time'.
@@ -1287,12 +1288,13 @@ class AutoML(BaseEstimator):
             split_ratio: A float of the validation data percentage for holdout.
             n_splits: An integer of the number of folds for cross-validation.
             split_type: str or splitter object, default="auto" | the data split type.
-                A valid splitter object is an instance of a derived class of scikit-learn KFold
-                (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
+                * A valid splitter object is an instance of a derived class of scikit-learn
+                [KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
                 and have ``split`` and ``get_n_splits`` methods with the same signatures.
-                Valid str options depend on different tasks.
-                For classification tasks, valid choices are [
-                "auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
+                Set eval_method to "cv" to use the splitter object.
+                * Valid str options depend on different tasks.
+                For classification tasks, valid choices are
+                ["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
                 For regression tasks, valid choices are ["auto", 'uniform', 'time'].
                 "auto" -> uniform.
                 For ts_forecast tasks, must be "auto" or 'time'.
@@ -1806,12 +1808,13 @@ class AutoML(BaseEstimator):
                 'budget' - do best effort to retrain without violating the time
                 budget.
             split_type: str or splitter object, default="auto" | the data split type.
-                A valid splitter object is an instance of a derived class of scikit-learn KFold
-                (https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
+                * A valid splitter object is an instance of a derived class of scikit-learn
+                [KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
                 and have ``split`` and ``get_n_splits`` methods with the same signatures.
-                Valid str options depend on different tasks.
-                For classification tasks, valid choices are [
-                "auto", 'stratified', 'uniform', 'time']. "auto" -> stratified.
+                Set eval_method to "cv" to use the splitter object.
+                * Valid str options depend on different tasks.
+                For classification tasks, valid choices are
+                ["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
                 For regression tasks, valid choices are ["auto", 'uniform', 'time'].
                 "auto" -> uniform.
                 For ts_forecast tasks, must be "auto" or 'time'.
diff --git a/website/docs/Examples/AutoML-NLP.md b/website/docs/Examples/AutoML-NLP.md
index 9390fcced..a9e06ae0f 100644
--- a/website/docs/Examples/AutoML-NLP.md
+++ b/website/docs/Examples/AutoML-NLP.md
@@ -88,7 +88,7 @@ automl.fit(
 )
 ```
 
-### Sample output
+#### Sample output
 
 ```
 [flaml.automl: 12-20 11:47:28] {1965} INFO - task = seq-regression
@@ -137,7 +137,7 @@ automl.fit(
     X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
 )
 ```
-### Sample Output
+#### Sample Output
 
 ```
 [flaml.automl: 12-20 11:44:03] {1965} INFO - task = summarization
@@ -203,4 +203,6 @@ Model config T5Config {
   "use_cache": true,
   "vocab_size": 32128
 }
-```
\ No newline at end of file
+```
+
+For tasks that are not currently supported, use `flaml.tune` for [customized tuning](Tune-HuggingFace).
\ No newline at end of file
diff --git a/website/docs/Examples/Tune-HuggingFace.md b/website/docs/Examples/Tune-HuggingFace.md
index e6d583ff5..25b5e13ce 100644
--- a/website/docs/Examples/Tune-HuggingFace.md
+++ b/website/docs/Examples/Tune-HuggingFace.md
@@ -2,6 +2,10 @@
 
 This example uses flaml to finetune a transformer model from Huggingface transformers library.
 
+*Note*: `flaml.AutoML` has built-in support for certain finetuning tasks with a
+[higher-level API](AutoML-NLP).
+It may be easier to use that API unless you have special requirements that it does not handle.
+
 ### Requirements
 
 This example requires GPU. Install dependencies:
diff --git a/website/docs/Use-Cases/Task-Oriented-AutoML.md b/website/docs/Use-Cases/Task-Oriented-AutoML.md
index 22b8e7798..a24a1ece1 100644
--- a/website/docs/Use-Cases/Task-Oriented-AutoML.md
+++ b/website/docs/Use-Cases/Task-Oriented-AutoML.md
@@ -303,6 +303,29 @@ By default, flaml uses the following method to split the data:
 
 The data split method for classification can be changed into uniform split by setting `split_type="uniform"`. For both classification and regression, time-based split can be enforced if the data are sorted by timestamps, by setting `split_type="time"`.
 
+When `eval_method="cv"`, `split_type` can also be set to a custom splitter object. It needs to be an instance of a derived class of scikit-learn
+[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
+and have ``split`` and ``get_n_splits`` methods with the same signatures, as in the sketch below.
+
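+As a minimal sketch (the iris data, the 5-fold `KFold`, and the 10-second
+budget are illustrative placeholders rather than requirements):
+
+```python
+from flaml import AutoML
+from sklearn.datasets import load_iris
+from sklearn.model_selection import KFold
+
+X, y = load_iris(return_X_y=True)
+automl = AutoML()
+automl.fit(
+    X, y,
+    task="classification",
+    time_budget=10,
+    eval_method="cv",  # required; the splitter is only used with cv evaluation
+    split_type=KFold(n_splits=5, shuffle=True, random_state=1),
+)
+```
+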
 ### Parallel tuning
 
 When you have parallel resources, you can either spend them in training and keep the model search sequential, or perform parallel search. Following scikit-learn, the parameter `n_jobs` specifies how many CPU cores to use for each training job. The number of parallel trials is specified via the parameter `n_concurrent_trials`. By default, `n_jobs=-1, n_concurrent_trials=1`. That is, all the CPU cores (in a single compute node) are used for training a single model and the search is sequential. When you have more resources than what each single training job needs, you can consider increasing `n_concurrent_trials`.
diff --git a/website/docs/Use-Cases/Tune-User-Defined-Function.md b/website/docs/Use-Cases/Tune-User-Defined-Function.md
index 5cabe6458..d6d13d7d3 100644
--- a/website/docs/Use-Cases/Tune-User-Defined-Function.md
+++ b/website/docs/Use-Cases/Tune-User-Defined-Function.md
@@ -436,12 +436,12 @@ analysis = tune.run(
 
 ### Reproducibility
 
-By default, there is randomness in our tuning process (for versions <= 0.9.0). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
+By default, there is randomness in our tuning process (for versions <= 0.9.1). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
 With this random seed, running the following code multiple times will generate exactly the same search trajectory. The reproducibility can only be guaranteed in sequential tuning.
 
 ```python
 import numpy as np
-np.random.seed(100) # This line is not needed starting from version v0.9.1.
+np.random.seed(100) # This line is not needed starting from version v0.9.2.
 analysis = tune.run(
     simple_obj,
     config=config_search_space,