mirror of
https://github.com/microsoft/autogen.git
synced 2025-11-23 13:36:31 +00:00
parent
0b25e89f29
commit
baa0359324
@ -502,12 +502,13 @@ class AutoML(BaseEstimator):
|
|||||||
'budget' - do best effort to retrain without violating the time
|
'budget' - do best effort to retrain without violating the time
|
||||||
budget.
|
budget.
|
||||||
split_type: str or splitter object, default="auto" | the data split type.
|
split_type: str or splitter object, default="auto" | the data split type.
|
||||||
A valid splitter object is an instance of a derived class of scikit-learn KFold
|
* A valid splitter object is an instance of a derived class of scikit-learn
|
||||||
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
||||||
and has ``split`` and ``get_n_splits`` methods with the same signatures.
|
and has ``split`` and ``get_n_splits`` methods with the same signatures.
|
||||||
Valid str options depend on different tasks.
|
Set eval_method to "cv" to use the splitter object.
|
||||||
For classification tasks, valid choices are [
|
* Valid str options depend on different tasks.
|
||||||
"auto", 'stratified', 'uniform', 'time']. "auto" -> stratified.
|
For classification tasks, valid choices are
|
||||||
|
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
|
||||||
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
||||||
"auto" -> uniform.
|
"auto" -> uniform.
|
||||||
For ts_forecast tasks, must be "auto" or 'time'.
|
For ts_forecast tasks, must be "auto" or 'time'.
|
||||||
@ -1287,12 +1288,13 @@ class AutoML(BaseEstimator):
|
|||||||
split_ratio: A float of the validation data percentage for holdout.
|
split_ratio: A float of the validation data percentage for holdout.
|
||||||
n_splits: An integer of the number of folds for cross-validation.
|
n_splits: An integer of the number of folds for cross-validation.
|
||||||
split_type: str or splitter object, default="auto" | the data split type.
|
split_type: str or splitter object, default="auto" | the data split type.
|
||||||
A valid splitter object is an instance of a derived class of scikit-learn KFold
|
* A valid splitter object is an instance of a derived class of scikit-learn
|
||||||
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
||||||
and has ``split`` and ``get_n_splits`` methods with the same signatures.
|
and has ``split`` and ``get_n_splits`` methods with the same signatures.
|
||||||
Valid str options depend on different tasks.
|
Set eval_method to "cv" to use the splitter object.
|
||||||
For classification tasks, valid choices are [
|
* Valid str options depend on different tasks.
|
||||||
"auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
|
For classification tasks, valid choices are
|
||||||
|
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
|
||||||
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
||||||
"auto" -> uniform.
|
"auto" -> uniform.
|
||||||
For ts_forecast tasks, must be "auto" or 'time'.
|
For ts_forecast tasks, must be "auto" or 'time'.
|
||||||
@ -1806,12 +1808,13 @@ class AutoML(BaseEstimator):
|
|||||||
'budget' - do best effort to retrain without violating the time
|
'budget' - do best effort to retrain without violating the time
|
||||||
budget.
|
budget.
|
||||||
split_type: str or splitter object, default="auto" | the data split type.
|
split_type: str or splitter object, default="auto" | the data split type.
|
||||||
A valid splitter object is an instance of a derived class of scikit-learn KFold
|
* A valid splitter object is an instance of a derived class of scikit-learn
|
||||||
(https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
||||||
and has ``split`` and ``get_n_splits`` methods with the same signatures.
|
and has ``split`` and ``get_n_splits`` methods with the same signatures.
|
||||||
Valid str options depend on different tasks.
|
Set eval_method to "cv" to use the splitter object.
|
||||||
For classification tasks, valid choices are [
|
* Valid str options depend on different tasks.
|
||||||
"auto", 'stratified', 'uniform', 'time']. "auto" -> stratified.
|
For classification tasks, valid choices are
|
||||||
|
["auto", 'stratified', 'uniform', 'time', 'group']. "auto" -> stratified.
|
||||||
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
For regression tasks, valid choices are ["auto", 'uniform', 'time'].
|
||||||
"auto" -> uniform.
|
"auto" -> uniform.
|
||||||
For ts_forecast tasks, must be "auto" or 'time'.
|
For ts_forecast tasks, must be "auto" or 'time'.
|
||||||
|
|||||||
@ -88,7 +88,7 @@ automl.fit(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
### Sample output
|
#### Sample output
|
||||||
|
|
||||||
```
|
```
|
||||||
[flaml.automl: 12-20 11:47:28] {1965} INFO - task = seq-regression
|
[flaml.automl: 12-20 11:47:28] {1965} INFO - task = seq-regression
|
||||||
@ -137,7 +137,7 @@ automl.fit(
|
|||||||
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val, **automl_settings
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
### Sample Output
|
#### Sample Output
|
||||||
|
|
||||||
```
|
```
|
||||||
[flaml.automl: 12-20 11:44:03] {1965} INFO - task = summarization
|
[flaml.automl: 12-20 11:44:03] {1965} INFO - task = summarization
|
||||||
@ -204,3 +204,5 @@ Model config T5Config {
|
|||||||
"vocab_size": 32128
|
"vocab_size": 32128
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
For tasks that are not currently supported, use `flaml.tune` for [customized tuning](Tune-HuggingFace).
|
||||||
@ -2,6 +2,10 @@
|
|||||||
|
|
||||||
This example uses flaml to finetune a transformer model from the Hugging Face transformers library.
|
This example uses flaml to finetune a transformer model from the Hugging Face transformers library.
|
||||||
|
|
||||||
|
*Note*: `flaml.AutoML` has built-in support for certain finetuning tasks with a
|
||||||
|
[higher-level API](AutoML-NLP).
|
||||||
|
It may be easier to use that API unless you have special requirements that it does not handle.
|
||||||
|
|
||||||
### Requirements
|
### Requirements
|
||||||
|
|
||||||
This example requires GPU. Install dependencies:
|
This example requires GPU. Install dependencies:
|
||||||
|
|||||||
@ -303,6 +303,10 @@ By default, flaml uses the following method to split the data:
|
|||||||
|
|
||||||
The data split method for classification can be changed into uniform split by setting `split_type="uniform"`. For both classification and regression, time-based split can be enforced if the data are sorted by timestamps, by setting `split_type="time"`.
|
The data split method for classification can be changed into uniform split by setting `split_type="uniform"`. For both classification and regression, time-based split can be enforced if the data are sorted by timestamps, by setting `split_type="time"`.
|
||||||
|
|
||||||
|
When `eval_method="cv"`, `split_type` can also be set to a custom splitter object. It needs to be an instance of a derived class of scikit-learn
|
||||||
|
[KFold](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html#sklearn.model_selection.KFold)
|
||||||
|
and have ``split`` and ``get_n_splits`` methods with the same signatures.
|
||||||
|
|
||||||
### Parallel tuning
|
### Parallel tuning
|
||||||
|
|
||||||
When you have parallel resources, you can either spend them in training and keep the model search sequential, or perform parallel search. Following scikit-learn, the parameter `n_jobs` specifies how many CPU cores to use for each training job. The number of parallel trials is specified via the parameter `n_concurrent_trials`. By default, `n_jobs=-1, n_concurrent_trials=1`. That is, all the CPU cores (in a single compute node) are used for training a single model and the search is sequential. When you have more resources than what each single training job needs, you can consider increasing `n_concurrent_trials`.
|
When you have parallel resources, you can either spend them in training and keep the model search sequential, or perform parallel search. Following scikit-learn, the parameter `n_jobs` specifies how many CPU cores to use for each training job. The number of parallel trials is specified via the parameter `n_concurrent_trials`. By default, `n_jobs=-1, n_concurrent_trials=1`. That is, all the CPU cores (in a single compute node) are used for training a single model and the search is sequential. When you have more resources than what each single training job needs, you can consider increasing `n_concurrent_trials`.
|
||||||
|
|||||||
@ -436,12 +436,12 @@ analysis = tune.run(
|
|||||||
|
|
||||||
### Reproducibility
|
### Reproducibility
|
||||||
|
|
||||||
By default, there is randomness in our tuning process (for versions <= 0.9.0). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
|
By default, there is randomness in our tuning process (for versions <= 0.9.1). If reproducibility is desired, you could manually set a random seed before calling `tune.run()`. For example, in the following code, we call `np.random.seed(100)` to set the random seed.
|
||||||
With this random seed, running the following code multiple times will generate exactly the same search trajectory. The reproducibility can only be guaranteed in sequential tuning.
|
With this random seed, running the following code multiple times will generate exactly the same search trajectory. The reproducibility can only be guaranteed in sequential tuning.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(100) # This line is not needed starting from version v0.9.1.
|
np.random.seed(100) # This line is not needed starting from version v0.9.2.
|
||||||
analysis = tune.run(
|
analysis = tune.run(
|
||||||
simple_obj,
|
simple_obj,
|
||||||
config=config_search_space,
|
config=config_search_space,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user