sample_weight; dependency; notebook

This commit is contained in:
Chi Wang (MSR) 2021-02-13 10:43:11 -08:00
parent d18d292081
commit bd16eeee69
9 changed files with 676 additions and 229 deletions

View File

@@ -402,7 +402,7 @@ class AutoML:
self._X_train_all, self._y_train_all = \ self._X_train_all, self._y_train_all = \
self._transformer.fit_transform(X, y, self._state.task) self._transformer.fit_transform(X, y, self._state.task)
self._label_transformer = self._transformer.label_transformer self._label_transformer = self._transformer.label_transformer
self._sample_weight_full = self._state.fit_kwargs.get('sample_weight')
if X_val is not None and y_val is not None: if X_val is not None and y_val is not None:
if not (isinstance(X_val, np.ndarray) or if not (isinstance(X_val, np.ndarray) or
issparse(X_val) or issparse(X_val) or
@@ -446,7 +446,8 @@ class AutoML:
self._X_train_all, self._y_train_all self._X_train_all, self._y_train_all
if issparse(X_train_all): if issparse(X_train_all):
X_train_all = X_train_all.tocsr() X_train_all = X_train_all.tocsr()
if self._state.task != 'regression': if self._state.task != 'regression' and self._state.fit_kwargs.get(
'sample_weight') is None:
# logger.info(f"label {pd.unique(y_train_all)}") # logger.info(f"label {pd.unique(y_train_all)}")
label_set, counts = np.unique(y_train_all, return_counts=True) label_set, counts = np.unique(y_train_all, return_counts=True)
# augment rare classes # augment rare classes
@@ -1151,7 +1152,11 @@ class AutoML:
stacker = Stacker(estimators, best_m, stacker = Stacker(estimators, best_m,
n_jobs=self._state.n_jobs, n_jobs=self._state.n_jobs,
passthrough=True) passthrough=True)
stacker.fit(self._X_train_all, self._y_train_all) if self._sample_weight_full is not None:
self._state.fit_kwargs[
'sample_weight'] = self._sample_weight_full
stacker.fit(self._X_train_all, self._y_train_all,
**self._state.fit_kwargs)
logger.info(f'ensemble: {stacker}') logger.info(f'ensemble: {stacker}')
self._trained_estimator = stacker self._trained_estimator = stacker
self._trained_estimator.model = stacker self._trained_estimator.model = stacker

View File

@@ -146,6 +146,7 @@ based on optimism in face of uncertainty.
Example: Example:
```python ```python
# requirements: pip install flaml[blendsearch]
from flaml import BlendSearch from flaml import BlendSearch
tune.run(... tune.run(...
search_alg = BlendSearch(points_to_evaluate=[init_config]), search_alg = BlendSearch(points_to_evaluate=[init_config]),

View File

@@ -1 +1 @@
__version__ = "0.2.3" __version__ = "0.2.4"

View File

@@ -6,11 +6,16 @@
"source": [ "source": [
"This notebook uses the Huggingface transformers library to finetune a transformer model.\n", "This notebook uses the Huggingface transformers library to finetune a transformer model.\n",
"\n", "\n",
"**Requirements.** This notebook has additional requirements:\n", "**Requirements.** This notebook has additional requirements:"
"\n", ]
"```bash\n", },
"pip install -r transformers_requirements.txt\n", {
"```" "cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install torch transformers datasets ipywidgets"
] ]
}, },
{ {
@@ -699,7 +704,7 @@
"source": [ "source": [
"### Step 3. Launch with `flaml.tune.run`\n", "### Step 3. Launch with `flaml.tune.run`\n",
"\n", "\n",
"We are now ready to laungh the tuning using `flaml.tune.run`:" "We are now ready to launch the tuning using `flaml.tune.run`:"
], ],
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {} "metadata": {}
@@ -766,9 +771,13 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "flaml", "name": "python3",
"language": "python", "display_name": "Python 3.7.7 64-bit ('flaml': conda)",
"name": "flaml" "metadata": {
"interpreter": {
"hash": "bfcd9a6a9254a5e160761a1fd7a9e444f011592c6770d9f4180dde058a9df5dd"
}
}
}, },
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {
@@ -780,7 +789,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.7.6" "version": "3.7.7-final"
} }
}, },
"nbformat": 4, "nbformat": 4,

File diff suppressed because one or more lines are too long

View File

@@ -1,4 +0,0 @@
torch
transformers
datasets
ipywidgets

View File

@@ -20,7 +20,6 @@ install_requires = [
"scipy>=1.4.1", "scipy>=1.4.1",
"catboost>=0.23", "catboost>=0.23",
"scikit-learn>=0.23.2", "scikit-learn>=0.23.2",
"optuna==2.3.0"
], ],
@@ -48,6 +47,10 @@ setuptools.setup(
"coverage>=5.3", "coverage>=5.3",
"xgboost<1.3", "xgboost<1.3",
"rgf-python", "rgf-python",
"optuna==2.3.0",
],
"blendsearch": [
"optuna==2.3.0"
], ],
"ray": [ "ray": [
"ray[tune]==1.1.0", "ray[tune]==1.1.0",

View File

@@ -1,3 +1,5 @@
'''Require: pip install torchvision ray
'''
import unittest import unittest
import os import os
import time import time

View File

@@ -1,5 +1,6 @@
'''Require: pip install flaml[test,ray]
'''
import unittest import unittest
import os
import time import time
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import sklearn.metrics import sklearn.metrics
@@ -138,6 +139,7 @@ def _test_xgboost(method='BlendSearch'):
scheduler=scheduler, search_alg=algo) scheduler=scheduler, search_alg=algo)
ray.shutdown() ray.shutdown()
# # Load the best model checkpoint # # Load the best model checkpoint
# import os
# best_bst = xgb.Booster() # best_bst = xgb.Booster()
# best_bst.load_model(os.path.join(analysis.best_checkpoint, # best_bst.load_model(os.path.join(analysis.best_checkpoint,
# "model.xgb")) # "model.xgb"))