parent 4ce908f42e
commit 0fb3e04fc3

.github/workflows/python-package.yml (vendored, 101 lines changed)
@@ -19,41 +19,66 @@ jobs:
        python-version: [3.6, 3.7, 3.8]

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
    - name: If mac, install libomp to facilitate lgbm install
      if: matrix.os == 'macOS-latest'
      run: |
        brew install libomp
        export CC=/usr/bin/clang
        export CXX=/usr/bin/clang++
        export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp"
        export CFLAGS="$CFLAGS -I/usr/local/opt/libomp/include"
        export CXXFLAGS="$CXXFLAGS -I/usr/local/opt/libomp/include"
        export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
    - name: Install packages and dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest coverage
        pip install -e .
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pytest test
    - name: Coverage
      run: |
        coverage run -a -m pytest test
        coverage xml
    - name: Upload coverage to Codecov
      uses: codecov/codecov-action@v1
      with:
        file: ./coverage.xml
        flags: unittests

  docs:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Setup Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.8'
    - name: Compile documentation
      run: |
        pip install -e .
        python -m pip install sphinx sphinx_rtd_theme
        cd docs
        make html
    - name: Deploy to GitHub pages
      if: ${{ github.ref == 'refs/heads/main' }}
      uses: JamesIves/github-pages-deploy-action@3.6.2
      with:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        BRANCH: gh-pages
        FOLDER: docs/_build/html
        CLEAN: true
docs/Makefile (new file, 20 lines)
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
docs/conf.py (new file, 60 lines)
@@ -0,0 +1,60 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))


# -- Project information -----------------------------------------------------

project = 'FLAML'
copyright = '2020, FLAML Team'
author = 'FLAML Team'


# -- General configuration ---------------------------------------------------

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.doctest',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
    'sphinx_rtd_theme',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']


# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
docs/index.rst (new file, 29 lines)
@@ -0,0 +1,29 @@
.. FLAML documentation master file, created by
   sphinx-quickstart on Mon Dec 14 23:33:24 2020.
   You can adapt this file completely to your liking, but it should at least
   contain the root `toctree` directive.

.. Welcome to FLAML's documentation!
.. =================================

.. .. toctree::
..    :maxdepth: 2
..    :caption: Contents:


FLAML API Documentation
=======================

AutoML
------

.. autoclass:: flaml.AutoML
    :members:


.. Indices and tables
.. ==================

.. * :ref:`genindex`
.. * :ref:`modindex`
.. * :ref:`search`
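Note: the autoclass directive above renders the docstrings that this commit moves into the AutoML class and its properties (see the flaml/automl.py hunks below). A quick, hedged way to preview locally what autodoc will pick up, assuming the package has been installed (e.g. via pip install -e .):

```python
# A sketch, not part of the commit: print the docstrings that
# sphinx.ext.autodoc will render for flaml.AutoML.
import flaml

print(flaml.AutoML.__doc__)              # class docstring with the usage example
print(flaml.AutoML.best_config.__doc__)  # property docstring added in this commit
```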
docs/make.bat (new file, 35 lines)
@@ -0,0 +1,35 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
	set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build

if "%1" == "" goto help

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
	echo.
	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
	echo.installed, then set the SPHINXBUILD environment variable to point
	echo.to the full path of the 'sphinx-build' executable. Alternatively you
	echo.may add the Sphinx directory to PATH.
	echo.
	echo.If you don't have Sphinx installed, grab it from
	echo.http://sphinx-doc.org/
	exit /b 1
)

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
flaml/automl.py (119 lines changed)
@@ -28,33 +28,20 @@ logger = logging.getLogger(__name__)
 class AutoML:
     '''The AutoML class

-    Attributes:
-        model: An object with predict() and predict_proba() method (for
-            classification), storing the best trained model.
-        model_history: A dictionary of iter->model, storing the models when
-            the best model is updated each time
-        config_history: A dictionary of iter->(estimator, config, time),
-            storing the best estimator, config, and the time when the best
-            model is updated each time
-        classes_: A list of n_classes elements for class labels
-        best_iteration: An integer of the iteration number where the best
-            config is found
-        best_estimator: A string indicating the best estimator found.
-        best_config: A dictionary of the best configuration.
-        best_config_train_time: A float of the seconds taken by training the
-            best config
-    Example:
+    Typical usage example:

-        automl = AutoML()
-        automl_settings = {
-            "time_budget": 60,
-            "metric": 'accuracy',
-            "task": 'classification',
-            "log_file_name": 'test/mylog.log',
-        }
-        automl.fit(X_train = X_train, y_train = y_train,
-            **automl_settings)
+    .. code-block:: python
+
+        automl = AutoML()
+        automl_settings = {
+            "time_budget": 60,
+            "metric": 'accuracy',
+            "task": 'classification',
+            "log_file_name": 'test/mylog.log',
+        }
+        automl.fit(X_train = X_train, y_train = y_train,
+            **automl_settings)

     '''

     def __init__(self):
@@ -66,14 +53,24 @@ class AutoML:

     @property
     def model_history(self):
+        '''A dictionary of iter->model, storing the models when
+        the best model is updated each time.
+        '''
         return self._model_history

     @property
     def config_history(self):
+        '''A dictionary of iter->(estimator, config, time),
+        storing the best estimator, config, and the time when the best
+        model is updated each time.
+        '''
         return self._config_history

     @property
     def model(self):
+        '''An object with `predict()` and `predict_proba()` method (for
+        classification), storing the best trained model.
+        '''
         if self._trained_estimator:
             return self._trained_estimator.model
         else:
@@ -81,14 +78,18 @@ class AutoML:

     @property
     def best_estimator(self):
+        '''A string indicating the best estimator found.'''
         return self._best_estimator

     @property
     def best_iteration(self):
+        '''An integer of the iteration number where the best
+        config is found.'''
         return self._best_iteration

     @property
     def best_config(self):
+        '''A dictionary of the best configuration.'''
         return self._selected.best_config[0]

     @property
@@ -97,10 +98,13 @@ class AutoML:

     @property
     def best_config_train_time(self):
+        '''A float of the seconds taken by training the
+        best config.'''
         return self.best_train_time

     @property
     def classes_(self):
+        '''A list of n_classes elements for class labels.'''
         if self.label_transformer:
             return self.label_transformer.classes_.tolist()
         if self._trained_estimator:
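Note: the hunks above move the attribute documentation into property docstrings. A minimal sketch of reading these accessors after a fit, assuming training data X_train/y_train and the settings from the class docstring example:

```python
# A sketch, not part of the commit: exercise the documented properties
# after a fit() call. X_train and y_train are assumed to exist.
from flaml import AutoML

automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train, time_budget=60,
           metric='accuracy', task='classification',
           log_file_name='test/mylog.log')

print(automl.best_estimator)          # string name of the best estimator
print(automl.best_config)             # dictionary of the best configuration
print(automl.best_config_train_time)  # seconds spent training the best config
print(automl.classes_)                # class labels (classification tasks)
```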
@@ -111,10 +115,10 @@ class AutoML:
         '''Predict label from features.

         Args:
-            X_test: A numpy array of featurized instances, shape n*m.
+            X_test: A numpy array of featurized instances, shape n * m.

         Returns:
-            A numpy array of shape n*1 -- each element is a predicted class
+            A numpy array of shape n * 1 - - each element is a predicted class
             label for an instance.
         '''
         X_test = self.preprocess(X_test)
@@ -132,11 +136,11 @@ class AutoML:
         classification problems.

         Args:
-            X_test: A numpy array of featurized instances, shape n*m.
+            X_test: A numpy array of featurized instances, shape n * m.

         Returns:
-            A numpy array of shape n*c. c is the # classes. Each element at
-            (i,j) is the probability for instance i to be in class j.
+            A numpy array of shape n * c. c is the # classes. Each element at
+            (i, j) is the probability for instance i to be in class j.
         '''
         X_test = self.preprocess(X_test)
         proba = self._trained_estimator.predict_proba(X_test)
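Note: per the docstrings above, predict returns an n * 1 array of labels and predict_proba an n * c array of class probabilities. A hedged usage sketch, assuming a fitted classifier automl and features X_test of shape n * m:

```python
# A sketch of the documented shapes; automl and X_test are assumed.
y_pred = automl.predict(X_test)       # one predicted label per instance
proba = automl.predict_proba(X_test)  # row i, column j: P(instance i in class j)
assert proba.shape[1] == len(automl.classes_)  # one column per class
```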
@@ -298,14 +302,14 @@ class AutoML:
             random_state=1)
         X_train = concat(X_first, X_train)
         y_train = concat(label_set,
-            y_train) if self.df else np.concatenate([label_set, y_train])
+                         y_train) if self.df else np.concatenate([label_set, y_train])
         X_val = concat(X_first, X_val)
         y_val = concat(label_set,
-            y_val) if self.df else np.concatenate([label_set, y_val])
+                       y_val) if self.df else np.concatenate([label_set, y_val])
         _, y_train_counts_elements = np.unique(y_train,
-            return_counts=True)
+                                               return_counts=True)
         _, y_val_counts_elements = np.unique(y_val,
-            return_counts=True)
+                                             return_counts=True)
         logger.debug(
             f"""{self.split_type} split for y_train \
 {y_train_counts_elements}, \
@@ -396,7 +400,7 @@ class AutoML:
             learner_class: A subclass of BaseEstimator
             size_estimate: A function from a config to its memory size in float
             cost_relative2lgbm: A float number for the training cost ratio with
-                respect to lightgbm (when both use the initial config)
+                respect to lightgbm(when both use the initial config)
         '''
         self._custom_learners[learner_name] = learner_class
         self._eti_ini[learner_name] = cost_relative2lgbm
@@ -450,14 +454,14 @@ class AutoML:
         Args:
             time_budget: A float number of the time budget in seconds
             log_file_name: A string of the log file name
-            X_train: A numpy array of training data in shape n*m
-            y_train: A numpy array of labels in shape n*1
+            X_train: A numpy array of training data in shape n * m
+            y_train: A numpy array of labels in shape n * 1
             task: A string of the task type, e.g.,
                 'classification', 'regression'
             eval_method: A string of resampling strategy, one of
                 ['auto', 'cv', 'holdout']
             split_ratio: A float of the validation data percentage for holdout
-            n_splits: An integer of the number of folds for cross-validation
+            n_splits: An integer of the number of folds for cross - validation
             n_jobs: An integer of the number of threads for training
             train_best: A boolean of whether to train the best config in the
                 time budget; if false, train the last config in the budget
@@ -507,7 +511,8 @@ class AutoML:
             self._trained_estimator = BaseEstimator()
             self._trained_estimator.model = None
             return training_duration
-        if not best: return
+        if not best:
+            return
         best_estimator = best.learner
         best_config = best.config
         sample_size = len(self.y_train_all) if train_full \
@@ -581,29 +586,36 @@ class AutoML:

         Args:
             X_train: A numpy array or a pandas dataframe of training data in
-                shape n*m
-            y_train: A numpy array or a pandas series of labels in shape n*1
+                shape n * m
+            y_train: A numpy array or a pandas series of labels in shape n * 1
             dataframe: A dataframe of training data including label column
             label: A str of the label column name
             Note: If X_train and y_train are provided,
                 dataframe and label are ignored;
                 If not, dataframe and label must be provided.
             metric: A string of the metric name or a function,
-                e.g., 'accuracy','roc_auc','f1','log_loss','mae','mse','r2'
+                e.g., 'accuracy', 'roc_auc', 'f1', 'log_loss', 'mae', 'mse', 'r2'
                 if passing a customized metric function, the function needs to
-                have the follwing signature
+                have the follwing signature:

-                def metric(X_test, y_test, estimator, labels, X_train, y_train):
-                    return metric_to_minimize, metrics_to_log
+            .. code-block:: python
+
+                def metric(X_test, y_test, estimator, labels, X_train, y_train):
+                    return metric_to_minimize, metrics_to_log

-                which returns a float number as the minimization objective,
+            which returns a float number as the minimization objective,
             and a tuple of floats as the metrics to log
             task: A string of the task type, e.g.,
                 'classification', 'regression'
             n_jobs: An integer of the number of threads for training
             log_file_name: A string of the log file name
             estimator_list: A list of strings for estimator names, or 'auto'
-                e.g., ['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
+                e.g.,
+
+            .. code-block:: python
+
+                ['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
+
             time_budget: A float number of the time budget in seconds
             max_iter: An integer of the maximal number of iterations
             sample: A boolean of whether to sample the training data during
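Note: for the customized metric signature documented above, a minimal sketch of a conforming function; the sklearn import is illustrative and not part of this commit:

```python
# Must return a float to minimize plus a tuple of metrics to log,
# matching the signature in the fit() docstring.
from sklearn.metrics import accuracy_score

def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
    test_acc = accuracy_score(y_test, estimator.predict(X_test))
    train_acc = accuracy_score(y_train, estimator.predict(X_train))
    # minimize the test error; log both accuracies
    return 1.0 - test_acc, (test_acc, train_acc)

# e.g. automl.fit(X_train=X_train, y_train=y_train, metric=custom_metric, ...)
```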
@@ -611,16 +623,17 @@ class AutoML:
             eval_method: A string of resampling strategy, one of
                 ['auto', 'cv', 'holdout']
             split_ratio: A float of the valiation data percentage for holdout
-            n_splits: An integer of the number of folds for cross-validation
-            log_type: A string of the log type, one of ['better', 'all', 'new']
+            n_splits: An integer of the number of folds for cross - validation
+            log_type: A string of the log type, one of
+                ['better', 'all', 'new']
                 'better' only logs configs with better loss than previos iters
                 'all' logs all the tried configs
-                'new' only logs non-redundant configs
+                'new' only logs non - redundant configs
             model_history: A boolean of whether to keep the history of best
                 models in the history property. Make sure memory is large
                 enough if setting to True.
             log_training_metric: A boolean of whether to log the training
                 metric for each model.
             mem_thres: A float of the memory size constraint in bytes
             X_val: None | a numpy array or a pandas dataframe of validation data
             y_val: None | a numpy array or a pandas series of validation labels
@@ -1 +1 @@
-__version__="0.1.1"
+__version__ = "0.1.2"