API docs #6 (#13) and update version to 0.1.2

This commit is contained in:
Eric Zhu 2020-12-15 00:57:30 -08:00 committed by GitHub
parent 4ce908f42e
commit 0fb3e04fc3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 274 additions and 92 deletions

View File

@ -19,41 +19,66 @@ jobs:
python-version: [3.6, 3.7, 3.8]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: If mac, install libomp to facilitate lgbm install
if: matrix.os == 'macOS-latest'
run: |
brew install libomp
export CC=/usr/bin/clang
export CXX=/usr/bin/clang++
export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp"
export CFLAGS="$CFLAGS -I/usr/local/opt/libomp/include"
export CXXFLAGS="$CXXFLAGS -I/usr/local/opt/libomp/include"
export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
- name: Install packages and dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest coverage
pip install -e .
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest test
- name: Coverage
run: |
coverage run -a -m pytest test
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
file: ./coverage.xml
flags: unittests
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: If mac, install libomp to facilitate lgbm install
if: matrix.os == 'macOS-latest'
run: |
brew install libomp
export CC=/usr/bin/clang
export CXX=/usr/bin/clang++
export CPPFLAGS="$CPPFLAGS -Xpreprocessor -fopenmp"
export CFLAGS="$CFLAGS -I/usr/local/opt/libomp/include"
export CXXFLAGS="$CXXFLAGS -I/usr/local/opt/libomp/include"
export LDFLAGS="$LDFLAGS -Wl,-rpath,/usr/local/opt/libomp/lib -L/usr/local/opt/libomp/lib -lomp"
- name: Install packages and dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest coverage
pip install -e .
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest test
- name: Coverage
run: |
coverage run -a -m pytest test
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
file: ./coverage.xml
flags: unittests
docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.8'
- name: Compile documentation
run: |
pip install -e .
python -m pip install sphinx sphinx_rtd_theme
cd docs
make html
- name: Deploy to GitHub pages
if: ${{ github.ref == 'refs/heads/main' }}
uses: JamesIves/github-pages-deploy-action@3.6.2
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
BRANCH: gh-pages
FOLDER: docs/_build/html
CLEAN: true

20
docs/Makefile Normal file
View File

@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS  ?=
SPHINXBUILD ?= sphinx-build
# Source and build directories are fixed relative to this Makefile.
SOURCEDIR   = .
BUILDDIR    = _build

# Put it first so that "make" without argument is like "make help".
# NOTE: recipe lines below MUST begin with a hard TAB character.
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets (html, latexpdf, ...) to Sphinx
# using the new "make mode" option. $(O) is meant as a shortcut for
# $(SPHINXOPTS), e.g. `make html O=-W`.
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

60
docs/conf.py Normal file
View File

@ -0,0 +1,60 @@
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
# import os
# import sys
# sys.path.insert(0, os.path.abspath('.'))
# NOTE(review): no sys.path insertion is done here, so autodoc presumably
# relies on the package being pip-installed (the CI runs `pip install -e .`
# before `make html`) — confirm if docs fail to import flaml.
# -- Project information -----------------------------------------------------
project = 'FLAML'
copyright = '2020, FLAML Team'
author = 'FLAML Team'
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# autodoc pulls API docs from docstrings; napoleon parses the Google-style
# docstring sections used in this project; sphinx_rtd_theme must be listed
# here as well as in html_theme below.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.napoleon',
    'sphinx.ext.doctest',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.viewcode',
    'sphinx.ext.githubpages',
    'sphinx_rtd_theme',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

29
docs/index.rst Normal file
View File

@ -0,0 +1,29 @@
.. FLAML documentation master file, created by
sphinx-quickstart on Mon Dec 14 23:33:24 2020.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
.. Welcome to FLAML's documentation!
.. =================================
.. .. toctree::
.. :maxdepth: 2
.. :caption: Contents:
FLAML API Documentation
=======================
AutoML
------
.. autoclass:: flaml.AutoML
:members:
.. Indices and tables
.. ==================
.. * :ref:`genindex`
.. * :ref:`modindex`
.. * :ref:`search`

35
docs/make.bat Normal file
View File

@ -0,0 +1,35 @@
@ECHO OFF
REM Run from the directory containing this script so relative paths resolve;
REM restored by the matching `popd` at the end.
pushd %~dp0
REM Command file for Sphinx documentation
REM Allow the caller to override the sphinx-build executable via the
REM SPHINXBUILD environment variable; default to the one on PATH.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
REM With no target argument, show the Sphinx help listing.
if "%1" == "" goto help
REM Probe that sphinx-build is runnable; errorlevel 9009 means the Windows
REM shell could not find the command.
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM Forward the requested target (html, latex, ...) to Sphinx "make mode".
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

View File

@ -28,33 +28,20 @@ logger = logging.getLogger(__name__)
class AutoML:
'''The AutoML class
Attributes:
model: An object with predict() and predict_proba() method (for
classification), storing the best trained model.
model_history: A dictionary of iter->model, storing the models when
the best model is updated each time
config_history: A dictionary of iter->(estimator, config, time),
storing the best estimator, config, and the time when the best
model is updated each time
classes_: A list of n_classes elements for class labels
best_iteration: An integer of the iteration number where the best
config is found
best_estimator: A string indicating the best estimator found.
best_config: A dictionary of the best configuration.
best_config_train_time: A float of the seconds taken by training the
best config
Example:
Typical usage example:
.. code-block:: python
automl = AutoML()
automl_settings = {
"time_budget": 60,
"metric": 'accuracy',
"task": 'classification',
"log_file_name": 'test/mylog.log',
}
automl.fit(X_train = X_train, y_train = y_train,
**automl_settings)
automl = AutoML()
automl_settings = {
"time_budget": 60,
"metric": 'accuracy',
"task": 'classification',
"log_file_name": 'test/mylog.log',
}
automl.fit(X_train = X_train, y_train = y_train,
**automl_settings)
'''
def __init__(self):
@ -66,14 +53,24 @@ class AutoML:
@property
def model_history(self):
'''A dictionary of iter->model, storing the models when
the best model is updated each time.
'''
return self._model_history
@property
def config_history(self):
'''A dictionary of iter->(estimator, config, time),
storing the best estimator, config, and the time when the best
model is updated each time.
'''
return self._config_history
@property
def model(self):
'''An object with `predict()` and `predict_proba()` method (for
classification), storing the best trained model.
'''
if self._trained_estimator:
return self._trained_estimator.model
else:
@ -81,14 +78,18 @@ class AutoML:
@property
def best_estimator(self):
'''A string indicating the best estimator found.'''
return self._best_estimator
@property
def best_iteration(self):
'''An integer of the iteration number where the best
config is found.'''
return self._best_iteration
@property
def best_config(self):
'''A dictionary of the best configuration.'''
return self._selected.best_config[0]
@property
@ -97,10 +98,13 @@ class AutoML:
@property
def best_config_train_time(self):
'''A float of the seconds taken by training the
best config.'''
return self.best_train_time
@property
def classes_(self):
'''A list of n_classes elements for class labels.'''
if self.label_transformer:
return self.label_transformer.classes_.tolist()
if self._trained_estimator:
@ -111,10 +115,10 @@ class AutoML:
'''Predict label from features.
Args:
X_test: A numpy array of featurized instances, shape n*m.
X_test: A numpy array of featurized instances, shape n * m.
Returns:
A numpy array of shape n*1 -- each element is a predicted class
A numpy array of shape n * 1 -- each element is a predicted class
label for an instance.
'''
X_test = self.preprocess(X_test)
@ -132,11 +136,11 @@ class AutoML:
classification problems.
Args:
X_test: A numpy array of featurized instances, shape n*m.
X_test: A numpy array of featurized instances, shape n * m.
Returns:
A numpy array of shape n*c. c is the # classes. Each element at
(i,j) is the probability for instance i to be in class j.
A numpy array of shape n * c. c is the # classes. Each element at
(i, j) is the probability for instance i to be in class j.
'''
X_test = self.preprocess(X_test)
proba = self._trained_estimator.predict_proba(X_test)
@ -298,14 +302,14 @@ class AutoML:
random_state=1)
X_train = concat(X_first, X_train)
y_train = concat(label_set,
y_train) if self.df else np.concatenate([label_set, y_train])
y_train) if self.df else np.concatenate([label_set, y_train])
X_val = concat(X_first, X_val)
y_val = concat(label_set,
y_val) if self.df else np.concatenate([label_set, y_val])
y_val) if self.df else np.concatenate([label_set, y_val])
_, y_train_counts_elements = np.unique(y_train,
return_counts=True)
return_counts=True)
_, y_val_counts_elements = np.unique(y_val,
return_counts=True)
return_counts=True)
logger.debug(
f"""{self.split_type} split for y_train \
{y_train_counts_elements}, \
@ -396,7 +400,7 @@ class AutoML:
learner_class: A subclass of BaseEstimator
size_estimate: A function from a config to its memory size in float
cost_relative2lgbm: A float number for the training cost ratio with
respect to lightgbm (when both use the initial config)
respect to lightgbm (when both use the initial config)
'''
self._custom_learners[learner_name] = learner_class
self._eti_ini[learner_name] = cost_relative2lgbm
@ -450,14 +454,14 @@ class AutoML:
Args:
time_budget: A float number of the time budget in seconds
log_file_name: A string of the log file name
X_train: A numpy array of training data in shape n*m
y_train: A numpy array of labels in shape n*1
X_train: A numpy array of training data in shape n * m
y_train: A numpy array of labels in shape n * 1
task: A string of the task type, e.g.,
'classification', 'regression'
eval_method: A string of resampling strategy, one of
['auto', 'cv', 'holdout']
split_ratio: A float of the validation data percentage for holdout
n_splits: An integer of the number of folds for cross-validation
n_splits: An integer of the number of folds for cross-validation
n_jobs: An integer of the number of threads for training
train_best: A boolean of whether to train the best config in the
time budget; if false, train the last config in the budget
@ -507,7 +511,8 @@ class AutoML:
self._trained_estimator = BaseEstimator()
self._trained_estimator.model = None
return training_duration
if not best: return
if not best:
return
best_estimator = best.learner
best_config = best.config
sample_size = len(self.y_train_all) if train_full \
@ -581,29 +586,36 @@ class AutoML:
Args:
X_train: A numpy array or a pandas dataframe of training data in
shape n*m
y_train: A numpy array or a pandas series of labels in shape n*1
shape n * m
y_train: A numpy array or a pandas series of labels in shape n * 1
dataframe: A dataframe of training data including label column
label: A str of the label column name
Note: If X_train and y_train are provided,
Note: If X_train and y_train are provided,
dataframe and label are ignored;
If not, dataframe and label must be provided.
metric: A string of the metric name or a function,
e.g., 'accuracy','roc_auc','f1','log_loss','mae','mse','r2'
e.g., 'accuracy', 'roc_auc', 'f1', 'log_loss', 'mae', 'mse', 'r2'
if passing a customized metric function, the function needs to
have the following signature
have the following signature:
def metric(X_test, y_test, estimator, labels, X_train, y_train):
return metric_to_minimize, metrics_to_log
.. code-block:: python
which returns a float number as the minimization objective,
def metric(X_test, y_test, estimator, labels, X_train, y_train):
return metric_to_minimize, metrics_to_log
which returns a float number as the minimization objective,
and a tuple of floats as the metrics to log
task: A string of the task type, e.g.,
'classification', 'regression'
n_jobs: An integer of the number of threads for training
log_file_name: A string of the log file name
estimator_list: A list of strings for estimator names, or 'auto'
e.g., ['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
e.g.,
.. code-block:: python
['lgbm', 'xgboost', 'catboost', 'rf', 'extra_tree']
time_budget: A float number of the time budget in seconds
max_iter: An integer of the maximal number of iterations
sample: A boolean of whether to sample the training data during
@ -611,16 +623,17 @@ class AutoML:
eval_method: A string of resampling strategy, one of
['auto', 'cv', 'holdout']
split_ratio: A float of the validation data percentage for holdout
n_splits: An integer of the number of folds for cross-validation
log_type: A string of the log type, one of ['better', 'all', 'new']
n_splits: An integer of the number of folds for cross-validation
log_type: A string of the log type, one of
['better', 'all', 'new']
'better' only logs configs with better loss than previous iters
'all' logs all the tried configs
'new' only logs non-redundant configs
'new' only logs non-redundant configs
model_history: A boolean of whether to keep the history of best
models in the history property. Make sure memory is large
enough if setting to True.
log_training_metric: A boolean of whether to log the training
metric for each model.
log_training_metric: A boolean of whether to log the training
metric for each model.
mem_thres: A float of the memory size constraint in bytes
X_val: None | a numpy array or a pandas dataframe of validation data
y_val: None | a numpy array or a pandas series of validation labels

View File

@ -1 +1 @@
__version__="0.1.1"
__version__ = "0.1.2"