v0.1.3 Set default logging level to INFO (#14)

* set default logging level to INFO

* remove unnecessary imports

* simplify the add_learner API for future compatibility

* add a test for a customized learner

* add rgf-python as a test dependency

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Author: Chi Wang, 2020-12-15 08:10:43 -08:00 (committed by GitHub)
Commit: cb5ce4e3a6, parent bea2ba8135
10 changed files with 219 additions and 136 deletions

File: GitHub Actions CI workflow

@@ -37,8 +37,7 @@ jobs:
     - name: Install packages and dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install flake8 pytest coverage
-        pip install -e .
+        pip install -e .[test]
     - name: Lint with flake8
       run: |
        # stop the build if there are Python syntax errors or undefined names

File: flaml/__init__.py

@@ -1,12 +1,10 @@
 from flaml.automl import AutoML
-import logging
-from flaml.model import BaseEstimator
-from flaml.data import get_output_from_log
 from flaml.version import __version__
+import logging
 
 # Set the root logger.
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
 
 # Add the console handler.
 _ch = logging.StreamHandler()
@@ -14,4 +12,4 @@ logger_formatter = logging.Formatter(
     '[%(name)s: %(asctime)s] {%(lineno)d} %(levelname)s - %(message)s',
     '%m-%d %H:%M:%S')
 _ch.setFormatter(logger_formatter)
 logger.addHandler(_ch)
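
Note: importing flaml now configures the 'flaml' logger (getLogger(__name__) in this package) at INFO and attaches a console handler, so applications that want quieter output must raise the threshold themselves. A minimal sketch using only the standard logging module:

    import logging
    import flaml  # the import sets the 'flaml' logger to INFO and adds a handler

    # Raise the threshold to suppress flaml's INFO-level messages.
    logging.getLogger('flaml').setLevel(logging.WARNING)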

File: flaml/automl.py

@@ -390,22 +390,22 @@ class AutoML:
     def add_learner(self,
                     learner_name,
-                    learner_class,
-                    size_estimate=lambda config: 'unknown',
-                    cost_relative2lgbm=1):
+                    learner_class):
         '''Add a customized learner
 
         Args:
             learner_name: A string of the learner's name
             learner_class: A subclass of BaseEstimator
-            size_estimate: A function from a config to its memory size in float
-            cost_relative2lgbm: A float number for the training cost ratio with
-                respect to lightgbm (when both use the initial config)
         '''
         self._custom_learners[learner_name] = learner_class
+        cost_relative2lgbm = 1
+        # cost_relative2lgbm: A float number for the training cost ratio with
+        # respect to lightgbm (when both use the initial config)
         self._eti_ini[learner_name] = cost_relative2lgbm
         self._config_space_info[learner_name] = \
             learner_class.params_configsearch_info
+        # size_estimate: A function from a config to its memory size in float
+        size_estimate = lambda config: 1.0
         self._custom_size_estimate[learner_name] = size_estimate
 
     def get_estimator_from_log(self, log_file_name, record_id, objective):
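
Note: size_estimate and cost_relative2lgbm are now fixed inside add_learner (1, and a constant 1.0 estimate) rather than caller-supplied, which keeps the public signature stable if those internals change. A minimal sketch of the new call, where MyEstimator is a hypothetical BaseEstimator subclass:

    automl = AutoML()
    # Only the name and the learner class are passed now; the removed
    # keyword arguments would raise a TypeError with the new signature.
    automl.add_learner(learner_name='my_estimator', learner_class=MyEstimator)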

File: flaml/data.py

@@ -6,7 +6,6 @@
 import numpy as np
 from scipy.sparse import vstack, issparse
 import pandas as pd
-from sklearn.preprocessing import LabelEncoder
 from .training_log import training_log_reader

File: flaml/version.py

@@ -1 +1 @@
-__version__ = "0.1.2"
+__version__ = "0.1.3"

File diff suppressed because one or more lines are too long

File: setup.py

@@ -45,6 +45,7 @@ setuptools.setup(
             "flake8>=3.8.4",
             "pytest>=6.1.1",
             "coverage>=5.3",
+            "rgf-python",
         ],
     },
     classifiers=[

File: AutoML tests

@@ -2,9 +2,55 @@ import unittest
 import numpy as np
 import scipy.sparse
-from sklearn.datasets import load_boston, load_iris
-from flaml import AutoML, get_output_from_log
+from sklearn.datasets import load_boston, load_iris, load_wine
+from flaml import AutoML
+from flaml.data import get_output_from_log
+from flaml.model import BaseEstimator
+from flaml.space import ConfigSearchInfo
+from rgf.sklearn import RGFClassifier, RGFRegressor
+
+
+class MyRegularizedGreedyForest(BaseEstimator):
+
+    # search space
+    params_configsearch_info = {
+        'max_leaf': ConfigSearchInfo(name='max_leaf', type=int,
+                                     lower=4, init=4, upper=10000),
+        'n_iter': ConfigSearchInfo(name='n_iter', type=int,
+                                   lower=1, init=1, upper=32768),
+        'n_tree_search': ConfigSearchInfo(name='n_tree_search', type=int,
+                                          lower=1, init=1, upper=32768),
+        'opt_interval': ConfigSearchInfo(name='opt_interval', type=int,
+                                         lower=1, init=100, upper=10000),
+        'learning_rate': ConfigSearchInfo(name='learning_rate', type=float,
+                                          lower=0.01, init=1.0, upper=20.0),
+        'min_samples_leaf': ConfigSearchInfo(name='min_samples_leaf', type=int,
+                                             lower=1, init=20, upper=20),
+    }
+
+    def __init__(self, objective_name='binary:logistic', n_jobs=1,
+                 max_leaf=1000, n_iter=1, n_tree_search=1, opt_interval=1,
+                 learning_rate=1.0, min_samples_leaf=1):
+        self.objective_name = objective_name
+        # regression objectives map to the RGF regressor, all others
+        # to the classifier
+        if 'regression' in objective_name:
+            self.estimator_class = RGFRegressor
+        else:
+            self.estimator_class = RGFClassifier
+        # round integer hyperparameters
+        self.params = {
+            'max_leaf': int(round(max_leaf)),
+            'n_iter': int(round(n_iter)),
+            'n_tree_search': int(round(n_tree_search)),
+            'opt_interval': int(round(opt_interval)),
+            'learning_rate': learning_rate,
+            'min_samples_leaf': int(round(min_samples_leaf)),
+            'n_jobs': n_jobs,
+        }
 
 
 def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
@@ -19,6 +65,23 @@ def custom_metric(X_test, y_test, estimator, labels, X_train, y_train):
 
 class TestAutoML(unittest.TestCase):
 
+    def test_custom_learner(self):
+        automl = AutoML()
+        automl.add_learner(learner_name='RGF',
+                           learner_class=MyRegularizedGreedyForest)
+        X_train, y_train = load_wine(return_X_y=True)
+        settings = {
+            "time_budget": 10,  # total running time in seconds
+            "estimator_list": ['RGF', 'lgbm', 'rf', 'xgboost'],
+            "task": 'classification',  # task type
+            "sample": True,  # whether to subsample training data
+            "log_file_name": "test/wine.log",
+            "log_training_metric": True,  # whether to log training metric
+        }
+        # the main flaml automl API
+        automl.fit(X_train=X_train, y_train=y_train, **settings)
+
     def test_dataframe(self):
         self.test_classification(True)
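
For reference, the same customized-learner flow outside the test harness, as a sketch; the predict call is an assumption about the AutoML surface at this version, not something shown in this diff:

    from flaml import AutoML
    from sklearn.datasets import load_wine

    automl = AutoML()
    automl.add_learner(learner_name='RGF',
                       learner_class=MyRegularizedGreedyForest)
    X_train, y_train = load_wine(return_X_y=True)
    automl.fit(X_train=X_train, y_train=y_train, time_budget=10,
               task='classification', estimator_list=['RGF'],
               log_file_name='test/wine.log')
    y_pred = automl.predict(X_train)  # assumed API, not shown in this diff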

File: logging tests

@@ -36,9 +36,9 @@ class TestLogging(unittest.TestCase):
                 "model_history": True
             }
             X_train, y_train = load_boston(return_X_y=True)
-            n = len(y_train)
-            automl_experiment.fit(X_train=X_train[:n >> 1], y_train=y_train[:n >> 1],
-                                  X_val=X_train[n >> 1:], y_val=y_train[n >> 1:],
+            n = len(y_train) >> 1
+            automl_experiment.fit(X_train=X_train[:n], y_train=y_train[:n],
+                                  X_val=X_train[n:], y_val=y_train[n:],
                                   **automl_settings)
 
             # Check if the log buffer is populated.
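
The refactor computes the split point once: len(y_train) >> 1 is a right shift by one bit, i.e. floor division by two. Equivalent plain-Python check:

    y = list(range(506))       # load_boston returns 506 samples
    n = len(y) >> 1            # same as len(y) // 2
    assert n == 253 == len(y) // 2
    train, val = y[:n], y[n:]  # first half fits, second half validates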

File: another test module

@@ -1,7 +1,5 @@
 import os
 import unittest
-import logging
-import json
 from tempfile import TemporaryDirectory
 
 from sklearn.datasets import load_boston