autogen/test/test_autovw.py

import unittest

import numpy as np
import scipy.sparse

import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
import time
import logging
from flaml.tune import loguniform, polynomial_expansion_set
from vowpalwabbit import pyvw
from flaml import AutoVW
import string
import os
import openml
VW_DS_DIR = 'test/data/'
NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)
logger = logging.getLogger(__name__)


def oml_to_vw_w_grouping(X, y, ds_dir, fname, orginal_dim, group_num,
                         grouping_method='sequential'):
    # split all_indexes into # group_num of groups
    max_size_per_group = int(np.ceil(orginal_dim / float(group_num)))
    # sequential grouping
    if grouping_method == 'sequential':
        group_indexes = []  # lists of lists
        for i in range(group_num):
            indexes = [ind for ind in range(i * max_size_per_group,
                       min((i + 1) * max_size_per_group, orginal_dim))]
            if len(indexes) > 0:
                group_indexes.append(indexes)
        print(group_indexes)
    else:
        NotImplementedError
    if group_indexes:
        if not os.path.exists(ds_dir):
            os.makedirs(ds_dir)
        with open(os.path.join(ds_dir, fname), 'w') as f:
            if isinstance(X, pd.DataFrame):
                raise NotImplementedError
            elif isinstance(X, np.ndarray):
                for i in range(len(X)):
                    NS_content = []
                    for zz in range(len(group_indexes)):
                        ns_features = ' '.join('{}:{:.6f}'.format(ind, X[i][ind]
                                                                  ) for ind in group_indexes[zz])
                        NS_content.append(ns_features)
                    ns_line = '{} |{}'.format(str(y[i]), '|'.join(
                                              '{} {}'.format(NS_LIST[j], NS_content[j]
                                                             ) for j in range(len(group_indexes))))
                    f.write(ns_line)
                    f.write('\n')
            elif isinstance(X, scipy.sparse.csr_matrix):
                print('NotImplementedError for sparse data')
                NotImplementedError


def save_vw_dataset_w_ns(X, y, did, ds_dir, max_ns_num, is_regression):
    """ convert openml dataset to vw example and save to file
    """
    print('is_regression', is_regression)
    if is_regression:
        fname = 'ds_{}_{}_{}.vw'.format(did, max_ns_num, 0)
        print('dataset size', X.shape[0], X.shape[1])
        print('saving data', did, ds_dir, fname)
        dim = X.shape[1]
        oml_to_vw_w_grouping(X, y, ds_dir, fname, dim, group_num=max_ns_num)
    else:
        NotImplementedError


def shuffle_data(X, y, seed):
    try:
        n = len(X)
    except ValueError:
        n = X.getnnz()

    perm = np.random.RandomState(seed=seed).permutation(n)
    X_shuf = X[perm, :]
    y_shuf = y[perm]
    return X_shuf, y_shuf


def get_oml_to_vw(did, max_ns_num, ds_dir=VW_DS_DIR):
    success = False
    print('-----getting oml dataset-------', did)
    ds = openml.datasets.get_dataset(did)
    target_attribute = ds.default_target_attribute
    # if target_attribute is None and did in OML_target_attribute_dict:
    #     target_attribute = OML_target_attribute_dict[did]

    print('target=ds.default_target_attribute', target_attribute)
    data = ds.get_data(target=target_attribute, dataset_format='array')
    X, y = data[0], data[1]  # return X: pd DataFrame, y: pd series
    import scipy
    if scipy.sparse.issparse(X):
        X = scipy.sparse.csr_matrix.toarray(X)
        print('is sparse matrix')
    if data and isinstance(X, np.ndarray):
        print('-----converting oml to vw and and saving oml dataset-------')
        save_vw_dataset_w_ns(X, y, did, ds_dir, max_ns_num, is_regression=True)
        success = True
    else:
        print('---failed to convert/save oml dataset to vw!!!----')
    try:
        X, y = data[0], data[1]  # return X: pd DataFrame, y: pd series
        if data and isinstance(X, np.ndarray):
            print('-----converting oml to vw and and saving oml dataset-------')
            save_vw_dataset_w_ns(X, y, did, ds_dir, max_ns_num, is_regression=True)
            success = True
        else:
            print('---failed to convert/save oml dataset to vw!!!----')
    except ValueError:
        print('-------------failed to get oml dataset!!!', did)
    return success


def load_vw_dataset(did, ds_dir, is_regression, max_ns_num):
    import os
    if is_regression:
        # the second field specifies the largest number of namespaces using.
        fname = 'ds_{}_{}_{}.vw'.format(did, max_ns_num, 0)
        vw_dataset_file = os.path.join(ds_dir, fname)
        # if file does not exist, generate and save the datasets
        if not os.path.exists(vw_dataset_file) or os.stat(vw_dataset_file).st_size < 1000:
            get_oml_to_vw(did, max_ns_num)
        print(ds_dir, vw_dataset_file)
        if not os.path.exists(ds_dir):
            os.makedirs(ds_dir)
        with open(os.path.join(ds_dir, fname), 'r') as f:
            vw_content = f.read().splitlines()
            print(type(vw_content), len(vw_content))
        return vw_content


def get_data(iter_num=None, dataset_id=None, vw_format=True,
             max_ns_num=10, shuffle=False, use_log=True, dataset_type='regression'):
    logging.info('generating data')
    LOG_TRANSFORMATION_THRESHOLD = 100
    # get data from simulation
    import random
    vw_examples = None
    data_id = int(dataset_id)
    # loading oml dataset
    # data = OpenML2VWData(data_id, max_ns_num, dataset_type)
    # Y = data.Y
    if vw_format:
        # vw_examples = data.vw_examples
        vw_examples = load_vw_dataset(did=data_id, ds_dir=VW_DS_DIR, is_regression=True,
                                      max_ns_num=max_ns_num)
        Y = []
        for i, e in enumerate(vw_examples):
            Y.append(float(e.split('|')[0]))
    logger.debug('first data %s', vw_examples[0])
    # do data shuffling or log transformation for oml data when needed
    if shuffle:
        random.seed(54321)
        random.shuffle(vw_examples)

    # do log transformation
    unique_y = set(Y)
    min_y = min(unique_y)
    max_y = max(unique_y)
    if use_log and max((max_y - min_y), max_y) >= LOG_TRANSFORMATION_THRESHOLD:
        log_vw_examples = []
        for v in vw_examples:
            org_y = v.split('|')[0]
            y = float(v.split('|')[0])
            # shift y to ensure all y are positive
            if min_y <= 0:
                y = y + abs(min_y) + 1
            log_y = np.log(y)
            log_vw = v.replace(org_y + '|', str(log_y) + ' |')
            log_vw_examples.append(log_vw)
        logger.info('log_vw_examples %s', log_vw_examples[0:2])
        if log_vw_examples:
            return log_vw_examples
    return vw_examples, Y


class VowpalWabbitNamesspaceTuningProblem:

    def __init__(self, max_iter_num, dataset_id, ns_num, **kwargs):
        use_log = kwargs.get('use_log', True),
        shuffle = kwargs.get('shuffle', False)
        vw_format = kwargs.get('vw_format', True)
        print('dataset_id', dataset_id)
        self.vw_examples, self.Y = get_data(max_iter_num, dataset_id=dataset_id,
                                            vw_format=vw_format, max_ns_num=ns_num,
                                            shuffle=shuffle, use_log=use_log
                                            )
        self.max_iter_num = min(max_iter_num, len(self.Y))
        self._problem_info = {'max_iter_num': self.max_iter_num,
                              'dataset_id': dataset_id,
                              'ns_num': ns_num,
                              }
        self._problem_info.update(kwargs)
        self._fixed_hp_config = kwargs.get('fixed_hp_config', {})
        self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(self.vw_examples[0])
        self._raw_namespaces = list(self.namespace_feature_dim.keys())
        self._setup_search()

    def _setup_search(self):
        self._search_space = self._fixed_hp_config.copy()
        self._init_config = self._fixed_hp_config.copy()
        search_space = {'interactions':
                        polynomial_expansion_set(
                            init_monomials=set(self._raw_namespaces),
                            highest_poly_order=len(self._raw_namespaces),
                            allow_self_inter=False),
                        }
        init_config = {'interactions': set()}
        self._search_space.update(search_space)
        self._init_config.update(init_config)
        logger.info('search space %s %s %s', self._search_space, self._init_config, self._fixed_hp_config)

    @property
    def init_config(self):
        return self._init_config

    @property
    def search_space(self):
        return self._search_space


class VowpalWabbitNamesspaceLRTuningProblem(VowpalWabbitNamesspaceTuningProblem):

    def __init__(self, max_iter_num, dataset_id, ns_num, **kwargs):
        super().__init__(max_iter_num, dataset_id, ns_num, **kwargs)
        self._setup_search()

    def _setup_search(self):
        self._search_space = self._fixed_hp_config.copy()
        self._init_config = self._fixed_hp_config.copy()
        search_space = {'interactions':
                        polynomial_expansion_set(
                            init_monomials=set(self._raw_namespaces),
                            highest_poly_order=len(self._raw_namespaces),
                            allow_self_inter=False),
                        'learning_rate': loguniform(lower=2e-10, upper=1.0)
                        }
        init_config = {'interactions': set(), 'learning_rate': 0.5}
        self._search_space.update(search_space)
        self._init_config.update(init_config)
        logger.info('search space %s %s %s', self._search_space, self._init_config, self._fixed_hp_config)


def get_y_from_vw_example(vw_example):
    """ get y from a vw_example. this works for regression dataset
    """
    return float(vw_example.split('|')[0])


def get_loss(y_pred, y_true, loss_func='squared'):
    if 'squared' in loss_func:
        loss = mean_squared_error([y_pred], [y_true])
    elif 'absolute' in loss_func:
        loss = mean_absolute_error([y_pred], [y_true])
    else:
        loss = None
        raise NotImplementedError
    return loss


def online_learning_loop(iter_num, vw_examples, vw_alg, loss_func, method_name=''):
    """Implements the online learning loop.
    Args:
        iter_num (int): The total number of iterations
        vw_examples (list): A list of vw examples
        alg (alg instance): An algorithm instance has the following functions:
            - alg.learn(example)
            - alg.predict(example)
        loss_func (str): loss function
    Outputs:
        cumulative_loss_list (list): the list of cumulative loss from each iteration.
            It is returned for the convenience of visualization.
    """
    print('rerunning exp....', len(vw_examples), iter_num)
    loss_list = []
    y_predict_list = []
    for i in range(iter_num):
        vw_x = vw_examples[i]
        y_true = get_y_from_vw_example(vw_x)
        # predict step
        y_pred = vw_alg.predict(vw_x)
        # learn step
        vw_alg.learn(vw_x)
        # calculate one step loss
        loss = get_loss(y_pred, y_true, loss_func)
        loss_list.append(loss)
        y_predict_list.append([y_pred, y_true])

    return loss_list


def get_vw_tuning_problem(tuning_hp='NamesapceInteraction'):
    online_vw_exp_setting = {"max_live_model_num": 5,
                             "fixed_hp_config": {'alg': 'supervised', 'loss_function': 'squared'},
                             "ns_num": 10,
                             "max_iter_num": 10000,
                             }

    # construct openml problem setting based on basic experiment setting
    vw_oml_problem_args = {"max_iter_num": online_vw_exp_setting['max_iter_num'],
                           "dataset_id": '42183',
                           "ns_num": online_vw_exp_setting['ns_num'],
                           "fixed_hp_config": online_vw_exp_setting['fixed_hp_config'],
                           }
    if tuning_hp == 'NamesapceInteraction':
        vw_online_aml_problem = VowpalWabbitNamesspaceTuningProblem(**vw_oml_problem_args)
    elif tuning_hp == 'NamesapceInteraction+LearningRate':
        vw_online_aml_problem = VowpalWabbitNamesspaceLRTuningProblem(**vw_oml_problem_args)
    else:
        NotImplementedError

    return vw_oml_problem_args, vw_online_aml_problem


class TestAutoVW(unittest.TestCase):

    def test_vw_oml_problem_and_vanilla_vw(self):
        vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem()
        vanilla_vw = pyvw.vw(**vw_oml_problem_args["fixed_hp_config"])
        cumulative_loss_list = online_learning_loop(vw_online_aml_problem.max_iter_num,
                                                    vw_online_aml_problem.vw_examples,
                                                    vanilla_vw,
                                                    loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
                                                    )
        print('final average loss:', sum(cumulative_loss_list) / len(cumulative_loss_list))

    def test_supervised_vw_tune_namespace(self):
        # basic experiment setting
        vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem()
        autovw = AutoVW(max_live_model_num=5,
                        search_space=vw_online_aml_problem.search_space,
                        init_config=vw_online_aml_problem.init_config,
                        min_resource_lease='auto',
                        random_seed=2345)

        cumulative_loss_list = online_learning_loop(vw_online_aml_problem.max_iter_num,
                                                    vw_online_aml_problem.vw_examples,
                                                    autovw,
                                                    loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
                                                    )
        print('final average loss:', sum(cumulative_loss_list) / len(cumulative_loss_list))

    def test_supervised_vw_tune_namespace_learningrate(self):
        # basic experiment setting
        vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem(tuning_hp='NamesapceInteraction+LearningRate')
        autovw = AutoVW(max_live_model_num=5,
                        search_space=vw_online_aml_problem.search_space,
                        init_config=vw_online_aml_problem.init_config,
                        min_resource_lease='auto',
                        random_seed=2345)

        cumulative_loss_list = online_learning_loop(vw_online_aml_problem.max_iter_num,
                                                    vw_online_aml_problem.vw_examples,
                                                    autovw,
                                                    loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),
                                                    )
        print('final average loss:', sum(cumulative_loss_list) / len(cumulative_loss_list))

    def test_bandit_vw_tune_namespace(self):
        pass

    def test_bandit_vw_tune_namespace_learningrate(self):
        pass


if __name__ == "__main__":
    unittest.main()
Add ChaCha (#92) * pickle the AutoML object * get best model per estimator * test deberta * stateless API * pickle the AutoML object * get best model per estimator * test deberta * stateless API * prevent divide by zero * test roberta * BlendSearchTuner * sync * version number * update gitignore * delta time * reindex columns when dropping int-indexed columns * add seed * add seed in Args * merge * init upload of ChaCha * remove redundancy * add back catboost * improve AutoVW API * set min_resource_lease in VWOnlineTrial * docstr * rename * docstr * add docstr * improve API and documentation * fix name * docstr * naming * remove max_resource in scheduler * add TODO in flow2 * remove redundancy in rearcher * add input type * adapt code from ray.tune * move files * naming * documentation * fix import error * fix format issues * remove cb in worse than test * improve _generate_all_comb * remove ray tune * naming * VowpalWabbitTrial * import error * import error * merge test code * scheduler import * fix import * remove * import, minor bug and version * Float or Categorical * fix default * add test_autovw.py * add vowpalwabbit and openml * lint * reorg * lint * indent * add autovw notebook * update notebook * update log msg and autovw notebook * update autovw notebook * update autovw notebook * add available strings for model_select_policy * string for metric * Update vw format in flaml/onlineml/trial.py Co-authored-by: olgavrou <olgavrou@gmail.com> * make init_config optional * add _setup_trial_runner and update notebook * space Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com> Co-authored-by: Chi Wang <wang.chi@microsoft.com> Co-authored-by: Qingyun Wu <qiw@microsoft.com> Co-authored-by: olgavrou <olgavrou@gmail.com> 2021-06-02 22:08:24 -04:00			`import unittest`

			`import numpy as np`
			`import scipy.sparse`

			`import pandas as pd`
			`from sklearn.metrics import mean_squared_error, mean_absolute_error`
			`import time`
			`import logging`
			`from flaml.tune import loguniform, polynomial_expansion_set`
			`from vowpalwabbit import pyvw`
			`from flaml import AutoVW`
			`import string`
			`import os`
			`import openml`
			`VW_DS_DIR = 'test/data/'`
			`NS_LIST = list(string.ascii_lowercase) + list(string.ascii_uppercase)`
			`logger = logging.getLogger(__name__)`


			`def oml_to_vw_w_grouping(X, y, ds_dir, fname, orginal_dim, group_num,`
			`grouping_method='sequential'):`
			`# split all_indexes into # group_num of groups`
			`max_size_per_group = int(np.ceil(orginal_dim / float(group_num)))`
			`# sequential grouping`
			`if grouping_method == 'sequential':`
			`group_indexes = [] # lists of lists`
			`for i in range(group_num):`
			`indexes = [ind for ind in range(i * max_size_per_group,`
			`min((i + 1) * max_size_per_group, orginal_dim))]`
			`if len(indexes) > 0:`
			`group_indexes.append(indexes)`
			`print(group_indexes)`
			`else:`
			`NotImplementedError`
			`if group_indexes:`
			`if not os.path.exists(ds_dir):`
			`os.makedirs(ds_dir)`
			`with open(os.path.join(ds_dir, fname), 'w') as f:`
			`if isinstance(X, pd.DataFrame):`
			`raise NotImplementedError`
			`elif isinstance(X, np.ndarray):`
			`for i in range(len(X)):`
			`NS_content = []`
			`for zz in range(len(group_indexes)):`
			`ns_features = ' '.join('{}:{:.6f}'.format(ind, X[i][ind]`
			`) for ind in group_indexes[zz])`
			`NS_content.append(ns_features)`
			`ns_line = '{} \|{}'.format(str(y[i]), '\|'.join(`
			`'{} {}'.format(NS_LIST[j], NS_content[j]`
			`) for j in range(len(group_indexes))))`
			`f.write(ns_line)`
			`f.write('\n')`
			`elif isinstance(X, scipy.sparse.csr_matrix):`
			`print('NotImplementedError for sparse data')`
			`NotImplementedError`


			`def save_vw_dataset_w_ns(X, y, did, ds_dir, max_ns_num, is_regression):`
			`""" convert openml dataset to vw example and save to file`
			`"""`
			`print('is_regression', is_regression)`
			`if is_regression:`
			`fname = 'ds_{}_{}_{}.vw'.format(did, max_ns_num, 0)`
			`print('dataset size', X.shape[0], X.shape[1])`
			`print('saving data', did, ds_dir, fname)`
			`dim = X.shape[1]`
			`oml_to_vw_w_grouping(X, y, ds_dir, fname, dim, group_num=max_ns_num)`
			`else:`
			`NotImplementedError`


			`def shuffle_data(X, y, seed):`
			`try:`
			`n = len(X)`
			`except ValueError:`
			`n = X.getnnz()`

			`perm = np.random.RandomState(seed=seed).permutation(n)`
			`X_shuf = X[perm, :]`
			`y_shuf = y[perm]`
			`return X_shuf, y_shuf`


			`def get_oml_to_vw(did, max_ns_num, ds_dir=VW_DS_DIR):`
			`success = False`
			`print('-----getting oml dataset-------', did)`
			`ds = openml.datasets.get_dataset(did)`
			`target_attribute = ds.default_target_attribute`
			`# if target_attribute is None and did in OML_target_attribute_dict:`
			`# target_attribute = OML_target_attribute_dict[did]`

			`print('target=ds.default_target_attribute', target_attribute)`
			`data = ds.get_data(target=target_attribute, dataset_format='array')`
			`X, y = data[0], data[1] # return X: pd DataFrame, y: pd series`
			`import scipy`
			`if scipy.sparse.issparse(X):`
			`X = scipy.sparse.csr_matrix.toarray(X)`
			`print('is sparse matrix')`
			`if data and isinstance(X, np.ndarray):`
			`print('-----converting oml to vw and and saving oml dataset-------')`
			`save_vw_dataset_w_ns(X, y, did, ds_dir, max_ns_num, is_regression=True)`
			`success = True`
			`else:`
			`print('---failed to convert/save oml dataset to vw!!!----')`
			`try:`
			`X, y = data[0], data[1] # return X: pd DataFrame, y: pd series`
			`if data and isinstance(X, np.ndarray):`
			`print('-----converting oml to vw and and saving oml dataset-------')`
			`save_vw_dataset_w_ns(X, y, did, ds_dir, max_ns_num, is_regression=True)`
			`success = True`
			`else:`
			`print('---failed to convert/save oml dataset to vw!!!----')`
			`except ValueError:`
			`print('-------------failed to get oml dataset!!!', did)`
			`return success`


			`def load_vw_dataset(did, ds_dir, is_regression, max_ns_num):`
			`import os`
			`if is_regression:`
			`# the second field specifies the largest number of namespaces using.`
			`fname = 'ds_{}_{}_{}.vw'.format(did, max_ns_num, 0)`
			`vw_dataset_file = os.path.join(ds_dir, fname)`
			`# if file does not exist, generate and save the datasets`
			`if not os.path.exists(vw_dataset_file) or os.stat(vw_dataset_file).st_size < 1000:`
			`get_oml_to_vw(did, max_ns_num)`
			`print(ds_dir, vw_dataset_file)`
			`if not os.path.exists(ds_dir):`
			`os.makedirs(ds_dir)`
			`with open(os.path.join(ds_dir, fname), 'r') as f:`
			`vw_content = f.read().splitlines()`
			`print(type(vw_content), len(vw_content))`
			`return vw_content`


			`def get_data(iter_num=None, dataset_id=None, vw_format=True,`
			`max_ns_num=10, shuffle=False, use_log=True, dataset_type='regression'):`
			`logging.info('generating data')`
			`LOG_TRANSFORMATION_THRESHOLD = 100`
			`# get data from simulation`
			`import random`
			`vw_examples = None`
			`data_id = int(dataset_id)`
			`# loading oml dataset`
			`# data = OpenML2VWData(data_id, max_ns_num, dataset_type)`
			`# Y = data.Y`
			`if vw_format:`
			`# vw_examples = data.vw_examples`
			`vw_examples = load_vw_dataset(did=data_id, ds_dir=VW_DS_DIR, is_regression=True,`
			`max_ns_num=max_ns_num)`
			`Y = []`
			`for i, e in enumerate(vw_examples):`
			`Y.append(float(e.split('\|')[0]))`
			`logger.debug('first data %s', vw_examples[0])`
			`# do data shuffling or log transformation for oml data when needed`
			`if shuffle:`
			`random.seed(54321)`
			`random.shuffle(vw_examples)`

			`# do log transformation`
			`unique_y = set(Y)`
			`min_y = min(unique_y)`
			`max_y = max(unique_y)`
			`if use_log and max((max_y - min_y), max_y) >= LOG_TRANSFORMATION_THRESHOLD:`
			`log_vw_examples = []`
			`for v in vw_examples:`
			`org_y = v.split('\|')[0]`
			`y = float(v.split('\|')[0])`
			`# shift y to ensure all y are positive`
			`if min_y <= 0:`
			`y = y + abs(min_y) + 1`
			`log_y = np.log(y)`
			`log_vw = v.replace(org_y + '\|', str(log_y) + ' \|')`
			`log_vw_examples.append(log_vw)`
			`logger.info('log_vw_examples %s', log_vw_examples[0:2])`
			`if log_vw_examples:`
			`return log_vw_examples`
			`return vw_examples, Y`


			`class VowpalWabbitNamesspaceTuningProblem:`

			`def __init__(self, max_iter_num, dataset_id, ns_num, **kwargs):`
			`use_log = kwargs.get('use_log', True),`
			`shuffle = kwargs.get('shuffle', False)`
			`vw_format = kwargs.get('vw_format', True)`
			`print('dataset_id', dataset_id)`
			`self.vw_examples, self.Y = get_data(max_iter_num, dataset_id=dataset_id,`
			`vw_format=vw_format, max_ns_num=ns_num,`
			`shuffle=shuffle, use_log=use_log`
			`)`
			`self.max_iter_num = min(max_iter_num, len(self.Y))`
			`self._problem_info = {'max_iter_num': self.max_iter_num,`
			`'dataset_id': dataset_id,`
			`'ns_num': ns_num,`
			`}`
			`self._problem_info.update(kwargs)`
			`self._fixed_hp_config = kwargs.get('fixed_hp_config', {})`
			`self.namespace_feature_dim = AutoVW.get_ns_feature_dim_from_vw_example(self.vw_examples[0])`
			`self._raw_namespaces = list(self.namespace_feature_dim.keys())`
			`self._setup_search()`

			`def _setup_search(self):`
			`self._search_space = self._fixed_hp_config.copy()`
			`self._init_config = self._fixed_hp_config.copy()`
			`search_space = {'interactions':`
			`polynomial_expansion_set(`
			`init_monomials=set(self._raw_namespaces),`
			`highest_poly_order=len(self._raw_namespaces),`
			`allow_self_inter=False),`
			`}`
			`init_config = {'interactions': set()}`
			`self._search_space.update(search_space)`
			`self._init_config.update(init_config)`
			`logger.info('search space %s %s %s', self._search_space, self._init_config, self._fixed_hp_config)`

			`@property`
			`def init_config(self):`
			`return self._init_config`

			`@property`
			`def search_space(self):`
			`return self._search_space`


			`class VowpalWabbitNamesspaceLRTuningProblem(VowpalWabbitNamesspaceTuningProblem):`

			`def __init__(self, max_iter_num, dataset_id, ns_num, **kwargs):`
			`super().__init__(max_iter_num, dataset_id, ns_num, **kwargs)`
			`self._setup_search()`

			`def _setup_search(self):`
			`self._search_space = self._fixed_hp_config.copy()`
			`self._init_config = self._fixed_hp_config.copy()`
			`search_space = {'interactions':`
			`polynomial_expansion_set(`
			`init_monomials=set(self._raw_namespaces),`
			`highest_poly_order=len(self._raw_namespaces),`
			`allow_self_inter=False),`
			`'learning_rate': loguniform(lower=2e-10, upper=1.0)`
			`}`
			`init_config = {'interactions': set(), 'learning_rate': 0.5}`
			`self._search_space.update(search_space)`
			`self._init_config.update(init_config)`
			`logger.info('search space %s %s %s', self._search_space, self._init_config, self._fixed_hp_config)`


			`def get_y_from_vw_example(vw_example):`
			`""" get y from a vw_example. this works for regression dataset`
			`"""`
			`return float(vw_example.split('\|')[0])`


			`def get_loss(y_pred, y_true, loss_func='squared'):`
			`if 'squared' in loss_func:`
			`loss = mean_squared_error([y_pred], [y_true])`
			`elif 'absolute' in loss_func:`
			`loss = mean_absolute_error([y_pred], [y_true])`
			`else:`
			`loss = None`
			`raise NotImplementedError`
			`return loss`


			`def online_learning_loop(iter_num, vw_examples, vw_alg, loss_func, method_name=''):`
			`"""Implements the online learning loop.`
			`Args:`
			`iter_num (int): The total number of iterations`
			`vw_examples (list): A list of vw examples`
			`alg (alg instance): An algorithm instance has the following functions:`
			`- alg.learn(example)`
			`- alg.predict(example)`
			`loss_func (str): loss function`
			`Outputs:`
			`cumulative_loss_list (list): the list of cumulative loss from each iteration.`
			`It is returned for the convenience of visualization.`
			`"""`
			`print('rerunning exp....', len(vw_examples), iter_num)`
			`loss_list = []`
			`y_predict_list = []`
			`for i in range(iter_num):`
			`vw_x = vw_examples[i]`
			`y_true = get_y_from_vw_example(vw_x)`
			`# predict step`
			`y_pred = vw_alg.predict(vw_x)`
			`# learn step`
			`vw_alg.learn(vw_x)`
			`# calculate one step loss`
			`loss = get_loss(y_pred, y_true, loss_func)`
			`loss_list.append(loss)`
			`y_predict_list.append([y_pred, y_true])`

			`return loss_list`


			`def get_vw_tuning_problem(tuning_hp='NamesapceInteraction'):`
			`online_vw_exp_setting = {"max_live_model_num": 5,`
			`"fixed_hp_config": {'alg': 'supervised', 'loss_function': 'squared'},`
			`"ns_num": 10,`
			`"max_iter_num": 10000,`
			`}`

			`# construct openml problem setting based on basic experiment setting`
			`vw_oml_problem_args = {"max_iter_num": online_vw_exp_setting['max_iter_num'],`
			`"dataset_id": '42183',`
			`"ns_num": online_vw_exp_setting['ns_num'],`
			`"fixed_hp_config": online_vw_exp_setting['fixed_hp_config'],`
			`}`
			`if tuning_hp == 'NamesapceInteraction':`
			`vw_online_aml_problem = VowpalWabbitNamesspaceTuningProblem(**vw_oml_problem_args)`
			`elif tuning_hp == 'NamesapceInteraction+LearningRate':`
			`vw_online_aml_problem = VowpalWabbitNamesspaceLRTuningProblem(**vw_oml_problem_args)`
			`else:`
			`NotImplementedError`

			`return vw_oml_problem_args, vw_online_aml_problem`


			`class TestAutoVW(unittest.TestCase):`

			`def test_vw_oml_problem_and_vanilla_vw(self):`
			`vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem()`
			`vanilla_vw = pyvw.vw(**vw_oml_problem_args["fixed_hp_config"])`
			`cumulative_loss_list = online_learning_loop(vw_online_aml_problem.max_iter_num,`
			`vw_online_aml_problem.vw_examples,`
			`vanilla_vw,`
			`loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),`
			`)`
			`print('final average loss:', sum(cumulative_loss_list) / len(cumulative_loss_list))`

			`def test_supervised_vw_tune_namespace(self):`
			`# basic experiment setting`
			`vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem()`
			`autovw = AutoVW(max_live_model_num=5,`
			`search_space=vw_online_aml_problem.search_space,`
			`init_config=vw_online_aml_problem.init_config,`
			`min_resource_lease='auto',`
			`random_seed=2345)`

			`cumulative_loss_list = online_learning_loop(vw_online_aml_problem.max_iter_num,`
			`vw_online_aml_problem.vw_examples,`
			`autovw,`
			`loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),`
			`)`
			`print('final average loss:', sum(cumulative_loss_list) / len(cumulative_loss_list))`

			`def test_supervised_vw_tune_namespace_learningrate(self):`
			`# basic experiment setting`
			`vw_oml_problem_args, vw_online_aml_problem = get_vw_tuning_problem(tuning_hp='NamesapceInteraction+LearningRate')`
			`autovw = AutoVW(max_live_model_num=5,`
			`search_space=vw_online_aml_problem.search_space,`
			`init_config=vw_online_aml_problem.init_config,`
			`min_resource_lease='auto',`
			`random_seed=2345)`

			`cumulative_loss_list = online_learning_loop(vw_online_aml_problem.max_iter_num,`
			`vw_online_aml_problem.vw_examples,`
			`autovw,`
			`loss_func=vw_oml_problem_args["fixed_hp_config"].get("loss_function", "squared"),`
			`)`
			`print('final average loss:', sum(cumulative_loss_list) / len(cumulative_loss_list))`

			`def test_bandit_vw_tune_namespace(self):`
			`pass`

			`def test_bandit_vw_tune_namespace_learningrate(self):`
			`pass`


			`if __name__ == "__main__":`
			`unittest.main()`