autogen/flaml/scheduler/online_scheduler.py
Qingyun Wu 0d3a0bfab6
Add ChaCha (#92)
* pickle the AutoML object

* get best model per estimator

* test deberta

* stateless API

* prevent divide by zero

* test roberta

* BlendSearchTuner

* sync

* version number

* update gitignore

* delta time

* reindex columns when dropping int-indexed columns

* add seed

* add seed in Args

* merge

* init upload of ChaCha

* remove redundancy

* add back catboost

* improve AutoVW API

* set min_resource_lease in VWOnlineTrial

* docstr

* rename

* docstr

* add docstr

* improve API and documentation

* fix name

* docstr

* naming

* remove max_resource in scheduler

* add TODO in flow2

* remove redundancy in searcher

* add input type

* adapt code from ray.tune

* move files

* naming

* documentation

* fix import error

* fix format issues

* remove cb in worse than test

* improve _generate_all_comb

* remove ray tune

* naming

* VowpalWabbitTrial

* import error

* import error

* merge test code

* scheduler import

* fix import

* remove

* import, minor bug and version

* Float or Categorical

* fix default

* add test_autovw.py

* add vowpalwabbit and openml

* lint

* reorg

* lint

* indent

* add autovw notebook

* update notebook

* update log msg and autovw notebook

* update autovw notebook

* update autovw notebook

* add available strings for model_select_policy

* string for metric

* Update vw format in flaml/onlineml/trial.py

Co-authored-by: olgavrou <olgavrou@gmail.com>

* make init_config optional

* add _setup_trial_runner and update notebook

* space

Co-authored-by: Chi Wang (MSR) <chiw@microsoft.com>
Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qiw@microsoft.com>
Co-authored-by: olgavrou <olgavrou@gmail.com>
2021-06-02 22:08:24 -04:00


import numpy as np
import logging
from typing import Optional, Dict
from flaml.scheduler import FIFOScheduler, TrialScheduler
from flaml.tune import Trial

logger = logging.getLogger(__name__)


class OnlineScheduler(FIFOScheduler):
    """Implementation of the online FIFO scheduler.

    Methods:
        on_trial_result(trial_runner, trial, result)
            Report a result and return a decision on the trial's status.
        choose_trial_to_run(trial_runner)
            Decide which trial to run next.
    """
    def on_trial_result(self, trial_runner, trial: Trial, result: Dict):
        """Report a result and return a decision on the trial's status.

        Always keeps the trial running (returns TrialScheduler.CONTINUE).
        """
        return TrialScheduler.CONTINUE

    def choose_trial_to_run(self, trial_runner) -> Optional[Trial]:
        """Decide which trial to run next.

        Trials are prioritized by status:
        PENDING (trials that have not been tried) > PAUSED (trials that have been run).
        Among trials with the same status, the one with the smallest resource
        lease is chosen. Returns None if no trial qualifies.
        """
        for trial in trial_runner.get_trials():
            if trial.status == Trial.PENDING:
                return trial
        min_paused_resource = np.inf
        min_paused_resource_trial = None
        for trial in trial_runner.get_trials():
            # the strict '<' breaks ties in favor of the earlier-added trial
            if trial.status == Trial.PAUSED and trial.resource_lease < min_paused_resource:
                min_paused_resource = trial.resource_lease
                min_paused_resource_trial = trial
        if min_paused_resource_trial is not None:
            return min_paused_resource_trial
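
# Illustration of the priority order above (hypothetical trials and numbers):
# with T1 (PAUSED, resource lease 20), T2 (PENDING) and T3 (PAUSED, resource
# lease 10), choose_trial_to_run returns T2 first; once no PENDING trial
# remains, T3 is preferred over T1 because its lease is smaller.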


class OnlineSuccessiveDoublingScheduler(OnlineScheduler):
    """Implementation of the OnlineSuccessiveDoublingScheduler.

    Methods:
        on_trial_result(trial_runner, trial, result)
            Report a result and return a decision on the trial's status.
        choose_trial_to_run(trial_runner)
            Decide which trial to run next.
    """
    def __init__(self, increase_factor: float = 2.0):
        """
        Args:
            increase_factor (float): a multiplicative factor used to increase
                the resource lease. The default value is 2.0.
        """
        super().__init__()
        self._increase_factor = increase_factor

    def on_trial_result(self, trial_runner, trial: Trial, result: Dict):
        """Report a result and return a decision on the trial's status.

        1. Returns TrialScheduler.CONTINUE (i.e., keeps the trial running)
           if the resource consumed has not reached the current resource_lease.
        2. Otherwise multiplies the current resource lease by increase_factor
           (doubling it by default) and returns TrialScheduler.PAUSE.
        """
        if trial.result is None or trial.result.resource_used < trial.resource_lease:
            return TrialScheduler.CONTINUE
        else:
            trial.set_resource_lease(trial.resource_lease * self._increase_factor)
            logger.info('Increased resource lease for trial %s, used: %s, current budget %s',
                        trial.trial_id, trial.result.resource_used, trial.resource_lease)
            return TrialScheduler.PAUSE
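
# Illustration of the lease schedule (hypothetical numbers): with
# increase_factor=2.0 and an initial lease of 10, the trial is paused once it
# has used 10 units and its lease grows to 20; the next pause happens at 20
# units used (lease 40), then at 40 (lease 80), and so on.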


class ChaChaScheduler(OnlineSuccessiveDoublingScheduler):
    """Keep the top-performing learners running.

    Methods:
        on_trial_result(trial_runner, trial, result)
            Report a result and return a decision on the trial's status.
        choose_trial_to_run(trial_runner)
            Decide which trial to run next.
    """
    def __init__(self, increase_factor: float = 2.0, **kwargs):
        """
        Args:
            increase_factor: a multiplicative factor used to increase the
                resource lease. The default value is 2.0.
        """
        super().__init__(increase_factor)
        # scheduler-specific options, passed via kwargs
        self._keep_champion = kwargs.get('keep_champion', True)
        self._keep_challenger_metric = kwargs.get('keep_challenger_metric', 'ucb')
        self._keep_challenger_ratio = kwargs.get('keep_challenger_ratio', 0.5)
        self._pause_old_froniter = kwargs.get('pause_old_froniter', False)
        logger.info('Using ChaCha scheduler with config %s', kwargs)

    def on_trial_result(self, trial_runner, trial: Trial, result: Dict):
        """Report a result and return a decision on the trial's status.

        Makes a decision according to: successive doubling + champion check
        + performance check.
        """
        # the doubling scheduler makes the initial decision
        decision = super().on_trial_result(trial_runner, trial, result)
        # ****** Check whether the trial has been paused since a new champion was promoted ******
        # NOTE: This check is not enabled by default; it is kept for experimentation purposes.
        # trial.is_checked_under_current_champion being False means the trial has
        # not been paused since the new champion was promoted. If so, we tentatively
        # pause it so that new trials can possibly be taken into consideration.
        # NOTE: This may need to be changed. We do this because we only add trials
        # into the OnlineTrialRunner when there are available slots. Maybe we should
        # consider adding max_running_trial trials once a new champion is promoted.
        if self._pause_old_froniter and not trial.is_checked_under_current_champion:
            if decision == TrialScheduler.CONTINUE:
                decision = TrialScheduler.PAUSE
            trial.set_checked_under_current_champion(True)
            logger.info('Tentatively set trial as paused')
        # ****** Keep the champion always running ******
        if self._keep_champion and trial.trial_id == trial_runner.champion_trial.trial_id and \
                decision == TrialScheduler.PAUSE:
            return TrialScheduler.CONTINUE
        # ****** Keep the trials with top performance always running ******
        if self._keep_challenger_ratio is not None:
            if decision == TrialScheduler.PAUSE:
                logger.debug('champion, %s', trial_runner.champion_trial.trial_id)
                # This can be inefficient when the number of trials is large.
                # TODO: improve efficiency.
                top_trials = trial_runner.get_top_running_trials(self._keep_challenger_ratio,
                                                                 self._keep_challenger_metric)
                logger.debug('top_learners: %s', top_trials)
                if trial in top_trials:
                    logger.debug('top runner %s: set from PAUSE to CONTINUE', trial.trial_id)
                    return TrialScheduler.CONTINUE
        return decision
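

# A minimal configuration sketch: it shows how the keyword options above map
# onto a ChaChaScheduler instance. Driving the scheduler end to end would also
# require an OnlineTrialRunner, which is not constructed here.
if __name__ == '__main__':
    scheduler = ChaChaScheduler(
        increase_factor=2.0,           # multiply the resource lease on each pause
        keep_champion=True,            # never pause the current champion
        keep_challenger_metric='ucb',  # rank challengers by upper confidence bound
        keep_challenger_ratio=0.5,     # keep the top half of running challengers
        pause_old_froniter=False,      # experimental check, disabled by default
    )
    print('ChaChaScheduler configured:', scheduler)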