autogen/flaml/default/greedy.py

import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import pairwise_distances


def _augment(row):
    max, avg, id = row.max(), row.mean(), row.index[0]
    return row.apply(lambda x: (x, max, avg, id))


def construct_portfolio(regret_matrix, meta_features, regret_bound):
    """The portfolio construction algorithm.

    (Reference)[https://arxiv.org/abs/2202.09927].

    Configurations are added greedily, one per iteration. Each remaining
    configuration is scored by the excessive regret (sum over tasks of the
    regret above `regret_bound`) of the portfolio extended with it. When the
    two smallest scores are within `eps` of each other, the tie is broken by
    the average regret, and the search stops if that average regret no longer
    improves on the previous tie-broken value.

    Args:
        regret_matrix: A dataframe of regret matrix. The index holds the
            configuration names and the columns hold the task names.
        meta_features: None or a dataframe of metafeatures matrix, indexed by
            task name.
            When set to None, the algorithm uses greedy strategy.
            Otherwise, the algorithm uses greedy strategy with feedback
            from the nearest neighbor predictor.
        regret_bound: A float of the regret bound.

    Returns:
        A list of configuration names, in the order they were selected. The
        list is empty when `regret_matrix` has no configurations.
    """
    configs = []
    all_configs = set(regret_matrix.index.tolist())
    tasks = regret_matrix.columns
    # pre-processing
    if meta_features is not None:
        scaler = RobustScaler()
        meta_features = meta_features.loc[tasks]
        # Build a fresh frame for the scaled values instead of writing floats
        # in place into a `.loc` selection (avoids dtype-upcast and
        # setting-on-a-copy problems); labels are preserved.
        meta_features = pd.DataFrame(
            scaler.fit_transform(meta_features),
            index=meta_features.index,
            columns=meta_features.columns,
        )
        nearest_task = {}
        for t in tasks:
            other_meta_features = meta_features.drop(t)
            dist = pd.DataFrame(
                pairwise_distances(
                    meta_features.loc[t].to_numpy().reshape(1, -1),
                    other_meta_features,
                    metric="l2",
                ),
                columns=other_meta_features.index,
            )
            # One-element Series holding the label of the closest other task.
            nearest_task[t] = dist.idxmin(axis=1)
        # Every cell becomes a tuple produced by `_augment`, so that `min`
        # over a column carries an identifier along with the regret.
        regret_matrix = regret_matrix.apply(_augment, axis=1)

    def loss(configs):
        """Loss of config set `configs`, according to nearest neighbor config predictor.

        Returns a pair `(excessive_regret, avg_regret)`.
        """
        if meta_features is not None:
            r = []
            best_config_per_task = regret_matrix.loc[configs, :].min()
            for t in tasks:
                # Last tuple element is the identifier attached by `_augment`;
                # it is used as the row label of the configuration picked for
                # the nearest task, which is then evaluated on task `t`.
                config = best_config_per_task[nearest_task[t]].iloc[0][-1]
                r.append(regret_matrix[t][config][0])
        else:
            r = regret_matrix.loc[configs].min()
        r = np.array(r)
        excessive_regret = (r - regret_bound).clip(min=0).sum()
        avg_regret = r.mean()
        return excessive_regret, avg_regret

    prev = np.inf
    i = 0
    eps = 1e-5
    while True:
        candidates = [configs + [d] for d in all_configs.difference(configs)]
        if not candidates:
            # Nothing left to add: stop instead of failing on an empty unpack.
            print(
                f"Exhausted all configurations at k = {i} without reaching "
                f"target regret bound of {regret_bound}. Stopping."
            )
            break
        losses, avg_regret = (
            np.array(values) for values in zip(*(loss(x) for x in candidates))
        )
        sorted_losses = np.sort(losses)
        # A tie needs at least two candidates; with a single candidate there
        # is no second-best loss to compare against.
        if len(sorted_losses) > 1 and sorted_losses[1] - sorted_losses[0] < eps:
            minloss = np.nanmin(losses)
            print(
                f"tie detected at loss = {sorted_losses[0]}, using alternative metric."
            )
            tied = np.flatnonzero(losses - minloss < eps)
            # Among tied candidates pick the smallest average regret (the
            # candidate position breaks any remaining tie).
            minloss, ind = min((avg_regret[j], j) for j in tied)
            if minloss > prev - eps:
                print(
                    f"May be overfitting at k = {i + 1}, current = {minloss:.5f}, "
                    f"prev = {prev:.5f}. Stopping."
                )
                break
            configs = candidates[ind]
            prev = minloss
        else:
            configs = candidates[np.nanargmin(losses)]
        i += 1
        if sorted_losses[0] <= eps:
            print(
                f"Reached target regret bound of {regret_bound}! k = {i}. Declining to pick further!"
            )
            break

    return configs
Zero-shot AutoML (#468) * Prepare for release Co-authored-by: Moe Kayali <t-moekayali@microsoft.com> * bug fix * improve doc and code quality Co-authored-by: Qingyun Wu 2022-03-01 15:39:09 -08:00			`import numpy as np`
			`import pandas as pd`
			`from sklearn.preprocessing import RobustScaler`
			`from sklearn.metrics import pairwise_distances`


			`def _augment(row):`
			`max, avg, id = row.max(), row.mean(), row.index[0]`
			`return row.apply(lambda x: (x, max, avg, id))`


			`def construct_portfolio(regret_matrix, meta_features, regret_bound):`
			`"""The portfolio construction algorithm.`

			`(Reference)[https://arxiv.org/abs/2202.09927].`

			`Args:`
			`regret_matrix: A dataframe of regret matrix.`
			`meta_features: None or a dataframe of metafeatures matrix.`
			`When set to None, the algorithm uses greedy strategy.`
			`Otherwise, the algorithm uses greedy strategy with feedback`
			`from the nearest neighbor predictor.`
			`regret_bound: A float of the regret bound.`

			`Returns:`
			`A list of configuration names.`
			`"""`
			`configs = []`
			`all_configs = set(regret_matrix.index.tolist())`
			`tasks = regret_matrix.columns`
			`# pre-processing`
			`if meta_features is not None:`
			`scaler = RobustScaler()`
			`meta_features = meta_features.loc[tasks]`
			`meta_features.loc[:, :] = scaler.fit_transform(meta_features)`
			`nearest_task = {}`
			`for t in tasks:`
			`other_meta_features = meta_features.drop(t)`
			`dist = pd.DataFrame(`
			`pairwise_distances(`
			`meta_features.loc[t].to_numpy().reshape(1, -1),`
			`other_meta_features,`
			`metric="l2",`
			`),`
			`columns=other_meta_features.index,`
			`)`
			`nearest_task[t] = dist.idxmin(axis=1)`
			`regret_matrix = regret_matrix.apply(_augment, axis=1)`
			`print(regret_matrix)`

			`def loss(configs):`
			"""Loss of config set `configs`, according to nearest neighbor config predictor."""
			`if meta_features is not None:`
			`r = []`
			`best_config_per_task = regret_matrix.loc[configs, :].min()`
			`for t in tasks:`
			`config = best_config_per_task[nearest_task[t]].iloc[0][-1]`
			`r.append(regret_matrix[t][config][0])`
			`else:`
			`r = regret_matrix.loc[configs].min()`
			`excessive_regret = (np.array(r) - regret_bound).clip(min=0).sum()`
			`avg_regret = np.array(r).mean()`
			`return excessive_regret, avg_regret`

			`prev = np.inf`
			`i = 0`
			`eps = 1e-5`
			`while True:`
			`candidates = [configs + [d] for d in all_configs.difference(configs)]`
			`losses, avg_regret = tuple(zip(*(loss(x) for x in candidates)))`
			`sorted_losses = np.sort(losses)`
			`if sorted_losses[1] - sorted_losses[0] < eps:`
			`minloss = np.nanmin(losses)`
			`print(`
			`f"tie detected at loss = {sorted_losses[0]}, using alternative metric."`
			`)`
			`tied = np.flatnonzero(losses - minloss < eps)`
			`losses = [(avg_regret[i], i) for i in tied]`
			`minloss, ind = min(losses)`
			`if minloss > prev - eps:`
			`print(`
			`f"May be overfitting at k = {i + 1}, current = {minloss:.5f}, "`
			`f"prev = {prev:.5f}. Stopping."`
			`)`
			`break`
			`configs = candidates[ind]`
			`prev = minloss`
			`else:`
			`configs = candidates[np.nanargmin(losses)]`
			`i += 1`
			`if sorted_losses[0] <= eps:`
			`print(`
			`f"Reached target regret bound of {regret_bound}! k = {i}. Declining to pick further!"`
			`)`
			`break`

			`return configs`