import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import pairwise_distances


def _augment(row):
    max, avg, id = row.max(), row.mean(), row.index[0]
    return row.apply(lambda x: (x, max, avg, id))


def construct_portfolio(regret_matrix, meta_features, regret_bound):
    """The portfolio construction algorithm.

    (Reference)[https://arxiv.org/abs/2202.09927].

    Configurations are added one at a time. In each round every remaining
    configuration is tried as an extension of the current portfolio and the
    one with the smallest excessive regret (total regret above
    ``regret_bound`` over all tasks) is kept. When the two best candidates are
    within a small tolerance of each other, the average regret is used as the
    tie-breaker, and the search stops if that average does not improve on the
    value recorded at the previous tie.

    Args:
        regret_matrix: A dataframe of regret matrix, indexed by configuration
            name with one column per task.
        meta_features: None or a dataframe of metafeatures matrix, indexed by
            task. When set to None, the algorithm uses greedy strategy.
            Otherwise, the algorithm uses greedy strategy with feedback
            from the nearest neighbor predictor.
        regret_bound: A float of the regret bound.

    Returns:
        A list of configuration names, in the order they were selected.
    """
    selected = []
    # Keep the index order (deduplicated) instead of a set so that exact ties
    # are broken the same way on every run.
    all_configs = list(dict.fromkeys(regret_matrix.index.tolist()))
    tasks = regret_matrix.columns
    use_neighbors = meta_features is not None
    nearest_task = {}

    # pre-processing
    if use_neighbors:
        scaler = RobustScaler()
        meta_features = meta_features.loc[tasks]
        # Build a fresh frame for the scaled values rather than assigning in
        # place, which is fragile when the original columns are not float.
        meta_features = pd.DataFrame(
            scaler.fit_transform(meta_features),
            index=meta_features.index,
            columns=meta_features.columns,
        )
        for t in tasks:
            other_meta_features = meta_features.drop(t)
            dist = pd.DataFrame(
                pairwise_distances(
                    meta_features.loc[t].to_numpy().reshape(1, -1),
                    other_meta_features,
                    metric="l2",
                ),
                columns=other_meta_features.index,
            )
            # Label of the closest other task in scaled meta-feature space.
            nearest_task[t] = dist.idxmin(axis=1).iloc[0]
        # Each cell becomes a tuple whose first element is the regret and
        # whose last element is read back below as a configuration label.
        regret_matrix = regret_matrix.apply(_augment, axis=1)
        print(regret_matrix)

    def loss(subset):
        """Loss of config set `subset`, according to nearest neighbor config predictor.

        Returns a pair ``(excessive_regret, avg_regret)``.
        """
        if use_neighbors:
            best_config_per_task = regret_matrix.loc[subset, :].min()
            r = []
            for t in tasks:
                # Use the config that is best on the nearest other task, and
                # charge its regret on the task itself.
                config = best_config_per_task.loc[nearest_task[t]][-1]
                r.append(regret_matrix.loc[config, t][0])
        else:
            r = regret_matrix.loc[subset].min()
        r = np.array(r)
        excessive_regret = (r - regret_bound).clip(min=0).sum()
        avg_regret = r.mean()
        return excessive_regret, avg_regret

    prev = np.inf
    k = 0
    eps = 1e-5
    while True:
        chosen = set(selected)
        candidates = [selected + [c] for c in all_configs if c not in chosen]
        if not candidates:
            # Every configuration is already in the portfolio.
            break
        losses, avg_regret = (
            np.asarray(values, dtype=float)
            for values in zip(*(loss(x) for x in candidates))
        )
        sorted_losses = np.sort(losses)
        # A tie needs at least two candidates; a lone candidate is taken as is.
        tie = len(sorted_losses) > 1 and sorted_losses[1] - sorted_losses[0] < eps
        if tie:
            minloss = np.nanmin(losses)
            print(
                f"tie detected at loss = {sorted_losses[0]}, using alternative metric."
            )
            tied = np.flatnonzero(losses - minloss < eps)
            best_avg, ind = min((avg_regret[j], j) for j in tied)
            if best_avg > prev - eps:
                print(
                    f"May be overfitting at k = {k + 1}, current = {best_avg:.5f}, "
                    f"prev = {prev:.5f}. Stopping."
                )
                break
            selected = candidates[ind]
            prev = best_avg
        else:
            selected = candidates[np.nanargmin(losses)]
        k += 1
        if sorted_losses[0] <= eps:
            print(
                f"Reached target regret bound of {regret_bound}! k = {k}. Declining to pick further!"
            )
            break

    return selected