Source code for selector.hp_point_selection

"""This module contains point selection functions."""
import numpy as np
import copy
import itertools
from selector.pool import ParamType


def get_relatives(suggested):
    """
    Collect, for every suggested point, the points sharing its generator.

    Parameters
    ----------
    suggested : list of selector.pool.Configuration
        Suggested points; each one is read through its ``generator``
        attribute.

    Returns
    -------
    ndarray
        Object array with one entry per suggested point. Entry ``i`` holds
        the positions (within ``suggested``) of all points that compare
        unequal to point ``i`` and carry the same generator tag.
    """
    siblings_per_point = []
    for current in suggested:
        tag = current.generator
        siblings = []
        for position, other in enumerate(suggested):
            # Equality (not identity) decides whether a point is "itself".
            if other != current and other.generator == tag:
                siblings.append(position)
        siblings_per_point.append(siblings)

    # dtype=object keeps ragged index lists intact.
    return np.array(siblings_per_point, dtype=object)


def distance_stats(smfeatures, distances):
    """
    Append row-wise distance statistics to a feature matrix.

    Four columns are appended, in this order: the mean distance, the mean
    squared distance, the standard deviation of the distances and the
    difference between the mean and the minimum distance.

    Parameters
    ----------
    smfeatures : array_like, shape (n_points, n_features)
        Current feature matrix, one row per point.
    distances : array_like, shape (n_points, n_references)
        Distances from every point (row) to a set of reference points
        (columns). Nested lists are accepted as well as ndarrays.

    Returns
    -------
    ndarray, shape (n_points, n_features + 4)
        New features for simulation.
    """
    # Coerce once so that nested lists work too; the element-wise square
    # below is not defined for plain Python lists.
    feats = np.asarray(smfeatures)
    dist = np.asarray(distances)

    base_width = feats.shape[1]
    n_rows = len(dist)

    def as_column(values):
        return values.reshape(n_rows, 1)

    stacked = np.hstack((
        feats,
        as_column(np.mean(dist, axis=1)),
        as_column(np.mean(dist * dist, axis=1)),
        as_column(np.std(dist, axis=1)),
    ))

    # Column ``base_width`` of the stacked matrix is the mean distance
    # appended above; the last feature is mean minus minimum.
    spread = stacked[:, base_width] - np.min(dist, axis=1)

    return np.hstack((stacked, as_column(spread)))


def simulation(suggested, features, max_evals, selected_points, weights,
               npoints, distances, relatives):
    """
    Run simulations of config selection.

    Every simulation run starts from ``selected_points`` and keeps drawing
    candidates at random (weighted by a logistic score of the normalised
    features) until ``npoints`` points are selected or no candidate is
    left. The number of times each candidate was drawn is accumulated over
    all runs.

    Parameters
    ----------
    suggested : list
        List of configs/points to select from. Only its length is used;
        candidates are identified by their position in this list.
    features : array_like, shape (len(suggested), n_features)
        Features of configs/points, one row per candidate.
    max_evals : int
        Number of simulation runs.
    selected_points : list
        Indices of configurations selected so far. They are used as column
        indices into ``distances`` and as indices into ``relatives``.
    weights : ndarray
        Weights for the scoring function, one row per candidate. Must have
        at least as many columns as the feature matrix after the distance
        statistics (4 per call to ``distance_stats``) were appended.
    npoints : int
        Number of configurations to select.
    distances : ndarray
        Distance features between the configurations; one row per
        candidate, columns indexed like ``selected_points``.
    relatives : ndarray
        Indices of relative configurations.

    Returns
    -------
    ndarray
        How often configs/points were selected in the simulation.
    """
    candidates = list(range(len(suggested)))
    sfreq = np.zeros(len(candidates))

    # Float master copy: the min-max normalisation below writes fractions
    # in place, which an integer array would silently truncate.
    base_features = np.asarray(features, dtype=float)

    for _ in range(max_evals):
        smsel = list(selected_points)
        smsugg = list(candidates)
        smfeatures = base_features.copy()
        smweights = np.array(weights)
        smdistances = np.array(distances)

        for selpoint in range(len(selected_points), npoints):

            # Nothing left to draw from in this run.
            if not smsugg:
                break

            # After the first point is chosen
            if selpoint > 0:
                # Diversity features to selected points
                smfeatures = distance_stats(smfeatures,
                                            smdistances[:, smsel])

            rel_sel = list(itertools.chain.from_iterable(
                relatives[sel] for sel in smsel))
            if rel_sel:
                # Diversity features to selected and related points
                smfeatures = distance_stats(smfeatures,
                                            smdistances[:, rel_sel])

            # Min-max normalization
            minf = np.min(smfeatures, axis=0)
            maxf = np.max(smfeatures, axis=0)
            diff = maxf - minf
            eq = np.where(minf == maxf)[0]
            ge = np.setdiff1d(np.arange(smfeatures.shape[1]), eq,
                              assume_unique=True)
            smfeatures[:, ge] = (smfeatures[:, ge] - minf[ge]) / diff[ge]
            # set no variance features to 0, except for the first
            smfeatures[:, eq[1:]] = 0

            # Logistic score per candidate
            s_w = 1.0 / (1.0 + np.exp(np.sum(
                smfeatures * smweights[:, 0:smfeatures.shape[1]], axis=1)))

            # Scores clipped to [0, 1]
            scores = np.maximum(0, np.minimum(1, s_w))

            # Select with probability according to scores; fall back to a
            # uniform draw when the scores carry no information.
            total = np.sum(scores)
            if total > 0:
                selprob = scores / total
                drawn = np.random.choice(smsugg, 1, p=selprob.tolist())[0]
            else:
                drawn = np.random.choice(smsugg, 1)[0]
            selected = int(drawn)
            selected_idx = smsugg.index(selected)

            # Update frequency of selections
            sfreq[selected] += 1

            # Record the candidate id (not its position in the shrinking
            # candidate list): smsel is used to index distance columns and
            # relatives, which do not shrink during a run.
            smsel.append(selected)

            # Make sure selected points cannot be selected again in simulation
            del smsugg[selected_idx]
            smweights = np.delete(smweights, selected_idx, axis=0)
            smdistances = np.delete(smdistances, selected_idx, axis=0)

            # Drop the appended distance statistics by going back to the
            # base features of exactly the candidates that remain, so rows
            # stay aligned with smsugg, smweights and smdistances.
            smfeatures = base_features[smsugg]

    return sfreq


def normalize_plus_cond_acc(sugg, s):
    """
    Normalize and account for conditionals.

    The configurations are modified in place: categorical values are
    mapped to 1 (value ``True``) or 0 (anything else), every value is
    scaled by the upper bound of its parameter, and parameters missing
    from a configuration are added with value ``None``.

    Parameters
    ----------
    sugg : list of selector.pool.Configuration
        Suggested configurations; their ``conf`` dicts are updated in
        place.
    s : selector.scenario.Scenario or list
        AC scenario (its ``parameter`` attribute is used) or directly the
        list of parameter definitions.

    Returns
    -------
    list
        Suggested configuration with normalized and adjusted values
        (the same list object as ``sugg``).
    """
    psetting = s if isinstance(s, list) else s.parameter

    # Scaling constant per parameter, derived from the last bound entry.
    maximums = {}
    cat_params = set()
    for param in psetting:
        if param.type == ParamType.categorical:
            if len(param.bound) > 2:
                if isinstance(param.bound[0], (str, np.str_)):
                    maximums[param.name] = len(param.bound)
                else:
                    maximums[param.name] = float(param.bound[-1])
            else:
                maximums[param.name] = 1
            cat_params.add(param.name)
        else:
            maximums[param.name] = param.bound[-1]

    # NOTE(review): every categorical value other than ``True`` (including
    # strings and None) becomes 0 here -- confirm this is the intended
    # encoding for categoricals with more than two options.
    for point in sugg:
        for key in point.conf:
            if key in cat_params:
                point.conf[key] = 1 if point.conf[key] is True else 0

    for key, maximum in maximums.items():
        for point in sugg:
            if key not in point.conf:
                # Parameter absent from this configuration: mark it as
                # missing so all configurations share the same keys.
                point.conf[key] = None
                continue

            value = point.conf[key]
            if value is None:
                continue
            # Values or bounds given as text cannot be scaled; leave them
            # untouched.
            if isinstance(value, str) or isinstance(maximum, str):
                continue

            if maximum > 0:
                point.conf[key] = value / maximum
            elif maximum < 0 and value != 0:
                # Negative upper bound: invert the ratio so that the
                # result stays positive for values within the bounds.
                point.conf[key] = maximum / value

    return sugg


def pairwise_distances(sugg_i, sugg_j):
    """
    Compute the distance between every pair of configurations.

    For each key of the first configuration the contribution is 0 when
    both values are ``None``, 1 when exactly one of them is ``None`` and
    the squared difference otherwise (values given as strings are
    converted with ``float`` first). The distance is the square root of
    the summed contributions.

    Parameters
    ----------
    sugg_i : list
        First set of configurations (objects with a ``conf`` dict).
    sugg_j : list
        Second set of configurations; each must provide every key present
        in the configurations of the first set.

    Returns
    -------
    ndarray
        Matrix of shape ``(len(sugg_i), len(sugg_j))`` with the pairwise
        distances between the configurations.
    """
    def squared_gap(left, right):
        # Contribution of a single parameter to the squared distance.
        if left is None and right is None:
            return 0
        if left is None or right is None:
            return 1
        if isinstance(left, str) or isinstance(right, str):
            return (float(left) - float(right)) ** 2
        return (left - right) ** 2

    matrix = np.zeros((len(sugg_i), len(sugg_j)))
    for row, first in enumerate(sugg_i):
        for col, second in enumerate(sugg_j):
            total = sum(squared_gap(value, second.conf[key])
                        for key, value in first.conf.items())
            matrix[row, col] = total ** 0.5

    return np.array(matrix)


def select_point(scenario, suggested, max_evals, npoints, pool, epoch,
                 max_epoch, features, weights, seed):
    """
    Generate features and run simulation.

    Points are picked one at a time: for every requested point the
    selection is simulated and the candidate drawn most often is chosen
    and removed from the candidates.

    Parameters
    ----------
    scenario : selector.scenario.Scenario
        AC scenario.
    suggested : list
        List of configs/points to select from. NOTE: selected entries are
        removed from this list in place.
    max_evals : int
        Number of simulation runs per selected point.
    npoints : int
        Number of configs/points requested. At most ``len(suggested)``
        points are returned.
    pool : list
        List of configs/points to select from (not used here).
    epoch : int
        Current epoch (not used here).
    max_epoch : int
        Total number of epochs (not used here).
    features : ndarray (n_suggestions, n_features)
        Features computed for each suggested configuration.
    weights : ndarray (n_suggestions, n_features)
        Preset weights for the scoring function of the selection
        mechanism.
    seed : int
        Random seed for NumPy's global generator; ``None`` or ``False``
        leaves the generator untouched, ``0`` is a valid seed.

    Returns
    -------
    list
        Selected configs/points, in order of selection.
    """
    if seed is not None and seed is not False:
        np.random.seed(seed)

    relatives = get_relatives(suggested)

    # Untouched configuration objects to return; the normalised values
    # below live on a deep copy.
    candidates = copy.copy(suggested)

    # Not all points have values for conditional params; the helper fills
    # the gaps with None so that all configurations share the same keys.
    normalized = normalize_plus_cond_acc(copy.deepcopy(suggested), scenario)

    distances = pairwise_distances(normalized, normalized)

    # Original position of every candidate that is still available. Rows
    # of features/weights/distances shrink with each pick, but distance
    # columns and ``relatives`` keep the original ordering, so already
    # selected points must be reported by their original position.
    remaining = list(range(len(suggested)))

    selected_points = []
    smselected_points = []

    n_select = min(npoints, len(suggested))

    # Run simulation for every point requested
    for _ in range(n_select):

        sfreq = simulation(suggested, features, max_evals, smselected_points,
                           weights, n_select, distances, relatives)
        sidx = int(np.argmax(sfreq))

        selected_points.append(candidates[sidx])
        smselected_points.append(remaining[sidx])

        del candidates[sidx]
        del remaining[sidx]
        del suggested[sidx]
        weights = np.delete(weights, sidx, axis=0)
        features = np.delete(features, sidx, axis=0)
        distances = np.delete(distances, sidx, axis=0)

    return selected_points