Source code for research.active_learning._selection_methods

"""
Selection criteria to be used along with the ALWrapper object.
"""
import numpy as np


[docs]def entropy(unlabeled_ids, increment, probabilities, **kwargs): """ Sample selection based on Entropy selection criterion. Parameters ---------- unlabeled_ids : array-like of shape (n_samples,) Indices of the unlabeled samples in the original (unlabeled training) dataset. increment : int Number of observations to select. probabilities : array-like of shape (n_samples, n_classes) Class probabilities of the input samples belonging to the unlabeled dataset. Returns ------- new_ids : array of shape (increment,) Indices of unlabeles samples to be added to the labeled training dataset. """ e = (-probabilities * np.log2(probabilities)).sum(axis=1) new_ids = unlabeled_ids[np.argsort(e)[::-1][:increment]] return new_ids
[docs]def breaking_ties(unlabeled_ids, increment, probabilities, **kwargs): """ Sample selection based on breaking ties selection criterion. Selects samples as a smallest difference of probability values between the first and second most likely classes Parameters ---------- unlabeled_ids : array-like of shape (n_samples,) Indices of the unlabeled samples in the original (unlabeled training) dataset. increment : int Number of observations to select. probabilities : array-like of shape (n_samples, n_classes) Class probabilities of the input samples belonging to the unlabeled dataset. Returns ------- new_ids : array of shape (increment,) Indices of unlabeles samples to be added to the labeled training dataset. """ probs_sorted = np.sort(probabilities, axis=1)[:, ::-1] values = probs_sorted[:, 0] - probs_sorted[:, 1] new_ids = unlabeled_ids[np.argsort(values)[:increment]] return new_ids
[docs]def random(unlabeled_ids, increment, random_state=None, **kwargs): """ Random sample selection. Parameters ---------- unlabeled_ids : array-like of shape (n_samples,) Indices of the unlabeled samples in the original (unlabeled training) dataset. increment : int Number of observations to select. random_state : int, RandomState instance, default=None Control the randomization of the algorithm. - If int, ``random_state`` is the seed used by the random number generator; - If ``RandomState`` instance, random_state is the random number generator; - If ``None``, the random number generator is the ``RandomState`` instance used by ``np.random``. Returns ------- new_ids : array of shape (increment,) Indices of unlabeles samples to be added to the labeled training dataset. """ rng = np.random.RandomState(random_state) new_ids = rng.choice(unlabeled_ids, increment, replace=False) return new_ids
SELECTION_CRITERIA = dict( entropy=entropy, breaking_ties=breaking_ties, random=random )