Source code for research.active_learning._selection_methods
"""
Selection criteria to be used along with the ALWrapper object.
"""
import numpy as np
def entropy(unlabeled_ids, increment, probabilities, **kwargs):
    """
    Sample selection based on Entropy selection criterion.

    Selects the ``increment`` samples whose predicted class distribution has
    the highest Shannon entropy (base 2).

    Parameters
    ----------
    unlabeled_ids : array-like of shape (n_samples,)
        Indices of the unlabeled samples in the original (unlabeled training)
        dataset.

    increment : int
        Number of observations to select.

    probabilities : array-like of shape (n_samples, n_classes)
        Class probabilities of the input samples belonging to the unlabeled
        dataset.

    **kwargs
        Ignored. Accepted so that every selection criterion can be called
        with the same set of keyword arguments.

    Returns
    -------
    new_ids : array of shape (increment,)
        Indices of unlabeled samples to be added to the labeled training
        dataset, ordered from highest to lowest entropy.
    """
    # Coerce so that plain lists/tuples work with the fancy indexing below.
    unlabeled_ids = np.asarray(unlabeled_ids)
    probabilities = np.asarray(probabilities, dtype=float)

    # By convention 0 * log2(0) == 0. Evaluating the log only where p > 0
    # avoids the NaN produced by 0 * -inf, which would otherwise be ranked
    # as the *largest* entropy after the reversed argsort.
    positive = probabilities > 0
    log_probs = np.zeros_like(probabilities)
    np.log2(probabilities, out=log_probs, where=positive)

    e = -(probabilities * log_probs).sum(axis=1)
    new_ids = unlabeled_ids[np.argsort(e)[::-1][:increment]]
    return new_ids
def breaking_ties(unlabeled_ids, increment, probabilities, **kwargs):
    """
    Sample selection based on breaking ties selection criterion.

    Selects the samples with the smallest difference between the probability
    values of the first and second most likely classes.

    Parameters
    ----------
    unlabeled_ids : array-like of shape (n_samples,)
        Indices of the unlabeled samples in the original (unlabeled training)
        dataset.

    increment : int
        Number of observations to select.

    probabilities : array-like of shape (n_samples, n_classes)
        Class probabilities of the input samples belonging to the unlabeled
        dataset. At least two classes are required.

    **kwargs
        Ignored. Accepted so that every selection criterion can be called
        with the same set of keyword arguments.

    Returns
    -------
    new_ids : array of shape (increment,)
        Indices of unlabeled samples to be added to the labeled training
        dataset, ordered from smallest to largest margin.
    """
    # Coerce so that plain lists/tuples work with the fancy indexing below.
    unlabeled_ids = np.asarray(unlabeled_ids)
    probabilities = np.asarray(probabilities)

    # Sort each row in descending order so columns 0 and 1 hold the two
    # largest class probabilities.
    probs_sorted = np.sort(probabilities, axis=1)[:, ::-1]
    values = probs_sorted[:, 0] - probs_sorted[:, 1]

    new_ids = unlabeled_ids[np.argsort(values)[:increment]]
    return new_ids
def random(unlabeled_ids, increment, random_state=None, **kwargs):
    """
    Random sample selection.

    Draws ``increment`` indices from ``unlabeled_ids`` without replacement.

    Parameters
    ----------
    unlabeled_ids : array-like of shape (n_samples,)
        Indices of the unlabeled samples in the original (unlabeled training)
        dataset.

    increment : int
        Number of observations to select.

    random_state : int, RandomState instance, default=None
        Control the randomization of the algorithm.

        - If int, ``random_state`` is the seed used by the random number
          generator;
        - If ``RandomState`` instance, random_state is the random number
          generator;
        - If ``None``, the random number generator is the ``RandomState``
          instance used by ``np.random``.

    **kwargs
        Ignored. Accepted so that every selection criterion can be called
        with the same set of keyword arguments.

    Returns
    -------
    new_ids : array of shape (increment,)
        Indices of unlabeled samples to be added to the labeled training
        dataset.
    """
    if random_state is None:
        # Global generator behind the ``np.random.*`` functions, so that
        # ``np.random.seed`` controls the draw as documented above.
        rng = np.random.mtrand._rand
    elif isinstance(random_state, np.random.RandomState):
        # Use the caller's generator directly; wrapping it in a new
        # ``RandomState(...)`` is not a supported way to seed.
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    new_ids = rng.choice(unlabeled_ids, increment, replace=False)
    return new_ids
# Registry mapping each selection criterion's name to the function that
# implements it.
SELECTION_CRITERIA = {
    "entropy": entropy,
    "breaking_ties": breaking_ties,
    "random": random,
}