Source code for mlresearch.metrics._metrics

import numpy as np
from sklearn.metrics import make_scorer
from sklearn.metrics._scorer import _Scorer, _SCORERS
from imblearn.metrics import geometric_mean_score


class ALScorer(_Scorer):
    """
    Make an Active Learning scorer from an AL-specific metric or loss function.

    This factory class wraps scoring functions to be used in
    :class:`~rlearn.model_selection.ModelSearchCV` and
    :class:`~sklearn.model_selection.GridSearchCV`. It takes a score function, such
    as :func:`~mlresearch.metrics.area_under_learning_curve` or
    :func:`~mlresearch.metrics.data_utilization_rate`, and uses it to score an AL
    simulation.

    The signature of the call is `(estimator, X, y)`, where `estimator` is the model
    to be evaluated, `X` is the data and `y` is the ground truth labeling (or `None`
    in the case of unsupervised models).

    Parameters
    ----------
    score_func : callable
        Score function (or loss function) with signature
        ``score_func(y, y_pred, **kwargs)``.

    sign : int, default=1
        Use 1 to keep the original variable's scale, use -1 to reverse the scale.

    Returns
    -------
    scorer : callable
        Callable object that returns a scalar score.
    """

    def __init__(self, score_func, sign=1, **kwargs):
        self._score_func = score_func
        self._sign = sign
        self._kwargs = kwargs

    def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
        """Evaluate predicted target values for X relative to y_true.

        Parameters
        ----------
        method_caller : callable
            Returns predictions given an estimator, method name, and other
            arguments, potentially caching results.

        estimator : object
            Trained estimator to use for scoring. Must expose a ``metadata_``
            attribute produced by the AL simulation, which is used to compute the
            score.

        X : {array-like, sparse matrix}
            Test data. Not used directly by this scorer.

        y_true : array-like
            Gold standard target values for X. Not used directly by this scorer.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights.

        Returns
        -------
        score : float
            Score function applied to the AL metadata stored on ``estimator``.
        """
        metadata = estimator.metadata_
        return self._sign * self._score_func(metadata)
    def set_score_request(self):
        """
        Placeholder to overwrite sklearn's ``_BaseScorer.set_score_request`` function.

        It is not used and was raising a docstring error with scikit-learn v1.3.0.

        Note
        ----
        This placeholder will be removed soon.
        """
        pass
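# Illustrative usage sketch (not part of the original module): any callable that
# accepts the estimator's ``metadata_`` dict can be wrapped by ALScorer. The names
# ``_example_mean_test_score`` and ``_ExampleALModel`` below are hypothetical stand-ins
# for an AL simulation wrapper; the example calls ``_score`` directly because the
# public scorer call signature varies across scikit-learn versions.
def _example_mean_test_score(metadata):
    # Average the test scores stored under the integer (iteration) keys.
    scores = [v["test_score"] for k, v in metadata.items() if isinstance(k, int)]
    return float(np.mean(scores))


class _ExampleALModel:
    def __init__(self, metadata):
        self.metadata_ = metadata


_example_scorer = ALScorer(_example_mean_test_score)
_example_model = _ExampleALModel({0: {"test_score": 0.6}, 1: {"test_score": 0.8}})
_example_score = _example_scorer._score(None, _example_model, X=None, y_true=None)  # (0.6 + 0.8) / 2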
def geometric_mean_score_macro(y_true, y_pred):
    """Geometric mean score with macro average."""
    return geometric_mean_score(y_true, y_pred, average="macro")
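# Illustrative sketch (hypothetical labels, not part of the original module): wrapping
# imblearn's geometric_mean_score with ``average="macro"`` makes the metric directly
# usable as a multiclass scoring function.
_example_gmean = geometric_mean_score_macro(
    np.array([0, 0, 1, 1, 2, 2]), np.array([0, 1, 1, 1, 2, 2])
)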
def area_under_learning_curve(metadata, *args):
    """Area under the learning curve. Used in Active Learning experiments."""
    iterations = np.sort([i for i in metadata.keys() if type(i) is int])[1:]
    test_scores = [metadata[i]["test_score"] for i in iterations]
    auc = np.sum(test_scores) / len(test_scores)
    return auc
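# Illustrative sketch (hypothetical metadata, not part of the original module): the
# integer keys are AL iterations. Iteration 0 (the initial model) is dropped and the
# remaining test scores are averaged, i.e. the area under a unit-spaced learning curve.
_example_alc_metadata = {
    0: {"test_score": 0.50},
    1: {"test_score": 0.70},
    2: {"test_score": 0.90},
}
_example_alc = area_under_learning_curve(_example_alc_metadata)  # (0.70 + 0.90) / 2 = 0.80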
def data_utilization_rate(metadata, threshold=0.8):
    """Data Utilization Rate. Used in Active Learning experiments."""
    iterations = np.sort([i for i in metadata.keys() if type(i) is int])[1:]
    test_scores = [metadata[i]["test_score"] for i in iterations]
    n_obs = metadata["data"][0].shape[0]
    data_utilization = [
        metadata[i - 1]["labeled_pool"].sum() / n_obs for i in iterations
    ]
    indices = np.where(np.array(test_scores) >= threshold)[0]
    arg = indices[0] if len(indices) != 0 else -1
    dur = data_utilization[arg] if arg != -1 else np.nan
    return dur
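# Illustrative sketch (hypothetical metadata, not part of the original module): with a
# pool of 10 observations, iteration 2 is the first whose test score reaches the 0.8
# threshold, so the reported rate is the share of the pool labeled going into it.
_example_dur_metadata = {
    "data": (np.zeros((10, 2)), np.zeros(10)),
    0: {"test_score": 0.50, "labeled_pool": np.array([True] * 2 + [False] * 8)},
    1: {"test_score": 0.70, "labeled_pool": np.array([True] * 4 + [False] * 6)},
    2: {"test_score": 0.85, "labeled_pool": np.ones(10, dtype=bool)},
}
_example_dur = data_utilization_rate(_example_dur_metadata, threshold=0.8)  # 4 / 10 = 0.4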
_SCORERS["geometric_mean_score_macro"] = make_scorer(geometric_mean_score_macro)
_SCORERS["area_under_learning_curve"] = ALScorer(area_under_learning_curve)
_SCORERS["data_utilization_rate"] = ALScorer(data_utilization_rate)
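# Illustrative sketch (assumed behaviour, not part of the original module): the entries
# above are added to scikit-learn's private ``_SCORERS`` registry, so the metrics are
# intended to be usable by name (e.g. ``scoring="area_under_learning_curve"`` in a model
# search). Whether ``get_scorer`` reads this registry depends on the installed
# scikit-learn version.
from sklearn.metrics import get_scorer

_example_named_scorer = get_scorer("geometric_mean_score_macro")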