# Source code for mlresearch.metrics._metrics
import numpy as np
from sklearn.metrics import make_scorer
from sklearn.metrics._scorer import _Scorer, _SCORERS
from imblearn.metrics import geometric_mean_score
class ALScorer(_Scorer):
"""
Make an Active Learning scorer from a AL-specific metric or loss function.
This factory class wraps scoring functions to be used in
:class:`~rlearn.model_selection.ModelSearchCV` and
:class:`~sklearn.model_selection.GridSearchCV`. It takes a score function, such as
:func:`~mlresearch.metrics.area_under_learning_curve` or
:func:`~mlresearch.metrics.data_utilization_rate` and is used to score an AL
simulation. The signature of the call is `(estimator, X, y)` where `estimator` is the
model to be evaluated, `X` is the data and `y` is the ground truth labeling (or
`None` in the case of unsupervised models).
Parameters
----------
score_func : callable
Score function (or loss function) with signature
``score_func(y, y_pred, **kwargs)``.
sign : int, default=1
Use 1 to keep the original variable's scale, use -1 to reverse the scale.
Returns
-------
scorer : callable
Callable object that returns a scalar score.
"""

    def __init__(self, score_func, sign=1, **kwargs):
        self._score_func = score_func
        self._sign = sign
        self._kwargs = kwargs

    def _score(self, method_caller, estimator, X, y_true, sample_weight=None):
        """Evaluate the Active Learning simulation run by ``estimator``.

        Parameters
        ----------
        method_caller : callable
            Returns predictions given an estimator, method name, and other
            arguments, potentially caching results. Unused here; kept for
            compatibility with the scorer API.

        estimator : object
            Trained estimator to use for scoring. Must have a ``metadata_``
            attribute holding the AL simulation's metadata, which is used to
            compute the score.

        X : {array-like, sparse matrix}
            Test data. Unused here; kept for compatibility with the scorer
            API.

        y_true : array-like
            Gold standard target values for X. Unused here; kept for
            compatibility with the scorer API.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. Unused here; kept for compatibility with the
            scorer API.

        Returns
        -------
        score : float
            Score function applied to the metadata of ``estimator``.
        """
        metadata = estimator.metadata_
        return self._sign * self._score_func(metadata)

    def set_score_request(self):
        """
        Placeholder to override sklearn's ``_BaseScorer.set_score_request`` method.

        It is not used and was raising a docstring error with scikit-learn v1.3.0.

        Note
        ----
        This placeholder will be removed soon.
        """
        pass
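

# Hedged usage sketch (an illustrative addition, not part of the original
# module): wraps a metadata-based metric with ALScorer and scores a fake
# fitted estimator. ``_FakeALEstimator`` is hypothetical; it only mimics the
# ``metadata_`` attribute that the overridden ``_score`` hook reads.
def _example_alscorer_usage():
    class _FakeALEstimator:
        metadata_ = {0: {"test_score": 0.5}, 1: {"test_score": 0.9}}

    scorer = ALScorer(area_under_learning_curve)
    # ``_score`` ignores X and y_true and reads ``estimator.metadata_``;
    # with sign=-1 the result would be negated instead.
    return scorer._score(None, _FakeALEstimator(), X=None, y_true=None)  # 0.9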


def geometric_mean_score_macro(y_true, y_pred):
    """Geometric mean score with macro average."""
    return geometric_mean_score(y_true, y_pred, average="macro")
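

# Quick illustrative call (an addition for clarity; labels are chosen so the
# per-class recalls are easy to check by hand). The exact "macro" averaging
# semantics are delegated to imblearn's ``geometric_mean_score``.
def _example_gmean_macro():
    y_true = [0, 0, 1, 1]
    y_pred = [0, 1, 1, 1]  # class 0 recall = 0.5, class 1 recall = 1.0
    return geometric_mean_score_macro(y_true, y_pred)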


def area_under_learning_curve(metadata, *args):
    """Area under the learning curve. Used in Active Learning experiments."""
    # Collect the integer iteration keys, drop the initial iteration (the
    # lowest key), and average the remaining test scores. ``*args`` is unused.
    iterations = np.sort([i for i in metadata.keys() if type(i) is int])[1:]
    test_scores = [metadata[i]["test_score"] for i in iterations]
    auc = np.sum(test_scores) / len(test_scores)
    return auc
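

# Worked sketch of the AULC computation on hand-built metadata (the layout
# below is illustrative): integer keys are AL iterations, the lowest one is
# dropped by the ``[1:]`` slice, and the remaining test scores are averaged.
def _example_area_under_learning_curve():
    metadata = {
        "data": None,             # non-integer keys are ignored
        0: {"test_score": 0.50},  # initial fit, dropped by the slice
        1: {"test_score": 0.70},
        2: {"test_score": 0.90},
    }
    return area_under_learning_curve(metadata)  # (0.70 + 0.90) / 2 == 0.80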


def data_utilization_rate(metadata, threshold=0.8):
    """Data Utilization Rate. Used in Active Learning experiments."""
    # Integer keys are the AL iterations; drop the initial one.
    iterations = np.sort([i for i in metadata.keys() if type(i) is int])[1:]
    test_scores = [metadata[i]["test_score"] for i in iterations]
    # Fraction of the dataset that was labeled going into each iteration.
    n_obs = metadata["data"][0].shape[0]
    data_utilization = [
        metadata[i - 1]["labeled_pool"].sum() / n_obs for i in iterations
    ]
    # DUR is the utilization at the first iteration whose test score reaches
    # the threshold; NaN if the threshold is never reached.
    indices = np.where(np.array(test_scores) >= threshold)[0]
    dur = data_utilization[indices[0]] if len(indices) != 0 else np.nan
    return dur
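

# Worked sketch of the DUR computation on hand-built metadata (layout is
# illustrative): with threshold=0.8, the first kept iteration scoring >= 0.8
# is iteration 2, whose preceding labeled pool covers 4 of 10 observations.
def _example_data_utilization_rate():
    X = np.zeros((10, 2))  # 10 observations in total
    metadata = {
        "data": (X,),
        0: {"test_score": 0.5, "labeled_pool": np.repeat([True, False], [2, 8])},
        1: {"test_score": 0.7, "labeled_pool": np.repeat([True, False], [4, 6])},
        2: {"test_score": 0.9, "labeled_pool": np.repeat([True, False], [6, 4])},
    }
    return data_utilization_rate(metadata, threshold=0.8)  # 4 / 10 == 0.4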
_SCORERS["geometric_mean_score_macro"] = make_scorer(geometric_mean_score_macro)
_SCORERS["area_under_learning_curve"] = ALScorer(area_under_learning_curve)
_SCORERS["data_utilization_rate"] = ALScorer(data_utilization_rate)