"""Classification metrics w/ a rejection option."""
# Names exported by ``from <this module> import *``.
# NOTE(review): ``error_rejection_loss`` is defined in this module but is not
# listed here -- confirm whether that omission is intentional.
__all__ = (
    "get_scoring",
    "predict_accept_confusion_matrix",
    "predict_reject_accuracy_score",
    "predict_reject_recall_score",
)
import functools
import warnings
from sklearn.metrics import (
accuracy_score,
confusion_matrix,
get_scorer,
get_scorer_names,
make_scorer,
zero_one_loss,
)
from sklearn.utils import check_consistent_length, column_or_1d
from sklearn.utils.multiclass import type_of_target
import numpy as np
from ..core import array as ska
from ..utils._legacy import (
Interval,
Real,
StrOptions,
validate_params,
)
from ._common import prediction_quality
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": [ska.FBNDArray],
        "labels": ["array-like", None],
        "sample_weight": ["array-like", None],
        "normalize": [StrOptions({"true", "pred", "all"}), None],
    },
    prefer_skip_nested_validation=True,
)
def predict_accept_confusion_matrix(
    y_true,
    y_pred,
    labels=None,
    sample_weight=None,
    normalize=None,
):
    """Computes confusion matrix w/ rows as accuracy and columns as acceptance.

    Every sample is described by two booleans: whether the prediction equals
    the true label (row) and whether the prediction was accepted, i.e., not
    rejected (column). These two boolean vectors are passed to
    ``sklearn.metrics.confusion_matrix``.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.
    y_pred : FBNDArray of shape (n_samples,)
        Estimated targets as returned by both a rejector and a classifier.
    labels : array-like of shape (2,), default=None
        Boolean labels used to index the matrix. This may be used to reorder
        rows and columns.
        If ``None`` is given, ``[False, True]`` is used, so that the result is
        always a 2x2 matrix in the layout described below, even when one of
        the outcomes (e.g., an incorrect or a rejected prediction) never
        occurs in the data.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.
    normalize : {'true', 'pred', 'all'}, default=None
        Normalizes confusion matrix over the true (rows), predicted (columns)
        conditions or all the population. If None, confusion matrix will not be
        normalized.

    Returns
    -------
    C : ndarray of shape (2, 2)
        TR (true-reject)  FA (false-accept)
        FR (false-reject) TA (true-accept)

    See Also
    --------
    sklearn.metrics.confusion_matrix : True vs Predicted confusion matrix.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> from skfb.metrics import predict_accept_confusion_matrix
    >>> y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
    >>> y_pred = ska.fbarray([0, 1, 0, 1, 0, 1, 1, 1, 0, 1],
    ...                      [1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> cm = predict_accept_confusion_matrix(y_true=y_true, y_pred=y_pred)
    >>> cm
    array([[1, 2],
           [3, 4]])
    """
    if labels is None:
        # Without explicit labels sklearn infers them from the data, so a
        # degenerate input (all correct, all accepted, ...) would yield a
        # matrix smaller than 2x2 and break callers that index TR/FA/FR/TA.
        labels = [False, True]

    # Row index: was the prediction correct?
    y_correct = y_true == y_pred
    # Column index: was the prediction accepted (negated fallback mask)?
    y_accepted = y_pred.get_dense_neg_fallback_mask()

    return confusion_matrix(
        y_correct,
        y_accepted,
        labels=labels,
        sample_weight=sample_weight,
        normalize=normalize,
    )
def _check_targets(y_true, y_pred):
    """Check that ``y_true`` and ``y_pred`` describe the same classification task.

    Only binary and multiclass targets are accepted. A mix of binary and
    multiclass is treated as multiclass; any other mix of target types, or any
    other single target type, raises a ValueError. ``y_true`` is converted
    with ``column_or_1d``; ``y_pred`` is returned untouched.

    Parameters
    ----------
    y_true : array-like
        True labels.
    y_pred : array-like
        Predictions.

    Returns
    -------
    y_type : one of {'multiclass', 'binary'}
        The common target type. A binary problem whose true and predicted
        labels together contain more than two distinct values is reported
        as ``'multiclass'``.
    y_true : array
        ``y_true`` as returned by ``column_or_1d``.
    y_pred : array-like
        The predictions exactly as passed in.

    Raises
    ------
    ValueError
        If the target types cannot be combined or are not supported.
    TypeError
        If the labels of ``y_true`` and ``y_pred`` cannot be merged with
        ``np.union1d`` (binary case only).
    """
    check_consistent_length(y_true, y_pred)

    type_true = type_of_target(y_true)
    type_comb = type_of_target(y_pred)
    kinds = {type_true, type_comb}

    if kinds == {"binary", "multiclass"}:
        # Binary targets are a special case of multiclass ones.
        y_type = "multiclass"
    elif len(kinds) == 1:
        (y_type,) = kinds
    else:
        raise ValueError(
            f"Classification metrics can't handle a mix of "
            f"{type_true} and {type_comb} targets"
        )

    if y_type not in {"binary", "multiclass"}:
        raise ValueError(f"{y_type} is not supported")

    y_true = column_or_1d(y_true)
    if y_type != "binary":
        return y_type, y_true, y_pred

    try:
        unique_values = np.union1d(y_true, y_pred)
    except TypeError as e:
        # We expect y_true and y_pred to be of the same data type.
        # If `y_true` was provided to the classifier as strings,
        # `y_pred` given by the classifier will also be encoded with
        # strings. So we raise a meaningful error
        raise TypeError(
            "Labels in y_true and y_pred should be of the same type. "
            f"Got y_true={np.unique(y_true)} and "
            f"y_pred={np.unique(y_pred)}. Make sure that the "
            "predictions provided by the classifier coincides with "
            "the true labels."
        ) from e

    # Two individually binary vectors may still span three or more labels.
    if unique_values.shape[0] > 2:
        y_type = "multiclass"
    return y_type, y_true, y_pred
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": [ska.FBNDArray],
    },
    prefer_skip_nested_validation=True,
)
def predict_reject_accuracy_score(y_true, y_pred):
    """Calculates the ratio of true acceptance and rejection to all predictions.

    A prediction counts as a hit when it is accepted and correct (TA) or
    rejected and incorrect (TR).

    Parameters
    ----------
    y_true : array-like
        True labels.
    y_pred : FBNDarray
        Base estimator predictions w/ fallback mask.

    Returns
    -------
    score : float
        (TA + TR) / (TA + TR + FA + FR), or ``nan`` if there are no samples.

    Warns
    -----
    RuntimeWarning
        If there are no samples, in which case the score is undefined.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> from skfb.metrics import predict_reject_accuracy_score
    >>> y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
    >>> y_pred = ska.fbarray([0, 1, 0, 1, 0, 1, 1, 1, 0, 1],
    ...                      [1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> predict_reject_accuracy_score(y_true, y_pred)
    np.float64(0.5)
    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    # Defensive check; kept so that the error stays explicit should the
    # target validation ever start letting multilabel targets through.
    if y_type.startswith("multilabel"):
        raise ValueError("Multilabel outputs are not supported.")

    reject_mask = y_pred.get_dense_fallback_mask()
    accept_mask = ~reject_mask

    # ``np.count_nonzero`` counts in C and always returns a plain int, unlike
    # the builtin ``sum``, which iterates element by element in Python.
    true_accept = np.count_nonzero(y_true[accept_mask] == y_pred[accept_mask])
    true_reject = np.count_nonzero(y_true[reject_mask] != y_pred[reject_mask])
    false_accept = np.count_nonzero(y_true[accept_mask] != y_pred[accept_mask])
    false_reject = np.count_nonzero(y_true[reject_mask] == y_pred[reject_mask])

    total = true_accept + true_reject + false_accept + false_reject
    if total == 0:
        # The category must be a Warning subclass: passing ``RuntimeError``
        # makes ``warnings.warn`` itself raise a TypeError.
        warnings.warn(
            "invalid value encountered in scalar divide",
            category=RuntimeWarning,
        )
        return np.nan

    # Keep returning a NumPy scalar, as shown in the example above.
    return np.float64(true_accept + true_reject) / total
@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": [ska.FBNDArray],
        "beta": [Interval(Real, left=0.0, right=1.0, closed="both")],
    },
    prefer_skip_nested_validation=True,
)
def predict_reject_recall_score(y_true, y_pred, beta=0.5):
    """Calculates weighted average of prediction and rejection recalls.

    Prediction recall is the share of correct predictions that were accepted,
    TA / (TA + FR); rejection recall is the share of incorrect predictions
    that were rejected, TR / (TR + FA).

    Parameters
    ----------
    y_true : array-like
        True labels.
    y_pred : FBNDarray
        Base estimator predictions w/ fallback mask.
    beta : float, default=0.5
        The weight of prediction recall, between 0 and 1. Rejection recall is
        weighted by ``1 - beta``.

    Returns
    -------
    score : float
        TA / (TA + FR) * beta + TR / (TR + FA) * (1 - beta)

    Notes
    -----
    If there are no correct predictions (TA + FR == 0) or no incorrect ones
    (TR + FA == 0), the corresponding recall is undefined. No special handling
    is applied here; the division is left to NumPy, which is expected to
    produce ``nan`` together with a ``RuntimeWarning``.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> from skfb.metrics import predict_reject_recall_score
    >>> y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
    >>> y_pred = ska.fbarray([0, 1, 0, 1, 0, 1, 1, 1, 0, 1],
    ...                      [1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> # TR = 1, FA = 2, TA = 4, FR = 3
    >>> predict_reject_recall_score(y_true, y_pred, beta=0.75)
    0.5119...
    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    if y_type.startswith("multilabel"):
        raise ValueError("Multilabel outputs are not supported.")

    # Pin the labels: when they are inferred from the data, a degenerate input
    # (e.g., every prediction correct) gives a matrix smaller than 2x2 and the
    # indexing below raises IndexError.
    cm = predict_accept_confusion_matrix(y_true, y_pred, labels=[False, True])

    # Rows: prediction incorrect/correct; columns: rejected/accepted.
    ta, tr, fa, fr = cm[1, 1], cm[0, 0], cm[0, 1], cm[1, 0]
    return ta / (ta + fr) * beta + tr / (tr + fa) * (1 - beta)
def error_rejection_loss(
    y_true,
    y_prob,
    *,
    thresholds,
    y_pred=None,
    score_func=None,
    class_weight=None,
):
    """Computes weighted combination of fallback rates and prediction error.

    First, for every class, compute the fallback rate: among the samples whose
    highest probability belongs to that class, the share whose probability for
    that class is below the class threshold. Then take the weighted sum of the
    class-wise fallback rates. Second, add the weighted prediction error to
    the obtained score.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True labels.
    y_prob : array-like of shape (n_samples, n_classes)
        Predicted probabilities.
    thresholds : array-like of shape (n_classes,)
        Local (class-wise) thresholds.
    y_pred : array-like of shape (n_samples,), default=None
        Predicted classes or rejections.
        If None, a custom ``score_func`` receives ``y_prob`` instead.
    score_func : callable, default=None
        Prediction error scorer called as ``score_func(y_true, y_pred)`` (or
        ``score_func(y_true, y_prob)`` if ``y_pred`` is None).
        Keep in mind that greater values mean higher overall loss.
        If None, the error depends on ``y_pred``:

        - ``y_pred`` is None: ``sklearn.metrics.zero_one_loss`` between
          ``y_true`` and hard predictions w/o fallbacks (row-wise argmax of
          ``y_prob``);
        - ``y_pred`` is an ``FBNDArray``: ``1 - predict_reject_accuracy_score``;
        - otherwise: ``sklearn.metrics.zero_one_loss`` on ``y_pred``.
    class_weight : array-like of shape (n_classes + 1,), default=None
        Weights of the class-wise fallback rates followed by the weight of the
        prediction error (last element). It is indexed positionally, so a
        sequence or an array is expected. If None, every term gets the uniform
        weight ``1 / (n_classes + 1)``.

    Returns
    -------
    float : prediction-error--fallback-rate loss

    Raises
    ------
    ValueError
        Raised by NumPy if ``thresholds`` cannot be broadcast against the
        columns of ``y_prob``.
    """
    # ``y_prob`` is documented as array-like; the code below needs an ndarray.
    probs = np.asarray(y_prob)

    if class_weight is None:
        n_classes = len(thresholds)
        class_weight = np.full(n_classes + 1, 1 / (n_classes + 1))

    # region Class-wise fallback rates
    # Marks, for every sample, the class(es) holding the highest probability.
    top_class_mask = probs == probs.max(axis=1, keepdims=True)
    below_threshold_mask = np.less(probs, thresholds)

    n_predicted = top_class_mask.sum(axis=0)
    n_rejected = (top_class_mask & below_threshold_mask).sum(axis=0)
    # A class that is never the top prediction has nothing to reject: use a
    # rate of 0 instead of letting 0 / 0 turn the whole loss into nan.
    fallback_rates = np.divide(
        n_rejected,
        n_predicted,
        out=np.zeros(n_predicted.shape, dtype=float),
        where=n_predicted > 0,
    )
    score = np.sum(fallback_rates * class_weight[:-1])
    # endregion

    # region Prediction error
    if score_func is not None:
        error = score_func(y_true, y_prob if y_pred is None else y_pred)
    elif y_pred is None:
        error = zero_one_loss(y_true, probs.argmax(axis=1))
    elif isinstance(y_pred, ska.FBNDArray):
        # A ``make_scorer`` object expects ``(estimator, X, y)`` and cannot be
        # called as a plain metric; use the predict-reject analogue of the
        # zero-one loss instead.
        error = 1 - predict_reject_accuracy_score(y_true, y_pred)
    else:
        error = zero_one_loss(y_true, y_pred)
    # endregion

    score += error * class_weight[-1]
    return score
@validate_params(
    {
        "scoring": [callable, StrOptions(set(get_scorer_names())), None],
        "fallback_mode": [StrOptions({"store", "return"})],
    },
    prefer_skip_nested_validation=True,
)
def get_scoring(scoring=None, fallback_label=-1, fallback_mode="store"):
    """Resolves ``scoring`` into a callable, choosing a default if it's None.

    Parameters
    ----------
    scoring : callable, str or None, default=None
        A callable is returned unchanged. A string is a scikit-learn scorer
        name and is resolved via ``sklearn.metrics.get_scorer``.
    fallback_label : any, default=-1
        Forwarded as ``fallback_label`` to ``prediction_quality``; only used
        if ``scoring`` is None and ``fallback_mode`` is ``"return"``.
    fallback_mode : {"store", "return"}, default="store"
        Selects the default if ``scoring`` is None:
        ``predict_reject_accuracy_score`` for ``"store"``; otherwise
        ``prediction_quality`` w/ ``score_func=accuracy_score`` and the given
        ``fallback_label`` bound.

    Returns
    -------
    scoring : callable
    """
    if isinstance(scoring, str):
        return get_scorer(scoring)

    if scoring is not None:
        # User-supplied callable: hand it back untouched.
        return scoring

    if fallback_mode == "store":
        return predict_reject_accuracy_score

    return functools.partial(
        prediction_quality,
        score_func=accuracy_score,
        fallback_label=fallback_label,
    )