# Source code for skfb.metrics._classification

"""Classification metrics w/ a rejection option."""

# Names exported by ``from ... import *``. ``error_rejection_loss`` and
# ``_check_targets`` are defined in this module but are not listed here.
__all__ = (
    "get_scoring",
    "predict_accept_confusion_matrix",
    "predict_reject_accuracy_score",
    "predict_reject_recall_score",
)

import functools
import warnings

from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    get_scorer,
    get_scorer_names,
    make_scorer,
    zero_one_loss,
)

from sklearn.utils import check_consistent_length, column_or_1d
from sklearn.utils.multiclass import type_of_target

import numpy as np

from ..core import array as ska
from ..utils._legacy import (
    Interval,
    Real,
    StrOptions,
    validate_params,
)
from ._common import prediction_quality


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": [ska.FBNDArray],
        "labels": ["array-like", None],
        "sample_weight": ["array-like", None],
        "normalize": [StrOptions({"true", "pred", "all"}), None],
    },
    prefer_skip_nested_validation=True,
)
def predict_accept_confusion_matrix(
    y_true,
    y_pred,
    labels=None,
    sample_weight=None,
    normalize=None,
):
    """Computes confusion matrix w/ rows as accuracy and columns as acceptance.

    Every sample is described by two boolean flags: whether the prediction
    equals the true label ("correct") and whether the prediction was accepted
    (i.e. not routed to the fallback). The flags are then cross-tabulated with
    :func:`sklearn.metrics.confusion_matrix`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : FBNDArray of shape (n_samples,)
        Estimated targets as returned by both a rejector and a classifier.

    labels : array-like of shape (2,), default=None
        Flag values used to index the rows and columns of the matrix. This may
        be used to reorder the matrix.
        If ``None`` is given, ``[False, True]`` is used, so the result always
        has shape (2, 2), even when only one flag value occurs in the data
        (e.g. every prediction is both correct and accepted).

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    normalize : {'true', 'pred', 'all'}, default=None
        Normalizes confusion matrix over the true (rows), predicted (columns)
        conditions or all the population. If None, confusion matrix will not be
        normalized.

    Returns
    -------
    C : ndarray of shape (2, 2)
        TR (true-reject)   FA (false-accept)
        FR (false-reject)  TA (true-accept)

    See Also
    --------
    sklearn.metrics.confusion_matrix : True vs Predicted confusion matrix.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> from skfb.metrics import predict_accept_confusion_matrix
    >>> y_true =    np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
    >>> y_pred = ska.fbarray([0, 1, 0, 1, 0, 1, 1, 1, 0, 1],
    ...                      [1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> cm = predict_accept_confusion_matrix(y_true=y_true, y_pred=y_pred)
    >>> cm
    array([[1, 2],
           [3, 4]])
    """
    # Row variable: does the prediction match the ground truth?
    y_correct = y_true == y_pred
    # Column variable: was the prediction accepted (not sent to the fallback)?
    y_accepted = y_pred.get_dense_neg_fallback_mask()

    # Without explicit labels ``confusion_matrix`` infers them from the data
    # and collapses to 1x1 when both flag vectors hold a single value, which
    # breaks callers that index the TR/FA/FR/TA cells.
    if labels is None:
        labels = [False, True]

    return confusion_matrix(
        y_correct,
        y_accepted,
        labels=labels,
        sample_weight=sample_weight,
        normalize=normalize,
    )


def _check_targets(y_true, y_pred):
    """Validate that ``y_true`` and ``y_pred`` describe one classification task.

    Only binary and multiclass targets are accepted. A binary/multiclass mix is
    treated as multiclass; any other mix of target types, or any other single
    target type, raises ``ValueError``. Inputs of different lengths are
    rejected by ``check_consistent_length``.

    ``y_true`` is flattened to 1d with ``column_or_1d``; ``y_pred`` is returned
    untouched.

    Parameters
    ----------
    y_true : array-like
        True labels.
    y_pred : array-like
        Predictions.

    Returns
    -------
    y_type : one of {'multiclass', 'binary'}
        The common target type. A "binary" result is promoted to "multiclass"
        when ``y_true`` and ``y_pred`` together hold more than two distinct
        values.
    y_true : ndarray
        ``y_true`` as a 1d array.
    y_pred : array-like
        ``y_pred`` exactly as passed in.

    Raises
    ------
    ValueError
        If the target types cannot be reconciled or are not supported.
    TypeError
        If the labels of ``y_true`` and ``y_pred`` cannot be merged because
        they are of incompatible types.
    """
    check_consistent_length(y_true, y_pred)

    supported = {"binary", "multiclass"}
    type_true = type_of_target(y_true)
    type_comb = type_of_target(y_pred)

    kinds = {type_true, type_comb}
    if kinds == supported:
        # One side binary, the other multiclass: the task is multiclass.
        kinds = {"multiclass"}
    if len(kinds) > 1:
        raise ValueError(
            "Classification metrics can't handle a mix of "
            f"{type_true} and {type_comb} targets"
        )

    (y_type,) = kinds
    if y_type not in supported:
        raise ValueError(f"{y_type} is not supported")

    y_true = column_or_1d(y_true)
    if y_type != "binary":
        return y_type, y_true, y_pred

    try:
        unique_values = np.union1d(y_true, y_pred)
    except TypeError as e:
        # We expect y_true and y_pred to be of the same data type.
        # If `y_true` was provided to the classifier as strings,
        # `y_pred` given by the classifier will also be encoded with
        # strings. So we raise a meaningful error
        raise TypeError(
            "Labels in y_true and y_pred should be of the same type. "
            f"Got y_true={np.unique(y_true)} and "
            f"y_pred={np.unique(y_pred)}. Make sure that the "
            "predictions provided by the classifier coincides with "
            "the true labels."
        ) from e

    # Each side may be binary on its own while the union has 3+ labels.
    if unique_values.shape[0] > 2:
        y_type = "multiclass"

    return y_type, y_true, y_pred


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": [ska.FBNDArray],
    },
    prefer_skip_nested_validation=True,
)
def predict_reject_accuracy_score(y_true, y_pred):
    """Calculates the ratio of true acceptance and rejection to all predictions.

    An accepted prediction counts as a success when it matches the true label
    (true accept); a rejected prediction counts as a success when it does not
    match the true label (true reject).

    Parameters
    ----------
    y_true : array-like
        True labels.
    y_pred : FBNDarray
        Base estimator predictions w/ fallback mask.

    Returns
    -------
    score : float
        (TA + TR) / (TA + TR + FA + FR). ``nan`` (with a ``RuntimeWarning``)
        if there are no samples to score.

    Raises
    ------
    ValueError
        If the targets are not binary or multiclass.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> from skfb.metrics import predict_reject_accuracy_score
    >>> y_true =    np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
    >>> y_pred = ska.fbarray([0, 1, 0, 1, 0, 1, 1, 1, 0, 1],
    ...                      [1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> predict_reject_accuracy_score(y_true, y_pred)
    np.float64(0.5)
    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)

    # Defensive: ``_check_targets`` already rejects non binary/multiclass input.
    if y_type.startswith("multilabel"):
        raise ValueError("Multilabel outputs are not supported.")

    reject_mask = y_pred.get_dense_fallback_mask()
    accept_mask = ~reject_mask

    true_accept = sum(y_true[accept_mask] == y_pred[accept_mask])
    true_reject = sum(y_true[reject_mask] != y_pred[reject_mask])
    false_accept = sum(y_true[accept_mask] != y_pred[accept_mask])
    false_reject = sum(y_true[reject_mask] == y_pred[reject_mask])

    total = true_accept + true_reject + false_accept + false_reject
    if total == 0:
        # The category must be a Warning subclass; passing an exception class
        # such as RuntimeError makes ``warnings.warn`` itself raise TypeError.
        warnings.warn(
            "invalid value encountered in scalar divide",
            category=RuntimeWarning,
        )
        return np.nan

    return (true_accept + true_reject) / total


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": [ska.FBNDArray],
        "beta": [Interval(Real, left=0.0, right=1.0, closed="both")],
    },
    prefer_skip_nested_validation=True,
)
def predict_reject_recall_score(y_true, y_pred, beta=0.5):
    """Calculates weighted average of prediction and rejection recalls.

    Prediction recall is the share of correct predictions that were accepted,
    TA / (TA + FR); rejection recall is the share of incorrect predictions
    that were rejected, TR / (TR + FA).

    Parameters
    ----------
    y_true : array-like
        True labels.
    y_pred : FBNDarray
        Base estimator predictions w/ fallback mask.
    beta : float, default=0.5
        The weight of prediction recall, in [0, 1]. Rejection recall is
        weighted by ``1 - beta``.

    Returns
    -------
    score : float
        TA / (TA + FR) * beta + TR / (TR + FA) * (1 - beta)

    Raises
    ------
    ValueError
        If the targets are not binary or multiclass.

    Notes
    -----
    No special handling is done for an empty denominator: if there are no
    correct predictions (TA + FR == 0) or no incorrect ones (TR + FA == 0),
    the corresponding recall is a 0 / 0 division.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> from skfb.metrics import predict_reject_recall_score
    >>> y_true =    np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 1])
    >>> y_pred = ska.fbarray([0, 1, 0, 1, 0, 1, 1, 1, 0, 1],
    ...                      [1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    >>> # TR = 1, FA = 2, TA = 4, FR = 3
    >>> predict_reject_recall_score(y_true, y_pred, beta=0.75)
    0.5119...
    """
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    # Defensive: ``_check_targets`` already rejects non binary/multiclass input.
    if y_type.startswith("multilabel"):
        raise ValueError("Multilabel outputs are not supported.")

    # Pin the labels so the matrix is always 2x2; with inferred labels it
    # shrinks to 1x1 when every sample lands in the same cell (e.g. all
    # predictions correct and accepted) and the indexing below would fail.
    cm = predict_accept_confusion_matrix(y_true, y_pred, labels=[False, True])
    ta, tr, fa, fr = cm[1, 1], cm[0, 0], cm[0, 1], cm[1, 0]
    return ta / (ta + fr) * beta + tr / (tr + fa) * (1 - beta)


def error_rejection_loss(
    y_true,
    y_prob,
    *,
    thresholds,
    y_pred=None,
    score_func=None,
    class_weight=None,
):
    """Computes weighted combination of rejection probabilities and prediction error.

    First, for every class, compute the fallback rate: among the samples whose
    highest probability belongs to that class, the share whose probability is
    below the class threshold. Then take the weighted sum of the fallback
    rates. Second, add the weighted prediction error to the obtained score.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        True labels.
    y_prob : array-like of shape (n_samples, n_classes)
        Predicted probabilities.
    thresholds : array-like of shape (n_classes,)
        Local (class-wise) thresholds. A probability strictly below its class
        threshold counts as a fallback.
    y_pred : array-like of shape (n_samples,), default=None
        Predicted classes or rejections.
        If given, ``score_func`` is called with ``y_pred``; otherwise with
        ``y_prob``.
    score_func : callable, default=None
        Prediction error scorer called as ``score_func(y_true, y_pred)`` (or
        ``score_func(y_true, y_prob)`` when ``y_pred`` is None).
        If None:

        - ``y_pred`` is None: ``sklearn.metrics.zero_one_loss`` between
          ``y_true`` and hard arg-max predictions w/o fallbacks;
        - ``y_pred`` is an ``FBNDArray``:
          ``1 - predict_reject_accuracy_score(y_true, y_pred)``;
        - otherwise: ``sklearn.metrics.zero_one_loss`` on ``y_pred``.

        Keep in mind that greater values mean higher overall loss.
    class_weight : array-like of shape (n_classes + 1,), default=None
        Weights of the summed terms: the first ``n_classes`` entries weight the
        per-class fallback rates, the last entry weights the prediction error.
        If None, every term gets the uniform weight ``1 / (n_classes + 1)``.

    Returns
    -------
    float : prediction-error--fallback-rate loss
    """
    y_prob = np.asarray(y_prob)
    n_classes = y_prob.shape[1]

    # region Set default class weights
    if class_weight is None:
        class_weight = np.full(n_classes + 1, 1 / (n_classes + 1))
    else:
        # Weights are consumed positionally (sliced below), so normalize any
        # sequence to an array up front.
        class_weight = np.asarray(class_weight, dtype=float)
    # endregion

    # region Choose the default scoring function
    if score_func is None:
        score_func = zero_one_loss
        if y_pred is None:
            y_pred = y_prob.argmax(axis=1)
        elif isinstance(y_pred, ska.FBNDArray):
            # A ``make_scorer`` object has the (estimator, X, y) signature and
            # cannot be called as ``score_func(y_true, y_pred)``; use a plain
            # rejection-aware loss instead.
            # NOTE(review): complement of predict-reject accuracy is assumed
            # to be the intended loss here -- confirm.
            def _reject_aware_error(true_labels, fb_pred):
                return 1.0 - predict_reject_accuracy_score(true_labels, fb_pred)

            score_func = _reject_aware_error
    # endregion

    # Cells holding the row-wise maximum mark the predicted class of a sample.
    pred_class_mask = y_prob == y_prob.max(axis=1, keepdims=True)
    fallback_mask = np.less(y_prob, thresholds)
    rejected_mask = pred_class_mask & fallback_mask

    predicted_counts = pred_class_mask.sum(axis=0)
    rejected_counts = rejected_mask.sum(axis=0)
    # A class that is never predicted has no fallbacks: report a rate of 0
    # instead of the NaN a 0 / 0 division would inject into the total.
    fallback_rates = np.divide(
        rejected_counts,
        predicted_counts,
        out=np.zeros(predicted_counts.shape, dtype=float),
        where=predicted_counts > 0,
    )
    score = np.sum(fallback_rates * class_weight[:-1])

    if y_pred is None:
        error = score_func(y_true, y_prob)
    else:
        error = score_func(y_true, y_pred)

    score += error * class_weight[-1]

    return score


@validate_params(
    {
        "scoring": [callable, StrOptions(set(get_scorer_names())), None],
        "fallback_mode": [StrOptions({"store", "return"})],
    },
    prefer_skip_nested_validation=True,
)
def get_scoring(scoring=None, fallback_label=-1, fallback_mode="store"):
    """Resolves ``scoring`` into a callable, falling back to a default scorer.

    Parameters
    ----------
    scoring : callable, str or None, default=None
        A callable is returned unchanged; a string is looked up with
        ``sklearn.metrics.get_scorer``; None selects a default that depends on
        ``fallback_mode``.
    fallback_label : default=-1
        Forwarded to ``prediction_quality`` as ``fallback_label``. Only used
        when ``scoring`` is None and ``fallback_mode`` is not "store".
    fallback_mode : {"store", "return"}, default="store"
        Chooses the default scorer: ``predict_reject_accuracy_score`` for
        "store", otherwise ``prediction_quality`` bound to ``accuracy_score``.

    Returns
    -------
    scoring : callable
        The resolved scorer.
    """
    if isinstance(scoring, str):
        return get_scorer(scoring)

    if scoring is not None:
        # User-supplied callable: hand it back as is.
        return scoring

    if fallback_mode == "store":
        return predict_reject_accuracy_score

    return functools.partial(
        prediction_quality,
        score_func=accuracy_score,
        fallback_label=fallback_label,
    )