# Source code for skfb.metrics._ranking

"""Metrics to assess performance based on scores."""

import collections
import warnings

import numpy as np

from sklearn.metrics import accuracy_score, auc
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import check_array

from sklearn.utils.multiclass import type_of_target

from ..core.array import fbarray
from ..core.exceptions import SKFBWarning
from ..utils._legacy import (
    Interval,
    Real,
    StrOptions,
    validate_params,
)
from ._common import prediction_quality


# Result container of a fallback quality curve: three parallel sequences
# holding fallback rates, scores, and the thresholds they belong to.
FallbackQualityResult = collections.namedtuple(
    "FallbackQualityResult",
    ["fallback_rates", "scores", "thresholds"],
)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_score": ["array-like"],
        "score_func": [callable],
        "predict_method": [
            StrOptions({"predict", "predict_proba", "predict_log_proba"}),
        ],
        "min_fallback_rate": [Interval(Real, 0.0, 1.0, closed="both")],
        "max_fallback_rate": [Interval(Real, 0.0, 1.0, closed="both")],
        "raise_warning": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def fallback_quality_curve(
    y_true,
    y_score,
    score_func=accuracy_score,
    predict_method="predict",
    min_fallback_rate=0.0,
    max_fallback_rate=0.95,
    raise_warning=True,
):
    """Construct the prediction quality vs fallback rate curve.

    The confidence of a sample is its largest score (row-wise maximum of
    ``y_score``). Every unique confidence is tried as a threshold: samples whose
    confidence is strictly below the threshold are marked as fallbacks, the
    fallback rate is the fraction of such samples, and the prediction quality is
    obtained by passing the masked predictions to ``prediction_quality`` together
    with ``score_func``.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        True labels.
    y_score : array-like, shape (n_samples, n_classes)
        Predicted scores, one column per class.
    score_func : callable, default=sklearn.metrics.accuracy_score
        Scoring function (such as accuracy score) to calculate on accepted
        examples for every fallback threshold.
    predict_method : {"predict", "predict_proba", "predict_log_proba"}, \
            default="predict"
        Whether ``score_func`` accepts classes, probabilities, or log-probabilities.
    min_fallback_rate : float, default=0.0
        Minimum fallback rate to include.
    max_fallback_rate : float, default=0.95
        Maximum fallback rate to include.
    raise_warning : bool, default=True
        Raise warning if ``score_func`` raises ValueError.

    Returns
    -------
    FallbackQualityResult
        Fallback rates, scores, and thresholds. All are ndarrays of the same
        shape (n_kept_thresholds,); thresholds whose fallback rate falls outside
        ``[min_fallback_rate, max_fallback_rate]`` or whose scoring raised
        ValueError are left out of all three arrays.

    Raises
    ------
    ValueError
        If ``min_fallback_rate >= max_fallback_rate``, if the target type is
        neither binary nor multiclass, or if ``y_score`` is not 2-dimensional.
    AssertionError
        If no threshold satisfies the fallback rate constraints, or if scoring
        failed for every threshold that does.

    Notes
    -----
    If ``score_func`` accepts probabilities, we pass the probabilities of the positive
    class (the second column of ``y_score``).
    """
    # region Check fallback constraints
    if min_fallback_rate >= max_fallback_rate:
        raise ValueError("min_fallback_rate should be less than max_fallback_rate")
    # endregion

    # region Check types and normalize
    y_type = type_of_target(y_true, input_name="y_true")
    y_true = check_array(y_true, ensure_2d=False, dtype=None)
    y_score = check_array(y_score, ensure_2d=False)

    if y_type not in {"binary", "multiclass"}:
        raise ValueError(f"{y_type} is not supported")

    # Row-wise max/argmax and the ``[:, 1]`` column access below need 2-D scores.
    if y_score.ndim != 2:
        raise ValueError(
            "y_score should be 2-dimensional, (n_samples, n_classes); "
            f"got {y_score.ndim} dimension(s)"
        )

    y_true = LabelEncoder().fit_transform(y_true)
    # endregion

    # region Get per-sample confidences and unique thresholds
    # Keep the per-sample confidences: fallback masks must be aligned with the
    # samples, not with the (sorted, deduplicated) thresholds.
    confidences = y_score.max(axis=1)
    thresholds = np.unique(confidences)  # np.unique already returns sorted values
    # endregion

    # region Build predictions once; they do not depend on the threshold
    if predict_method == "predict_proba":
        y_pred = y_score[:, 1]
    elif predict_method == "predict_log_proba":
        y_pred = np.log(y_score[:, 1])
    else:
        y_pred = y_score.argmax(axis=1)
    # endregion

    # region Calculate fallback rate and prediction quality for every threshold
    kept_rates, kept_scores, kept_thresholds = [], [], []
    any_rate_in_range = False
    for threshold in thresholds:
        fallback_mask = confidences < threshold

        fallback_rate = fallback_mask.mean()
        if not min_fallback_rate <= fallback_rate <= max_fallback_rate:
            continue
        any_rate_in_range = True

        try:
            score = prediction_quality(
                y_true, fbarray(y_pred, fallback_mask), score_func
            )
        except ValueError as err:
            if raise_warning:
                reason = err.args[0] if err.args else ""
                warnings.warn(
                    f"Raised ValueError('{reason}'); skipping the threshold",
                    category=SKFBWarning,
                )
            continue

        # Append all three together so the outputs stay index-aligned.
        kept_rates.append(fallback_rate)
        kept_scores.append(score)
        kept_thresholds.append(threshold)
    # endregion

    # Raised explicitly rather than via ``assert`` so the checks survive
    # ``python -O``; the exception type is kept for existing callers.
    if not any_rate_in_range:
        raise AssertionError("No fallback rate collected; reset fallback constraints")
    if not kept_scores:
        raise AssertionError("No scores calculated; check input arguments")

    return FallbackQualityResult(
        np.asarray(kept_rates),
        np.asarray(kept_scores),
        np.asarray(kept_thresholds),
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_score": ["array-like"],
        "score_func": [callable],
        "predict_method": [
            StrOptions({"predict", "predict_proba", "predict_log_proba"}),
        ],
        "min_fallback_rate": [Interval(Real, 0.0, 1.0, closed="both")],
        "max_fallback_rate": [Interval(Real, 0.0, 1.0, closed="both")],
        "raise_warning": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def fallback_quality_auc_score(
    y_true,
    y_score,
    score_func=accuracy_score,
    predict_method="predict",
    min_fallback_rate=0.0,
    max_fallback_rate=0.95,
    raise_warning=True,
):
    """Return the area under the prediction quality vs fallback rate curve.

    Builds the curve with :func:`fallback_quality_curve`, forwarding every
    argument unchanged, and integrates the scores over the fallback rates with
    :func:`sklearn.metrics.auc`.

    Parameters
    ----------
    y_true : array-like, shape (n_samples,)
        True labels.
    y_score : array-like, shape (n_samples, n_classes)
        Predicted scores, one column per class.
    score_func : callable, default=sklearn.metrics.accuracy_score
        Scoring function (such as accuracy score) to calculate on accepted
        examples for every fallback threshold.
    predict_method : {"predict", "predict_proba", "predict_log_proba"}, \
            default="predict"
        Whether ``score_func`` accepts classes, probabilities, or log-probabilities.
    min_fallback_rate : float, default=0.0
        Minimum fallback rate to include.
    max_fallback_rate : float, default=0.95
        Maximum fallback rate to include.
    raise_warning : bool, default=True
        Raise warning if ``score_func`` raises ValueError.

    Returns
    -------
    float
        The area under the prediction quality vs fallback rate curve.
    """
    curve = fallback_quality_curve(
        y_true,
        y_score,
        score_func=score_func,
        predict_method=predict_method,
        min_fallback_rate=min_fallback_rate,
        max_fallback_rate=max_fallback_rate,
        raise_warning=raise_warning,
    )
    # Access by field name so the unused thresholds need no placeholder.
    return auc(curve.fallback_rates, curve.scores)