# Source code for skfb.ensemble._threshold

"""Threshold-based cascade ensembles."""

from typing import Sequence

import warnings

import numpy as np

from sklearn.base import BaseEstimator, ClassifierMixin, check_is_fitted, clone
from sklearn.metrics import accuracy_score, get_scorer, get_scorer_names
from sklearn.model_selection import check_cv, ParameterGrid
from sklearn.utils.multiclass import unique_labels
from sklearn.utils.validation import NotFittedError

try:
    from sklearn.utils.parallel import delayed, Parallel
except ModuleNotFoundError:
    from joblib import Parallel, delayed

from ..utils._legacy import (
    _fit_context,
    Integral,
    Interval,
    Real,
    StrOptions,
    validate_params,
)
from ._common import fit_one
from ..core.array import earray
from ..core.exceptions import SKFBException, SKFBWarning


class CascadeNotFittedWarning(SKFBWarning):
    """Warns that base estimators in a cascade are unfitted or fitted incorrectly."""


class CascadeParetoConfigWarning(SKFBWarning):
    """Warns that no Pareto configuration meets the cost-performance constraints."""


class CascadeParetoConfigException(SKFBException):
    """Signals that no Pareto configuration meets the cost-performance constraints."""


class ThresholdCascadeClassifier(BaseEstimator, ClassifierMixin):
    """Cascade of classifiers w/ deferrals based on predefined thresholds.

    During inference, runs the first estimator and if a predicted score is lower than
    ``thresholds[0]``, tries the second, and so on. The last estimator always makes
    predictions on the samples deferred by the previous estimators.
    If every estimator is fitted, it is not necessary to run ``fit`` to make
    predictions.

    Parameters
    ----------
    estimators : array-like of object, length n_estimators
        Base estimators. Preferably, from weakest (e.g., rule-based or linear) to
        strongest (e.g., gradient boosting).
    thresholds : float or array-like of float, length n_estimators - 1
        Deferral thresholds for each base estimator except the last.
        If only one number is specified, every estimator (except the last) will have
        the same threshold (i.e., the threshold will be *global*).
    response_method : {"predict_proba", "decision_function"}, default="predict_proba"
        Method of ``estimators`` whose output is compared with the deferral
        thresholds. For ``"decision_function"``, ``thresholds`` can be negative.
    return_earray : bool, default=True
        Whether to return :class:`~skfb.core.ENDArray` of predicted classes / scores
        or plain numpy ndarray.
    prefit : bool, default=False
        Whether estimators are fitted. If True, checks their ``classes_`` attributes
        for intercompatibility.
    n_jobs : int, default=None
        Number of parallel jobs used during training.
    verbose : int, default=False
        Verbosity level.

    Attributes
    ----------
    estimators_ : sequence of object
        Fitted base estimators. Set by ``fit``, or at construction time when
        ``prefit=True`` and every estimator passes the fitted/classes check.
    thresholds_ : list of float
        ``thresholds`` expanded to one value per estimator.
    classes_ : ndarray
        Class labels, taken from ``y`` in ``fit`` or from the prefit estimators.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.ensemble import ThresholdCascadeClassifier
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.linear_model import LogisticRegression
    >>> X = np.array([
    ...     [0, 0], [4, 4], [1, 1], [3, 3], [2.5, 2], [2., 2.5], [2., 2.], [2.5, 2.5]
    ... ])
    >>> y = np.array([0, 1, 0, 1, 0, 1, 1, 0])
    >>> maxent = LogisticRegression(random_state=0)
    >>> rf = RandomForestClassifier(random_state=0)
    >>> cascade = ThresholdCascadeClassifier([maxent, rf], [0.8]).fit(X, y)
    >>> cascade.score(X, y)
    1.0
    >>> cascade.set_estimators(0).score(X, y)  # Use only LogisticRegression
    0.75

    Notes
    -----
    If you want to have a fallback option (for the last estimator), consider rejectors
    from :mod:`skfb.estimators`.
    """

    # Declarative constraints, one entry per constructor parameter.
    # NOTE(review): presumably consumed by the ``_fit_context`` validation — confirm.
    _parameter_constraints = {
        "estimators": ["array-like"],
        "thresholds": ["array-like", Interval(Real, None, None, closed="neither")],
        "response_method": [StrOptions({"decision_function", "predict_proba"})],
        "return_earray": ["boolean"],
        "prefit": ["boolean"],
        "n_jobs": [Interval(Integral, -1, None, closed="left"), None],
        "verbose": ["verbose"],
    }

    def __init__(
        self,
        estimators,
        thresholds,
        response_method="predict_proba",
        return_earray=True,
        prefit=False,
        n_jobs=None,
        verbose=False,
    ):
        """Stores parameters and, if ``prefit``, adopts already fitted estimators.

        With ``prefit=True`` every estimator must expose ``classes_`` and all of
        them must agree; otherwise a ``CascadeNotFittedWarning`` is issued and the
        cascade stays unfitted until ``fit`` is called.
        """
        self.estimators = estimators
        self.response_method = response_method
        self.thresholds = thresholds
        self.return_earray = return_earray
        self.prefit = prefit
        self.n_jobs = n_jobs
        self.verbose = verbose
        # Needs ``estimators`` and ``response_method`` to be assigned already.
        self._set_thresholds(thresholds)

        if not self.prefit:
            return

        # region Check if base estimators are fitted correctly
        shared_classes = None
        for position, candidate in enumerate(self.estimators):
            try:
                check_is_fitted(candidate, "classes_")
            except NotFittedError:
                warnings.warn(
                    f"Estimator {position} is not fitted; "
                    f"please, run cascade's `fit` method to train all estimators.",
                    category=CascadeNotFittedWarning,
                )
                return

            mismatch = shared_classes is not None and not np.array_equal(
                shared_classes, candidate.classes_
            )
            if mismatch:
                warnings.warn(
                    f"Estimators {position} and {position - 1} predict different "
                    f"classes; "
                    f"please, run cascade's `fit` method to train all "
                    f"estimators.",
                    category=CascadeNotFittedWarning,
                )
                return

            shared_classes = candidate.classes_
        # endregion

        # Every estimator is fitted on the same classes: usable without ``fit``.
        self.estimators_ = estimators
        self.classes_ = shared_classes
        self._current_estimators = self.estimators_[:]
        self._current_thresholds = self.thresholds_[:]
        self.is_fitted_ = True

[docs]    @_fit_context(prefer_skip_nested_validation=False)
    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
            "y": ["array-like"],
            "sample_weight": ["array-like", None],
        },
        prefer_skip_nested_validation=True,
    )
    def fit(self, X, y, sample_weight=None):
        """Fits base estimators and sets meta-estimator attributes.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values (class labels).
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        self : object
            Returns self.
        """
        self.classes_ = unique_labels(y)

        if not hasattr(self, "estimators_"):
            self.estimators_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
                delayed(fit_one)(estimator, X, y, sample_weight)
                for estimator in self.estimators
            )
        self._current_estimators = self.estimators_[:]
        self._current_thresholds = self.thresholds_[:]

        self.is_fitted_ = True

        return self

[docs]    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
        },
        prefer_skip_nested_validation=True,
    )
    def predict(self, X):
        """Predicts classes using one or more base estimators.

        Tries estimators in the order specified during initialization. If the first
        estimator doesn't have a score higher or equal than the first threshold,
        switches to the second estimator, and so on. The last estimator always makes
        predictions if all the previous estimators deferred.

        Parameters
        ----------
        X : indexable, length n_samples
            Input samples to classify.
            Must fulfill the input assumptions of the underlying estimators.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Classes predicted by the base estimators.
        """
        check_is_fitted(self, attributes="is_fitted_")

        y_score = self._predict_scores(X)

        if y_score.ndim == 2:
            y_pred = np.take(self.classes_, y_score.argmax(axis=1))
        else:
            y_pred = np.take(self.classes_, y_score >= 0)

        return earray(y_pred, y_score.ensemble_mask) if self.return_earray else y_pred

[docs]    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
        },
        prefer_skip_nested_validation=True,
    )
    def predict_proba(self, X):
        """Predicts probabilities using one or more base estimators.

        Tries estimators in the order specified during initialization. If the first
        estimator doesn't have a score higher or equal than the first threshold,
        switches to the second estimator, and so on. The last estimator always makes
        predictions if all the previous estimators deferred.

        Parameters
        ----------
        X : indexable, length n_samples
            Input samples to classify.
            Must fulfill the input assumptions of the underlying estimators.

        Returns
        -------
        y_prob : ndarray of shape (n_samples, n_classes)
            Probabilities predicted by the base estimators.
        """
        check_is_fitted(self, attributes="is_fitted_")
        return self._predict_scores(X)

[docs]    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
        },
        prefer_skip_nested_validation=True,
    )
    def predict_log_proba(self, X):
        """Predicts log-probabilities using one or more base estimators.

        Tries estimators in the order specified during initialization. If the first
        estimator doesn't have a score higher or equal than the first threshold,
        switches to the second estimator, and so on. The last estimator always makes
        predictions if all the previous estimators deferred.

        Parameters
        ----------
        X : indexable, length n_samples
            Input samples to classify.
            Must fulfill the input assumptions of the underlying estimators.

        Returns
        -------
        y_score : ndarray of shape (n_samples, n_classes)
            Log-probabilities predicted by the base estimators.
        """
        return np.log(self.predict_proba(X))

[docs]    def decision_function(self, X):
        """Predicts decision scores using one or more base estimators.

        Tries estimators in the order specified during initialization. If the first
        estimator doesn't have a score higher or equal than the first threshold,
        switches to the second estimator, and so on. The last estimator always makes
        predictions if all the previous estimators deferred.

        Parameters
        ----------
        X : indexable, length n_samples
            Input samples to classify.
            Must fulfill the input assumptions of the underlying estimators.

        Returns
        -------
        y_score : ndarray of shape n_samples
            Decision scores predicted by the base estimators.
        """
        check_is_fitted(self, attributes="is_fitted_")
        return self._predict_scores(X)

[docs]    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
            "y": ["array-like"],
            "sample_weight": ["array-like", None],
        },
        prefer_skip_nested_validation=True,
    )
    def score(self, X, y, sample_weight=None):
        """Computes accuracy score on true labels and cascade predictions.

        Parameters
        ----------
        X : indexable, length n_samples
            Input samples to evaluate.
            Must fulfill the input assumptions of the underlying estimators.
        y : array-like of shape (n_samples,)
            True labels for `X`.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.

        Returns
        -------
        score : float
            Accuracy score.
        """
        check_is_fitted(self, attributes="is_fitted_")

        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)

[docs]    def set_params(self, **params):
        """Sets the parameters of the cascade.

        If thresholds are provided, the transformations are done accordingly, so there
        is no need to refit the cascade.

        Parameters
        ----------
        **params : dict
            Cascade parameters.

        Returns
        -------
        self : object
            Returns self.
        """
        if "thresholds" in params:
            self._set_thresholds(params["thresholds"])
        return super().set_params(**params)

    def _set_thresholds(self, thresholds):
        """Transforms ``thresholds`` into correct sequence of thresholds."""
        if isinstance(thresholds, float):
            self.thresholds_ = [thresholds] * (len(self.estimators) - 1)
        else:
            assert len(thresholds) >= len(self.estimators) - 1, (
                "thresholds must be provided for at least all but the last estimator"
            )

            self.thresholds_ = list(thresholds)

        if self.response_method == "decision_function":
            if len(thresholds) == len(self.estimators) - 1:
                self.thresholds_.append(-np.inf)
        else:
            self.thresholds_.append(0.0)

        self._current_thresholds = self.thresholds_[:]

        return self

[docs]    def set_estimators(self, index):
        """Sets the estimators and thresholds to use for prediction and scoring.

        If a single index passed, the corresponding threshold is set to 0.0 or -np.inf
        depending on the ``response_method`` attribute.

        By default, uses all trained estimators (available by the ``estimators_``
        ``thresholds`` attribute).

        Parameters
        ----------
        index : int, or slice, or "all", or array-like of int

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        TypeError:
            If ``index`` is of unsupported type or value.
        """
        check_is_fitted(self, attributes="is_fitted_")

        if isinstance(index, int):
            self._current_estimators = [self.estimators_[index]]
            if self.response_method == "predict_proba":
                self._current_thresholds = [0.0]
            else:
                self._current_thresholds = [-np.inf]

        elif index == "all":
            self._current_estimators = self.estimators_[:]
            self._current_thresholds = self.thresholds_[:]

        elif isinstance(index, Sequence):
            self._current_estimators = [self.estimators_[i] for i in index]
            self._current_thresholds = [self.thresholds_[i] for i in index[:-1]]
            if self.response_method == "predict_proba":
                self._current_thresholds.append(0.0)
            else:
                self._current_thresholds.append(-np.inf)

        elif isinstance(index, slice):
            self._current_estimators = self.estimators_[index]
            self._current_thresholds = self.thresholds_[index]
            self._current_thresholds.pop()
            if self.response_method == "predict_proba":
                self._current_thresholds.append(0.0)
            else:
                self._current_thresholds.append(-np.inf)

        else:
            raise TypeError(
                f"index must be int or slice or sequence of int, not {type(index)}"
            )

        return self

[docs]    def reset_estimators(self):
        """Reactivates all the base estimators.

        Same as ``set_estimators("all")``. Use if you previously set to skip some
        estimators and thresholds, and want to activate all estimators again.

        Returns
        -------
        self : object
            Returns self.
        """
        return self.set_estimators("all")

    def _predict_scores(self, X):
        """Estimates confidence scores for `predict_proba`.

        Returns
        -------
        y_score : np.ndarray, shape = (n_samples, n_classes) or n_samples
            Confidence scores (probabilities or decision scores, depending on
            `self.response_method`).
        """
        n_samples = len(X)
        n_estimators = len(self._current_estimators)
        n_classes = self._current_estimators[0].classes_.shape[0]

        # Scores to return
        if self.response_method == "predict_proba":
            y_score = np.zeros((n_samples, n_classes), dtype=np.float64)
        else:
            y_score = np.zeros(n_samples, dtype=np.float64)
        # Ensemble mask if `self.return_earray` is True
        if self.return_earray:
            ensemble_mask = np.zeros((n_samples, n_estimators), dtype=np.bool_)
        # The current sample indices to process
        remaining_idx = np.arange(n_samples)

        # region Cascaded prediction of the current selected estimators
        for i, (estimator, threshold) in enumerate(
            zip(self._current_estimators, self._current_thresholds)
        ):
            # All samples are processed
            if len(remaining_idx) == 0:
                break

            # region Predict currently deferred samples
            X_remaining = np.take(X, remaining_idx, axis=0)
            y_score_remaining = getattr(estimator, self.response_method)(X_remaining)
            if self.response_method == "predict_proba":
                max_score = np.max(y_score_remaining, axis=1)
            else:
                max_score = y_score_remaining
            # endregion

            # region Mask selected samples
            if i < n_estimators - 1:
                confident_mask = max_score >= threshold
            else:
                confident_mask = np.ones(len(remaining_idx), dtype=np.bool_)
            # endregion

            # region Update indices of deferred samples
            confident_idx = remaining_idx[confident_mask]
            y_score[confident_idx] = y_score_remaining[confident_mask]
            remaining_idx = remaining_idx[~confident_mask]

            if self.return_earray:
                ensemble_mask[confident_idx, i] = True
            # endregion
        # endregion

        if self.return_earray:
            return earray(y_score, ensemble_mask)
        else:
            return y_score


# Number of candidate thresholds generated by ``ThresholdCascadeClassifierCV.fit``
# when ``cv_thresholds`` is None (or otherwise falsy).
_N_CV_THRESHOLDS = 10
# Upper end of the automatically generated candidate threshold grid.
_MAX_DEFAULT_CV_THRESHOLD = 0.95


def _fitting_path(estimators, response_method, X, y, sample_weight):
    """Fits a fresh cascade built from clones of ``estimators``.

    The cascade is created with a placeholder threshold of 0.0 and
    ``return_earray=True``, then handed to ``fit_one`` with the given data.
    """
    fold_cascade = ThresholdCascadeClassifier(
        clone(estimators),
        thresholds=0.0,
        response_method=response_method,
        return_earray=True,
        prefit=False,
        n_jobs=None,
        verbose=False,
    )
    return fit_one(fold_cascade, X, y, sample_weight=sample_weight)


def _scoring_path(
    cascade,
    thresholds,
    costs,
    X,
    y,
    sample_weight,
    scoring,
    scoring_response_method,
):
    """Scores a fitted cascade under one threshold configuration.

    Parameters
    ----------
    cascade : object
        Fitted cascade; ``set_params(thresholds=...)`` is applied to it in place.
    thresholds : array-like of float
        Threshold configuration to evaluate.
    costs : ndarray
        Per-estimator costs, combined with the prediction's ``acceptance_rates``.
    X, y : array-like
        Evaluation samples and true labels.
    sample_weight : array-like or None
        Forwarded to the metric.
    scoring : scorer object
        Must expose ``_score_func``. Its ``_sign`` and ``_kwargs`` are honoured
        when present, so loss-type scorers stay "higher is better" and metric
        options (e.g. averaging mode) are not dropped.
    scoring_response_method : str
        Name of the cascade method that produces the metric input.

    Returns
    -------
    score : float
        Signed metric value.
    cost : float
        ``acceptance_rates @ costs`` of the prediction.
    """
    predictor = getattr(
        cascade.set_params(thresholds=thresholds),
        scoring_response_method,
    )
    y_pred = predictor(X)

    sign = getattr(scoring, "_sign", 1)
    metric_kwargs = dict(getattr(scoring, "_kwargs", None) or {})
    metric_kwargs["sample_weight"] = sample_weight

    try:
        # NOTE: Some scorers accept the full (n_samples, n_classes) predictions, but
        raw_score = scoring._score_func(y, y_pred, **metric_kwargs)
    except ValueError:
        #       others (e.g., `roc_auc_score` for binary classification) only n_samples.
        if y_pred.ndim != 2:
            # There is no positive-class column to fall back to: surface the
            # metric's own error instead of an unrelated indexing failure.
            raise
        raw_score = scoring._score_func(y, y_pred[:, 1], **metric_kwargs)

    cost = y_pred.acceptance_rates @ costs
    return sign * raw_score, cost


class ThresholdCascadeClassifierCV(ThresholdCascadeClassifier):
    """Cascade of classifiers with Pareto-optimized deferral thresholds.

    Optimizes deferral thresholds via cross-validation grid search, identifying
    non-dominated (Pareto-optimal) threshold configurations that balance performance
    and computational cost. Users can select thresholds based on performance
    constraints or cost budgets (e.g., select the best threshold configuration s.t.
    it gives at least `min_score` classification score on validation).

    During inference, runs the first estimator and if a predicted score is lower
    than ``thresholds[0]``, tries the second, and so on. The last estimator always
    makes predictions on deferred samples.

    Parameters
    ----------
    estimators : array-like of object, length n_estimators
        Base estimators. Preferably ordered from weakest (fast, low-accuracy) to
        strongest (slow, high-accuracy).
    costs : array-like of shape (n_estimators,) or float, default=None
        Computational cost per estimator. Used to identify non-dominated
        threshold configurations along the cost-performance tradeoff. Defaults to
        uniform costs summing to 1.0.
    cv_thresholds : array-like of shape (n_thresholds,) or int, default=None
        Candidate deferral thresholds for grid search. If None, defaults to 10
        thresholds linearly spaced from 1/n_classes to 0.95. If int, generates
        that many thresholds in the same range.
    min_score : float, default=None
        Minimum acceptable cross-validation score. If specified, configurations
        with a lower mean score are excluded before the Pareto front is built.
    max_cost : float, default=None
        Maximum acceptable computational cost. If specified, configurations
        with a higher mean cost are excluded before the Pareto front is built.
    strategy : {"min_score", "max_cost", "balanced"}, default="balanced"
        How the final configuration is picked from the feasible Pareto front:

        - "min_score": the configuration with the highest mean score;
        - "max_cost": the configuration with the lowest mean cost;
        - "balanced": the configuration with the highest score-to-cost ratio.
    cv : int, cross-validation generator or iterable, default=5
        Cross-validation splitting strategy. Accepts:

        - int: number of folds (uses StratifiedKFold for classification)
        - CV splitter object
        - Iterable yielding (train_idx, test_idx) splits
    scoring : str or scorer object, default="accuracy"
        Scorer for threshold evaluation. Either a scikit-learn scorer name
        (e.g., "accuracy", "f1") or a custom scorer wrapped with
        :func:`sklearn.metrics.make_scorer`; other callables are rejected by
        ``fit``.
    raise_error : bool, default=False
        Whether to raise ``CascadeParetoConfigException`` if no configuration
        satisfies the specified constraints (min_score, max_cost). If False (default),
        issues a warning and falls back to the configuration with the highest
        score-to-cost ratio.
    response_method : {"predict_proba", "decision_function"}, default="predict_proba"
        Method by estimators for computing deferral scores.
    return_earray : bool, default=True
        Whether to return :class:`~skfb.core.ENDArray` with ensemble mask
        or plain numpy ndarray.
    n_jobs : int, default=None
        Parallel jobs:

        - 1) model pre-training for each CV fold;
        - 2) score and cost evaluation for each fold and threshold configuration;
        - 3) and retraining on full data.

        -1 uses all processors. Defaults to one.
    verbose : int, default=0
        Verbosity level for each stage of training.

    Attributes
    ----------
    best_thresholds_ : ndarray, shape (n_estimators - 1,)
        Best selected thresholds (one row of ``all_cv_thresholds_``).
    all_cv_thresholds_ : ndarray, shape (n_configs, n_estimators - 1)
        All generated threshold configurations.
    mean_cv_scores_ : ndarray, shape (n_configs,)
        Average cross-validated classification scores.
    mean_cv_costs_ : ndarray, shape (n_configs,)
        Average cross-validated computational costs.

    Examples
    --------
    >>> from skfb.ensemble import ThresholdCascadeClassifierCV
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.linear_model import LogisticRegression
    >>> X, y = make_classification(
    ...     n_samples=300, n_features=100, n_redundant=95, class_sep=0.1,
    ...     random_state=0)
    >>> cascading = ThresholdCascadeClassifierCV(
    ...     [LogisticRegression(l1_ratio=1.0, solver="liblinear", random_state=0),
    ...      RandomForestClassifier(random_state=0)],
    ...     costs=[1.0, 5.0],
    ...     cv_thresholds=5,
    ...     cv=3).fit(X, y)
    >>> cascading.best_thresholds_  # doctest: +SKIP

    Notes
    -----
    The Pareto front contains all non-dominated configurations: those for which
    no other configuration is at least as good in both score and cost while
    being strictly better in at least one of them.

    If you want a fallback option for the last estimator, consider rejectors
    from :mod:`skfb.estimators`.
    """

    # Declarative constraints, one entry per constructor parameter.
    # NOTE(review): presumably consumed by the ``_fit_context`` validation — confirm.
    _parameter_constraints = {
        "estimators": ["array-like"],
        "costs": ["array-like", Interval(Real, 0, None, closed="neither"), None],
        "cv_thresholds": [
            "array-like",
            Interval(Real, None, None, closed="neither"),
            None,
        ],
        "cv": ["cv_object"],
        "scoring": [callable, StrOptions(set(get_scorer_names())), None],
        "min_score": [Interval(Real, None, None, closed="neither"), None],
        "max_cost": [Interval(Real, 0, None, closed="left"), None],
        "strategy": [StrOptions({"min_score", "max_cost", "balanced"})],
        "raise_error": ["boolean"],
        "response_method": [StrOptions({"decision_function", "predict_proba"})],
        "return_earray": ["boolean"],
        "n_jobs": [Interval(Integral, -1, None, closed="left"), None],
        "verbose": ["verbose"],
    }

    def __init__(
        self,
        estimators,
        costs=None,
        cv_thresholds=None,
        min_score=None,
        max_cost=None,
        strategy="balanced",
        cv=5,
        scoring="accuracy",
        raise_error=False,
        response_method="predict_proba",
        return_earray=True,
        n_jobs=None,
        verbose=0,
    ):
        """Stores the search settings on top of the base cascade parameters.

        The parent is initialised with a placeholder threshold of 0.0 and
        ``prefit=False``.
        """
        super().__init__(
            estimators=estimators,
            thresholds=0.0,
            response_method=response_method,
            return_earray=return_earray,
            prefit=False,
            n_jobs=n_jobs,
            verbose=verbose,
        )
        # Search-specific settings, kept verbatim as passed.
        self.costs = costs
        self.cv_thresholds = cv_thresholds
        self.min_score = min_score
        self.max_cost = max_cost
        self.strategy = strategy
        self.cv = cv
        self.scoring = scoring
        self.raise_error = raise_error

    def _select_best_thresholds(self):
        """Identifies non-dominated (Pareto-optimal) threshold configurations.

        A configuration is Pareto-optimal if no other configuration achieves both
        strictly higher accuracy AND strictly lower cost.
        """
        # region Mask thresholds by constraints
        feasible = np.ones(len(self.all_cv_thresholds_), dtype=bool)
        if self.min_score is not None:
            feasible &= self.mean_cv_scores_ >= self.min_score
        if self.max_cost is not None:
            feasible &= self.mean_cv_costs_ <= self.max_cost
        # endregion

        # region Handle default case
        if not np.any(feasible):
            if self.raise_error:
                raise CascadeParetoConfigException(
                    f"No threshold configuration satisfies constraints: "
                    f"min_score={self.min_score} and max_cost={self.max_cost}."
                )
            else:
                default_idx = np.argmax(self.mean_cv_scores_ / self.mean_cv_costs_)
                self.best_thresholds_ = self.all_cv_thresholds_[default_idx]

                warnings.warn(
                    (
                        f"No threshold configuration satisfies constraints: "
                        f"min_score={self.min_score} and max_cost={self.max_cost}; "
                        f"setting thresholds = {self.best_thresholds_} giving "
                        f"max(scores / costs)."
                    ),
                    category=CascadeParetoConfigWarning,
                )
        # endregion
        # region Handle constraints
        else:
            feasible_idx = np.where(feasible)[0]
            feasible_scores = self.mean_cv_scores_[feasible_idx]
            feasible_costs = self.mean_cv_costs_[feasible_idx]

            pareto = np.ones(len(feasible_idx), dtype=bool)

            for i in range(len(feasible_idx)):
                for j in range(len(feasible_idx)):
                    if i != j:
                        j_dominates_i = (
                            feasible_scores[j] >= feasible_scores[i]
                            and feasible_costs[j] <= feasible_costs[i]
                            and (
                                feasible_scores[j] != feasible_scores[i]
                                or feasible_costs[j] != feasible_costs[i]
                            )
                        )
                        if j_dominates_i:
                            pareto[i] = False
                            break

            pareto_idx = feasible_idx[pareto]
            pareto_scores = self.mean_cv_scores_[pareto_idx]
            pareto_costs = self.mean_cv_costs_[pareto_idx]

            if self.strategy == "min_score":
                best_idx = np.argmax(pareto_scores)
            elif self.strategy == "max_cost":
                best_idx = np.argmin(pareto_costs)
            else:
                best_idx = np.argmax(pareto_scores / pareto_costs)

            self.best_thresholds_ = self.all_cv_thresholds_[pareto_idx[best_idx]]
        # endregion

[docs]    @_fit_context(prefer_skip_nested_validation=False)
    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
            "y": ["array-like"],
            "sample_weight": ["array-like", None],
        },
        prefer_skip_nested_validation=True,
    )
    def fit(self, X, y, sample_weight=None):
        """Fit estimators and identify Pareto-optimal threshold configurations.

        Candidate thresholds are combined into a grid with one threshold per
        deferral step (``len(estimators) - 1`` of them). For every
        cross-validation fold a cascade is trained on the training part, and
        every threshold combination is then scored on the matching test part.
        The mean score and mean cost of each combination are stored in
        ``mean_cv_scores_`` and ``mean_cv_costs_``. Finally the thresholds are
        chosen via ``set_params(min_score=..., max_cost=...)`` and the parent
        class ``fit`` is run on the full data.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            Training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            Target class labels.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, samples are equally weighted.

        Returns
        -------
        self : object
            Fitted estimator. Use ``predict()`` and/or ``set_params()`` methods.

        Raises
        ------
        ValueError
            If the resolved scorer does not expose ``_response_method``.
        """
        self.classes_ = unique_labels(y)
        n_estimators = len(self.estimators)

        # region Process costs
        if self.costs is None:
            # No costs given: every estimator gets an equal share of 1.
            self.costs_ = np.array([1.0 / n_estimators] * n_estimators)
        elif isinstance(self.costs, (float, int)):
            # A scalar cost is applied to every estimator.
            self.costs_ = np.array([self.costs] * n_estimators)
        else:
            self.costs_ = np.asarray(self.costs)
        # endregion

        # region Generate candidate thresholds
        if self.cv_thresholds is None or isinstance(self.cv_thresholds, int):
            # ``None`` (or 0) falls back to the module default grid size.
            n_thresholds = self.cv_thresholds or _N_CV_THRESHOLDS
            self.cv_thresholds_ = np.linspace(
                1 / len(self.classes_),
                _MAX_DEFAULT_CV_THRESHOLD,
                n_thresholds,
            )
        else:
            self.cv_thresholds_ = np.asarray(self.cv_thresholds)
        # endregion

        # region Setup cross-validation and scorer
        self.cv_ = check_cv(self.cv, y=y, classifier=True)
        self.scoring_ = get_scorer(self.scoring)
        try:
            response_methods = self.scoring_._response_method
        except AttributeError as err:
            raise ValueError(
                f"`scoring` should be either scikit-learn scorer name like 'accuracy' "
                f"or custom scorer wrapped with `sklearn.metrics.make_scorer`, "
                f"not {self.scoring}."
            ) from err
        if isinstance(response_methods, (list, tuple)):
            # Several response methods listed: the last one is used.
            self._scoring_response_method = response_methods[-1]
        else:
            self._scoring_response_method = response_methods

        # Materialise the folds exactly once. A splitter that shuffles without
        # a fixed integer ``random_state`` may return different folds on every
        # ``split`` call; re-splitting during scoring could then evaluate a
        # cascade on samples it was trained on.
        folds = list(self.cv_.split(X, y))
        # endregion

        # region Generate all threshold combinations
        threshold_grids = [self.cv_thresholds_] * (n_estimators - 1)
        threshold_combinations = ParameterGrid(
            {
                f"threshold_{i}": thresholds
                for i, thresholds in enumerate(threshold_grids)
            },
        )
        # One row per combination, columns ordered by deferral step.
        self.all_cv_thresholds_ = np.array(
            [
                tuple(combo[f"threshold_{i}"] for i in range(n_estimators - 1))
                for combo in threshold_combinations
            ]
        )
        # endregion

        # region Temporarily train estimators on different folds
        cascades = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(_fitting_path)(
                self.estimators,
                self.response_method,
                np.take(X, train_idx, axis=0),
                np.take(y, train_idx, axis=0),
                (
                    np.take(sample_weight, train_idx, axis=0)
                    if sample_weight is not None
                    else None
                ),
            )
            for train_idx, _ in folds
        )
        cascades = np.array(cascades)
        # endregion

        # region Cross-validation
        # Results are produced threshold-major, fold-minor; the reshape below
        # relies on that ordering.
        results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(_scoring_path)(
                cascade,
                thresholds,
                self.costs_,
                np.take(X, test_idx, axis=0),
                np.take(y, test_idx, axis=0),
                (
                    np.take(sample_weight, test_idx, axis=0)
                    if sample_weight is not None
                    else None
                ),
                self.scoring_,
                self._scoring_response_method,
            )
            for thresholds in self.all_cv_thresholds_
            for cascade, (_, test_idx) in zip(cascades, folds)
        )
        scores_and_costs = np.array(results).reshape(
            len(self.all_cv_thresholds_), len(cascades), 2
        )
        # Average over folds: index 0 holds scores, index 1 holds costs.
        self.mean_cv_scores_ = scores_and_costs[:, :, 0].mean(axis=1)
        self.mean_cv_costs_ = scores_and_costs[:, :, 1].mean(axis=1)
        # endregion

        # region Set default thresholds and estimators
        # Passing the constraints through ``set_params`` triggers threshold
        # selection before the final fit on the full data.
        self.set_params(min_score=self.min_score, max_cost=self.max_cost)
        super().fit(X, y, sample_weight=sample_weight)
        # endregion

        return self

[docs]    def set_params(self, **params):
        """Sets the parameters of the cascade.

        If thresholds or new constraints are provided, the transformations are done
        accordingly, so there is no need to refit the cascade.

        Parameters
        ----------
        **params : dict
            Cascade parameters.

        Returns
        -------
        self : object
            Returns self.

        Raises
        ------
        ValueError
            If all `min_score`, `max_cost`, and `thresholds` are passed.
        """
        not_given = "not-given"
        max_cost = params.get("max_cost", not_given)
        min_score = params.get("min_score", not_given)
        thresholds = params.get("thresholds", not_given)

        if all(p != not_given for p in (max_cost, min_score, thresholds)):
            raise ValueError(
                "Pass either min_score and max_cost or thresholds. "
                "The former will automatically determine the best thresholds."
            )

        elif thresholds != not_given:
            self._set_thresholds(thresholds)
            params.pop("thresholds")

            return super().set_params(**params)

        elif max_cost != not_given or min_score != not_given:
            super().set_params(**params)

            self._select_best_thresholds()
            self._set_thresholds(self.best_thresholds_)

            return self

        return super().set_params(**params)