Source code for skfb.estimators._anomaly

"""Fallback classification via anomaly detection."""

__all__ = ("AnomalyFallbackClassifier",)

import numpy as np
from sklearn.utils.metaestimators import available_if
from sklearn.utils.validation import check_is_fitted, NotFittedError

from ..core import array as ska
from ..utils._legacy import HasMethods, validate_params
from ..utils._validation import check_X_y_sample_weight
from .base import BaseFallbackClassifier, _estimator_has


class AnomalyFallbackClassifier(BaseFallbackClassifier):
    """A fallback classifier based on provided anomaly detector.

    Augments ``estimator`` behavior with a reject option based on outlier
    detection. If ``outlier_detector`` predicts -1 for a given input, then
    returns, masks, or ignores rejections depending on ``fallback_mode``.
    Otherwise, accepts the input and makes predictions.

    Parameters
    ----------
    estimator : object
        The base estimator making decisions w/o fallbacks.
    outlier_detector : object
        The outlier detector returning 1 for inliers and -1 for outliers.
    remove_outliers : bool, default=False
        Whether to remove outliers from training data before fitting.
    fallback_label : any, default=-1
        The label of a rejected example.
        Should be compatible w/ the class labels from training data.
    fallback_mode : {"return", "store", "ignore"}, default="store"
        While predicting w/ the ``predict`` method, whether to return:

        * (``"return"``) a numpy ndarray of both predictions and fallbacks;
        * (``"store"``) an ``FBNDArray`` of predictions storing also
          fallback mask;
        * (``"ignore"``) a numpy ndarray of only estimator's predictions.

        Calling ``decision_function`` or ``predict_proba`` is equivalent to
        ``estimator``'s corresponding calls except that with ``"store"``,
        ``FBNDArray`` is returned.

    Attributes
    ----------
    estimator_ : object
        Trained base estimator.
    outlier_detector_ : object
        Trained outlier detector.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.estimators import AnomalyFallbackClassifier
    >>> from sklearn.ensemble import IsolationForest
    >>> from sklearn.linear_model import LogisticRegression
    >>> estimator = LogisticRegression(random_state=0)
    >>> outlier_detector = IsolationForest(n_estimators=10, max_samples=1.0,
    ...                                    contamination=0.2, random_state=0)
    >>> rejector = AnomalyFallbackClassifier(estimator, outlier_detector)
    >>> X = np.array([
    ...     [0, 0], [10, 10], [1, 1], [9, 9], [1, 0], [9, 10], [0, 1], [10, 9],
    ...     [5.5, 5], [5., 5.5]
    ... ])
    >>> y = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
    >>> rejector.fit(X, y).predict(X).get_dense_fallback_mask()
    array([False, False, False, False, False, False, False, False,  True,
            True])
    >>> rejector.set_params(fallback_mode="return").predict(X)
    array([ 0,  1,  0,  1,  0,  1,  0,  1, -1, -1])
    >>> rejector.score(X, y)
    1.0
    """

    # NOTE(review): the constraint requires ``fit_predict`` although this class
    # only ever calls ``fit`` and ``predict`` on the detector -- confirm whether
    # the constraint should list those methods instead.
    _parameter_constraints = {**BaseFallbackClassifier._parameter_constraints}
    _parameter_constraints.update(
        {
            "outlier_detector": [HasMethods(["fit_predict"])],
            "remove_outliers": ["boolean"],
        },
    )

    def __init__(
        self,
        estimator,
        outlier_detector,
        remove_outliers=False,
        fallback_label=-1,
        fallback_mode="store",
    ):
        super().__init__(
            estimator=estimator,
            fallback_label=fallback_label,
            fallback_mode=fallback_mode,
        )
        self.outlier_detector = outlier_detector
        self.remove_outliers = remove_outliers

        # NOTE: I believe we are violating a scikit-learn proposal by assigning an
        # attribute right after the initialization instead of the fitting.
        # But this seems to be the best way to prevent refitting.
        try:
            # Both checks raise ``NotFittedError`` unless the corresponding
            # object has already been trained by the caller.
            check_is_fitted(self.estimator, "classes_")
            check_is_fitted(self.outlier_detector)

            fallback_label_ = self.validate_fallback_label(
                self.fallback_label, self.estimator.classes_
            )
            fitted_params = {
                "estimator_": self.estimator,
                "outlier_detector_": self.outlier_detector,
                "classes_": self.estimator.classes_,
                "fallback_label_": fallback_label_,
            }
            self._set_fitted_attributes(fitted_params)
        except NotFittedError:
            # At least one component is untrained; ``fit`` has to be called.
            pass

    def fit(self, X, y, **fit_params):
        """Trains base estimator and outlier detector then sets fit attributes.

        The outlier detector is trained first. If ``remove_outliers`` is set,
        samples it does not predict as inliers (1) are dropped from ``X``,
        ``y``, and ``sample_weight`` before the base estimator is trained.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
            The training input samples.
        y : array-like, shape (n_samples,) or (n_samples, n_outputs)
            The target values.
        **fit_params : dict
            Extra keyword arguments forwarded to both the outlier detector's
            ``fit`` and the parent class's ``fit``. ``sample_weight``, if
            present, is validated together with ``X`` and ``y`` and is only
            forwarded when it is not None.

        Returns
        -------
        self : object
            Returns self.
        """
        sample_weight = fit_params.pop("sample_weight", None)
        X, y, sample_weight = check_X_y_sample_weight(
            X, y=y, sample_weight=sample_weight
        )

        # ``sample_weight`` is passed only when given, so detectors whose
        # ``fit`` doesn't accept this keyword keep working.
        if sample_weight is not None:
            self.outlier_detector.fit(X, y=y, sample_weight=sample_weight, **fit_params)
        else:
            self.outlier_detector.fit(X, y=y, **fit_params)

        self._set_fitted_attributes(
            {
                "outlier_detector_": self.outlier_detector,
                "is_fitted_": False,  # Not yet; after super().fit(X, y, **fp)
            },
        )

        if self.remove_outliers:
            acceptance_mask = self.outlier_detector_.predict(X) == 1
            X = X[acceptance_mask]
            y = y[acceptance_mask]
            if sample_weight is not None:
                sample_weight = sample_weight[acceptance_mask]

        if sample_weight is not None:
            return super().fit(X, y, sample_weight=sample_weight, **fit_params)
        else:
            return super().fit(X, y, **fit_params)

    def _predict(self, X):
        """Runs outlier detection and classification.

        Returns both fallbacks and classes if ``self.fallback_mode == 'return'``,
        or classes w/ fallback mask otherwise.
        """
        X, _, _ = check_X_y_sample_weight(X)

        y_out = self.outlier_detector_.predict(X)
        fallback_mask = y_out == -1

        if self.fallback_mode == "return":
            # Size the output from the mask rather than ``len(X)``, which is
            # undefined for SciPy sparse matrices.
            y_comb = np.empty(fallback_mask.shape[0], dtype=self.classes_.dtype)
            acceptance_mask = ~fallback_mask
            # Skip the base estimator when every sample is rejected: calling
            # ``predict`` on zero rows would make it raise.
            if acceptance_mask.any():
                y_comb[acceptance_mask] = self.estimator_.predict(X[acceptance_mask])
            y_comb[fallback_mask] = self.fallback_label_
            return y_comb
        else:
            y_pred = self.estimator_.predict(X)
            y_pred = ska.fbarray(y_pred, fallback_mask)
            return y_pred

    def _set_fallback_mask(self, y_prob, X):
        """Sets fallback mask on ``predict_proba``/``decision_function`` output.

        Runs the fitted outlier detector on ``X`` and stores the mask of
        examples predicted as outliers (-1) in ``y_prob.fallback_mask``.
        """
        y_out = self.outlier_detector_.predict(X)
        y_prob.fallback_mask = y_out == -1

    @available_if(_estimator_has("decision_function"))
    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
        },
        prefer_skip_nested_validation=True,
    )
    def decision_function(self, X):
        """Calls ``decision_function`` on the estimator and sets fallback mask.

        Parameters
        ----------
        X : indexable, length n_samples
            Input samples to classify.
            Must fulfill the input assumptions of the underlying estimator.

        Returns
        -------
        y_pred : FBNDArray of shape (n_samples,) or (n_samples, n_classes)
            Predicted class scores for `X` based on the estimator.
            If ``self.fallback_mode == "store"``, scores store fallback mask.
        """
        check_is_fitted(self, attributes="is_fitted_")

        y_prob = self.estimator_.decision_function(X)
        if self.fallback_mode == "store":
            y_prob = ska.fbarray(y_prob)
            self._set_fallback_mask(y_prob, X=X)
        return y_prob

    @available_if(_estimator_has("predict_proba"))
    @validate_params(
        {
            "X": ["array-like", "sparse matrix"],
        },
        prefer_skip_nested_validation=True,
    )
    def predict_proba(self, X):
        """Calls ``predict_proba`` on the estimator and sets fallback mask.

        Parameters
        ----------
        X : indexable, length n_samples
            Input samples to classify.
            Must fulfill the input assumptions of the underlying estimator.

        Returns
        -------
        y_pred : FBNDArray of shape (n_samples,) or (n_samples, n_classes)
            Predicted class probabilities for `X` based on the estimator.
            The order of the classes corresponds to that in the fitted
            attribute :term:`classes_`. If ``self.fallback_mode == "store"``,
            probabilities store fallback mask.
        """
        check_is_fitted(self, attributes="is_fitted_")

        y_prob = self.estimator_.predict_proba(X)
        if self.fallback_mode == "store":
            y_prob = ska.fbarray(y_prob)
            self._set_fallback_mask(y_prob, X=X)
        return y_prob