Source code for skfb.core.array

"""Extensions to numpy ndarrays supporting fallback masks."""

# Names exported by ``from skfb.core.array import *``. Both array classes and
# both of their factory functions are listed so the pairs stay symmetrical.
__all__ = (
    "earray",
    "ENDArray",
    "fbarray",
    "FBNDArray",
)

import numpy as np

from scipy.sparse import coo_array, csr_matrix

from ..utils._legacy import validate_params


class FBNDArray(np.ndarray):
    """Same as numpy ndarray but stores also additional fallback information.

    FBNDArrays are usually returned by ``predict``, ``predict_proba``, or
    ``predict_log_proba`` methods of fallback meta-estimators.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        An array of base-estimator predictions.
    fallback_mask : array-like of shape (n_samples,) or (1, n_samples), or None
        Array mask indicating whether i-th sample was rejected by a fallback
        meta-estimator. If None or empty, an empty sparse mask is stored and
        no sample is treated as rejected. Further stored as a sparse array by
        the ``fallback_mask`` property.

    Notes
    -----
    ``__array_finalize__`` hands the parent's mask object to every derived
    array unchanged, so a slice of an FBNDArray still carries the mask of the
    full-length parent. Re-assign ``fallback_mask`` after slicing if the dense
    mask getters or ``as_comb`` are needed on the slice.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> y = np.array([0, 1, 0, 1, 0, 1])
    >>> f = [0, 0, 0, 0, 1, 1]
    >>> y_pred = ska.fbarray(y, f)
    >>> y_pred
    FBNDArray([0, 1, 0, 1, 0, 1])
    >>> y_pred.get_dense_fallback_mask()
    array([False, False, False, False,  True,  True])
    >>> y_pred.get_dense_neg_fallback_mask()
    array([ True,  True,  True,  True, False, False])
    >>> y_pred.fallback_rate
    0.3333333333333333
    >>> y_pred.as_comb(fallback_label=2)
    array([0, 1, 0, 1, 2, 2])
    """

    def __new__(cls, predictions, fallback_mask=None):
        obj = np.asarray(predictions).view(cls)
        obj._fallback_mask = cls._validate_fallback_mask(fallback_mask, len(obj))
        return obj

    def __array_finalize__(self, obj):
        # Inherit the parent's mask when there is one. Otherwise (view of a
        # plain ndarray, explicit construction) fall back to an empty *sparse*
        # mask so that ``fallback_mask`` always exposes the sparse-array API
        # (``toarray``, ``count_nonzero``) the other methods rely on.
        inherited = getattr(obj, "_fallback_mask", None)
        if inherited is None:
            inherited = coo_array(np.array([], dtype=np.bool_))
        self._fallback_mask = inherited

    @classmethod
    @validate_params(
        {
            "fallback_mask": ["array-like", None, list],
        },
        prefer_skip_nested_validation=True,
    )
    def _validate_fallback_mask(cls, fallback_mask, num_predictions):
        """Returns COO sparse ``fallback_mask`` if it's a valid prediction mask.

        Parameters
        ----------
        fallback_mask : array-like of shape (n_samples,) or (1, n_samples), or None
            Candidate mask. None or an empty sequence yields an empty mask.
        num_predictions : int
            Number of predictions the mask has to cover.

        Returns
        -------
        scipy.sparse.coo_array
            Boolean sparse mask.

        Raises
        ------
        ValueError
            If a non-empty mask does not have ``num_predictions`` entries.
        """
        if fallback_mask is None or len(fallback_mask) == 0:
            return coo_array(np.array([], dtype=np.bool_))

        mask = np.asarray(fallback_mask)
        if mask.ndim == 2 and mask.shape[0] == 1:
            # Documented (1, n_samples) layout: unwrap the single row so the
            # length check below compares samples rather than rows.
            mask = mask[0]

        if len(mask) != num_predictions:
            raise ValueError(
                f"Mask size = {len(mask)} does not match the number of "
                f"elements of array = {num_predictions}"
            )
        return coo_array(np.asarray(mask, dtype=np.bool_))

    @property
    def fallback_mask(self):
        """Returns the sparse fallback mask."""
        return self._fallback_mask

    @fallback_mask.setter
    def fallback_mask(self, fallback_mask):
        """Sets a new sparse fallback mask."""
        self._fallback_mask = self._validate_fallback_mask(fallback_mask, len(self))

    @property
    def fallback_rate(self):
        """Returns the fraction of samples marked as rejected (0.0 if empty)."""
        if len(self) == 0:
            return 0.0
        return self.fallback_mask.count_nonzero() / len(self)

    def get_dense_fallback_mask(self):
        """Converts ``fallback_mask`` to 1D boolean ndarray.

        An empty stored mask (e.g., when no mask was passed) is expanded to an
        all-False mask with one entry per prediction, so the result can always
        be used to index the array.
        """
        mask = self.fallback_mask.toarray()
        if mask.ndim == 2:  # For scipy<1.13
            mask = mask[0]
        if mask.size == 0 and self.ndim >= 1:
            # Empty mask means "nothing was rejected".
            mask = np.zeros(len(self), dtype=np.bool_)
        return mask

    def get_dense_neg_fallback_mask(self):
        """Returns negation of ``fallback_mask`` (acceptance mask) as 1D ndarray."""
        return ~self.get_dense_fallback_mask()

    def as_comb(self, fallback_label=-1):
        """Returns an ndarray of both predictions and fallbacks based on the mask.

        Parameters
        ----------
        fallback_label : scalar, default=-1
            Value written in place of every rejected prediction.

        Returns
        -------
        ndarray
            Plain-ndarray copy of the predictions with rejected entries
            replaced by ``fallback_label``.
        """
        y_comb = np.asarray(self).copy()
        y_comb[self.get_dense_fallback_mask()] = fallback_label
        return y_comb
def fbarray(predictions, fallback_mask=None):
    """Builds an :class:`FBNDArray` from predictions and an optional mask.

    Convenience factory mirroring ``numpy.array``: the result behaves like a
    regular ndarray of predictions and additionally carries the fallback mask.
    Such arrays are usually returned by ``predict``, ``predict_proba``, or
    ``predict_log_proba`` methods of fallback meta-estimators.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        An array of base-estimator predictions.
    fallback_mask : array-like of shape (n_samples,) or (1, n_samples), or None
        Array mask indicating whether i-th sample was rejected by a fallback
        meta-estimator. If None or empty, an empty sparse mask is stored.
        Exposed as a sparse array by the ``fallback_mask`` property.

    Returns
    -------
    FBNDArray
        Same as numpy ndarray but stores also additional fallback information.
    """
    wrapped = FBNDArray(predictions, fallback_mask=fallback_mask)
    return wrapped
class ENDArray(np.ndarray):
    """Numpy ndarray storing masks of predicted samples per estimator.

    ENDArrays are usually returned by ``predict``, ``predict_proba``, or
    ``predict_log_proba`` methods of ensemble meta-estimators.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        An array of base-estimator predictions.
    ensemble_mask : array-like of shape (n_samples, n_estimators), or None
        Array mask indicating which estimator decided to make a prediction on
        the i-th sample. Defaults to an empty matrix. Further stored as a
        sparse array by the ``ensemble_mask`` property. A ``csr_matrix`` is
        stored as-is, without a shape check.
    """

    def __new__(cls, predictions, ensemble_mask=None):
        obj = np.asarray(predictions).view(cls)
        obj._ensemble_mask = cls._validate_ensemble_mask(ensemble_mask, len(obj))
        return obj

    def __array_finalize__(self, obj):
        # Inherit the parent's mask when there is one. Otherwise fall back to
        # an empty *sparse* matrix so that ``ensemble_mask`` always offers
        # ``toarray`` (a dense ndarray default would break
        # ``acceptance_rates``).
        inherited = getattr(obj, "_ensemble_mask", None)
        if inherited is None:
            inherited = csr_matrix(np.array([[]], dtype=np.bool_))
        self._ensemble_mask = inherited

    @classmethod
    @validate_params(
        {
            "ensemble_mask": ["array-like", None],
        },
        prefer_skip_nested_validation=True,
    )
    def _validate_ensemble_mask(cls, ensemble_mask, num_predictions):
        """Returns CSR sparse ``ensemble_mask`` if it's a valid prediction mask.

        Parameters
        ----------
        ensemble_mask : array-like of shape (n_samples, n_estimators), \
                csr_matrix, or None
            Candidate mask. None or a mask with zero rows yields an empty
            matrix; a ``csr_matrix`` is returned unchanged.
        num_predictions : int
            Number of predictions (rows) the mask has to cover.

        Returns
        -------
        scipy.sparse.csr_matrix
            Boolean sparse mask.

        Raises
        ------
        ValueError
            If a non-empty dense mask does not have ``num_predictions`` rows.
        """
        if isinstance(ensemble_mask, csr_matrix):
            return ensemble_mask
        if ensemble_mask is None:
            return csr_matrix(np.array([[]], dtype=np.bool_))

        # Convert first: lists are valid array-likes but have no ``shape``.
        mask = np.asarray(ensemble_mask)
        num_rows = mask.shape[0]
        if num_rows == 0:
            return csr_matrix(np.array([[]], dtype=np.bool_))
        if num_rows != num_predictions:
            raise ValueError(
                f"Mask size = {num_rows} does not match the number of "
                f"elements of array = {num_predictions}"
            )
        return csr_matrix(np.asarray(mask, dtype=np.bool_))

    @property
    def ensemble_mask(self):
        """Returns the sparse ensemble mask."""
        return self._ensemble_mask

    @property
    def acceptance_rates(self):
        """Returns an ndarray of ratios of accepted samples per estimator."""
        # Column-wise mean of the dense boolean mask: one ratio per estimator.
        return self.ensemble_mask.toarray().mean(axis=0)
def earray(predictions, ensemble_mask=None):
    """Builds an :class:`ENDArray` from predictions and an optional mask.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        An array of base-estimator predictions.
    ensemble_mask : array-like of shape (n_samples, n_estimators), or None
        Array mask indicating which estimator decided to make a prediction on
        the i-th sample. Passed through to ``ENDArray`` unchanged.

    Returns
    -------
    ENDArray
        Numpy ndarray of predictions that also stores the ensemble mask.
    """
    wrapped = ENDArray(predictions, ensemble_mask=ensemble_mask)
    return wrapped