"""Extensions to numpy ndarrays supporting fallback masks."""
# Public names exported by a star-import of this module.
# NOTE(review): ``earray`` is defined further down in this module but is not
# listed here — confirm whether that omission is intentional.
__all__ = (
"ENDArray",
"fbarray",
"FBNDArray",
)
import numpy as np
from scipy.sparse import coo_array, csr_matrix
from ..utils._legacy import validate_params
class FBNDArray(np.ndarray):
    """Same as numpy ndarray but stores also additional fallback information.

    FBNDArrays are usually returned by ``predict``, ``predict_proba``, or
    ``predict_log_proba`` methods of fallback meta-estimators.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        An array of base-estimator predictions.
    fallback_mask : array-like of shape (n_samples,) or (1, n_samples), or None
        Array mask indicating whether i-th sample was rejected by a
        fallback meta-estimator. If None or empty, an empty sparse mask is
        stored and the dense accessors report no rejected sample (all False).
        Further stored as a sparse array by the ``fallback_mask`` property.

    Examples
    --------
    >>> import numpy as np
    >>> from skfb.core import array as ska
    >>> y = np.array([0, 1, 0, 1, 0, 1])
    >>> f = [0, 0, 0, 0, 1, 1]
    >>> y_pred = ska.fbarray(y, f)
    >>> y_pred
    FBNDArray([0, 1, 0, 1, 0, 1])
    >>> y_pred.get_dense_fallback_mask()
    array([False, False, False, False,  True,  True])
    >>> y_pred.get_dense_neg_fallback_mask()
    array([ True,  True,  True,  True, False, False])
    >>> y_pred.fallback_rate
    0.3333333333333333
    >>> y_pred.as_comb(fallback_label=2)
    array([0, 1, 0, 1, 2, 2])
    """

    def __new__(cls, predictions, fallback_mask=None):
        obj = np.asarray(predictions).view(cls)
        obj._fallback_mask = cls._validate_fallback_mask(fallback_mask, len(obj))
        return obj

    def __array_finalize__(self, obj):
        """Propagates the fallback mask to views and arrays derived from ``obj``."""
        if obj is not None:
            mask = getattr(obj, "_fallback_mask", None)
            if mask is None:
                # ``obj`` is not an FBNDArray (e.g. view-casting a plain
                # ndarray). Use an empty *sparse* mask so the attribute always
                # has the type the accessors below rely on.
                mask = coo_array(np.array([], dtype=np.bool_))
            self._fallback_mask = mask

    @classmethod
    @validate_params(
        {
            "fallback_mask": ["array-like", None, list],
        },
        prefer_skip_nested_validation=True,
    )
    def _validate_fallback_mask(cls, fallback_mask, num_predictions):
        """Returns COO sparse ``fallback_mask`` if it's a valid prediction mask.

        Parameters
        ----------
        fallback_mask : array-like, sparse array, or None
            Candidate mask. ``None`` and empty masks yield an empty sparse mask.
            A row vector of shape (1, num_predictions) is flattened.
        num_predictions : int
            Length of the array the mask belongs to.

        Returns
        -------
        coo_array
            Boolean sparse mask.

        Raises
        ------
        ValueError
            If a non-empty mask does not have ``num_predictions`` entries.
        """
        if fallback_mask is None:
            return coo_array(np.array([], dtype=np.bool_))
        if hasattr(fallback_mask, "toarray"):
            # Sparse containers (such as another array's ``fallback_mask``)
            # support neither ``len`` nor direct ndarray conversion.
            fallback_mask = fallback_mask.toarray()
        dense_mask = np.asarray(fallback_mask, dtype=np.bool_)
        if dense_mask.size == 0:
            return coo_array(np.array([], dtype=np.bool_))
        if (
            len(dense_mask) != num_predictions
            and dense_mask.shape == (1, num_predictions)
        ):
            # Documented row-vector form: one row holding all sample flags.
            dense_mask = dense_mask[0]
        if len(dense_mask) != num_predictions:
            raise ValueError(
                f"Mask size = {len(dense_mask)} does not match number of "
                f"elements of array = {num_predictions}"
            )
        return coo_array(dense_mask)

    @property
    def fallback_mask(self):
        """Returns the sparse fallback mask."""
        return self._fallback_mask

    @fallback_mask.setter
    def fallback_mask(self, fallback_mask):
        """Validates and stores a new fallback mask in sparse form."""
        self._fallback_mask = self._validate_fallback_mask(fallback_mask, len(self))

    @property
    def fallback_rate(self):
        """Returns the fraction of entries flagged in the fallback mask.

        An empty array has a rate of 0.0.
        """
        if len(self) == 0:
            return 0.0
        return self.fallback_mask.count_nonzero() / len(self)

    def get_dense_fallback_mask(self):
        """Converts ``fallback_mask`` to 1D ndarray.

        When no mask was provided (the stored mask is empty), returns an
        all-False mask with one entry per element along the first axis, so the
        result can always be used to index the array.
        """
        mask = self.fallback_mask.toarray()
        if mask.ndim == 2:  # For scipy<1.13
            mask = mask[0]
        n_samples = self.shape[0] if self.ndim else 0
        if mask.size == 0 and n_samples:
            mask = np.zeros(n_samples, dtype=np.bool_)
        return mask

    def get_dense_neg_fallback_mask(self):
        """Returns negation of ``fallback_mask`` (acceptance mask) as 1D ndarray."""
        return ~self.get_dense_fallback_mask()

    def as_comb(self, fallback_label=-1):
        """Returns an ndarray of both predictions and fallbacks based on the mask.

        Parameters
        ----------
        fallback_label : scalar, default=-1
            Value written at every position flagged by the fallback mask.

        Returns
        -------
        ndarray
            Copy of the predictions with flagged entries replaced.
        """
        y_comb = np.asarray(self).copy()
        y_comb[self.get_dense_fallback_mask()] = fallback_label
        return y_comb
def fbarray(predictions, fallback_mask=None):
    """Builds an :class:`FBNDArray` from predictions and an optional mask.

    Convenience factory mirroring ``np.array``-style construction. Such arrays
    are usually returned by ``predict``, ``predict_proba``, or
    ``predict_log_proba`` methods of fallback meta-estimators.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        Base-estimator predictions to wrap.
    fallback_mask : array-like of shape (n_samples,) or (1, n_samples), or None
        Flags marking the samples rejected by a fallback meta-estimator.
        When omitted, an empty mask is stored. The mask is kept in sparse
        form and exposed through the ``fallback_mask`` property.

    Returns
    -------
    FBNDArray
        The predictions as an ndarray subclass carrying the fallback mask.
    """
    wrapped = FBNDArray(predictions, fallback_mask)
    return wrapped
class ENDArray(np.ndarray):
    """Numpy ndarray storing masks of predicted samples per estimator.

    ENDArrays are usually returned by ``predict``, ``predict_proba``, or
    ``predict_log_proba`` methods of ensemble meta-estimators.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        An array of base-estimator predictions.
    ensemble_mask : array-like of shape (n_samples, n_estimators), or None
        Array mask indicating which estimator decided to make a prediction on
        the i-th sample. Defaults to an empty matrix. Further stored as a
        sparse matrix by the ``ensemble_mask`` property.
    """

    def __new__(cls, predictions, ensemble_mask=None):
        obj = np.asarray(predictions).view(cls)
        obj._ensemble_mask = cls._validate_ensemble_mask(ensemble_mask, len(obj))
        return obj

    def __array_finalize__(self, obj):
        """Propagates the ensemble mask to views and arrays derived from ``obj``."""
        if obj is not None:
            mask = getattr(obj, "_ensemble_mask", None)
            if mask is None:
                # ``obj`` is not an ENDArray (e.g. view-casting a plain
                # ndarray). Use an empty *sparse* mask so the attribute always
                # supports ``toarray`` as ``acceptance_rates`` expects.
                mask = csr_matrix(np.array([[]], dtype=np.bool_))
            self._ensemble_mask = mask

    @classmethod
    @validate_params(
        {
            "ensemble_mask": ["array-like", None],
        },
        prefer_skip_nested_validation=True,
    )
    def _validate_ensemble_mask(cls, ensemble_mask, num_predictions):
        """Returns CSR sparse ``ensemble_mask`` if it's a valid prediction mask.

        Parameters
        ----------
        ensemble_mask : array-like, csr_matrix, or None
            Candidate mask. A ``csr_matrix`` is returned as is, without a size
            check. ``None`` and masks with no rows yield an empty matrix.
        num_predictions : int
            Length of the array the mask belongs to.

        Returns
        -------
        csr_matrix
            Boolean sparse mask.

        Raises
        ------
        ValueError
            If the number of mask rows differs from ``num_predictions``.
        """
        if isinstance(ensemble_mask, csr_matrix):
            return ensemble_mask
        if ensemble_mask is None:
            return csr_matrix(np.array([[]], dtype=np.bool_))
        # Convert first so that lists and other shapeless array-likes work.
        dense_mask = np.asarray(ensemble_mask, dtype=np.bool_)
        if dense_mask.shape[0] == 0:
            return csr_matrix(np.array([[]], dtype=np.bool_))
        if dense_mask.shape[0] != num_predictions:
            raise ValueError(
                f"Mask size = {dense_mask.shape[0]} does not match number of "
                f"elements of array = {num_predictions}"
            )
        return csr_matrix(dense_mask)

    @property
    def ensemble_mask(self):
        """Returns the sparse ensemble mask."""
        return self._ensemble_mask

    @property
    def acceptance_rates(self):
        """Returns an ndarray of ratios of accepted samples per estimator.

        Computed as the column-wise mean of the densified ensemble mask.
        """
        return self.ensemble_mask.toarray().mean(axis=0)
def earray(predictions, ensemble_mask=None):
    """Builds an :class:`ENDArray` from predictions and an optional mask.

    Parameters
    ----------
    predictions : array-like of shape (n_samples,)
        Base-estimator predictions to wrap.
    ensemble_mask : array-like of shape (n_samples, n_estimators), or None
        Mask telling which estimator predicted which sample. When omitted,
        an empty mask is stored.

    Returns
    -------
    ENDArray
        The predictions as an ndarray subclass carrying the ensemble mask.
    """
    wrapped = ENDArray(predictions, ensemble_mask)
    return wrapped