"""
Metrics Module
Comprehensive evaluation metrics for regression and classification tasks.
"""
import numpy as np
class Metrics:
"""
Comprehensive collection of evaluation metrics for neural networks.
Provides implementations of standard metrics for both regression and
classification tasks. All metrics handle edge cases and provide
meaningful results for model evaluation.
"""
@staticmethod
def accuracy_multiclass(y_true, y_pred):
"""
Compute multi-class classification accuracy.
Calculates the fraction of correctly predicted samples for multi-class
classification problems. Handles both sparse labels and one-hot encoded inputs.
Args:
y_true (NDArray[np.float64]): True class labels of shape (N,) for sparse labels
or (N, C) for one-hot encoded.
y_pred (NDArray[np.float64]): Predicted class probabilities of shape (N, C).
Returns:
float: Classification accuracy as a fraction (0.0 to 1.0).
Example:
>>> accuracy = Metrics.accuracy_multiclass(y_true, y_pred)
>>> print(f"Accuracy: {accuracy:.2%}")
"""
# y_pred: (N, C), y_true: (N,) or (N, C)
pred_classes = np.argmax(y_pred, axis=1)
if y_true.ndim == 1:
true_classes = y_true
else:
true_classes = np.argmax(y_true, axis=1)
return float(np.mean(pred_classes == true_classes))
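# Worked example for accuracy_multiclass (illustrative values, not from the module's
# test suite): three of the four argmax predictions below match the sparse labels
# [0, 2, 1, 2], so the accuracy is 0.75.
#   >>> y_true = np.array([0, 2, 1, 2])
#   >>> y_pred = np.array([[0.8, 0.1, 0.1],
#   ...                    [0.2, 0.2, 0.6],
#   ...                    [0.1, 0.7, 0.2],
#   ...                    [0.5, 0.4, 0.1]])
#   >>> Metrics.accuracy_multiclass(y_true, y_pred)
#   0.75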
@staticmethod
def accuracy_binary(y_true, y_pred, thresh=0.5):
"""
Compute binary classification accuracy.
Calculates the fraction of correctly predicted samples for binary
classification by applying a threshold to predicted probabilities.
Args:
y_true (NDArray[np.float64]): Binary labels (0/1) of shape (N,) or (N, 1).
y_pred (NDArray[np.float64]): Predicted probabilities of shape (N,) or (N, 1).
thresh (float, optional): Classification threshold. Defaults to 0.5.
Returns:
float: Binary classification accuracy as a fraction (0.0 to 1.0).
Example:
>>> accuracy = Metrics.accuracy_binary(y_true, y_pred, thresh=0.5)
>>> print(f"Binary Accuracy: {accuracy:.2%}")
"""
# y_pred, y_true: (N,) or (N, 1); y_true is reshaped to match the thresholded predictions
preds = (y_pred >= thresh).astype(int)
y_true = y_true.reshape(preds.shape)
return float(np.mean(preds == y_true))
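# Worked example for accuracy_binary (illustrative values): with the default 0.5
# threshold the probabilities below become [1, 0, 0, 1], matching the true labels
# [1, 0, 1, 0] in two of four positions.
#   >>> y_true = np.array([1, 0, 1, 0])
#   >>> y_pred = np.array([0.9, 0.4, 0.3, 0.6])
#   >>> Metrics.accuracy_binary(y_true, y_pred)
#   0.5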
@staticmethod
def mse(y_true, y_pred):
"""
Compute mean squared error metric.
Calculates the average squared differences between predicted and true values.
Commonly used metric for regression problems.
Args:
y_true (NDArray[np.float64]): Ground truth values of shape (N,) or (N, 1).
y_pred (NDArray[np.float64]): Predicted values of shape (N,) or (N, 1).
Returns:
float: Mean squared error (scalar).
Example:
>>> mse_score = Metrics.mse(y_true, y_pred)
>>> print(f"MSE: {mse_score:.4f}")
"""
y_true = np.asarray(y_true).flatten()
y_pred = np.asarray(y_pred).flatten()
return float(np.mean((y_true - y_pred) ** 2))
@staticmethod
def rmse(y_true, y_pred):
"""
Compute root mean squared error: the square root of MSE, in the same units as the target values.
"""
return float(np.sqrt(Metrics.mse(y_true, y_pred)))
@staticmethod
def mae(y_true, y_pred):
"""
Compute mean absolute error: the average absolute difference between predicted and true values.
Less sensitive to outliers than MSE.
"""
y_true = np.asarray(y_true).flatten()
y_pred = np.asarray(y_pred).flatten()
return float(np.mean(np.abs(y_true - y_pred)))
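# Worked example shared by mse, rmse and mae (illustrative values): the residuals are
# [0.5, -0.5, 0.0, -1.0], so MSE = 0.375, RMSE = sqrt(0.375) ~= 0.612 and MAE = 0.5.
#   >>> y_true = np.array([3.0, -0.5, 2.0, 7.0])
#   >>> y_pred = np.array([2.5, 0.0, 2.0, 8.0])
#   >>> Metrics.mse(y_true, y_pred), Metrics.mae(y_true, y_pred)
#   (0.375, 0.5)
#   >>> round(Metrics.rmse(y_true, y_pred), 3)
#   0.612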
@staticmethod
def r2_score(y_true, y_pred):
"""
Compute coefficient of determination (R² score).
Measures the proportion of variance in the dependent variable that is
predictable from the independent variables. R² = 1 indicates perfect fit,
R² = 0 indicates the model performs as well as predicting the mean.
Args:
y_true (NDArray[np.float64]): Ground truth values of shape (N,) or (N, 1).
y_pred (NDArray[np.float64]): Predicted values of shape (N,) or (N, 1).
Returns:
float: R² score (can be negative for very poor fits).
Example:
>>> r2 = Metrics.r2_score(y_true, y_pred)
>>> print(f"R² Score: {r2:.3f}")
"""
y_true = np.asarray(y_true).flatten()
y_pred = np.asarray(y_pred).flatten()
ss_res = np.sum((y_true - y_pred) ** 2)
ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
if ss_tot == 0:
return 1.0 if ss_res == 0 else 0.0
return float(1.0 - (ss_res / ss_tot))
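# Worked example for r2_score (illustrative values, same data as the regression sketch
# above): ss_res = 1.5 and ss_tot = 29.1875, so R² = 1 - 1.5/29.1875 ≈ 0.949.
#   >>> y_true = np.array([3.0, -0.5, 2.0, 7.0])
#   >>> y_pred = np.array([2.5, 0.0, 2.0, 8.0])
#   >>> round(Metrics.r2_score(y_true, y_pred), 3)
#   0.949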
@staticmethod
def _get_classification_data(y_true, y_pred, threshold=0.5):
"""
Convert predictions to class arrays and compute confusion matrix elements.
Returns:
tuple: (y_true_classes, y_pred_classes, num_classes, tp, fp, fn)
"""
y_true = np.asarray(y_true)
y_pred = np.asarray(y_pred)
if y_pred.ndim == 1 or (y_pred.ndim == 2 and y_pred.shape[1] == 1):
# Binary classification
y_pred_classes = (y_pred.flatten() >= threshold).astype(int)
y_true_classes = y_true.flatten().astype(int)
num_classes = 2
else:
# Multi-class classification
y_pred_classes = np.argmax(y_pred, axis=1)
if y_true.ndim == 1:
y_true_classes = y_true.astype(int)
else:
y_true_classes = np.argmax(y_true, axis=1)
num_classes = max(np.max(y_true_classes), np.max(y_pred_classes)) + 1
# Compute per-class confusion-matrix counts (TP, FP, FN)
tp = np.zeros(num_classes)
fp = np.zeros(num_classes)
fn = np.zeros(num_classes)
for i in range(num_classes):
tp[i] = np.sum((y_true_classes == i) & (y_pred_classes == i))
fp[i] = np.sum((y_true_classes != i) & (y_pred_classes == i))
fn[i] = np.sum((y_true_classes == i) & (y_pred_classes != i))
return y_true_classes, y_pred_classes, num_classes, tp, fp, fn
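# Small binary sketch of the helper's output (illustrative values): the predictions
# [0.8, 0.6, 0.2] threshold to [1, 1, 0] against true labels [1, 0, 1], giving
# per-class counts tp = [0, 1], fp = [1, 1], fn = [1, 1].
#   >>> out = Metrics._get_classification_data(np.array([1, 0, 1]), np.array([0.8, 0.6, 0.2]))
#   >>> out[2], out[3], out[4], out[5]
#   (2, array([0., 1.]), array([1., 1.]), array([1., 1.]))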
@staticmethod
def _apply_averaging(scores, y_true_classes, num_classes, average):
"""Apply averaging strategy to per-class scores."""
if average == "macro":
return float(np.mean(scores))
elif average == "weighted":
support = np.array(
[np.sum(y_true_classes == i) for i in range(num_classes)]
)
total_support = np.sum(support)
if total_support == 0:
return 0.0
weights = support / total_support
return float(np.sum(scores * weights))
else:
return scores
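# Averaging sketch (illustrative values): per-class scores [1.0, 0.5] with class
# supports [3, 1] give a macro average of 0.75 and a weighted average of 0.875.
#   >>> scores = np.array([1.0, 0.5])
#   >>> y_true_classes = np.array([0, 0, 0, 1])
#   >>> Metrics._apply_averaging(scores, y_true_classes, 2, "macro")
#   0.75
#   >>> Metrics._apply_averaging(scores, y_true_classes, 2, "weighted")
#   0.875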
@staticmethod
def precision(y_true, y_pred, average="weighted", threshold=0.5):
"""
Compute precision score: TP / (TP + FP)
Args:
y_true: True labels
y_pred: Predicted probabilities or labels
average: 'macro', 'weighted', or None for per-class scores
threshold: Decision threshold for binary classification
"""
y_true_classes, y_pred_classes, num_classes, tp, fp, fn = (
Metrics._get_classification_data(y_true, y_pred, threshold)
)
precision_scores = np.zeros(num_classes)
for i in range(num_classes):
precision_scores[i] = (
tp[i] / (tp[i] + fp[i]) if (tp[i] + fp[i]) > 0 else 0.0
)
return Metrics._apply_averaging(
precision_scores, y_true_classes, num_classes, average
)
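# Worked binary example for precision (illustrative values): thresholded predictions
# [1, 1, 0, 1, 0] against true labels [1, 0, 1, 1, 0] give per-class precision
# [0.5, 2/3]; with class supports [2, 3] the default weighted average is 0.6.
#   >>> y_true = np.array([1, 0, 1, 1, 0])
#   >>> y_pred = np.array([0.9, 0.8, 0.4, 0.7, 0.2])
#   >>> round(Metrics.precision(y_true, y_pred), 3)
#   0.6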
@staticmethod
def recall(y_true, y_pred, average="weighted", threshold=0.5):
"""
Compute recall score: TP / (TP + FN)
Args:
y_true: True labels
y_pred: Predicted probabilities or labels
average: 'macro', 'weighted', or None for per-class scores
threshold: Decision threshold for binary classification
"""
y_true_classes, y_pred_classes, num_classes, tp, fp, fn = (
Metrics._get_classification_data(y_true, y_pred, threshold)
)
recall_scores = np.zeros(num_classes)
for i in range(num_classes):
recall_scores[i] = tp[i] / (tp[i] + fn[i]) if (tp[i] + fn[i]) > 0 else 0.0
return Metrics._apply_averaging(
recall_scores, y_true_classes, num_classes, average
)
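# Worked binary example for recall (illustrative values): thresholded predictions
# [0, 1, 0, 1] against true labels [0, 1, 1, 1] give per-class recall [1.0, 2/3];
# with class supports [1, 3] the default weighted average is 0.75.
#   >>> y_true = np.array([0, 1, 1, 1])
#   >>> y_pred = np.array([0.4, 0.8, 0.3, 0.6])
#   >>> round(Metrics.recall(y_true, y_pred), 3)
#   0.75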
@staticmethod
def f1_score(y_true, y_pred, average="weighted", threshold=0.5):
"""
Compute F1 score: 2 * (Precision * Recall) / (Precision + Recall)
Args:
y_true: True labels
y_pred: Predicted probabilities or labels
average: 'macro', 'weighted', or None for per-class scores
threshold: Decision threshold for binary classification
"""
y_true_classes, y_pred_classes, num_classes, tp, fp, fn = (
Metrics._get_classification_data(y_true, y_pred, threshold)
)
f1_scores = np.zeros(num_classes)
for i in range(num_classes):
precision_i = tp[i] / (tp[i] + fp[i]) if (tp[i] + fp[i]) > 0 else 0.0
recall_i = tp[i] / (tp[i] + fn[i]) if (tp[i] + fn[i]) > 0 else 0.0
f1_scores[i] = (
2 * precision_i * recall_i / (precision_i + recall_i)
if (precision_i + recall_i) > 0
else 0.0
)
return Metrics._apply_averaging(f1_scores, y_true_classes, num_classes, average)
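# Worked multi-class example for f1_score (illustrative values): argmax predictions
# [0, 1, 1, 2] against true labels [0, 1, 2, 2] give per-class F1 [1.0, 2/3, 2/3];
# with class supports [1, 1, 2] the default weighted average is 0.75.
#   >>> y_true = np.array([0, 1, 2, 2])
#   >>> y_pred = np.array([[0.7, 0.2, 0.1],
#   ...                    [0.1, 0.8, 0.1],
#   ...                    [0.3, 0.4, 0.3],
#   ...                    [0.1, 0.2, 0.7]])
#   >>> round(Metrics.f1_score(y_true, y_pred), 3)
#   0.75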