"""
Activation Functions Module
A comprehensive collection of activation functions and their derivatives for neural networks.
"""
import numpy as np
class ActivationFunctions:
"""
Comprehensive collection of activation functions and their derivatives.
Provides implementations of popular activation functions used in neural networks,
including their derivatives for backpropagation. All functions are numerically
stable and handle edge cases appropriately.
"""
@staticmethod
def sigmoid(x):
"""
Compute sigmoid activation function.
        Applies the logistic sigmoid function, which maps inputs to the (0, 1) range.
Includes numerical clipping to prevent overflow in exponential computation.
Args:
x (NDArray[np.float64]): Input array of any shape.
Returns:
NDArray[np.float64]: Sigmoid-activated values in range (0, 1).
Example:
>>> activated = ActivationFunctions.sigmoid(z)
>>> # Values are now between 0 and 1
"""
return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
    @staticmethod
    def sigmoid_derivative(x):
        """Compute the sigmoid derivative, sigma'(x) = sigma(x) * (1 - sigma(x)), used in backpropagation."""
        sig = ActivationFunctions.sigmoid(x)
        return sig * (1 - sig)
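
    # A quick, illustrative check of the identity above (a sketch, not part of the
    # original module): compare the analytic derivative against a central finite
    # difference.
    #
    #   z = np.linspace(-4.0, 4.0, 9)
    #   eps = 1e-6
    #   numeric = (ActivationFunctions.sigmoid(z + eps)
    #              - ActivationFunctions.sigmoid(z - eps)) / (2 * eps)
    #   np.allclose(numeric, ActivationFunctions.sigmoid_derivative(z))  # -> True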
    @staticmethod
    def softmax(z):
        """
        Compute the row-wise softmax of a batch of logits z with shape (N, C). The row
        maximum is subtracted before exponentiation, which leaves the result unchanged
        but prevents overflow in np.exp.
        """
        z_shift = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(z_shift)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)
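
    # A minimal usage sketch (illustrative, not part of the original module): each
    # output row is a probability distribution over the C classes.
    #
    #   logits = np.array([[2.0, 1.0, 0.1]])
    #   probs = ActivationFunctions.softmax(logits)
    #   probs.sum(axis=1)     # -> array([1.])
    #   probs.argmax(axis=1)  # -> array([0])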
    @staticmethod
    def tanh(x):
        """Compute the hyperbolic tangent activation, which maps inputs to the (-1, 1) range."""
        return np.tanh(x)
    @staticmethod
    def tanh_derivative(z):
        """Compute the tanh derivative, 1 - tanh(z)**2, used in backpropagation."""
        tanh_z = np.tanh(z)
        return 1 - tanh_z * tanh_z
@staticmethod
def relu(x):
"""
Compute ReLU (Rectified Linear Unit) activation.
        Applies the rectified linear activation function, which passes positive values
        through unchanged and maps negative values to zero. It is the most popular
        activation for hidden layers in modern neural networks.
Args:
x (NDArray[np.float64]): Input array of any shape.
Returns:
NDArray[np.float64]: ReLU-activated values (non-negative).
Example:
>>> activated = ActivationFunctions.relu(z)
>>> # Negative values become 0, positive values unchanged
"""
return np.maximum(0, x)
    @staticmethod
    def relu_derivative(x):
        """Compute the ReLU derivative: 1.0 where x > 0, otherwise 0.0."""
        return (x > 0).astype(np.float64)
@staticmethod
def leaky_relu(x, negative_slope=0.01):
"""
Compute Leaky ReLU activation function.
        A variant of ReLU that allows a small, non-zero response for negative inputs,
        helping to mitigate the "dying ReLU" problem in which neurons can become
        permanently inactive.
Args:
x (NDArray[np.float64]): Input array of any shape.
negative_slope (float, optional): Slope for negative values. Defaults to 0.01.
Returns:
NDArray[np.float64]: Leaky ReLU-activated values.
Example:
>>> activated = ActivationFunctions.leaky_relu(z, negative_slope=0.01)
>>> # Positive values unchanged, negative values scaled by 0.01
"""
return np.where(x > 0, x, negative_slope * x)
    @staticmethod
    def leaky_relu_derivative(x, negative_slope=0.01):
        """Compute the Leaky ReLU derivative: 1.0 where x >= 0, otherwise negative_slope."""
        grad = np.ones_like(x, dtype=np.float64)
        grad[x < 0] = negative_slope
        return grad
    @staticmethod
    def selu(x):
        """
        Compute the SELU (Scaled Exponential Linear Unit) activation, using the fixed
        alpha and scale constants from Klambauer et al. (2017) that give SELU its
        self-normalizing property.
        """
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, x, alpha * (np.exp(x) - 1))
    @staticmethod
    def selu_derivative(x):
        """Compute the SELU derivative: scale where x > 0, otherwise scale * alpha * exp(x)."""
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, 1, alpha * np.exp(x))
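
    # Illustrative check of the self-normalizing property (a sketch, not part of
    # the original module): for standard-normal inputs, SELU keeps the output close
    # to zero mean and unit variance.
    #
    #   rng = np.random.default_rng(0)
    #   out = ActivationFunctions.selu(rng.standard_normal(1_000_000))
    #   out.mean(), out.var()  # -> approximately (0.0, 1.0)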
    @staticmethod
    def inverted_dropout(x, rate=0.5, training=True):
        """
        Apply inverted dropout, zeroing each unit with probability `rate` during training.

        Surviving units are scaled by 1 / (1 - rate) so the expected activation matches
        inference, where the input is returned unchanged.
        """
        if not training or rate == 0.0:
            return x
        keep_prob = 1 - rate
        mask = np.random.binomial(1, keep_prob, size=x.shape) / keep_prob
        return x * mask
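
    # Why the 1 / keep_prob scaling (illustrative sketch, not part of the original
    # module): the expected value of the dropped-and-scaled output equals the input,
    # so no rescaling is needed at inference time.
    #
    #   x = np.ones((1000, 1000))
    #   out = ActivationFunctions.inverted_dropout(x, rate=0.5)
    #   out.mean()  # -> approximately 1.0; kept units are scaled by 2.0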
@staticmethod
def inverted_dropout_with_mask(x, rate=0.5, training=True):
"""
Inverted dropout that returns both output and mask for backpropagation.
Args:
x: Input array
rate: Dropout probability (fraction of units to drop)
training: Whether in training mode
Returns:
tuple: (output, mask) where mask includes the 1/(1-p) scaling
"""
if not training or rate == 0.0:
return x, None
keep_prob = 1 - rate
mask = np.random.binomial(1, keep_prob, size=x.shape) / keep_prob
return x * mask, mask
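
    # Typical backpropagation usage (illustrative sketch; `h` and `dout` stand for
    # a hidden activation and the upstream gradient and are not defined here):
    #
    #   out, mask = ActivationFunctions.inverted_dropout_with_mask(h, rate=0.3)
    #   ...
    #   dh = dout if mask is None else dout * mask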
    @staticmethod
    def alpha_dropout(x, rate=0.5, training=True):
        """
        Apply alpha dropout, which preserves the self-normalizing property of SELU
        activations (Klambauer et al., 2017). Dropped units are set to the SELU
        negative saturation value alpha0, and the result is affinely transformed so
        that mean and variance are preserved. At inference time x is returned unchanged.
        """
        if not training or rate == 0:
            return x
        # Fixed constants from SELU
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        alpha0 = -scale * alpha  # negative saturation value, ≈ -1.7581
        q = 1 - rate  # keep probability; p = rate is the drop probability
        # Paper: a = (q + alpha'^2 * p * q)^(-1/2) and b = -a * alpha' * p,
        # with alpha' = alpha0, chosen so that mean and variance are preserved.
        a = (q + alpha0**2 * rate * q) ** (-0.5)
        b = -a * alpha0 * rate
x_float = x.astype(np.float32, copy=False)
mask = np.random.binomial(1, q, size=x.shape).astype(np.float32)
# a * (x * mask + alpha0 * (1 - mask)) + b
out = a * (x_float * mask + alpha0 * (1 - mask)) + b
return out
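
    # Illustrative check (a sketch, not part of the original module): with the
    # affine parameters a and b above, standard-normal inputs keep approximately
    # zero mean and unit variance after alpha dropout.
    #
    #   rng = np.random.default_rng(0)
    #   out = ActivationFunctions.alpha_dropout(rng.standard_normal(1_000_000), rate=0.1)
    #   out.mean(), out.var()  # -> approximately (0.0, 1.0)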
@staticmethod
def alpha_dropout_with_mask(x, rate=0.5, training=True):
"""
Alpha dropout that returns both output and mask for backpropagation.
Based on "Self-Normalizing Neural Networks" (Klambauer et al., 2017).
Alpha dropout maintains the self-normalizing property of SELU activations.
Args:
x: Input array
rate: Dropout probability (p in the paper)
training: Whether in training mode
Returns:
tuple: (output, mask_dict) where mask_dict contains the binary dropout
mask and affine transform parameters for backpropagation
"""
if not training or rate == 0:
return x, None
# Constants from SELU
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
alpha0 = -scale * alpha # ≈ -1.7581
q = 1 - rate
        # Paper: a = (q + alpha'^2 * p * q)^(-1/2) and b = -a * alpha' * p,
        # with q = 1 - p and alpha' = alpha0, chosen so mean and variance are preserved.
        a = (q + alpha0**2 * rate * q) ** (-0.5)
        b = -a * alpha0 * rate
x_float = x.astype(np.float32, copy=False)
mask = np.random.binomial(1, q, size=x.shape).astype(np.float32)
# a * (x * mask + alpha0 * (1 - mask)) + b
out = a * (x_float * mask + alpha0 * (1 - mask)) + b
return out, {"mask": mask, "a": a, "alpha0": alpha0, "b": b}
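

# A small, self-contained usage sketch. It is illustrative only and not part of
# the original module: it exercises a few forward functions, compares two
# analytic derivatives against central finite differences, and shows how the
# masks returned by the *_with_mask helpers are applied during backpropagation.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    z = rng.standard_normal((4, 5))

    # Forward passes.
    sig = ActivationFunctions.sigmoid(z)
    print("sigmoid in (0, 1):", bool(np.all((sig > 0) & (sig < 1))))
    print("softmax row sums:", ActivationFunctions.softmax(z).sum(axis=1))

    # Finite-difference checks for the analytic derivatives.
    eps = 1e-6
    for name, fn, dfn in [
        ("sigmoid", ActivationFunctions.sigmoid, ActivationFunctions.sigmoid_derivative),
        ("tanh", ActivationFunctions.tanh, ActivationFunctions.tanh_derivative),
    ]:
        numeric = (fn(z + eps) - fn(z - eps)) / (2 * eps)
        print(f"{name} derivative matches finite differences:", np.allclose(numeric, dfn(z)))

    # Inverted dropout: training mode zeroes and rescales, evaluation mode is the identity.
    h = np.ones((2, 8))
    out, mask = ActivationFunctions.inverted_dropout_with_mask(h, rate=0.5, training=True)
    dout = np.ones_like(h)  # stand-in upstream gradient
    dh = dout if mask is None else dout * mask
    print("inverted dropout output:\n", out)
    print("gradient through dropout:\n", dh)
    print("eval mode is identity:",
          np.array_equal(ActivationFunctions.inverted_dropout(h, rate=0.5, training=False), h))

    # Alpha dropout backward: out = a * (x * mask + alpha0 * (1 - mask)) + b,
    # so the local gradient with respect to x is a * mask.
    x = rng.standard_normal((2, 8)).astype(np.float32)
    out_a, cache = ActivationFunctions.alpha_dropout_with_mask(x, rate=0.2, training=True)
    dx = np.ones_like(out_a) * cache["a"] * cache["mask"]
    print("alpha dropout grad shape:", dx.shape)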