"""
Activation Functions Module
A comprehensive collection of activation functions and their derivatives for neural networks.
"""
import numpy as np
class ActivationFunctions:
"""
Comprehensive collection of activation functions and their derivatives.
Provides implementations of popular activation functions used in neural networks,
including their derivatives for backpropagation. All functions are numerically
stable and handle edge cases appropriately.
"""
@staticmethod
def sigmoid(x):
"""
Compute sigmoid activation function.
        Applies the logistic sigmoid function, which maps inputs to the (0, 1) range.
Includes numerical clipping to prevent overflow in exponential computation.
Args:
x (NDArray[np.float64]): Input array of any shape.
Returns:
NDArray[np.float64]: Sigmoid-activated values in range (0, 1).
Example:
>>> activated = ActivationFunctions.sigmoid(z)
>>> # Values are now between 0 and 1
"""
return 1 / (1 + np.exp(-np.clip(x, -500, 500)))
    @staticmethod
    def sigmoid_derivative(x):
        """Compute the sigmoid derivative, sigma'(x) = sigma(x) * (1 - sigma(x)), used in backpropagation."""
        sig = ActivationFunctions.sigmoid(x)
        return sig * (1 - sig)
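
    # A quick, illustrative check of the identity above (a sketch, not part of the
    # original module): compare the analytic derivative against a central finite
    # difference.
    #
    #   z = np.linspace(-4.0, 4.0, 9)
    #   eps = 1e-6
    #   numeric = (ActivationFunctions.sigmoid(z + eps)
    #              - ActivationFunctions.sigmoid(z - eps)) / (2 * eps)
    #   np.allclose(numeric, ActivationFunctions.sigmoid_derivative(z))  # -> True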
    @staticmethod
    def softmax(z):
        """
        Compute the row-wise softmax of a batch of logits z with shape (N, C). The row
        maximum is subtracted before exponentiation, which leaves the result unchanged
        but prevents overflow in np.exp.
        """
        z_shift = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(z_shift)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)
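
    # A minimal usage sketch (illustrative, not part of the original module): each
    # output row is a probability distribution over the C classes.
    #
    #   logits = np.array([[2.0, 1.0, 0.1]])
    #   probs = ActivationFunctions.softmax(logits)
    #   probs.sum(axis=1)     # -> array([1.])
    #   probs.argmax(axis=1)  # -> array([0])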
    @staticmethod
    def tanh(x):
        """Compute the hyperbolic tangent activation, which maps inputs to the (-1, 1) range."""
        return np.tanh(x)
    @staticmethod
    def tanh_derivative(z):
        """Compute the tanh derivative, 1 - tanh(z)**2, used in backpropagation."""
        tanh_z = np.tanh(z)
        return 1 - tanh_z * tanh_z
@staticmethod
def relu(x):
"""
Compute ReLU (Rectified Linear Unit) activation.
        Applies the rectified linear activation function, which passes positive values
        through unchanged and maps negative values to zero. It is the most popular
        activation for hidden layers in modern neural networks.
Args:
x (NDArray[np.float64]): Input array of any shape.
Returns:
NDArray[np.float64]: ReLU-activated values (non-negative).
Example:
>>> activated = ActivationFunctions.relu(z)
>>> # Negative values become 0, positive values unchanged
"""
return np.maximum(0, x)
    @staticmethod
    def relu_derivative(x):
        """Compute the ReLU derivative: 1.0 where x > 0, otherwise 0.0."""
        return (x > 0).astype(np.float64)
@staticmethod
def leaky_relu(x, negative_slope=0.01):
"""
Compute Leaky ReLU activation function.
        A variant of ReLU that allows a small, non-zero response for negative inputs,
        helping to mitigate the "dying ReLU" problem in which neurons can become
        permanently inactive.
Args:
x (NDArray[np.float64]): Input array of any shape.
negative_slope (float, optional): Slope for negative values. Defaults to 0.01.
Returns:
NDArray[np.float64]: Leaky ReLU-activated values.
Example:
>>> activated = ActivationFunctions.leaky_relu(z, negative_slope=0.01)
>>> # Positive values unchanged, negative values scaled by 0.01
"""
return np.where(x > 0, x, negative_slope * x)
    @staticmethod
    def leaky_relu_derivative(x, negative_slope=0.01):
        """Compute the Leaky ReLU derivative: 1.0 where x >= 0, otherwise negative_slope."""
        grad = np.ones_like(x, dtype=np.float64)
        grad[x < 0] = negative_slope
        return grad
    @staticmethod
    def selu(x):
        """
        Compute the SELU (Scaled Exponential Linear Unit) activation, using the fixed
        alpha and scale constants from Klambauer et al. (2017) that give SELU its
        self-normalizing property.
        """
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, x, alpha * (np.exp(x) - 1))
    @staticmethod
    def selu_derivative(x):
        """Compute the SELU derivative: scale where x > 0, otherwise scale * alpha * exp(x)."""
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        return scale * np.where(x > 0, 1, alpha * np.exp(x))
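
    # Illustrative check of the self-normalizing property (a sketch, not part of
    # the original module): for standard-normal inputs, SELU keeps the output close
    # to zero mean and unit variance.
    #
    #   rng = np.random.default_rng(0)
    #   out = ActivationFunctions.selu(rng.standard_normal(1_000_000))
    #   out.mean(), out.var()  # -> approximately (0.0, 1.0)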
    @staticmethod
    def inverted_dropout(x, rate=0.5, training=True):
        """
        Apply inverted dropout, zeroing each unit with probability `rate` during training.

        Surviving units are scaled by 1 / (1 - rate) so the expected activation matches
        inference, where the input is returned unchanged.
        """
        if not training or rate == 0.0:
            return x
        keep_prob = 1 - rate
        mask = np.random.binomial(1, keep_prob, size=x.shape) / keep_prob
        return x * mask
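
    # Why the 1 / keep_prob scaling (illustrative sketch, not part of the original
    # module): the expected value of the dropped-and-scaled output equals the input,
    # so no rescaling is needed at inference time.
    #
    #   x = np.ones((1000, 1000))
    #   out = ActivationFunctions.inverted_dropout(x, rate=0.5)
    #   out.mean()  # -> approximately 1.0; kept units are scaled by 2.0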
@staticmethod
def inverted_dropout_with_mask(x, rate=0.5, training=True):
"""
Inverted dropout that returns both output and mask for backpropagation.
Args:
x: Input array
rate: Dropout probability (fraction of units to drop)
training: Whether in training mode
Returns:
tuple: (output, mask) where mask includes the 1/(1-p) scaling
"""
if not training or rate == 0.0:
return x, None
keep_prob = 1 - rate
mask = np.random.binomial(1, keep_prob, size=x.shape) / keep_prob
return x * mask, mask
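
    # Typical backpropagation usage (illustrative sketch; `h` and `dout` stand for
    # a hidden activation and the upstream gradient and are not defined here):
    #
    #   out, mask = ActivationFunctions.inverted_dropout_with_mask(h, rate=0.3)
    #   ...
    #   dh = dout if mask is None else dout * mask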
    @staticmethod
    def alpha_dropout(x, rate=0.5, training=True):
        """
        Apply alpha dropout, which preserves the self-normalizing property of SELU
        activations (Klambauer et al., 2017). Dropped units are set to the SELU
        negative saturation value alpha0, and the result is affinely transformed so
        that mean and variance are preserved. At inference time x is returned unchanged.
        """
        if not training or rate == 0:
            return x
        # Fixed constants from SELU
        alpha = 1.6732632423543772848170429916717
        scale = 1.0507009873554804934193349852946
        alpha0 = -scale * alpha  # negative saturation value, ≈ -1.7581
        q = 1 - rate  # keep probability; p = rate is the drop probability
        # Paper: a = (q + alpha'^2 * p * q)^(-1/2) and b = -a * alpha' * p,
        # with alpha' = alpha0, chosen so that mean and variance are preserved.
        a = (q + alpha0**2 * rate * q) ** (-0.5)
        b = -a * alpha0 * rate
x_float = x.astype(np.float32, copy=False)
mask = np.random.binomial(1, q, size=x.shape).astype(np.float32)
# a * (x * mask + alpha0 * (1 - mask)) + b
out = a * (x_float * mask + alpha0 * (1 - mask)) + b
return out
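
    # Illustrative check (a sketch, not part of the original module): with the
    # affine parameters a and b above, standard-normal inputs keep approximately
    # zero mean and unit variance after alpha dropout.
    #
    #   rng = np.random.default_rng(0)
    #   out = ActivationFunctions.alpha_dropout(rng.standard_normal(1_000_000), rate=0.1)
    #   out.mean(), out.var()  # -> approximately (0.0, 1.0)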
@staticmethod
def alpha_dropout_with_mask(x, rate=0.5, training=True):
"""
Alpha dropout that returns both output and mask for backpropagation.
Based on "Self-Normalizing Neural Networks" (Klambauer et al., 2017).
Alpha dropout maintains the self-normalizing property of SELU activations.
Args:
x: Input array
rate: Dropout probability (p in the paper)
training: Whether in training mode
Returns:
tuple: (output, mask_dict) where mask_dict contains the binary dropout
mask and affine transform parameters for backpropagation
"""
if not training or rate == 0:
return x, None
# Constants from SELU
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
alpha0 = -scale * alpha # ≈ -1.7581
q = 1 - rate
        # Paper: a = (q + alpha'^2 * p * q)^(-1/2) and b = -a * alpha' * p,
        # with q = 1 - p and alpha' = alpha0, chosen so mean and variance are preserved.
        a = (q + alpha0**2 * rate * q) ** (-0.5)
        b = -a * alpha0 * rate
x_float = x.astype(np.float32, copy=False)
mask = np.random.binomial(1, q, size=x.shape).astype(np.float32)
# a * (x * mask + alpha0 * (1 - mask)) + b
out = a * (x_float * mask + alpha0 * (1 - mask)) + b
return out, {"mask": mask, "a": a, "alpha0": alpha0, "b": b}
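

# A small, self-contained usage sketch. It is illustrative only and not part of
# the original module: it exercises a few forward functions, compares two
# analytic derivatives against central finite differences, and shows how the
# masks returned by the *_with_mask helpers are applied during backpropagation.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    z = rng.standard_normal((4, 5))

    # Forward passes.
    sig = ActivationFunctions.sigmoid(z)
    print("sigmoid in (0, 1):", bool(np.all((sig > 0) & (sig < 1))))
    print("softmax row sums:", ActivationFunctions.softmax(z).sum(axis=1))

    # Finite-difference checks for the analytic derivatives.
    eps = 1e-6
    for name, fn, dfn in [
        ("sigmoid", ActivationFunctions.sigmoid, ActivationFunctions.sigmoid_derivative),
        ("tanh", ActivationFunctions.tanh, ActivationFunctions.tanh_derivative),
    ]:
        numeric = (fn(z + eps) - fn(z - eps)) / (2 * eps)
        print(f"{name} derivative matches finite differences:", np.allclose(numeric, dfn(z)))

    # Inverted dropout: training mode zeroes and rescales, evaluation mode is the identity.
    h = np.ones((2, 8))
    out, mask = ActivationFunctions.inverted_dropout_with_mask(h, rate=0.5, training=True)
    dout = np.ones_like(h)  # stand-in upstream gradient
    dh = dout if mask is None else dout * mask
    print("inverted dropout output:\n", out)
    print("gradient through dropout:\n", dh)
    print("eval mode is identity:",
          np.array_equal(ActivationFunctions.inverted_dropout(h, rate=0.5, training=False), h))

    # Alpha dropout backward: out = a * (x * mask + alpha0 * (1 - mask)) + b,
    # so the local gradient with respect to x is a * mask.
    x = rng.standard_normal((2, 8)).astype(np.float32)
    out_a, cache = ActivationFunctions.alpha_dropout_with_mask(x, rate=0.2, training=True)
    dx = np.ones_like(out_a) * cache["a"] * cache["mask"]
    print("alpha dropout grad shape:", dx.shape)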