Dense.py
import numpy as np
from Layer import Layer


class Dense(Layer):
    """
    Fully connected (dense) layer with configurable weight initialization.

    Parameters
    ----------
    input_size : int
        Number of input features.
    output_size : int
        Number of output features.
    weight_init : {'xavier', 'he', 'lecun'}, optional
        Weight initialization method. Defaults to 'xavier'.

    Attributes
    ----------
    weights : np.ndarray
        Weight matrix of shape (output_size, input_size).
    bias : np.ndarray
        Bias vector of shape (output_size, 1).
    input : np.ndarray
        Cached input from forward pass.
    output : np.ndarray
        Output of the layer after forward pass.
    """

    def __init__(
        self, input_size: int, output_size: int, weight_init: str = "xavier"
    ) -> None:
        super().__init__()
        if weight_init not in ("xavier", "he", "lecun"):
            raise ValueError(
                f"Invalid weight_init '{weight_init}'. Choose from 'xavier', 'he', 'lecun'."
            )
        self.weights = self._initialize_weights(
            input_size, output_size, method=weight_init
        )
        self.bias = np.zeros((output_size, 1))

    @staticmethod
    def _initialize_weights(
        input_size: int, output_size: int, method: str
    ) -> np.ndarray:
        """Return initialized weight matrix."""
        if method == "he":
            std = np.sqrt(2.0 / input_size)
            return np.random.randn(output_size, input_size) * std
        elif method == "lecun":
            std = np.sqrt(1.0 / input_size)
            return np.random.randn(output_size, input_size) * std
        else:  # Xavier
            limit = np.sqrt(6.0 / (input_size + output_size))
            return np.random.uniform(-limit, limit, (output_size, input_size))

    def forward(self, inputs: np.ndarray) -> np.ndarray:
        """
        Perform the forward pass.

        Parameters
        ----------
        inputs : np.ndarray
            Input data of shape (input_size, batch_size).

        Returns
        -------
        np.ndarray
            Output of shape (output_size, batch_size).
        """
        self.input = inputs
        self.output = np.dot(self.weights, self.input) + self.bias
        return self.output

    def backward(
        self, output_gradient: np.ndarray, learning_rate: float
    ) -> np.ndarray:
        """
        Compute backward pass and update weights and bias.

        Parameters
        ----------
        output_gradient : np.ndarray
            Gradient from the next layer of shape (output_size, batch_size).
        learning_rate : float
            Learning rate for parameter updates.

        Returns
        -------
        np.ndarray
            Gradient for the previous layer of shape (input_size, batch_size).
        """
        weights_gradient = np.dot(output_gradient, self.input.T)
        input_gradient = np.dot(self.weights.T, output_gradient)
        # In-place updates for performance
        self.weights -= learning_rate * weights_gradient
        self.bias -= learning_rate * np.sum(output_gradient, axis=1, keepdims=True)
        return input_gradient

    def __repr__(self) -> str:
        return (
            f"Dense(input={self.weights.shape[1]}, output={self.weights.shape[0]}, "
            f"weights={self.weights.shape}, bias={self.bias.shape})"
        )

    def __str__(self) -> str:
        return f"Dense Layer: {self.weights.shape[1]} → {self.weights.shape[0]}"
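

# A minimal usage sketch of the forward/backward API documented above,
# assuming the Layer base class in Layer.py needs no constructor arguments.
# The dimensions, data, and squared-error loss below are illustrative only.
if __name__ == "__main__":
    np.random.seed(0)

    layer = Dense(input_size=4, output_size=3, weight_init="he")
    x = np.random.randn(4, 8)       # batch of 8 column vectors, 4 features each
    target = np.random.randn(3, 8)  # matching targets for a toy regression

    out = layer.forward(x)          # shape (3, 8): weights @ x + bias
    loss_before = 0.5 * np.sum((out - target) ** 2)

    # Pass the loss gradient w.r.t. the output back through the layer;
    # backward() also applies one gradient-descent update to weights and bias.
    grad_in = layer.backward(out - target, learning_rate=0.01)

    loss_after = 0.5 * np.sum((layer.forward(x) - target) ** 2)
    print(layer)                          # Dense Layer: 4 → 3
    print(grad_in.shape)                  # (4, 8), gradient for the previous layer
    print(loss_before, "->", loss_after)  # loss decreases after the update step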