Skip to content

Commit 683579c

Browse files
authored
Merge pull request #123 from Namesakenberg/feat/vgg16-transfer-learning
2 parents fbeacfb + dca5156 commit 683579c

2 files changed

Lines changed: 349 additions & 1 deletion

File tree

pydeepflow/pretrained/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@
66
"""
77

88
from .vgg16 import VGG16
9+
from .vgg19 import VGG19
910

10-
__all__ = ['VGG16']
11+
__all__ = ['VGG16', 'VGG19']

pydeepflow/pretrained/vgg19.py

Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
"""
2+
VGG19 Architecture Implementation for PyDeepFlow
3+
4+
This module implements the VGG19 deep convolutional neural network architecture
5+
for transfer learning and feature extraction tasks.
6+
7+
VGG19 Architecture:
8+
- Input: 224x224x3 RGB images
9+
- 5 convolutional blocks with max pooling
10+
- 3 fully connected layers (optional with include_top)
11+
- Total: 16 conv layers + 3 FC layers = 19 layers with learnable parameters
12+
13+
Reference:
14+
Simonyan, K., & Zisserman, A. (2014). Very Deep Convolutional Networks for
15+
Large-Scale Image Recognition. arXiv:1409.1556
16+
"""
17+
18+
import numpy as np
19+
from pydeepflow.model import ConvLayer, MaxPooling2D, Flatten
20+
from pydeepflow.device import Device
21+
from pydeepflow.weight_initialization import WeightInitializer
22+
from pydeepflow.activations import activation
23+
import warnings
24+
25+
26+
class VGG19:
    """
    VGG19 Convolutional Neural Network for Transfer Learning.

    This class implements the VGG19 architecture, which consists of:
    - Block 1: 2 conv layers (64 filters) + max pool
    - Block 2: 2 conv layers (128 filters) + max pool
    - Block 3: 4 conv layers (256 filters) + max pool
    - Block 4: 4 conv layers (512 filters) + max pool
    - Block 5: 4 conv layers (512 filters) + max pool
    - Flatten layer (if include_top=True)
    - FC layer 1: 4096 neurons (if include_top=True)
    - FC layer 2: 4096 neurons (if include_top=True)
    - FC layer 3: num_classes neurons (output, if include_top=True)

    Convolutional layers use 3x3 kernels, stride=1, padding=1, ReLU activation.
    Max pooling layers use 2x2 window with stride=2.

    Parameters
    ----------
    num_classes : int, optional
        Number of output classes for classification. Default is 1000 (ImageNet).
    input_shape : tuple, optional
        Input image shape (height, width, channels). Default is (224, 224, 3).
    use_gpu : bool, optional
        Whether to use GPU acceleration. Default is False.
    include_top : bool, optional
        Whether to include the fully connected layers at the top. Default is True.
        Set to False for feature extraction.
    weights : str or None, optional
        Path to pretrained weights file or None for random initialization.
        Default is None.
    freeze_features : bool, optional
        If True, freeze convolutional layers for feature extraction mode.
        Default is False.
    """

    def __init__(self, num_classes=1000, input_shape=(224, 224, 3),
                 use_gpu=False, include_top=True, weights=None,
                 freeze_features=False):
        self.device = Device(use_gpu=use_gpu)
        self.num_classes = num_classes
        self.input_shape = input_shape
        self.include_top = include_top
        # Indices into self.layers whose parameters are excluded from training.
        self.frozen_layers = set()

        # Validate input shape early so a bad tuple fails before any allocation.
        if len(input_shape) != 3:
            raise ValueError(f"input_shape must be 3D (H, W, C), got {input_shape}")

        if input_shape[2] != 3:
            warnings.warn(
                f"VGG19 was designed for RGB images (3 channels), "
                f"but got {input_shape[2]} channels. This may affect performance."
            )

        # self.layers holds the full sequential stack; feature_layers and
        # classifier_layers alias subsets of it (conv/pool vs. dense dicts).
        self.layers = []
        self.feature_layers = []
        self.classifier_layers = []

        self._build_architecture()

        # Load pretrained weights if provided.
        if weights is not None:
            self.load_weights(weights)

        # Freeze feature layers if requested (feature-extraction mode).
        if freeze_features:
            self.freeze_feature_layers()

    def _add_conv_block(self, in_channels, out_channels, num_convs):
        """Append one VGG block: `num_convs` 3x3 conv layers then a 2x2 max pool."""
        in_c = in_channels
        for _ in range(num_convs):
            conv = ConvLayer(in_channels=in_c, out_channels=out_channels, kernel_size=3,
                             stride=1, padding=1, device=self.device,
                             activation='relu', weight_init='he_normal')
            self.layers.append(conv)
            self.feature_layers.append(conv)
            in_c = out_channels  # subsequent convs in the block take the block's width
        pool = MaxPooling2D(pool_size=(2, 2), stride=2)
        self.layers.append(pool)
        self.feature_layers.append(pool)

    def _make_dense(self, initializer, input_dim, output_dim, act):
        """Create one fully connected layer (as a param dict) and register it."""
        w, b, _ = initializer.initialize_dense_layer(
            input_dim=input_dim, output_dim=output_dim, activation=act)
        layer = {
            'W': self.device.array(w),
            'b': self.device.array(b.reshape(1, -1)),  # row vector for broadcasting
            'activation': act
        }
        self.layers.append(layer)
        self.classifier_layers.append(layer)

    def _build_architecture(self):
        """Build the complete VGG19 architecture (5 conv blocks + optional FC head)."""
        H, W, C = self.input_shape

        # Feature extractor: (num_convs, out_channels) per VGG19 block.
        # Each block halves the spatial resolution via its max pool.
        for num_convs, out_c in [(2, 64), (2, 128), (4, 256), (4, 512), (4, 512)]:
            self._add_conv_block(C, out_c, num_convs)
            C = out_c
            H, W = H // 2, W // 2

        # ================================
        # FULLY CONNECTED (Classifier)
        # ================================
        if self.include_top:
            flatten = Flatten()
            self.layers.append(flatten)
            flattened_size = H * W * 512
            initializer = WeightInitializer(device=self.device, mode='auto', bias_init='auto')

            self._make_dense(initializer, flattened_size, 4096, 'relu')
            self._make_dense(initializer, 4096, 4096, 'relu')

            # Multi-class -> softmax; single output unit -> sigmoid.
            output_activation = 'softmax' if self.num_classes > 1 else 'sigmoid'
            self._make_dense(initializer, 4096, self.num_classes, output_activation)

    def forward(self, X, training=False):
        """Forward pass through the network.

        Parameters
        ----------
        X : array, shape (N, H, W, C)
            Batch of input images.
        training : bool, optional
            Reserved for train/inference mode switching; currently unused
            because no layer here behaves differently at train time.

        Returns
        -------
        array
            Network output: class scores if include_top=True, otherwise the
            pooled feature maps from the last conv block.
        """
        if X.ndim != 4:
            raise ValueError(f"Input must be 4D (N, H, W, C), got shape {X.shape}")
        if X.shape[1:] != self.input_shape:
            warnings.warn(
                f"Input shape {X.shape[1:]} differs from expected {self.input_shape}. This may affect performance."
            )
        current_output = X
        for layer in self.layers:
            if isinstance(layer, (ConvLayer, MaxPooling2D, Flatten)):
                current_output = layer.forward(current_output)
            elif isinstance(layer, dict) and 'W' in layer:
                # Dense layer stored as a param dict: affine transform + activation.
                Z = self.device.dot(current_output, layer['W']) + layer['b']
                current_output = activation(Z, layer['activation'], self.device)
        return current_output

    def predict(self, X):
        """Make predictions on input data (inference mode)."""
        return self.forward(X, training=False)

    def freeze_feature_layers(self):
        """Freeze all convolutional layers for feature extraction."""
        for i, layer in enumerate(self.layers):
            if isinstance(layer, ConvLayer):
                self.frozen_layers.add(i)
        print(f"Frozen {len(self.frozen_layers)} convolutional layers for feature extraction.")

    def unfreeze_layers(self, layer_names=None, num_layers=None):
        """Unfreeze specific layers or the last N conv layers for fine-tuning.

        Parameters
        ----------
        layer_names : iterable of int, optional
            Indices into self.layers to unfreeze (despite the name, these are
            layer indices, matching what freeze_feature_layers stores).
        num_layers : int, optional
            Unfreeze the last `num_layers` convolutional layers instead.
            If both arguments are None, all layers are unfrozen.
        """
        if layer_names is not None:
            for idx in layer_names:
                # discard is a no-op when idx was never frozen.
                self.frozen_layers.discard(idx)
            print(f"Unfrozen layers: {layer_names}")
        elif num_layers is not None:
            conv_indices = [i for i, layer in enumerate(self.layers) if isinstance(layer, ConvLayer)]
            # Clamp to the number of conv layers available.
            to_unfreeze = conv_indices[-num_layers:] if num_layers <= len(conv_indices) else conv_indices
            for idx in to_unfreeze:
                self.frozen_layers.discard(idx)
            print(f"Unfrozen last {len(to_unfreeze)} convolutional layers.")
        else:
            self.frozen_layers.clear()
            print("Unfrozen all layers.")

    def get_trainable_params(self):
        """Return list of all trainable parameter arrays (not frozen)."""
        trainable = []
        for i, layer in enumerate(self.layers):
            if i not in self.frozen_layers:
                if isinstance(layer, ConvLayer):
                    trainable.extend([layer.params['W'], layer.params['b']])
                elif isinstance(layer, dict) and 'W' in layer:
                    trainable.extend([layer['W'], layer['b']])
        return trainable

    def summary(self):
        """Print a summary of the VGG19 architecture."""
        print("=" * 80)
        print("VGG19 Architecture Summary")
        print("=" * 80)
        print(f"Input Shape: {self.input_shape}")
        print(f"Number of Classes: {self.num_classes}")
        print(f"Include Top (FC Layers): {self.include_top}")
        print(f"Frozen Layers: {len(self.frozen_layers)}")
        print("=" * 80)
        print(f"{'Layer':<30} {'Output Shape':<25} {'Params':<15}")
        print("-" * 80)

        # Track the running spatial dimensions through the stack.
        H, W, C = self.input_shape
        total_params = 0
        for i, layer in enumerate(self.layers):
            frozen_mark = " [FROZEN]" if i in self.frozen_layers else ""
            if isinstance(layer, ConvLayer):
                out_c = layer.out_channels
                # Weights (Fh*Fw*Cin*Cout) plus one bias per output channel.
                params = (layer.Fh * layer.Fw * layer.in_channels * out_c) + out_c
                output_shape = f"({H}, {W}, {out_c})"
                layer_name = f"Conv2D_{i}{frozen_mark}"
                print(f"{layer_name:<30} {output_shape:<25} {params:<15,}")
                total_params += params
                C = out_c
            elif isinstance(layer, MaxPooling2D):
                H, W = H // layer.stride, W // layer.stride
                output_shape = f"({H}, {W}, {C})"
                layer_name = f"MaxPooling2D_{i}"
                print(f"{layer_name:<30} {output_shape:<25} {'0':<15}")
            elif isinstance(layer, Flatten):
                flat_size = H * W * C
                output_shape = f"({flat_size},)"
                layer_name = "Flatten"
                print(f"{layer_name:<30} {output_shape:<25} {'0':<15}")
            elif isinstance(layer, dict) and 'W' in layer:
                in_size = layer['W'].shape[0]
                out_size = layer['W'].shape[1]
                params = (in_size * out_size) + out_size
                output_shape = f"({out_size},)"
                layer_name = f"Dense_{i}{frozen_mark}"
                print(f"{layer_name:<30} {output_shape:<25} {params:<15,}")
                total_params += params

        print("=" * 80)
        print(f"Total Parameters: {total_params:,}")
        trainable_params = sum(np.prod(p.shape) for p in self.get_trainable_params())
        print(f"Trainable Parameters: {trainable_params:,}")
        print(f"Non-trainable Parameters: {total_params - trainable_params:,}")
        print("=" * 80)

    def save_weights(self, filepath):
        """Save model weights to a .npy file.

        Keys are 'conv_{i}_W' / 'conv_{i}_b' and 'dense_{i}_W' / 'dense_{i}_b',
        where i is the layer's index in self.layers (load_weights relies on this).
        """
        weights_dict = {}
        for i, layer in enumerate(self.layers):
            if isinstance(layer, ConvLayer):
                weights_dict[f'conv_{i}_W'] = self.device.asnumpy(layer.params['W'])
                weights_dict[f'conv_{i}_b'] = self.device.asnumpy(layer.params['b'])
            elif isinstance(layer, dict) and 'W' in layer:
                weights_dict[f'dense_{i}_W'] = self.device.asnumpy(layer['W'])
                weights_dict[f'dense_{i}_b'] = self.device.asnumpy(layer['b'])
        np.save(filepath, weights_dict)
        print(f"Model weights saved to {filepath}")

    def load_weights(self, filepath):
        """Load model weights from a .npy file.

        Missing keys are silently skipped so partially-matching checkpoints
        (e.g. features-only) still load what they can.

        Raises
        ------
        FileNotFoundError
            If `filepath` does not exist.
        RuntimeError
            If the file exists but cannot be parsed as a weights dict.
        """
        try:
            # allow_pickle is required: the dict was saved as an object array.
            weights_dict = np.load(filepath, allow_pickle=True).item()
            for i, layer in enumerate(self.layers):
                if isinstance(layer, ConvLayer):
                    if f'conv_{i}_W' in weights_dict:
                        layer.params['W'] = self.device.array(weights_dict[f'conv_{i}_W'])
                        layer.params['b'] = self.device.array(weights_dict[f'conv_{i}_b'])
                elif isinstance(layer, dict) and 'W' in layer:
                    if f'dense_{i}_W' in weights_dict:
                        layer['W'] = self.device.array(weights_dict[f'dense_{i}_W'])
                        layer['b'] = self.device.array(weights_dict[f'dense_{i}_b'])
            print(f"Model weights loaded from {filepath}")
        except FileNotFoundError as e:
            # Chain the cause so the original OS-level error is preserved.
            raise FileNotFoundError(f"Weights file not found: {filepath}") from e
        except Exception as e:
            raise RuntimeError(f"Error loading weights: {str(e)}") from e

0 commit comments

Comments
 (0)