Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.sw[op]
*.pkl
*pkl.gz
results/
*.pth
*.DS_Store
*.Rhistory

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
8 changes: 3 additions & 5 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ Recommended hardware: 4 NVIDIA Tesla P-100 GPUs or 8 NVIDIA Tesla K-80 GPUs
Instructions for preparing the data:
1. Download the dataset CUB_200_2011.tgz from http://www.vision.caltech.edu/visipedia/CUB-200-2011.html
2. Unpack CUB_200_2011.tgz
3. Crop the images using information from bounding_boxes.txt (included in the dataset)
4. Split the cropped images into training and test sets, using train_test_split.txt (included in the dataset)
5. Put the cropped training images in the directory "./datasets/cub200_cropped/train_cropped/"
6. Put the cropped test images in the directory "./datasets/cub200_cropped/test_cropped/"
7. Augment the training set using img_aug.py (included in this code package)
3. Run initial_images_processing.py to process the dataset into the required form (crops each image to its bounding box and splits the results into training and test sets)
-- Ex. python initial_images_processing.py /path/to/CUB_200_2011/images/
4. Augment the training set using img_aug.py (included in this code package)
-- this will create an augmented training set in the following directory:
"./datasets/cub200_cropped/train_cropped_augmented/"

Expand Down
17 changes: 17 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
name: protopnet
dependencies:
- python=3.7
- ipython
- numpy
- pandas
- pytorch
- torchvision
- cudatoolkit
- scipy
- opencv
- matplotlib
- pip:
- Augmentor
channels:
- pytorch
- conda-forge
4 changes: 3 additions & 1 deletion img_aug.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import Augmentor
import os


def makedir(path):
'''
if path does not exist in the file system, create it
Expand All @@ -17,7 +19,7 @@ def makedir(path):

for i in range(len(folders)):
fd = folders[i]
tfd = target_folders[i]
tfd = os.path.abspath(target_folders[i])
# rotation
p = Augmentor.Pipeline(source_directory=fd, output_directory=tfd)
p.rotate(probability=1, max_left_rotation=15, max_right_rotation=15)
Expand Down
62 changes: 62 additions & 0 deletions initial_images_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import argparse
from pathlib import Path

import cv2


def main():
    """Crop CUB-200-2011 images to their bounding boxes and split them into
    training/test sets.

    Expects the positional argument ``images_dir`` to point at the ``images/``
    folder of an unpacked CUB_200_2011 dataset; the metadata files
    (images.txt, bounding_boxes.txt, train_test_split.txt) are read from the
    directory one level above it.  Cropped images are written to
    ./datasets/cub200_cropped/{train_cropped,test_cropped}/<class_name>/
    relative to this script.

    Raises:
        Exception: if the three metadata files disagree on row indexing.
        IOError: if an image listed in images.txt cannot be read.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('images_dir', help='relative path to the images folder as described in CUB 200-2011')
    args = parser.parse_args()

    images_dir = Path(args.images_dir)
    # The metadata text files live one level above the images/ folder.
    imgs_txt = images_dir.joinpath('../images.txt')
    bb_txt = images_dir.joinpath('../bounding_boxes.txt')
    train_test = images_dir.joinpath('../train_test_split.txt')
    train_dir = Path(__file__).parent.joinpath('datasets/cub200_cropped/train_cropped')
    test_dir = Path(__file__).parent.joinpath('datasets/cub200_cropped/test_cropped')
    # exist_ok avoids the check-then-create race and repeated boilerplate.
    train_dir.mkdir(parents=True, exist_ok=True)
    test_dir.mkdir(parents=True, exist_ok=True)

    with open(imgs_txt) as imgs:
        img_index = imgs.readlines()
    with open(bb_txt) as bb:
        bb_index = bb.readlines()
    with open(train_test) as tt:
        tt_index = tt.readlines()

    imgs_to_data = []
    for i, line in enumerate(img_index):
        n1, filename = line.strip().split(' ')
        n2, x, y, width, height = bb_index[i].strip().split(' ')
        n3, is_train = tt_index[i].strip().split(' ')
        # The three files are expected to list images in the same order;
        # the leading index column is used as a sanity check.
        if n1 != n2 or n2 != n3:
            raise Exception('something went wrong and indexing on images.txt/bounding_boxes.txt/train_test_split.txt is off')
        imgs_to_data.append([
            images_dir.joinpath(filename),
            int(float(x)),
            int(float(y)),
            int(float(width)),
            int(float(height)),
            bool(int(is_train)),  # 1 -> training image, 0 -> test image
        ])

    for path, x, y, w, h, is_train in imgs_to_data:
        im = cv2.imread(str(path))
        if im is None:
            # cv2.imread returns None (instead of raising) for a missing or
            # unreadable file; fail with a clear message rather than crash
            # with an opaque TypeError on the slice below.
            raise IOError('could not read image: {}'.format(path))
        # Crop to the bounding box (rows y..y+h, columns x..x+w).
        im = im[y:y+h, x:x+w, :]

        # Keep the per-class subdirectory name from the source layout.
        base_dir = train_dir if is_train else test_dir
        final_dir = base_dir.joinpath(path.parent.name)
        final_dir.mkdir(exist_ok=True)
        final_path = final_dir.joinpath(path.name)

        cv2.imwrite(str(final_path), im)


if __name__ == "__main__":
    main()
80 changes: 47 additions & 33 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import shutil
import copy

import torch
import torch.utils.data
Expand All @@ -12,12 +13,14 @@

from helpers import makedir
import model
import push
from push import Pusher
import prune
import train_and_test as tnt
import save
from log import create_logger
from preprocess import mean, std, preprocess_input_function
from settings import *


parser = argparse.ArgumentParser()
parser.add_argument('-gpuid', nargs=1, type=str, default='0') # python3 main.py -gpuid=0,1,2,3
Expand All @@ -26,8 +29,6 @@
print(os.environ['CUDA_VISIBLE_DEVICES'])

# book keeping namings and code
from settings import base_architecture, img_size, prototype_shape, num_classes, \
prototype_activation_function, add_on_layers_type, experiment_run

base_architecture_type = re.match('^[a-z]*', base_architecture).group(0)

Expand All @@ -48,8 +49,14 @@
proto_bound_boxes_filename_prefix = 'bb'

# load the data
from settings import train_dir, test_dir, train_push_dir, \
train_batch_size, test_batch_size, train_push_batch_size


def perform_push(pusher, epoch_number):
    """Run a prototype push for the given epoch.

    Dispatches to the protobank-based push when the module-level
    ``use_protobank`` flag (imported from settings) is set, otherwise
    to the original push.
    """
    push_fn = pusher.push_protobank if use_protobank else pusher.push_orig
    push_fn(epoch_number)


normalize = transforms.Normalize(mean=mean,
std=std)
Expand Down Expand Up @@ -100,43 +107,63 @@
prototype_shape=prototype_shape,
num_classes=num_classes,
prototype_activation_function=prototype_activation_function,
add_on_layers_type=add_on_layers_type)
add_on_layers_type=add_on_layers_type,
bank_size=bank_size)
#if prototype_activation_function == 'linear':
# ppnet.set_last_layer_incorrect_connection(incorrect_strength=0)
ppnet = ppnet.cuda()
ppnet_multi = torch.nn.DataParallel(ppnet)
class_specific = True

# define optimizer
from settings import joint_optimizer_lrs, joint_lr_step_size
if use_protobank:
prototype_params = {
'params': ppnet.protobank_tensor,
'lr': joint_optimizer_lrs['prototype_vectors']
}
else:
prototype_params = {
'params': ppnet.prototype_vectors,
'lr': joint_optimizer_lrs['prototype_vectors']
}

joint_optimizer_specs = \
[{'params': ppnet.features.parameters(), 'lr': joint_optimizer_lrs['features'], 'weight_decay': 1e-3}, # bias are now also being regularized
{'params': ppnet.add_on_layers.parameters(), 'lr': joint_optimizer_lrs['add_on_layers'], 'weight_decay': 1e-3},
{'params': ppnet.prototype_vectors, 'lr': joint_optimizer_lrs['prototype_vectors']},
prototype_params,
]
joint_optimizer = torch.optim.Adam(joint_optimizer_specs)
joint_lr_scheduler = torch.optim.lr_scheduler.StepLR(joint_optimizer, step_size=joint_lr_step_size, gamma=0.1)

from settings import warm_optimizer_lrs
warm_optimizer_specs = \
[{'params': ppnet.add_on_layers.parameters(), 'lr': warm_optimizer_lrs['add_on_layers'], 'weight_decay': 1e-3},
{'params': ppnet.prototype_vectors, 'lr': warm_optimizer_lrs['prototype_vectors']},
prototype_params,
]
warm_optimizer = torch.optim.Adam(warm_optimizer_specs)

from settings import last_layer_optimizer_lr
last_layer_optimizer_specs = [{'params': ppnet.last_layer.parameters(), 'lr': last_layer_optimizer_lr}]
last_layer_optimizer = torch.optim.Adam(last_layer_optimizer_specs)

# weighting of different training losses
from settings import coefs

# number of training epochs, number of warm epochs, push start epoch, push epochs
from settings import num_train_epochs, num_warm_epochs, push_start, push_epochs

# train the model
log('start training')
import copy
pusher = Pusher(
train_push_loader,
prototype_network_parallel=ppnet_multi,
bank_size=bank_size,
class_specific=class_specific,
preprocess_input_function=preprocess_input_function, # normalize if needed
prototype_layer_stride=1,
dir_for_saving_prototypes=img_dir, # if not None, prototypes will be saved here
prototype_img_filename_prefix=prototype_img_filename_prefix,
prototype_self_act_filename_prefix=prototype_self_act_filename_prefix,
proto_bound_boxes_filename_prefix=proto_bound_boxes_filename_prefix,
save_prototype_class_identity=True,
log=log
)

if do_initial_push:
perform_push(pusher, 0)

for epoch in range(num_train_epochs):
log('epoch: \t{0}'.format(epoch))

Expand All @@ -156,19 +183,7 @@
target_accu=0.70, log=log)

if epoch >= push_start and epoch in push_epochs:
push.push_prototypes(
train_push_loader, # pytorch dataloader (must be unnormalized in [0,1])
prototype_network_parallel=ppnet_multi, # pytorch network with prototype_vectors
class_specific=class_specific,
preprocess_input_function=preprocess_input_function, # normalize if needed
prototype_layer_stride=1,
root_dir_for_saving_prototypes=img_dir, # if not None, prototypes will be saved here
epoch_number=epoch, # if not provided, prototypes saved previously will be overwritten
prototype_img_filename_prefix=prototype_img_filename_prefix,
prototype_self_act_filename_prefix=prototype_self_act_filename_prefix,
proto_bound_boxes_filename_prefix=proto_bound_boxes_filename_prefix,
save_prototype_class_identity=True,
log=log)
perform_push(pusher, epoch)
accu = tnt.test(model=ppnet_multi, dataloader=test_loader,
class_specific=class_specific, log=log)
save.save_model_w_condition(model=ppnet, model_dir=model_dir, model_name=str(epoch) + 'push', accu=accu,
Expand All @@ -184,6 +199,5 @@
class_specific=class_specific, log=log)
save.save_model_w_condition(model=ppnet, model_dir=model_dir, model_name=str(epoch) + '_' + str(i) + 'push', accu=accu,
target_accu=0.70, log=log)

logclose()

logclose()
Loading