From 15cbb3fea994441d65096d61cc5cf553aa82fc7f Mon Sep 17 00:00:00 2001 From: Joy Neal Date: Tue, 5 May 2020 18:31:06 +0800 Subject: [PATCH 1/2] bugfix of img_aug.py: Augmentor 0.2.8 needs absolute target path --- img_aug.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/img_aug.py b/img_aug.py index ca05b1954a..4a0fb4671d 100644 --- a/img_aug.py +++ b/img_aug.py @@ -17,7 +17,7 @@ def makedir(path): for i in range(len(folders)): fd = folders[i] - tfd = target_folders[i] + tfd = os.path.abspath(target_folders[i]) # rotation p = Augmentor.Pipeline(source_directory=fd, output_directory=tfd) p.rotate(probability=1, max_left_rotation=15, max_right_rotation=15) From d417ac4881e5384db386d9764df7a0ef0f0a6b28 Mon Sep 17 00:00:00 2001 From: Joy Neal Date: Tue, 5 May 2020 18:45:18 +0800 Subject: [PATCH 2/2] add img_crop.py to automate the cropping, splitting of CUB_200_2011 --- README.txt | 14 ++++++++------ img_crop.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 img_crop.py diff --git a/README.txt b/README.txt index 3355f5d134..468062f907 100644 --- a/README.txt +++ b/README.txt @@ -14,12 +14,14 @@ Recommended hardware: 4 NVIDIA Tesla P-100 GPUs or 8 NVIDIA Tesla K-80 GPUs Instructions for preparing the data: 1. Download the dataset CUB_200_2011.tgz from http://www.vision.caltech.edu/visipedia/CUB-200-2011.html -2. Unpack CUB_200_2011.tgz -3. Crop the images using information from bounding_boxes.txt (included in the dataset) -4. Split the cropped images into training and test sets, using train_test_split.txt (included in the dataset) -5. Put the cropped training images in the directory "./datasets/cub200_cropped/train_cropped/" -6. Put the cropped test images in the directory "./datasets/cub200_cropped/test_cropped/" -7. Augment the training set using img_aug.py (included in this code package) +2. Unpack CUB_200_2011.tgz into "../CUB_200_2011/" +3. 
Preprocess the CUB_200_2011 dataset using img_crop.py (included in this code package) + -- this will carry out the following steps: + a. Crop the images using information from bounding_boxes.txt (included in the dataset) + b. Split the cropped images into training and test sets, using train_test_split.txt (included in the dataset) + c. Put the cropped training images in the directory "./datasets/cub200_cropped/train_cropped/" + d. Put the cropped test images in the directory "./datasets/cub200_cropped/test_cropped/" +4. Augment the training set using img_aug.py (included in this code package) -- this will create an augmented training set in the following directory: "./datasets/cub200_cropped/train_cropped_augmented/" diff --git a/img_crop.py b/img_crop.py new file mode 100644 index 0000000000..c86c34630e --- /dev/null +++ b/img_crop.py @@ -0,0 +1,41 @@ +import os +import pandas as pd +import cv2 + +def makedir(path): + ''' + if path does not exist in the file system, create it + ''' + if not os.path.exists(path): + os.makedirs(path) + +source_dir = '../CUB_200_2011/' # original CUB_200_2011 dataset directory + +datasets_root_dir = './datasets/cub200_cropped/' +train_dir = datasets_root_dir + 'train_cropped/' +test_dir = datasets_root_dir + 'test_cropped/' +makedir(train_dir) +makedir(test_dir) + +classes = pd.read_csv(source_dir + 'classes.txt', sep=' ', names=['id', 'classname'], index_col='id') +for classname in classes['classname']: + makedir(train_dir + classname) + makedir(test_dir + classname) + +images = pd.read_csv(source_dir + 'images.txt', sep=' ', names=['id', 'path'], index_col='id') +bounding_boxes = pd.read_csv(source_dir + 'bounding_boxes.txt', sep=' ', names=['id', 'x', 'y', 'weight', 'height'], index_col='id') +train_test_split = pd.read_csv(source_dir + 'train_test_split.txt', sep=' ', names=['id', 'train'], index_col='id') + +for idx in images.index: + print(idx) + + imgpath, = images.loc[idx] + x, y, weight, height = bounding_boxes.loc[idx] + 
is_train, = train_test_split.loc[idx] + x, y, weight, height = int(x), int(y), int(weight), int(height) + + img = cv2.imread(source_dir + 'images/' + imgpath) + basepath = train_dir if is_train else test_dir + cv2.imwrite(basepath + imgpath, img[y:y+height, x:x+weight, :]) + +