|
7 | 7 | from collections import defaultdict |
8 | 8 |
|
9 | 9 | image_size = (28, 28) |
| 10 | +status_update_after = 5000 # images processed |
10 | 11 |
|
11 | 12 | user_class = dict() |
12 | | -user_data = defaultdict(list) |
| 13 | +user_data = defaultdict(dict) |
13 | 14 |
|
14 | 15 | parent_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) |
15 | 16 | image_paths = os.path.join(parent_path, 'data', 'raw_data', 'images_*', '*', '*', '*.png') |
|
22 | 23 | img = Image.open(character_file).resize(image_size, resample=Image.LANCZOS) |
23 | 24 | flattened_img = np.array(img.convert('L')).flatten() / 255. |
24 | 25 |
|
25 | | - user_data[user_id].append(flattened_img.tolist()) |
26 | 26 | if user_id not in user_class: |
27 | 27 | user_class[user_id] = character_class |
| 28 | + user_data[user_id]['x'] = list() |
| 29 | + user_data[user_id]['y'] = list() |
| 30 | + user_data[user_id]['x'].append(flattened_img.tolist()) |
| 31 | + user_data[user_id]['y'].append(user_id) |
28 | 32 |
|
29 | | - if (i+1) % 1000 == 0: |
| 33 | + if (i+1) % status_update_after == 0: |
30 | 34 | print ("{} images converted".format(i+1)) |
31 | 35 |
|
32 | | -all_data = { 'users': list(user_class.items()), 'user_data': user_data } |
| 36 | +all_data = dict() |
| 37 | +all_data['users'] = list(user_class.keys()) |
| 38 | +all_data['num_samples'] = [ len(user_data[x]['x']) for x in all_data['users'] ] |
| 39 | +all_data['user_data'] = user_data |
33 | 40 |
|
34 | 41 | file_name = 'all_data.json' |
35 | 42 | file_path = os.path.join(parent_path, 'data', 'all_data', file_name) |
|
0 commit comments