-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcoco2tsv.py
More file actions
65 lines (54 loc) · 1.65 KB
/
coco2tsv.py
File metadata and controls
65 lines (54 loc) · 1.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
"""
Generate tsv files from the coco dataset
Download the caption annotations from http://mscoco.org/dataset/#download
Assume the following files exist:
```
annotations/captions_train2014.json
annotations/captions_val2014.json
annotations/image_info_test2015.json
```
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
__author__ = "Raingo Lee (raingomm@gmail.com)"
import sys
import os.path as osp
import simplejson
def dump_json(split):
    """Write ``<split>.tsv`` from the COCO annotation JSON of one split.

    Reads ``./annotations/captions_<split>.json`` when that file exists;
    otherwise falls back to ``./annotations/image_info_<split>.json`` and
    treats the split as image-only (no captions available).  Input and
    output paths are relative to the current working directory.

    Every output row has exactly three tab-separated columns: image id,
    image path (``<split>/<file_name>``) and caption.

    * Image-only splits: one row per image, with ``.`` as a placeholder
      caption.
    * Captioned splits: one row per non-empty caption line, so a caption
      containing newlines produces several rows for the same image.

    Args:
        split: Split name such as ``'train2014'``.  It is used to locate the
            JSON file, as the directory prefix of each image path, and as
            the stem of the output file name.

    Raises:
        IOError / OSError: if the selected annotation file cannot be opened.
        KeyError: if an annotation refers to an image id that is not listed
            under ``images``.
    """
    save_path = split + '.tsv'
    json_path = osp.join('./annotations/', 'captions_%s.json' % split)

    # No caption file for this split: fall back to the image-info file.
    image_only = not osp.exists(json_path)
    if image_only:
        json_path = osp.join('./annotations/', 'image_info_%s.json' % split)

    with open(json_path) as reader:
        info = simplejson.load(reader)
    images = info['images']

    with open(save_path, 'w') as writer:
        # Columns: image-id, image-path, caption
        if image_only:
            for image in images:
                print(image['id'],
                      osp.join(split, image['file_name']),
                      ".", sep='\t', file=writer)
        else:
            id2path = {item['id']: osp.join(split, item['file_name'])
                       for item in images}
            for ann in info['annotations']:
                image_id = ann['image_id']
                for caption in ann['caption'].split('\n'):
                    # A tab inside the caption would add a spurious column
                    # and break the three-column layout, so flatten it to a
                    # space before trimming.
                    caption = caption.replace('\t', ' ').strip()
                    if caption:
                        print(image_id,
                              id2path[image_id],
                              caption,
                              sep='\t', file=writer)
def main():
    """Convert each COCO split to its TSV file.

    Processes the train 2014, val 2014 and test 2015 splits, in that order,
    by handing each split name to ``dump_json``.
    """
    for split_name in ('train2014', 'val2014', 'test2015'):
        dump_json(split_name)
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
# vim: tabstop=4 expandtab shiftwidth=2 softtabstop=2