-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathgen-csv.py
More file actions
145 lines (114 loc) · 6.19 KB
/
gen-csv.py
File metadata and controls
145 lines (114 loc) · 6.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import json, csv
import os
def generate_album_csv_files():
    """Build ``albums.csv`` and ``album-artist.csv`` from per-album JSON files.

    Every file found under the ``lastfm_album_info`` directory tree (relative
    to the current working directory) is parsed as JSON and must contain a
    top-level ``'album'`` object.  For each file one row is written to
    ``albums.csv`` with the columns ``mbid``, ``name``, ``artist``, ``image``
    and ``lastfm_url``.  Once all files are processed, the collected
    album mbid -> artist mbid pairs are written to ``album-artist.csv``.

    Both output files are created in the current working directory and are
    overwritten if they already exist.

    Raises:
        ValueError: if a file has no ``'album'`` key, or if the artist name of
            the album's first track differs from the album's ``artist`` field.
    """
    fieldnames = ['mbid', 'name', 'artist']
    custom_fieldnames = ['image', 'lastfm_url']
    album_artist = {}  # album mbid -> artist mbid

    # newline='' is required by the csv module so row terminators are not
    # translated a second time (otherwise Windows gets '\r\r\n').  An explicit
    # UTF-8 encoding keeps non-ASCII names independent of the platform locale.
    with open('albums.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=fieldnames + custom_fieldnames)
        writer.writeheader()

        for folder_name, _subfolders, filenames in os.walk('lastfm_album_info'):
            for filename in filenames:
                json_path = os.path.join(folder_name, filename)
                with open(json_path, encoding='utf-8') as file:
                    obj = json.load(file)

                if 'album' not in obj:
                    raise ValueError('No album field')
                album = obj['album']

                inserted = {field: album[field] for field in fieldnames}
                # Select one image: the entry at index 2 of the image list
                # (presumably one of several sizes -- TODO confirm).
                inserted['image'] = album['image'][2]['#text']
                inserted['lastfm_url'] = album['url']
                writer.writerow(inserted)

                # The album payload only carries the artist *name*; the artist
                # mbid is taken from the first track, which is assumed to be by
                # the album artist.  Fail loudly when that assumption breaks.
                first_track_artist = album['tracks']['track'][0]['artist']
                if first_track_artist['name'] != album['artist']:
                    raise ValueError(
                        'artist name field and album artist fields are NOT consistent')
                album_artist[album['mbid']] = first_track_artist['mbid']

    with open('album-artist.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['album_mbid', 'artist_mbid'])
        writer.writeheader()
        for album_mbid, artist_mbid in album_artist.items():
            writer.writerow(
                {'album_mbid': album_mbid, 'artist_mbid': artist_mbid})
def generate_track_csv_files():
    """Build the track, album and relation CSV files from per-track JSON files.

    Every file found under the ``track_info`` directory tree (relative to the
    current working directory) is parsed as JSON and must contain a top-level
    ``'track'`` object.  Four files are written to the current working
    directory, each overwritten if it already exists:

    * ``tracks.csv``       -- ``mbid``, ``name``, ``duration``, ``url`` per track
    * ``album-artist.csv`` -- one ``album_mbid``/``artist_mbid`` pair per album
    * ``album-track.csv``  -- one ``album_mbid``/``track_mbid`` pair per track
    * ``albums.csv``       -- ``mbid``, ``name``, ``image``, ``lastfm_url`` per
      album, taken from the first track seen for that album

    Raises:
        ValueError: if a file has no ``'track'`` key, or if the track's artist
            name differs from the ``artist`` field of its album.
    """
    track_fieldnames = ['mbid', 'name', 'duration', 'url']
    album_artist = {}  # album mbid -> artist mbid
    album_song = {}    # album mbid -> list of track mbids
    albums_meta = {}   # album mbid -> album metadata taken from track info

    # newline='' is required by the csv module so row terminators are not
    # translated a second time (otherwise Windows gets '\r\r\n').  An explicit
    # UTF-8 encoding keeps non-ASCII names independent of the platform locale.
    with open('tracks.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=track_fieldnames)
        writer.writeheader()

        for folder_name, _subfolders, filenames in os.walk('track_info'):
            for filename in filenames:
                json_path = os.path.join(folder_name, filename)
                with open(json_path, encoding='utf-8') as file:
                    obj = json.load(file)

                if 'track' not in obj:
                    raise ValueError('No track field')
                track = obj['track']
                album = track['album']
                album_mbid = album['mbid']

                writer.writerow(
                    {field: track[field] for field in track_fieldnames})

                # Album -> track relation.
                album_song.setdefault(album_mbid, []).append(track['mbid'])

                # Album metadata: the first track seen for an album wins.
                if album_mbid not in albums_meta:
                    albums_meta[album_mbid] = {
                        'mbid': album_mbid,
                        'name': album['title'],
                        # Entry at index 2 of the image list (presumably one
                        # of several sizes -- TODO confirm).
                        'image': album['image'][2]['#text'],
                        'lastfm_url': album['url'],
                    }

                # Album -> artist relation.  The album payload only carries
                # the artist *name*, so the track artist's mbid is used, which
                # is only valid when both names agree.
                if track['artist']['name'] != album['artist']:
                    raise ValueError(
                        'artist name field and album artist fields are NOT consistent')
                album_artist[album_mbid] = track['artist']['mbid']

    with open('album-artist.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['album_mbid', 'artist_mbid'])
        writer.writeheader()
        for album_mbid, artist_mbid in album_artist.items():
            writer.writerow(
                {'album_mbid': album_mbid, 'artist_mbid': artist_mbid})

    with open('album-track.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['album_mbid', 'track_mbid'])
        writer.writeheader()
        for album_mbid, track_mbids in album_song.items():
            for track_mbid in track_mbids:
                writer.writerow(
                    {'album_mbid': album_mbid, 'track_mbid': track_mbid})

    with open('albums.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['mbid', 'name', 'image', 'lastfm_url'])
        writer.writeheader()
        for album_mbid, meta in albums_meta.items():
            writer.writerow({
                'mbid': album_mbid,
                'name': meta['name'],
                'image': meta['image'],
                'lastfm_url': meta['lastfm_url'],
            })
def main():
    """Script entry point: regenerate the CSV files derived from track info.

    Only ``generate_track_csv_files()`` is run.  ``generate_album_csv_files()``
    is left disabled, as it was before; note that both generators write
    ``albums.csv`` and ``album-artist.csv``, so running both in one go would
    make the second overwrite the first's output.
    """
    # generate_album_csv_files()
    generate_track_csv_files()


# Run only when executed as a script, so importing this module has no
# side effects on the filesystem.
if __name__ == "__main__":
    main()