# Source: BulbulProject/gen-csv.py (master branch of aeakdogan/BulbulProject on GitHub)

import json, csv
import os

def generate_album_csv_files():
    """Build ``albums.csv`` and ``album-artist.csv`` from album JSON dumps.

    Every file found under ``lastfm_album_info`` (walked recursively,
    relative to the current working directory) is parsed as JSON and must
    contain a top-level ``album`` object.

    Output, both written to the current working directory:

    * ``albums.csv`` -- one row per input file with the columns ``mbid``,
      ``name``, ``artist``, ``image`` (the ``#text`` of the third entry of
      the album's ``image`` list) and ``lastfm_url`` (the album's ``url``).
    * ``album-artist.csv`` -- one row per distinct album mbid with the
      columns ``album_mbid`` and ``artist_mbid``.  The artist mbid is read
      from the first track of the album.

    Raises:
        Exception: if a file has no ``album`` object, if the album has no
            track to read the artist mbid from, or if the first track's
            artist name differs from the album's ``artist``.  The message
            names the offending file.
    """
    album_artist = {}  # album mbid -> artist mbid

    base_fields = ['mbid', 'name', 'artist']
    extra_fields = ['image', 'lastfm_url']

    # newline='' is what the csv module asks for (otherwise rows can end in
    # '\r\r\n' on platforms that translate newlines); the explicit encoding
    # keeps non-ASCII names from depending on the locale.
    with open('albums.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=base_fields + extra_fields)
        writer.writeheader()

        for folder, subfolders, filenames in os.walk('lastfm_album_info'):
            # os.walk yields entries in arbitrary order; sorting in place
            # makes both the descent order and the CSV row order reproducible.
            subfolders.sort()
            for filename in sorted(filenames):
                path = os.path.join(folder, filename)
                with open(path, encoding='utf-8') as json_file:
                    obj = json.load(json_file)

                if 'album' not in obj:
                    raise Exception('No album field: %s' % path)
                album = obj['album']

                # NOTE(review): 'track' is handled both as a list and as a
                # single object; the single-object shape is assumed to occur
                # for one-track albums -- confirm against the dataset.
                tracks = album['tracks']['track']
                if isinstance(tracks, dict):
                    tracks = [tracks]
                if not tracks:
                    raise Exception(
                        'No track to read the artist mbid from: %s' % path)

                # The artist mbid is only available on the tracks, so the
                # first track's artist is trusted only if its name matches
                # the album's artist.
                first_artist = tracks[0]['artist']
                if first_artist['name'] != album['artist']:
                    raise Exception(
                        'artist name field and album artist fields are NOT '
                        'consistent: %s' % path)

                # Validate first, write second: a rejected file leaves no row.
                row = {field: album[field] for field in base_fields}
                row['image'] = album['image'][2]['#text']  # select one image
                row['lastfm_url'] = album['url']
                writer.writerow(row)

                album_artist[album['mbid']] = first_artist['mbid']

    with open('album-artist.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['album_mbid', 'artist_mbid'])
        writer.writeheader()
        for album_mbid, artist_mbid in album_artist.items():
            writer.writerow({'album_mbid': album_mbid,
                             'artist_mbid': artist_mbid})


def generate_track_csv_files():
    """Build the track and album CSV files from track JSON dumps.

    Every file found under ``track_info`` (walked recursively, relative to
    the current working directory) is parsed as JSON and must contain a
    top-level ``track`` object with nested ``artist`` and ``album`` objects.

    Output, all written to the current working directory:

    * ``tracks.csv`` -- one row per input file: ``mbid``, ``name``,
      ``duration``, ``url``.
    * ``album-track.csv`` -- one row per (album, track) pair:
      ``album_mbid``, ``track_mbid``.
    * ``album-artist.csv`` -- one row per distinct album mbid:
      ``album_mbid``, ``artist_mbid`` (the mbid of the track's artist).
    * ``albums.csv`` -- one row per distinct album mbid: ``mbid``, ``name``
      (the album ``title``), ``image`` (the ``#text`` of the third entry of
      the album's ``image`` list) and ``lastfm_url`` (the album ``url``).
      The metadata comes from the first track seen for that album.

    Raises:
        Exception: if a file has no ``track`` object, or if the track's
            artist name differs from its album's ``artist``.  The message
            names the offending file.
    """
    album_artist = {}  # album mbid -> artist mbid
    album_song = {}    # album mbid -> list of track mbids
    albums_meta = {}   # album mbid -> album metadata taken from a track

    track_fields = ['mbid', 'name', 'duration', 'url']

    # newline='' is what the csv module asks for (otherwise rows can end in
    # '\r\r\n' on platforms that translate newlines); the explicit encoding
    # keeps non-ASCII names from depending on the locale.
    with open('tracks.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=track_fields)
        writer.writeheader()

        for folder, subfolders, filenames in os.walk('track_info'):
            # os.walk yields entries in arbitrary order; sorting in place
            # makes both the descent order and the CSV row order reproducible.
            subfolders.sort()
            for filename in sorted(filenames):
                # Open the track info file.
                path = os.path.join(folder, filename)
                with open(path, encoding='utf-8') as json_file:
                    obj = json.load(json_file)

                if 'track' not in obj:
                    raise Exception('No track field: %s' % path)
                track = obj['track']
                album = track['album']

                # The track's artist mbid is recorded as the album's artist,
                # which is only valid when both names agree.
                if track['artist']['name'] != album['artist']:
                    raise Exception(
                        'artist name field and album artist fields are NOT '
                        'consistent: %s' % path)

                # Validate first, write second: a rejected file leaves no row.
                writer.writerow({field: track[field] for field in track_fields})

                album_mbid = album['mbid']

                # Album -> track relation.
                album_song.setdefault(album_mbid, []).append(track['mbid'])

                # Album metadata: keep what the first track of the album says.
                if album_mbid not in albums_meta:
                    albums_meta[album_mbid] = {
                        'mbid': album_mbid,
                        'name': album['title'],
                        'image': album['image'][2]['#text'],
                        'lastfm_url': album['url'],
                    }

                # Album -> artist relation.
                album_artist[album_mbid] = track['artist']['mbid']

    with open('album-artist.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['album_mbid', 'artist_mbid'])
        writer.writeheader()
        for album_mbid, artist_mbid in album_artist.items():
            writer.writerow({'album_mbid': album_mbid,
                             'artist_mbid': artist_mbid})

    with open('album-track.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile,
                                fieldnames=['album_mbid', 'track_mbid'])
        writer.writeheader()
        for album_mbid, track_mbids in album_song.items():
            for track_mbid in track_mbids:
                writer.writerow({'album_mbid': album_mbid,
                                 'track_mbid': track_mbid})

    with open('albums.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(
            csvfile, fieldnames=['mbid', 'name', 'image', 'lastfm_url'])
        writer.writeheader()
        for album_mbid, meta in albums_meta.items():
            writer.writerow({'mbid': album_mbid,
                             'name': meta['name'],
                             'image': meta['image'],
                             'lastfm_url': meta['lastfm_url']})


def main():
    """Script entry point: regenerate the CSV files from the track dumps.

    Only generate_track_csv_files() is run.  generate_album_csv_files()
    also writes albums.csv and album-artist.csv, so running both would
    leave those two files holding the output of whichever ran last; call
    it here instead if the album dumps should be the source.
    """
    generate_track_csv_files()

# Run the generators only when executed as a script, not on import.
if __name__ == "__main__":
    main()