randomfurry-api/scraper.py at main · HundSimon/randomfurry-api · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from pixivpy3 import *
import json
from requests import *
from flask import jsonify
from utils.format import format_json

# Read secret
with open("key.json", "r") as key:
    key_loads = json.loads(key.read())
    key_refresh_token = key_loads["refresh_token"]
    key_illust = key_loads["user_bookmarks_illust"]

# Initialize
api = AppPixivAPI()
api.auth(refresh_token=key_refresh_token)

# Remove duplicated
def remove_duplicates(json_data):
    string_data = [json.dumps(d, sort_keys=True) for d in json_data]
    string_data = list(dict.fromkeys(string_data))
    json_data = [json.loads(s) for s in string_data]
    return json_data

def remove_dicts_without_tags(file_path, tag_names):
    with open(file_path, 'r') as f:
        json_data = json.load(f)
    filtered_data = [d for d in json_data if any(any(tag['name'] == tag_name or tag['translated_name'] == tag_name for tag in d.get('tags', [])) for tag_name in tag_names)]
    with open(file_path, 'w') as f:
        json.dump(filtered_data, f, indent=4)

# Get Metadatas
def get_bookmark_illusts():
	merged_json = {}
	json_result = api.user_bookmarks_illust(key_illust)
	merged_json.update(json_result)
	merged_json = merged_json["illusts"]

	while True:
		next_qs = api.parse_qs(json_result.next_url)
		json_result = api.user_bookmarks_illust(**next_qs)
		string_list = json_result["illusts"]
		merged_json.extend(string_list)
		print(next_qs)
		if api.user_bookmarks_illust(**next_qs).next_url == None :
			return merged_json

def get_followed_illusts():
	merged_json = {}
	json_result = api.illust_follow(req_auth=True)
	merged_json.update(json_result)
	merged_json = merged_json["illusts"]

	for x in range(10):
	    next_qs = api.parse_qs(json_result.next_url)
	    json_result = api.illust_follow(**next_qs)
	    merged_json.extend(json_result.illusts)
	    print(api.illust_follow(**next_qs).next_url)
	return merged_json

def get_recommended_illusts():
	merged_json = {}
	json_result = api.illust_recommended(content_type="illust")
	merged_json.update(json_result)
	merged_json = merged_json["illusts"]
	return merged_json


temp_metadata = []

with open("metadata.json", "r", encoding="utf-8") as old_metadata:
	final_metadata = json.load(old_metadata)
	temp_metadata.extend(get_bookmark_illusts())
	temp_metadata.extend(get_followed_illusts())
	temp_metadata.extend(get_recommended_illusts())
	final_metadata.extend(format_json(loaded_data=temp_metadata))

with open("metadata.json", 'w') as new_metadata:
    json.dump(final_metadata, new_metadata, indent=4)

# Remove irrelevant tagged contents
tag_names = ["furry", "furry shota", "furry male", "beast", "kemono", "獣人", "獸", "兽人", "ケモノ", "竜人", "オスケモ"]
remove_dicts_without_tags("metadata.json", tag_names)