-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_search.py
More file actions
102 lines (81 loc) · 3.71 KB
/
github_search.py
File metadata and controls
102 lines (81 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import github
import logging
import multiprocessing
import time
import my_repo
import operator
from typing import Dict
# exception caught in search() function
from requests.exceptions import ConnectionError
# configure the root logger: INFO level, "<timestamp> - <logger name> - <message>" line format
logging.basicConfig(format='%(asctime)s - %(name)s - %(message)s', level=logging.INFO)
def get_statistic():
    """Render the per-language counters as a fixed-width text table.

    Reads the module-level ``languages`` mapping (language name -> number of
    repositories counted), orders it by count, highest first, and produces one
    back-tick-wrapped line per language containing the name, the count and the
    integer percentage of the total (``< 1`` when the share rounds down to 0).

    Returns:
        The rendered table, which is also logged at INFO level, or an empty
        string when nothing has been counted yet.
    """
    # Take a single snapshot: ``languages`` is written by the search process
    # while this runs, so reading it several times could mix different states.
    snapshot = dict(languages.items())
    total_languages_count = sum(snapshot.values())
    if total_languages_count == 0:
        # No data yet: avoid indexing an empty list / dividing by zero below.
        return ''

    sorted_languages = sorted(snapshot.items(), key=operator.itemgetter(1), reverse=True)
    longest_word_length = max(len(str(name)) for name in snapshot)
    # The first entry has the highest count and therefore the widest number.
    longest_number_length = len(str(sorted_languages[0][1]))

    lines = []
    for language_name, language_count in sorted_languages:
        percent = language_count * 100 // total_languages_count
        if percent == 0:
            percent = '< 1'
        # Pad so that the count and percent columns line up across rows.
        first_space = ' ' * (longest_word_length - len(language_name) + 4)
        second_space = ' ' * (longest_number_length - len(str(language_count)) + 2)
        lines.append(f'`{language_name}{first_space}{language_count}{second_space}{percent}%`\n')

    output = ''.join(lines)
    logging.info(output)
    return output
def get_random_repo():
    """Return the ``html_url`` of the repository stored in ``last_repo[0]``.

    Despite the name, no random choice is made here: the value is whatever
    repository currently sits in the first slot of the shared ``last_repo`` list.
    """
    stored_repo = last_repo[0]
    return stored_repo.html_url
def generate_query(last_repository):
    """Build a search query matching repositories with fewer stars than *last_repository*.

    Args:
        last_repository: object exposing a ``stargazers_count`` attribute.

    Returns:
        A query string of the form ``stars:<N``.
    """
    star_ceiling = last_repository.stargazers_count
    return 'stars:<{}'.format(star_ceiling)
def search():
    """Walk GitHub search results and count repositories per language.

    Visits the module-level ``repositories`` result set one index at a time,
    increments the counter in ``languages`` for every repository that reports a
    language, and stores the visited repository in ``last_repo[0]``. When the
    current result set is exhausted, ``repositories`` is replaced by a new
    search for repositories with fewer stars than the last one visited.

    The function loops until a query returns no repositories at all; it then
    returns. Rate-limit and connection errors are logged and the walk resumes
    from the repository that failed, so nothing is counted twice.
    """
    global repositories
    # Index of the next repository to visit in the current result set.
    # It is deliberately NOT reset on every retry: restarting from 0 after a
    # rate-limit pause would re-count repositories that were already visited.
    step = 0
    while True:
        try:
            while True:
                repo = repositories[step]
                step += 1
                if repo.language is not None:
                    # Single write, so readers never observe a transient 0 count.
                    languages[repo.language] = languages.get(repo.language, 0) + 1
                # NOTE(review): the original nesting of the next two statements was
                # lost with the file's indentation; they are assumed to run for
                # every visited repository - confirm against the original file.
                last_repo[0] = repo
                logging.info(f'stars: {repo.stargazers_count} {repo.language} {repo.name}')
        except github.GithubException:
            # github searching capacity is limited with 300 rep/min
            # if excess occurs, github.GithubException is thrown
            logging.info('GithubException occurred')
            time.sleep(60)
        except IndexError:
            # len(repositories) is about 1000
            # if step > 1000 (i.e. if IndexError occurred) then change query
            logging.info('IndexError occurred - change query')
            if step == 0:
                # The current query produced no results, so there is no "last
                # repository" to derive the next query from: stop instead of
                # raising a second IndexError inside this handler.
                logging.info('search returned no repositories - stop')
                return
            repositories = github.Github(login_or_token=my_repo.user, password=my_repo.password).search_repositories(
                query=generate_query(repositories[step - 1]))
            # New result set: start again from its first element.
            step = 0
        # ConnectionError sometimes occurs when multiprocessing is combined with the
        # github api (presumably related to the api search limits). It does not
        # prevent the program from working, so it is only logged.
        except ConnectionError as error:
            logging.info(f'ConnectionError occurred: {str(error)}')
def start_search():
    """Run ``search`` in a separate process and block until that process ends."""
    # spawn the worker process that performs the continuous search
    worker = multiprocessing.Process(target=search)
    worker.start()
    # wait for the worker to finish
    worker.join()
# module-level shared state: search() modifies these values and the other functions read them
# initial result set: repositories with more than one star, requested without credentials
repositories = github.Github().search_repositories(query='stars:>1')
# shared dict: language name -> number of repositories counted for that language
languages: Dict[str, int] = multiprocessing.Manager().dict()
# shared list with exactly one element, used as a mutable slot (Manager().Value would
# require a ctypes typecode, and github.Repository is not a ctypes type)
# NOTE(review): indexing repositories[0] here happens at import time
last_repo = multiprocessing.Manager().list(sequence=[repositories[0]])