# github-classroom-utils/github_graders.py at master · mfbutner/github-classroom-utils · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# github_graders.py
# Dan Wallach <dwallach@rice.edu>
# Available subject to the Apache 2.0 License
# https://www.apache.org/licenses/LICENSE-2.0

import re
import argparse
import random
import pandas as pd
from github_config import *
from github_scanner import *

from typing import List, TypeVar

# your graders, preferably their GitHub IDs (we'll ignore them if they've also checked out a copy of the assignment)
# This is a private copy: the list is shuffled in place later in this script, and the shared
# configuration default must not be reordered as a side effect. A missing (None) default
# becomes an empty list, which the "no graders" check below then reports.
grader_list = list(default_grader_list or [])

# your own GitHub ID and/or anybody else who you wish to exclude from being graded
# Copied as well, so a missing (None) default simply means "nobody is ignored".
ignore_list = list(default_grader_ignore_list or [])

# command-line argument processing

parser = argparse.ArgumentParser(description='Random assignment of graders to students')

# Every option takes exactly one value (nargs=1), so argparse stores it as a one-element list;
# each default is wrapped in a list to keep the same shape whether or not the flag is given.
for _flag, _default, _help in (
        ('--token', default_github_token,
         'GitHub API token'),
        ('--org', default_github_organization,
         'GitHub organization to scan, default: ' + default_github_organization),
        ('--prefix', default_prefix,
         'Prefix on projects to match (default: match all projects)'),
        ('--students', default_student_csv_name,
         "CSV file name with student information (default: student-info.csv)"),
):
    parser.add_argument(_flag, nargs=1, default=[_default], help=_help)

args = parser.parse_args()

# Unwrap the one-element lists into the plain settings used by the rest of the script.
github_prefix, github_organization, github_token, student_file_name = (
    args.prefix[0],
    args.org[0],
    args.token[0],
    args.students[0],
)

# Generic element type: lets the hints on group_list_by_n say that the chunks hold
# the same kind of element as the input list.
T = TypeVar('T')


def group_list_by_n(l: List[T], n: int) -> List[List[T]]:
    """
    Given a list of whatever type, divides it into a list of lists, each of which is n elements long,
    until the last one, having whatever is left.

    :param l: the list to split; it is not modified (every chunk is a new list made by slicing)
    :param n: the chunk length, which must be positive whenever l is non-empty
    :return: the chunks in their original order; an empty list when l is empty
    :raises ValueError: if l is non-empty and n is zero or negative
    """
    if len(l) == 0:
        return []
    if n <= 0:
        # No chunk size below 1 can ever consume the list.
        raise ValueError("n must be a positive integer, got %r" % (n,))
    # Iterative slicing: one pass, and no recursion depth limit regardless of how many chunks result.
    return [l[start:start + n] for start in range(0, len(l), n)]


def student_name_from(repo_name: str) -> str:
    """
    Given a GitHub repo "name" (e.g., "comp215-week01-intro-danwallach") return the username suffix at the
    end ("danwallach"). If it's not there, the result is an empty string ("").

    The configured prefix (the module-level github_prefix) is matched as literal text, followed by a dash.
    Whatever follows that dash is the result, lower-cased, with one trailing "-<digits>" group removed.

    :param repo_name: the repository name to inspect
    :return: the lower-cased username suffix, or "" when the prefix-plus-dash is not present
    """
    # Escape the prefix: it is a plain assignment name, not a pattern, so characters such as
    # "." or "+" in it must not be interpreted as regex syntax.
    m = re.search(re.escape(github_prefix) + "-(.*)$", repo_name)
    if not m:
        return ""  # something funny in the name, so therefore not matching

    # there might be a trailing dash and digits if the student did the clone thing multiple times
    # also, we're converting everything to lower-case
    return re.sub(r"-\d+$", "", m.group(1)).lower()


def desired_user(name: str) -> bool:
    """
    Given a GitHub repo "name" (e.g., "comp215-week01-intro-2017-danwallach"), returns true or false if that
    project is something we're trying to grade now, based on the specified prefix as well as the list of graders
    (to be ignored) and the ignore-list (also to be ignored). Since we might be dealing with student groups,
    which can give themselves their own group names, this function defaults to True, unless it finds a reason
    to say False.

    :param name: the repository name to test
    :return: False when no user suffix can be extracted, when the name is exactly the prefix or does not
             start with it, or when the suffix names a grader or an ignored user; True otherwise
    """
    m = student_name_from(name)
    if m == "" or name == github_prefix or not name.startswith(github_prefix):
        return False

    # student_name_from() yields a lower-cased ID, so the exclusion lists have to be lower-cased too;
    # otherwise an entry written with capitals (e.g. "DanWallach") would never be excluded.
    excluded = {entry.lower() for entry in grader_list} | {entry.lower() for entry in ignore_list}
    return m not in excluded


# Student roster, loaded on a best-effort basis: a missing CSV file is not an error, it only
# means GitHub IDs cannot be matched to the names and emails in the roster.
df_students = {}  # placeholder; becomes a DataFrame when the CSV is read
df_students_success = False
try:
    df_students = pd.read_csv(student_file_name)
except FileNotFoundError:
    # deliberately silent: the roster is optional
    pass
else:
    # force lower-case of GitHub IDs, once, so later lookups can compare them directly
    lowered_ids = df_students.GitHubID.astype(str).str.lower()
    df_students.GitHubID = lowered_ids
    df_students_success = True


def student_info(github_id: str) -> str:
    """
    Given a GitHub ID, returns a suitably human-readable string based on
    the student-data CSV file with the student's name, email, etc.

    :param github_id: the GitHub ID to look up in the roster's GitHubID column
    :return: "Name <Email>" when the NetID is blank or the email already starts with it,
             "Name <Email> (NetID)" otherwise; the bare github_id when no roster was loaded
             or the ID is not in it

    A warning is printed to stdout when the ID is missing from the roster, and when it
    matches more than one row (the first matching row is then used).
    """
    if not df_students_success:
        return github_id

    matches = df_students[df_students['GitHubID'] == github_id]
    if len(matches) == 0:
        print("Warning: github-id (%s) not found in student info!" % github_id, flush=True)
        # There is no name or email to show, so fall back to the ID itself rather than " <>".
        return github_id
    if len(matches) > 1:
        print("Warning: two or more rows found for github-id (%s) in student info!" % github_id, flush=True)
    student = matches.iloc[0].to_dict()

    def field(key: str) -> str:
        # A missing column or an empty CSV cell (NaN) is treated as an empty string.
        value = student.get(key, '')
        return '' if pd.isna(value) else str(value)

    name, email, net_id = field('Name'), field('Email'), field('NetID')
    # Only append the NetID when there is one and the email does not already start with it.
    if not net_id or email.startswith(net_id):
        return "%s <%s>" % (name, email)
    return "%s <%s> (%s)" % (name, email, net_id)


# First things first, if we have no graders, we can't divide up the work.
if not grader_list:
    # Raising SystemExit with a string prints that string to stderr and exits with status 1.
    # Unlike the exit() helper, which is injected by the site module, it is always available.
    raise SystemExit("Error: grader_list is empty, cannot assign grades")

ids_seen = {}     # GitHub ID -> how many distinct repo URLs have been recorded for it
submissions = {}  # GitHub ID -> list of the repo dicts recorded for it

filtered_repo_list = [candidate
                      for candidate in query_matching_repos(github_organization, github_prefix, github_token)
                      if desired_user(candidate['name'])]

# Let's do a duplicate check, and also sort out the URL we want to use
print("%d repos in the initial search\n" % len(filtered_repo_list))
for repo in filtered_repo_list:
    # use the html_url when the repo has one, otherwise fall back to url
    repo['final_url'] = repo['html_url'] if 'html_url' in repo else repo['url']

    gid = student_name_from(repo['name'])
    if gid not in ids_seen:
        # first repo seen for this ID
        ids_seen[gid] = 1
        submissions[gid] = [repo]
        continue

    # check if we have an exact duplicate or not ... this shouldn't happen, but ... does.
    known_urls = [earlier['final_url'] for earlier in submissions[gid]]
    if repo['final_url'] in known_urls:
        # same URL again: warn, but do not record it a second time
        sys.stdout.write('Warning: exact url for GitHub ID <%s> seen more than once!\n' % gid)
        sys.stdout.flush()
    else:
        # a different repo for the same ID: warn and keep it alongside the earlier one(s)
        sys.stdout.write('Warning: GitHub ID <%s> with different URLs seen!\n' % gid)
        sys.stdout.flush()
        ids_seen[gid] += 1
        submissions[gid].append(repo)


# note: we're shuffling the graders, so different graders get lucky each week when the load isn't evenly divisible
# and, of course, we're shuffling the repos.
all_gids = list(submissions)
print("%d unique GitHub IDs found" % len(all_gids))
random.seed()
random.shuffle(all_gids)
random.shuffle(grader_list)

# Deal the shuffled IDs out round-robin: with k graders, grader i receives the IDs at positions
# i, i + k, i + 2k, ... This yields exactly the groups obtained by cutting the list into rows of
# k and reading column i, but in a single pass and without re-chunking the list for every grader.
num_graders = len(grader_list)
grading_groups = [all_gids[i::num_graders] for i in range(num_graders)]

grader_map = dict(zip(grader_list, grading_groups))

# Markdown report: one section per grader (sorted case-insensitively), one bullet per GitHub ID.
print("# Grade assignments for %s" % github_prefix)
print("%d repos are ready to grade\n" % len(all_gids))
for grader in sorted(grader_map, key=str.lower):
    assigned = grader_map[grader]
    print("## %s (%d total)" % (grader, len(assigned)))
    for gid in sorted(assigned):
        repos = submissions[gid]
        if len(repos) != 1:
            # several repos for one ID: list each of them under a highlighted heading
            print("- **Multiple repos for %s** - %s" % (gid, student_info(gid)))
            for repo in repos:
                print("  - [%s](%s)" % (repo['name'], repo['final_url']))
            continue

        only_repo = repos[0]
        if df_students_success:
            # roster available: add the student's details after the link
            print("- [%s](%s) - %s" % (gid, only_repo['final_url'], student_info(gid)))
        else:
            print("- [%s](%s)" % (gid, only_repo['final_url']))