-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
64 lines (52 loc) · 2.67 KB
/
app.py
File metadata and controls
64 lines (52 loc) · 2.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import pandas as pd
from unidecode import unidecode
NAME_PRE_EXERCISES = ['FirstSet', 'SecondSet', 'ThirdSet', 'FourthSet', 'FifthSet', 'SixthSet']
USER_COLUMN_REF = "Please enter your name (FirstName LastName)"
FOLDER_DATA = 'documents/'
def compare_names(name1, name2):
"""
To match name:
- Normalization using unidecode --> drop accents...
- Lowercase comparison
- Check if at lease one of the name match with the users dataset
"""
same_name = False
a_list = unidecode(name1.lower()).split(" ")
b_list = unidecode(name2.lower()).split(" ")
list_matching_names = set(a_list).intersection(b_list)
if list_matching_names:
same_name = True
return same_name
def run():
########## READ THE CSV WITH THE USERS INFORMATION ##########
user_data = pd.read_csv('users.csv')
not_found = []
########## ITERATE OVER EACH SET OF EXERCISES ##########
for set_name in NAME_PRE_EXERCISES:
user_data[set_name] = ""
for file in os.listdir(FOLDER_DATA + set_name):
########## ONLY STUDY THE CSV FILE WITH THE RESULTS ##########
if ".csv" in file:
df_result = pd.read_csv(FOLDER_DATA + set_name + "/" + file)
########## ITERATE OVER EACH USER FOUND ON THE PRE-EXERCISE ##########
for index, row in df_result.iterrows():
########## MATCH THE USERNAME AND EMAIL WITH THE DATASET ##########
locate_user_email = user_data["Email"].apply(lambda x: compare_names(x, row["Username"]))
locate_user_username = user_data["Name"].apply(lambda x: compare_names(x, row[USER_COLUMN_REF]))
########## IF THE USER IS NOT FOUND APPEND ON A LIST TO DEBUG IT LATER ##########
if user_data.loc[locate_user_email].empty and user_data.loc[locate_user_username].empty:
user = [row["Username"], row[USER_COLUMN_REF]]
user_repeated = [element for element in not_found if element[1] == row[USER_COLUMN_REF]]
if not user_repeated:
not_found.append(user)
########## IF USER FOUND SET TO 1 THE STATUS OF THIS PRE-EXERCISE ##########
else:
user_data.loc[locate_user_email | locate_user_username, set_name] = 1
break
########## WRITE A CSV WITH THE RESULTS ##########
user_data.to_csv('user_data.csv')
########## PRINT THE USERS THAT DID SOME PRE-EXERCISE BUT ARE NOT ON THE DATASET ##########
print('{} unmatched people: \n {}'.format(len(not_found), not_found))
if __name__ == "__main__":
run()