-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathPersonalSummary.py
More file actions
149 lines (125 loc) · 6.64 KB
/
PersonalSummary.py
File metadata and controls
149 lines (125 loc) · 6.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# importing the packages necessary
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from plotnine import *
from expyriment import misc
# for more explanation on z-method and what is done in this section, see DataSummary.py
def clean(data):
    """Remove reaction-time outliers via the iterative z-score method.

    Repeatedly computes z-values for the "RT" column and drops the single
    most extreme observation while its |z| exceeds 3, recomputing mean and
    standard deviation after every removal (same procedure as in
    DataSummary.py).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a numeric "RT" column.

    Returns
    -------
    pandas.DataFrame
        The cleaned data with a fresh 0..n-1 integer index and an added
        "z_value" column holding the last round of z-scores.
    """
    while True:
        mean = np.mean(data["RT"])
        sd = np.std(data["RT"])  # population sd (ddof=0), as before
        # Vectorized z-scores over the named column. This replaces the
        # fragile per-row loop that read row[6], which silently breaks as
        # soon as "RT" is not the 7th field of the itertuples record.
        data["z_value"] = (data["RT"] - mean).abs() / sd
        z_max = data["z_value"].idxmax()
        if data.at[z_max, "z_value"] > 3:
            data = data.drop(z_max)
        else:
            break
    # re-index so callers can rely on consecutive integer positions
    data = data.set_index(np.arange(len(data)))
    return data
def create_datasets(tidy = True):
    """Load the official dataset and the participant's own dataset.

    Parameters
    ----------
    tidy : bool
        If True (default), both datasets are outlier-cleaned via clean().

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        (data_general, data_specific)

    Raises
    ------
    FileNotFoundError
        If the participant's expyriment log cannot be converted, i.e. the
        experiment has not been run yet.
    """
    # import official data and turn Condition, Response, StimVar, as well as
    # Block into a categorical value (for usage in plots)
    data_general = pd.read_csv("data/data.csv")
    for col in ("Block", "Condition", "Response", "StimVar"):
        data_general[col] = data_general[col].astype("category")
    # in case the user wants to be shown his own data without having run the
    # experiment, a descriptive error is raised below
    try:
        # converting the log-file to csv-format:
        misc.data_preprocessing.write_concatenated_data("./data", "menu_01.xpd", output_file = "data_converted.csv")
        df = pd.read_csv("data_converted.csv")          # reading the dataframe
        df.to_csv(path_or_buf = "data_converted.csv")   # rewrite the csv-file
        data_specific = pd.read_csv("data_converted.csv")  # re-read the csv-file
        data_specific.columns = data_specific.iloc[0]   # first data row holds the real column names
        data_specific = data_specific[1:]               # drop that header row (a row, not a column)
        # change the d.types of the variables to make working with it feasible
        data_specific = data_specific.astype({
            "subject_id": 'int32',
            "Response": 'int32',
            "StimVar": 'int32',
            "Onset": 'int32',
            "RT": 'int32',
            "Block": 'int32',
        })
        # make some variables categorical to enable better working
        for col in ("Block", "Condition", "Response", "StimVar"):
            data_specific[col] = data_specific[col].astype("category")
        # check whether to call upon cleaning the datasets
        if tidy:
            data_general = clean(data_general)
            data_specific = clean(data_specific)
        return data_general, data_specific
    # narrowed from a bare `except:`; the original comment promised a
    # FileNotFound-error, so raise exactly that and chain the real cause
    except Exception as err:
        raise FileNotFoundError("No data files found. Please run the experiment first.") from err
def rt_plots(data_general, data_specific):
    """Build two reaction-time histograms in the same manner, one per dataset, and return both drawn figures."""
    orange = "#FFA500"

    def draw_histogram(dataset, width):
        # shared plot recipe; only the bound data and the bin width differ
        plot = (
            ggplot(dataset, aes(x = "RT"))
            + geom_histogram(binwidth = width, color = orange, fill = orange, alpha = 0.4)
            + labs(
                x = "Reaction Time",
                y = "Number of Occurrences",
                title = "Distribution of Reaction Times"
            )
        )
        return plot.draw()

    # the official dataset uses coarser bins than the participant's data
    fig_general = draw_histogram(data_general, 50)
    fig_specific = draw_histogram(data_specific, 10)
    return fig_general, fig_specific
def violin(data_general, data_specific):
    """Create a violin plot of the general data (as in DataSummary.py) and scatter the participant's individual data points on top."""
    # built layer by layer; apart from the jitter layer this matches DataSummary.ipynb
    plot = ggplot(data_general, aes(x='Block', y='RT'))
    plot = plot + geom_violin(fill = "#FFA500", color = "#FFA500", alpha = 0.4)
    plot = plot + stat_summary(color = "#4F94CD")
    # jitters the participant's data points over the plot
    plot = plot + geom_jitter(data = data_specific, color = "red")
    plot = plot + labs(
        x = "Trial Block",
        y = "Reaction Time in ms",
        title = "Learning effect between blocks - mean Reaction Times shrink with practice"
    )
    return plot.draw()
def merge_datasets():
    """Append the participant's data to the general data file (data/data.csv).

    Called by the corresponding button when the user wants to contribute
    their data. Both datasets are validated (row counts and column names)
    before anything is written; on failure an error message is printed and
    the general file is left untouched.
    """
    data_general, data_specific = create_datasets(tidy = False)
    expected_cols = ["SubjectID", "Response", "StimVar", "Condition", "Onset", "RT", "Block"]
    # criterions for data_general
    rows_g, cols_g = data_general.shape
    rowGeneral = (rows_g >= 1248)
    # plain list equality instead of elementwise `columns == [...]`, which
    # raises a ValueError when the column counts differ instead of being False
    colGeneral = (list(data_general.columns) == expected_cols)
    data_specific = data_specific.rename(columns = {"subject_id" : "SubjectID"})
    # criterions for data_specific:
    rows_s, cols_s = data_specific.shape
    rowSpecific = (rows_s == 48)
    colSpecific = (list(data_specific.columns) == expected_cols)
    # prepare second dataset: give it the next free subject id
    last_subject = data_general.at[rows_g - 1, "SubjectID"]
    data_specific["SubjectID"] = last_subject + 1
    # check, whether all criterions are met before merging
    if rowGeneral and colGeneral:
        if rowSpecific and colSpecific:
            # DataFrame.append was removed in pandas 2.0 -> use pd.concat
            data_combined = pd.concat([data_general, data_specific])
            data_combined = data_combined.set_index(np.arange(rows_g + rows_s))
            data_combined.to_csv(path_or_buf = "data/data.csv")
            # only thank the user when the merge actually happened
            print("Thank you for contributing your data!")
        else:
            print("There must have been a mistake with your experiment data. Try again") # error message, if there is sth wrong with the participant's file
    else:
        print("There is a problem with the general data file. Try again") # error message, if there is sth wrong with the data collection
    return