LMRC-Python-Model/Markov.py at master · DmytroCrawford/LMRC-Python-Model · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from collections import defaultdict
import string
import csv
import random
import numpy as np
import pandas as pd
import math
import statsmodels.api as sm

def findWinner(row):
    """Return the name of the team credited with the win for one game row.

    The home team is returned only when it scored strictly more points than
    the away team; in every other case (including equal scores) the away
    team is returned.
    """
    home_scored_more = row["Home Points"] > row["Away Points"]
    return row["Home Team"] if home_scored_more else row["Away Team"]

def findLoser(row):
    """Return the name of the team credited with the loss for one game row.

    The home team is returned only when it scored strictly fewer points than
    the away team; otherwise (including equal scores) the away team is
    returned.
    """
    home_scored_less = row["Home Points"] < row["Away Points"]
    if home_scored_less:
        return row["Home Team"]
    return row["Away Team"]

def calcRowMargin(row):
    """Return the absolute point difference between the two teams in *row*."""
    home_points = row["Home Points"]
    away_points = row["Away Points"]
    return abs(home_points - away_points)

# Logistic function: maps a point margin to a value between 0 and 1 using the slope (a) and intercept (b) of the fitted logistic regression.
def log_r(margin, a, b):
    """Evaluate the logistic curve ``1 / (1 + exp(-(a * margin + b)))``.

    Args:
        margin: Point margin (scalar or NumPy array).
        a: Slope of the logistic curve.
        b: Intercept of the logistic curve.

    Returns:
        The logistic value, strictly between 0 and 1 for finite inputs.
    """
    exponent = -a * margin - b
    denominator = 1 + np.exp(exponent)
    return 1 / denominator

def _is_explicit_false(flag):
    """Return True only when *flag* clearly encodes a boolean false.

    Accepts real booleans (pandas turns "true"/"false" CSV cells into bools)
    as well as the raw strings seen when the column stays textual. Missing
    values are not treated as false.
    """
    if isinstance(flag, str):
        return flag.strip().lower() == "false"
    if pd.isna(flag):
        return False
    return not bool(flag)


def transitionMatrix(fileName, a, b, h):
    """Rank teams with a Markov chain built from game results.

    Reads the games in *fileName*, accumulates a team-by-team matrix weighted
    by ``log_r`` of each game's margin, row-normalises it, multiplies it by
    itself 400 times and stores each team's diagonal entry of the result as
    its "Distribution" rating.

    Wins and losses are counted over every row that has both scores; the
    matrix only uses rows whose "Week" is below 16.

    Side effects: prints the winner and loser of every game and writes
    ``Initial-Matrix.csv``, ``Normalized-Matrix.csv``,
    ``Stationairy-Distribution.csv`` and ``RecordListSorted.csv`` to the
    current working directory.

    Args:
        fileName: Path of a CSV with the columns "Home Team", "Away Team",
            "Home Points", "Away Points", "Week" and "Neutral Site".
        a: Slope passed to ``log_r``.
        b: Intercept passed to ``log_r``.
        h: Points added to the margin of every game whose "Neutral Site"
            value is not an explicit false (neutral-site games and rows
            with a missing flag).

    Returns:
        DataFrame with the columns "Team", "Wins", "Losses" and
        "Distribution", sorted by "Distribution" in descending order.
    """
    games = pd.read_csv(fileName)
    games = games.dropna(subset=['Home Points', 'Away Points'])

    # Dicts keep first-seen order (winner before loser, as rows are read) and
    # give O(1) membership, so team discovery and the win/loss tally share
    # one pass over the games.
    wins = {}
    losses = {}
    for _, row in games.iterrows():
        winner = findWinner(row)
        loser = findLoser(row)
        for name in (winner, loser):
            wins.setdefault(name, 0)
            losses.setdefault(name, 0)
        wins[winner] += 1
        losses[loser] += 1
        print(winner, loser)
    allTeamNames = list(wins)

    Records = pd.DataFrame(
        {
            "Wins": pd.Series(wins, dtype="int64"),
            "Losses": pd.Series(losses, dtype="int64"),
        },
        index=allTeamNames,
    )
    # Float from the start: ratings are fractional, and writing floats into
    # an integer column relies on implicit upcasting.
    Records["Distribution"] = 0.0

    # Markov matrix; float-initialised for the same reason as above.
    M = pd.DataFrame(0.0, index=allTeamNames, columns=allTeamNames)

    # Populate the matrix: the diagonal counts games played, the
    # off-diagonal entries accumulate the log_r-based weights.
    for _, row in games.iterrows():
        if not row["Week"] < 16:
            continue

        HomeT = row["Home Team"]
        AwayT = row["Away Team"]

        # NOTE(review): calcRowMargin is an absolute value, so the weights
        # below do not depend on which side won - confirm this is intended.
        marg = calcRowMargin(row)
        # The original test compared the cell with the list ["false"], which
        # is never equal, so h was added to every game. Compare the value
        # itself so games explicitly marked non-neutral skip the adjustment.
        if not _is_explicit_false(row["Neutral Site"]):
            marg = marg + h
        r = log_r(marg, a, b)

        M.at[HomeT, HomeT] += 1
        M.at[AwayT, AwayT] += 1
        M.at[HomeT, AwayT] += 1 - r
        M.at[AwayT, HomeT] += r

    M.to_csv("Initial-Matrix.csv", index=False)

    # Normalise each row by its games-played count, then reset the diagonal
    # so the row sums to 1.
    for team in M.index:
        games_played = M.at[team, team]
        M.loc[team] = M.loc[team] / games_played
        M.at[team, team] = 1 - M.loc[team].sum() + M.at[team, team]

    # Teams without any counted game divide 0 by 0; turn those NaNs into 0.
    M = M.fillna(0.0)

    M.to_csv("Normalized-Matrix.csv", index=False)

    # Repeated multiplication by M; the result is used as the stationary
    # distribution of the chain.
    P = M.copy()
    for _ in range(400):
        P = P.dot(M)

    P.to_csv("Stationairy-Distribution.csv", index=False)

    # Each team's rating is its own diagonal entry of P.
    for name in P.index:
        Records.at[name, 'Distribution'] = P.loc[name, name]

    Records = (
        Records.sort_values(by='Distribution', ascending=False)
        .reset_index()
        .rename(columns={'index': 'Team'})
    )

    Records.to_csv("RecordListSorted.csv", index=False)

    return Records

def main():
    """Run the ranking on the training data and print the sorted records."""
    # Lift pandas' display limits so the printed table is not truncated.
    # The original row-limit line only read the option and had no effect.
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_rows', None)

    # Values forwarded to transitionMatrix as its a, b and h arguments.
    a = 0.0228092
    b = -0.08489695
    h = 3.710938
    print(transitionMatrix('./Data/TrainingData.csv', a, b, h))


# Run the ranking only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# Reference copy of the parameter values assigned in main():
#a = 0.0228092
#b = -0.08489695
#h = 3.710938