VCode-QMAT/correlation.py at master · qmat/VCode-QMAT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python

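#Usage: correlation.py [options] <primary file> <secondary file>
# -t <sec> width of the window within which two event start times match; defaults to 1.0 sec (i.e. +/- 0.5 sec)
# -r <sec> width of the window within which two event durations match; defaults to 1.0 sec (i.e. +/- 0.5 sec)
# -l lists the track items present in both files and exits
# -i items to include in total -- planned but not implemented; all shared items are always reported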


from optparse import OptionParser
from sets import Set
from sys import exit
import codecs
import copy

class Event:
    """One coded event read from a single data line of an input file.

    Attributes:
        track: name of the track the event belongs to.
        startTime: start of the event (an int when built by parseFile).
            Tolerances given in seconds are scaled by 1000 before being
            compared with it, so this is presumably milliseconds -- confirm
            against the file format.
        duration: length of the event in the same unit as startTime; the
            script treats a track as "ranged" when any duration is > 0.
        comment: free-text comment; the literal "(null)" is treated by the
            script as "no comment".
    """

    def __init__(self, track, startTime, duration, comment):
        self.track = track
        self.startTime = startTime
        self.duration = duration
        self.comment = comment

    def __repr__(self):
        # Readable form for debugging output instead of the default
        # "<__main__.Event instance at 0x...>".
        return "Event(track=%r, startTime=%r, duration=%r, comment=%r)" % (
            self.track, self.startTime, self.duration, self.comment)


class TrackResult:
    """Running tally of how well two files agree on a single track.

    Attributes:
        trackName: name of the track being compared.
        isRange: True when the track carries durations, which enables
            durationPercent().
        eventMatched: number of primary events paired with a secondary event.
        primaryUnmatched: primary-file events without a partner.
        secondaryUnmatched: secondary-file events without a partner.
        commentsMatched: matched event pairs whose comments are equal.
        commentsUnmatched: matched event pairs whose comments differ.
        durationsMatched: matched event pairs whose durations also agree
            (defaults to -1 when not supplied).
    """

    def __init__(self, trackName, isRange, eventMatched, primaryUnmatched, secondaryUnmatched, durationsMatched=-1):
        self.trackName = trackName
        self.isRange = isRange
        self.eventMatched = eventMatched
        self.primaryUnmatched = primaryUnmatched
        self.secondaryUnmatched = secondaryUnmatched
        self.commentsMatched = 0
        self.commentsUnmatched = 0
        self.durationsMatched = durationsMatched

    def _percent(self, part, whole):
        """Return part/whole as a percentage, or -1 when whole is not positive."""
        if whole > 0:
            return (float(part) / whole) * 100.0
        return -1

    def eventPercent(self):
        """Percentage of primary events that found a match, or -1 if there are none."""
        return self._percent(self.eventMatched,
                             self.eventMatched + self.primaryUnmatched)

    def commentPercent(self):
        """Percentage of compared comments that were equal, or -1 if none were compared."""
        return self._percent(self.commentsMatched,
                             self.commentsMatched + self.commentsUnmatched)

    def durationPercent(self):
        """Percentage of matched events whose durations agree.

        Returns -1 for tracks that are not ranged and when no event matched.
        """
        if not self.isRange:
            return -1
        return self._percent(self.durationsMatched, self.eventMatched)


def parseFile(filename):
    """Read an event file and group its events by track.

    The file is first read as UTF-16; if that raises UnicodeError it is
    re-read with the platform default encoding.

    Layout relied on by this parser:
      * the second line holds the track list after a colon, comma separated
        (e.g. "Tracks: A, B");
      * every line from the fifth onwards is "start,duration,track,comment".
        Only the fourth comma-separated field is kept as the comment, so a
        comment containing commas is truncated at the first one.

    Args:
        filename: path of the file to read.

    Returns:
        dict mapping each track name from the header to the list of Event
        objects on that track, in file order. Data lines naming a track that
        is not in the header are ignored.

    On a malformed header or data line a message is printed and the process
    exits with status 1. Errors opening the file are not caught here.
    """
    # Context managers close the handle on both the UTF-16 and fallback paths.
    try:
        with codecs.open(filename, 'r', "utf-16") as handle:
            lines = handle.readlines()
    except UnicodeError:
        with open(filename, 'r') as handle:
            lines = handle.readlines()

    codeOneDict = {}
    try:
        for name in lines[1].split(u':')[1].split(u','):
            codeOneDict[name.rstrip(u'\n').strip(u' ')] = []

        # Single pass over the data lines: split each line once and file the
        # event under its track, instead of rescanning all lines per track.
        for line in lines[4:]:
            fields = line.split(u',')
            track = fields[2]
            if track in codeOneDict:
                codeOneDict[track].append(
                    Event(track, int(fields[0]), int(fields[1]), fields[3].rstrip("\n")))
    except (IndexError, ValueError):
        # IndexError: missing header line or too few fields;
        # ValueError: non-numeric start/duration (also covers decode errors).
        print("Parsing of file %s failed" % filename)
        exit(1)
    return codeOneDict

# Command-line interface. NOTE: there is no __main__ guard, so the arguments
# are parsed (and the whole comparison runs) whenever this module is executed
# or imported.
usage = "usage: %prog [options] <primary file> <secondary file>"
# Hand the usage string to optparse so --help and error output show the two
# required positional arguments.
parser = OptionParser(usage=usage)
parser.add_option("-t", "--tolerance",type="float", dest="tolerance",
                  default=1.0, help="Tolerance for which two events will be considered matching. Defaults to 1.0s. so x +/- 0.5s")
parser.add_option("-r", "--range",type="float", dest="tolerancerange",
                  default=1.0, help="Tolerance for which two durations will be considered matching. Defaults to 1.0s. so x +/- 0.5s")
parser.add_option("-l", "--list",
                  action="store_true", dest="list", default=False,
                  help="Print list of valid track items")
# options: parsed flags; args: remaining positional arguments (the two files).
(options, args) = parser.parse_args()


# Exactly two positional arguments are required: the primary and the
# secondary file. print() with a single argument behaves the same on
# Python 2 and matches the call style used by the report code below.
if len(args) != 2:
    print("Error; incorrect file names")
    exit(1)

# Track name -> list of events, one dict per input file.
codeOneDict = parseFile(args[0])
codeTwoDict = parseFile(args[1])


# Only tracks declared in both files can be compared.
validKeys = list(Set(codeOneDict.keys()).intersection(codeTwoDict.keys()))

# -l: just show the comparable tracks and stop.
if options.list:
    print(validKeys)
    exit(0)


#results = generateResults(codeOneDict, codeTwoDict, validKeys);

# A track counts as "ranged" when at least one of its events in the primary
# file has a positive duration; only such tracks get a duration comparison.
trackIsRanged = {}
for key in validKeys:
    trackIsRanged[key] = any(primaryEvent.duration > 0
                             for primaryEvent in codeOneDict[key])


#look at arguments (excluding arg to -t option). Make sure arg list (list of tracks) is in intersectionofkeys

# One TrackResult per shared track, in validKeys order.
results = []

# Half-widths of the matching windows. Tolerances are given in seconds and
# scaled by 1000 to the unit used for start times and durations.
# They do not change during matching, so compute them once.
startSlack = 0.5 * options.tolerance * 1000
durationSlack = 0.5 * options.tolerancerange * 1000

for key in validKeys:
    isRanged = trackIsRanged[key]
    # Start with every event unmatched; counters move as pairs are found.
    newResult = TrackResult(key, isRanged, 0, len(codeOneDict[key]), len(codeTwoDict[key]), 0)

    for event in codeOneDict[key]:
        # Greedy pairing: take the first remaining secondary event whose
        # start time lies strictly inside the window around this event.
        for event2 in codeTwoDict[key]:
            if not (event.startTime - startSlack < event2.startTime < event.startTime + startSlack):
                continue

            newResult.eventMatched += 1
            newResult.primaryUnmatched -= 1
            newResult.secondaryUnmatched -= 1
            # Consume the secondary event so it cannot be paired twice.
            # Removing during iteration is safe only because of the break
            # at the end of this iteration.
            codeTwoDict[key].remove(event2)

            if isRanged and (event.duration - durationSlack < event2.duration < event.duration + durationSlack):
                newResult.durationsMatched += 1

            # Comments are compared only when the primary event has one.
            if event.comment != u"(null)":
                if event.comment == event2.comment:
                    newResult.commentsMatched += 1
                else:
                    newResult.commentsUnmatched += 1

            break

    results.append(newResult)


# Legend for the per-track lines, followed by the tolerances in effect.
print("Track: (# matched, # unmatched on file 1, # unmatched on file 2), %correlation on this track \n(#comments matched, #comments unmatched) %correlation of comments \n(#durations matched, #durations unmatched) %correlation of durations")
print("Tolerance for events: %f, Tolerance for Durations %f\n" % (options.tolerance, options.tolerancerange))

for result in results:
    # Event and comment statistics are reported for every track ...
    line = "%s:\t (%d, %d, %d) %f%% \t(%d, %d) %f%%" % (
        result.trackName,
        result.eventMatched,
        result.primaryUnmatched,
        result.secondaryUnmatched,
        result.eventPercent(),
        result.commentsMatched,
        result.commentsUnmatched,
        result.commentPercent(),
    )
    # ... and ranged tracks get a third group with the duration statistics.
    if result.isRange:
        line += " \t(%d, %d) %f%%" % (
            result.durationsMatched,
            result.eventMatched - result.durationsMatched,
            result.durationPercent(),
        )
    print(line)

print("====Total====")
# TODO: no totals are printed yet. The intent is to report the summed
# matched / unmatched counts and the overall percentage across tracks,
# restricted to the tracks selected with a future -i option.