forked from liming-thu/TestDB
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathKValuesGenerator.py
More file actions
executable file
·104 lines (87 loc) · 3.78 KB
/
KValuesGenerator.py
File metadata and controls
executable file
·104 lines (87 loc) · 3.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import csv
import time
import KeywordsUtil
import Conf
import DatabaseFactory
import QualityUtil
# Common config
dbtype = Conf.DBTYPE
database = Conf.DATABASE
tableName = Conf.TABLE
# Config for this experiment
csvFileName_k_values = 'keyword_k_values.csv'
csvFileName_k_ratios = 'keyword_k_ratios.csv'
# Resolution Scale
x_scale = 1
y_scale = 1
# Keywords frequency range
min_freq = 100000
max_freq = 500000
# target qualities
qualities = [85]
def generateKValues(p_min_freq, p_max_freq):
db = DatabaseFactory.getDatabase(dbtype, database)
l_min_freq = int(p_min_freq)
l_max_freq = int(p_max_freq)
keywords = KeywordsUtil.pickInFrequencyRange(db, l_min_freq, l_max_freq)
t = time.time()
with open(csvFileName_k_values, 'w') as csvFile_k_values:
with open(csvFileName_k_ratios, 'w') as csvFile_k_ratios:
csvWriter_k_values = csv.writer(csvFile_k_values, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
csvWriter_k_ratios = csv.writer(csvFile_k_ratios, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
progress = 0
skipped = 0
for keyword in keywords:
progress += 1
k_values = []
k_ratios = []
k_values.append(keyword)
k_ratios.append(keyword)
# 1. Get total coordinates of this keyword
t0 = time.time()
totalCoordinates = db.GetCoordinate(tableName, keyword, -1)
if len(totalCoordinates) == 0:
skipped += 1
continue
# 2. for each target quality calculate the k value using binary search
t1 = time.time()
for quality in qualities:
k, kp = QualityUtil.findKOfQuality(totalCoordinates, float(quality)/100.0, x_scale, y_scale)
k_values.append(k)
k_ratios.append(kp)
# 3. write down the k_values / k_ratios of this keyword to csv file
# [keyword, frequency, k(0.7), k(0.75), ......, k(0.95)]
# [keyword, frequency, kp(0.7), kp(0.75), ......, kp(0.95)]
t2 = time.time()
csvWriter_k_values.writerow(k_values)
csvWriter_k_ratios.writerow(k_ratios)
t3 = time.time()
# 4. output time information to console
print keyword, \
", [query]", t1 - t0, \
", [k_values/ratios]", t2 - t1, \
", [write]", t3 - t2, \
", [total]", time.time() - t0, \
", [All]", time.time() - t, \
", [progress]", str(progress * 100 / len(keywords)) + '%'
return len(keywords) - skipped
print '================================================='
print ' ' + dbtype + ' Experiments - 4.3 K Values Generating'
print '================================================='
print 'table:', tableName
print 'frequency range:[', min_freq, ',', max_freq, ']'
print 'target qualities:', map(lambda quality: str(quality) + '%', qualities)
QualityUtil.printInfo(x_scale, y_scale)
print '-------------------------------------------------'
start = time.time()
count = generateKValues(min_freq, max_freq)
end = time.time()
print '================================================='
print ' ' + dbtype + ' Results - 4.2 K Values Generating'
print '================================================='
print 'table:', tableName
print 'frequency range:[', min_freq, ',', max_freq, ']'
print 'target qualities:', map(lambda quality: str(quality) + '%', qualities)
QualityUtil.printInfo(x_scale, y_scale)
print '-------------------------------------------------'
print 'Finished!', count, 'keywords processed,', end - start, 'seconds spent.'