-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreadData.py
More file actions
77 lines (66 loc) · 2.18 KB
/
readData.py
File metadata and controls
77 lines (66 loc) · 2.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import random
import os
def _list_class_files(label):
    """Return the sorted ``"<label>/<filename>"`` entries of ``./tes/<label>``."""
    # os.listdir() returns names in arbitrary order; sorting makes the
    # slicing and train/test splitting done by the caller reproducible.
    return [label + "/" + name for name in sorted(os.listdir("./tes/" + label))]


def _read_documents(fnames):
    """Read ``./polarity/<fname>`` for every name and return ``(texts, labels)``.

    The label of a document is the first three characters of its relative
    name, i.e. the ``pos`` / ``neg`` directory prefix.
    """
    texts = []
    labels = []
    for fname in fnames:
        with open("./polarity/" + fname) as f:
            texts.append(f.read())
        labels.append(fname[:3])
    return texts, labels


def bacaData(n, jumlah_data, randomFile=False):
    """Load the positive/negative documents and split them into train/test sets.

    File names are discovered in ``./tes/pos`` and ``./tes/neg``; the document
    text is then read from ``./polarity/<label>/<name>``.
    NOTE(review): listing and reading use different root directories --
    presumably ``./tes`` mirrors (a subset of) ``./polarity``; confirm.

    Args:
        n: Percentage of the selected files used for training (``n / 100`` of
            the files, truncated to an integer count); the rest is used for
            testing.
        jumlah_data: Total number of files to use. Each class keeps its first
            ``int(jumlah_data / 2)`` files, so values below 2 select no files.
            A negative value, or a value that cannot be used as a number
            (e.g. ``None``), selects every file.
        randomFile: When true, both classes are pooled and shuffled before
            the split; otherwise each class is split separately and the
            positive files precede the negative ones.

    Returns:
        A tuple ``(train_data, train_labels, test_data, test_labels)`` of
        lists. Data entries are whole file contents; labels are ``"pos"`` or
        ``"neg"``. If either side of the split would be empty, the training
        set becomes the union of both sides (the test set is left as is).
    """
    # Read the file names in each class folder.
    fNamePos = _list_class_files("pos")
    fNameNeg = _list_class_files("neg")

    # Limit the amount of data. Any failure to interpret jumlah_data falls
    # back to "use everything" without touching the lists, instead of
    # referencing backup variables that may never have been assigned.
    try:
        per_class = int(jumlah_data / 2) if jumlah_data >= 0 else None
    except (TypeError, ValueError, OverflowError):
        per_class = None
    if per_class is not None:
        fNamePos = fNamePos[:per_class]
        fNameNeg = fNameNeg[:per_class]
    print("Jumlah data yang digunakan " + str(len(fNamePos) + len(fNameNeg)))

    # Split the data: n% for training, the remainder for testing.
    ratio = n / 100
    if randomFile:
        pool = fNamePos + fNameNeg
        cut = int(len(pool) * ratio)
        random.shuffle(pool)
        fNameTrain = pool[:cut]
        fNameTest = pool[cut:]
    else:
        cut_pos = int(len(fNamePos) * ratio)
        cut_neg = int(len(fNameNeg) * ratio)
        fNameTrain = fNamePos[:cut_pos] + fNameNeg[:cut_neg]
        fNameTest = fNamePos[cut_pos:] + fNameNeg[cut_neg:]

    # Never hand back an empty training set when there is data on the other
    # side of the split: train on everything that was selected.
    if not fNameTrain or not fNameTest:
        fNameTrain = fNameTrain + fNameTest

    # Read the training files, then the testing files.
    train_data, train_labels = _read_documents(fNameTrain)
    test_data, test_labels = _read_documents(fNameTest)
    return train_data, train_labels, test_data, test_labels