-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathucsampling.py
More file actions
81 lines (46 loc) · 2 KB
/
ucsampling.py
File metadata and controls
81 lines (46 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pandas as pd
from sklearn.utils import shuffle
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import pickle as pk
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.stats import entropy
from utils import savePickle,getPicklefile,mapping,read_OnlyTrainData,read_OnlyTestData
def TrainInitialModelSample(initial_size=20):
    """Train the three baseline classifiers on a small initial sample.

    The first ``initial_size`` rows returned by ``read_OnlyTrainData`` are
    used to fit an SVC, a LogisticRegression and a RandomForestClassifier.
    Each fitted model is scored on the data returned by
    ``read_OnlyTestData`` and then persisted with ``savePickle`` under the
    classifier's class name; its accuracy is persisted as a one-element
    list under ``'<class name>_scores'``.

    Args:
        initial_size: Number of leading training rows to fit on. Defaults
            to 20, the value that was previously hard-coded.

    Returns:
        dict mapping classifier class name to its accuracy on the test set.
    """
    train_data = read_OnlyTrainData(dropFileName=True)
    train_data = train_data[:initial_size]
    X_train = train_data.drop('label', axis=1).values
    y_train = train_data['label']

    test_data = read_OnlyTestData(dropFileName=True, returnXy=False)
    X_test_set = test_data.drop('label', axis=1).values
    y_test_set = test_data['label']

    # probability=True is required for SVC to expose predict_proba;
    # presumably the pickled model is queried for probabilities later
    # (uncertainty sampling) - TODO confirm against the callers.
    svmClassifier = SVC(C=10, kernel='linear', gamma=0.001, probability=True, random_state=500156)
    logRegClassifier = LogisticRegression(random_state=789)
    rfClassifier = RandomForestClassifier(criterion='entropy', random_state=4528)
    classifiers = {
        type(svmClassifier).__name__: svmClassifier,
        type(logRegClassifier).__name__: logRegClassifier,
        type(rfClassifier).__name__: rfClassifier,
    }

    # Train all 3 classifiers with the initial data samples.
    scores = {}
    # .items() instead of the Python-2-only .iteritems(): works on both
    # Python 2 and Python 3.
    for clfname, clf in classifiers.items():
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test_set)
        # accuracy_score expects (y_true, y_pred).
        score = accuracy_score(y_test_set, y_pred)
        scores[clfname] = score

        # Save the fitted model to pickle.
        savePickle(clf, clfname)
        # Save the initial score as a one-element list.
        savePickle([score], clfname + '_scores')
    return scores