-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtuto_5.py
More file actions
35 lines (26 loc) · 1.02 KB
/
tuto_5.py
File metadata and controls
35 lines (26 loc) · 1.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Implement a Naive Bayes classifier to categorize text
from sklearn import datasets, metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
import matplotlib.pyplot as plt
from pandas_ml import ConfusionMatrix
# Fetch the predefined train and test subsets of the 20 Newsgroups corpus.
newsgroups_train = datasets.fetch_20newsgroups(subset='train')
newsgroups_test = datasets.fetch_20newsgroups(subset='test')

# Quick look at what the dataset object exposes and at a few raw samples.
print(newsgroups_train.keys())
print(newsgroups_train.data[:3])
print(newsgroups_train.target[:3])
print(newsgroups_train.target_names)

# Fit the TF-IDF vectorizer on the training documents only, then reuse the
# fitted vectorizer for the test documents so no test data leaks into it.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train.data)
X_test = vectorizer.transform(newsgroups_test.data)
y_train = newsgroups_train.target
y_test = newsgroups_test.target

# Train the multinomial Naive Bayes model and predict the test set once.
model = MultinomialNB()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Accuracy is derived from the predictions already computed above;
# model.score(X_test, y_test) would call predict() on X_test a second time
# to produce the same number.
print(metrics.accuracy_score(y_test, predictions))

# Pass the newsgroup names so the report rows are labelled by name rather
# than by integer class id.
labels = list(newsgroups_train.target_names)
print(metrics.classification_report(y_test, predictions, target_names=labels))

# Confusion matrix of true vs. predicted classes, shown with matplotlib.
cm = ConfusionMatrix(y_test, predictions, labels)
cm.plot()
plt.show()