-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain.py
More file actions
38 lines (30 loc) · 1.4 KB
/
train.py
File metadata and controls
38 lines (30 loc) · 1.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats
import pandas as pd
import pickle
import utils
# Train a Naive Bayes sentiment classifier on pre-filtered tweets and persist
# both the SentimentAnalyzer (feature extractors) and the classifier to disk.

# 'training.pkl' holds a DataFrame with at least 'filtered_text' (tokenized
# tweet text) and 'polarity' columns. Polarity values are compared as the
# STRINGS '4' (positive) and '0' (negative) — looks like the Sentiment140
# labeling convention; TODO confirm against the pickle's producer.
df = pd.read_pickle('training.pkl')

pos_tweets = [(row['filtered_text'], 'pos')
              for _, row in df.loc[df['polarity'] == '4'].iterrows()]
neg_tweets = [(row['filtered_text'], 'neg')
              for _, row in df.loc[df['polarity'] == '0'].iterrows()]
all_tweets = pos_tweets + neg_tweets

analyzer = SentimentAnalyzer()
# all_words takes the (tokens, label) documents directly; the previous
# identity comprehension around all_tweets was redundant.
all_words = analyzer.all_words(all_tweets)
# Keep only unigrams seen at least 4 times to drop rare/noisy features.
unigram_feats = analyzer.unigram_word_feats(all_words, min_freq=4)
analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

training_set = analyzer.apply_features(all_tweets)
classifier = analyzer.train(NaiveBayesClassifier.train, training_set)
print('done')

# Persist the trained artifacts for later inference. NOTE: the analyzer must
# be saved too — it carries the feature extractors needed to featurize new
# text the same way the training data was featurized.
with open('analyzer.pkl', 'wb') as analyzer_file:
    pickle.dump(analyzer, analyzer_file)
with open('classifier.pkl', 'wb') as classifier_file:
    pickle.dump(classifier, classifier_file)