-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSentiment Classifier
More file actions
65 lines (54 loc) · 1.88 KB
/
Sentiment Classifier
File metadata and controls
65 lines (54 loc) · 1.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def strip_punctuation(str1):
    """Return ``str1`` with every character found in ``punctuation_chars`` removed.

    ``punctuation_chars`` is the module-level collection of characters to
    drop; all other characters are kept in their original order.
    """
    kept = [ch for ch in str1 if ch not in punctuation_chars]
    return ''.join(kept)
def get_pos(tweet):
    """Count the words of ``tweet`` that appear in ``positive_words``.

    The tweet is lowercased and split on whitespace; each token is passed
    through ``strip_punctuation`` before being looked up. Repeated words
    are counted once per occurrence.
    """
    tokens = tweet.lower().split()
    return sum(
        1 for token in tokens
        if strip_punctuation(token) in positive_words
    )
def get_neg(tweet):
    """Count the words of ``tweet`` that appear in ``negative_words``.

    The tweet is lowercased and split on whitespace; each token is passed
    through ``strip_punctuation`` before being looked up. Repeated words
    are counted once per occurrence.
    """
    cleaned = (strip_punctuation(token) for token in tweet.lower().split())
    matches = [word for word in cleaned if word in negative_words]
    return len(matches)
# Characters removed from each token before the sentiment-word lookup.
punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '@']


def _load_word_list(path):
    """Read a lexicon file and return its words as a list.

    Lines whose first character is ';' (comments) or a newline (blank
    lines) are skipped. Each remaining line is stripped of surrounding
    whitespace; entries that are empty after stripping are dropped so that
    a punctuation-only token (which strips down to '') can never match.
    """
    words = []
    with open(path) as word_file:
        for raw_line in word_file:
            if raw_line.startswith((';', '\n')):
                continue
            word = raw_line.strip()
            if word:
                words.append(word)
    return words


# lists of words to use
positive_words = _load_word_list("positive_words.txt")
negative_words = _load_word_list("negative_words.txt")

# One tuple of strings per tweet:
# (retweets, replies, positive score, negative score, net score).
resulting = []

with open("project_twitter_data.csv", 'r') as twitter_data:
    data = twitter_data.readlines()

# data[0] is the CSV header row, so scoring starts at the second line.
for line in data[1:]:
    line = line.strip()
    if not line:
        # Tolerate blank lines (e.g. a trailing newline at end of file).
        continue
    # Split from the right so that commas inside the tweet text do not
    # break the three-way unpacking; the last two fields are the counts.
    # A line with fewer than two commas still raises ValueError.
    tweet, rts, replies = line.rsplit(',', 2)
    pos = get_pos(tweet)
    neg = get_neg(tweet)
    overall = pos - neg
    resulting.append((str(rts), str(replies), str(pos), str(neg), str(overall)))

with open("resulting_data.csv", 'w') as resulting_data:
    header = 'Number of Retweets, Number of Replies, Positive Score, Negative Score, Net Score'
    resulting_data.write(header)
    resulting_data.write('\n')
    for result in resulting:
        resulting_data.write(','.join(result))
        resulting_data.write('\n')