Projects/Sentiment Classifier at master · omaarsherif/Projects · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
def strip_punctuation(str1):
    """Return ``str1`` with all punctuation characters removed.

    A character counts as punctuation when it is a member of the
    module-level ``punctuation_chars`` collection. Every occurrence of
    such a character is dropped; all other characters keep their order.
    """
    kept = [ch for ch in str1 if ch not in punctuation_chars]
    return ''.join(kept)

def get_pos(tweet):
    """Count the words of ``tweet`` that appear in ``positive_words``.

    The tweet is lower-cased and split on whitespace. Each resulting word
    is passed through ``strip_punctuation`` before being looked up, and
    every occurrence that matches is counted (repeats count again).
    """
    tokens = tweet.lower().split()
    return sum(
        1 for token in tokens
        if strip_punctuation(token) in positive_words
    )

def get_neg(tweet):
    """Count the words of ``tweet`` that appear in ``negative_words``.

    The tweet is lower-cased and split on whitespace. Each resulting word
    is passed through ``strip_punctuation`` before being looked up, and
    every occurrence that matches is counted (repeats count again).
    """
    tokens = tweet.lower().split()
    return sum(
        1 for token in tokens
        if strip_punctuation(token) in negative_words
    )

# Characters removed from each word before it is looked up in the word lists.
punctuation_chars = ["'", '"', ",", ".", "!", ":", ";", '#', '@']
# One tuple of strings per tweet:
# (retweets, replies, positive score, negative score, net score).
resulting = []

# lists of words to use
# In both word files, lines that start with ';' and empty lines are skipped;
# every other line contributes one word (surrounding whitespace removed).
positive_words = []
with open("positive_words.txt") as pos_f:
    for lin in pos_f:
        if not lin.startswith((';', '\n')):
            positive_words.append(lin.strip())


negative_words = []
with open("negative_words.txt") as neg_f:
    for lin in neg_f:
        if not lin.startswith((';', '\n')):
            negative_words.append(lin.strip())

# Score every tweet in the input CSV. Each data row is expected to have the
# form "<tweet text>,<retweets>,<replies>".
with open("project_twitter_data.csv", 'r') as twitter_data:
    next(twitter_data, None)  # skip the header row (no-op on an empty file)
    for line in twitter_data:
        line = line.strip()
        if not line:
            # A blank line carries no record; skipping it avoids a crash
            # in the unpacking below.
            continue
        # Split from the right so that commas inside the tweet text do not
        # break the three-way unpacking: only the last two commas separate
        # the retweet and reply counts from the text.
        tweet, rts, replies = line.rsplit(',', 2)
        pos = get_pos(tweet)
        neg = get_neg(tweet)
        overall = pos - neg
        resulting.append((str(rts), str(replies), str(pos), str(neg), str(overall)))

# Write the header followed by one comma-joined row per scored tweet.
with open("resulting_data.csv", 'w') as resulting_data:
    header = 'Number of Retweets, Number of Replies, Positive Score, Negative Score, Net Score'
    resulting_data.write(header)
    resulting_data.write('\n')
    for result in resulting:
        resulting_data.write(','.join(result))
        resulting_data.write('\n')