SpamGuard-Python-Streamlit/main.py at main · ShahidAbas/SpamGuard-Python-Streamlit · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import streamlit as st
import pickle
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Shared Porter stemmer instance; transform_text() calls ps.stem() on each token.
ps = PorterStemmer()

# Text preprocessing helper: lowercase, tokenize, keep alphanumeric tokens, drop stop words, stem.

def transform_text(text):
    """Normalize a raw message into a space-joined string of stemmed tokens.

    The pipeline is:
      1. lowercase the text,
      2. tokenize it with ``nltk.word_tokenize``,
      3. keep only tokens for which ``str.isalnum()`` is true,
      4. drop English stop words,
      5. reduce each remaining token to its Porter stem.

    Relies on NLTK's tokenizer models and English stop-word corpus being
    available at runtime (presumably installed beforehand; nothing visible
    in this file downloads them).

    Args:
        text: The raw message string.

    Returns:
        The surviving stemmed tokens joined by single spaces; an empty
        string when no token survives the filters.
    """
    tokens = nltk.word_tokenize(text.lower())

    # Build the stop-word collection once per call, as a set, instead of
    # re-reading the corpus list and scanning it linearly for every token.
    stop_words = set(stopwords.words('english'))

    # isalnum() already rejects punctuation-only tokens, so a separate
    # string.punctuation membership test would never filter anything.
    kept = [tok for tok in tokens if tok.isalnum() and tok not in stop_words]

    return " ".join(ps.stem(tok) for tok in kept)
#####

# Load the two pickled objects the prediction step needs: `model` (used via
# .predict) and `tfidf` (used via .transform; presumably a fitted TF-IDF
# vectorizer, going by the file name).
# Context managers ensure each file handle is closed right after loading.
# NOTE: unpickling can execute arbitrary code, so these files must come from
# a trusted source.
with open('model_spam.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('vectorizer_spam.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)

st.title("SpamGaurdAi")

input_text = st.text_area("Enter the message")

# Run the pipeline only when st.button('Predict') returns a truthy value.
if st.button('Predict'):
    # Preprocess -> vectorize -> classify; the vectorizer receives a
    # one-element list, and the first prediction is taken.
    cleaned_message = transform_text(input_text)
    features = tfidf.transform([cleaned_message])
    predicted_label = model.predict(features)[0]

    # A predicted label of 1 is displayed as spam; anything else as not spam.
    verdict = "Spam" if predicted_label == 1 else "Not Spam"
    st.header(verdict)