Tuyet3005 committed
Commit 2116269
1 Parent(s): a683b93

Upload 5 files

bert-sentiment-detection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
dataset/reviews.csv ADDED
The diff for this file is too large to render. See raw diff
 
streamlit_app.py/Homepage.py ADDED
@@ -0,0 +1,22 @@
+ import streamlit as st
+
+ st.set_page_config(
+     page_title="Sentiment Detection"
+ )
+
+ st.title("Sentiment Detection")
+
+ st.sidebar.success("Select a page above.")
+
+ st.header("The Need for Sentiment Detection")
+ st.text("""
+ Spam detection algorithms are used to detect and filter junk and spam emails with a high level of accuracy.
+ It is said that around half of all emails are spam, depending on the user. These emails can include scams or viruses intended to cause harm.
+ """)
+
+ st.header("Data Source")
+ st.text("""
+ Data Source: Preprocessed TREC 2007 Public Corpus Dataset.
+ Link: https://www.kaggle.com/datasets/imdeepmind/preprocessed-trec-2007-public-corpus-dataset
+
+ """)
streamlit_app.py/pages/SentimentDetection.py ADDED
@@ -0,0 +1,83 @@
+ from os import path
+
+ import streamlit as st
+ import torch
+ from torch import nn
+ from transformers import BertModel, BertTokenizer
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ MODEL_NAME = "bert-base-cased"
+ MODEL_PATH = path.join(path.dirname(__file__), "bert_model.h5")
+
+
+ # Sentiment classifier: a pre-trained BERT encoder followed by dropout and a linear head
+ class SentimentClassifier(nn.Module):
+
+     # Constructor
+     def __init__(self, n_classes):
+         super(SentimentClassifier, self).__init__()
+         self.bert = BertModel.from_pretrained(MODEL_NAME)
+         self.drop = nn.Dropout(p=0.3)
+         self.out = nn.Linear(self.bert.config.hidden_size, n_classes)
+
+     # Forward pass: classify from BERT's pooled output
+     def forward(self, input_ids, attention_mask):
+         _, pooled_output = self.bert(
+             input_ids=input_ids,
+             attention_mask=attention_mask,
+             return_dict=False
+         )
+         # Apply dropout before the classification layer
+         output = self.drop(pooled_output)
+         return self.out(output)
+
+
+ @st.cache_resource
+ def load_model_and_tokenizer():
+     model = SentimentClassifier(3)
+     model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device("cpu")))
+     model = model.to(device)
+     model.eval()
+     return model, BertTokenizer.from_pretrained(MODEL_NAME)
+
+
+ def predict(content):
+     model, tokenizer = load_model_and_tokenizer()
+
+     encoded_review = tokenizer.encode_plus(
+         content,
+         max_length=160,
+         add_special_tokens=True,
+         return_token_type_ids=False,
+         padding="max_length",
+         truncation=True,
+         return_attention_mask=True,
+         return_tensors="pt",
+     )
+
+     input_ids = encoded_review["input_ids"].to(device)
+     attention_mask = encoded_review["attention_mask"].to(device)
+
+     # Run inference without tracking gradients
+     with torch.no_grad():
+         output = model(input_ids, attention_mask)
+     _, prediction = torch.max(output, dim=1)
+
+     class_names = ["negative", "neutral", "positive"]
+
+     return class_names[prediction.item()]
+
+
+ def main():
+     # Give the page a title
+     st.title("Sentiment Detection")
+     contents = st.text_area("Please enter reviews/sentences/contents:")
+
+     # Create a prediction button
+     if st.button("Analyze Sentiment"):
+         prediction = predict(contents)
+         st.success(f"Predicted sentiment: {prediction}")
+
+
+ if __name__ == "__main__":
+     main()
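
A minimal, hypothetical smoke test for the predict() helper defined above, run outside the Streamlit UI. It assumes the LFS weights bert_model.h5 (pointer below) have been fetched into streamlit_app.py/pages/ and that the script lives in that same directory; the file name smoke_test.py is illustrative only.

# smoke_test.py (hypothetical) -- run from streamlit_app.py/pages/
# st.cache_resource is expected to fall back to a plain function call outside a Streamlit session.
from SentimentDetection import predict

samples = ["The movie was a waste of time.", "Absolutely loved it!"]
for text in samples:
    # predict() returns one of "negative", "neutral" or "positive"
    print(text, "->", predict(text))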
streamlit_app.py/pages/bert_model.h5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f0e320ff87ab99bcb76ed153f14d6973dfa7bd1570d022c6d8bee1e496323e7
+ size 433339657