Puyush committed on
Commit
0197d35
1 Parent(s): a844ecc

Upload app.py

Files changed (1)
  1. app.py +158 -0
app.py ADDED
@@ -0,0 +1,158 @@
+ import re
+ import nltk
+ import keras
+ import spacy
+ import string
+ import pickle
+ import tempfile
+ import numpy as np
+ import gradio as gr
+ import contractions
+ import tensorflow as tf
+ from nltk.stem import WordNetLemmatizer
+ from nltk.tokenize import word_tokenize
+ from nltk.corpus import stopwords, wordnet
+ from tensorflow.keras.layers import Layer
+ from tensorflow.keras import backend as K
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+
+
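+ # Note (editor's assumption, not in the committed file): word_tokenize, stopwords,
+ # WordNetLemmatizer and nltk.pos_tag rely on the NLTK corpora 'punkt', 'stopwords',
+ # 'wordnet' and 'averaged_perceptron_tagger'. If they are not pre-installed in the
+ # runtime, a one-time download per resource at startup is a common fix, e.g.:
+ #     for pkg in ('punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger'):
+ #         nltk.download(pkg, quiet=True)
+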
+ class Attention(Layer):
+
+     def __init__(self, return_sequences=True, **kwargs):
+         self.return_sequences = return_sequences
+         super(Attention, self).__init__(**kwargs)
+
+     def build(self, input_shape):
+         # Learnable projection and per-timestep bias for the attention scores
+         self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
+                                  initializer="normal")
+         self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
+                                  initializer="zeros")
+         super(Attention, self).build(input_shape)
+
+     def call(self, x):
+         # Score each timestep, normalise over the time axis, and weight the inputs
+         e = K.tanh(K.dot(x, self.W) + self.b)
+         a = K.softmax(e, axis=1)
+         output = x * a
+
+         if self.return_sequences:
+             return output
+
+         # Collapse the time dimension into a single context vector
+         return K.sum(output, axis=1)
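+ # Shape walk-through (editor's note, not in the committed file): for an input of
+ # shape (batch, timesteps, features), W is (features, 1) and b is (timesteps, 1),
+ # so e and a are (batch, timesteps, 1); x * a keeps (batch, timesteps, features),
+ # and with return_sequences=False the time axis is summed away to (batch, features).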
+
+
+ def load_tokenizer(path):
+     # Restore the fitted Keras tokenizer that was pickled during training
+     with open(path, 'rb') as f:
+         tokenizer = pickle.load(f)
+     return tokenizer
+
+
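+ # cleaning() normalises a raw tweet: expand contractions, lowercase, drop @mentions,
+ # #hashtags, digits and punctuation, collapse whitespace, remove stopwords, then
+ # lemmatise with POS-aware WordNet tags (editor's summary of the steps below).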
+ def cleaning(text):
+     # spaCy pipeline is loaded here but not used further in this function
+     nlp = spacy.load('en_core_web_sm')
+     # Punctuation symbols to remove
+     exclude = string.punctuation
+
+     def expand_contractions(text): return contractions.fix(text)
+     text = expand_contractions(text)
+
+     text = text.lower()
+
+     def remove_tags(text): return re.sub(r'@\w*', ' ', text)
+     text = remove_tags(text)
+
+     def remove_hashtags(text): return re.sub(r'#\w*', ' ', text)
+     text = remove_hashtags(text)
+
+     def remove_apostrophe(text): return re.sub(r"'s\b", "", text)
+     text = remove_apostrophe(text)
+
+     def remove_special_chars(text): return re.sub(r"[^a-zA-Z0-9\s]", ' ', text)
+     text = remove_special_chars(text)
+
+     def remove_number(text): return re.sub(r'[\d]', ' ', text)
+     text = remove_number(text)
+
+     def remove_punc(text): return ''.join([c for c in text if c not in exclude])
+     text = remove_punc(text)
+
+     # Collapse runs of whitespace left behind by the substitutions above
+     def remove_extra_spaces(text): return re.sub(r'\s+', ' ', text).strip()
+     text = remove_extra_spaces(text)
+
+     def map_pos_tags(pos_tags):
+         # Map NLTK POS tags to WordNet tags
+         tag_map = {
+             'N': wordnet.NOUN,
+             'V': wordnet.VERB,
+             'R': wordnet.ADV,
+             'J': wordnet.ADJ
+         }
+
+         mapped_tags = []
+         for token, tag in pos_tags:
+             mapped_tag = tag[0].upper()
+             if mapped_tag in tag_map:
+                 mapped_tag = tag_map[mapped_tag]
+             else:
+                 mapped_tag = wordnet.NOUN  # Default to noun if no mapping found
+             mapped_tags.append(mapped_tag)
+
+         return mapped_tags
+
+     def remove_stopwords(text):
+         stop_words = set(stopwords.words('english'))
+         tokens = word_tokenize(text)
+         filtered_text = [word for word in tokens if word.lower() not in stop_words]
+         return ' '.join(filtered_text)
+     text = remove_stopwords(text)
+
+     def pos_tag_and_lemmatize(text):
+         tokens = word_tokenize(text)
+         pos_tags = nltk.pos_tag(tokens)
+
+         # Map POS tags to WordNet tags
+         wordnet_tags = map_pos_tags(pos_tags)
+
+         # Lemmatize based on POS tags
+         lemmatizer = WordNetLemmatizer()
+         lemmas = " ".join([lemmatizer.lemmatize(token, tag)
+                            for token, tag in zip(tokens, wordnet_tags)])
+
+         return lemmas
+     text = pos_tag_and_lemmatize(text)
+
+     return text
+
+
+ def label_tweet(test_review):
+     token_list = tokenizer.texts_to_sequences([test_review])[0]
+     # maxlen must match the sequence length the model was trained with (44 here)
+     token_list = pad_sequences([token_list], maxlen=44, padding='post')
+     predicted = model.predict(token_list, verbose=0)
+     # Sigmoid output: scores of 0.5 and above count as positive
+     if predicted[0][0] >= 0.5:
+         return 1
+     else:
+         return 0
+
+
+ def analyze_text(comment):
+     comment = cleaning(comment)
+     result = label_tweet(comment)
+     if result == 0:
+         text = "Negative"
+     else:
+         text = "Positive"
+     return text
+
+
+ # Reload the trained model; custom_objects maps the saved layer name back to the
+ # custom Attention class so Keras can reconstruct the model identically.
+ model = keras.models.load_model("twitter_sentiment.keras",
+                                 custom_objects={'Attention': Attention})
+
+ # Load tokenizer
+ tokenizer = load_tokenizer('tokenizer.pkl')
+
+ interface = gr.Interface(fn=analyze_text,
+                          inputs=gr.inputs.Textbox(lines=2, placeholder='Enter a positive or negative tweet here...'),
+                          outputs='text',
+                          title='Twitter Sentiment Analysis',
+                          theme='darkhuggingface')
+ interface.launch(inline=False)
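
A quick way to sanity-check the pipeline without the web UI (a sketch, not part of the commit): run the lines below in a Python session where the definitions above have been executed and twitter_sentiment.keras and tokenizer.pkl are present, before calling interface.launch; the example tweets are illustrative.

    # Editor's sketch: exercise the same cleaning -> tokenize -> predict path directly
    print(analyze_text("Had a wonderful time at the beach today!"))    # prints "Positive" or "Negative"
    print(analyze_text("This is the worst service I have ever had."))  # prints "Positive" or "Negative"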