manish72 committed
Commit 0bd26c3 (1 parent: 30e5590)

Upload 10 files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model/tweet_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
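This added line is the entry that git lfs track "model/tweet_model/variables/variables.data-00000-of-00001" would write: it routes the roughly 1 GB weights shard added below through Git LFS rather than the regular Git object store.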
app.py ADDED
@@ -0,0 +1,164 @@
+# importing libraries
+
+import streamlit as st
+import PIL
+from PIL import Image
+import tensorflow as tf
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import RegexpTokenizer
+import re
+import string
+import numpy as np
+import pandas as pd
+import nltk
+
+try:  # check whether the wordnet corpus is already installed
+    nltk.data.find("corpora/wordnet.zip")
+except LookupError:
+    nltk.download("wordnet")
+
+# ----------------------------------------------------------------------------------
+# read the helper files (only once per interpreter session)
+try:
+    acronyms_dict, contractions_dict, stops
+except NameError:
+    acronyms_dict = pd.read_json("helper/acronym.json", typ="series")
+    contractions_dict = pd.read_json("helper/contractions.json", typ="series")
+    stops = list(pd.read_csv("helper/stopwords.csv").values.flatten())
+
+# ----------------------------------------------------------------------------------
+# defining the tokenizer
+regexp = RegexpTokenizer(r"[\w']+")
+
+# preprocess function
+def preprocess(text):
+
+    text = text.lower()  # lowercase
+    text = text.strip()  # leading/trailing whitespace
+
+    # removing html tags
+    html = re.compile(r"<.*?>")
+    text = html.sub(r"", text)
+
+    # removing emoji patterns
+    emoji_pattern = re.compile("["
+                               u"\U0001F600-\U0001F64F"  # emoticons
+                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
+                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+                               u"\U00002702-\U000027B0"
+                               u"\U000024C2-\U0001F251"
+                               "]+", flags=re.UNICODE)
+    text = emoji_pattern.sub(r"", text)
+
+    # removing urls (strings beginning with http(s):// or www., not just the word "http")
+    http = r"https?://\S+|www\.\S+"
+    pattern = r"({})".format(http)
+    text = re.sub(pattern, "", text)
+
+    # removing twitter usernames
+    pattern = r"@[\w_]+"
+    text = re.sub(pattern, "", text)
+
+    # removing punctuation and digits, keeping "'" and "-" for now
+    punct_str = string.punctuation + string.digits
+    punct_str = punct_str.replace("'", "")
+    punct_str = punct_str.replace("-", "")
+    text = text.translate(str.maketrans("", "", punct_str))
+
+    # replacing "-" with a space
+    text = text.replace("-", " ")
+
+    # substituting acronyms
+    words = []
+    for word in regexp.tokenize(text):
+        if word in acronyms_dict.index:
+            words = words + acronyms_dict[word].split()
+        else:
+            words = words + word.split()
+    text = " ".join(words)
+
+    # substituting contractions
+    words = []
+    for word in regexp.tokenize(text):
+        if word in contractions_dict.index:
+            words = words + contractions_dict[word].split()
+        else:
+            words = words + word.split()
+    text = " ".join(words)
+
+    punct_str = string.punctuation
+    text = text.translate(str.maketrans("", "", punct_str))  # punctuation again, this time removing "'"
+
+    # lemmatization
+    lemmatizer = WordNetLemmatizer()
+    text = " ".join([lemmatizer.lemmatize(word) for word in regexp.tokenize(text)])
+
+    # stopword removal
+    text = " ".join([word for word in regexp.tokenize(text) if word not in stops])
+
+    # removing all characters except ascii letters and " " (space)
+    allowed = string.ascii_letters + " "
+    text = "".join([ch for ch in text if ch in allowed])
+
+    # removing words in which one character occurs 3 or more times in a row
+    pattern = r"\b\w*?(.)\1{2,}\w*\b"
+    text = re.sub(pattern, "", text).strip()
+
+    # removing words with fewer than 3 characters
+    short_words = r"\b\w{1,2}\b"
+    text = re.sub(short_words, "", text)
+
+    # return the final output
+    return text
+
+# ================================================================================================================================================================
+# STREAMLIT
+# ================================================================================================================================================================
+
+# app development starts
+st.set_page_config(layout="wide")
+st.write("# A Predictive Analysis of Disaster Tweets")
+
+img = Image.open("images/t2.png")
+st.image(img)
+
+tweet = st.text_input(label="Type or paste your tweet here", value="")
+
+# cache the model in streamlit's resource cache so it is loaded only once
+@st.cache_resource
+def cache_model(model_name):
+    model = tf.keras.models.load_model(model_name)
+    return model
+
+model = cache_model("model/tweet_model")  # --------------------------- model
+
+# if the user gives any input
+if len(tweet) > 0:
+    clean_tweet = preprocess(tweet)           # clean the tweet
+    y_pred = model.predict([clean_tweet])     # probability of class = 1
+    y_pred_num = int(np.round(y_pred)[0][0])  # final predicted class
+
+    if y_pred_num == 0:
+        # low disaster probability, so the tweet is reported as a non-disaster
+        st.write(f"#### 🌞🌞 This tweet is not flagged as a disaster (estimated disaster probability: {round(y_pred[0][0] * 100, 4)}%).")
+    else:
+        st.write(f"#### 🚩🚩 The high probability ({round(y_pred[0][0] * 100, 4)}%) indicates that this tweet is related to a disaster 🚨🚨.")
+
+# ================================================================================================================================================================
+# -------------------------------------------------------------------- Example Tweets ---------------------------------------------------------------------------
+# ================================================================================================================================================================
+
+# ---------------------------- Disaster tweets -------------------------------
+# "🚨 Just felt a strong earthquake! Stay safe everyone! #earthquake #safetyfirst" [93.62]
+# "⚠️ Urgent: Massive wildfire approaching our community. Evacuation orders in effect. Please heed warnings and evacuate immediately. #wildfire #safety" [99.30]
+# "🌪️ Tornado warning in effect for our area. Take shelter now! #tornadowarning #safetyfirst" [92.84]
+# "🌊 Coastal areas under tsunami alert. Seek higher ground immediately! #tsunami #emergencyalert" [99.54]
+
+
+# ---------------------------- Non-disaster tweets -------------------------------
+# "Enjoying a peaceful evening with a good book and a cup of tea. #Relaxation" [4.52]
+# "Excited for the weekend! Planning a movie night with friends. 🍿🎬 #FridayFeeling" [3.27]
+# "Just finished a great workout session at the gym. Feeling energized! 💪 #FitnessGoals" [6.17]
+# "Spent the day exploring a new hiking trail. Nature is so beautiful! 🌳 #OutdoorAdventure" [19.44]
+# "Cooked a delicious homemade dinner tonight. #Foodie #HomeChef" [7.1]
images/Out1.png ADDED
images/Out2.png ADDED
images/t2.png ADDED
model/tweet_model/fingerprint.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:376041931ca2355913e3c847180a34408d8ffa2ebd01ddbd964f4fce7cb476d0
+size 57
model/tweet_model/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dca4e0121922693287b38f14447d072fabd04e089cd6487964e6e5261e32ff9
+size 21175
model/tweet_model/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecab5be566dc4fd13a897026c2e7ab549989858410f1a5fa1a091842a75e77dd
+size 10031550
model/tweet_model/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb76923e9a19466c1ef9aa371b9783c8218f05cdc91340af81ddcae1bae72689
+size 1029458023
model/tweet_model/variables/variables.index ADDED
Binary file (14.7 kB)
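Together these four files (saved_model.pb, keras_metadata.pb, fingerprint.pb, and the variables/ data shard plus index) make up a standard TensorFlow SavedModel directory, which is why app.py can load it with a single tf.keras.models.load_model("model/tweet_model") call. For an independent look at the exported signatures, TensorFlow's saved_model_cli show --dir model/tweet_model --all can be run against the same directory.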
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
+streamlit==1.27.1
+Pillow==9.4.0
+nltk==3.8.1
+numpy==1.24.3
+pandas==2.0.3
+tensorflow==2.14.0
+regex==2022.7.9
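With these pins the app runs locally with pip install -r requirements.txt followed by streamlit run app.py. Note that app.py imports the standard-library re module, not the third-party regex package; the regex pin appears to cover a dependency pulled in by nltk rather than a direct import.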