Spaces:

manish72
/

Disaster_tweet_sentiment_Analysis

Sleeping

App Files Files Community

manish72 committed on Apr 3

Commit

0bd26c3

•

1 Parent(s): 30e5590

Upload 10 files

Browse files

Files changed (11) hide show

.gitattributes +1 -0
app.py +164 -0
images/Out1.png +0 -0
images/Out2.png +0 -0
images/t2.png +0 -0
model/tweet_model/fingerprint.pb +3 -0
model/tweet_model/keras_metadata.pb +3 -0
model/tweet_model/saved_model.pb +3 -0
model/tweet_model/variables/variables.data-00000-of-00001 +3 -0
model/tweet_model/variables/variables.index +0 -0
requirements.txt +7 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+model/tweet_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,164 @@

+# importing Libraries
+import streamlit as st
+import PIL
+from PIL import Image
+import tensorflow as tf
+from nltk.stem import WordNetLemmatizer
+from nltk.tokenize import RegexpTokenizer
+import re
+import string
+import numpy as np
+import pandas as pd
+import nltk
+# Make sure the WordNet corpus is available locally; it is downloaded only when
+# the lookup fails. nltk.data.find is the documented lookup entry point and
+# raises LookupError when the resource is missing.
+try:
+    nltk.data.find("corpora/wordnet.zip")
+except LookupError:
+    nltk.download('wordnet')
+# ----------------------------------------------------------------------------------
+# read files
+# The bare-name expression raises NameError when any of the three lookup objects
+# is not yet defined in the running namespace; only then are the helper files read.
+# acronyms_dict / contractions_dict are pandas Series (typ="series");
+# presumably they map a short form to its expansion -- confirm against helper/*.json.
+try:
+    acronyms_dict, contractions_dict, stops
+except NameError:
+    acronyms_dict = pd.read_json("helper/acronym.json", typ = "series")
+    contractions_dict = pd.read_json("helper/contractions.json", typ = "series")
+    # every cell of the CSV flattened into one plain list of stop words
+    stops = list(pd.read_csv('helper/stopwords.csv').values.flatten())
+# ----------------------------------------------------------------------------------
+# Defining tokenizer
+# Tokens are runs of word characters and apostrophes. Raw string so that "\w"
+# reaches the regex engine without relying on an invalid string escape.
+regexp = RegexpTokenizer(r"[\w']+")
+# preprocess Function
+# Patterns and translation tables used by preprocess(); built once at import time
+# instead of on every call.
+_HTML_TAG_RE = re.compile(r'<.*?>')
+_EMOJI_RE = re.compile("["
+                       u"\U0001F600-\U0001F64F"  # emoticons
+                       u"\U0001F300-\U0001F5FF"  # symbols & pictographs
+                       u"\U0001F680-\U0001F6FF"  # transport & map symbols
+                       u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
+                       u"\U00002702-\U000027B0"
+                       u"\U000024C2-\U0001F251"
+                       "]+", flags = re.UNICODE)
+_URL_RE = re.compile(r"(https?://\S+|www\.\S+)")            # "http(s)://..." or "www...." up to the next whitespace
+_MENTION_RE = re.compile(r'@[\w_]+')                        # @twitter usernames
+_REPEATED_CHAR_WORD_RE = re.compile(r'\b\w*?(.)\1{2,}\w*\b')  # a character repeated 3 or more times in a row
+_SHORT_WORD_RE = re.compile(r'\b\w{1,2}\b')                 # words of one or two characters
+# First pass keeps "'" (needed for contraction lookup) and "-" (turned into a space afterwards)
+_PUNCT_DIGITS_TABLE = str.maketrans('', '', (string.punctuation + string.digits).replace("'", "").replace("-", ""))
+_PUNCT_TABLE = str.maketrans('', '', string.punctuation)
+_ALLOWED_CHARS = frozenset(string.ascii_letters + " ")
+
+def preprocess(text):
+    """Clean a raw tweet string and return the cleaned string.
+
+    Steps, in order: lowercase and strip; remove HTML tags, emoji ranges, URLs
+    and @usernames; remove punctuation (except "'" and "-") and digits; turn "-"
+    into a space; replace tokens found in `acronyms_dict` and then in
+    `contractions_dict`; remove all remaining punctuation; lemmatize each token
+    with WordNetLemmatizer; drop tokens listed in `stops`; keep only ASCII
+    letters and spaces; delete words containing a character repeated three or
+    more times in a row; delete words of one or two characters.
+
+    Relies on the module-level `regexp`, `acronyms_dict`, `contractions_dict`
+    and `stops`. Spaces left behind by the final deletion are not collapsed.
+    """
+    text = text.lower().strip()                                      # lowercase, outer whitespace
+    text = _HTML_TAG_RE.sub('', text)                                # html tags
+    text = _EMOJI_RE.sub('', text)                                   # emoji / pictograph ranges
+    text = _URL_RE.sub('', text)                                     # urls
+    text = _MENTION_RE.sub('', text)                                 # @twitter usernames
+    text = text.translate(_PUNCT_DIGITS_TABLE)                       # punctuation and numbers
+    text = text.replace("-", " ")                                    # "-"
+    text = _substitute_tokens(text, acronyms_dict)                   # acronyms
+    text = _substitute_tokens(text, contractions_dict)               # contractions
+    text = text.translate(_PUNCT_TABLE)                              # punctuation again to remove "'"
+    lemmatizer = WordNetLemmatizer()
+    text = " ".join(lemmatizer.lemmatize(word) for word in regexp.tokenize(text))      # lemmatize
+    stop_set = set(stops)                                            # hashed lookup instead of scanning the list per token
+    text = ' '.join(word for word in regexp.tokenize(text) if word not in stop_set)    # stopwords
+    text = "".join(ch for ch in text if ch in _ALLOWED_CHARS)        # only alphabets and " " (space)
+    text = _REPEATED_CHAR_WORD_RE.sub("", text).strip()              # words with a character repeated 3+ times
+    text = _SHORT_WORD_RE.sub("", text)                              # words with less than 3 characters
+    return text
+
+def _substitute_tokens(text, lookup):
+    """Tokenize `text` and re-join it with single spaces, replacing every token
+    present in `lookup.index` by the whitespace-separated words of `lookup[token]`."""
+    words = []
+    for word in regexp.tokenize(text):
+        if word in lookup.index:
+            words.extend(lookup[word].split())
+        else:
+            words.append(word)                                       # tokens never contain whitespace
+    return ' '.join(words)
+# ================================================================================================================================================================
+                                                            # STREAMLIT
+# ================================================================================================================================================================
+# App Development Starts
+# Page skeleton: wide layout, title, banner image, then the tweet input box
+st.set_page_config(layout="wide")
+st.write("# A Predictive Analysis of Disaster Tweets")
+# banner image shipped with the app under images/
+img = Image.open("images/t2.png")
+st.image(img)
+# the box starts empty; `tweet` holds whatever the user typed
+tweet = st.text_input("Type or paste your tweet here", value="")
+# Defining a function to store the model in streamlit cache memory
+@st.cache_resource
+def cache_model(model_name):
+    """Load and return the Keras model saved at `model_name`.
+
+    The st.cache_resource decorator lets Streamlit keep the returned model in
+    its resource cache, keyed by `model_name`.
+    """
+    return tf.keras.models.load_model(model_name)
+model = cache_model("model/tweet_model")                                            #--------------------------- model
+# Score only when the user typed actual text; whitespace-only input has nothing to classify
+if tweet.strip():
+    clean_tweet = preprocess(tweet)                   # cleans tweet
+    y_pred = model.predict([clean_tweet])             # gives probability of class = 1
+    y_pred_num = int(np.round(y_pred)[0][0])          # get final prediction of output class
+    disaster_pct = round(y_pred[0][0]*100, 4)         # same probability as a percentage, computed once for both messages
+    if y_pred_num == 0:
+        st.write(f"#### 🌞🌞This tweet is not flagged as a disaster, but with a probability of {disaster_pct}% that it might be. ")
+    else:
+        st.write(f"#### 🚩🚩High probability ( {disaster_pct}%) indicates that this tweet is related to a disaster🚨🚨.")
+# ================================================================================================================================================================
+# --------------------------------------------------------------------  Example of Tweets  -----------------------------------------------------------------------
+# ================================================================================================================================================================
+# ---------------------------- Disaster Tweets -------------------------------
+# "🚨 Just felt a strong earthquake! Stay safe everyone! #earthquake #safetyfirst"  [93.62]
+# "⚠️ Urgent: Massive wildfire approaching our community. Evacuation orders in effect. Please heed warnings and evacuate immediately. #wildfire #safety"  [99.30]
+# "🌪️ Tornado warning in effect for our area. Take shelter now! #tornadowarning #safetyfirst"  [92.84]
+# "🌊 Coastal areas under tsunami alert. Seek higher ground immediately! #tsunami #emergencyalert"  [99.54]
+# ---------------------------- Non disaster Tweets -------------------------------
+# "Enjoying a peaceful evening with a good book and a cup of tea. #Relaxation"  [4.52]
+# "Excited for the weekend! Planning a movie night with friends. 🍿🎬 #FridayFeeling"  [3.27]
+# "Just finished a great workout session at the gym. Feeling energized! 💪 #FitnessGoals"  [6.17]
+# "Spent the day exploring a new hiking trail. Nature is so beautiful! 🌳 #OutdoorAdventure"  [19.44]
+# "Cooked a delicious homemade dinner tonight. #Foodie #HomeChef" [7.1]

images/Out1.png ADDED Viewed

images/Out2.png ADDED Viewed

images/t2.png ADDED Viewed

model/tweet_model/fingerprint.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:376041931ca2355913e3c847180a34408d8ffa2ebd01ddbd964f4fce7cb476d0
+size 57

model/tweet_model/keras_metadata.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7dca4e0121922693287b38f14447d072fabd04e089cd6487964e6e5261e32ff9
+size 21175

model/tweet_model/saved_model.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecab5be566dc4fd13a897026c2e7ab549989858410f1a5fa1a091842a75e77dd
+size 10031550

model/tweet_model/variables/variables.data-00000-of-00001 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb76923e9a19466c1ef9aa371b9783c8218f05cdc91340af81ddcae1bae72689
+size 1029458023

model/tweet_model/variables/variables.index ADDED Viewed

Binary file (14.7 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+streamlit == 1.27.1
+Pillow == 9.4.0
+nltk == 3.8.1
+numpy == 1.24.3
+pandas == 2.0.3
+tensorflow  == 2.14.0
+regex == 2022.7.9