# Commit metadata captured from the hosting site (not code):
# author billusanda007 — "Update app.py" (3880b3d)
import streamlit as st
import joblib
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
# Import necessary libraries
import re
import nltk
from urllib.parse import urlparse
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download the NLTK corpora/models needed below: lemmatizer data
# (omw-1.4, wordnet, wordnet2022), the punkt tokenizer, and stop words.
for _resource in ('omw-1.4', 'wordnet', 'wordnet2022', 'punkt', 'stopwords'):
    nltk.download(_resource)

# English stop-word lookup set (set for O(1) membership tests).
stop_words = set(stopwords.words("english"))
# Shared WordNet lemmatizer instance used by textProcess().
lemmatizer = WordNetLemmatizer()
# Define a function for text processing
def textProcess(sent):
    """Clean and normalize raw text for TF-IDF vectorization.

    Pipeline: strip brackets/parentheses, drop URL-like tokens and
    @mentions, remove HTML tags and non-alphanumeric characters,
    lowercase, tokenize, filter English stop words, and lemmatize.

    Args:
        sent: Raw input text, or None.

    Returns:
        The processed text as a single space-joined string. Returns ""
        for None input or if any step raises (best-effort behavior).
    """
    try:
        if sent is None:
            return ""
        # Replace square brackets and parentheses with spaces.
        sent = re.sub('[][)(]', ' ', sent)
        # Drop tokens that parse as URLs (i.e. carry a scheme like http/https).
        sent = ' '.join(word for word in sent.split() if not urlparse(word).scheme)
        # Remove Twitter-style usernames (@handle).
        sent = re.sub(r'\@\w+', '', sent)
        # Strip HTML tags.
        sent = re.sub(re.compile("<.*?>"), '', sent)
        # Keep only letters and digits, then lowercase.
        sent = re.sub("[^A-Za-z0-9]", ' ', sent)
        sent = sent.lower()
        # Collapse runs of whitespace.
        sent = ' '.join(sent.split())
        # Tokenize, drop stop words, and lemmatize in a single pass.
        # (Replaces the original copy-and-list.remove() loop, which was
        # O(n^2); the surviving tokens are identical.)
        tokens = word_tokenize(sent)
        lemmas = [lemmatizer.lemmatize(word) for word in tokens
                  if word not in stop_words]
        return ' '.join(lemmas)
    except Exception as ex:
        # Best-effort: log the offending input and fall back to "".
        print(sent, "\n")
        print("Error ", ex)
        return ""
# --- Model artifacts (loaded once at import time) ---
# Load the pre-trained stress classifier persisted with joblib.
# NOTE(review): the filename has no extension — confirm this matches the
# artifact actually shipped alongside the app.
model = joblib.load('Stress identification NLP')
# Load the TF-IDF vectorizer fitted during training. Inference must reuse
# this exact fitted vectorizer so the feature space matches the model.
tfidf_vectorizer = joblib.load('tfidf_vectorizer.joblib')
# Define the Streamlit web app
def main():
    """Render the Streamlit UI and run a stress prediction on demand."""
    st.title("Stress Predictor Web App")
    st.write("Enter some text to predict if the person is in stress or not.")
    # Free-form text input from the user.
    user_input = st.text_area("Enter text here:")
    if st.button("Predict"):
        if user_input:
            # Apply the same preprocessing used at training time.
            processed_text = textProcess(user_input)
            # Vectorize with the fitted TF-IDF vectorizer (not a new one)
            # so the features align with what the model was trained on.
            tfidf_text = tfidf_vectorizer.transform([processed_text])
            prediction = model.predict(tfidf_text)[0]
            # Label convention: 1 -> stressed, anything else -> not stressed.
            if prediction == 1:
                result = "This person is in stress."
            else:
                result = "This person is not in stress."
            st.write(result)
        else:
            # Previously a click with empty input silently did nothing;
            # give the user explicit feedback instead.
            st.warning("Please enter some text before predicting.")
# Entry point when executed directly (e.g. `streamlit run app.py`).
if __name__ == '__main__':
    main()