Spaces:

lydiadida
/

speachdetection

Sleeping

App Files Files Community

speachdetection / app.py

lydiadida

Upload 3 files

408900f verified 8 months ago

raw

history blame

3.18 kB

	import streamlit as st
	from keras.models import load_model
	import nltk
	import re
	from nltk.tokenize import TweetTokenizer
	from tensorflow.keras.preprocessing.text import Tokenizer
	from tensorflow.keras.preprocessing.sequence import pad_sequences
	import subprocess
	import numpy as np

	# Make sure the NLTK data used by the text-cleaning code is installed.
	# Each entry is (path probed with nltk.data.find, package passed to
	# nltk.download):
	#   * stopwords  - backs nltk.corpus.stopwords
	#   * punkt      - tokenizer models used by word_tokenize on older NLTK
	#   * punkt_tab  - replacement for punkt that recent NLTK releases (3.9+)
	#                  look up instead; without it word_tokenize raises
	#                  LookupError there
	# Probing the resource directory (rather than the legacy
	# 'tokenizers/punkt/PY3/english.pickle' file) works for both layouts.
	for _resource_path, _package_name in (
	    ('corpora/stopwords', 'stopwords'),
	    ('tokenizers/punkt', 'punkt'),
	    ('tokenizers/punkt_tab', 'punkt_tab'),
	):
	    try:
	        nltk.data.find(_resource_path)
	    except LookupError:
	        # Only hit the network when the resource is actually missing.
	        nltk.download(_package_name)

	# Additional imports (kept below the download step, as in the original
	# layout, so the data is in place before anything uses it)
	from nltk.corpus import stopwords
	from nltk.tokenize import word_tokenize

	# Location of the saved Keras model file, relative to the working directory.
	model_path = "./my_model.h5"  # Set your model path here


	# st.cache_resource (Streamlit >= 1.18) keeps one loaded model per distinct
	# model_path and hands the same object back on later calls, so the model
	# file is not re-read from disk every time the loader is invoked.
	@st.cache_resource
	def load_lstm_model(model_path):
	    """Load and return the Keras model stored at ``model_path``.

	    Args:
	        model_path: Filesystem path of the saved model (e.g. an ``.h5`` file).

	    Returns:
	        The model object produced by ``keras.models.load_model``. Because of
	        the cache decorator, repeated calls with the same path return the
	        same shared instance; callers should treat it as read-only.
	    """
	    return load_model(model_path)



	def clean_text(text):
	    """Normalise raw text into a lowercase, letters-only string.

	    Steps, in order:
	      1. Tokenise with ``word_tokenize`` and drop tokens that appear
	         (case-sensitively) in NLTK's English stopword list.
	      2. Re-join with spaces and delete ``@name`` and ``http...`` matches.
	      3. Re-tokenise with ``TweetTokenizer`` (case preserved), strip ``#``
	         from every token, keep only tokens longer than two characters and
	         lowercase them.
	      4. Delete digit runs, then every character that is not an ASCII
	         letter or whitespace.

	    Whitespace is never collapsed, so the result may contain runs of spaces.

	    NOTE(review): ``word_tokenize`` runs before the @mention / URL patterns;
	    if it splits such tokens apart the patterns may no longer match them.
	    Confirm against the preprocessing used at training time before changing.

	    Args:
	        text: The raw input string.

	    Returns:
	        The cleaned string.
	    """
	    english_stopwords = frozenset(stopwords.words('english'))
	    cleaned = ' '.join(
	        tok for tok in word_tokenize(text) if tok not in english_stopwords
	    )

	    # Twitter usernames first, then URLs.
	    for pattern in (r'@\w+', r'http\S+'):
	        cleaned = re.sub(pattern, '', cleaned)

	    # Strip hashtag symbols, drop short tokens, lowercase what is left.
	    survivors = []
	    for raw_token in TweetTokenizer(preserve_case=True).tokenize(cleaned):
	        bare = raw_token.replace('#', '')
	        if len(bare) > 2:
	            survivors.append(bare.lower())
	    cleaned = ' '.join(survivors)

	    # Digits, then anything that is not a letter or whitespace.
	    cleaned = re.sub(r'\d+', '', cleaned)
	    return re.sub(r'[^a-zA-Z\s]', '', cleaned)

	def preprocess_text(text, tokenizer=None, maxlen=100):
	    """Clean ``text`` and convert it to a padded integer sequence.

	    Args:
	        text: Raw input string; it is passed through ``clean_text`` first.
	        tokenizer: Optional, already-fitted Keras ``Tokenizer`` used to map
	            words to indices. When omitted, a new ``Tokenizer`` is fitted on
	            the cleaned input alone (the original behaviour).
	        maxlen: Length every sequence is padded/truncated to. Defaults to
	            100, the value previously hard-coded here.

	    Returns:
	        The array returned by ``pad_sequences`` for the single input text.

	    NOTE(review): in the fallback path the word indices are derived only
	    from this one text, so they are unrelated to any vocabulary built
	    elsewhere. The model was presumably trained with its own fitted
	    Tokenizer -- confirm, persist it next to the model, and pass it in via
	    ``tokenizer`` so inference uses the same word-to-index mapping.
	    """
	    cleaned_text = clean_text(text)

	    if tokenizer is None:
	        # Legacy fallback: vocabulary built from the input itself.
	        tokenizer = Tokenizer()
	        tokenizer.fit_on_texts([cleaned_text])

	    text_sequences = tokenizer.texts_to_sequences([cleaned_text])
	    return pad_sequences(text_sequences, maxlen=maxlen)

	# Function to predict hate speech
	def predict_hate_speech(text, lstm_model):
	    """Return ``lstm_model``'s raw prediction for ``text``.

	    The text is converted to a padded sequence by ``preprocess_text`` and
	    fed to ``lstm_model.predict``; whatever that call returns is passed
	    back to the caller unchanged.

	    Args:
	        text: Raw input string.
	        lstm_model: Object exposing a ``predict`` method.
	    """
	    return lstm_model.predict(preprocess_text(text))

	# Main function to run the Streamlit app
	def main():
	    """Render the hate-speech detection page and handle a button press.

	    Draws the title, prompt and text area. When the "Detect Hate Speech"
	    button is pressed with non-blank input, loads the model from
	    ``model_path``, runs the prediction and reports the outcome; blank or
	    whitespace-only input produces a warning instead.
	    """
	    # Set up Streamlit UI
	    st.title("Hate Speech Detection")
	    st.write("Enter text below to detect hate speech:")
	    input_text = st.text_area("Input Text", "")

	    if not st.button("Detect Hate Speech"):
	        return

	    # Whitespace-only input has nothing to classify; treat it as empty
	    # rather than sending it through the model.
	    if not input_text.strip():
	        st.warning("Please enter some text")
	        return

	    lstm_model = load_lstm_model(model_path)
	    prediction = predict_hate_speech(input_text, lstm_model)

	    # prediction[0] is the model output for the single submitted text.
	    # NOTE(review): the check below treats index 1 as the hate-speech
	    # class and assumes one score per class; with a single-unit output
	    # argmax would always be 0 -- confirm against the trained model.
	    scores = np.array(prediction[0])
	    if np.argmax(scores) == 1:
	        st.error("Hate Speech Detected")
	    else:
	        st.success("No Hate Speech Detected")


	# Run the app
	if __name__ == "__main__":
	    main()