|
import streamlit as st |
|
from keras.models import load_model |
|
import nltk |
|
import re |
|
from nltk.tokenize import TweetTokenizer |
|
from tensorflow.keras.preprocessing.text import Tokenizer |
|
from tensorflow.keras.preprocessing.sequence import pad_sequences |
|
import subprocess |
|
import numpy as np |
|
|
|
|
|
# Ensure the NLTK data packages this app depends on are available locally,
# downloading each one only on first run (LookupError means "not installed").
for _resource_path, _package in (
    ('corpora/stopwords', 'stopwords'),
    ('tokenizers/punkt/PY3/english.pickle', 'punkt'),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package)

# Imported after the availability checks so the data is present when used.
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Path to the trained Keras LSTM model loaded by load_lstm_model().
model_path = "./my_model.h5"
|
|
|
@st.cache_resource
def load_lstm_model(model_path):
    """Load and cache the trained Keras LSTM model from *model_path*.

    Without caching, main() re-reads the .h5 file from disk on every
    button click; st.cache_resource keeps one loaded model per path for
    the lifetime of the Streamlit server process.

    Args:
        model_path: Filesystem path to the saved Keras model (.h5).

    Returns:
        The loaded Keras model, ready for .predict().
    """
    return load_model(model_path)
|
|
|
|
|
|
|
def clean_text(text):
    """Normalize raw tweet-like text into a cleaned, lower-cased string.

    Pipeline: strip @mentions and URLs, drop English stopwords,
    re-tokenize with TweetTokenizer, remove '#' characters, keep only
    words longer than 2 characters, then strip digits and any remaining
    non-letter characters.

    Args:
        text: Raw input string (may contain mentions, URLs, hashtags).

    Returns:
        A single space-joined string of cleaned, lower-cased words.
    """
    # Remove mentions and URLs BEFORE tokenizing: word_tokenize splits
    # "@user" into "@", "user" and breaks URLs apart, which would make
    # these patterns unmatchable afterwards.
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'http\S+', '', text)

    # Compare lower-cased: NLTK's stopword list is all lower-case, so a
    # case-sensitive check would let "The", "And", etc. slip through.
    stop_words = set(stopwords.words('english'))
    words = word_tokenize(text)
    filtered_words = [word for word in words if word.lower() not in stop_words]

    tokenizer = TweetTokenizer(preserve_case=True)
    tokens = tokenizer.tokenize(' '.join(filtered_words))

    # Keep hashtag words but drop the '#' marker itself.
    tokens = [word.replace('#', '') for word in tokens]

    # Lower-case and discard very short tokens (<= 2 chars).
    text = ' '.join(word.lower() for word in tokens if len(word) > 2)

    # Strip digits, then anything that is not a letter or whitespace.
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    return text
|
|
|
def preprocess_text(text):
    """Clean *text* and convert it into a padded integer sequence for the model.

    Returns a numpy array of shape (1, 100) suitable for lstm_model.predict().
    """

    cleaned_text = clean_text(text)

    # NOTE(review): a brand-new Tokenizer is fitted on just this single input,
    # so the resulting word->index mapping cannot match the vocabulary the
    # model was trained with — the indices fed to the model are effectively
    # arbitrary and predictions will be unreliable. The tokenizer fitted at
    # training time should be persisted (e.g. pickled) and loaded here instead.
    token = Tokenizer()
    token.fit_on_texts([cleaned_text])
    text_sequences = token.texts_to_sequences([cleaned_text])
    # maxlen=100 — presumably matches the model's expected input length;
    # TODO confirm against the training configuration.
    padded_sequences = pad_sequences(text_sequences, maxlen=100)

    return padded_sequences
|
|
|
|
|
def predict_hate_speech(text, lstm_model):
    """Preprocess *text* and return the model's raw prediction scores.

    Args:
        text: Raw user input string.
        lstm_model: A loaded Keras model exposing .predict().

    Returns:
        The model's prediction output for the single preprocessed input.
    """
    return lstm_model.predict(preprocess_text(text))
|
|
|
|
|
def main():
    """Streamlit entry point: read text input and report the model's verdict."""
    st.title("Hate Speech Detection")
    st.write("Enter text below to detect hate speech:")
    input_text = st.text_area("Input Text", "")

    # Guard clauses: do nothing until the button is pressed, and require
    # non-empty input before touching the model.
    if not st.button("Detect Hate Speech"):
        return
    if not input_text:
        st.warning("Please enter some text")
        return

    lstm_model = load_lstm_model(model_path)
    prediction = predict_hate_speech(input_text, lstm_model)

    # predict() already returns an ndarray, so no np.array() copy is needed.
    # Index 1 is treated as the "hate speech" class — presumably matching the
    # training label order; verify against the training pipeline.
    max_index = int(np.argmax(prediction[0]))
    if max_index == 1:
        st.error("Hate Speech Detected")
    else:
        st.success("No Hate Speech Detected")
|
|
|
|
|
# Script entry point: run the Streamlit app when executed directly.
if __name__ == "__main__":

    main()
|
|