# -*- coding: utf-8 -*-
"""
Created on Mon Jun 6 20:56:08 2022
@author: Aziz Baran Kurtuluş
"""
import os
os.system('pip install nltk')
os.system('pip install scikit-learn')
import nltk
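# download the NLTK data used below: punkt for tokenization, stopwords for
# stop-word removal, and wordnet/omw-1.4 for lemmatization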
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')
import streamlit as st
import joblib
import re
import string
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
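# Streamlit containers, one per section of the page layout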
site_header = st.container()
business_context = st.container()
data_desc = st.container()
performance = st.container()
tweet_input = st.container()
model_results = st.container()
sentiment_analysis = st.container()
contact = st.container()
with site_header:
    st.title('Toxic Comment Detection')

with tweet_input:
    st.header('Is Your Text Considered Toxic?')
    st.write("""*Please note that this prediction is based on how the model was trained, so it may not be an accurate representation.*""")
    user_text = st.text_input('Enter Text', max_chars=280)
with model_results:
    st.subheader('Prediction:')
    if user_text:
        # processing user_text
        # removing punctuation
        user_text = re.sub('[%s]' % re.escape(string.punctuation), '', user_text)
        # tokenizing
        stop_words = set(stopwords.words('english'))
        tokens = nltk.word_tokenize(user_text)
        # removing stop words
        stopwords_removed = [token.lower() for token in tokens if token.lower() not in stop_words]
        # lemmatizing to the root word
        lemmatizer = WordNetLemmatizer()
        lemmatized_output = []
        for word in stopwords_removed:
            lemmatized_output.append(lemmatizer.lemmatize(word))
        # instantiating the tfidf vectorizer; it is refit on the training text
        # so the feature space matches the one the model was trained on
        tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 2))
        X_train = joblib.load(open('resources/X_train.pickel', 'rb'))
        # rejoining the cleaned tokens into a single document for vectorizing
        X_test = [' '.join(lemmatized_output)]
        X_train_count = tfidf.fit_transform(X_train)
        X_test_count = tfidf.transform(X_test)
        # loading in the trained model
        final_model = joblib.load(open('resources/final_bayes.pickel', 'rb'))
        # applying the model to make a prediction
        prediction = final_model.predict(X_test_count)[0]
        if prediction == 0:
            st.subheader('**Not Toxic**')
        else:
            st.subheader('**Toxic**')
        st.text('')
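# A possible refinement (a sketch only, using a hypothetical load_artifacts helper
# and assuming a Streamlit release that provides st.cache_resource): the training
# text and trained model could be loaded once and cached instead of being
# reloaded on every rerun, e.g.
#
#   @st.cache_resource
#   def load_artifacts():
#       return (joblib.load('resources/X_train.pickel'),
#               joblib.load('resources/final_bayes.pickel'))
#
# Older Streamlit releases offered @st.cache for the same purpose.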