vm24's picture
Create app.py
e953d9a verified
raw
history blame
2.63 kB
import gradio as gr
import pandas as pd
import numpy as np
import re
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from transformers import pipeline
# One-time NLTK setup: stopword list (downloaded/cached) and an English stemmer.
nltk.download('stopwords')
stopword = set(stopwords.words('english'))
stemmer = SnowballStemmer("english")

# Load the comment dataset and translate the numeric class ids into
# human-readable label strings, keeping only the columns the app needs.
data = pd.read_csv("commentdataset.csv")
class_names = {0: "Offensive Language", 1: "Abusive comments", 2: "No Abusive and Offensive"}
data["labels"] = data["class"].map(class_names)
data = data[["comments", "labels"]]
# Clean data function
def clean(text):
text = str(text).lower()
text = re.sub(r"she's", "she is", text)
text = re.sub(r"it's", "it is", text)
text = re.sub(r"that's", "that is", text)
text = re.sub(r"what's", "that is", text)
text = re.sub(r"where's", "where is", text)
text = re.sub(r"how's", "how is", text)
text = re.sub(r"'ll", " will", text)
text = re.sub(r"'ve", " have", text)
text = re.sub(r"'re", " are", text)
text = re.sub(r"i'm", "i am", text)
text = re.sub(r"r", "", text)
text = re.sub(r"he's", "he is", text)
text = re.sub(r"'d", " would", text)
text = re.sub(r"won't", "will not", text)
text = re.sub(r"can't", "cannot", text)
text = re.sub(r"n't", " not", text)
text = re.sub(r"n'", "ng", text)
text = re.sub(r"'bout", "about", text)
text = re.sub(r"'til", "until", text)
text = re.sub('\[.*?\]', '', text)
text = re.sub('https?://\S+|www\.\S+', '', text)
text = re.sub('<.*?>+', '', text)
text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
text = re.sub('\n', '', text)
text = re.sub('\w*\d\w*', '', text)
text = [word for word in text.split(' ') if word not in stopword]
text = " ".join(text)
text = [stemmer.stem(word) for word in text.split(' ')]
text = " ".join(text)
return text
# Pre-clean every comment in the dataset once, up front.
data["comments"] = data["comments"].apply(clean)

# Pre-trained transformer for sentiment analysis (Hugging Face default
# checkpoint for this task — no model name is pinned here).
sentiment_pipeline = pipeline("sentiment-analysis")
# Function to classify comments
def classify_comment(comment):
    """Clean a raw comment and return the label string predicted by the
    module-level sentiment pipeline."""
    prepared = clean(comment)
    top_prediction = sentiment_pipeline(prepared)[0]
    return top_prediction['label']
# Assemble the Gradio UI: a free-text input wired to the classifier,
# with the predicted label shown back to the user.
comment_input = gr.Textbox(label="Enter a comment")
classification_output = gr.Label()
interface = gr.Interface(
    fn=classify_comment,
    inputs=comment_input,
    outputs=classification_output,
    title="Comment Classifier",
)
interface.launch()