vm24's picture
Create app.py
e953d9a verified
raw
history blame
2.63 kB
import gradio as gr
import pandas as pd
import numpy as np
import re
import nltk
import string
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from transformers import pipeline
# One-time NLTK setup: stopword list (downloaded/cached) and an English stemmer.
nltk.download('stopwords')
stopword = set(stopwords.words('english'))
stemmer = SnowballStemmer("english")

# Load the comment dataset and translate the numeric class ids into
# human-readable label strings, keeping only the columns the app needs.
data = pd.read_csv("commentdataset.csv")
class_names = {0: "Offensive Language", 1: "Abusive comments", 2: "No Abusive and Offensive"}
data["labels"] = data["class"].map(class_names)
data = data[["comments", "labels"]]
# Clean data function
def clean(text):
text = str(text).lower()
text = re.sub(r"she's", "she is", text)
text = re.sub(r"it's", "it is", text)
text = re.sub(r"that's", "that is", text)
text = re.sub(r"what's", "that is", text)
text = re.sub(r"where's", "where is", text)
text = re.sub(r"how's", "how is", text)
text = re.sub(r"'ll", " will", text)
text = re.sub(r"'ve", " have", text)
text = re.sub(r"'re", " are", text)
text = re.sub(r"i'm", "i am", text)
text = re.sub(r"r", "", text)
text = re.sub(r"he's", "he is", text)
text = re.sub(r"'d", " would", text)
text = re.sub(r"won't", "will not", text)
text = re.sub(r"can't", "cannot", text)
text = re.sub(r"n't", " not", text)
text = re.sub(r"n'", "ng", text)
text = re.sub(r"'bout", "about", text)
text = re.sub(r"'til", "until", text)
text = re.sub('\[.*?\]', '', text)
text = re.sub('https?://\S+|www\.\S+', '', text)
text = re.sub('<.*?>+', '', text)
text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
text = re.sub('\n', '', text)
text = re.sub('\w*\d\w*', '', text)
text = [word for word in text.split(' ') if word not in stopword]
text = " ".join(text)
text = [stemmer.stem(word) for word in text.split(' ')]
text = " ".join(text)
return text
# Pre-clean every comment in the dataset once, up front.
data["comments"] = data["comments"].apply(clean)

# Pre-trained transformer for sentiment analysis (Hugging Face default
# checkpoint for this task — no model name is pinned here).
sentiment_pipeline = pipeline("sentiment-analysis")
# Function to classify comments
def classify_comment(comment):
    """Clean a raw comment and return the label string predicted by the
    module-level sentiment pipeline."""
    prepared = clean(comment)
    top_prediction = sentiment_pipeline(prepared)[0]
    return top_prediction['label']
# Assemble the Gradio UI: a free-text input wired to the classifier,
# with the predicted label shown back to the user.
comment_input = gr.Textbox(label="Enter a comment")
classification_output = gr.Label()
interface = gr.Interface(
    fn=classify_comment,
    inputs=comment_input,
    outputs=classification_output,
    title="Comment Classifier",
)
interface.launch()