# Spaces: Sleeping
import json
import string

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline
# Load Swear Words
def load_swear_words(path="swearWord.json"):
    """Return the set of swear words stored in the JSON file at *path*.

    The file is expected to hold a JSON array of strings. Entries are
    lower-cased because the lookup in this file lower-cases each input token
    before testing membership; a capitalised entry would otherwise never
    match. Non-string entries are skipped.

    Loading is best-effort: on any failure (missing file, invalid JSON,
    unexpected top-level type) the error is printed and an empty set is
    returned, so the application still starts without the filter.

    Args:
        path: Location of the JSON word list.

    Returns:
        A set of lower-case words, or an empty set when loading fails.
    """
    try:
        # Explicit encoding so the result does not depend on the platform locale.
        with open(path, "r", encoding="utf-8") as f:
            entries = json.load(f)
        words = {entry.lower() for entry in entries if isinstance(entry, str)}
    except Exception as e:
        print(f"Failed to load {path}: {e}")
        return set()
    print("Swear words loaded successfully.")
    return words


swear_words = load_swear_words()
# Load Model and Tokenizer
# Checkpoint identifier shared by the tokenizer and the model so the two
# cannot drift apart.
MODEL_ID = "eliasalbouzidi/distilbert-nsfw-text-classifier"

try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    text_classifier = TextClassificationPipeline(model=model, tokenizer=tokenizer)
    print("Model loaded successfully.")
except Exception as e:
    # The app cannot work without the model: report the failure and stop
    # with a non-zero exit status.
    print(f"Error loading model: {e}")
    # Raise SystemExit directly instead of calling ``exit()``: that helper is
    # injected by the ``site`` module for interactive sessions and is not
    # guaranteed to exist (e.g. under ``python -S``).
    raise SystemExit(1) from e
# Text Classifier Function
def textclassifier(text):
    """Classify *text* with a swear-word filter followed by the NSFW model.

    The input is first screened against the module-level ``swear_words`` set.
    Only when no token matches is the module-level ``text_classifier``
    pipeline invoked.

    Args:
        text: Raw user input. ``None`` and whitespace-only strings are
            treated as empty input.

    Returns:
        A ``(label, score)`` tuple:

        * ``("Empty input", 0.0)`` when there is nothing to classify.
        * ``("swear-word", 1.0)`` when any whitespace-separated token,
          lower-cased, is in ``swear_words`` either verbatim or after
          stripping leading/trailing ASCII punctuation.
        * The top prediction of the pipeline with its score rounded to four
          decimals; an ``"nsfw"`` label scoring below 0.994 is reported as
          ``"uncertain"``.
        * ``("Error: <message>", 0.0)`` when the pipeline raises.
    """
    # Guard against None as well as blank strings; ``None.strip()`` would raise.
    if text is None or not text.strip():
        return "Empty input", 0.0

    # Check for swear words
    for token in text.split():
        lowered = token.lower()
        # Try the token verbatim first (the list may contain entries with
        # punctuation), then with surrounding punctuation removed so that
        # "word!" or "(word)" cannot slip past the filter.
        if lowered in swear_words or lowered.strip(string.punctuation) in swear_words:
            return "swear-word", 1.0

    # Use model
    try:
        result = text_classifier(text)
        label = result[0]["label"]
        score = result[0]["score"]

        # Threshold logic: only a very confident "nsfw" prediction is kept;
        # anything below the threshold is downgraded to "uncertain".
        threshold = 0.994
        if label == "nsfw" and score < threshold:
            label = "uncertain"

        return label, round(score, 4)
    except Exception as e:
        # Surface the failure in the UI instead of crashing the request.
        return f"Error: {e}", 0.0
# Gradio Interface
# One text input; the two output components receive, in order, the
# (label, score) tuple returned by textclassifier.
text_input = gr.Textbox(label="Enter text")
prediction_output = gr.Label(label="Prediction")
confidence_output = gr.Number(label="Confidence Score")

interface = gr.Interface(
    fn=textclassifier,
    inputs=text_input,
    outputs=[prediction_output, confidence_output],
    title="Text Classifier with Swear Word Filter",
    # description="First checks for swear words, then uses NSFW text classifier if no swear word is found."
)

# Start the web app as soon as the script is executed.
interface.launch()