Spaces:
Sleeping
Sleeping
import gradio as gr | |
from transformers import pipeline | |
# Language identification pipeline; analyze_text reads its top label and score.
lang_classifier = pipeline(
    task="text-classification",
    model="papluca/xlm-roberta-base-language-detection",
)

# Translation pipeline (NLLB-200); analyze_text always targets English.
translator = pipeline(
    task="translation",
    model="facebook/nllb-200-distilled-600M",
)

# Offensive-language classifier; the UI presents its result under
# "Hate Speech Detection".
offensive_classifier = pipeline(
    task="text-classification",
    model="cardiffnlp/twitter-roberta-base-offensive",
)
# Mapping from the ISO 639-1 labels emitted by the language detector to the
# NLLB-200 (FLORES-200) codes expected by the translator. It covers every
# language the detector model card lists, so a detected language is never
# silently treated as English just because it is missing from this table.
LANGUAGE_CODES = {
    "ar": "arb_Arab",
    "bg": "bul_Cyrl",
    "de": "deu_Latn",
    "el": "ell_Grek",
    "en": "eng_Latn",
    "es": "spa_Latn",
    "fr": "fra_Latn",
    "hi": "hin_Deva",
    "it": "ita_Latn",
    "ja": "jpn_Jpan",
    "nl": "nld_Latn",
    "pl": "pol_Latn",
    "pt": "por_Latn",
    "ru": "rus_Cyrl",
    "sw": "swh_Latn",
    "th": "tha_Thai",
    "tr": "tur_Latn",
    "ur": "urd_Arab",
    "vi": "vie_Latn",
    "zh": "zho_Hans",
}
def analyze_text(text):
    """Detect the language of *text*, translate it to English, and classify it.

    The text is passed to the module-level ``lang_classifier``; when the
    detected language is not English and has an entry in ``LANGUAGE_CODES``
    it is translated with ``translator`` before being passed to
    ``offensive_classifier``.

    Args:
        text: The user-supplied string. ``None``, empty, or whitespace-only
            input is rejected without calling any model.

    Returns:
        A ``(language_output, hate_output)`` tuple of dicts.

        ``language_output`` has the keys ``language``, ``confidence``,
        ``original_text`` and ``translated_text``. ``translated_text`` is the
        English translation, ``"Already in English"`` for English input, or a
        "Translation unavailable ..." note when the detected language has no
        NLLB code (in that case the untranslated text is classified).

        ``hate_output`` has the keys ``label`` and ``score`` taken from the
        top classifier result.

        For missing or blank input both dicts are ``{"error": "Nessun testo"}``.
    """
    # Guard against None as well as blank input: calling .strip() on None
    # would raise AttributeError.
    if text is None or not text.strip():
        return {"error": "Nessun testo"}, {"error": "Nessun testo"}

    # Clip to 512 tokens (the usual RoBERTa-family limit) so very long input
    # is truncated rather than raising inside the model.
    lang_result = lang_classifier(text, truncation=True, max_length=512)
    detected_language = lang_result[0]["label"]
    language_confidence = lang_result[0]["score"]

    # No default here: an unmapped language must not be mistaken for English.
    source_code = LANGUAGE_CODES.get(detected_language)

    if detected_language == "en" or source_code == "eng_Latn":
        text_to_classify = text
        translated_report = "Already in English"
    elif source_code is None:
        # The translator needs a source-language code; without one, fall back
        # to classifying the original text and say so explicitly.
        text_to_classify = text
        translated_report = (
            f"Translation unavailable for language '{detected_language}'"
        )
    else:
        translation_result = translator(
            text, src_lang=source_code, tgt_lang="eng_Latn"
        )
        text_to_classify = translation_result[0]["translation_text"]
        translated_report = text_to_classify

    # Classify the English text (or the original when no translation was possible).
    hate_result = offensive_classifier(
        text_to_classify, truncation=True, max_length=512
    )

    language_output = {
        "language": detected_language,
        "confidence": language_confidence,
        "original_text": text,
        "translated_text": translated_report,
    }
    hate_output = {
        "label": hate_result[0]["label"],
        "score": hate_result[0]["score"],
    }
    return language_output, hate_output
# Assemble the Gradio UI: one text input feeding analyze_text, whose two
# returned dicts are rendered in two JSON panels.
_text_input = gr.Textbox(label="Inserisci testo")
_result_panels = [
    gr.JSON(label="Language Detection & Translation"),
    gr.JSON(label="Hate Speech Detection"),
]

iface = gr.Interface(
    fn=analyze_text,
    inputs=_text_input,
    outputs=_result_panels,
    title="Rileva Lingua, offese e parolaccie",
    description="Inserisci testo",
)

# Start the app on all network interfaces at port 7860, with share=True.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
)