# Author: SoDehghan
# Commit: Update models/hsd_ar.py
# Revision: be29b3f (verified)
#def write():
#st.markdown(
#"""
# Hate Speech Detection in Arabic
# """
# )
import streamlit as st
from transformers import pipeline
import re
# Earlier checkpoints tried for these pipelines (kept for reference):
#   strength: "SoDehghan/test-strength"
#             "SoDehghan/BERTurk-hate-speech-strength-prediction"
#   target:   "SoDehghan/test-target"

# Classifier whose predicted label is looked up in sentiment_mapping and
# strength_mapping.
strength_pipeline_tr = pipeline(
    task="text-classification",
    model="SoDehghan/hs-ar-translate-syn-4class-for-tool",
)
# Target-group classifier. return_all_scores=True makes every label's score
# available so multi_label_target_classification can threshold them.
target_pipeline_tr = pipeline(
    task="text-classification",
    model="SoDehghan/targ-ar-translate-syn-for-tool",
    return_all_scores=True,
)
# Predefined Arabic example sentences offered in the "predefined examples"
# selectbox of the page.
AR_TWEETS_EXAMPLES = [
    "أريد أن يموت اللاجئون",
    "فليمت جميع اللاجئين",
    "كل اللاجئين لصوص",
    "أكره اللاجئين",
    "ارحلوا من بلدنا",
    "هنا مرة أخرى، سلوك اللاجئين المتحرش والمغتصب للاجئين، والآن يجب إزالة القطيع المنحرف من بلادنا",
    "النساء، اجلسن في البيت وإلا ستتعرضن للتحرش، لا تقولوا إنني لم أحذركن",
    "مكان المرأة هو المطبخ",
    "اليونان يجب أن تعرف حدودها، وإلا قد نأتي فجأة في ليلة",
    "لا ينبغي للسوريين أن يصوتوا",
    "لا ينبغي للاجئين التصويت",
    "لا ينبغي للاجئين الاستفادة من مباحث أمن الدولة",
    "للاجئين أيضاً الحق في العيش",
    "يتم اتخاذ تدابير شاملة ضمن الإطار القانوني لمنع أنشطة المنظمات الإرهابية",
    "تم الإعلان عن مواعيد الفعاليات الثقافية لهذا العام في إسرائيل",
    "تقوم رائدات الأعمال بافتتاح صالونات تجميل تلبي احتياجات الزبائن من الجاليات المسلمة وغيرها من الجاليات الأخرى",
    "تجري الاستعدادات في الأسواق في سوريا قبل الأعياد الإسلامية والمسيحية",
    "اشتريت تفاحًا من السوق",
    "طعام السوريين لذيذ جداً",
    "الأكل السوري لذيذ",
    "أحب السوريين كثيراً، فهم شعب متعاطف جداً",
]
# User-facing verdict strings for the binary (hate / non-hate) result.
NON_HATEFUL_RESPONSE = 'This content is classified as "non-hate" speech. ✅😊'
HATEFUL_RESPONSE = 'This content is classified as "hate" speech. ❌😔'

# Collapses the strength labels to a binary verdict: only LABEL_0 is non-hate,
# every other label shares the hateful response.
sentiment_mapping = {
    'LABEL_0': NON_HATEFUL_RESPONSE,
    **dict.fromkeys(('LABEL_1', 'LABEL_2', 'LABEL_3'), HATEFUL_RESPONSE),
}
# Human-readable severity description for each label emitted by the strength
# classifier.
strength_mapping = {
    # No hate speech.
    'LABEL_0': 'Severity Level: 0 - No Hate Detected',
    # Mild.
    'LABEL_1': 'Severity Level: 1 - Mild (Discriminatory Discourse)',
    # Moderate.
    'LABEL_2': 'Severity Level: 2 - Moderate (Exaggeration, Generalization, Attribution, Distortion, Symbolization)',
    # Severe.
    'LABEL_3': 'Severity Level: 3 - Severe (Swearing, Insult, Defamation, Dehumanization, Threat of Enmity/War/Attack/Murder/Harm)',
}
# Human-readable name of the targeted group for each label emitted by the
# target classifier.
target_mapping = {
    'LABEL_0': 'Target group not specified or not present',
    'LABEL_1': 'Country/Nationality/Race/Ethnicity',
    'LABEL_2': 'Religion',
    'LABEL_3': 'Gender/Sexual Orientation',
    'LABEL_4': 'Specific Viewpoint/Status/Practice; Occupational Position Group',
}
def remove_url(text):
    """Strip URLs from *text* and collapse whitespace.

    Every ``scheme://...`` token (word characters, ``://``, then everything
    up to the next whitespace) is replaced by a space. The result is split on
    whitespace and re-joined with single spaces, which also trims both ends.

    Args:
        text: Input string.

    Returns:
        The text without URLs and with runs of whitespace collapsed.
    """
    # Raw string: the previous non-raw literal relied on the invalid escape
    # sequences \w, \/ and \S, which Python reports as a warning
    # (SyntaxWarning from 3.12). The pattern itself is unchanged.
    without_urls = re.sub(r"(\w+:\/\/\S+)", " ", text)
    return " ".join(without_urls.split())
def remove_username(text):
    """Drop ``@handle`` mentions from *text* and collapse whitespace.

    A mention is ``@`` followed by one or more ASCII letters, digits or
    underscores. Each mention becomes a space, then the text is split on
    whitespace and re-joined with single spaces.

    Args:
        text: Input string.

    Returns:
        The text without mentions and with runs of whitespace collapsed.
    """
    mention_free = re.sub("([@][A-Za-z0-9_]+)", " ", text)
    tokens = mention_free.split()
    return ' '.join(tokens)
def remove_at_mark(text):
    """Return *text* with every ``@`` character replaced by a single space.

    Unlike the URL and username helpers, whitespace is not collapsed.
    """
    at_sign = re.compile(r'[@]')
    return at_sign.sub(' ', text)
def remove_tag_mark(text):
    """Return *text* with every ``#`` character replaced by a single space.

    Only the hash sign is removed; the hashtag's word is kept, and whitespace
    is not collapsed.
    """
    hash_sign = re.compile(r'[#]')
    return hash_sign.sub(' ', text)
# Characters that remove_punctuation_marks treats as punctuation. Raw string so
# the backslash is a literal character instead of the invalid "\," escape the
# previous non-raw literal relied on; the character set itself is unchanged.
_PUNCTUATION_MARKS = r'''!()-[]{};:'"\,<>./?$%^&_*~'''
# Translation table mapping each punctuation character to one space.
_PUNCTUATION_TO_SPACE = str.maketrans(
    _PUNCTUATION_MARKS, " " * len(_PUNCTUATION_MARKS)
)


def remove_punctuation_marks(text):
    """Replace each punctuation character in *text* with a single space.

    The punctuation set is ``_PUNCTUATION_MARKS``; characters outside it
    (for example ``@`` and ``#``) are left untouched. Whitespace is not
    collapsed, so the output has the same length as the input.

    NOTE(review): Arabic punctuation such as ، ؛ ؟ is not in the set —
    confirm this matches the preprocessing used when the models were trained.

    Args:
        text: Input string.

    Returns:
        The text with every listed punctuation character turned into a space.
    """
    # One pass over the text instead of one str.replace call per matching
    # character.
    return text.translate(_PUNCTUATION_TO_SPACE)
def preprocess_text(text):
    """Lower-case *text* and run it through the cleaning helpers in order.

    The order matters and is: usernames, remaining ``@`` marks, URLs, ``#``
    marks, then punctuation.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string produced by the last helper.
    """
    cleaning_steps = (
        remove_username,
        remove_at_mark,
        remove_url,
        remove_tag_mark,
        remove_punctuation_marks,
    )
    cleaned = text.lower()
    for step in cleaning_steps:
        cleaned = step(cleaned)
    return cleaned
def sidebar_callback():
    """Copy the selected predefined example into the text-area state.

    Reads the value stored under the ``'sidebar'`` session key (the key of
    the examples selectbox) and stores it under ``'tr_input'`` (the key of
    the analysis text area).
    """
    chosen_example = st.session_state['sidebar']
    st.session_state['tr_input'] = chosen_example
def multi_label_target_classification(tr_input, threshold=0.3):
    """Return the target-group labels whose score exceeds *threshold*.

    Runs ``target_pipeline_tr`` on the text and reads the first element of
    its output as the list of ``{'label': ..., 'score': ...}`` dicts for that
    text (the pipeline is created with ``return_all_scores=True``).

    NOTE(review): whether the scores are sigmoid or softmax outputs depends
    on the model configuration, which is not visible here — confirm before
    relying on several labels passing the threshold at once.

    Args:
        tr_input: Text to classify.
        threshold: Minimum score (exclusive) a label needs to be reported.
            Defaults to 0.3, the cut-off that used to be hard-coded.

    Returns:
        A list of raw label strings above the threshold, in pipeline order,
        or ``["No Target"]`` when none qualifies.
    """
    predictions = target_pipeline_tr(tr_input)
    # First entry holds the per-label scores for the single input text.
    label_scores = predictions[0]
    selected = [
        pred['label'] for pred in label_scores if pred['score'] > threshold
    ]
    # Keep the sentinel entry so callers always receive a non-empty list.
    return selected or ["No Target"]
def write():
    """Render the Arabic hate-speech detection page.

    Shows a title, a text area bound to the ``'tr_input'`` session key, an
    optional selectbox of predefined examples, and an "Analyze Text" button.
    On click the text is preprocessed once, classified by the strength and
    target pipelines, and three lines are written: the binary verdict, the
    severity description and the targeted group(s).

    Target labels are translated through ``target_mapping`` and joined with
    commas; a label missing from the mapping (including the ``"No Target"``
    sentinel) is shown unchanged. If the text is empty after preprocessing, a
    warning is shown instead of a prediction.
    """
    if 'tr_input' not in st.session_state:
        st.session_state['tr_input'] = ""

    st.markdown("\n# Hate Speech Detection in Arabic tweets\n")

    # NOTE(review): newer Streamlit releases are reported to reject text_area
    # heights below 68 px — confirm against the pinned Streamlit version.
    tr_input = st.text_area("Enter text for analysis:", height=50, key="tr_input")

    # Examples dropdown
    if st.toggle("Show predefined examples to test"):
        st.selectbox(
            "Select a predefined example to test:",
            AR_TWEETS_EXAMPLES,
            key='sidebar',
            on_change=sidebar_callback,
        )

    # Prediction button
    if st.button("Analyze Text", key="tr_predict"):
        st.write(" ")
        # Preprocess once and reuse the result for both classifiers.
        cleaned_input = preprocess_text(tr_input)
        if not cleaned_input.strip():
            # Nothing left to classify; a prediction here would be meaningless.
            st.warning("Please enter some text to analyze.")
        else:
            with st.spinner('Generating predictions...'):
                strength_label = strength_pipeline_tr(cleaned_input)[0]["label"]
                sentiment_tr = sentiment_mapping[strength_label]
                strength_tr = strength_mapping[strength_label]
                target_labels = multi_label_target_classification(cleaned_input)
            # Show readable group names rather than the repr of a list of raw
            # LABEL_n strings.
            target_tr = ", ".join(
                target_mapping.get(label, label) for label in target_labels
            )
            st.write(f" **Binary Classification:** {sentiment_tr}")
            st.write(f" **Strength of Hate:** {strength_tr}")
            st.write(f" **Target Towards:** {target_tr}")