# Page banner captured together with the source ("Spaces: Sleeping");
# it is not part of the program.
#def write(): | |
#st.markdown( | |
#""" | |
# Hate Speech Detection in Arabic | |
# """ | |
# ) | |
import streamlit as st | |
from transformers import pipeline | |
import re | |
# Checkpoints previously wired into these two slots (kept for reference):
#   strength: "SoDehghan/test-strength"
#             "SoDehghan/BERTurk-hate-speech-strength-prediction"
#   target:   "SoDehghan/test-target"

# Both models are served through the same transformers pipeline task.
_PIPELINE_TASK = "text-classification"

# Classifier whose top label (LABEL_0..LABEL_3) drives both the binary
# verdict and the severity level shown to the user.
strength_pipeline_tr = pipeline(
    task=_PIPELINE_TASK,
    model="SoDehghan/hs-ar-translate-syn-4class-for-tool",
)

# Target-group classifier. return_all_scores=True asks for a score per
# label, which the threshold filter in multi_label_target_classification
# iterates over.
target_pipeline_tr = pipeline(
    task=_PIPELINE_TASK,
    model="SoDehghan/targ-ar-translate-syn-for-tool",
    return_all_scores=True,
)
# Predefined Arabic sample sentences offered in the "examples" selectbox.
AR_TWEETS_EXAMPLES = [
    "أريد أن يموت اللاجئون",
    "فليمت جميع اللاجئين",
    "كل اللاجئين لصوص",
    "أكره اللاجئين",
    "ارحلوا من بلدنا",
    "هنا مرة أخرى، سلوك اللاجئين المتحرش والمغتصب للاجئين، والآن يجب إزالة القطيع المنحرف من بلادنا",
    "النساء، اجلسن في البيت وإلا ستتعرضن للتحرش، لا تقولوا إنني لم أحذركن",
    "مكان المرأة هو المطبخ",
    "اليونان يجب أن تعرف حدودها، وإلا قد نأتي فجأة في ليلة",
    "لا ينبغي للسوريين أن يصوتوا",
    "لا ينبغي للاجئين التصويت",
    "لا ينبغي للاجئين الاستفادة من مباحث أمن الدولة",
    "للاجئين أيضاً الحق في العيش",
    "يتم اتخاذ تدابير شاملة ضمن الإطار القانوني لمنع أنشطة المنظمات الإرهابية",
    "تم الإعلان عن مواعيد الفعاليات الثقافية لهذا العام في إسرائيل",
    "تقوم رائدات الأعمال بافتتاح صالونات تجميل تلبي احتياجات الزبائن من الجاليات المسلمة وغيرها من الجاليات الأخرى",
    "تجري الاستعدادات في الأسواق في سوريا قبل الأعياد الإسلامية والمسيحية",
    "اشتريت تفاحًا من السوق",
    "طعام السوريين لذيذ جداً",
    "الأكل السوري لذيذ",
    "أحب السوريين كثيراً، فهم شعب متعاطف جداً",
]
# User-facing verdicts for the binary (hate / non-hate) view of the result.
NON_HATEFUL_RESPONSE = 'This content is classified as "non-hate" speech. ✅😊'
HATEFUL_RESPONSE = 'This content is classified as "hate" speech. ❌😔'

# Collapse the four strength labels into the binary verdict: only LABEL_0
# counts as non-hate, every higher label is reported as hate.
sentiment_mapping = {
    f"LABEL_{level}": NON_HATEFUL_RESPONSE if level == 0 else HATEFUL_RESPONSE
    for level in range(4)
}
# Severity descriptions, ordered so that position i belongs to LABEL_i.
_SEVERITY_DESCRIPTIONS = (
    'Severity Level: 0 - No Hate Detected',
    'Severity Level: 1 - Mild (Discriminatory Discourse)',
    'Severity Level: 2 - Moderate (Exaggeration, Generalization, Attribution, Distortion, Symbolization)',
    'Severity Level: 3 - Severe (Swearing, Insult, Defamation, Dehumanization, Threat of Enmity/War/Attack/Murder/Harm)',
)

# Strength-model label -> text shown under "Strength of Hate".
strength_mapping = {
    f"LABEL_{index}": description
    for index, description in enumerate(_SEVERITY_DESCRIPTIONS)
}
# Target-group names, ordered so that position i belongs to LABEL_i.
_TARGET_GROUP_NAMES = (
    'Target group not specified or not present',
    'Country/Nationality/Race/Ethnicity',
    'Religion',
    'Gender/Sexual Orientation',
    'Specific Viewpoint/Status/Practice; Occupational Position Group',
)

# Target-model label -> human-readable target group.
target_mapping = dict(
    zip(
        (f"LABEL_{index}" for index in range(len(_TARGET_GROUP_NAMES))),
        _TARGET_GROUP_NAMES,
    )
)
# Anything shaped like "<scheme>://<non-space run>". Written as a raw string
# so that \w and \S are regex escapes rather than (invalid) string escapes.
_URL_PATTERN = re.compile(r"\w+://\S+")


def remove_url(text):
    """Return *text* with URLs removed and whitespace normalised.

    Every ``scheme://...`` token is replaced by a space, then the result is
    split on whitespace and re-joined with single spaces, so leading,
    trailing and repeated whitespace is collapsed as well.
    """
    without_urls = _URL_PATTERN.sub(" ", text)
    return " ".join(without_urls.split())
def remove_username(text):
    """Return *text* without ``@handle`` mentions, whitespace normalised.

    A mention is an ``@`` followed by one or more ASCII letters, digits or
    underscores. Each mention becomes a space, after which the text is
    split on whitespace and re-joined with single spaces.
    """
    mentions_blanked = re.sub(pattern="([@][A-Za-z0-9_]+)", repl=" ", string=text)
    words = mentions_blanked.split()
    return ' '.join(words)
def remove_at_mark(text):
    """Return *text* with every ``@`` character replaced by a single space.

    Whitespace is not collapsed; each ``@`` maps to exactly one space.
    A plain ``str.replace`` is used because no pattern matching is needed
    for a single literal character.
    """
    return text.replace("@", " ")
def remove_tag_mark(text):
    """Return *text* with every ``#`` character replaced by a single space.

    Only the hash sign is dropped; the hashtag's word is kept. Whitespace is
    not collapsed. A plain ``str.replace`` is used because no pattern
    matching is needed for a single literal character.
    """
    return text.replace("#", " ")
# Characters treated as punctuation. Raw string so the backslash is a literal
# member of the set instead of starting an (invalid) escape sequence.
_PUNCTUATION_CHARS = r'''!()-[]{};:'"\,<>./?$%^&_*~'''

# Translation table mapping each punctuation character to a single space;
# built once so every call is a single pass over the text.
_PUNCTUATION_TABLE = str.maketrans(dict.fromkeys(_PUNCTUATION_CHARS, " "))


def remove_punctuation_marks(text):
    """Return *text* with each listed punctuation character turned into a space.

    The set is ``!()-[]{};:'"\\,<>./?$%^&_*~``. Characters outside it pass
    through unchanged; that includes ``#``, ``@`` and Arabic punctuation
    such as ``،``. Whitespace is not collapsed: one punctuation character
    yields exactly one space.
    """
    return text.translate(_PUNCTUATION_TABLE)
def preprocess_text (text):
    """Normalise raw tweet text before it is handed to the classifiers.

    The text is lower-cased and then passed through the cleaning helpers in
    a fixed order:

    1. ``remove_username`` runs before ``remove_at_mark``, otherwise the
       ``@`` would be gone and the handle text would be left behind.
    2. ``remove_url`` runs before ``remove_punctuation_marks``, otherwise
       the ``:``, ``/`` and ``.`` of a URL would be blanked and the URL
       pattern would no longer match.
    3. ``remove_tag_mark`` drops ``#`` but keeps the hashtag word.

    Hashtag segmentation (``replace_hashtags_with_segments``) is currently
    not applied.
    """
    cleaning_steps = (
        remove_username,
        remove_at_mark,
        remove_url,
        remove_tag_mark,
        remove_punctuation_marks,
    )
    cleaned = text.lower()
    for step in cleaning_steps:
        cleaned = step(cleaned)
    return cleaned
def sidebar_callback():
    """Copy the selected predefined example into the analysis text box.

    Reads the value stored under the ``'sidebar'`` session-state key (the
    examples selectbox) and writes it to ``'tr_input'`` (the text area).
    """
    chosen_example = st.session_state['sidebar']
    st.session_state['tr_input'] = chosen_example
def multi_label_target_classification(tr_input, threshold=0.3):
    """Return the target groups the target model assigns to *tr_input*.

    Args:
        tr_input: Text to classify (a single string).
        threshold: A label is kept only when its score is strictly greater
            than this value. Defaults to 0.3.

    Returns:
        A list of human-readable target-group names, one per label above the
        threshold, in the order the pipeline reports them. Labels missing
        from ``target_mapping`` are returned as the raw label id. When no
        label passes the threshold the list is ``["No Target"]``.
    """
    # The pipeline is called with one string, and the per-label score list
    # for that string is the first element of what it returns.
    label_scores = target_pipeline_tr(tr_input)[0]

    # NOTE(review): the scores are assumed to be independent per-label
    # probabilities (multi-label / sigmoid); confirm against the model config.
    targets = [
        target_mapping.get(entry['label'], entry['label'])
        for entry in label_scores
        if entry['score'] > threshold
    ]
    return targets or ["No Target"]
def write():
    """Render the Arabic hate-speech detection page.

    Shows a text area (session-state key ``'tr_input'``), an optional
    selectbox of predefined examples that fills the text area through
    ``sidebar_callback``, and an "Analyze Text" button. On click the text is
    preprocessed once, classified by the strength model and the target
    model, and the binary verdict, severity level and target groups are
    written to the page. Blank input produces a warning instead of a model
    call.
    """
    # The key must exist before the text_area widget bound to it is created.
    if 'tr_input' not in st.session_state:
        st.session_state['tr_input'] = ""

    st.markdown("# Hate Speech Detection in Arabic tweets")

    # st.markdown("""
    # A brief description of the model and the task it was trained on.
    # """)

    # 68 px is the smallest height st.text_area accepts; smaller values are
    # rejected by current Streamlit releases.
    tr_input = st.text_area("Enter text for analysis:", height=68, key="tr_input")

    # Examples dropdown
    show_examples = st.toggle("Show predefined examples to test")
    if show_examples:
        st.selectbox(
            "Select a predefined example to test:",
            AR_TWEETS_EXAMPLES,
            key='sidebar',
            on_change=sidebar_callback,
        )

    # Prediction button
    if st.button("Analyze Text", key="tr_predict"):
        # Preprocess once and feed the same cleaned text to both models.
        cleaned_input = preprocess_text(tr_input)

        if not cleaned_input.strip():
            # Nothing left to classify (empty box, or only mentions / URLs /
            # punctuation), so skip the models.
            st.warning("Please enter some text to analyze.")
        else:
            st.write(" ")
            with st.spinner('Generating predictions...'):
                strength_label = strength_pipeline_tr(cleaned_input)[0]["label"]
                sentiment_tr = sentiment_mapping[strength_label]
                strength_tr = strength_mapping[strength_label]
                targets = multi_label_target_classification(cleaned_input)

            # Show the targets as plain comma-separated text rather than a
            # Python list repr.
            target_tr = ", ".join(targets)

            st.write(f" **Binary Classification:** {sentiment_tr}")
            st.write(f" **Strength of Hate:** {strength_tr}")
            st.write(f" **Target Towards:** {target_tr}")