Spaces:
Sleeping
Sleeping
import gradio as gr
from transformers import pipeline, AutoTokenizer
from turkish_lm_tuner import T5ForClassification
import os

# Hugging Face auth token for downloading gated/private checkpoints.
# May be None when the Space has no HF_AUTH_TOKEN secret configured.
hf_auth_token = os.getenv('HF_AUTH_TOKEN')
# SECURITY FIX: never echo the raw token to stdout/logs — a Space's logs
# are visible to collaborators. Report only whether it is configured.
print('HF_AUTH_TOKEN configured:', hf_auth_token is not None)
# Sample inputs displayed in the Gradio UI for each task tab.
binary_classification_examples = [
    ["Yahudi terörüne karşı protestolar kararlılıkla devam ediyor."],
]
categorization_examples = [
    ["Ermeni zulmü sırasında hayatını kaybeden kadınlar anısına dikilen anıt ziyarete açıldı."],
]
target_detection_examples = [
    ["Dün 5 bin suriyeli enik doğmuştur zaten Türkiyede 5 bin suriyelinin gitmesi çok çok az"],
]

# Markdown rendered on the "About" tab and in the citation footer.
APP_DESCRIPTION = """
## Hate Speech Detection in Turkish News
This tool performs hate speech detection across several tasks, including binary classification, categorization, and target detection. Choose a model and input text to analyze its hatefulness, categorize it, or detect targets of hate speech.
"""
APP_CITATION = """
For citation, please refer to the tool's documentation.
"""
# Human-readable label for the binary hate-speech classifier's output.
binary_mapping = {
    'LABEL_0': 'non-hateful',
    'LABEL_1': 'hateful',
}

# Coarse hate-speech categories (three-way split).
category_mapping = {
    'LABEL_0': 'non-hateful',
    'LABEL_1': 'symbolization/exaggeration/generalization/attribution/distortion',
    'LABEL_2': 'swearing/insult/defamation/dehumanization/threat of enmity/war/attack/murder/harm',
}

# Target groups recognised by the target-detection classifier, in label order.
_TARGET_GROUPS = (
    'No-group', 'Refugees', 'Israel-Jews', 'Greeks', 'Armenian',
    'Alevi', 'Kurdish', 'Arabian', 'LGBTI+', 'Women', 'Other groups',
)
target_mapping = {f'LABEL_{index}': group for index, group in enumerate(_TARGET_GROUPS)}
def inference_t5(input_text, selected_model):
    """Run a TURNA (T5) classification checkpoint on *input_text*.

    BUG FIXES vs. the previous version:
    - `selected_model` was ignored and one checkpoint was hard-coded; the
      selected checkpoint name is now used (the hard-coded id matched the
      only 'turna' choice offered in the UI, so behavior for existing
      callers is unchanged).
    - raw logits were returned, so the UI textbox showed a tensor repr;
      the argmax is now mapped through `binary_mapping` like the other
      task functions.

    Returns the label string ('non-hateful' / 'hateful'), or 'error' for
    an unexpected class index.
    """
    model_id = f"gokceuludogan/{selected_model}"
    model = T5ForClassification.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    logits = model(**tokenizer(input_text, return_tensors='pt')).logits
    predicted_class = int(logits.argmax(dim=-1).item())
    return binary_mapping.get(f'LABEL_{predicted_class}', 'error')
| # Functions for model-based tasks | |
def perform_binary_classification(input_text, selected_model):
    """Classify *input_text* as hateful/non-hateful using the chosen model.

    TURNA checkpoints are routed through the custom T5 inference path;
    all other checkpoints run through a standard transformers pipeline.
    """
    is_turna = selected_model is not None and 'turna' in selected_model
    if is_turna:
        return inference_t5(input_text, selected_model)
    classifier = pipeline(model=f'gokceuludogan/{selected_model}')
    prediction = classifier(input_text)[0]
    return binary_mapping.get(prediction['label'], 'error')
def perform_categorization(input_text):
    """Categorize the type of hate speech in *input_text*.

    Returns one of the `category_mapping` labels, or 'error' when the
    classifier emits an unexpected label.
    """
    classifier = pipeline(model='gokceuludogan/berturk_tr_hateprint_cat_class_w0.1_b128')
    prediction = classifier(input_text)[0]
    return category_mapping.get(prediction['label'], 'error')
| # def perform_target_detection(input_text): | |
| # model = pipeline(model='gokceuludogan/turna_generation_tr_hateprint_target') | |
| # return model(input_text)[0]['generated_text'] | |
def perform_target_detection(input_text):
    """Detect which group (if any) is targeted by hate speech in *input_text*.

    Returns one of the `target_mapping` group names, or 'error' when the
    classifier emits an unexpected label.
    """
    classifier = pipeline(model='gokceuludogan/berturk_tr_hateprint_target_class_w0.1')
    prediction = classifier(input_text)[0]
    return target_mapping.get(prediction['label'], 'error')
def perform_multi_detection(input_text):
    """Run the joint generation model that reports hatefulness, category
    and target in one pass; returns the generated text verbatim."""
    generator = pipeline(model='gokceuludogan/turna_generation_tr_hateprint_multi')
    return generator(input_text)[0]['generated_text']
# Gradio interface: one tab per task, each wiring a textbox + button to
# the corresponding inference function above.
with gr.Blocks(theme="abidlabs/Lime") as hate_speech_demo:
    # Main description
    with gr.Tab("About"):
        gr.Markdown(APP_DESCRIPTION)

    # Binary Classification Tab
    with gr.Tab("Binary Classification"):
        gr.Markdown("Analyze the hatefulness of a given text using selected models.")
        with gr.Column():
            model_choice_binary = gr.Radio(
                choices=[
                    "turna_tr_hateprint_w0.1_new_",
                    "berturk_tr_hateprint_w0.1_b128_v2",
                ],
                label="Select Model",
                # BUG FIX: the default must be one of `choices`; the previous
                # value "turna_tr_hateprint" was not a listed option, so the
                # radio started in an invalid state.
                value="turna_tr_hateprint_w0.1_new_",
            )
            text_input_binary = gr.Textbox(label="Input Text")
            classify_button = gr.Button("Analyze")
            classification_output = gr.Textbox(label="Classification Result")
            classify_button.click(
                perform_binary_classification,
                inputs=[text_input_binary, model_choice_binary],
                outputs=classification_output,
            )

    # Hate Speech Categorization Tab
    with gr.Tab("Hate Speech Categorization"):
        gr.Markdown("Categorize the hate speech type in the provided text.")
        with gr.Column():
            text_input_category = gr.Textbox(label="Input Text")
            categorize_button = gr.Button("Categorize")
            categorization_output = gr.Textbox(label="Categorization Result")
            categorize_button.click(
                perform_categorization,
                inputs=[text_input_category],
                outputs=categorization_output,
            )

    # Target Detection Tab
    with gr.Tab("Target Detection"):
        gr.Markdown("Detect the targets of hate speech in the provided text.")
        with gr.Column():
            text_input_target = gr.Textbox(label="Input Text")
            target_button = gr.Button("Detect Targets")
            target_output = gr.Textbox(label="Target Detection Result")
            target_button.click(
                perform_target_detection,
                inputs=[text_input_target],
                outputs=target_output,
            )

    # Multi Detection Tab
    with gr.Tab("Multi Detection"):
        gr.Markdown("Detect hate speech, its category, and its targets in the text.")
        with gr.Column():
            text_input_multi = gr.Textbox(label="Input Text")
            multi_button = gr.Button("Detect All")
            multi_output = gr.Textbox(label="Multi Detection Result")
            multi_button.click(
                perform_multi_detection,
                inputs=[text_input_multi],
                outputs=multi_output,
            )

    # Citation Section
    gr.Markdown(APP_CITATION)

# Launch the application
hate_speech_demo.launch()