Spaces:

CVMX-jaca-tonos
/

Spanish-Audio-Transcription-based-Sexism-Detection

Runtime error

App Files Files Community

Spanish-Audio-Transcription-based-Sexism-Detection / app.py

DrishtiSharma

Update app.py

2f04e25 about 2 years ago

raw

history blame

No virus

2.65 kB

	import gradio as gr
	import librosa
	from transformers import AutoFeatureExtractor, pipeline


	def load_and_fix_data(input_file, model_sampling_rate):
	    """Load an audio file as a 1-D waveform at the model's sampling rate.

	    Args:
	        input_file: Path (or file-like object) handed to ``librosa.load``.
	        model_sampling_rate: Sampling rate, in Hz, expected by the ASR model.

	    Returns:
	        The decoded waveform, down-mixed to a single channel if needed and
	        resampled to ``model_sampling_rate`` when the loaded rate differs.
	    """
	    speech, sample_rate = librosa.load(input_file)

	    # librosa lays multi-channel audio out as (channels, samples), so the
	    # down-mix has to run over axis 0; indexing [:, 0] / [:, 1] would add
	    # two individual samples instead of two channels.
	    if speech.ndim > 1:
	        speech = speech.mean(axis=0)

	    if sample_rate != model_sampling_rate:
	        # Pass the rates by keyword: recent librosa releases reject them as
	        # positional arguments, and older releases accept the same keywords.
	        speech = librosa.resample(
	            speech,
	            orig_sr=sample_rate,
	            target_sr=model_sampling_rate,
	        )
	    return speech


	# Hub checkpoint used both for the feature extractor and the ASR pipeline.
	ASR_MODEL_ID = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"

	# The feature extractor is loaded only to read the sampling rate that the
	# ASR checkpoint expects; incoming audio is resampled to this rate.
	feature_extractor = AutoFeatureExtractor.from_pretrained(ASR_MODEL_ID)
	sampling_rate = feature_extractor.sampling_rate

	# Speech-to-text pipeline shared by every prediction request.
	asr = pipeline("automatic-speech-recognition", model=ASR_MODEL_ID)


	# Text-classification pipeline, created on first use and then reused so the
	# classifier is not reloaded on every request.
	_sexism_classifier = None


	def _get_sexism_classifier():
	    """Return the sexism text-classification pipeline, building it once."""
	    global _sexism_classifier
	    if _sexism_classifier is None:
	        _sexism_classifier = pipeline(
	            "text-classification",
	            model="hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021",
	        )
	    return _sexism_classifier


	def predict_and_ctc_lm_decode(input_file):
	    """Transcribe an audio file and classify the transcription for sexism.

	    Args:
	        input_file: Path to the audio file to analyse, or ``None`` when no
	            audio was supplied.

	    Returns:
	        A human-readable verdict string: a non-sexist message when the
	        classifier's top label is ``"LABEL_0"``, a sexist-language message
	        for any other label, or a notice when no audio was supplied.
	    """
	    # Without this guard a missing recording fails inside the audio loader.
	    if input_file is None:
	        return "No audio input was provided"

	    speech = load_and_fix_data(input_file, sampling_rate)
	    transcribed_text = asr(speech, chunk_length_s=10, stride_length_s=1)["text"]

	    # Only the label of the first (top) classification result is used.
	    label = _get_sexism_classifier()(transcribed_text)[0]["label"]
	    if label == "LABEL_0":
	        return "The input audio contains NON-SEXIST language"
	    return "SEXIST LANGUAGE DETECTED"


	# Blurb shown in the demo UI (passed as ``description=`` to gr.Interface);
	# it explains the workflow, the meaning of the two labels, and links to the
	# two pre-trained models.
	description = """ This is a Gradio demo for Spanish audio transcription-based Sexism detection. The key objective is to detect whether the sexist language is present in the audio or not. To use this app, simply provide an audio input (audio recording or via microphone), which will subsequently be transcribed and classified as sexism/non-sexism pertaining to audio (transcription) with the help of pre-trained models.



	Note regarding the predicted label: LABEL_0: "NON SEXISM" or LABEL_1: "SEXISM"



	Pre-trained Model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)

	Pre-trained Model used for Sexism Detection : [hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021](https://huggingface.co/hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021)

	"""


	# Assemble the demo: one microphone recording in, one text verdict out.
	# NOTE(review): gr.inputs / gr.outputs and launch(cache_examples=...) look
	# like a legacy Gradio API -- confirm against the Gradio version pinned for
	# this Space before upgrading.
	audio_input = gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")
	prediction_output = gr.outputs.Textbox(label="Predicción")

	# Each example row holds the single audio-file argument of the predictor.
	example_clips = [
	    [clip]
	    for clip in ("audio1.wav", "audio2.wav", "audio3.wav", "audio4.wav", "sample_audio.wav")
	]

	demo = gr.Interface(
	    predict_and_ctc_lm_decode,
	    inputs=[audio_input],
	    outputs=[prediction_output],
	    examples=example_clips,
	    title="Spanish-Audio-Transcription-based-Sexism-Detection",
	    description=description,
	    layout="horizontal",
	    theme="huggingface",
	)
	demo.launch(enable_queue=True, cache_examples=True)