File size: 3,012 Bytes
d9e3953 afc5c99 ee55457 d9e3953 ee55457 d9e3953 ee55457 d9e3953 ee55457 d9e3953 ee55457 d9e3953 ee55457 d9e3953 826ab06 d9e3953 34b8794 d9e3953 f9697a4 d9e3953 08ca6e7 826ab06 81b2874 f1e7215 1e4b89a f1e7215 c6bdae3 f1e7215 48b85f9 ee55457 d9e3953 1e4b89a 378d519 ee55457 8f07d61 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
#importing all the required libraries
import gradio as gr
import librosa
from transformers import AutoFeatureExtractor, pipeline
# Load an audio file and bring it into the shape/rate the ASR model expects
def load_and_fix_data(input_file, model_sampling_rate):
    """Load ``input_file`` as a single-channel waveform at ``model_sampling_rate``.

    Args:
        input_file: Path of the audio file; handed to ``librosa.load``.
        model_sampling_rate: Sampling rate the downstream model expects.

    Returns:
        The waveform from ``librosa.load``, down-mixed if it has more than
        one dimension and resampled if its rate differs from
        ``model_sampling_rate``.
    """
    speech, sample_rate = librosa.load(input_file)

    # librosa.load returns mono audio by default, so this is only a safety
    # net. librosa lays multi-channel audio out as (channels, samples), so
    # let librosa do the down-mix rather than indexing columns by hand.
    if speech.ndim > 1:
        speech = librosa.to_mono(speech)

    if sample_rate != model_sampling_rate:
        # The rates must be passed by keyword: recent librosa releases no
        # longer accept orig_sr / target_sr positionally.
        speech = librosa.resample(
            speech, orig_sr=sample_rate, target_sr=model_sampling_rate
        )
    return speech
# Spanish speech-recognition model: read the sampling rate from its feature
# extractor, then build the transcription pipeline from the same checkpoint
model_name1 = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"
feature_extractor = AutoFeatureExtractor.from_pretrained(model_name1)
sampling_rate = feature_extractor.sampling_rate
asr = pipeline(task="automatic-speech-recognition", model=model_name1)
# Text-classification pipeline used for harassment detection on the transcript
model_name2 = "hackathon-pln-es/Detect-Acoso-Twitter-Es"
classifier = pipeline(task="text-classification", model=model_name2)
# Speech-to-text conversion
def speech_to_text(input_file):
    """Transcribe the audio file at ``input_file`` and return the text.

    The audio is prepared with ``load_and_fix_data`` at the module-level
    ``sampling_rate`` and then passed to the ``asr`` pipeline.
    """
    waveform = load_and_fix_data(input_file, sampling_rate)
    # chunk_length_s / stride_length_s are forwarded to the ASR pipeline.
    asr_output = asr(waveform, chunk_length_s=15, stride_length_s=1)
    return asr_output["text"]
# Harassment detection (text classification)
def harassment_detector(transcribed_text):
    """Classify ``transcribed_text`` and return the label of the first result."""
    predictions = classifier(transcribed_text)
    first_prediction = predictions[0]
    return first_prediction["label"]
# Full demo step: transcribe the audio, classify the transcript, and format
# both results into one string for display
new_line = "\n\n\n"


def asr_and_harassment_detection(input_file):
    """Transcribe ``input_file`` and report the harassment classification.

    Args:
        input_file: Path of the recorded audio file. May be ``None`` or
            empty when the form is submitted without a recording.

    Returns:
        A string containing the transcription and the classifier label, or
        a short notice when no audio was supplied.
    """
    # Without a file path there is nothing to load; return a readable notice
    # instead of letting the audio loader fail on a missing path.
    if not input_file:
        return "No audio input received. Please record an audio clip and try again."

    transcribed_text = speech_to_text(input_file)
    harassment_detection = harassment_detector(transcribed_text)
    return f"Audio Transcription :{transcribed_text} {new_line} Audio content is: {harassment_detection}"
# Gradio UI wiring: one microphone input, one text output, bundled examples
inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio"),
]
outputs = [
    gr.outputs.Textbox(label="Predicción"),
]
examples = [
    ["audio2.wav"],
    ["sample_audio.wav"],
    ["test1.wav"],
    ["test2.wav"],
]
title = "Spanish-Audio-Transcription-based-Harassment-Detection"
description = """ This is a Gradio demo for Spanish audio transcription-based harassment detection. To use this, simply provide an audio input (audio recording or via microphone), which will subsequently be transcribed and classified as Harassment/non-harassment pertaining to audio (transcription) with the help of pre-trained models.
Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)
Pre-trained model used for Harassment Detection: [hackathon-pln-es/Detect-Acoso-Twitter-Es](https://huggingface.co/hackathon-pln-es/Detect-Acoso-Twitter-Es)"""

# Build the interface around the combined transcription + detection function
# and start serving it.
gr.Interface(
    fn=asr_and_harassment_detection,
    inputs=inputs,
    outputs=outputs,
    examples=examples,
    title=title,
    description=description,
    layout="horizontal",
    theme="huggingface",
).launch(enable_queue=True)
|