Spaces:

CVMX-jaca-tonos
/

Spanish-Audio-Transcription-based-Sexism-Detection

Runtime error

App Files Files Community

DrishtiSharma committed on May 30, 2022

Commit

043c07c

•

1 Parent(s): 2f04e25

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -13

app.py CHANGED Viewed

@@ -11,18 +11,31 @@ def load_and_fix_data(input_file, model_sampling_rate):
         speech = librosa.resample(speech, sample_rate, model_sampling_rate)
     return speech
-feature_extractor = AutoFeatureExtractor.from_pretrained("jonatasgrosman/wav2vec2-xls-r-1b-spanish")
 sampling_rate = feature_extractor.sampling_rate
-asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-xls-r-1b-spanish")
-def predict_and_ctc_lm_decode(input_file):
-    speech = load_and_fix_data(input_file, sampling_rate)
-    transcribed_text = asr(speech, chunk_length_s=10, stride_length_s=1)["text"]
-    pipe2 = pipeline("text-classification", model = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021")
-    sexism_detection = pipe2(transcribed_text)[0]['label']
     if sexism_detection == "LABEL_0":
         return "The input audio contains NON-SEXIST language"
     else:
@@ -45,7 +58,7 @@ Pre-trained Model used for Sexism Detection : [hackathon-pln-es/twitter_sexismo-
 gr.Interface(
-    predict_and_ctc_lm_decode,
     inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")],
     #outputs=[gr.outputs.Label(num_top_classes=2),gr.outputs.Label(num_top_classes=2), gr.outputs.Label(num_top_classes=2)],
     outputs=[gr.outputs.Textbox(label="Predicción")],
@@ -54,4 +67,4 @@ gr.Interface(
     description=description,
     layout="horizontal",
     theme="huggingface",
-).launch(enable_queue=True, cache_examples=True)

         speech = librosa.resample(speech, sample_rate, model_sampling_rate)
     return speech
+# Load the feature extractor and instantiate the speech-recognition pipeline with pipeline()
+model_name1 = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"
+feature_extractor = AutoFeatureExtractor.from_pretrained(model_name1)
 sampling_rate = feature_extractor.sampling_rate
+asr = pipeline("automatic-speech-recognition", model=model_name1)
+#Instantiating a pipeline for classifying the text
+model_name2 = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
+classifier = pipeline("text-classification", model = model_name2)
+# Convert a recorded audio file to text.
+def speech_to_text(input_file):
+    """Return the transcription of the audio file at ``input_file``.
+
+    The audio is loaded with ``load_and_fix_data`` using the feature
+    extractor's ``sampling_rate`` and then passed to the ``asr`` pipeline
+    with ``chunk_length_s=15`` and ``stride_length_s=1``. The ``"text"``
+    entry of the pipeline result is returned.
+    """
+    # Fix: the loader defined in this file is load_and_fix_data; the name
+    # load_fix_data does not exist and raised NameError on every call.
+    speech = load_and_fix_data(input_file, sampling_rate)
+    transcribed_text = asr(speech, chunk_length_s=15, stride_length_s=1)["text"]
+    return transcribed_text
+# Classify a transcription with the text-classification pipeline.
+def sexism_detection(transcribed_text):
+    """Return the ``"label"`` of the first ``classifier`` prediction for the text."""
+    predictions = classifier(transcribed_text)
+    return predictions[0]["label"]
+# Transcribe the recorded audio and report whether the transcription is sexist.
+def asr_and_sexism_detection(input_file):
+    """Transcribe ``input_file`` and return a message describing the result.
+
+    The audio is transcribed with ``speech_to_text`` and the transcription
+    is labelled with ``sexism_detection``. For the label ``"LABEL_0"`` the
+    non-sexist message is returned; the other branch is not visible in
+    this diff hunk.
+    """
+    # Fix: pass the function's own parameter; input_text was never defined
+    # and raised NameError.
+    transcribed_text = speech_to_text(input_file)
+    # Fix: the result used to be bound to a local named sexism_detection,
+    # which made that name local to this function and turned the call
+    # itself into an UnboundLocalError. Use a distinct local name.
+    label = sexism_detection(transcribed_text)
+    if label == "LABEL_0":
         return "The input audio contains NON-SEXIST language"
     else:
 gr.Interface(
+    asr_and_sexism_detection,
     inputs=[gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")],
     #outputs=[gr.outputs.Label(num_top_classes=2),gr.outputs.Label(num_top_classes=2), gr.outputs.Label(num_top_classes=2)],
     outputs=[gr.outputs.Textbox(label="Predicción")],
     description=description,
     layout="horizontal",
     theme="huggingface",
+).launch(enable_queue=True)