Spaces:

CVMX-jaca-tonos
/

Sentiment-Analysis-of-Spanish-Transcribed-Audios

Runtime error

App Files Files Community

DrishtiSharma committed on May 30, 2022

Commit

8982daf

•

1 Parent(s): 114afff

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -21

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import gradio as gr
 import librosa
 from transformers import AutoFeatureExtractor, pipeline
 def load_and_fix_data(input_file, model_sampling_rate):
     speech, sample_rate = librosa.load(input_file)
     if len(speech.shape) > 1:
@@ -11,43 +12,59 @@ def load_and_fix_data(input_file, model_sampling_rate):
         speech = librosa.resample(speech, sample_rate, model_sampling_rate)
     return speech
-feature_extractor = AutoFeatureExtractor.from_pretrained("jonatasgrosman/wav2vec2-xls-r-1b-spanish")
 sampling_rate = feature_extractor.sampling_rate
-asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-xls-r-1b-spanish")
-def predict_and_ctc_lm_decode(input_file):
     speech = load_and_fix_data(input_file, sampling_rate)
-    transcribed_text = asr(speech, chunk_length_s=12, stride_length_s=1)["text"]
-    pipe1 = pipeline("sentiment-analysis", model = "finiteautomata/beto-sentiment-analysis")
-    sentiment = pipe1(transcribed_text)[0]["label"]
-    return f"Detected Sentiment: {sentiment}"
-description = """ This is a Gradio demo for Sentiment Analysis of Transcribed Spanish Audio. First, we do Speech to Text, and then we perform sentiment analysis on the obtained transcription of the input audio.
-**Note regarding predicted labels : NEG --> NEGATIVE, NEU --> NEUTRAL, POS --> POSITIVE**
 Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)
 Pre-trained model used for Sentiment Analysis of transcribed audio: [finiteautomata/beto-sentiment-analysis](https://huggingface.co/finiteautomata/beto-sentiment-analysis)
 """
 gr.Interface(
-    predict_and_ctc_lm_decode,
-    inputs=[
-        gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")
-    ],
-    #outputs=[gr.outputs.Label(num_top_classes=2),gr.outputs.Label(num_top_classes=2), gr.outputs.Label(num_top_classes=2)],
-    outputs=[gr.outputs.Textbox(label="Predicción")],
-    examples=[["audio_test.wav"], ["sample_audio.wav"], ["test1.wav"], ["test2.wav"], ["Example1.wav"]],
-    title="Sentiment Analysis of Spanish Transcribed Audio",
     description=description,
     layout="horizontal",
     theme="huggingface",
-).launch(enable_queue=True, cache_examples=True)

+#Importing the required libraries
 import gradio as gr
 import librosa
 from transformers import AutoFeatureExtractor, pipeline
+#Loading and fixing the audio file
 def load_and_fix_data(input_file, model_sampling_rate):
     speech, sample_rate = librosa.load(input_file)
     if len(speech.shape) > 1:
         speech = librosa.resample(speech, sample_rate, model_sampling_rate)
     return speech
+#Loading the feature extractor and setting up the pipeline
+model_asr = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"
+feature_extractor = AutoFeatureExtractor.from_pretrained(model_asr)
 sampling_rate = feature_extractor.sampling_rate
+asr = pipeline("automatic-speech-recognition", model=model_asr)
+#Instantiating the pipeline for sentiment analysis
+model_sentiment_classifier = "finiteautomata/beto-sentiment-analysis"
+classifier = pipeline("sentiment-analysis", model = model_sentiment_classifier)
+#Defining a function for speech-to-text conversion
+def speech_to_text(speech):
+    audio_transcription = asr(speech, chunk_length_s = 12, stride_length_s=1)["text"]
+    return audio_transcription
+#Defining a function to classify the sentiment of the resulting audio transcription
+def sentiment_classifier(text):
+    detected_sentiment = classifier(text)[0]["label"]
+    return detected_sentiment
+new_line = "\n\n\n"
+#Defining a function that outputs the audio transcription and the result of the sentiment detection module
+def asr_and_sentiment_detection(input_file):
     speech = load_and_fix_data(input_file, sampling_rate)
+    transcription = speech_to_text(speech)
+    sentiment = sentiment_classifier(transcription)
+    return f"Audio Transcription :{transcription} {new_line} Detected Sentiment: {sentiment}"
+inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")]
+outputs = [gr.outputs.Textbox(label="Predicción")]
+examples = [["audio_test.wav"], ["sample_audio.wav"], ["test1.wav"], ["test2.wav"], ["Example1.wav"]]
+title = "Spanish ASR and Sentiment Classifier"
+description = """ This is a Gradio demo for Spanish ASR and Sentiment Analysis. First, we do Speech to Text conversion, and then we perform sentiment analysis on the obtained transcription of the input audio.
+**Note regarding predicted labels : NEG --> NEGATIVE, NEU --> NEUTRAL, POS --> POSITIVE**
 Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)
 Pre-trained model used for Sentiment Analysis of transcribed audio: [finiteautomata/beto-sentiment-analysis](https://huggingface.co/finiteautomata/beto-sentiment-analysis)
 """
 gr.Interface(
+    asr_and_sentiment_detection,
+    inputs = inputs,
+    outputs=outputs,
+    examples=examples,
+    title=title,
     description=description,
     layout="horizontal",
     theme="huggingface",
+).launch(enable_queue=True)