DrishtiSharma committed on
Commit
8982daf
1 Parent(s): 114afff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -21
app.py CHANGED
@@ -1,8 +1,9 @@
 
1
  import gradio as gr
2
  import librosa
3
  from transformers import AutoFeatureExtractor, pipeline
4
 
5
-
6
  def load_and_fix_data(input_file, model_sampling_rate):
7
  speech, sample_rate = librosa.load(input_file)
8
  if len(speech.shape) > 1:
@@ -11,43 +12,59 @@ def load_and_fix_data(input_file, model_sampling_rate):
11
  speech = librosa.resample(speech, sample_rate, model_sampling_rate)
12
  return speech
13
 
14
-
15
- feature_extractor = AutoFeatureExtractor.from_pretrained("jonatasgrosman/wav2vec2-xls-r-1b-spanish")
 
16
  sampling_rate = feature_extractor.sampling_rate
 
 
 
 
 
 
 
 
 
 
17
 
18
- asr = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-xls-r-1b-spanish")
 
 
 
19
 
 
20
 
21
def _get_sentiment_pipeline():
    """Return the sentiment-analysis pipeline, building it only on first use.

    Constructing a ``pipeline`` on every request is wasteful, so the instance
    is stored on this function object and reused by later calls.
    """
    cached = getattr(_get_sentiment_pipeline, "_cached", None)
    if cached is None:
        cached = pipeline("sentiment-analysis", model="finiteautomata/beto-sentiment-analysis")
        _get_sentiment_pipeline._cached = cached
    return cached


def predict_and_ctc_lm_decode(input_file):
    """Transcribe an audio file and report the sentiment of the transcription.

    Args:
        input_file: Path of the audio file, passed to ``load_and_fix_data``.

    Returns:
        A string of the form ``"Detected Sentiment: <label>"``, where the label
        is taken from the first prediction of the sentiment pipeline.
    """
    speech = load_and_fix_data(input_file, sampling_rate)
    transcribed_text = asr(speech, chunk_length_s=12, stride_length_s=1)["text"]
    # Reuse the cached pipeline instead of re-instantiating it for each call.
    sentiment = _get_sentiment_pipeline()(transcribed_text)[0]["label"]
    return f"Detected Sentiment: {sentiment}"
27
 
28
- description = """ This is a Gradio demo for Sentiment Analysis of Transcribed Spanish Audio. First, we do Speech to Text, and then we perform sentiment analysis on the obtained transcription of the input audio.
29
 
 
 
 
 
30
 
31
- **Note regarding predicted labels : NEG --> NEGATIVE, NEU --> NEUTRAL, POS --> POSITIVE**
32
 
 
33
 
34
  Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)
35
 
36
-
37
  Pre-trained model used for Sentiment Analysis of transcribed audio: [finiteautomata/beto-sentiment-analysis](https://huggingface.co/finiteautomata/beto-sentiment-analysis)
38
  """
39
 
40
 
41
  gr.Interface(
42
- predict_and_ctc_lm_decode,
43
- inputs=[
44
- gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")
45
- ],
46
- #outputs=[gr.outputs.Label(num_top_classes=2),gr.outputs.Label(num_top_classes=2), gr.outputs.Label(num_top_classes=2)],
47
- outputs=[gr.outputs.Textbox(label="Predicción")],
48
- examples=[["audio_test.wav"], ["sample_audio.wav"], ["test1.wav"], ["test2.wav"], ["Example1.wav"]],
49
- title="Sentiment Analysis of Spanish Transcribed Audio",
50
  description=description,
51
  layout="horizontal",
52
  theme="huggingface",
53
- ).launch(enable_queue=True, cache_examples=True)
 
 
1
+ #Importing the required libraries
2
  import gradio as gr
3
  import librosa
4
  from transformers import AutoFeatureExtractor, pipeline
5
 
6
+ #Loading and fixing the audio file
7
  def load_and_fix_data(input_file, model_sampling_rate):
8
  speech, sample_rate = librosa.load(input_file)
9
  if len(speech.shape) > 1:
 
12
  speech = librosa.resample(speech, sample_rate, model_sampling_rate)
13
  return speech
14
 
15
+ #Loading the feature extractor and setting up the pipeline
16
+ model_asr = "jonatasgrosman/wav2vec2-xls-r-1b-spanish"
17
+ feature_extractor = AutoFeatureExtractor.from_pretrained(model_asr)
18
  sampling_rate = feature_extractor.sampling_rate
19
+ asr = pipeline("automatic-speech-recognition", model=model_asr)
20
+
21
+ #Instantiating the pipeline for sentiment analysis
22
+ model_sentiment_classifier = "finiteautomata/beto-sentiment-analysis"
23
+ classifier = pipeline("sentiment-analysis", model = model_sentiment_classifier)
24
+
25
+ #Defining a function for speech-to-text conversion
26
def speech_to_text(speech):
    """Transcribe audio with the module-level ``asr`` pipeline.

    Args:
        speech: Audio data handed unchanged to ``asr``.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    # chunk_length_s=12 and stride_length_s=1 are forwarded to the pipeline as-is.
    asr_result = asr(speech, chunk_length_s=12, stride_length_s=1)
    return asr_result["text"]
29
 
30
+ #Defining a function to classify sentiment of the resulting audio transcription
31
def sentiment_classifier(text):
    """Classify ``text`` with the module-level ``classifier`` pipeline.

    Args:
        text: The text passed to ``classifier``.

    Returns:
        The ``"label"`` entry of the first prediction returned by ``classifier``.
    """
    first_prediction = classifier(text)[0]
    return first_prediction["label"]
34
 
35
+ new_line = "\n\n\n"
36
 
37
+ #Defining a function that outputs audio transcription and the result of sentiment detection module
38
def asr_and_sentiment_detection(input_file):
    """Run transcription followed by sentiment classification on an audio file.

    Args:
        input_file: Path of the audio file, passed to ``load_and_fix_data``.

    Returns:
        A single string containing the transcription and the detected
        sentiment label, separated by the module-level ``new_line`` value.
    """
    # Load the audio at the sampling rate taken from the feature extractor.
    waveform = load_and_fix_data(input_file, sampling_rate)
    text = speech_to_text(waveform)
    label = sentiment_classifier(text)
    return f"Audio Transcription :{text} {new_line} Detected Sentiment: {label}"
 
43
 
 
44
 
45
+ inputs = [gr.inputs.Audio(source="microphone", type="filepath", label="Record your audio")]
46
+ outputs = [gr.outputs.Textbox(label="Predicción")]
47
+ examples = [["audio_test.wav"], ["sample_audio.wav"], ["test1.wav"], ["test2.wav"], ["Example1.wav"]]
48
+ title = "Spanish ASR and Sentiment Classifier"
49
 
50
+ description = """ This is a Gradio demo for Spanish ASR and Sentiment Analysis. First, we do Speech to Text conversion, and then we perform sentiment analysis on the obtained transcription of the input audio.
51
 
52
+ **Note regarding predicted labels : NEG --> NEGATIVE, NEU --> NEUTRAL, POS --> POSITIVE**
53
 
54
  Pre-trained model used for Spanish ASR: [jonatasgrosman/wav2vec2-xls-r-1b-spanish](https://huggingface.co/jonatasgrosman/wav2vec2-xls-r-1b-spanish)
55
 
 
56
  Pre-trained model used for Sentiment Analysis of transcribed audio: [finiteautomata/beto-sentiment-analysis](https://huggingface.co/finiteautomata/beto-sentiment-analysis)
57
  """
58
 
59
 
60
  gr.Interface(
61
+ asr_and_sentiment_detection,
62
+ inputs = inputs,
63
+ outputs=outputs,
64
+ examples=examples,
65
+ title=title,
 
 
 
66
  description=description,
67
  layout="horizontal",
68
  theme="huggingface",
69
+ ).launch(enable_queue=True)
70
+