alvi123 committed
Commit 51c4e93 • 1 Parent(s): 883f3cb

Update app.py

Files changed (1)
  1. app.py +16 -95
app.py CHANGED
@@ -1,102 +1,23 @@
  import gradio as gr
- import wave
- import matplotlib.pyplot as plt
- import numpy as np
- from extract_features import *
- import pickle
- import soundfile
- import librosa
-
- classifier = pickle.load(open('finalized_rf.sav', 'rb'))
-
- def emotion_predict(input):
-     input_features = extract_feature(input, mfcc=True, chroma=True, mel=True, contrast=True, tonnetz=True)
-     rf_prediction = classifier.predict(input_features.reshape(1,-1))
-     if rf_prediction == 'happy':
-         return 'kata-kerja '
-     elif rf_prediction == 'neutral':
-         return 'kata-benda '
-     elif rf_prediction == 'sad':
-         return 'kata-sifat '
-     else:
-         return 'kata-keterangan'
-
- def plot_fig(input):
-     wav = wave.open(input, 'r')
-
-     raw = wav.readframes(-1)
-     raw = np.frombuffer(raw, "int16")
-     sampleRate = wav.getframerate()
-
-     Time = np.linspace(0, len(raw)/sampleRate, num=len(raw))
-
-     fig = plt.figure()
-
-     plt.rcParams["figure.figsize"] = (50,15)
-
-     plt.title("Waveform Of the Audio", fontsize=25)
-
-     plt.xticks(fontsize=15)
-
-     plt.yticks(fontsize=15)
-
-     plt.ylabel("Amplitude", fontsize=25)
-
-     plt.plot(Time, raw, color='red')
-
-     return fig
-
-
- with gr.Blocks() as app:
-     gr.Markdown(
-         """
-         # Speech Detected 🎡😍
-         This application classifies inputted audio 🔊 according to the prediction into four categories:
-         1. kata-benda 😎
-         2. kata-kerja 😐
-         3. kata-sifat 😢
-         4. kata-keterangan 😤
-         """
-     )
-     with gr.Tab("Record Audio"):
-         record_input = gr.Audio(source="microphone", type="filepath")
-
-         with gr.Accordion("Audio Visualization", open=False):
-             gr.Markdown(
-                 """
-                 ### Visualization will work only after Audio has been submitted
-                 """
-             )
-             plot_record = gr.Button("Display Audio Signal")
-             plot_record_c = gr.Plot(label='Waveform Of the Audio')
-
-         record_button = gr.Button("Detect Emotion")
-         record_output = gr.Text(label='Emotion Detected')
-
-     with gr.Tab("Upload Audio File"):
-         gr.Markdown(
-             """
-             ## Uploaded Audio should be of .wav format
-             """
-         )
-
-         upload_input = gr.Audio(type="filepath")
-
-         with gr.Accordion("Audio Visualization", open=False):
-             gr.Markdown(
-                 """
-                 ### Visualization will work only after Audio has been submitted
-                 """
-             )
-             plot_upload = gr.Button("Display Audio Signal")
-             plot_upload_c = gr.Plot(label='Waveform Of the Audio')
-
-         upload_button = gr.Button("Detect Emotion")
-         upload_output = gr.Text(label='Emotion Detected')
-
-     record_button.click(emotion_predict, inputs=record_input, outputs=record_output)
-     upload_button.click(emotion_predict, inputs=upload_input, outputs=upload_output)
-     plot_record.click(plot_fig, inputs=record_input, outputs=plot_record_c)
-     plot_upload.click(plot_fig, inputs=upload_input, outputs=plot_upload_c)
-
- app.launch()
+ from transformers import pipeline
+
+
+ model = pipeline(task="automatic-speech-recognition",
+                  model="facebook/s2t-medium-librispeech-asr")
+
+
+ def predict_speech_to_text(audio):
+     prediction = model(audio)
+     text = prediction['text']
+     return text
+
+
+ gr.Interface(fn=predict_speech_to_text,
+              title="Automatic Speech Recognition (ASR)",
+              inputs=gr.inputs.Audio(
+                  source="microphone", type="filepath", label="Input"),
+              outputs=gr.outputs.Textbox(label="Output"),
+              description="Using pipeline with Facebook S2T for ASR.",
+              examples=['ljspeech.wav'],
+              allow_flagging='never'
+              ).launch(share=True)
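
Note on the removed code: extract_feature is imported from an extract_features module that is not part of this diff, so its exact contents are unknown. Below is a minimal sketch of the widely copied librosa-based recipe such helpers usually follow, matching the call signature used above; every identifier and the exact feature set here are assumptions, not this repo's actual module.

# Hypothetical reconstruction of extract_features.extract_feature -- an
# assumption based on the common librosa recipe, not this Space's code.
import numpy as np
import soundfile
import librosa

def extract_feature(file_name, mfcc=True, chroma=True, mel=True,
                    contrast=True, tonnetz=True):
    with soundfile.SoundFile(file_name) as f:
        X = f.read(dtype="float32")
        sample_rate = f.samplerate
    if X.ndim > 1:                      # fold stereo down to mono
        X = np.mean(X, axis=1)
    result = np.array([])
    if chroma or contrast:              # both reuse the magnitude spectrogram
        stft = np.abs(librosa.stft(X))
    if mfcc:
        result = np.hstack((result, np.mean(
            librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)))
    if chroma:
        result = np.hstack((result, np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)))
    if mel:
        result = np.hstack((result, np.mean(
            librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)))
    if contrast:
        result = np.hstack((result, np.mean(
            librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)))
    if tonnetz:
        result = np.hstack((result, np.mean(librosa.feature.tonnetz(
            y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)))
    return result                       # 1-D vector; reshaped to (1, -1) above

With librosa defaults this produces a 193-dimensional vector (40 MFCC + 12 chroma + 128 mel + 7 contrast + 6 tonnetz), which is what the reshape(1, -1) before classifier.predict expects.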
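
Note on the new code: gr.inputs.Audio, gr.outputs.Textbox, and the source= argument belong to the legacy Gradio namespaces, which were deprecated in Gradio 3.x and removed in 4.x; share=True also has no effect when the app runs on Spaces. A minimal sketch of the same app against the Gradio 4.x API follows (the port itself is an assumption; the checkpoint id and example file come from the commit).

# Same app, assuming Gradio 4.x: components live at the top level and
# gr.Audio takes a list-valued `sources` argument instead of `source`.
import gradio as gr
from transformers import pipeline

# facebook/s2t-medium-librispeech-asr is the checkpoint from the commit; for a
# single audio file the ASR pipeline returns a dict like {"text": "..."}.
model = pipeline(task="automatic-speech-recognition",
                 model="facebook/s2t-medium-librispeech-asr")

def predict_speech_to_text(audio):
    return model(audio)["text"]

gr.Interface(
    fn=predict_speech_to_text,
    title="Automatic Speech Recognition (ASR)",
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Input"),
    outputs=gr.Textbox(label="Output"),
    description="Using pipeline with Facebook S2T for ASR.",
    examples=["ljspeech.wav"],          # only if this file ships with the Space
    allow_flagging="never",
).launch()                              # share=True would be ignored on Spaces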