alvi123 committed on
Commit
c59d0a4
•
1 Parent(s): 38fad8a
Files changed (1)
  1. app.py +79 -88
app.py CHANGED
@@ -1,103 +1,94 @@
  import gradio as gr
- import wave
- import matplotlib.pyplot as plt
- import numpy as np
- from extract_features import *
- import pickle
- import soundfile
  import librosa
-
- classifier = pickle.load(open('finalized_rf.sav', 'rb'))
-
- def emotion_predict(input):
-     input_features = extract_feature(input, mfcc=True, chroma=True, mel=True, contrast=True, tonnetz=True)
-     rf_prediction = classifier.predict(input_features.reshape(1,-1))
-     if rf_prediction == 'happy':
-         return 'kata-kerja '
-     elif rf_prediction == 'neutral':
-         return 'kata-benda '
-     elif rf_prediction == 'sad':
-         return 'kata-sifat '
-     else:
-         return 'kata-keterangan'
-
-
- def plot_fig(input):
-     wav = wave.open(input, 'r')
-
-     raw = wav.readframes(-1)
-     raw = np.frombuffer(raw, "int16")
-     sampleRate = wav.getframerate()
-
-     Time = np.linspace(0, len(raw)/sampleRate, num=len(raw))
-
-     fig = plt.figure()
-     plt.rcParams["figure.figsize"] = (50,15)
-     plt.title("Waveform Of the Audio", fontsize=25)
-     plt.xticks(fontsize=15)
-     plt.yticks(fontsize=15)
-     plt.ylabel("Amplitude", fontsize=25)
-     plt.plot(Time, raw, color='red')
-     return fig
-
-
- with gr.Blocks() as app:
-     gr.Markdown(
  """
-     # Speech Detected 🎵😍
-     This application classifies inputted audio 🔊 according to the prediction into four categories:
-     1. kata-benda 😎
-     2. kata-kerja 😐
-     3. kata-sifat 😒
-     4. kata-keterangan 😀
-     """
-     )
-     with gr.Tab("Record Audio"):
-         record_input = gr.Audio(source="microphone", type="filepath")
-
-         with gr.Accordion("Audio Visualization", open=False):
-             gr.Markdown(
-                 """
-                 ### Visualization will work only after Audio has been submitted
-                 """
-             )
-             plot_record = gr.Button("Display Audio Signal")
-             plot_record_c = gr.Plot(label='Waveform Of the Audio')
-
-         record_button = gr.Button("Detect Emotion")
-         record_output = gr.Text(label = 'Emotion Detected')
-
-     with gr.Tab("Upload Audio File"):
-         gr.Markdown(
  """
-         ## Uploaded Audio should be of .wav format
-         """
  )
-
-         upload_input = gr.Audio(type="filepath")
-
-         with gr.Accordion("Audio Visualization", open=False):
-             gr.Markdown(
-                 """
-                 ### Visualization will work only after Audio has been submitted
-                 """
-             )
-             plot_upload = gr.Button("Display Audio Signal")
-             plot_upload_c = gr.Plot(label='Waveform Of the Audio')
-
-         upload_button = gr.Button("Detect Emotion")
-         upload_output = gr.Text(label = 'Emotion Detected')
-
-     record_button.click(emotion_predict, inputs=record_input, outputs=record_output)
-     upload_button.click(emotion_predict, inputs=upload_input, outputs=upload_output)
-     plot_record.click(plot_fig, inputs=record_input, outputs=plot_record_c)
-     plot_upload.click(plot_fig, inputs=upload_input, outputs=plot_upload_c)
-
- app.launch()
  import gradio as gr
  import librosa
+ import matplotlib.pyplot as plt
+ import plotly.express as px
+ from radar_chart import radar_factory
+
+ from keras.models import load_model
+ import os
+ import numpy as np
+
+
+ model = load_model(os.path.join("model", "Emotion_Voice_Detection_Model_tuned_2.h5"))
+
+
+ def convert_class_to_emotion(pred):
+     """
+     Method to convert the predictions (int) into human readable strings.
+     """
+
+     # label_conversion = {0: 'neutral',
+     #                     1: 'calm',
+     #                     2: 'happy',
+     #                     3: 'sad',
+     #                     4: 'angry',
+     #                     5: 'fearful',
+     #                     6: 'disgust',
+     #                     7: 'surprised'}
+
+     label_conversion = {0: 'kata_sifat',
+                         1: 'kata_benda',
+                         2: 'kata_kerja',
+                         3: 'kata_keterangan'}
+
+     return label_conversion[int(pred)]
+
+
+ def make_predictions(file, micro=None):
  """
+     Method to process the audio file and create the features.
  """
+     if file is not None and micro is None:
+         input_audio = file
+     elif file is None and micro is not None:
+         input_audio = micro
+     else:
+         print("THERE IS A PROBLEM")
+         input_audio = file
+
+     data, sampling_rate = librosa.load(input_audio)
+     print(data)
+     print(f"THE SAMPLING RATE IS {sampling_rate}")
+     mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T, axis=0)
+     x = np.expand_dims(mfccs, axis=1)
+     x = np.expand_dims(x, axis=0)
+     predictions = np.argmax(model.predict(x), axis=1)
+
+     N = 8
+     theta = radar_factory(N, frame='polygon')
+     spoke_labels = np.array(['kata_benda',
+                              'kata_kerja',
+                              'kata_keterangan',
+                              'kata_sifat'])
+     fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
+                                   subplot_kw=dict(projection='radar'))
+     vec = model.predict(x)[0]
+     axs.plot(theta, vec, color="b")
+     axs.fill(theta, vec, alpha=0.3)
+
+     axs.set_varlabels(spoke_labels)
+
+     fig = plt.figure()
+     plt.plot(data, alpha=0.8)
+     plt.xlabel("time")
+     plt.ylabel("amplitude")
+
+
+     return convert_class_to_emotion(predictions), fig, fig_radar
+
+
+
+ # Set the starting state to an empty string
+ iface = gr.Interface(
+     fn=make_predictions,
+     title="identify emotion of a chunk of audio speech",
+     description="a simple interface to perform emotion recognition from an audio file",
+     article="Author: <a href=\"https://huggingface.co/poisso\">Poisso</a>.",
+     inputs=[gr.Audio(source="upload", type="filepath", label="File"),
+             gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")],
+     examples=[[os.path.join("examples", filename)] for filename in os.listdir("examples")],
+     outputs=[gr.Textbox(label="Text output"), gr.Plot(), gr.Plot()]
  )
+ iface.launch(debug=True)
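
Note for reviewers: the sketch below is not part of the commit; it is a minimal way to exercise the preprocessing that the new `make_predictions` performs (mean of 40 MFCCs over time, reshaped to a batch of one before `model.predict`) outside of Gradio. It assumes the same `model/Emotion_Voice_Detection_Model_tuned_2.h5` checkpoint used in app.py and a placeholder `example.wav`; the label order follows the commit's `convert_class_to_emotion` mapping.

```python
# Minimal sketch of the feature pipeline added in this commit (not part of the diff).
# Assumes model/Emotion_Voice_Detection_Model_tuned_2.h5 exists locally and that
# "example.wav" is a placeholder audio file supplied by the reviewer.
import numpy as np
import librosa
from keras.models import load_model

model = load_model("model/Emotion_Voice_Detection_Model_tuned_2.h5")

def predict_label(path):
    # Load audio and average the 40 MFCC coefficients over time,
    # mirroring make_predictions() in app.py.
    data, sr = librosa.load(path)
    mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40).T, axis=0)
    x = mfccs.reshape(1, 40, 1)   # same shape as the two expand_dims calls in app.py
    probs = model.predict(x)[0]   # class scores from the Keras model
    labels = ['kata_sifat', 'kata_benda', 'kata_kerja', 'kata_keterangan']
    return labels[int(np.argmax(probs))], probs

if __name__ == "__main__":
    label, probs = predict_label("example.wav")
    print(label, probs)
```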