ErtugrulDemir
committed on
Commit
•
5ee7288
1
Parent(s):
d127fc4
push root
Browse files
Pushing 'Speech Emotion Recognition' portfolio project from local (cloud) to remote.
- .gitattributes +7 -0
- app.py +59 -0
- audio_clf_model/fingerprint.pb +3 -0
- audio_clf_model/keras_metadata.pb +3 -0
- audio_clf_model/saved_model.pb +3 -0
- audio_clf_model/variables/variables.data-00000-of-00001 +3 -0
- audio_clf_model/variables/variables.index +0 -0
- encoder +0 -0
- examples/example_air_conditioner.wav +3 -0
- examples/example_car_horn.wav +0 -0
- examples/example_children_playing.wav +0 -0
- examples/example_dog_bark.wav +3 -0
- examples/example_drilling.wav +0 -0
- examples/example_engine_idling.wav +3 -0
- examples/example_gun_shot.wav +0 -0
- examples/example_jackhammer.wav +3 -0
- examples/example_siren.wav +3 -0
- examples/example_street_music.wav +3 -0
- label_encodings.json +1 -0
- requirements.txt +4 -0
.gitattributes
CHANGED
@@ -32,3 +32,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
audio_clf_model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
36 |
+
examples/example_air_conditioner.wav filter=lfs diff=lfs merge=lfs -text
|
37 |
+
examples/example_dog_bark.wav filter=lfs diff=lfs merge=lfs -text
|
38 |
+
examples/example_engine_idling.wav filter=lfs diff=lfs merge=lfs -text
|
39 |
+
examples/example_jackhammer.wav filter=lfs diff=lfs merge=lfs -text
|
40 |
+
examples/example_siren.wav filter=lfs diff=lfs merge=lfs -text
|
41 |
+
examples/example_street_music.wav filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import tensorflow as tf
|
2 |
+
import gradio as gr
|
3 |
+
import librosa
|
4 |
+
import numpy as np
|
5 |
+
import json
|
6 |
+
|
7 |
+
# File paths (resolved relative to the directory the app is started from)
model_path = "audio_clf_model"
encoding_path = "label_encodings.json"
examples_path = "examples"

# Load the trained Keras model from its saved-model directory.
model = tf.keras.models.load_model(model_path)

# Load the index -> class-name mapping. The context manager closes the file
# handle instead of leaving it open for the lifetime of the process.
with open(encoding_path, "r", encoding="utf-8") as encoding_file:
    classes = json.load(encoding_file)

# Class names ordered by their integer key ("0", "1", ...), so that labels[i]
# is the name paired with the i-th score when predictions are reported.
# NOTE: a hard-coded ["negative", "positive"] override used to follow this
# line; it discarded the names loaded above, so every prediction was reported
# under two unrelated labels. It has been removed.
# NOTE(review): assumes the model emits one score per entry in the encoding
# file -- confirm against the trained model's output layer.
labels = [classes[str(i)] for i in range(len(classes))]
|
17 |
+
|
18 |
+
def pre_processor(audio_path):
    """Turn an audio file into a fixed-length MFCC feature vector.

    Args:
        audio_path: Path to the audio file; handed straight to
            ``librosa.load``.

    Returns:
        The 40 MFCC coefficients of the clip, each averaged over all frames.
    """
    # Decode the file using librosa's default loading settings.
    signal, sr = librosa.load(audio_path)

    # MFCCs are an audio feature; request 40 coefficients per frame.
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=40)

    # Transpose, then average over axis 0 so one value per coefficient remains.
    return np.mean(coefficients.T, axis=0)
|
28 |
+
|
29 |
+
|
30 |
+
def clsf(audio_path):
    """Classify one audio file and return a confidence score per label.

    Args:
        audio_path: Path to the audio file to classify.

    Returns:
        dict mapping each entry of the module-level ``labels`` list to the
        model score at the same index, rounded to 3 decimals.

    Raises:
        IndexError: If ``labels`` has more entries than the model has outputs.
    """
    # Extract the feature vector for this clip.
    features = pre_processor(audio_path)

    # Add a leading axis of size 1 so the single sample is passed as a batch.
    sample = np.expand_dims(features, axis=0)

    # Predict and collapse the result to a flat vector of scores.
    preds = model.predict(sample).flatten()

    # Pair every label with the score at its index. A leftover debug print of
    # the feature length was removed so requests no longer write to stdout.
    return {
        label: np.round(float(preds[index]), 3)
        for index, label in enumerate(labels)
    }
|
46 |
+
|
47 |
+
# GUI component: keyword arguments for the Gradio interface.
gui_params = dict(
    fn=clsf,
    # The uploaded audio is handed to `fn` as a file path.
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="label",
    # live=True,
    examples=examples_path,
)
demo = gr.Interface(**gui_params)

# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
audio_clf_model/fingerprint.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:23ff7c4fb5723d5b99ce3e8e5b1a5eeca21232e36e6e29ae5cd433fd36f2a073
|
3 |
+
size 54
|
audio_clf_model/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f971b6ba97c55e9406edf8f9127c08a328d0a6a9b84a381a6830b8a8571c50b8
|
3 |
+
size 30718
|
audio_clf_model/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:584ce2d364c9b19401fd5ac20c83525ab797faf12fa444bc914546d419465452
|
3 |
+
size 280456
|
audio_clf_model/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd2136eee974e2d2359724b8fe1cd5e97fd33933a728312deab1c5674c1cfd60
|
3 |
+
size 6728254
|
audio_clf_model/variables/variables.index
ADDED
Binary file (3.03 kB). View file
|
|
encoder
ADDED
Binary file (614 Bytes). View file
|
|
examples/example_air_conditioner.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84c943673ea7a44aa379e68d1cd098c7ff44d6db44cee9399dff5c00f8dff0a4
|
3 |
+
size 1152080
|
examples/example_car_horn.wav
ADDED
Binary file (706 kB). View file
|
|
examples/example_children_playing.wav
ADDED
Binary file (768 kB). View file
|
|
examples/example_dog_bark.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:531a6a90ce0bc598d872766a1ae783df0084e23f8be77662dffcbed9bd62fcad
|
3 |
+
size 1152080
|
examples/example_drilling.wav
ADDED
Binary file (768 kB). View file
|
|
examples/example_engine_idling.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:366e771510c21f51e72c7254f9ae14cfb53d684b77a39f19bb578d061f8e3a58
|
3 |
+
size 2304080
|
examples/example_gun_shot.wav
ADDED
Binary file (490 kB). View file
|
|
examples/example_jackhammer.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3a0de19f0946f0007a8c96d8473a9ba9193185a8a84fa757ea971156ed28979
|
3 |
+
size 2304080
|
examples/example_siren.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b541f6f1c8e9dc208579ad45f050ac6424181539eeea33abc995d96f590d4cc
|
3 |
+
size 1152080
|
examples/example_street_music.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b75b3da7714ec574c58d5257dec4b87abca2f1577582b474a4b9aa5907eefd2e
|
3 |
+
size 1058480
|
label_encodings.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"0": "air_conditioner", "1": "car_horn", "2": "children_playing", "3": "dog_bark", "4": "drilling", "5": "engine_idling", "6": "gun_shot", "7": "jackhammer", "8": "siren", "9": "street_music"}
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.27.0
|
2 |
+
librosa==0.10.0.post2
|
3 |
+
numpy==1.22.4
|
4 |
+
tensorflow==2.12.0
|