3loi committed on
Commit
8dda170
1 Parent(s): e096e2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -9,12 +9,20 @@ import numpy as np
9
 
10
 
11
 
 
 
 
12
  def classify_audio(audio_file):
13
  model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Multi-Attributes", trust_remote_code=True)
14
- print(audio_file)
15
- mean, std = -8.278621631819787e-05, 0.08485510250851999
16
- raw_wav, _ = librosa.load(audio_file, sr=16000)
17
- norm_wav = (raw_wav - mean) / (std+0.000001)
 
 
 
 
 
18
 
19
  mask = torch.ones(1, len(norm_wav))
20
  wavs = torch.tensor(norm_wav).unsqueeze(0)
@@ -25,11 +33,9 @@ def classify_audio(audio_file):
25
 
26
 
27
  def main():
28
- audio_input = gr.inputs.Audio(source="upload", type="filepath")
29
- output_text = gr.outputs.Textbox()
30
 
31
- iface = gr.Interface(fn=classify_audio, inputs=audio_input,
32
- outputs=output_text, title="Speech Emotion Recognition App",
33
  description="Upload an audio file and hit the 'Submit'\
34
  button")
35
 
@@ -38,4 +44,4 @@ def main():
38
 
39
  if __name__ == '__main__':
40
  main()
41
-
 
9
 
10
 
11
 
12
+
13
+
14
+
15
  def classify_audio(audio_file):
16
  model = AutoModelForAudioClassification.from_pretrained("3loi/SER-Odyssey-Baseline-WavLM-Multi-Attributes", trust_remote_code=True)
17
+ sr, raw_wav = audio_file
18
+
19
+ print(audio_file, audio_file[1].dtype)
20
+ y = raw_wav.astype(np.float32)
21
+ y /= np.max(np.abs(y))
22
+
23
+
24
+ #raw_wav, _ librosa.load(audio_file, sr=16000)
25
+ norm_wav = (y - mean) / (std+0.000001)
26
 
27
  mask = torch.ones(1, len(norm_wav))
28
  wavs = torch.tensor(norm_wav).unsqueeze(0)
 
33
 
34
 
35
  def main():
 
 
36
 
37
+ iface = gr.Interface(fn=classify_audio, inputs=gr.Audio(sources=["upload", "microphone"], label="Audio file"),
38
+ outputs=gr.Text(), title="Speech Emotion Recognition App",
39
  description="Upload an audio file and hit the 'Submit'\
40
  button")
41
 
 
44
 
45
  if __name__ == '__main__':
46
  main()
47
+