BHW committed on
Commit
c5cb4be
1 Parent(s): 66d40fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -3,11 +3,11 @@ import time
3
  import gradio as gr
4
  import librosa
5
  import numpy as np
6
- import soundfile as sf
7
  from transformers import pipeline
8
 
9
  TARGET_SAMPLE_RATE = 16_000
10
- AUDIO_SECONDS_THRESHOLD = 5
11
  pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
12
  prediction = [{"score": 1, "label": "recording..."}]
13
 
@@ -27,7 +27,7 @@ def streaming_recording_fn(stream, new_chunk):
27
  if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
28
  prediction = pipe(stream)
29
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
30
- sf.write(file_name, stream, TARGET_SAMPLE_RATE)
31
  print(f"SAVE AUDIO: {file_name}")
32
  print(f">>>>>>1\t{y.shape=}, {stream.shape=}\n\t{prediction[0]=}")
33
  stream = None
@@ -49,7 +49,7 @@ def microphone_fn(waveform):
49
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
50
  result = pipe(y)
51
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
52
- sf.write(file_name, y, TARGET_SAMPLE_RATE)
53
  return {i['label']: i['score'] for i in result}
54
 
55
 
@@ -61,7 +61,7 @@ def file_fn(waveform):
61
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
62
  result = pipe(y)
63
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
64
- sf.write(file_name, y, TARGET_SAMPLE_RATE)
65
  return {i['label']: i['score'] for i in result}
66
 
67
 
@@ -98,7 +98,7 @@ with gr.Blocks() as example:
98
 
99
  with gr.Blocks() as demo:
100
  gr.TabbedInterface([file_demo, streaming_demo, microphone_demo, example],
101
- ["Audio file", "Streaming", "Microphone", "example"])
102
 
103
  if __name__ == "__main__":
104
 
 
3
  import gradio as gr
4
  import librosa
5
  import numpy as np
6
+ # import soundfile as sf
7
  from transformers import pipeline
8
 
9
  TARGET_SAMPLE_RATE = 16_000
10
+ AUDIO_SECONDS_THRESHOLD = 2
11
  pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
12
  prediction = [{"score": 1, "label": "recording..."}]
13
 
 
27
  if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
28
  prediction = pipe(stream)
29
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
30
+ # # sf.write(file_name, stream, TARGET_SAMPLE_RATE)
31
  print(f"SAVE AUDIO: {file_name}")
32
  print(f">>>>>>1\t{y.shape=}, {stream.shape=}\n\t{prediction[0]=}")
33
  stream = None
 
49
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
50
  result = pipe(y)
51
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
52
+ # sf.write(file_name, y, TARGET_SAMPLE_RATE)
53
  return {i['label']: i['score'] for i in result}
54
 
55
 
 
61
  y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
62
  result = pipe(y)
63
  file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
64
+ # sf.write(file_name, y, TARGET_SAMPLE_RATE)
65
  return {i['label']: i['score'] for i in result}
66
 
67
 
 
98
 
99
  with gr.Blocks() as demo:
100
  gr.TabbedInterface([file_demo, streaming_demo, microphone_demo, example],
101
+ ["Audio file", "Streaming", "Microphone", "Example"])
102
 
103
  if __name__ == "__main__":
104