Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,11 @@ import time
|
|
3 |
import gradio as gr
|
4 |
import librosa
|
5 |
import numpy as np
|
6 |
-
import soundfile as sf
|
7 |
from transformers import pipeline
|
8 |
|
9 |
TARGET_SAMPLE_RATE = 16_000
|
10 |
-
AUDIO_SECONDS_THRESHOLD =
|
11 |
pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
|
12 |
prediction = [{"score": 1, "label": "recording..."}]
|
13 |
|
@@ -27,7 +27,7 @@ def streaming_recording_fn(stream, new_chunk):
|
|
27 |
if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
|
28 |
prediction = pipe(stream)
|
29 |
file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
|
30 |
-
sf.write(file_name, stream, TARGET_SAMPLE_RATE)
|
31 |
print(f"SAVE AUDIO: {file_name}")
|
32 |
print(f">>>>>>1\t{y.shape=}, {stream.shape=}\n\t{prediction[0]=}")
|
33 |
stream = None
|
@@ -49,7 +49,7 @@ def microphone_fn(waveform):
|
|
49 |
y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
|
50 |
result = pipe(y)
|
51 |
file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
|
52 |
-
sf.write(file_name, y, TARGET_SAMPLE_RATE)
|
53 |
return {i['label']: i['score'] for i in result}
|
54 |
|
55 |
|
@@ -61,7 +61,7 @@ def file_fn(waveform):
|
|
61 |
y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
|
62 |
result = pipe(y)
|
63 |
file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
|
64 |
-
sf.write(file_name, y, TARGET_SAMPLE_RATE)
|
65 |
return {i['label']: i['score'] for i in result}
|
66 |
|
67 |
|
@@ -98,7 +98,7 @@ with gr.Blocks() as example:
|
|
98 |
|
99 |
with gr.Blocks() as demo:
|
100 |
gr.TabbedInterface([file_demo, streaming_demo, microphone_demo, example],
|
101 |
-
["Audio file", "Streaming", "Microphone", "
|
102 |
|
103 |
if __name__ == "__main__":
|
104 |
|
|
|
3 |
import gradio as gr
|
4 |
import librosa
|
5 |
import numpy as np
|
6 |
+
# import soundfile as sf
|
7 |
from transformers import pipeline
|
8 |
|
9 |
TARGET_SAMPLE_RATE = 16_000
|
10 |
+
AUDIO_SECONDS_THRESHOLD = 2
|
11 |
pipe = pipeline("audio-classification", model="MIT/ast-finetuned-audioset-10-10-0.4593")
|
12 |
prediction = [{"score": 1, "label": "recording..."}]
|
13 |
|
|
|
27 |
if (stream.shape[-1] / TARGET_SAMPLE_RATE) >= AUDIO_SECONDS_THRESHOLD:
|
28 |
prediction = pipe(stream)
|
29 |
file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
|
30 |
+
# # sf.write(file_name, stream, TARGET_SAMPLE_RATE)
|
31 |
print(f"SAVE AUDIO: {file_name}")
|
32 |
print(f">>>>>>1\t{y.shape=}, {stream.shape=}\n\t{prediction[0]=}")
|
33 |
stream = None
|
|
|
49 |
y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
|
50 |
result = pipe(y)
|
51 |
file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
|
52 |
+
# sf.write(file_name, y, TARGET_SAMPLE_RATE)
|
53 |
return {i['label']: i['score'] for i in result}
|
54 |
|
55 |
|
|
|
61 |
y = librosa.resample(y, orig_sr=sr, target_sr=TARGET_SAMPLE_RATE)
|
62 |
result = pipe(y)
|
63 |
file_name = f'./audio/{time.strftime("%Y%m%d_%H%M%S", time.localtime())}.wav'
|
64 |
+
# sf.write(file_name, y, TARGET_SAMPLE_RATE)
|
65 |
return {i['label']: i['score'] for i in result}
|
66 |
|
67 |
|
|
|
98 |
|
99 |
with gr.Blocks() as demo:
|
100 |
gr.TabbedInterface([file_demo, streaming_demo, microphone_demo, example],
|
101 |
+
["Audio file", "Streaming", "Microphone", "Example"])
|
102 |
|
103 |
if __name__ == "__main__":
|
104 |
|