Spaces:
Runtime error
Runtime error
update remove microphone input
Browse files
app.py
CHANGED
@@ -203,10 +203,20 @@ def detect_breath_from_speed_vad(speech,index_vad):
|
|
203 |
threshold_breath=BREATH_THRESHOLD*SAMPLING_RATE
|
204 |
threshold_breath_to_breath=BREATH_TO_BREATH_TIME*SAMPLING_RATE
|
205 |
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
|
212 |
return speech_b_detect
|
@@ -214,6 +224,7 @@ def detect_breath_from_speed_vad(speech,index_vad):
|
|
214 |
def detect_breath_from_speed(speech_file_path,original_task_model,Feature_mean,Feature_std):
|
215 |
print("Finding Voice Activity Deteciton")
|
216 |
speech,speech_scaled,index_vad=read_speech_derive_vad(speech_file_path,SAMPLING_RATE,original_task_model,Feature_mean,Feature_std)
|
|
|
217 |
print("Detecting Breath sound in speech")
|
218 |
speech_b_detect=detect_breath_from_speed_vad(speech,index_vad)
|
219 |
return speech,speech_b_detect
|
@@ -224,15 +235,15 @@ def plot_waveform(speech,SAMPLING_RATE,speech_b_detect):
|
|
224 |
X = np.divide(range(0, len(speech)), SAMPLING_RATE)
|
225 |
|
226 |
# Create a figure
|
227 |
-
plt.figure(figsize=(
|
228 |
|
229 |
# Define font size
|
230 |
font_size = 24
|
231 |
|
232 |
# Second subplot: Speech, Detected breath, and True breath
|
233 |
-
plt.subplot(3, 1, 2)
|
234 |
-
plt.plot(X, speech, label="Speech", color='blue', linewidth=2)
|
235 |
-
plt.plot(X, 0.
|
236 |
plt.title(f"Speech and detected breaths", fontsize=24)
|
237 |
plt.legend(fontsize=12)
|
238 |
plt.xlabel("Time (seconds)", fontsize=20)
|
@@ -250,7 +261,8 @@ def plot_waveform(speech,SAMPLING_RATE,speech_b_detect):
|
|
250 |
# original_task_model,Feature_mean,Feature_std = initialisation()
|
251 |
|
252 |
|
253 |
-
def gradio_interface(image_file,input_audio_file):
|
|
|
254 |
print("Gradio Interface audio file:",input_audio_file)
|
255 |
# Load the audio file
|
256 |
audio = AudioSegment.from_file(input_audio_file)
|
@@ -273,7 +285,8 @@ def gradio_interface(image_file,input_audio_file):
|
|
273 |
default_image = "Text.png"
|
274 |
iface = gr.Interface(
|
275 |
fn=gradio_interface,
|
276 |
-
inputs=[gr.Image(type="filepath", value=default_image,interactive=False),gr.Audio(sources=["microphone","upload"], type="filepath",format='wav')],
|
|
|
277 |
outputs=[gr.Image(type="filepath"),gr.Audio(type="filepath")],
|
278 |
title="Breath sound Detector",
|
279 |
description="Record your speech reading the given paragraph. The audio will be processed and the breath detection will be performed. The detected breath will be displayed in the image and the breath enhanced speech can be heard.",
|
|
|
203 |
threshold_breath=BREATH_THRESHOLD*SAMPLING_RATE
|
204 |
threshold_breath_to_breath=BREATH_TO_BREATH_TIME*SAMPLING_RATE
|
205 |
|
206 |
+
frame_length=int(np.floor(FRAME_TIME*SAMPLING_RATE))
|
207 |
+
hop_length=int(np.floor(HOP_TIME*SAMPLING_RATE))
|
208 |
+
offset = frame_length - hop_length
|
209 |
+
print(f"Number of breaths detected: {np.size(index_b)/2}")
|
210 |
+
for i in range(int(np.size(index_b)/2)):
|
211 |
+
index_b[0,2*i+1] = index_b[0,2*i+1] + offset
|
212 |
+
if (index_b[0,2*i+1] > len(speech)):
|
213 |
+
index_b[0,2*i+1]=len(speech)
|
214 |
+
speech_b_detect[range(int(index_b[0,2*i]),int(index_b[0,2*i+1])+1)]=1
|
215 |
+
|
216 |
+
# if join==1:
|
217 |
+
# index_b,speech_b_detect=join_close_breaths(index_b,threshold_breath_to_breath,speech_b_detect)
|
218 |
+
# if remove==1:
|
219 |
+
# index_b,speech_b_detect=remove_small_breaths(index_b,threshold_breath,speech_b_detect)
|
220 |
|
221 |
|
222 |
return speech_b_detect
|
|
|
224 |
def detect_breath_from_speed(speech_file_path,original_task_model,Feature_mean,Feature_std):
|
225 |
print("Finding Voice Activity Deteciton")
|
226 |
speech,speech_scaled,index_vad=read_speech_derive_vad(speech_file_path,SAMPLING_RATE,original_task_model,Feature_mean,Feature_std)
|
227 |
+
print(f"Number of Non-Voice regions: {len(index_vad)/2}")
|
228 |
print("Detecting Breath sound in speech")
|
229 |
speech_b_detect=detect_breath_from_speed_vad(speech,index_vad)
|
230 |
return speech,speech_b_detect
|
|
|
235 |
X = np.divide(range(0, len(speech)), SAMPLING_RATE)
|
236 |
|
237 |
# Create a figure
|
238 |
+
plt.figure(figsize=(8, 2))
|
239 |
|
240 |
# Define font size
|
241 |
font_size = 24
|
242 |
|
243 |
# Second subplot: Speech, Detected breath, and True breath
|
244 |
+
# plt.subplot(3, 1, 2)
|
245 |
+
plt.plot(X, 0.5*speech, label="Speech", color='blue', linewidth=2)
|
246 |
+
plt.plot(X, 0.2 * speech_b_detect, label="Detected breath", color='red', linewidth=3)
|
247 |
plt.title(f"Speech and detected breaths", fontsize=24)
|
248 |
plt.legend(fontsize=12)
|
249 |
plt.xlabel("Time (seconds)", fontsize=20)
|
|
|
261 |
# original_task_model,Feature_mean,Feature_std = initialisation()
|
262 |
|
263 |
|
264 |
+
# def gradio_interface(image_file,input_audio_file):
|
265 |
+
def gradio_interface(input_audio_file):
|
266 |
print("Gradio Interface audio file:",input_audio_file)
|
267 |
# Load the audio file
|
268 |
audio = AudioSegment.from_file(input_audio_file)
|
|
|
285 |
default_image = "Text.png"
|
286 |
iface = gr.Interface(
|
287 |
fn=gradio_interface,
|
288 |
+
# inputs=[gr.Image(type="filepath", value=default_image,interactive=False),gr.Audio(sources=["microphone","upload"], type="filepath",format='wav')],
|
289 |
+
inputs=[gr.Audio(sources=["upload"], type="filepath",format='wav')],
|
290 |
outputs=[gr.Image(type="filepath"),gr.Audio(type="filepath")],
|
291 |
title="Breath sound Detector",
|
292 |
description="Record your speech reading the given paragraph. The audio will be processed and the breath detection will be performed. The detected breath will be displayed in the image and the breath enhanced speech can be heard.",
|