kmiyasar committed
Commit a21f829 · 1 Parent(s): 8e8d000

update remove microphone input

Files changed (1)
app.py +23 -10
app.py CHANGED
@@ -203,10 +203,20 @@ def detect_breath_from_speed_vad(speech,index_vad):
     threshold_breath=BREATH_THRESHOLD*SAMPLING_RATE
     threshold_breath_to_breath=BREATH_TO_BREATH_TIME*SAMPLING_RATE
 
-    if join==1:
-        index_b,speech_b_detect=join_close_breaths(index_b,threshold_breath_to_breath,speech_b_detect)
-    if remove==1:
-        index_b,speech_b_detect=remove_small_breaths(index_b,threshold_breath,speech_b_detect)
+    frame_length=int(np.floor(FRAME_TIME*SAMPLING_RATE))
+    hop_length=int(np.floor(HOP_TIME*SAMPLING_RATE))
+    offset = frame_length - hop_length
+    print(f"Number of breaths detected: {np.size(index_b)/2}")
+    for i in range(int(np.size(index_b)/2)):
+        index_b[0,2*i+1] = index_b[0,2*i+1] + offset
+        if (index_b[0,2*i+1] > len(speech)):
+            index_b[0,2*i+1]=len(speech)
+        speech_b_detect[range(int(index_b[0,2*i]),int(index_b[0,2*i+1])+1)]=1
+
+    # if join==1:
+    #     index_b,speech_b_detect=join_close_breaths(index_b,threshold_breath_to_breath,speech_b_detect)
+    # if remove==1:
+    #     index_b,speech_b_detect=remove_small_breaths(index_b,threshold_breath,speech_b_detect)
 
 
     return speech_b_detect
@@ -214,6 +224,7 @@ def detect_breath_from_speed_vad(speech,index_vad):
 def detect_breath_from_speed(speech_file_path,original_task_model,Feature_mean,Feature_std):
     print("Finding Voice Activity Deteciton")
     speech,speech_scaled,index_vad=read_speech_derive_vad(speech_file_path,SAMPLING_RATE,original_task_model,Feature_mean,Feature_std)
+    print(f"Number of Non-Voice regions: {len(index_vad)/2}")
     print("Detecting Breath sound in speech")
     speech_b_detect=detect_breath_from_speed_vad(speech,index_vad)
     return speech,speech_b_detect
@@ -224,15 +235,15 @@ def plot_waveform(speech,SAMPLING_RATE,speech_b_detect):
     X = np.divide(range(0, len(speech)), SAMPLING_RATE)
 
     # Create a figure
-    plt.figure(figsize=(12, 8))
+    plt.figure(figsize=(8, 2))
 
     # Define font size
     font_size = 24
 
     # Second subplot: Speech, Detected breath, and True breath
-    plt.subplot(3, 1, 2)
-    plt.plot(X, speech, label="Speech", color='blue', linewidth=2)
-    plt.plot(X, 0.15 * speech_b_detect, label="Detected breath", color='red', linewidth=3)
+    # plt.subplot(3, 1, 2)
+    plt.plot(X, 0.5*speech, label="Speech", color='blue', linewidth=2)
+    plt.plot(X, 0.2 * speech_b_detect, label="Detected breath", color='red', linewidth=3)
     plt.title(f"Speech and detected breaths", fontsize=24)
     plt.legend(fontsize=12)
     plt.xlabel("Time (seconds)", fontsize=20)
@@ -250,7 +261,8 @@ def plot_waveform(speech,SAMPLING_RATE,speech_b_detect):
 # original_task_model,Feature_mean,Feature_std = initialisation()
 
 
-def gradio_interface(image_file,input_audio_file):
+# def gradio_interface(image_file,input_audio_file):
+def gradio_interface(input_audio_file):
     print("Gradio Interface audio file:",input_audio_file)
     # Load the audio file
     audio = AudioSegment.from_file(input_audio_file)
@@ -273,7 +285,8 @@ def gradio_interface(image_file,input_audio_file):
 default_image = "Text.png"
 iface = gr.Interface(
     fn=gradio_interface,
-    inputs=[gr.Image(type="filepath", value=default_image,interactive=False),gr.Audio(sources=["microphone","upload"], type="filepath",format='wav')],
+    # inputs=[gr.Image(type="filepath", value=default_image,interactive=False),gr.Audio(sources=["microphone","upload"], type="filepath",format='wav')],
+    inputs=[gr.Audio(sources=["upload"], type="filepath",format='wav')],
     outputs=[gr.Image(type="filepath"),gr.Audio(type="filepath")],
     title="Breath sound Detector",
     description="Record your speech reading the given paragraph. The audio will be processed and the breath detection will be performed. The detected breath will be displayed in the image and the breath enhanced speech can be heard.",
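A note on the new boundary loop in the first hunk: the detector works frame by frame, so each breath's end index initially points at the start of its last analysis frame; adding `frame_length - hop_length` extends it to cover that frame in full, clamped to the signal length. Below is a minimal, self-contained sketch of the same mapping, assuming `index_b` is a 1×2K array of (start, end) sample pairs; the `FRAME_TIME`, `HOP_TIME`, and `SAMPLING_RATE` values are placeholders for globals defined elsewhere in app.py, not the repo's actual settings.

```python
import numpy as np

# Placeholder values for globals defined elsewhere in app.py (assumptions).
SAMPLING_RATE = 16000
FRAME_TIME = 0.025   # assumed 25 ms analysis frame
HOP_TIME = 0.010     # assumed 10 ms hop

frame_length = int(np.floor(FRAME_TIME * SAMPLING_RATE))  # 400 samples
hop_length = int(np.floor(HOP_TIME * SAMPLING_RATE))      # 160 samples
offset = frame_length - hop_length  # tail of the last frame beyond its hop start

def mark_breaths(index_b, n_samples):
    """Expand (start, end) sample pairs into a 0/1 mask over the signal,
    padding each end by `offset` and clamping, as the commit's new loop does."""
    mask = np.zeros(n_samples)
    for i in range(index_b.shape[1] // 2):
        start = int(index_b[0, 2 * i])
        end = min(int(index_b[0, 2 * i + 1]) + offset, n_samples - 1)
        mask[start:end + 1] = 1
    return mask

# Toy usage: two breath intervals inside one second of audio.
index_b = np.array([[2000, 4000, 9000, 11000]])
mask = mark_breaths(index_b, SAMPLING_RATE)
print(f"Number of breaths detected: {index_b.shape[1] / 2}")  # 2.0
```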
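The `join_close_breaths` / `remove_small_breaths` calls are now commented out, and their definitions sit outside this diff, so the following is only an illustration of what such post-processing typically does: merge detections whose gap is under `threshold_breath_to_breath` samples, and drop detections shorter than `threshold_breath` samples. The `_sketch` names mark these as stand-ins, not the repo's functions.

```python
import numpy as np

def join_close_breaths_sketch(index_b, threshold_breath_to_breath):
    """Merge consecutive intervals whose gap is below the threshold (in samples).
    Illustration only: the repo's join_close_breaths is not shown in this diff."""
    pairs = index_b.reshape(-1, 2)
    merged = [list(pairs[0])]
    for start, end in pairs[1:]:
        if start - merged[-1][1] < threshold_breath_to_breath:
            merged[-1][1] = end               # gap too small: fuse with previous
        else:
            merged.append([start, end])
    return np.array(merged).reshape(1, -1)

def remove_small_breaths_sketch(index_b, threshold_breath):
    """Drop intervals shorter than the minimum breath length (in samples)."""
    pairs = index_b.reshape(-1, 2)
    kept = pairs[(pairs[:, 1] - pairs[:, 0]) >= threshold_breath]
    return kept.reshape(1, -1)

# Example: a 100-sample gap closes under a 200-sample threshold.
index_b = np.array([[1000, 2000, 2100, 3000]])
print(join_close_breaths_sketch(index_b, 200))  # [[1000 3000]]
```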
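Inside the handler, `AudioSegment.from_file` (pydub) accepts whatever container the browser uploads. Here is a hedged sketch of the normalisation such a handler usually needs before 16 kHz mono processing; the actual conversion steps in app.py fall outside this hunk, and the output file name below is made up.

```python
from pydub import AudioSegment

def load_as_wav(input_audio_file, sampling_rate=16000):
    """Normalise an uploaded file to mono WAV at the model's sampling rate.
    Sketch only: app.py's own conversion is not shown in this diff."""
    audio = AudioSegment.from_file(input_audio_file)   # any ffmpeg-readable format
    audio = audio.set_frame_rate(sampling_rate).set_channels(1)
    out_path = "input_converted.wav"                   # hypothetical name
    audio.export(out_path, format="wav")
    return out_path
```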
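Finally, the interface wiring: `sources=["upload"]` hides the record button on `gr.Audio`, matching the commit message, and dropping the `gr.Image` input means the handler now takes a single argument. A runnable sketch of the reduced interface with a stub handler (the real one returns the waveform plot and breath-enhanced audio; `"Waveform.png"` is a placeholder):

```python
import gradio as gr

def gradio_interface(input_audio_file):
    # Stub handler: the real one runs breath detection and returns the
    # waveform plot image plus the breath-enhanced audio.
    print("Gradio Interface audio file:", input_audio_file)
    return "Waveform.png", input_audio_file

iface = gr.Interface(
    fn=gradio_interface,
    # sources=["upload"] removes the microphone, so only file uploads remain.
    inputs=[gr.Audio(sources=["upload"], type="filepath", format="wav")],
    outputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],
    title="Breath sound Detector",
)

if __name__ == "__main__":
    iface.launch()
```

One loose end: the interface `description` still asks the user to record speech, which no longer matches the upload-only input.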