Spaces:

kmiyasar
/

BreathSoundDetection

Runtime error

App Files Community

kmiyasar committed on Jun 9, 2024

Commit

a21f829

1 Parent(s): 8e8d000

update remove microphone input

Browse files

Files changed (1) hide show

app.py +23 -10

app.py CHANGED Viewed

@@ -203,10 +203,20 @@ def detect_breath_from_speed_vad(speech,index_vad):
     threshold_breath=BREATH_THRESHOLD*SAMPLING_RATE
     threshold_breath_to_breath=BREATH_TO_BREATH_TIME*SAMPLING_RATE
-    if join==1:
-        index_b,speech_b_detect=join_close_breaths(index_b,threshold_breath_to_breath,speech_b_detect)
-    if remove==1:
-        index_b,speech_b_detect=remove_small_breaths(index_b,threshold_breath,speech_b_detect)
     return speech_b_detect
@@ -214,6 +224,7 @@ def detect_breath_from_speed_vad(speech,index_vad):
 def detect_breath_from_speed(speech_file_path,original_task_model,Feature_mean,Feature_std):
     print("Finding Voice Activity Deteciton")
     speech,speech_scaled,index_vad=read_speech_derive_vad(speech_file_path,SAMPLING_RATE,original_task_model,Feature_mean,Feature_std)
     print("Detecting Breath sound in speech")
     speech_b_detect=detect_breath_from_speed_vad(speech,index_vad)
     return speech,speech_b_detect
@@ -224,15 +235,15 @@ def plot_waveform(speech,SAMPLING_RATE,speech_b_detect):
     X = np.divide(range(0, len(speech)), SAMPLING_RATE)
     # Create a figure
-    plt.figure(figsize=(12, 8))
     # Define font size
     font_size = 24
     # Second subplot: Speech, Detected breath, and True breath
-    plt.subplot(3, 1, 2)
-    plt.plot(X, speech, label="Speech", color='blue', linewidth=2)
-    plt.plot(X, 0.15 * speech_b_detect, label="Detected breath", color='red', linewidth=3)
     plt.title(f"Speech and detected breaths", fontsize=24)
     plt.legend(fontsize=12)
     plt.xlabel("Time (seconds)", fontsize=20)
@@ -250,7 +261,8 @@ def plot_waveform(speech,SAMPLING_RATE,speech_b_detect):
     # original_task_model,Feature_mean,Feature_std = initialisation()
-def gradio_interface(image_file,input_audio_file):
     print("Gradio Interface audio file:",input_audio_file)
     # Load the audio file
     audio = AudioSegment.from_file(input_audio_file)
@@ -273,7 +285,8 @@ def gradio_interface(image_file,input_audio_file):
 default_image = "Text.png"
 iface = gr.Interface(
     fn=gradio_interface,
-    inputs=[gr.Image(type="filepath", value=default_image,interactive=False),gr.Audio(sources=["microphone","upload"], type="filepath",format='wav')],
     outputs=[gr.Image(type="filepath"),gr.Audio(type="filepath")],
     title="Breath sound Detector",
     description="Record your speech reading the given paragraph. The audio will be processed and the breath detection will be performed. The detected breath will be displayed in the image and the breath enhanced speech can be heard.",

     threshold_breath=BREATH_THRESHOLD*SAMPLING_RATE
     threshold_breath_to_breath=BREATH_TO_BREATH_TIME*SAMPLING_RATE
+    frame_length=int(np.floor(FRAME_TIME*SAMPLING_RATE))
+    hop_length=int(np.floor(HOP_TIME*SAMPLING_RATE))
+    offset = frame_length - hop_length
+    print(f"Number of breaths detected: {np.size(index_b)/2}")
+    for i in range(int(np.size(index_b)/2)):
+        index_b[0,2*i+1] = index_b[0,2*i+1] + offset
+        if (index_b[0,2*i+1] > len(speech)):
+            index_b[0,2*i+1]=len(speech)
+        speech_b_detect[range(int(index_b[0,2*i]),int(index_b[0,2*i+1])+1)]=1
+    # if join==1:
+    #     index_b,speech_b_detect=join_close_breaths(index_b,threshold_breath_to_breath,speech_b_detect)
+    # if remove==1:
+    #     index_b,speech_b_detect=remove_small_breaths(index_b,threshold_breath,speech_b_detect)
     return speech_b_detect
 def detect_breath_from_speed(speech_file_path,original_task_model,Feature_mean,Feature_std):
     print("Finding Voice Activity Deteciton")
     speech,speech_scaled,index_vad=read_speech_derive_vad(speech_file_path,SAMPLING_RATE,original_task_model,Feature_mean,Feature_std)
+    print(f"Number of Non-Voice regions: {len(index_vad)/2}")
     print("Detecting Breath sound in speech")
     speech_b_detect=detect_breath_from_speed_vad(speech,index_vad)
     return speech,speech_b_detect
     X = np.divide(range(0, len(speech)), SAMPLING_RATE)
     # Create a figure
+    plt.figure(figsize=(8, 2))
     # Define font size
     font_size = 24
     # Second subplot: Speech, Detected breath, and True breath
+    # plt.subplot(3, 1, 2)
+    plt.plot(X, 0.5*speech, label="Speech", color='blue', linewidth=2)
+    plt.plot(X, 0.2 * speech_b_detect, label="Detected breath", color='red', linewidth=3)
     plt.title(f"Speech and detected breaths", fontsize=24)
     plt.legend(fontsize=12)
     plt.xlabel("Time (seconds)", fontsize=20)
     # original_task_model,Feature_mean,Feature_std = initialisation()
+# def gradio_interface(image_file,input_audio_file):
+def gradio_interface(input_audio_file):
     print("Gradio Interface audio file:",input_audio_file)
     # Load the audio file
     audio = AudioSegment.from_file(input_audio_file)
 default_image = "Text.png"
 iface = gr.Interface(
     fn=gradio_interface,
+    # inputs=[gr.Image(type="filepath", value=default_image,interactive=False),gr.Audio(sources=["microphone","upload"], type="filepath",format='wav')],
+    inputs=[gr.Audio(sources=["upload"], type="filepath",format='wav')],
     outputs=[gr.Image(type="filepath"),gr.Audio(type="filepath")],
     title="Breath sound Detector",
     description="Record your speech reading the given paragraph. The audio will be processed and the breath detection will be performed. The detected breath will be displayed in the image and the breath enhanced speech can be heard.",