Spaces:

ernestchu
/

tsm-net

Sleeping

App Files Files Community

ernestchu committed on Jun 30, 2023

Commit

ded337a

•

1 Parent(s): c55d8da

add speech enhancement

Browse files

Files changed (2) hide show

app.py +21 -7
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -3,9 +3,11 @@ import time
 from tsmnet import Stretcher
 import gradio as gr
 from gradio import processing_utils
-# import torch
 import torchaudio
 import yt_dlp
 model_root = './weights'
 yt_dl_dir = 'yt-audio'
@@ -58,7 +60,7 @@ def prepare_audio_file(rec, audio_file, yt_url):
         raise gr.Error('No audio found!')
-def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
     audio_file = prepare_audio_file(rec, audio_file, yt_url)
     x, sr = torchaudio.load(audio_file)
@@ -67,8 +69,18 @@ def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
     x = x[:, int(start_time * sr):int(end_time * sr)]
-    if speed != 1:
-        x = models[model](x, speed).cpu()
     torchaudio.save(audio_file, x, sr)
     return processing_utils.audio_from_file(audio_file)
@@ -86,16 +98,17 @@ with gr.Blocks() as demo:
         with gr.Column():
             with gr.Tab('From microphone'):
                 rec_box = gr.Audio(label='Recording', source='microphone', type='filepath')
-            with gr.Tab('From file'):
-                audio_file_box = gr.Audio(label='Audio sample', type='filepath')
             with gr.Tab('From YouTube'):
                 yt_url_box  = gr.Textbox(label='YouTube URL', placeholder='https://youtu.be/q6EoRBvdVPQ')
             rec_box.change(lambda: [None] * 2, outputs=[audio_file_box, yt_url_box])
             audio_file_box.change(lambda: [None] * 2, outputs=[rec_box, yt_url_box])
             yt_url_box.input(lambda: [None] * 2, outputs=[rec_box, audio_file_box])
-            speed_box = gr.Slider(label='Playback speed', minimum=0, maximum=2, value=1)
             with gr.Accordion('Fine-grained settings', open=False):
                 with gr.Tab('Trim audio sample (sec)'):
                     # gr.Markdown('### Trim audio sample (sec)')
@@ -117,6 +130,7 @@ with gr.Blocks() as demo:
             rec_box,
             audio_file_box,
             yt_url_box,
             speed_box,
             model_box,
             start_time_box,

 from tsmnet import Stretcher
 import gradio as gr
 from gradio import processing_utils
+import torch
+import numpy as np
 import torchaudio
 import yt_dlp
+import noisereduce as nr
 model_root = './weights'
 yt_dl_dir = 'yt-audio'
         raise gr.Error('No audio found!')
+def run(rec, audio_file, yt_url, denoise, speed, model, start_time, end_time):
     # Handle one request: resolve the input audio, trim it, optionally
     # time-stretch and denoise it, then save the result over the same path
     # and return it via processing_utils.audio_from_file.
     # NOTE(review): the leading '+' / ' ' on each line is diff gutter markup
     # from the commit view, not Python syntax.
     audio_file = prepare_audio_file(rec, audio_file, yt_url)
     x, sr = torchaudio.load(audio_file)
     # Trim to [start_time, end_time): seconds are converted to sample indices
     # along the second axis.
     x = x[:, int(start_time * sr):int(end_time * sr)]
+    if speed == 1:
         # No stretching requested: save the trimmed audio and return early.
         # NOTE(review): this path also skips noise reduction even when
         # `denoise` is set -- confirm that is intended.
+        torchaudio.save(audio_file, x, sr)
+        return processing_utils.audio_from_file(audio_file)
     # `models` is defined outside this view; presumably it maps the selected
     # model name to a callable stretcher -- verify against its definition.
+    x = models[model](x, speed).cpu()
+    if denoise:
         # A 1-D result gets a leading axis so the per-row loop below
         # can iterate over it uniformly.
+        if len(x.shape) == 1: # mono
+            x = x[None]
+        x = x.numpy()
+        # perform noise reduction
         # Each row of x is denoised independently, then the rows are stacked
         # back into a single tensor for saving.
+        x = torch.from_numpy(np.stack([nr.reduce_noise(y=y, sr=sr) for y in x]))
     torchaudio.save(audio_file, x, sr)
     return processing_utils.audio_from_file(audio_file)
         with gr.Column():
             with gr.Tab('From microphone'):
                 rec_box = gr.Audio(label='Recording', source='microphone', type='filepath')
             with gr.Tab('From YouTube'):
                 yt_url_box  = gr.Textbox(label='YouTube URL', placeholder='https://youtu.be/q6EoRBvdVPQ')
+            with gr.Tab('From file'):
+                audio_file_box = gr.Audio(label='Audio sample', type='filepath')
+            denoise_box = gr.Checkbox(label='Speech enhancement (should be off for music)', value=True)
             rec_box.change(lambda: [None] * 2, outputs=[audio_file_box, yt_url_box])
             audio_file_box.change(lambda: [None] * 2, outputs=[rec_box, yt_url_box])
             yt_url_box.input(lambda: [None] * 2, outputs=[rec_box, audio_file_box])
+            speed_box = gr.Slider(label='Playback speed', minimum=0.25, maximum=2, value=1)
             with gr.Accordion('Fine-grained settings', open=False):
                 with gr.Tab('Trim audio sample (sec)'):
                     # gr.Markdown('### Trim audio sample (sec)')
             rec_box,
             audio_file_box,
             yt_url_box,
+            denoise_box,
             speed_box,
             model_box,
             start_time_box,

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ torchvision
 torchaudio
 yt-dlp
 wget

 torchaudio
 yt-dlp
 wget
+noisereduce