ernestchu committed
Commit
f6c8d4d
1 Parent(s): b6ef12a
Files changed (4)
  1. app.py +10 -12
  2. tsmnet/interface.py +1 -1
  3. weights/general.pt +0 -3
  4. weights/speech.pt +2 -2
app.py CHANGED
@@ -6,7 +6,7 @@ import torch
 import torchaudio
 
 model_root = './weights'
-available_models = ['general', 'pop-music', 'classical-music', 'speech']
+available_models = ['speech', 'pop-music', 'classical-music']
 working_sr = 22050
 
 def prepare_models():
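With weights/general.pt deleted later in this commit, the list drops general and speech becomes the first (and therefore default) entry. As a minimal sketch of how prepare_models() plausibly maps these names to weight files, assuming the Stretcher class edited below in tsmnet/interface.py; the function's actual body is not part of this diff, and the 'cpu' device and '<name>.pt' naming are assumptions:

```python
# Sketch only: prepare_models() is not shown in this diff. Stretcher is
# the class from tsmnet/interface.py; device and file naming are assumed.
import os
from tsmnet.interface import Stretcher

def prepare_models():
    # One Stretcher per entry in available_models, each loading
    # '<name>.pt' from model_root.
    return {
        name: Stretcher(os.path.join(model_root, f'{name}.pt'), 'cpu')
        for name in available_models
    }

models = prepare_models()
```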
@@ -28,19 +28,17 @@ def prepare_audio_file(rec, audio_file, yt_url):
 
 def run(rec, audio_file, yt_url, speed, model, start_time, end_time):
     audio_file = prepare_audio_file(rec, audio_file, yt_url)
-    if speed == 1:
-        return processing_utils.audio_from_file(audio_file)
-
-    model = models[model]
 
     x, sr = torchaudio.load(audio_file)
     x = torchaudio.transforms.Resample(orig_freq=sr, new_freq=working_sr)(x)
     sr = working_sr
 
-    x = model(x, speed).cpu()
+    x = x[:, int(start_time * sr):int(end_time * sr)]
 
-    torchaudio.save(audio_file, x, sr)
+    if speed != 1:
+        x = models[model](x, speed).cpu()
 
+    torchaudio.save(audio_file, x, sr)
     return processing_utils.audio_from_file(audio_file)
 
 
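The early return for speed == 1 is gone: the clip is now always trimmed to the requested window before the optional stretch, so the start/end settings take effect even at normal playback speed. A self-contained illustration of the sample-index arithmetic, using torchaudio's (channels, samples) tensor layout:

```python
import torch

working_sr = 22050                     # matches app.py
x = torch.randn(2, 30 * working_sr)    # stand-in for a 30 s stereo clip
start_time, end_time = 0.0, 20.0       # seconds, as entered in the UI

# Seconds become sample indices once the audio is resampled to working_sr.
x = x[:, int(start_time * working_sr):int(end_time * working_sr)]
assert x.shape == (2, 20 * working_sr)
```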
 
@@ -66,11 +64,11 @@ with gr.Blocks() as demo:
 
     speed_box = gr.Slider(label='Playback speed', minimum=0, maximum=2, value=1)
     with gr.Accordion('Fine-grained settings', open=False):
-        with gr.Row():
-            gr.Textbox(label='', value='Trim audio sample', interactive=False)
-            start_time_box = gr.Number(label='Start', value=0)
-            end_time_box = gr.Number(label='End', value=20)
-
+        with gr.Tab('Trim audio sample (sec)'):
+            # gr.Markdown('### Trim audio sample (sec)')
+            with gr.Row():
+                start_time_box = gr.Number(label='Start', value=0)
+                end_time_box = gr.Number(label='End', value=20)
         model_box = gr.Dropdown(label='Model weight', choices=available_models, value=available_models[0])
 
     submit_btn = gr.Button('Submit')
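The trim fields move from a row labeled by a disabled gr.Textbox into a titled gr.Tab, which carries its own caption. The click wiring sits outside this hunk; the sketch below only shows how run()'s seven parameters would line up with the components, and rec_box, audio_file_box, yt_url_box, and output_audio are hypothetical names:

```python
# Hypothetical wiring, assuming component names not shown in this diff.
submit_btn.click(
    run,
    inputs=[rec_box, audio_file_box, yt_url_box,  # audio sources (hypothetical names)
            speed_box, model_box,
            start_time_box, end_time_box],        # the relocated trim fields
    outputs=output_audio,                         # playback component (hypothetical)
)
```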
 
tsmnet/interface.py CHANGED
@@ -68,7 +68,7 @@ class Stretcher:
         self.neuralgram = Neuralgram(path, device)
 
     @torch.no_grad()
-    def __call__(self, audio, rate , interpolation=InterpolationMode.NEAREST): # NEAREST | BILINEAR | BICUBIC
+    def __call__(self, audio, rate , interpolation=InterpolationMode.BICUBIC): # NEAREST | BILINEAR | BICUBIC
         if rate == 1:
             return audio.numpy() if isinstance(audio, torch.Tensor) else audio
         neu = self.neuralgram(audio)
 
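The default interpolation moves from NEAREST to BICUBIC, so the stretched neuralgram is smoothed rather than built from duplicated frames. The resize itself is outside this hunk; the sketch below is an assumption about how InterpolationMode is typically consumed via torchvision, with stretch_neuralgram a hypothetical stand-in for the code inside __call__:

```python
import torch
from torchvision.transforms.functional import InterpolationMode, resize

def stretch_neuralgram(neu: torch.Tensor, rate: float,
                       interpolation=InterpolationMode.BICUBIC) -> torch.Tensor:
    # neu: (batch, bins, frames). Stretching at rate r rescales the time
    # axis to frames / r, e.g. rate=0.5 doubles the frame count.
    bins, frames = neu.shape[-2], neu.shape[-1]
    return resize(neu, [bins, max(1, int(frames / rate))],
                  interpolation=interpolation)
```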
weights/general.pt DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e70b0ca672ab2008da3517ae3eb524135a1ef5685d59cc034084316a665f69f6
-size 100400920
weights/speech.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e29674ce2312e1ba8f9071348de84031e8afbb08412cbc8088b7365f2162f497
-size 100400879
+oid sha256:f3010d34e0d538ecb4c63c8bc89ad4023630dc36e2746bb71b799026d2b03ad4
+size 100400898
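Both weight files are Git LFS pointers: the repository stores only the blob's SHA-256 digest and byte size, so this hunk swaps speech.pt for a new blob. A small standard-library sketch for checking a downloaded weight against its pointer (the path is illustrative):

```python
import hashlib
import os

def verify_lfs_blob(path: str, expected_oid: str, expected_size: int) -> bool:
    # An LFS pointer pins its blob by byte size and SHA-256 digest.
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Fields taken from the new pointer above.
print(verify_lfs_blob('weights/speech.pt',
                      'f3010d34e0d538ecb4c63c8bc89ad4023630dc36e2746bb71b799026d2b03ad4',
                      100400898))
```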