Z committed on
Commit f652bfc
1 Parent(s): 83b77ed
Files changed (2)
  1. app.py +5 -2
  2. audio.py +13 -12
app.py CHANGED
@@ -35,10 +35,13 @@ def model_interface(model_name, top_k, top_p, temperature, cfg_coef, segments, o
     )

     extension_parameters = {"segments":segments, "overlap":overlap}
+    optional_audio_parameters = {"optional_audio":optional_audio, "sample_rate":loaded_model.sample_rate}

     prompts = split_prompt(prompt, segments)
     first_prompt = prompts[0]
-    sample_rate, audio = predict(loaded_model, prompts, None, extension_parameters)
+
+    sample_rate, audio = predict(loaded_model, prompts, duration, optional_audio_parameters, extension_parameters)
+
     counter = 1
     audio_path = "static/"
     audio_name = first_prompt
@@ -82,7 +85,7 @@ with gr.Blocks() as interface:
         with gr.Column():
             with gr.Row():
                 model_dropdown = gr.components.Dropdown(choices=["small", "medium", "large", "melody"], label="Model Size", value="large")
-                optional_audio = gr.components.Audio(source="upload", type="filepath", label="Optional Audio", interactive=True)
+                optional_audio = gr.components.Audio(source="upload", type="numpy", label="Optional Audio", interactive=True)

     slider_keys = list(slider_param.keys())
     slider_pairs = list(zip_longest(slider_keys[::2], slider_keys[1::2]))
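The app.py change switches the Optional Audio input from type="filepath" to type="numpy", so the value model_interface receives is no longer a path but the (sample_rate, np.ndarray) tuple Gradio produces for numpy-typed audio (or None when nothing is uploaded); the new optional_audio_parameters dict forwards that tuple, together with the model's sample rate, into predict(). A minimal sketch of that hand-off, using a synthetic tuple and a stub in place of audio.py's predict() so it runs without loading a model; stub_predict, the fake upload, and the 32000 Hz rate are illustrative assumptions, not part of the commit:

import numpy as np

# What Gradio's Audio(type="numpy") hands the callback:
# (sample_rate, samples), with samples of shape (n,) for mono or (n, channels) for stereo.
fake_upload = (32000, np.zeros((32000 * 4, 2), dtype=np.float32))  # 4 s of stereo silence

def stub_predict(model, prompts, duration, optional_audio_parameters, extension_parameters):
    # Stand-in for audio.py's predict(); only shows what arrives.
    optional_audio = optional_audio_parameters["optional_audio"]
    if optional_audio is not None:
        sr, data = optional_audio
        print(f"conditioning audio: {sr} Hz, shape {data.shape}")
    return 32000, np.zeros(32000, dtype=np.float32)

# Mirrors the lines added to model_interface():
optional_audio_parameters = {"optional_audio": fake_upload, "sample_rate": 32000}
extension_parameters = {"segments": 5, "overlap": 2}
sample_rate, audio = stub_predict(None, ["a calm piano piece"], 10,
                                  optional_audio_parameters, extension_parameters)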
audio.py CHANGED
@@ -1,16 +1,17 @@
 import numpy as np
 import os, re, json, sys
 import torch, torchaudio, pathlib
+from audiocraft.data.audio_utils import convert_audio

-def load_and_process_audio(model, melody, sample_rate):
-    if melody is not None:
-        melody = torch.from_numpy(melody).to(model.device).float().t().unsqueeze(0)
-        if melody.dim() == 2:
-            melody = melody[None]
-        melody = melody[..., :int(sample_rate * model.lm.cfg.dataset.segment_duration)]
-        return melody
-    else:
+def load_and_process_audio(model, duration, optional_audio, sample_rate):
+    if optional_audio is None:
         return None
+    sr, optional_audio = optional_audio[0], torch.from_numpy(optional_audio[1]).to(model.device).float().t()
+    if optional_audio.dim() == 1:
+        optional_audio = optional_audio[None]
+    optional_audio = optional_audio[..., :int(sr * duration)]
+    optional_audio = convert_audio(optional_audio, sr, sr, 1)
+    return optional_audio

 #From https://colab.research.google.com/drive/154CqogsdP-D_TfSF9S2z8-BY98GN_na4?usp=sharing#scrollTo=exKxNU_Z4i5I
 #Thank you DragonForged for the link
@@ -35,12 +36,12 @@ def extend_audio(model, prompt_waveform, prompts, prompt_sr, segments=5, overlap

     return prompt_waveform

-def predict(model, prompts, melody_parameters, extension_parameters):
-    melody = None #load_and_process_audio(MODEL, **melody_parameters)
+def predict(model, prompts, duration, melody_parameters, extension_parameters):
+    melody = load_and_process_audio(model, duration, **melody_parameters)

     if melody is not None:
-        output = MODEL.generate_with_chroma(
-            descriptions=[prompt[0]],
+        output = model.generate_with_chroma(
+            descriptions=[prompts[0]],
             melody_wavs=melody,
             melody_sample_rate=melody_parameters['sample_rate'],
             progress=False
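In audio.py, load_and_process_audio now unpacks that tuple, moves the samples to the model's device, trims them to the requested duration, and downmixes to a single channel with audiocraft's convert_audio while keeping the original sample rate; predict() then passes the result as melody_wavs to model.generate_with_chroma along with melody_parameters['sample_rate']. A standalone sketch of the same preprocessing, assuming audiocraft and torch are installed and substituting a synthetic stereo clip for a real upload; preprocess and fake_upload are illustrative names, not part of the commit:

import numpy as np
import torch
from audiocraft.data.audio_utils import convert_audio

def preprocess(optional_audio, duration, device="cpu"):
    # Same steps as the new load_and_process_audio(), minus the model object
    # (which is only used there for its .device).
    if optional_audio is None:
        return None
    sr, data = optional_audio                               # Gradio "numpy" tuple
    wav = torch.from_numpy(data).to(device).float().t()     # -> (channels, samples)
    if wav.dim() == 1:                                       # mono upload has no channel dim
        wav = wav[None]
    wav = wav[..., :int(sr * duration)]                      # keep only `duration` seconds
    return convert_audio(wav, sr, sr, 1)                     # downmix to one channel

fake_upload = (32000, np.random.randn(32000 * 3, 2).astype(np.float32))  # 3 s of stereo noise
print(preprocess(fake_upload, duration=2).shape)             # torch.Size([1, 64000])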