Spaces:
Running
on
T4
Running
on
T4
First Audio Fade version
Browse files-fades audio segments together
- app.py +15 -1
- audiocraft/data/audio_utils.py +26 -0
app.py
CHANGED
@@ -15,6 +15,7 @@ import time
|
|
15 |
import warnings
|
16 |
from audiocraft.models import MusicGen
|
17 |
from audiocraft.data.audio import audio_write
|
|
|
18 |
from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
|
19 |
import numpy as np
|
20 |
import random
|
@@ -162,7 +163,20 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
|
|
162 |
output = output_segments[0]
|
163 |
for i in range(1, len(output_segments)):
|
164 |
overlap_samples = overlap * MODEL.sample_rate
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
output = output.detach().cpu().float()[0]
|
167 |
except Exception as e:
|
168 |
print(f"Error combining segments: {e}. Using the first segment only.")
|
|
|
15 |
import warnings
|
16 |
from audiocraft.models import MusicGen
|
17 |
from audiocraft.data.audio import audio_write
|
18 |
+
from audiocraft.data.audio_utils import apply_fade
|
19 |
from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
|
20 |
import numpy as np
|
21 |
import random
|
|
|
163 |
output = output_segments[0]
|
164 |
for i in range(1, len(output_segments)):
|
165 |
overlap_samples = overlap * MODEL.sample_rate
|
166 |
+
#stack tracks and fade out/in
|
167 |
+
overlapping_output_fadeout = output[:, :, -overlap_samples:]
|
168 |
+
overlapping_output_fadeout = apply_fade(overlapping_output_fadeout,sample_rate=MODEL.sample_rate,duration=overlap,out=True,start=True, curve_end=0.9, current_device=MODEL.device)
|
169 |
+
|
170 |
+
overlapping_output_fadein = output_segments[i][:, :, :overlap_samples]
|
171 |
+
overlapping_output_fadein = apply_fade(overlapping_output_fadein,sample_rate=MODEL.sample_rate,duration=overlap,out=False,start=False, curve_start=0.1, current_device=MODEL.device)
|
172 |
+
|
173 |
+
overlapping_output = (overlapping_output_fadeout + overlapping_output_fadein) / 2
|
174 |
+
print(f" overlap size Fade:{overlapping_output.size()}\n output: {output.size()}\n segment: {output_segments[i].size()}")
|
175 |
+
##overlapping_output = torch.cat([output[:, :, -overlap_samples:], output_segments[i][:, :, :overlap_samples]], dim=1) #stack tracks
|
176 |
+
##print(f" overlap size stack:{overlapping_output.size()}\n output: {output.size()}\n segment: {output_segments[i].size()}")
|
177 |
+
#overlapping_output = torch.cat([output[:, :, -overlap_samples:], output_segments[i][:, :, :overlap_samples]], dim=2) #stack tracks
|
178 |
+
#print(f" overlap size cat:{overlapping_output.size()}\n output: {output.size()}\n segment: {output_segments[i].size()}")
|
179 |
+
output = torch.cat([output[:, :, :-overlap_samples], overlapping_output, output_segments[i][:, :, overlap_samples:]], dim=dimension)
|
180 |
output = output.detach().cpu().float()[0]
|
181 |
except Exception as e:
|
182 |
print(f"Error combining segments: {e}. Using the first segment only.")
|
audiocraft/data/audio_utils.py
CHANGED
@@ -172,3 +172,29 @@ def i16_pcm(wav: torch.Tensor) -> torch.Tensor:
|
|
172 |
else:
|
173 |
assert wav.dtype == torch.int16
|
174 |
return wav
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
else:
|
173 |
assert wav.dtype == torch.int16
|
174 |
return wav
|
175 |
+
|
176 |
+
def apply_fade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start=True, curve_start:float=0.0, curve_end:float=1.0, current_device:str="cpu") -> torch.Tensor:
    """Return a copy of ``audio`` with a linear fade applied along its last axis.

    The fade is applied to exactly one end of the signal. The last dimension
    is treated as time, so ``[T]``, ``[C, T]`` and ``[B, C, T]`` inputs are
    all handled the same way. The input tensor is never modified.

    Args:
        audio: Waveform tensor whose last dimension is the sample axis.
        sample_rate: Samples per second, used to convert ``duration``.
        duration: Fade length in seconds. It is clamped to the length of
            ``audio``; a non-positive length returns an unchanged copy.
        out: If True the gain runs from ``curve_end`` down to ``curve_start``
            (fade out); otherwise from ``curve_start`` up to ``curve_end``
            (fade in).
        start: If True the fade covers the first samples of ``audio``,
            otherwise the last samples.
        curve_start: Gain at the quiet end of the linear ramp.
        curve_end: Gain at the loud end of the linear ramp.
        current_device: Kept for backward compatibility. The ramp is created
            on ``audio.device`` so the multiplication can never hit a device
            mismatch, which makes this argument redundant.

    Returns:
        A new tensor with the same shape as ``audio``.
    """
    total_samples = audio.shape[-1]
    # Clamp so a fade longer than the clip still works instead of raising a
    # broadcasting error; the whole ramp is then compressed into the clip.
    fade_samples = min(int(sample_rate * duration), total_samples)

    audio_faded = audio.clone()
    if fade_samples <= 0:
        # Guard needed because ``x[..., -0:]`` selects the whole tensor.
        return audio_faded

    fade_curve = torch.linspace(curve_start, curve_end, fade_samples, device=audio.device)
    if out:
        # Reverse the ramp so the gain decreases over time.
        fade_curve = fade_curve.flip(0)

    # Scale only the requested end; the 1-D ramp broadcasts over any leading
    # batch/channel dimensions.
    if start:
        audio_faded[..., :fade_samples] *= fade_curve
    else:
        audio_faded[..., -fade_samples:] *= fade_curve

    return audio_faded
|