allow steps to be different from 1000
- audiodiffusion/__init__.py +16 -11
- notebooks/test_model.ipynb +0 -0
audiodiffusion/__init__.py CHANGED
@@ -4,12 +4,12 @@ import torch
 import numpy as np
 from PIL import Image
 from tqdm.auto import tqdm
-from diffusers import DDPMPipeline
 from librosa.beat import beat_track
+from diffusers import DDPMPipeline, DDPMScheduler
 
 from .mel import Mel
 
-VERSION = "1.1.2"
+VERSION = "1.1.3"
 
 
 class AudioDiffusion:
@@ -60,7 +60,7 @@ class AudioDiffusion:
             raw_audio: np.ndarray = None,
             slice: int = 0,
             start_step: int = 0,
-            steps: int = 1000,
+            steps: int = None,
             generator: torch.Generator = None
     ) -> Tuple[Image.Image, Tuple[int, np.ndarray]]:
         """Generate random mel spectrogram from audio input and convert to audio.
@@ -70,7 +70,7 @@ class AudioDiffusion:
             raw_audio (np.ndarray): audio as numpy array
             slice (int): slice number of audio to convert
             start_step (int): step to start from
-            steps (int): number of de-noising steps to perform
+            steps (int): number of de-noising steps to perform (defaults to num_train_timesteps)
             generator (torch.Generator): random number generator or None
 
         Returns:
@@ -80,6 +80,10 @@ class AudioDiffusion:
 
         # It would be better to derive a class from DDPMDiffusionPipeline
         # but currently the return type ImagePipelineOutput cannot be imported.
+        if steps is None:
+            steps = self.ddpm.scheduler.num_train_timesteps
+        scheduler = DDPMScheduler(num_train_timesteps=steps)
+        scheduler.set_timesteps(steps)
         images = torch.randn(
             (1, self.ddpm.unet.in_channels, self.ddpm.unet.sample_size,
              self.ddpm.unet.sample_size),
@@ -94,16 +98,17 @@ class AudioDiffusion:
                 input_image.height))
         input_image = ((input_image / 255) * 2 - 1)
         if start_step > 0:
-            images[0][0] = self.ddpm.scheduler.add_noise(
-                torch.tensor(input_image[np.newaxis, np.newaxis, :]),
-                images, steps - start_step)
+            images[0][0] = scheduler.add_noise(
+                torch.tensor(input_image[np.newaxis, np.newaxis, :]),
+                images, steps - start_step)
 
         images = images.to(self.ddpm.device)
-        self.ddpm.scheduler.set_timesteps(steps)
-        for t in self.progress_bar(self.ddpm.scheduler.timesteps[start_step:]):
+        for t in self.progress_bar(scheduler.timesteps[start_step:]):
             model_output = self.ddpm.unet(images, t)['sample']
-            images = self.ddpm.scheduler.step(model_output, t, images,
-                                              generator=generator)['prev_sample']
+            images = scheduler.step(model_output,
+                                    t,
+                                    images,
+                                    generator=generator)['prev_sample']
         images = (images / 2 + 0.5).clamp(0, 1)
         images = images.cpu().permute(0, 2, 3, 1).numpy()
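With this change, callers can trade quality for speed by requesting fewer de-noising steps, since the scheduler is now built with num_train_timesteps equal to the requested step count instead of the fixed training value of 1000. A minimal usage sketch follows; the model id, constructor signature, method name, and sample rate are assumptions for illustration and are not shown in this diff:

import numpy as np
import torch

from audiodiffusion import AudioDiffusion

# Hypothetical model id and constructor; substitute your own checkpoint.
audio_diffusion = AudioDiffusion(model_id="teticio/audio-diffusion-256")

# One second of silence as a stand-in input (22,050 Hz assumed).
raw_audio = np.zeros(22050, dtype=np.float32)

generator = torch.Generator().manual_seed(42)
image, (sample_rate, audio) = \
    audio_diffusion.generate_spectrogram_and_audio_from_audio(
        raw_audio=raw_audio,
        slice=0,
        start_step=0,
        steps=50,  # no longer pinned to 1000 after this change
        generator=generator)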
notebooks/test_model.ipynb CHANGED
The diff for this file is too large to render. See raw diff.
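For intuition, the start_step branch implements an image-to-image style resume: the input spectrogram is noised to the level the schedule expects at timestep steps - start_step, and only the remaining steps are de-noised. A minimal, self-contained sketch of that pattern against the diffusers API, with toy tensors standing in for the real mel spectrogram and UNet:

import torch
from diffusers import DDPMScheduler

steps, start_step = 1000, 500

# A scheduler sized to the requested number of steps, as in the diff above.
scheduler = DDPMScheduler(num_train_timesteps=steps)
scheduler.set_timesteps(steps)

# Toy stand-ins for the mel-spectrogram image and the initial noise.
clean = torch.zeros(1, 1, 256, 256)
noise = torch.randn(1, 1, 256, 256)

# Noise the input to the level of timestep steps - start_step: the level
# the reverse process expects when it resumes at start_step.
noisy = scheduler.add_noise(clean, noise, torch.tensor([steps - start_step]))

# De-noising would then continue over scheduler.timesteps[start_step:],
# calling the UNet and scheduler.step() exactly as in the loop above.
print(noisy.shape, len(scheduler.timesteps[start_step:]))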