Hugo Flores Garcia committed
Commit 7b88c07
Parent: bf35d45

better onset detection!!!!!


remove annealing from sampling temperature
add pitch shifting w/ torch pitch shift (see the usage sketch below)
improvements to lora config
(TODO: fix a lora bug where the lora weights won't load correctly)
add helper scripts for collecting xeno canto data
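
For context, the pitch shifting wired into app.py below is a thin wrapper around torch_pitch_shift applied to an audiotools AudioSignal. A minimal usage sketch (the input path and the 3-semitone interval are illustrative; in the UI the interval comes from the new pitch_shift_amt slider, -12 to +12 semitones):

import audiotools as at
from torch_pitch_shift import pitch_shift

def shift_pitch(signal: at.AudioSignal, interval: int) -> at.AudioSignal:
    # shift by `interval` semitones, operating on the signal's (batch, channels, samples) tensor
    signal.samples = pitch_shift(
        signal.samples,
        shift=interval,
        sample_rate=signal.sample_rate,
    )
    return signal

sig = at.AudioSignal("input.wav")  # placeholder path
sig = shift_pitch(sig, 3)          # up 3 semitones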

app.py CHANGED
@@ -18,6 +18,16 @@ Interface = argbind.bind(Interface)
 
 conf = argbind.parse_args()
 
+
+from torch_pitch_shift import pitch_shift, get_fast_shifts
+def shift_pitch(signal, interval: int):
+    signal.samples = pitch_shift(
+        signal.samples,
+        shift=interval,
+        sample_rate=signal.sample_rate
+    )
+    return signal
+
 def load_interface():
     with argbind.scope(conf):
         interface = Interface()
@@ -95,6 +105,10 @@ def _vamp(data, return_mask=False):
     out_dir = OUT_DIR / str(uuid.uuid4())
     out_dir.mkdir()
     sig = at.AudioSignal(data[input_audio])
+    sig = interface.preprocess(sig)
+
+    if data[pitch_shift_amt] != 0:
+        sig = shift_pitch(sig, data[pitch_shift_amt])
 
     z = interface.encode(sig)
 
@@ -134,7 +148,27 @@ def _vamp(data, return_mask=False):
         mask = pmask.codebook_unmask(mask, ncc)
 
 
-    print(data)
+    print(f"dropout {data[dropout]}")
+    print(f"masktemp {data[masktemp]}")
+    print(f"sampletemp {data[sampletemp]}")
+    print(f"top_p {data[top_p]}")
+    print(f"prefix_s {data[prefix_s]}")
+    print(f"suffix_s {data[suffix_s]}")
+    print(f"rand_mask_intensity {data[rand_mask_intensity]}")
+    print(f"num_steps {data[num_steps]}")
+    print(f"periodic_p {data[periodic_p]}")
+    print(f"periodic_w {data[periodic_w]}")
+    print(f"n_conditioning_codebooks {data[n_conditioning_codebooks]}")
+    print(f"use_coarse2fine {data[use_coarse2fine]}")
+    print(f"onset_mask_width {data[onset_mask_width]}")
+    print(f"beat_mask_width {data[beat_mask_width]}")
+    print(f"beat_mask_downbeats {data[beat_mask_downbeats]}")
+    print(f"stretch_factor {data[stretch_factor]}")
+    print(f"seed {data[seed]}")
+    print(f"pitch_shift_amt {data[pitch_shift_amt]}")
+    print(f"sample_cutoff {data[sample_cutoff]}")
+
+
     _top_p = data[top_p] if data[top_p] > 0 else None
     # save the mask as a txt file
     np.savetxt(out_dir / "mask.txt", mask[:,0,:].long().cpu().numpy())
@@ -153,6 +187,7 @@ def _vamp(data, return_mask=False):
         top_p=_top_p,
         gen_fn=interface.coarse.generate,
         seed=_seed,
+        sample_cutoff=data[sample_cutoff],
     )
 
     if use_coarse2fine:
@@ -356,7 +391,7 @@ with gr.Blocks() as demo:
            onset_mask_width = gr.Slider(
                label="onset mask width (multiplies with the periodic mask, 1 step ~= 10milliseconds) ",
                minimum=0,
-                maximum=20,
+                maximum=100,
                step=1,
                value=5,
            )
@@ -374,6 +409,14 @@ with gr.Blocks() as demo:
 
 
        with gr.Accordion("extras ", open=False):
+            pitch_shift_amt = gr.Slider(
+                label="pitch shift amount (semitones)",
+                minimum=-12,
+                maximum=12,
+                step=1,
+                value=0,
+            )
+
            rand_mask_intensity = gr.Slider(
                label="random mask intensity. (If this is less than 1, scatters prompts throughout the audio, should be between 0.9 and 1.0)",
                minimum=0.0,
@@ -436,14 +479,15 @@ with gr.Blocks() as demo:
            masktemp = gr.Slider(
                label="mask temperature",
                minimum=0.0,
-                maximum=10.0,
+                maximum=100.0,
                value=1.5
            )
            sampletemp = gr.Slider(
                label="sample temperature",
                minimum=0.1,
-                maximum=2.0,
-                value=1.0
+                maximum=10.0,
+                value=1.0,
+                step=0.001
            )
 
 
@@ -459,7 +503,7 @@ with gr.Blocks() as demo:
                label="typical filtering ",
                value=False
            )
-            typical_mass = gr.Slider(
+            typical_mass = gr.Slider(
                label="typical mass (should probably stay between 0.1 and 0.5)",
                minimum=0.01,
                maximum=0.99,
@@ -472,6 +516,13 @@ with gr.Blocks() as demo:
                step=1,
                value=64
            )
+            sample_cutoff = gr.Slider(
+                label="sample cutoff",
+                minimum=0.0,
+                maximum=1.0,
+                value=0.5,
+                step=0.01
+            )
 
            use_coarse2fine = gr.Checkbox(
                label="use coarse2fine",
@@ -495,10 +546,6 @@ with gr.Blocks() as demo:
                value=0.0
            )
 
-            use_new_trick = gr.Checkbox(
-                label="new trick",
-                value=False
-            )
 
            seed = gr.Number(
                label="seed (0 for random)",
@@ -560,6 +607,8 @@ with gr.Blocks() as demo:
        beat_mask_downbeats,
        seed,
        lora_choice,
+        pitch_shift_amt,
+        sample_cutoff
    }
 
    # connect widgets
@@ -589,4 +638,4 @@ with gr.Blocks() as demo:
        outputs=[thank_you, download_file]
    )
 
-demo.launch(share=True, enable_queue=False, debug=True)
+demo.launch(share=True, enable_queue=True, debug=True)
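
A note on the new sample_cutoff control: it is passed from _vamp into VampNet.generate (see vampnet/modules/transformer.py below), where token re-sampling is gated by (i / sampling_steps) <= sample_cutoff. A tiny illustrative snippet of that gating, with assumed values:

sampling_steps = 36
sample_cutoff = 0.5  # the slider default above
gate = [(i / sampling_steps) <= sample_cutoff for i in range(sampling_steps)]
print(sum(gate), "of", sampling_steps, "steps draw new samples")  # roughly the first half
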
conf/lora/lora.yml CHANGED
@@ -4,14 +4,16 @@ $include:
 fine_tune: True
 
 train/AudioDataset.n_examples: 100000000
-val/AudioDataset.n_examples: 100
+val/AudioDataset.n_examples: 500
 
 
 NoamScheduler.warmup: 500
 
 batch_size: 7
 num_workers: 7
-save_iters: [100000, 200000, 300000, 4000000, 500000]
+save_iters: [10000, 20000, 30000, 40000, 50000]
+sample_freq: 1000
+val_freq: 500
 
 AdamW.lr: 0.0001
 
scripts/exp/fine_tune.py CHANGED
@@ -48,10 +48,10 @@ def fine_tune(audio_files_or_folders: List[str], name: str):
     }
 
     interface_conf = {
-        "Interface.coarse_ckpt": f"./models/vampnet/coarse.pth",
+        "Interface.coarse_ckpt": f"./runs/{name}/coarse/latest/vampnet/weights.pth",
         "Interface.coarse_lora_ckpt": f"./runs/{name}/coarse/latest/lora.pth",
 
-        "Interface.coarse2fine_ckpt": f"./models/vampnet/c2f.pth",
+        "Interface.coarse2fine_ckpt": f"./runs/{name}/c2f/latest/vampnet/weights.pth",
         "Interface.coarse2fine_lora_ckpt": f"./runs/{name}/c2f/latest/lora.pth",
         "Interface.wavebeat_ckpt": "./models/wavebeat.pth",
 
scripts/utils/augment.py CHANGED
@@ -5,34 +5,19 @@ from audiotools import AudioSignal
 
 import argbind
 import tqdm
+import torch
 
 
-from pedalboard import (
-    Compressor, Gain, Chorus, LadderFilter, Phaser, Convolution, Reverb, Pedalboard
-)
-from pedalboard.io import AudioFile
+from torch_pitch_shift import pitch_shift, get_fast_shifts
+from torch_time_stretch import time_stretch, get_fast_stretches
 
-# Read in a whole file, resampling to our desired sample rate:
-samplerate = 44100.0
-with AudioFile('guitar-input.wav').resampled_to(samplerate) as f:
-    audio = f.read(f.frames)
-
-# Make a pretty interesting sounding guitar pedalboard:
-board = Pedalboard([
-    Compressor(threshold_db=-50, ratio=25),
-    Gain(gain_db=30),
-    Chorus(),
-    LadderFilter(mode=LadderFilter.Mode.HPF12, cutoff_hz=900),
-    Phaser(),
-    Convolution("./guitar_amp.wav", 1.0),
-    Reverb(room_size=0.25),
-])
+from audiotools.core.util import sample_from_dist
 
 
 @argbind.bind(without_prefix=True)
 def augment(
-    audio_folder: Path,
-    dest_folder: Path,
+    audio_folder: Path = None,
+    dest_folder: Path = None,
     n_augmentations: int = 10,
 ):
     """
@@ -41,7 +26,8 @@ def augment(
     The dest foler will contain a folder for each of the clean dataset's files.
     Under each of these folders, there will be a clean file and many augmented files.
     """
-
+    assert audio_folder is not None
+    assert dest_folder is not None
     audio_files = at.util.find_audio(audio_folder)
 
     for audio_file in tqdm.tqdm(audio_files):
@@ -49,5 +35,33 @@ def augment(
         subdir = subtree / audio_file.stem
         subdir.mkdir(parents=True, exist_ok=True)
 
-        # apply pedalboard transforms
-        for i in range(n_augmentations):
+        src = AudioSignal(audio_file).to("cuda" if torch.cuda.is_available() else "cpu")
+
+
+        for i, chunk in tqdm.tqdm(enumerate(src.windows(10, 10))):
+            # apply pedalboard transforms
+            for j in range(n_augmentations):
+                # pitch shift between -7 and 7 semitones
+                import random
+                dst = chunk.clone()
+                dst.samples = pitch_shift(
+                    dst.samples,
+                    shift=random.choice(get_fast_shifts(src.sample_rate,
+                        condition=lambda x: x >= 0.25 and x <= 1.0)),
+                    sample_rate=src.sample_rate
+                )
+                dst.samples = time_stretch(
+                    dst.samples,
+                    stretch=random.choice(get_fast_stretches(src.sample_rate,
+                        condition=lambda x: x >= 0.667 and x <= 1.5, )),
+                    sample_rate=src.sample_rate,
+                )
+
+                dst.cpu().write(subdir / f"{i}-{j}.wav")
+
+
+if __name__ == "__main__":
+    args = argbind.parse_args()
+
+    with argbind.scope(args):
+        augment()
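
A note on the helpers used above: as I understand torch_pitch_shift and torch_time_stretch, get_fast_shifts and get_fast_stretches return candidate ratios (Fractions) that can be resampled efficiently, and the condition lambdas keep those ratios in [0.25, 1.0] and [0.667, 1.5]. A small sketch of picking one shift the way the augment loop does (the silent one-second tensor is synthetic):

import random
import torch
from torch_pitch_shift import pitch_shift, get_fast_shifts

sample_rate = 44100
audio = torch.zeros(1, 1, sample_rate)  # (batch, channels, samples), one second of silence

# candidate pitch ratios that resample cheaply; 1.0 means no shift, 0.5 is an octave down
shifts = get_fast_shifts(sample_rate, condition=lambda x: x >= 0.25 and x <= 1.0)
shifted = pitch_shift(audio, shift=random.choice(shifts), sample_rate=sample_rate)
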
scripts/utils/remove_quiet_files.py ADDED
@@ -0,0 +1,29 @@
+# removes files with loudness below 24db
+
+from pathlib import Path
+import shutil
+import audiotools as at
+import argbind
+
+@argbind.bind(without_prefix=True)
+def remove_quiet_files(
+    src_dir: Path = None,
+    dest_dir: Path = None,
+    min_loudness: float = -30,
+):
+    # copy src to dest
+    dest_dir.mkdir(parents=True, exist_ok=True)
+    shutil.copytree(src_dir, dest_dir, dirs_exist_ok=True)
+
+    audio_files = at.util.find_audio(dest_dir)
+    for audio_file in audio_files:
+        sig = at.AudioSignal(audio_file)
+        if sig.loudness() < min_loudness:
+            audio_file.unlink()
+            print(f"removed {audio_file}")
+
+if __name__ == "__main__":
+    args = argbind.parse_args()
+
+    with argbind.scope(args):
+        remove_quiet_files()
scripts/xeno-canto-dl.py ADDED
@@ -0,0 +1,234 @@
+from xenopy import Query
+
+
+SPECIES = [
+    "American Robin",
+    "Northern Cardinal",
+    "Mourning Dove",
+    "American Crow",
+    "Baltimore Oriole",
+    "Blue Jay",
+    "Eastern Bluebird",
+    "House Finch",
+    "American Goldfinch",
+    "House Sparrow",
+    "Song Sparrow",
+    "Tufted Titmouse",
+    "White-breasted Nuthatch",
+    "European Starling",
+    "American Redstart",
+    "Red-winged Blackbird",
+    "Brown-headed Cowbird",
+    "Common Grackle",
+    "Boat-tailed Grackle",
+    "Common Yellowthroat",
+    "Northern Mockingbird",
+    "Carolina Wren",
+    "Eastern Meadowlark",
+    "Chipping Sparrow",
+    "Tree Swallow",
+    "Barn Swallow",
+    "Cliff Swallow",
+    "Pine Siskin",
+    "Indigo Bunting",
+    "Eastern Towhee",
+    "Carolina Chickadee",
+    "Great Crested Flycatcher",
+    "Eastern Wood-Pewee",
+    "Ovenbird",
+    "Northern Flicker",
+    "Red-eyed Vireo",
+    "American Woodcock",
+    "Eastern Phoebe",
+    "Downy Woodpecker",
+    "Scarlet Tanager",
+    "Yellow Warbler",
+    "White-eyed Vireo",
+    "Common Loon",
+    "White-throated Sparrow",
+    "Yellow-throated Vireo",
+    "Great Blue Heron",
+    "Belted Kingfisher",
+    "Pied-billed Grebe",
+    "Wild Turkey",
+    "Wood Thrush",
+    "Rose-breasted Grosbeak",
+    "Field Sparrow",
+    "Hooded Warbler",
+    "Northern Parula",
+    "Chestnut-sided Warbler",
+    "Blue-winged Warbler",
+    "Red-bellied Woodpecker",
+    "Yellow-billed Cuckoo",
+    "Gray Catbird",
+    "Northern Saw-whet Owl",
+    "Osprey",
+    "Common Nighthawk",
+    "Broad-winged Hawk",
+    "Black-throated Green Warbler",
+    "Great Horned Owl",
+    "Common Raven",
+    "Barred Owl",
+    "Canada Warbler",
+    "Magnolia Warbler",
+    "Black-and-white Warbler",
+    "Eastern Kingbird",
+    "Swainson's Thrush",
+    "Worm-eating Warbler",
+    "Prairie Warbler",
+    "Baltimore Oriole",
+    "Black-throated Blue Warbler",
+    "Louisiana Waterthrush",
+    "Blackburnian Warbler",
+    "Black-capped Chickadee",
+    "Cerulean Warbler",
+    "Red-shouldered Hawk",
+    "Cooper's Hawk",
+    "Yellow-throated Warbler",
+    "Blue-headed Vireo",
+    "Blackpoll Warbler",
+    "Ruffed Grouse",
+    "Kentucky Warbler",
+    "Hermit Thrush",
+    "Cedar Waxwing",
+    "Eastern Screech-Owl",
+    "Northern Goshawk",
+    "Green Heron",
+    "Red-tailed Hawk",
+    "Black Vulture",
+    "Hairy Woodpecker",
+    "Golden-crowned Kinglet",
+    "Ruby-crowned Kinglet",
+    "Bicknell's Thrush",
+    "Blue-gray Gnatcatcher",
+    "Veery",
+    "Pileated Woodpecker",
+    "Purple Finch",
+    "White-crowned Sparrow",
+    "Snow Bunting",
+    "Pine Grosbeak",
+    "American Tree Sparrow",
+    "Dark-eyed Junco",
+    "Snowy Owl",
+    "White-winged Crossbill",
+    "Red Crossbill",
+    "Common Redpoll",
+    "Northern Shrike",
+    "Northern Harrier",
+    "Rough-legged Hawk",
+    "Long-eared Owl",
+    "Evening Grosbeak",
+    "Northern Pintail",
+    "American Black Duck",
+    "Mallard",
+    "Canvasback",
+    "Redhead",
+    "Ring-necked Duck",
+    "Greater Scaup",
+    "Lesser Scaup",
+    "Bufflehead",
+    "Common Goldeneye",
+    "Hooded Merganser",
+    "Common Merganser",
+    "Red-breasted Merganser",
+    "Ruddy Duck",
+    "Wood Duck",
+    "Gadwall",
+    "American Wigeon",
+    "Northern Shoveler",
+    "Green-winged Teal",
+    "Blue-winged Teal",
+    "Cinnamon Teal",
+    "Ringed Teal",
+    "Cape Teal",
+    "Northern Fulmar",
+    "Yellow-billed Loon",
+    "Red-throated Loon",
+    "Arctic Loon",
+    "Pacific Loon",
+    "Horned Grebe",
+    "Red-necked Grebe",
+    "Eared Grebe",
+    "Western Grebe",
+    "Clark's Grebe",
+    "Double-crested Cormorant",
+    "Pelagic Cormorant",
+    "Great Cormorant",
+    "American White Pelican",
+    "Brown Pelican",
+    "Brandt's Cormorant",
+    "Least Bittern",
+    "Great Egret",
+    "Snowy Egret",
+    "Little Blue Heron",
+    "Tricolored Heron",
+    "Reddish Egret",
+    "Black-crowned Night-Heron",
+    "Yellow-crowned Night-Heron",
+    "White Ibis",
+    "Glossy Ibis",
+    "Roseate Spoonbill",
+    "Wood Stork",
+    "Black-bellied Whistling-Duck",
+    "Fulvous Whistling-Duck",
+    "Greater White-fronted Goose",
+    "Snow Goose",
+    "Ross's Goose",
+    "Canada Goose",
+    "Brant",
+    "Mute Swan",
+    "Tundra Swan",
+    "Whooper Swan",
+    "Sandhill Crane",
+    "Black-necked Stilt",
+    "American Avocet",
+    "Northern Jacana",
+    "Greater Yellowlegs",
+    "Lesser Yellowlegs",
+    "Willet",
+    "Spotted Sandpiper",
+    "Upland Sandpiper",
+    "Whimbrel",
+    "Long-billed Curlew",
+    "Marbled Godwit",
+    "Ruddy Turnstone",
+    "Red Knot",
+    "Sanderling",
+    "Semipalmated Sandpiper",
+    "Western Sandpiper",
+    "Least Sandpiper",
+    "White-rumped Sandpiper",
+    "Baird's Sandpiper",
+    "Pectoral Sandpiper",
+    "Dunlin",
+    "Buff-breasted Sandpiper",
+    "Short-billed Dowitcher",
+    "Long-billed Dowitcher",
+    "Common Snipe",
+    "American Woodcock",
+    "Wilson's Phalarope",
+    "Red-necked Phalarope",
+    "Red Phalarope"
+]
+
+from pathlib import Path
+
+def remove_spaces(s):
+    return s.replace(" ", "")
+
+for species in SPECIES:
+    if Path("/media/CHONK/hugo/xeno-canto-full/" + remove_spaces(species)).exists():
+        continue
+    try:
+        q = Query(
+            name=species, q="A", length="10-30",
+        )
+
+        # retrieve metadata
+        metafiles = q.retrieve_meta(verbose=True)
+        # retrieve recordings
+        q.retrieve_recordings(multiprocess=True, nproc=10, attempts=10, outdir="/media/CHONK/hugo/xeno-canto-full/")
+
+    except:
+        print("Failed to download " + species)
+        continue
vampnet/mask.py CHANGED
@@ -191,29 +191,47 @@ def onset_mask(
     width: int = 1
 ):
     import librosa
-
-    onset_indices = librosa.onset.onset_detect(
-        y=sig.clone().to_mono().samples.cpu().numpy()[0, 0],
-        sr=sig.sample_rate,
-        hop_length=interface.codec.hop_length,
-        backtrack=True,
-    )
-
-    # create a mask, set onset
-    mask = torch.ones_like(z)
-    n_timesteps = z.shape[-1]
-
-    for onset_index in onset_indices:
-        onset_index = min(onset_index, n_timesteps - 1)
-        onset_index = max(onset_index, 0)
-        mask[:, :, onset_index - width:onset_index + width] = 0.0
-
-    print(mask)
+    import madmom
+    from madmom.features.onsets import RNNOnsetProcessor, OnsetPeakPickingProcessor
+    import tempfile
+    import numpy as np
+
+    with tempfile.NamedTemporaryFile(suffix='.wav') as f:
+        sig = sig.clone()
+        sig.write(f.name)
+
+        proc = RNNOnsetProcessor(online=False)
+        onsetproc = OnsetPeakPickingProcessor(threshold=0.3,
+            fps=sig.sample_rate/interface.codec.hop_length)
+
+        act = proc(f.name)
+        onset_times = onsetproc(act)
+
+        # convert to indices for z array
+        onset_indices = librosa.time_to_frames(onset_times, sr=sig.sample_rate, hop_length=interface.codec.hop_length)
+
+        if onset_indices.shape[0] == 0:
+            mask = empty_mask(z)
+            print(f"no onsets found, returning empty mask")
+        else:
+            torch.set_printoptions(threshold=1000)
+            print("onset indices: ", onset_indices)
+            print("onset times: ", onset_times)
+
+            # create a mask, set onset
+            mask = torch.ones_like(z)
+            n_timesteps = z.shape[-1]
+
+            for onset_index in onset_indices:
+                onset_index = min(onset_index, n_timesteps - 1)
+                onset_index = max(onset_index, 0)
+                mask[:, :, onset_index - width:onset_index + width] = 0.0
+
+            print(mask)
 
     return mask
 
 
 
 if __name__ == "__main__":
-    torch.set_printoptions(threshold=10000)
-
+    pass
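
The onset mask above now uses madmom's RNN onset detector instead of librosa.onset.onset_detect. A standalone sketch of that detection step, assuming a wav file on disk (the path is a placeholder) and madmom's documented processor API:

from madmom.features.onsets import RNNOnsetProcessor, OnsetPeakPickingProcessor

wav_path = "example.wav"  # placeholder

# frame-wise onset activation from madmom's pretrained RNN
act = RNNOnsetProcessor(online=False)(wav_path)

# peak-pick the activation into onset times (seconds); fps should match the activation frame rate
onset_times = OnsetPeakPickingProcessor(threshold=0.3, fps=100)(act)
print(onset_times)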
 
vampnet/modules/transformer.py CHANGED
@@ -367,15 +367,6 @@ class TransformerLayer(nn.Module):
 
         return x, position_bias, encoder_decoder_position_bias
 
-def t_schedule(n_steps, max_temp=1.0, min_temp=0.0, k=1.0):
-    x = np.linspace(0, 1, n_steps)
-    a = (0.5 - min_temp) / (max_temp - min_temp)
-
-    x = (x * 12) - 6
-    x0 = np.log((1 / a - 1) + 1e-5) / k
-    y = (1 / (1 + np.exp(- k *(x-x0))))[::-1]
-
-    return y
 
 class TransformerStack(nn.Module):
     def __init__(
@@ -598,7 +589,7 @@ class VampNet(at.ml.BaseModel):
         top_p=None,
         return_signal=True,
         seed: int = None,
-        sample_cutoff: float = 0.5
+        sample_cutoff: float = 0.5,
     ):
         if seed is not None:
             at.util.seed(seed)
@@ -651,7 +642,6 @@ class VampNet(at.ml.BaseModel):
        #################
        # begin sampling #
        #################
-        t_sched = t_schedule(sampling_steps, max_temp=sampling_temperature)
 
        for i in range(sampling_steps):
            logging.debug(f"step {i} of {sampling_steps}")
@@ -680,7 +670,7 @@ class VampNet(at.ml.BaseModel):
                logits, sample=(
                    (i / sampling_steps) <= sample_cutoff
                ),
-                temperature=t_sched[i],
+                temperature=sampling_temperature,
                typical_filtering=typical_filtering, typical_mass=typical_mass,
                typical_min_tokens=typical_min_tokens,
                top_k=None, top_p=top_p, return_probs=True,
@@ -843,7 +833,11 @@ def sample_from_logits(
 
 
 
-def mask_by_random_topk(num_to_mask: int, probs: torch.Tensor, temperature: float = 1.0):
+def mask_by_random_topk(
+    num_to_mask: int,
+    probs: torch.Tensor,
+    temperature: float = 1.0,
+):
    """
    Args:
        num_to_mask (int): number of tokens to mask
@@ -856,7 +850,8 @@ def mask_by_random_topk(num_to_mask: int, probs: torch.Tensor, temperature: floa
    logging.debug(f"temperature: {temperature}")
    logging.debug("")
 
-    confidence = torch.log(probs) + temperature * gumbel_noise_like(probs)
+    noise = gumbel_noise_like(probs)
+    confidence = torch.log(probs) + temperature * noise
    logging.debug(f"confidence shape: {confidence.shape}")
 
    sorted_confidence, sorted_idx = confidence.sort(dim=-1)
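
For context on the mask_by_random_topk change above: token confidence is log-probability plus temperature-scaled Gumbel noise, now computed in two explicit steps. A minimal sketch of that scoring on toy data, assuming gumbel_noise_like draws standard Gumbel(0, 1) noise in the usual -log(-log(u)) way (the helper below is a stand-in, not the repo's implementation):

import torch

def gumbel_noise_like(t: torch.Tensor, eps: float = 1e-20) -> torch.Tensor:
    # assumed stand-in: standard Gumbel(0, 1) noise via inverse-CDF sampling
    u = torch.zeros_like(t).uniform_(0, 1)
    return -torch.log(-torch.log(u + eps) + eps)

probs = torch.softmax(torch.randn(4, 10), dim=-1)  # toy per-token probabilities
temperature = 1.0

noise = gumbel_noise_like(probs)
confidence = torch.log(probs) + temperature * noise

# lower-confidence tokens are the ones that get re-masked
sorted_confidence, sorted_idx = confidence.sort(dim=-1)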