sunsetsobserver committed
Commit b8b9847 • Parent(s): 30ce9c6

Debugged generate.py
Files changed:
- .DS_Store (+0 -0)
- .gitignore (+1 -0)
- gen_res/First chunk copy.mid (+0 -0)
- generate.py (+33 -7)
- runs/.DS_Store (+0 -0)
.DS_Store DELETED
Binary file (6.15 kB)
.gitignore ADDED
@@ -0,0 +1 @@
+.DS_Store
gen_res/First chunk copy.mid ADDED
Binary file (369 Bytes)
generate.py CHANGED

@@ -8,13 +8,40 @@ from torch import Tensor, argmax
 from torch.utils.data import DataLoader
 from torch.cuda import is_available as cuda_available, is_bf16_supported
 from torch.backends.mps import is_available as mps_available
-from transformers import AutoModelForCausalLM, MistralConfig, Trainer, TrainingArguments, GenerationConfig, AutoTokenizer,
+from transformers import AutoModelForCausalLM, MistralConfig, Trainer, TrainingArguments, GenerationConfig, AutoTokenizer, MistralForCausalLM
 from transformers.trainer_utils import set_seed
 from evaluate import load as load_metric
 from miditok import REMI, TokenizerConfig
 from miditok.pytorch_data import DatasetTok, DataCollator
 from tqdm import tqdm
 
+# Our tokenizer's configuration
+PITCH_RANGE = (21, 109)
+BEAT_RES = {(0, 1): 8, (1, 2): 4, (2, 4): 2, (4, 8): 1}
+NUM_VELOCITIES = 24
+SPECIAL_TOKENS = ["PAD", "MASK", "BOS", "EOS"]
+USE_CHORDS = False
+USE_RESTS = False
+USE_TEMPOS = True
+USE_TIME_SIGNATURE = False
+USE_PROGRAMS = False
+NUM_TEMPOS = 32
+TEMPO_RANGE = (50, 200)  # (min_tempo, max_tempo)
+TOKENIZER_PARAMS = {
+    "pitch_range": PITCH_RANGE,
+    "beat_res": BEAT_RES,
+    "num_velocities": NUM_VELOCITIES,
+    "special_tokens": SPECIAL_TOKENS,
+    "use_chords": USE_CHORDS,
+    "use_rests": USE_RESTS,
+    "use_tempos": USE_TEMPOS,
+    "use_time_signatures": USE_TIME_SIGNATURE,
+    "use_programs": USE_PROGRAMS,
+    "num_tempos": NUM_TEMPOS,
+    "tempo_range": TEMPO_RANGE,
+}
+config = TokenizerConfig(**TOKENIZER_PARAMS)
+
 # Seed
 set_seed(777)
 
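(Review note) The block above rebuilds the tokenizer configuration used at training time, but the hunk stops before the tokenizer itself is instantiated. A minimal sketch of how `config` would typically become the `tokenizer` object the later hunks index into, assuming miditok's standard constructor; the saved-params path is hypothetical:

    from miditok import REMI, TokenizerConfig

    config = TokenizerConfig(**TOKENIZER_PARAMS)
    tokenizer = REMI(config)  # fresh tokenizer built from the constants above
    # Loading saved params instead would guarantee an exact vocabulary match
    # with training (hypothetical path):
    # tokenizer = REMI(params="runs/tokenizer.json")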
@@ -39,7 +66,7 @@ collator = DataCollator(
 )
 
 # Creates model using the correct configuration
-model =
+model = MistralForCausalLM.from_pretrained("./runs")
 
 collator = DataCollator(tokenizer["PAD_None"], tokenizer["BOS_None"], tokenizer["EOS_None"], copy_inputs_as_labels=True)
 
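(Review note) `from_pretrained("./runs")` expects that directory to hold a saved Hugging Face checkpoint (config plus weights). A minimal sketch of the pairing between the training side and this commit's loading side, assuming the model was persisted with the standard `save_pretrained` API:

    # Training side (not in this diff): write config.json + weights to ./runs
    model.save_pretrained("./runs")
    # Generation side (this commit): reload the fine-tuned checkpoint
    model = MistralForCausalLM.from_pretrained("./runs")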
@@ -53,7 +80,6 @@ generation_config = GenerationConfig(
     top_p=0.95,
     epsilon_cutoff=3e-4,
     eta_cutoff=1e-3,
-    pad_token_id=tokenizer.padding_token_id,
 )
 
 # Here the sequences are padded to the left, so that the last token along the time dimension
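(Review note) Left padding matters for batched generation: with pad tokens on the left, the last position of every row holds a real token for the model to continue from. A self-contained sketch of the idea; the helper name is illustrative, not from this repo:

    import torch

    def pad_left(seqs: list[list[int]], pad_id: int) -> torch.Tensor:
        # Place pad tokens before each sequence so the newest real token
        # always sits in the final position of the batch tensor.
        max_len = max(len(s) for s in seqs)
        return torch.stack([
            torch.cat((torch.full((max_len - len(s),), pad_id, dtype=torch.long),
                       torch.tensor(s, dtype=torch.long)))
            for s in seqs
        ])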
@@ -77,10 +103,10 @@ for batch in tqdm(dataloader_test, desc='Testing model / Generating results'):
     tokens = [seq.tolist() for seq in tokens]
     for tok_seq in tokens[1:]:
         _midi = tokenizer.tokens_to_midi([deepcopy(tok_seq)])
-        midi.
-        midi.
-        midi.
-        midi.
+        midi.tracks.append(_midi.tracks[0])
+        midi.tracks[0].name = f'Continuation of original sample ({len(generated)} tokens)'
+        midi.tracks[1].name = f'Original sample ({len(prompt)} tokens)'
+        midi.tracks[2].name = f'Original sample and continuation'
     midi.dump_midi(gen_results_path / f'{count}.mid')
     tokenizer.save_tokens(tokens, gen_results_path / f'{count}.json')
 
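(Review note) Read together, the four restored lines give each written .mid file three named tracks. A comment-level sketch of the layout this hunk implies; how tracks 0 and 1 are populated happens outside the hunk:

    # midi.tracks[0] -> 'Continuation of original sample (N tokens)'  (N = len(generated))
    # midi.tracks[1] -> 'Original sample (M tokens)'                  (M = len(prompt))
    # midi.tracks[2] -> 'Original sample and continuation'            (appended from _midi.tracks[0])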
runs/.DS_Store DELETED
Binary file (6.15 kB)