sunsetsobserver committed on
Commit
b8b9847
1 Parent(s): 30ce9c6

Debugged generate.py

Files changed (5)
  1. .DS_Store +0 -0
  2. .gitignore +1 -0
  3. gen_res/First chunk copy.mid +0 -0
  4. generate.py +33 -7
  5. runs/.DS_Store +0 -0
.DS_Store DELETED
Binary file (6.15 kB)
 
.gitignore ADDED
@@ -0,0 +1 @@
+ .DS_Store
gen_res/First chunk copy.mid ADDED
Binary file (369 Bytes)
 
generate.py CHANGED
@@ -8,13 +8,40 @@ from torch import Tensor, argmax
  from torch.utils.data import DataLoader
  from torch.cuda import is_available as cuda_available, is_bf16_supported
  from torch.backends.mps import is_available as mps_available
- from transformers import AutoModelForCausalLM, MistralConfig, Trainer, TrainingArguments, GenerationConfig, AutoTokenizer, AutoModel
+ from transformers import AutoModelForCausalLM, MistralConfig, Trainer, TrainingArguments, GenerationConfig, AutoTokenizer, MistralForCausalLM
  from transformers.trainer_utils import set_seed
  from evaluate import load as load_metric
  from miditok import REMI, TokenizerConfig
  from miditok.pytorch_data import DatasetTok, DataCollator
  from tqdm import tqdm
 
+ # Our tokenizer's configuration
+ PITCH_RANGE = (21, 109)
+ BEAT_RES = {(0, 1): 8, (1, 2): 4, (2, 4): 2, (4, 8): 1}
+ NUM_VELOCITIES = 24
+ SPECIAL_TOKENS = ["PAD", "MASK", "BOS", "EOS"]
+ USE_CHORDS = False
+ USE_RESTS = False
+ USE_TEMPOS = True
+ USE_TIME_SIGNATURE = False
+ USE_PROGRAMS = False
+ NUM_TEMPOS = 32
+ TEMPO_RANGE = (50, 200)  # (min_tempo, max_tempo)
+ TOKENIZER_PARAMS = {
+     "pitch_range": PITCH_RANGE,
+     "beat_res": BEAT_RES,
+     "num_velocities": NUM_VELOCITIES,
+     "special_tokens": SPECIAL_TOKENS,
+     "use_chords": USE_CHORDS,
+     "use_rests": USE_RESTS,
+     "use_tempos": USE_TEMPOS,
+     "use_time_signatures": USE_TIME_SIGNATURE,
+     "use_programs": USE_PROGRAMS,
+     "num_tempos": NUM_TEMPOS,
+     "tempo_range": TEMPO_RANGE,
+ }
+ config = TokenizerConfig(**TOKENIZER_PARAMS)
+
  # Seed
  set_seed(777)
 
@@ -39,7 +66,7 @@ collator = DataCollator(
  )
 
  # Creates model using the correct configuration
- model = AutoModelForCausalLM.from_pretrained("sunsetsobserver/MIDI/runs")
+ model = MistralForCausalLM.from_pretrained("./runs")
 
  collator = DataCollator(tokenizer["PAD_None"], tokenizer["BOS_None"], tokenizer["EOS_None"], copy_inputs_as_labels=True)
 
@@ -53,7 +80,6 @@ generation_config = GenerationConfig(
  top_p=0.95,
  epsilon_cutoff=3e-4,
  eta_cutoff=1e-3,
- pad_token_id=tokenizer.padding_token_id,
  )
 
  # Here the sequences are padded to the left, so that the last token along the time dimension
@@ -77,10 +103,10 @@ for batch in tqdm(dataloader_test, desc='Testing model / Generating results'):
  tokens = [seq.tolist() for seq in tokens]
  for tok_seq in tokens[1:]:
  _midi = tokenizer.tokens_to_midi([deepcopy(tok_seq)])
- midi.instruments.append(_midi.instruments[0])
- midi.instruments[0].name = f'Continuation of original sample ({len(generated)} tokens)'
- midi.instruments[1].name = f'Original sample ({len(prompt)} tokens)'
- midi.instruments[2].name = f'Original sample and continuation'
+ midi.tracks.append(_midi.tracks[0])
+ midi.tracks[0].name = f'Continuation of original sample ({len(generated)} tokens)'
+ midi.tracks[1].name = f'Original sample ({len(prompt)} tokens)'
+ midi.tracks[2].name = f'Original sample and continuation'
  midi.dump_midi(gen_results_path / f'{count}.mid')
  tokenizer.save_tokens(tokens, gen_results_path / f'{count}.json')
 
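Note: a minimal sketch of how the pieces changed in this commit fit together at generation time. The REMI tokenizer, TOKENIZER_PARAMS, the ./runs checkpoint and the sampling values come from the diff above; the pad-token lookup, max_new_tokens value and the generate() call are illustrative assumptions, not part of this commit.

    # Sketch only: rebuild the tokenizer from the config added above and load the
    # locally trained checkpoint, then sample a continuation.
    from miditok import REMI, TokenizerConfig
    from transformers import MistralForCausalLM, GenerationConfig

    config = TokenizerConfig(**TOKENIZER_PARAMS)   # TOKENIZER_PARAMS as defined in the diff
    tokenizer = REMI(config)                       # must match the tokenizer used for training
    model = MistralForCausalLM.from_pretrained("./runs")

    generation_config = GenerationConfig(
        max_new_tokens=512,                   # illustrative value, not from this commit
        do_sample=True,
        top_p=0.95,
        epsilon_cutoff=3e-4,
        eta_cutoff=1e-3,
        pad_token_id=tokenizer["PAD_None"],   # assumption: stands in for the removed tokenizer.padding_token_id
    )
    # out = model.generate(input_ids, generation_config=generation_config)  # input_ids: left-padded prompt batch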
runs/.DS_Store DELETED
Binary file (6.15 kB)
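A side note on the last hunk of generate.py: recent miditok versions return a symusic Score from tokens_to_midi(), which exposes its parts as .tracks and is written out with dump_midi(), rather than the older object with .instruments; the renames in the diff follow that API. A minimal sketch of the merging pattern under that assumption (the *_tokens variable names are hypothetical):

    from copy import deepcopy

    # Decode the continuation, then append the prompt-only and prompt+continuation
    # decodes as extra tracks so a single file holds all three for comparison.
    score = tokenizer.tokens_to_midi([deepcopy(generated_tokens)])
    for tok_seq in (prompt_tokens, full_tokens):
        extra = tokenizer.tokens_to_midi([deepcopy(tok_seq)])
        score.tracks.append(extra.tracks[0])
    score.dump_midi("example.mid")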