pere committed
Commit 6bf5124 • 1 Parent(s): d326afc

manual commit

config.json CHANGED
@@ -6,7 +6,7 @@
   "add_adapter": false,
   "apply_spec_augment": true,
   "architectures": [
-    "Wav2Vec2ForCTC"
+    "Wav2Vec2ForPreTraining"
   ],
   "attention_dropout": 0.0,
   "bos_token_id": 1,
run.sh CHANGED
@@ -13,16 +13,16 @@ python run_speech_recognition_ctc_bnb.py \
 	--output_dir="./" \
 	--overwrite_output_dir \
 	--num_train_epochs="20" \
-	--per_device_train_batch_size="8" \
-	--per_device_eval_batch_size="8" \
+	--per_device_train_batch_size="16" \
+	--per_device_eval_batch_size="16" \
 	--gradient_accumulation_steps="4" \
 	--learning_rate="7.5e-5" \
 	--warmup_steps="100" \
 	--evaluation_strategy="steps" \
 	--length_column_name="input_length" \
 	--chars_to_ignore , ? . ! \- \; \: \" " % ‘ " � — ’ … – \
-	--save_steps="100" \
-	--eval_steps="100" \
+	--save_steps="250" \
+	--eval_steps="250" \
 	--logging_steps="100" \
 	--layerdrop="0.0" \
 	--activation_dropout="0.1" \
run_speech_recognition_ctc_bnb.py CHANGED
@@ -686,25 +686,25 @@ def main():
     # Instantiate custom data collator
     data_collator = DataCollatorCTCWithPadding(processor=processor)
 
-    decay_parameters = get_parameter_names(model, [torch.nn.LayerNorm])
-    decay_parameters = [name for name in decay_parameters if "bias" not in name]
-    optimizer_grouped_parameters = [
-        {
-            "params": [p for n, p in model.named_parameters() if n in decay_parameters],
-            "weight_decay": training_args.weight_decay,
-        },
-        {
-            "params": [p for n, p in model.named_parameters() if n not in decay_parameters],
-            "weight_decay": 0.0,
-        },
-    ]
-    optimizer = bnb.optim.Adam8bit(
-        params=optimizer_grouped_parameters,
-        betas=(training_args.adam_beta1, training_args.adam_beta2),
-        eps=training_args.adam_epsilon,
-    )
-
-    optimizers = (optimizer, None)
+    #decay_parameters = get_parameter_names(model, [torch.nn.LayerNorm])
+    #decay_parameters = [name for name in decay_parameters if "bias" not in name]
+    #optimizer_grouped_parameters = [
+    #    {
+    #        "params": [p for n, p in model.named_parameters() if n in decay_parameters],
+    #        "weight_decay": training_args.weight_decay,
+    #    },
+    #    {
+    #        "params": [p for n, p in model.named_parameters() if n not in decay_parameters],
+    #        "weight_decay": 0.0,
+    #    },
+    #]
+    #optimizer = bnb.optim.Adam8bit(
+    #    params=optimizer_grouped_parameters,
+    #    betas=(training_args.adam_beta1, training_args.adam_beta2),
+    #    eps=training_args.adam_epsilon,
+    #)
+
+    #optimizers = (optimizer, None)
 
     # Initialize Trainer
     trainer = Trainer(
@@ -715,7 +715,7 @@ def main():
         train_dataset=vectorized_datasets["train"] if training_args.do_train else None,
         eval_dataset=vectorized_datasets["eval"] if training_args.do_eval else None,
         tokenizer=feature_extractor,
-        optimizers=optimizers,
+        #optimizers=optimizers,
     )
 
     # 8. Finally, we can start training
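
Commenting out the hand-built bitsandbytes Adam8bit optimizer (and the optimizers= argument) means the Trainer falls back to the optimizer it creates from TrainingArguments. A rough sketch of that default path, reusing the model and training_args objects already defined in main() of this script (the exact optimizer class depends on training_args.optim and the installed transformers version):

import torch

# Approximation of Trainer's default grouping: weight decay is applied to all
# parameters except biases and LayerNorm weights.
decay_names = [
    n for n, _ in model.named_parameters()
    if "bias" not in n and "layer_norm" not in n.lower()
]
optimizer = torch.optim.AdamW(
    [
        {"params": [p for n, p in model.named_parameters() if n in decay_names],
         "weight_decay": training_args.weight_decay},
        {"params": [p for n, p in model.named_parameters() if n not in decay_names],
         "weight_decay": 0.0},
    ],
    lr=training_args.learning_rate,
    betas=(training_args.adam_beta1, training_args.adam_beta2),
    eps=training_args.adam_epsilon,
)

Unlike Adam8bit, standard AdamW keeps full-precision optimizer state, so this trades some GPU memory for not depending on bitsandbytes.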
special_tokens_map.json CHANGED
@@ -1 +1 @@
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}