marinone94 committed on
Commit
060c28e
1 Parent(s): 7780ee2

Remove the add-LM (language model) step from the script

Browse files
Files changed (1) hide show
  1. run_speech_recognition_ctc.py +1 -19
run_speech_recognition_ctc.py CHANGED
@@ -31,7 +31,6 @@ import numpy as np
31
  import torch
32
  import wandb
33
  from datasets import DatasetDict, load_dataset, load_metric
34
- from pyctcdecode import build_ctcdecoder
35
 
36
  import transformers
37
  from transformers import (
@@ -743,24 +742,7 @@ def main():
743
  trainer.push_to_hub(**kwargs)
744
  else:
745
  trainer.create_model_card(**kwargs)
746
-
747
- if training_args.push_lm_to_hub:
748
- vocab_dict = processor.tokenizer.get_vocab()
749
- sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
750
-
751
- decoder = build_ctcdecoder(
752
- labels=list(sorted_vocab_dict.keys()),
753
- kenlm_model_path="5gram_sv_lm.bin",
754
- )
755
-
756
- processor_with_lm = Wav2Vec2ProcessorWithLM(
757
- feature_extractor=processor.feature_extractor,
758
- tokenizer=processor.tokenizer,
759
- decoder=decoder
760
- )
761
- processor_with_lm.save_pretrained(repo_name)
762
- processor_with_lm.push_to_hub(**kwargs)
763
-
764
  return results
765
 
766
 
 
31
  import torch
32
  import wandb
33
  from datasets import DatasetDict, load_dataset, load_metric
 
34
 
35
  import transformers
36
  from transformers import (
 
742
  trainer.push_to_hub(**kwargs)
743
  else:
744
  trainer.create_model_card(**kwargs)
745
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
746
  return results
747
 
748