cahya
/

wav2vec2-base-turkish

Automatic Speech Recognition

Generated from Trainer

hf-asr-leaderboard

robust-speech-event

Inference Endpoints

Model card Files Files and versions Community

cahya committed on Feb 3, 2022

Commit

f551c27

·

1 Parent(s): 133f88a

add run_evaluation.py

Files changed (1) hide show

run_evaluation.py +5 -3

run_evaluation.py CHANGED Viewed

@@ -16,13 +16,13 @@ python run_evaluation.py -m <wav2vec2 model_name> -d <Zindi dataset directory> -o
 class KenLM:
-    def __init__(self, tokenizer, model_name, num_workers=8, beam_width=128):
         self.num_workers = num_workers
         self.beam_width = beam_width
         vocab_dict = tokenizer.get_vocab()
         self.vocabulary = [x[0] for x in sorted(vocab_dict.items(), key=lambda x: x[1], reverse=False)]
         self.vocabulary = self.vocabulary[:-2]
-        self.decoder = build_ctcdecoder(self.vocabulary, model_name)
     @staticmethod
     def lm_postprocess(text):
@@ -52,6 +52,8 @@ def main():
                         help="Batch size")
     parser.add_argument("-k", "--kenlm", type=str, required=False, default=False,
                         help="Path to KenLM model")
     parser.add_argument("--num_workers", type=int, required=False, default=8,
                         help="KenLM's number of workers")
     parser.add_argument("-w", "--beam_width", type=int, required=False, default=128,
@@ -67,7 +69,7 @@ def main():
     model = Wav2Vec2ForCTC.from_pretrained(args.model_name)
     kenlm = None
     if args.kenlm:
-        kenlm = KenLM(processor.tokenizer, args.kenlm)
     # Preprocessing the datasets.
     # We need to read the audio files as arrays

 class KenLM:
+    def __init__(self, tokenizer, model_name, unigrams=None, num_workers=8, beam_width=128):
         self.num_workers = num_workers
         self.beam_width = beam_width
         vocab_dict = tokenizer.get_vocab()
         self.vocabulary = [x[0] for x in sorted(vocab_dict.items(), key=lambda x: x[1], reverse=False)]
         self.vocabulary = self.vocabulary[:-2]
+        self.decoder = build_ctcdecoder(self.vocabulary, model_name, unigrams=unigrams)
     @staticmethod
     def lm_postprocess(text):
                         help="Batch size")
     parser.add_argument("-k", "--kenlm", type=str, required=False, default=False,
                         help="Path to KenLM model")
+    parser.add_argument("-u", "--unigrams", type=str, required=False, default=False,
+                        help="Path to unigrams file")
     parser.add_argument("--num_workers", type=int, required=False, default=8,
                         help="KenLM's number of workers")
     parser.add_argument("-w", "--beam_width", type=int, required=False, default=128,
     model = Wav2Vec2ForCTC.from_pretrained(args.model_name)
     kenlm = None
     if args.kenlm:
+        kenlm = KenLM(processor.tokenizer, args.kenlm, args.unigrams)
     # Preprocessing the datasets.
     # We need to read the audio files as arrays