AlexN committed
Commit 6c5f6a4
1 Parent(s): 7132467
Files changed (2)
  1. eval.py +1 -1
  2. run_speech_recognition_ctc.py +1 -1
eval.py CHANGED
@@ -102,7 +102,7 @@ def main(args):
     # map function to decode audio
     def map_to_pred(batch):
         prediction = asr(
-            batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s,,skip_special_tokens=True
+            batch["audio"]["array"], chunk_length_s=args.chunk_length_s, stride_length_s=args.stride_length_s
         )
 
         batch["prediction"] = prediction["text"]# "".join(prediction["text"].split("<s>"))
run_speech_recognition_ctc.py CHANGED
@@ -643,7 +643,7 @@ def main():
 
         pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id
 
-        pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
+        pred_str = tokenizer.batch_decode(pred_ids)
         # we do not want to group tokens when computing the metrics
         label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)
 
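For context, a hedged sketch of the `compute_metrics` function these lines typically sit in, following the structure of the standard `run_speech_recognition_ctc.py` example; `tokenizer` and `eval_metrics` are assumed to be defined in the surrounding script, and the metric wiring is illustrative rather than taken from this commit.

```python
import numpy as np

def compute_metrics(pred):
    # Greedy CTC decoding: take the argmax token id at every frame.
    pred_ids = np.argmax(pred.predictions, axis=-1)

    # Labels padded with -100 for the loss are mapped back to the pad token id before decoding.
    pred.label_ids[pred.label_ids == -100] = tokenizer.pad_token_id

    pred_str = tokenizer.batch_decode(pred_ids)
    # we do not want to group tokens when computing the metrics
    label_str = tokenizer.batch_decode(pred.label_ids, group_tokens=False)

    # `eval_metrics` is assumed to map names like "wer" to loaded metric objects.
    return {k: v.compute(predictions=pred_str, references=label_str) for k, v in eval_metrics.items()}
```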