patrickvonplaten committed on
Commit
84a4607
1 Parent(s): 7ed45ae
Files changed (2) hide show
  1. avg_wer.py +24 -0
  2. hf_whisper_meanwhile.py +1 -1
avg_wer.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python3
"""Print the mean and variance of the numbers logged after a key in a file.

Usage: avg_wer.py FILE KEY

Every line of FILE that contains KEY contributes one sample: the text that
follows the last occurrence of KEY on that line, stripped of surrounding
whitespace and parsed as a float.
"""
import sys


def extract_values(lines, key):
    """Return the float found after the last occurrence of *key* on each line.

    Lines that do not contain *key* are skipped. A matching line whose
    remainder is not a valid float raises ``ValueError`` so that malformed
    input is reported instead of silently skewing the statistics.
    """
    return [float(line.split(key)[-1].strip()) for line in lines if key in line]


def mean_and_variance(values):
    """Return ``(mean, variance)`` of *values*.

    The variance is the population variance (divides by ``len(values)``).

    Raises:
        ValueError: if *values* is empty, since neither statistic is defined.
    """
    if not values:
        raise ValueError("cannot compute mean/variance of an empty sequence")
    mean = sum(values) / len(values)
    var = sum((v - mean) ** 2 for v in values) / len(values)
    return mean, var


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv[1:]``."""
    argv = sys.argv[1:] if argv is None else argv
    # Extra arguments are ignored, but FILE and a non-empty KEY are required
    # (an empty KEY would make ``str.split`` raise).
    if len(argv) < 2 or not argv[1]:
        sys.exit("usage: avg_wer.py FILE KEY")
    file, key = argv[0], argv[1]

    # Iterate the file lazily instead of loading every line into memory.
    with open(file, "r") as f:
        wers = extract_values(f, key)

    # Without this guard an input with no matching line would die with a
    # bare ZeroDivisionError.
    if not wers:
        sys.exit(f"no line containing {key!r} found in {file}")

    mean, var = mean_and_variance(wers)
    print("mean", mean)
    print("var", var)


if __name__ == "__main__":
    main()
hf_whisper_meanwhile.py CHANGED
@@ -42,7 +42,7 @@ for audio, label in zip(audios, labels):
42
  if inputs["input_features"].shape[-1] < 3000:
43
  continue
44
 
45
- # result = model_orig.transcribe(audio.astype(dtype=np.float32), condition_on_previous_text=DO_COND, temperature=0.0, logprob_threshold=None, compression_ratio_threshold=None, no_speech_threshold=None)
46
 
47
  gen_length = 448
48
  result_hf = model.generate(**inputs, condition_on_prev_tokens=DO_COND, max_new_tokens=gen_length, return_timestamps=True)
 
42
  if inputs["input_features"].shape[-1] < 3000:
43
  continue
44
 
45
+ result = model_orig.transcribe(audio.astype(dtype=np.float32), condition_on_previous_text=DO_COND, temperature=0.0, logprob_threshold=None, compression_ratio_threshold=None, no_speech_threshold=None)
46
 
47
  gen_length = 448
48
  result_hf = model.generate(**inputs, condition_on_prev_tokens=DO_COND, max_new_tokens=gen_length, return_timestamps=True)