sanchit-gandhi HF staff committed on
Commit
3e3e17d
1 Parent(s): 3843f4e

add overall statistics

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py CHANGED
@@ -64,6 +64,46 @@ target_dtype = np.int16
64
  max_range = np.iinfo(target_dtype).max
65
 
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def get_visualisation(idx, model="large-v2", round_dp=2, ngram_degree=5):
68
  idx -= 1
69
  audio = dataset[idx]["audio"]
@@ -141,6 +181,18 @@ if __name__ == "__main__":
141
  "relative to the target transcriptions. Insertions are displayed in <span style='background-color:Lightgreen'>green</span>, and "
142
  "deletions in <span style='background-color:#FFCCCB'><s>red</s></span>."
143
  )
 
 
 
 
 
 
 
 
 
 
 
 
144
  slider = gr.Slider(
145
  minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
146
  )
 
64
  max_range = np.iinfo(target_dtype).max
65
 
66
 
67
def get_statistics(model="large-v2", round_dp=2, ngram_degree=5):
    """Compute corpus-level error statistics for one model's predictions.

    The normalised target transcriptions are compared against the normalised
    predictions of the selected model over the whole dataset at once.

    Args:
        model: Which predictions to score, either ``"large-v2"`` or
            ``"large-32-2"``.
        round_dp: Number of decimal places the two percentages are rounded to.
        ngram_degree: Size of the n-grams used for the repetition count.

    Returns:
        A tuple ``(wer_percentage, ier_percentage, repeated_ngrams)`` where
        ``wer_percentage`` is ``100 * wer_output.wer``, ``ier_percentage`` is
        the number of insertions as a percentage of the summed lengths of
        ``wer_output.references``, and ``repeated_ngrams`` is the number of
        n-gram occurrences in the predictions beyond the first occurrence of
        each distinct n-gram.

    Raises:
        ValueError: If ``model`` is not one of the two supported names.
    """
    text1 = norm_target
    if model == "large-v2":
        text2 = norm_pred_v2
    elif model == "large-32-2":
        text2 = norm_pred_32_2
    else:
        raise ValueError(
            f"Got unknown model {model}, should be one of `'large-v2'` or `'large-32-2'`."
        )

    wer_output = process_words(text1, text2, wer_default, wer_default)
    wer_percentage = round(100 * wer_output.wer, round_dp)

    # Insertions are normalised by the total reference length rather than by
    # the number of samples.
    total_reference_length = sum(len(ref) for ref in wer_output.references)
    ier_percentage = round(
        100 * wer_output.insertions / total_reference_length, round_dp
    )

    # All predictions are joined into one token stream, so n-grams may span
    # the boundary between consecutive samples.
    all_ngrams = list(ngrams(" ".join(text2).split(), ngram_degree))

    # A set de-duplicates in linear time; the previous list-membership loop
    # was quadratic in the number of n-grams and yields the same count.
    repeated_ngrams = len(all_ngrams) - len(set(all_ngrams))

    return wer_percentage, ier_percentage, repeated_ngrams
94
+
95
+
96
def get_overall_table():
    """Build the rows of the overall statistics table.

    Returns:
        A list with one row per model, in the order ``"large-v2"`` then
        ``"large-32-2"``. Each row is a list holding the model name followed
        by the values returned by ``get_statistics`` for that model.
    """
    model_names = ("large-v2", "large-32-2")
    # Prefix each statistics tuple with its model name to form a table row.
    return [[name, *get_statistics(model=name)] for name in model_names]
105
+
106
+
107
  def get_visualisation(idx, model="large-v2", round_dp=2, ngram_degree=5):
108
  idx -= 1
109
  audio = dataset[idx]["audio"]
 
181
  "relative to the target transcriptions. Insertions are displayed in <span style='background-color:Lightgreen'>green</span>, and "
182
  "deletions in <span style='background-color:#FFCCCB'><s>red</s></span>."
183
  )
184
+ gr.Markdown("**Overall statistics:**")
185
+ table = gr.Dataframe(
186
+ value=get_overall_table(),
187
+ headers=[
188
+ "Model",
189
+ "Word Error Rate (WER)",
190
+ "Insertion Error Rate (IER)",
191
+ "Repeated 5-grams",
192
+ ],
193
+ row_count=2,
194
+ )
195
+ gr.Markdown("**Per-sample statistics:**")
196
  slider = gr.Slider(
197
  minimum=1, maximum=len(norm_target), step=1, label="Dataset sample"
198
  )