wldmr committed on
Commit
22e7a05
1 Parent(s): 25e3dec

added measures

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -1,6 +1,7 @@
1
  from deepmultilingualpunctuation import PunctuationModel
2
  import gradio as gr
3
  import re
 
4
 
5
  # https://stackoverflow.com/questions/22800401/how-to-capitalize-the-first-letter-of-every-sentence
6
  def cap(match):
@@ -73,7 +74,12 @@ def predict(brakes, transcript):
73
  regex3 = r"^\w"
74
  pcnt_file_cr_cap = re.sub(regex3, lambda x: x.group().upper(), re.sub(regex2, lambda x: x.group().upper(), re.sub(regex1, "I", pcnt_file_cr)))
75
 
76
- return pcnt_file_cr_cap
 
 
 
 
 
77
 
78
  if __name__ == "__main__":
79
 
@@ -85,9 +91,13 @@ Model restores punctuation and case i.e. of the following punctuations -- [! ? .
85
  examples = [['sentences', "my name is clara i live in berkeley california"]]
86
 
87
  interface = gr.Interface(fn = predict,
88
- inputs = [gr.Radio(["no brakes","sentences", "textlines"], value="no brakes", label="line brakes"),
89
  "text"],
90
- outputs = ["text"],
 
 
 
 
91
  title = title,
92
  description = description,
93
  examples=examples,
 
1
  from deepmultilingualpunctuation import PunctuationModel
2
  import gradio as gr
3
  import re
4
+ import metrics
5
 
6
  # https://stackoverflow.com/questions/22800401/how-to-capitalize-the-first-letter-of-every-sentence
7
  def cap(match):
 
74
  regex3 = r"^\w"
75
  pcnt_file_cr_cap = re.sub(regex3, lambda x: x.group().upper(), re.sub(regex2, lambda x: x.group().upper(), re.sub(regex1, "I", pcnt_file_cr)))
76
 
77
+ n_tokens= metrics.num_tokens(pcnt_file_cr_cap)
78
+ n_sents = metrics.num_sentences(pcnt_file_cr_cap)
79
+ n_words = metrics.num_words(pcnt_file_cr_cap)
80
+ n_chars = metrics.num_chars(pcnt_file_cr_cap)
81
+
82
+ return pcnt_file_cr_cap, n_words, n_sents, n_chars, n_tokens
83
 
84
  if __name__ == "__main__":
85
 
 
91
  examples = [['sentences', "my name is clara i live in berkeley california"]]
92
 
93
  interface = gr.Interface(fn = predict,
94
+ inputs = [gr.Radio(["no brakes","sentences", "textlines"], value="no brakes", label="preserve line brakes"),
95
  "text"],
96
+ outputs=[gr.Textbox(label="Punctuated Transcript"),
97
+ gr.Number(label="Number of Words"),
98
+ gr.Number(label="Number of Sentences"),
99
+ gr.Number(label="Number of Characters"),
100
+ gr.Number(label="Number of Tokens")],
101
  title = title,
102
  description = description,
103
  examples=examples,