patrickvonplaten committed on
Commit
7fb9ab5
1 Parent(s): df6ff05

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -0
app.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Demo script: restore punctuation in a LibriSpeech transcript with speechbox.
#
# Takes the first sample of the LibriSpeech "clean" validation split, prints its
# normalized transcript, then passes the audio and that transcript to a
# PunctuationRestorer built from a Whisper checkpoint and prints the result.
from speechbox import PunctuationRestorer
from datasets import load_dataset
import torch

# streaming=True iterates the split lazily instead of fetching it up front.
streamed_dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True)

# get first sample
sample = next(iter(streamed_dataset))

# print out normalized transcript
print(sample["text"])
# => "HE WAS IN A FEVERED STATE OF MIND OWING TO THE BLIGHT HIS WIFE'S ACTION THREATENED TO CAST UPON HIS ENTIRE FUTURE"

# load the restoring class
restorer = PunctuationRestorer.from_pretrained("openai/whisper-tiny.en")

# Prefer the GPU when one is available, but fall back to the CPU so the script
# still runs on hosts without CUDA instead of failing on a hard-coded "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"
restorer.to(device)

# The restorer receives the raw waveform, the unpunctuated transcript and the
# waveform's sampling rate; num_beams=1 is forwarded unchanged from the original call.
audio = sample["audio"]
restored_text, log_probs = restorer(
    audio["array"],
    sample["text"],
    sampling_rate=audio["sampling_rate"],
    num_beams=1,
)

print("Restored text:\n", restored_text)