patrickvonplaten committed on
Commit
8026eab
1 Parent(s): 3545dbc
Files changed (2) hide show
  1. app.py +34 -12
  2. requirements.txt +2 -2
app.py CHANGED
@@ -1,19 +1,41 @@
1
  from speechbox import PunctuationRestorer
2
- from datasets import load_dataset
 
 
3
 
4
- streamed_dataset = load_dataset("librispeech_asr", "clean", split="validation", streaming=True)
5
 
6
- # get first sample
7
- sample = next(iter(streamed_dataset))
8
 
9
- # print out normalized transcript
10
- print(sample["text"])
11
- # => "HE WAS IN A FEVERED STATE OF MIND OWING TO THE BLIGHT HIS WIFE'S ACTION THREATENED TO CAST UPON HIS ENTIRE FUTURE"
 
 
 
 
 
 
12
 
13
- # load the restoring class
14
- restorer = PunctuationRestorer.from_pretrained("openai/whisper-tiny.en")
15
- restorer.to("cuda")
16
 
17
- restored_text, log_probs = restorer(sample["audio"]["array"], sample["text"], sampling_rate=sample["audio"]["sampling_rate"], num_beams=1)
 
 
 
 
 
 
 
 
18
 
19
- print("Restored text:\n", restored_text)
 
 
 
 
 
 
 
 
 
 
 
 
1
  from speechbox import PunctuationRestorer
2
+ import soundfile as sf
3
+ import subprocess
4
+ import gradio as gr
5
 
6
+ restorer = PunctuationRestorer.from_pretrained("openai/whisper-tiny.en")
7
 
 
 
8
 
9
def convert_to_wav(path):
    """Make sure the audio file at ``path`` is available as a ``.wav`` file.

    Files whose extension is already ``.wav`` (compared case-insensitively)
    are returned untouched. Anything else is transcoded with the ``ffmpeg``
    command-line tool into a sibling file with the same stem and a ``.wav``
    extension, overwriting that file if it already exists.

    Args:
        path: Filesystem path of the input audio file.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` points at the ``.wav``
        file and ``error`` is ``None``. If ``ffmpeg`` cannot be started or
        exits with a non-zero status, the ORIGINAL path is returned together
        with a human-readable error message.
    """
    # Function-scope import keeps this block self-contained.
    import os

    stem, extension = os.path.splitext(path)
    if extension.lower() == '.wav':
        return path, None

    new_path = stem + '.wav'
    try:
        # check=True turns a failed conversion into an exception instead of
        # silently handing back a path to a file that was never written.
        # '-y' is placed before the input so it is not a trailing option.
        subprocess.run(['ffmpeg', '-y', '-i', path, new_path], check=True)
    except (OSError, subprocess.CalledProcessError):
        # OSError: ffmpeg is not installed / not executable.
        # CalledProcessError: ffmpeg ran but could not convert the file.
        return path, 'Error: Could not convert file to .wav'
    return new_path, None
18
 
 
 
 
19
 
20
def restore(audio, original_transcript):
    """Restore punctuation in a transcript using the matching audio file.

    Args:
        audio: Filesystem path of the audio file.
        original_transcript: The normalized (unpunctuated) transcript text.

    Returns:
        The ``(text, probs)`` pair produced by the module-level ``restorer``.
    """
    path, error = convert_to_wav(audio)
    if error is not None:
        # Best effort: on a failed conversion convert_to_wav hands back the
        # original path, so still try to read it directly below. Only log
        # when there really is an error instead of printing "None" each call.
        print(error)

    data, samplerate = sf.read(path)

    # soundfile returns a (frames, channels) array for multi-channel files;
    # average the channels into a single 1-D signal.
    # NOTE(review): presumably the restorer expects mono audio - confirm.
    if data.ndim > 1:
        data = data.mean(axis=1)

    text, probs = restorer(data, original_transcript, samplerate, num_beams=1)

    return text, probs
28
+
29
 
30
# Input widgets: an uploaded audio file (passed to `restore` as a file path)
# and the normalized transcript to be punctuated.
audio_input = gr.inputs.Audio(source="upload", type="filepath")
transcript_input = gr.inputs.Text(default="", label="normalized text")

# Output widgets, in the order `restore` returns its two values.
restored_text_output = gr.outputs.Textbox(label='Restored text')
log_prob_output = gr.outputs.Number(label='Log prob')

# Wire the widgets to `restore` and start the web app.
demo = gr.Interface(
    title='Punctuation Restorer',
    fn=restore,
    inputs=[audio_input, transcript_input],
    outputs=[restored_text_output, log_prob_output],
)
demo.launch()
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  transformers>=4.25.0
2
  torch
3
- speechbox
4
- torchaudio
 
1
  transformers>=4.25.0
2
  torch
3
+ speechbox>=0.1.0
4
+ soundfile