|
from speechbox import PunctuationRestorer |
|
import soundfile as sf |
|
import subprocess |
|
import gradio as gr |
|
|
|
# Load the punctuation-restoration model once at import time so that every
# call to `restore` reuses the same instance.
restorer = PunctuationRestorer.from_pretrained(
    "openai/whisper-tiny.en",
)
|
|
|
|
|
def convert_to_wav(path):
    """Ensure the audio file at ``path`` is available as a ``.wav`` file.

    A file whose extension is already ``.wav`` (compared case-insensitively)
    is returned untouched. Anything else is transcoded by running ``ffmpeg``,
    which writes a sibling file with the same stem and a ``.wav`` extension,
    overwriting an existing one because of ``-y``.

    Args:
        path: Filesystem path of the input audio file.

    Returns:
        A ``(path, error)`` tuple. On success ``path`` is the ``.wav`` file to
        read and ``error`` is ``None``. On failure ``path`` is the original
        input and ``error`` is a human-readable message.
    """
    import os  # function-scope import keeps this block self-contained

    # splitext only looks at the final path component, so dots in directory
    # names and extension-less files are handled correctly.
    stem, extension = os.path.splitext(path)
    if extension.lower() == '.wav':
        return path, None

    new_path = stem + '.wav'
    try:
        # A non-zero exit status does not raise here, so the return code is
        # checked explicitly below.
        completed = subprocess.run(['ffmpeg', '-i', path, new_path, '-y'])
    except OSError:
        # The ffmpeg executable is missing or could not be started.
        return path, 'Error: Could not convert file to .wav'

    if completed.returncode != 0:
        return path, 'Error: Could not convert file to .wav'
    return new_path, None
|
|
|
|
|
def restore(audio, original_transcript):
    """Restore punctuation in a transcript using the matching audio.

    Args:
        audio: Path of the audio file to read.
        original_transcript: The text whose punctuation should be restored.

    Returns:
        The ``(text, probs)`` pair exactly as returned by ``restorer``.
    """
    path, error = convert_to_wav(audio)
    if error is not None:
        # Best effort: report the failed conversion but still try the returned
        # path, since the reader may be able to open the file directly.
        print(error)

    data, samplerate = sf.read(path)

    # NOTE(review): the audio is passed on with whatever sample rate and
    # channel layout the file has -- confirm the restorer accepts that.
    text, probs = restorer(data, original_transcript, samplerate, num_beams=1)
    return text, probs
|
|
|
|
|
# Input widgets: an uploaded audio file (handed to `restore` as a file path)
# and the transcript text.
interface_inputs = [
    gr.inputs.Audio(source="upload", type="filepath"),
    gr.inputs.Textbox(default="", label="normalized text"),
]

# Output widgets, one for each of the two values returned by `restore`.
interface_outputs = [
    gr.outputs.Textbox(label='Restored text'),
    gr.outputs.Number(label='Log prob'),
]

# Build the web UI around `restore` and start serving it.
demo = gr.Interface(
    fn=restore,
    title='Punctuation Restorer',
    inputs=interface_inputs,
    outputs=interface_outputs,
)
demo.launch()
|
|