Spaces:
Running
Running
File size: 3,855 Bytes
126f94b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# Module Imports
import tempfile

import gradio as gr
import whisper
from pytube import YouTube
# Load the Whisper "base.en" checkpoint once at import time; the transcription
# functions in this file all share this single instance through the global.
# NOTE(review): Whisper's README lists the ".en" checkpoints as English-only —
# confirm that is intended for every task this app exposes.
model = whisper.load_model("base.en")
# Functions
def transcribe(file):
    """Transcribe an audio file to text with the module-level Whisper model.

    Args:
        file: Path to the audio file to transcribe. The "Voice Record" tab
            wires this to a ``gr.Audio`` component created with
            ``type="filepath"``. ``None`` or an empty string means no audio
            was provided.

    Returns:
        The transcription with leading/trailing whitespace removed, or an
        empty string when no audio was provided.
    """
    # Clicking "Transcribe" before recording anything delivers an empty value;
    # return early instead of letting Whisper fail on a missing file.
    if not file:
        return ""
    options = dict(task="transcribe", best_of=5)
    text = model.transcribe(file, **options)["text"]
    return text.strip()
def translate(file):
    """Run the module-level Whisper model on an audio file with task="translate".

    NOTE(review): the module loads the "base.en" checkpoint, which Whisper's
    README describes as English-only, so the translate task presumably has no
    effect with it — confirm, and load a multilingual checkpoint if real
    translation is intended.

    Args:
        file: Path to the audio file. ``None`` or an empty string means no
            audio was provided.

    Returns:
        The model's text output with leading/trailing whitespace removed, or
        an empty string when no audio was provided.
    """
    # Nothing to process: return early rather than failing inside Whisper.
    if not file:
        return ""
    options = dict(task="translate", best_of=5)
    text = model.transcribe(file, **options)["text"]
    return text.strip()
def get_filename(file_obj):
    """Return the ``orig_name`` attribute of an uploaded-file object.

    Args:
        file_obj: Any object exposing an ``orig_name`` attribute (presumably
            the file wrapper produced by Gradio's ``File`` component —
            confirm against the caller).

    Returns:
        The value stored in ``file_obj.orig_name``.

    Raises:
        AttributeError: If ``file_obj`` has no ``orig_name`` attribute.
    """
    original_name = file_obj.orig_name
    return original_name
def inference(link):
    """Download a YouTube video's audio track and transcribe it with Whisper.

    Args:
        link: URL of the YouTube video. ``None`` or blank text means there is
            nothing to transcribe.

    Returns:
        The transcription with leading/trailing whitespace removed, or an
        empty string when no link was provided.
    """
    # Pressing "Transcribe" with an empty link box: nothing to download.
    if not link or not link.strip():
        return ""
    yt = YouTube(link)
    # Download into a per-call temporary directory instead of a fixed
    # "audio.mp4" in the working directory, so simultaneous requests cannot
    # overwrite each other's audio and the file is removed once transcribed.
    with tempfile.TemporaryDirectory() as workdir:
        path = yt.streams.filter(only_audio=True)[0].download(
            output_path=workdir, filename="audio.mp4"
        )
        results = model.transcribe(path)
    # Strip for consistency with the other transcription helpers in this file.
    return results["text"].strip()
def populate_metadata(link):
    """Fetch the thumbnail URL and title of a YouTube video.

    Args:
        link: URL of the YouTube video, passed straight to ``YouTube``.

    Returns:
        A ``(thumbnail_url, title)`` tuple, in that order.
    """
    video = YouTube(link)
    thumbnail = video.thumbnail_url
    video_title = video.title
    return thumbnail, video_title
def transcribe_file(file):
    """Transcribe an uploaded audio file with the module-level Whisper model.

    Args:
        file: The object delivered by the "Upload File" tab's ``gr.File``
            component, or ``None`` when nothing has been uploaded.

    Returns:
        The transcription with leading/trailing whitespace removed, or an
        empty string when no file was uploaded.
    """
    # "Transcribe" clicked with no upload: nothing to do.
    if file is None:
        return ""
    # Gradio documents ``.name`` as the path of the server-side temporary file
    # and ``.orig_name`` as the uploader's original filename. Whisper needs a
    # path it can actually open, so read ``.name`` rather than ``.orig_name``.
    audio_path = file.name
    options = dict(task="transcribe", best_of=5)
    text = model.transcribe(audio_path, **options)["text"]
    return text.strip()
# Gradio Blocks
demo = gr.Blocks()

# NOTE(review): the nesting of the layout below was reconstructed from a
# whitespace-stripped copy of this file — confirm which components sit inside
# each Row/Box against the deployed app.
with demo:
    gr.Markdown(""" <p style="text-align: center;"> Speech To Text Grammar Checker </p>""")
    with gr.Tabs():
        # Tab 1: record from the microphone and transcribe the recording.
        with gr.TabItem("Voice Record"):
            with gr.Row():
                audio = gr.Audio(show_label=False, source="microphone", type="filepath")
                text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
            transcribe_button1 = gr.Button("Transcribe")
            # The grammar-check buttons have no click handler attached yet.
            CG_button1 = gr.Button("Check Grammar (Work In Progress)")
        # Tab 2: upload an audio file and transcribe it.
        with gr.TabItem("Upload File"):
            with gr.Row():
                file_upload = gr.File()
                text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
            transcribe_button2 = gr.Button("Transcribe")
            # Named CG_button2 so it no longer shares a name with the YouTube
            # tab's CG_button3, which previously rebound this variable.
            CG_button2 = gr.Button("Check Grammar (Work In Progress)")
        # Tab 3: transcribe the audio track of a YouTube video.
        with gr.TabItem("Youtube Link"):
            with gr.Box():
                link = gr.Textbox(label="YouTube Link")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output", lines=5)
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                transcribe_button3 = gr.Button("Transcribe")
                CG_button3 = gr.Button("Check Grammar (Work In Progress)")
        # Tab 4: static credits.
        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown("""
<p style="text-align: center;"> Thesis System presented by <br/> <br/>
• <b>Jomari A. Buenaobra</b> <br/>
• <b>Christian G. Eslit</b> <br/>
• <b>Daniel L. Espinola</b> <br/>
• <b>Jhon Vincent A. Gupo</b> <br/>
• <b>Ryan M. Ibay</b> <br/> <br/>
In partial fulfillment of the requirements for the degree <br/>
Bachelor of Science in Computer Science Specialized in Intelligent Systems <br/>
Laguna State Polytechnic University - Los Baños Campus . <br/> <br/>
We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
• <b>Crisanto F. Gulay</b> - Adviser <br/>
• <b>Jonardo R. Asor</b> - Subject Specialist <br/>
</p>
""")
    # Event wiring: registered while the Blocks context is still open.
    # Changing the link refreshes the thumbnail and title previews.
    link.change(populate_metadata, inputs=[link], outputs=[img, title])
    transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
    transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
    transcribe_button3.click(inference, inputs=link, outputs=text_link_output)

demo.launch()