# File-viewer header (not code): Ragnov, commit 813029b "remove share=True", 7.37 kB.
# Module Imports
from pytube import YouTube
import whisper
import gradio as gr
import time
import re
from happytransformer import HappyTextToText, TTSettings
from difflib import Differ
# Speech-to-text model: the Whisper "base.en" checkpoint, loaded once at import time.
STTmodel = whisper.load_model("base.en")
# Grammar-correction model: the "Ragnov/T5-Base-Grammar-Checker" T5 checkpoint,
# wrapped by happytransformer's HappyTextToText.
GCmodel = HappyTextToText("T5", "Ragnov/T5-Base-Grammar-Checker")
# Generation settings passed to GCmodel.generate_text() for every sentence.
args = TTSettings(num_beams=5, min_length=1)
# Functions
def transcribe(file):
    """Transcribe an audio input with the module-level Whisper model.

    Args:
        file: Audio input handed straight to ``STTmodel.transcribe`` (the UI
            passes the file path of the microphone recording).

    Returns:
        str: The recognized text without leading or trailing whitespace.
    """
    result = STTmodel.transcribe(file, task="transcribe", best_of=5)
    return result["text"].strip()
def get_filename(file_obj):
    """Return the ``orig_name`` attribute of an uploaded file object.

    Args:
        file_obj: Object exposing an ``orig_name`` attribute.

    Returns:
        The value stored in ``file_obj.orig_name``.
    """
    name = file_obj.orig_name
    return name
def inference(link):
    """Download the audio of a YouTube video and transcribe it.

    The first audio-only stream of the video is saved as ``audio.mp4`` and
    then passed to the module-level Whisper model.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        str: The transcription without leading or trailing whitespace, in
        line with ``transcribe`` and ``transcribe_file``.
    """
    yt = YouTube(link)
    # Pick the first audio-only stream and store it under a fixed filename.
    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    results = STTmodel.transcribe(path)
    # Strip so the text (and the diff built from it) has no stray edge spaces.
    return results["text"].strip()
def populate_metadata(link):
    """Look up the thumbnail URL and the title of a YouTube video.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        tuple: ``(thumbnail_url, title)`` of the video.
    """
    video = YouTube(link)
    thumbnail, video_title = video.thumbnail_url, video.title
    return thumbnail, video_title
def transcribe_file(file):
    """Transcribe an uploaded file with the module-level Whisper model.

    Args:
        file: Uploaded file object; the name returned by ``get_filename`` is
            what gets passed to the model.

    Returns:
        str: The recognized text without leading or trailing whitespace.
    """
    audio_name = get_filename(file)
    result = STTmodel.transcribe(audio_name, task="transcribe", best_of=5)
    return result["text"].strip()
def real_time_transcribe(audio, state=""):
    """Transcribe an audio chunk and append it to the running transcript.

    Args:
        audio: Audio input handed to ``STTmodel.transcribe``.
        state (str): Transcript accumulated so far.

    Returns:
        tuple: The updated transcript twice (one copy per output slot).
    """
    # Fixed two-second pause before each transcription.
    time.sleep(2)
    recognized = STTmodel.transcribe(audio)["text"]
    transcript = state + (recognized + " ")
    return transcript, transcript
def paragraph_to_sentences(paragraph):
    """
    Split a paragraph into a list of sentences.

    A sentence boundary is a run of spaces that follows ``.``, ``?`` or ``!``
    and precedes an uppercase letter. A boundary is not recognized when the
    character two places before the punctuation is an uppercase letter
    (so "Dr. Smith" stays together), except that ``!`` directly preceded by a
    non-uppercase character always ends a sentence.

    Args:
        paragraph (str): The paragraph to be converted to a list of sentences.
    Returns:
        list: The stripped, non-empty sentences of the paragraph. Empty,
        whitespace-only or ``None`` input yields an empty list.
    """
    # Nothing to split; avoids handing an empty "sentence" to later steps.
    if not paragraph:
        return []
    pieces = re.split(
        r'(?<=[^A-Z].[.?!]) +(?=[A-Z])|(?<=[^A-Z][!]) +(?=[A-Z])', paragraph
    )
    # Trim each piece and drop the ones that are blank after trimming.
    return [sentence for sentence in (piece.strip() for piece in pieces) if sentence]
def sentences_to_paragraph(sentences):
    """Grammar-correct each sentence and join the results into one paragraph.

    Every sentence is sent to the module-level grammar model with the
    ``"grammar: "`` prefix; the corrected sentences are joined with a single
    space.

    Args:
        sentences (list): Sentences to correct, in order.

    Returns:
        str: The corrected paragraph (empty string for an empty list).
    """
    return " ".join(
        GCmodel.generate_text("grammar: " + sentence, args=args).text
        for sentence in sentences
    )
# Takes the transcribed text and returns a grammar-corrected version of it.
def gramify(paragraph):
    """Grammar-correct a paragraph sentence by sentence.

    Args:
        paragraph (str): Text to correct.

    Returns:
        str: The paragraph rebuilt from the corrected sentences.
    """
    return sentences_to_paragraph(paragraph_to_sentences(paragraph))
# Takes the transcribed text as its first input and the corrected text as its second.
def diff_texts(text1, text2):
    """
    Compare the transcribed text (first input) with the grammatically
    corrected text (second input), item by item.

    Returns a list of ``(fragment, marker)`` tuples where ``marker`` is the
    first character of the difflib entry ("+", "-", ...) or ``None`` when the
    fragment is unchanged.
    """
    highlighted = []
    for entry in Differ().compare(text1, text2):
        # Each entry is a one-character code, a space, then the fragment.
        marker, fragment = entry[0], entry[2:]
        highlighted.append((fragment, None if marker == " " else marker))
    return highlighted
# Initial (empty) value shown by the HighlightedText diff components.
res_diff = []
# Gradio Blocks
# Builds the UI: three tabs (voice record, file upload, YouTube link), each with
# a transcription box, a grammar-corrected box and a highlighted text difference.
# NOTE(review): indentation is missing in this copy, so the nesting of the
# `with` containers below cannot be read off the text; restore it before running.
demo = gr.Blocks()
with demo:
gr.Markdown(""" <p style="text-align: center;"> Speech To Text Grammar Checker </p>""")
with gr.Tabs():
# Tab 1: record from the microphone (the audio is delivered as a file path).
with gr.TabItem("Voice Record"):
with gr.Row():
audio = gr.Audio(show_label=False,source="microphone",type="filepath")
text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
with gr.Row():
transcribe_button1 = gr.Button("Transcribe")
with gr.Row():
Grammar_text_output1 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
with gr.Row():
Diff_text_output1 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
# Tab 2: transcribe an uploaded file.
with gr.TabItem("Upload File"):
with gr.Row():
file_upload = gr.File()
text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
with gr.Row():
transcribe_button2 = gr.Button("Transcribe")
with gr.Row():
Grammar_text_output2 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
with gr.Row():
Diff_text_output2 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
# Tab 3: transcribe the audio of a YouTube video given by its link.
with gr.TabItem("Youtube Link"):
with gr.Box():
link = gr.Textbox(label="YouTube Link")
with gr.Row().style(mobile_collapse=False, equal_height=True):
title = gr.Label(label="Video Title", placeholder="Title")
img = gr.Image(label="Thumbnail")
text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output",lines=5)
with gr.Row().style(mobile_collapse=False, equal_height=True):
transcribe_button3 = gr.Button("Transcribe")
with gr.Row():
Grammar_text_output3 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
with gr.Row().style(mobile_collapse=False, equal_height=True):
Diff_text_output3 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
# Feedback link and collapsible "About" section.
gr.Markdown("""<p style="text-align: center;"> Not Satisfied with the result? </br>
<a href="https://forms.gle/yZA5DBygMUNmLZtv7">Click here to help us make it better.</a>
</p>""")
with gr.Accordion("About",open=False):
gr.Markdown("""
<p style="text-align: center;"> Thesis System presented by <br/> <br/>
• <b>Daniel L. Espinola</b> <br/>
• <b>Jhon Vincent A. Gupo</b> <br/>
• <b>Ryan M. Ibay</b> <br/> <br/>
In partial fulfillment of the requirements for the degree <br/>
Bachelor of Science in Computer Science Specialized in Intelligent Systems <br/>
Laguna State Polytechnic University - Los Baños Campus . <br/> <br/>
We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
• <b>Crisanto F. Gulay</b> - Adviser <br/>
• <b>Gene Marck B. Catedrilla</b> - Subject Specialist <br/>
</p>
""")
# Refresh the thumbnail and title whenever the link textbox changes.
link.change(populate_metadata, inputs=[link], outputs=[img, title])
# Transcription
# Each "Transcribe" button runs its tab's speech-to-text function.
transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
transcribe_button3.click(inference, inputs=link, outputs=text_link_output)
# Gramify
# A change in a transcription box triggers grammar correction of its text.
text_output1.change(gramify,inputs=text_output1,outputs=Grammar_text_output1)
text_output2.change(gramify,inputs=text_output2,outputs=Grammar_text_output2)
text_link_output.change(gramify, inputs=text_link_output ,outputs=Grammar_text_output3)
# For Text Difference
# A change in a corrected-text box recomputes the diff against its transcription.
Grammar_text_output1.change(diff_texts,inputs=[text_output1,Grammar_text_output1],outputs=Diff_text_output1)
Grammar_text_output2.change(diff_texts,inputs=[text_output2,Grammar_text_output2],outputs=Diff_text_output2)
Grammar_text_output3.change(diff_texts,inputs=[text_link_output,Grammar_text_output3],outputs=Diff_text_output3)
# Start the Gradio app.
demo.launch()