# Spaces: Sleeping
# Module Imports | |
from pytube import YouTube | |
import whisper | |
import gradio as gr | |
import time | |
import re | |
from happytransformer import HappyTextToText, TTSettings | |
from difflib import Differ | |
# Speech-to-text model: Whisper "base.en" checkpoint, loaded once at import time.
STTmodel = whisper.load_model("base.en")

# Grammar-correction model: a T5 checkpoint wrapped by happytransformer.
GCmodel = HappyTextToText(
    model_type="T5",
    model_name="Ragnov/T5-Base-Grammar-Checker",
)

# Text-generation settings handed to the grammar model on each call.
args = TTSettings(num_beams=5, min_length=1)
# Functions | |
def transcribe(file):
    """Transcribe an audio file to text with the Whisper model.

    Args:
        file: Path of the audio file to transcribe. ``None`` or an empty
            path means that no audio was supplied.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no audio was supplied.
    """
    # Gradio passes None when "Transcribe" is clicked before anything has
    # been recorded; return early instead of letting Whisper fail on it.
    if not file:
        return ""
    options = dict(task="transcribe", best_of=5)
    text = STTmodel.transcribe(file, **options)["text"]
    return text.strip()
def get_filename(file_obj):
    """Return the ``orig_name`` attribute of an uploaded-file object.

    Args:
        file_obj: Object exposing an ``orig_name`` attribute.

    Returns:
        The value stored in ``file_obj.orig_name``.
    """
    original_name = file_obj.orig_name
    return original_name
def inference(link):
    """Download the audio track of a YouTube video and transcribe it.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when ``link`` is blank.
    """
    # Nothing to download for a blank link; avoid a pytube error.
    if not link or not link.strip():
        return ""
    yt = YouTube(link)
    # Use the first audio-only stream and save it under a fixed file name.
    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    results = STTmodel.transcribe(path)
    # Strip so the result matches what the other transcription paths return.
    return results["text"].strip()
def populate_metadata(link):
    """Look up the thumbnail URL and title of a YouTube video.

    Args:
        link (str): URL of the YouTube video.

    Returns:
        tuple: ``(thumbnail_url, title)`` of the video, or ``(None, None)``
        when ``link`` is blank.
    """
    # This runs on every change of the link textbox, including when it is
    # cleared; a blank link has no metadata to fetch.
    if not link or not link.strip():
        return None, None
    yt = YouTube(link)
    return yt.thumbnail_url, yt.title
def transcribe_file(file):
    """Transcribe an uploaded audio file with the Whisper model.

    Args:
        file: Uploaded-file object accepted by ``get_filename``, or ``None``
            when nothing has been uploaded.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no file was uploaded.
    """
    # Gradio passes None when "Transcribe" is clicked without an upload.
    if file is None:
        return ""
    audio_path = get_filename(file)
    options = dict(task="transcribe", best_of=5)
    text = STTmodel.transcribe(audio_path, **options)["text"]
    return text.strip()
def real_time_transcribe(audio, state=""):
    """Transcribe one audio chunk and append it to the running transcript.

    Args:
        audio: Audio input handed to the Whisper model.
        state (str): Transcript accumulated by earlier calls.

    Returns:
        tuple: The updated transcript twice (the same string in both slots).
    """
    time.sleep(2)
    chunk_text = STTmodel.transcribe(audio)["text"]
    updated = state + chunk_text + " "
    return updated, updated
# Sentence boundary: one or more spaces that follow ".", "?" or "!" and are
# followed by an uppercase letter. The look-behinds refuse to split when the
# character before the punctuation context is uppercase.
_SENTENCE_BOUNDARY = re.compile(
    r'(?<=[^A-Z].[.?!]) +(?=[A-Z])|(?<=[^A-Z][!]) +(?=[A-Z])'
)


def paragraph_to_sentences(paragraph):
    """Split a paragraph into a list of sentences.

    Args:
        paragraph (str): The paragraph to be converted to a list of sentences.

    Returns:
        list: The sentences extracted from the paragraph, each stripped of
        leading and trailing whitespace. Blank input yields an empty list.
    """
    fragments = _SENTENCE_BOUNDARY.split(paragraph)
    # Drop empty fragments so blank input does not produce an empty
    # "sentence" for the grammar model to process.
    return [
        sentence
        for sentence in (fragment.strip() for fragment in fragments)
        if sentence
    ]
def sentences_to_paragraph(sentences):
    """Grammar-correct each sentence and join the results into one string.

    Args:
        sentences (list): Sentences to send through the grammar model.

    Returns:
        str: The corrected sentences separated by single spaces.
    """
    corrected = [
        GCmodel.generate_text("grammar: " + sentence, args=args).text
        for sentence in sentences
    ]
    return " ".join(corrected)
def gramify(paragraph):
    """Return a grammar-corrected version of the transcribed paragraph.

    The paragraph is split into sentences, each sentence is corrected, and
    the corrected sentences are joined back together.
    """
    return sentences_to_paragraph(paragraph_to_sentences(paragraph))
def diff_texts(text1, text2):
    """Compute a character-level diff between two texts.

    Args:
        text1: The transcribed text.
        text2: The grammatically corrected text.

    Returns:
        list: ``(fragment, marker)`` tuples, one per diff entry, where
        ``marker`` is the entry's leading diff code (for example ``"+"`` or
        ``"-"``) or ``None`` when that code is a space (unchanged).
    """
    highlighted = []
    for entry in Differ().compare(text1, text2):
        # Each entry is a one-character code, a space, then the fragment.
        marker, fragment = entry[0], entry[2:]
        highlighted.append((fragment, None if marker == " " else marker))
    return highlighted
# Initial (empty) value shared by the three "Text Difference" views.
res_diff = []

# Colours for the diff markers: additions in green, removals in red.
_DIFF_COLOR_MAP = {"+": "green", "-": "red"}

# Gradio Blocks
# NOTE(review): the nesting of the layout containers below is inferred from
# the order of the statements -- confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(""" <p style="text-align: center;"> Speech To Text Grammar Checker </p>""")

    with gr.Tabs():
        # Tab 1: record from the microphone.
        with gr.TabItem("Voice Record"):
            with gr.Row():
                audio = gr.Audio(
                    show_label=False,
                    source="microphone",
                    type="filepath",
                )
                text_output1 = gr.Textbox(
                    label="Transcription",
                    placeholder="Text Output",
                )
            with gr.Row():
                transcribe_button1 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output1 = gr.Textbox(
                    label="Grammatically Corrected Text",
                    placeholder="Text Output",
                )
            with gr.Row():
                Diff_text_output1 = gr.HighlightedText(
                    label="Text Difference",
                    combine_adjacent=True,
                    value=res_diff,
                ).style(color_map=_DIFF_COLOR_MAP)

        # Tab 2: upload an audio file.
        with gr.TabItem("Upload File"):
            with gr.Row():
                file_upload = gr.File()
                text_output2 = gr.Textbox(
                    label="Transcription",
                    placeholder="Text Output",
                )
            with gr.Row():
                transcribe_button2 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output2 = gr.Textbox(
                    label="Grammatically Corrected Text",
                    placeholder="Text Output",
                )
            with gr.Row():
                Diff_text_output2 = gr.HighlightedText(
                    label="Text Difference",
                    combine_adjacent=True,
                    value=res_diff,
                ).style(color_map=_DIFF_COLOR_MAP)

        # Tab 3: transcribe the audio of a YouTube video.
        with gr.TabItem("Youtube Link"):
            with gr.Box():
                link = gr.Textbox(label="YouTube Link")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            text_link_output = gr.Textbox(
                label="Transcription",
                placeholder="Text Output",
                lines=5,
            )
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                transcribe_button3 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output3 = gr.Textbox(
                    label="Grammatically Corrected Text",
                    placeholder="Text Output",
                )
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                Diff_text_output3 = gr.HighlightedText(
                    label="Text Difference",
                    combine_adjacent=True,
                    value=res_diff,
                ).style(color_map=_DIFF_COLOR_MAP)

    gr.Markdown("""<p style="text-align: center;"> Not Satisfied with the result? </br>
<a href="https://forms.gle/yZA5DBygMUNmLZtv7">Click here to help us make it better.</a>
</p>""")

    with gr.Accordion("About", open=False):
        gr.Markdown("""
<p style="text-align: center;"> Thesis System presented by <br/> <br/>
• <b>Daniel L. Espinola</b> <br/>
• <b>Jhon Vincent A. Gupo</b> <br/>
• <b>Ryan M. Ibay</b> <br/> <br/>
In partial fulfillment of the requirements for the degree <br/>
Bachelor of Science in Computer Science Specialized in Intelligent Systems <br/>
Laguna State Polytechnic University - Los Baños Campus . <br/> <br/>
We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
• <b>Crisanto F. Gulay</b> - Adviser <br/>
• <b>Gene Marck B. Catedrilla</b> - Subject Specialist <br/>
</p>
""")

    # Fill in the thumbnail and title whenever the link textbox changes.
    link.change(populate_metadata, inputs=[link], outputs=[img, title])

    # Transcription
    transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
    transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
    transcribe_button3.click(inference, inputs=link, outputs=text_link_output)

    # Gramify: a change of a transcription box triggers grammar correction.
    text_output1.change(gramify, inputs=text_output1, outputs=Grammar_text_output1)
    text_output2.change(gramify, inputs=text_output2, outputs=Grammar_text_output2)
    text_link_output.change(gramify, inputs=text_link_output, outputs=Grammar_text_output3)

    # For Text Difference: a change of a corrected box refreshes its diff view.
    Grammar_text_output1.change(
        diff_texts,
        inputs=[text_output1, Grammar_text_output1],
        outputs=Diff_text_output1,
    )
    Grammar_text_output2.change(
        diff_texts,
        inputs=[text_output2, Grammar_text_output2],
        outputs=Diff_text_output2,
    )
    Grammar_text_output3.change(
        diff_texts,
        inputs=[text_link_output, Grammar_text_output3],
        outputs=Diff_text_output3,
    )

demo.launch()