Spaces:
Running
Running
File size: 7,378 Bytes
126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af 126f94b af969af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# Module Imports
from pytube import YouTube
import whisper
import gradio as gr
import time
import re
from happytransformer import HappyTextToText, TTSettings
from difflib import Differ
# Speech-to-text model: the Whisper "base.en" checkpoint, loaded once at import time.
STTmodel = whisper.load_model("base.en")
# Grammar-correction model: a T5 checkpoint wrapped by HappyTextToText.
GCmodel = HappyTextToText("T5", "Ragnov/T5-Base-Grammar-Checker")
# Generation settings handed to GCmodel.generate_text for every sentence.
args = TTSettings(num_beams=5, min_length=1)
# Functions
def transcribe(file):
    """Transcribe an audio file with the Whisper model.

    Args:
        file: Path of the audio file to transcribe. May be ``None`` or empty
            when the UI triggers the callback before anything was recorded.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no file was supplied.
    """
    # Without a recording there is nothing to feed the model; return an empty
    # transcription instead of letting the model call fail.
    if not file:
        return ""
    options = {"task": "transcribe", "best_of": 5}
    result = STTmodel.transcribe(file, **options)
    return result["text"].strip()
def get_filename(file_obj):
    """Return the ``orig_name`` attribute of an uploaded-file object.

    Args:
        file_obj: Object exposing an ``orig_name`` attribute.

    Returns:
        The value stored in ``file_obj.orig_name``.
    """
    original_name = file_obj.orig_name
    return original_name
def inference(link):
    """Download the audio of a YouTube video and transcribe it.

    Args:
        link: URL of the YouTube video. May be ``None``, empty or blank when
            the button is pressed before a link was entered.

    Returns:
        str: The text produced by the Whisper model, or an empty string when
        no link was supplied.
    """
    # No link entered yet: skip the download and the model call.
    if not link or not link.strip():
        return ""
    yt = YouTube(link)
    # Take the first audio-only stream and save it under a fixed file name.
    # NOTE(review): the file name is shared by every request — confirm that
    # concurrent use is not a concern.
    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
    results = STTmodel.transcribe(path)
    return results["text"]
def populate_metadata(link):
    """Look up the thumbnail URL and the title of a YouTube video.

    Args:
        link: URL of the YouTube video.

    Returns:
        tuple: ``(thumbnail_url, title)`` read from the ``YouTube`` object.
    """
    video = YouTube(link)
    thumbnail, video_title = video.thumbnail_url, video.title
    return thumbnail, video_title
def transcribe_file(file):
    """Transcribe an uploaded audio file with the Whisper model.

    Args:
        file: Uploaded-file object as accepted by ``get_filename``, or
            ``None`` when nothing has been uploaded yet.

    Returns:
        str: The transcription with surrounding whitespace removed, or an
        empty string when no file was supplied.
    """
    # Nothing uploaded: avoid the attribute lookup on None and the model call.
    if file is None:
        return ""
    # NOTE(review): the path handed to the model is whatever get_filename
    # returns (the object's ``orig_name``) — confirm it resolves to a
    # readable file for the Gradio version in use.
    path = get_filename(file)
    options = {"task": "transcribe", "best_of": 5}
    result = STTmodel.transcribe(path, **options)
    return result["text"].strip()
def real_time_transcribe(audio, state=""):
    """Transcribe one audio input and append it to the running transcript.

    Args:
        audio: Audio input forwarded unchanged to ``STTmodel.transcribe``.
        state: Transcript accumulated so far.

    Returns:
        tuple: The updated transcript twice, ``(transcript, transcript)``.
    """
    # Fixed two-second pause before transcribing.
    # NOTE(review): presumably gives the recording time to accumulate — confirm.
    time.sleep(2)
    recognised = STTmodel.transcribe(audio)["text"]
    transcript = state + recognised + " "
    return transcript, transcript
def paragraph_to_sentences(paragraph):
    """Split a paragraph into a list of whitespace-trimmed sentences.

    A boundary is a run of spaces that is followed by an uppercase A-Z letter
    and preceded by either
      * ``.``, ``?`` or ``!`` whose character two positions earlier is not an
        uppercase A-Z letter, or
      * ``!`` directly preceded by a character that is not an uppercase A-Z
        letter.

    Args:
        paragraph (str): The paragraph to split.

    Returns:
        list: The sentences, each stripped of leading and trailing whitespace.
    """
    boundary = r'(?<=[^A-Z].[.?!]) +(?=[A-Z])|(?<=[^A-Z][!]) +(?=[A-Z])'
    pieces = re.split(boundary, paragraph)
    # Trim every piece; the spaces consumed by the split are already gone.
    return list(map(str.strip, pieces))
def sentences_to_paragraph(sentences):
    """Grammar-correct each sentence and join the results into one string.

    Every sentence is sent to ``GCmodel.generate_text`` with the
    ``"grammar: "`` prefix and the module-level ``args`` settings.

    Args:
        sentences (list): Sentences to correct.

    Returns:
        str: The corrected sentences separated by single spaces; an empty
        string when ``sentences`` is empty.
    """
    corrected = [
        GCmodel.generate_text("grammar: " + sentence, args=args).text
        for sentence in sentences
    ]
    return " ".join(corrected)
# Takes the transcribed text and returns its grammar-corrected version
def gramify(paragraph):
    """Return a grammar-corrected version of a transcribed paragraph.

    The paragraph is split with ``paragraph_to_sentences`` and the pieces are
    corrected and re-joined by ``sentences_to_paragraph``.

    Args:
        paragraph (str): Transcribed text. May be ``None``, empty or blank
            when the transcription box is cleared.

    Returns:
        str: The corrected text, or an empty string when there is no text to
        correct.
    """
    # Nothing to correct: do not send an empty prompt to the grammar model.
    if not paragraph or not paragraph.strip():
        return ""
    sentences = paragraph_to_sentences(paragraph)
    return sentences_to_paragraph(sentences)
# Takes the transcribed text and the corrected text and returns their difference
def diff_texts(text1, text2):
    """Return the difference between two texts as labelled fragments.

    The transcribed text is the first input and the grammatically corrected
    text is the second. Both are compared with ``difflib.Differ``; since the
    inputs are strings, the comparison runs element by element, i.e. per
    character.

    Args:
        text1: Transcribed text.
        text2: Grammatically corrected text.

    Returns:
        list: ``(fragment, label)`` tuples, where ``label`` is the first
        character of the Differ token, or ``None`` when that character is a
        space (unchanged fragment).
    """
    highlighted = []
    for token in Differ().compare(text1, text2):
        marker = token[0]
        label = None if marker == " " else marker
        # The payload starts after the two-character Differ prefix.
        highlighted.append((token[2:], label))
    return highlighted
# Initial (empty) value given to the three "Text Difference" widgets.
res_diff = []
# Gradio Blocks
# NOTE(review): the nesting of the layout below was reconstructed from a
# listing that had lost its indentation — confirm it against the intended UI.
demo = gr.Blocks()
with demo:
    gr.Markdown(""" <p style="text-align: center;"> Speech To Text Grammar Checker </p>""")
    with gr.Tabs():
        # Tab 1: microphone recording.
        with gr.TabItem("Voice Record"):
            with gr.Row():
                audio = gr.Audio(show_label=False, source="microphone", type="filepath")
                text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
                transcribe_button1 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output1 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row():
                Diff_text_output1 = gr.HighlightedText(label="Text Difference", combine_adjacent=True, value=res_diff).style(color_map={"+": "green", "-": "red"})
        # Tab 2: uploaded audio file.
        with gr.TabItem("Upload File"):
            with gr.Row():
                file_upload = gr.File()
                text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
            with gr.Row():
                transcribe_button2 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output2 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row():
                Diff_text_output2 = gr.HighlightedText(label="Text Difference", combine_adjacent=True, value=res_diff).style(color_map={"+": "green", "-": "red"})
        # Tab 3: YouTube link.
        with gr.TabItem("Youtube Link"):
            with gr.Box():
                link = gr.Textbox(label="YouTube Link")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                title = gr.Label(label="Video Title", placeholder="Title")
                img = gr.Image(label="Thumbnail")
            text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output", lines=5)
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                transcribe_button3 = gr.Button("Transcribe")
            with gr.Row():
                Grammar_text_output3 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
            with gr.Row().style(mobile_collapse=False, equal_height=True):
                Diff_text_output3 = gr.HighlightedText(label="Text Difference", combine_adjacent=True, value=res_diff).style(color_map={"+": "green", "-": "red"})
    # The continuation lines of the Markdown strings below stay at column 0 so
    # that the string contents are unchanged.
    gr.Markdown("""<p style="text-align: center;"> Not Satisfied with the result? </br>
<a href="https://forms.gle/yZA5DBygMUNmLZtv7">Click here to help us make it better.</a>
</p>""")
    with gr.Accordion("About", open=False):
        gr.Markdown("""
<p style="text-align: center;"> Thesis System presented by <br/> <br/>
• <b>Daniel L. Espinola</b> <br/>
• <b>Jhon Vincent A. Gupo</b> <br/>
• <b>Ryan M. Ibay</b> <br/> <br/>
In partial fulfillment of the requirements for the degree <br/>
Bachelor of Science in Computer Science Specialized in Intelligent Systems <br/>
Laguna State Polytechnic University - Los Baños Campus . <br/> <br/>
We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
• <b>Crisanto F. Gulay</b> - Adviser <br/>
• <b>Gene Marck B. Catedrilla</b> - Subject Specialist <br/>
</p>
""")
    # Fill in thumbnail and title whenever the link text changes.
    link.change(populate_metadata, inputs=[link], outputs=[img, title])
    # Transcription
    transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
    transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
    transcribe_button3.click(inference, inputs=link, outputs=text_link_output)
    # Gramify: each transcription box feeds its grammar-corrected box.
    text_output1.change(gramify, inputs=text_output1, outputs=Grammar_text_output1)
    text_output2.change(gramify, inputs=text_output2, outputs=Grammar_text_output2)
    text_link_output.change(gramify, inputs=text_link_output, outputs=Grammar_text_output3)
    # For Text Difference: a change in a corrected box refreshes its diff view.
    Grammar_text_output1.change(diff_texts, inputs=[text_output1, Grammar_text_output1], outputs=Diff_text_output1)
    Grammar_text_output2.change(diff_texts, inputs=[text_output2, Grammar_text_output2], outputs=Diff_text_output2)
    Grammar_text_output3.change(diff_texts, inputs=[text_link_output, Grammar_text_output3], outputs=Diff_text_output3)
# The stray trailing "|" that followed this call was a syntax error and is removed.
demo.launch(share=True)