Spaces:

Ragnov
/

STT-Grammar-Checker

Sleeping

Ragnov

remove share=True

813029b over 1 year ago

7.37 kB

	# Module Imports
	from pytube import YouTube
	import whisper
	import gradio as gr
	import time
	import re
	from happytransformer import HappyTextToText, TTSettings
	from difflib import Differ

	# Models are created once at import time; the functions below reuse these globals.
	# Whisper speech-to-text model, "base.en" checkpoint.
	STTmodel = whisper.load_model("base.en")
	# HappyTransformer text-to-text wrapper around the T5 grammar-checker checkpoint.
	GCmodel = HappyTextToText("T5", "Ragnov/T5-Base-Grammar-Checker")
	# Generation settings handed to GCmodel.generate_text (5 beams, minimum length 1).
	args = TTSettings(num_beams=5, min_length=1)

	# Functions
	def transcribe(file):
	    """
	    Transcribe an audio file to text with the module-level Whisper model.

	    Args:
	        file (str): Path of the audio file to transcribe. May be None or
	            empty when no audio has been provided.

	    Returns:
	        str: The transcription with surrounding whitespace removed, or an
	        empty string when no audio was provided.
	    """
	    # The "Transcribe" button can be clicked before anything is recorded;
	    # return early instead of handing Whisper a missing path.
	    if not file:
	        return ""
	    result = STTmodel.transcribe(file, task="transcribe", best_of=5)
	    return result["text"].strip()

	def get_filename(file_obj):
	    """
	    Return the ``orig_name`` attribute of an uploaded file object.

	    Args:
	        file_obj: Object exposing an ``orig_name`` attribute.

	    Returns:
	        The value stored in ``file_obj.orig_name``.
	    """
	    original_name = file_obj.orig_name
	    return original_name

	def inference(link):
	    """
	    Download the audio of a YouTube video and transcribe it with Whisper.

	    Args:
	        link (str): URL of the YouTube video. May be None or blank when
	            the link textbox is empty.

	    Returns:
	        str: The transcription with surrounding whitespace removed, or an
	        empty string when no link was given.
	    """
	    # A blank textbox would otherwise fail inside pytube's URL parsing.
	    if not link or not link.strip():
	        return ""
	    yt = YouTube(link)
	    # Take the first audio-only stream and save it under a fixed filename.
	    # NOTE(review): the fixed "audio.mp4" name means overlapping requests
	    # would write to the same file - confirm this is acceptable.
	    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp4")
	    results = STTmodel.transcribe(path)
	    # strip() keeps the result consistent with transcribe()/transcribe_file().
	    return results["text"].strip()

	def populate_metadata(link):
	    """
	    Look up the thumbnail URL and title of a YouTube video.

	    Args:
	        link (str): URL of the YouTube video. May be None or blank when
	            the link textbox is empty.

	    Returns:
	        tuple: ``(thumbnail_url, title)`` of the video, or ``(None, None)``
	        when no link was given.
	    """
	    # This runs on every change of the link textbox, including clearing it;
	    # skip the lookup for a blank value rather than failing in pytube.
	    if not link or not link.strip():
	        return None, None
	    yt = YouTube(link)
	    return yt.thumbnail_url, yt.title

	def transcribe_file(file):
	    """
	    Transcribe an uploaded audio file with the module-level Whisper model.

	    Args:
	        file: Uploaded file object as accepted by ``get_filename``. May be
	            None when nothing has been uploaded.

	    Returns:
	        str: The transcription with surrounding whitespace removed, or an
	        empty string when no file was uploaded.
	    """
	    # Without an upload there is no object to read a filename from.
	    if file is None:
	        return ""
	    audio_path = get_filename(file)
	    result = STTmodel.transcribe(audio_path, task="transcribe", best_of=5)
	    return result["text"].strip()

	def real_time_transcribe(audio, state=""):
	    """
	    Transcribe ``audio`` and append the text to a running transcript.

	    Args:
	        audio: Audio input passed straight to ``STTmodel.transcribe``.
	        state (str): Transcript accumulated so far.

	    Returns:
	        tuple: The updated transcript, twice (same value in both slots).
	    """
	    time.sleep(2)
	    chunk = STTmodel.transcribe(audio)["text"]
	    transcript = state + (chunk + " ")
	    return transcript, transcript

	def paragraph_to_sentences(paragraph):
	    """
	    This function takes a paragraph as input and returns a list of sentences.

	    A sentence boundary is a run of spaces that is followed by an uppercase
	    ASCII letter and preceded by either
	    - ".", "?" or "!" where the character two positions before the
	      punctuation is not an uppercase ASCII letter, or
	    - "!" directly after a character that is not an uppercase ASCII letter.

	    Args:
	        paragraph (str): The paragraph to be converted to a list of sentences.

	    Returns:
	        list: A list of sentences extracted from the paragraph, each with
	        leading and trailing whitespace removed. A paragraph without any
	        boundary comes back as a single-element list.
	    """
	    # The two alternatives must be joined by a real "|": an escaped "\|"
	    # matches a literal pipe character, which makes the pattern unmatchable.
	    boundary = (
	        r'(?<=[^A-Z].[.?!]) +(?=[A-Z])'
	        r'|'
	        r'(?<=[^A-Z][!]) +(?=[A-Z])'
	    )
	    sentences = re.split(boundary, paragraph)
	    # Remove any leading or trailing spaces from each sentence.
	    return [sentence.strip() for sentence in sentences]

	def sentences_to_paragraph(sentences):
	    """
	    Grammar-correct each sentence and join the results into one paragraph.

	    Args:
	        sentences (list): Sentences to correct, in order.

	    Returns:
	        str: The corrected sentences separated by single spaces; an empty
	        string for an empty list.
	    """
	    # Each sentence is sent to the grammar model with the "grammar: " prefix.
	    corrected = [
	        GCmodel.generate_text("grammar: " + sentence, args=args).text
	        for sentence in sentences
	    ]
	    return " ".join(corrected)

	# Takes the transcribed text and returns a grammatically corrected version of it
	def gramify(paragraph):
	    """
	    Grammar-correct a paragraph sentence by sentence.

	    Args:
	        paragraph (str): Text to correct. May be None or blank.

	    Returns:
	        str: The corrected paragraph, or an empty string when there is no
	        text to correct.
	    """
	    # This runs on every change of a transcription box, including when it
	    # is cleared; do not send a bare "grammar: " prompt to the model.
	    if not paragraph or not paragraph.strip():
	        return ""
	    sentences = paragraph_to_sentences(paragraph)
	    return sentences_to_paragraph(sentences)

	# Computes the difference between the transcribed text and the corrected text
	def diff_texts(text1, text2):
	    """
	    Compare the transcribed text with the grammatically corrected text.

	    Both inputs are handed to ``difflib.Differ.compare`` as they are, so
	    two strings are compared element by element (character by character).

	    Args:
	        text1 (str): The transcribed text.
	        text2 (str): The grammatically corrected text.

	    Returns:
	        list: ``(item, tag)`` tuples in diff order, where ``tag`` is the
	        first character of the Differ entry (e.g. "+" or "-"), or None
	        when that character is a space (unchanged item).
	    """
	    pairs = []
	    for entry in Differ().compare(text1, text2):
	        # Differ entries look like "<code> <item>": code at index 0, item from index 2.
	        marker, content = entry[0], entry[2:]
	        pairs.append((content, None if marker == " " else marker))
	    return pairs
	# Initial (empty) value passed to the three "Text Difference" widgets.
	res_diff = []
	# Gradio Blocks
	# NOTE(review): indentation is missing in this copy of the file, so the nesting of the
	# `with` blocks below cannot be read off the source - confirm against the deployed app.
	demo = gr.Blocks()
	with demo:
	# Page heading.
	gr.Markdown(""" <p style="text-align: center;"> Speech To Text Grammar Checker </p>""")
	with gr.Tabs():
	# "Voice Record" tab: microphone input delivered as a file path, plus transcription,
	# corrected-text and diff outputs.
	with gr.TabItem("Voice Record"):
	with gr.Row():
	audio = gr.Audio(show_label=False,source="microphone",type="filepath")
	text_output1 = gr.Textbox(label="Transcription", placeholder="Text Output")
	with gr.Row():
	transcribe_button1 = gr.Button("Transcribe")
	with gr.Row():
	Grammar_text_output1 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
	with gr.Row():
	Diff_text_output1 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
	# "Upload File" tab: same outputs as above, fed from an uploaded file.
	with gr.TabItem("Upload File"):
	with gr.Row():
	file_upload = gr.File()
	text_output2 = gr.Textbox(label="Transcription", placeholder="Text Output")
	with gr.Row():
	transcribe_button2 = gr.Button("Transcribe")
	with gr.Row():
	Grammar_text_output2 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
	with gr.Row():
	Diff_text_output2 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
	# "Youtube Link" tab: link input, video title/thumbnail preview, then the same outputs.
	with gr.TabItem("Youtube Link"):
	with gr.Box():
	link = gr.Textbox(label="YouTube Link")
	with gr.Row().style(mobile_collapse=False, equal_height=True):
	title = gr.Label(label="Video Title", placeholder="Title")
	img = gr.Image(label="Thumbnail")
	text_link_output = gr.Textbox(label="Transcription", placeholder="Text Output",lines=5)
	with gr.Row().style(mobile_collapse=False, equal_height=True):
	transcribe_button3 = gr.Button("Transcribe")
	with gr.Row():
	Grammar_text_output3 = gr.Textbox(label="Grammatically Corrected Text", placeholder="Text Output")
	with gr.Row().style(mobile_collapse=False, equal_height=True):
	Diff_text_output3 = gr.HighlightedText(label="Text Difference",combine_adjacent=True,value=res_diff).style(color_map={"+": "green", "-": "red"})
	# Link to the feedback form.
	gr.Markdown("""<p style="text-align: center;"> Not Satisfied with the result? </br>
	<a href="https://forms.gle/yZA5DBygMUNmLZtv7">Click here to help us make it better.</a>
	</p>""")

	# Collapsed-by-default "About" section with the project credits.
	with gr.Accordion("About",open=False):
	gr.Markdown("""
	<p style="text-align: center;"> Thesis System presented by <br/> <br/>
	• <b>Daniel L. Espinola</b> <br/>
	• <b>Jhon Vincent A. Gupo</b> <br/>
	• <b>Ryan M. Ibay</b> <br/> <br/>
	In partial fulfillment of the requirements for the degree <br/>
	Bachelor of Science in Computer Science Specialized in Intelligent Systems <br/>
	Laguna State Polytechnic University - Los Baños Campus . <br/> <br/>
	We would also like to thank our fellow adviser and subject specialist for their guidance in making this idea a reality. <br/>
	• <b>Crisanto F. Gulay</b> - Adviser <br/>
	• <b>Gene Marck B. Catedrilla</b> - Subject Specialist <br/>
	</p>
	""")
	# Refresh the thumbnail and title whenever the YouTube link textbox changes.
	link.change(populate_metadata, inputs=[link], outputs=[img, title])

	# Transcription: each button runs the speech-to-text function for its tab.
	transcribe_button1.click(transcribe, inputs=audio, outputs=text_output1)
	transcribe_button2.click(transcribe_file, inputs=file_upload, outputs=text_output2)
	transcribe_button3.click(inference, inputs=link, outputs=text_link_output)

	# Gramify: a change to a transcription box triggers grammar correction of its text.
	text_output1.change(gramify,inputs=text_output1,outputs=Grammar_text_output1)
	text_output2.change(gramify,inputs=text_output2,outputs=Grammar_text_output2)
	text_link_output.change(gramify, inputs=text_link_output ,outputs=Grammar_text_output3)

	# For Text Difference: a change to a corrected-text box recomputes the diff
	# between the transcription and the corrected text.
	Grammar_text_output1.change(diff_texts,inputs=[text_output1,Grammar_text_output1],outputs=Diff_text_output1)
	Grammar_text_output2.change(diff_texts,inputs=[text_output2,Grammar_text_output2],outputs=Diff_text_output2)
	Grammar_text_output3.change(diff_texts,inputs=[text_link_output,Grammar_text_output3],outputs=Diff_text_output3)

	# Start the Gradio app.
	demo.launch()