punct-tube-gr

Sleeping

App Files Files Community

punct-tube-gr / app.py

wencheng256

Update app.py

86d0ab8 verified almost 2 years ago

raw

history blame contribute delete

1.91 kB

	from myrpunct import RestorePuncts
	import gradio as gr
	import re

	def _regroup_words(words, line_lengths):
	    """Join *words* into newline-separated lines of the given word counts."""
	    rebuilt_lines = []
	    start = 0
	    for length in line_lengths:
	        rebuilt_lines.append(" ".join(words[start:start + length]))
	        start += length
	    return "\n".join(rebuilt_lines)


	def predict(input_text, punctuator=None):
	    """Punctuate *input_text* and give the result the same line breaks.

	    Args:
	        input_text: Transcript text. Each of its lines becomes one line of
	            the output; surrounding whitespace is stripped first.
	        punctuator: Optional object exposing ``punctuate(text)``. When it is
	            omitted a new ``RestorePuncts`` instance is created for the call.

	    Returns:
	        The punctuated text with one output line per input line, or a
	        message starting with ``"AssertError:"`` when the punctuated text
	        does not contain the same number of words as the input.
	    """
	    # Nothing to punctuate: avoid handing an empty transcript to the model.
	    if not input_text or not input_text.strip():
	        return ""

	    if punctuator is None:
	        # NOTE(review): this builds a fresh RestorePuncts on every call;
	        # pass a shared instance via `punctuator` if construction is costly.
	        punctuator = RestorePuncts()
	    output_text = punctuator.punctuate(input_text)
	    print("Punctuation finished...")

	    # Restore the carriage returns: each line of the srt file corresponds to
	    # a frame from the video, so the output must keep the same line layout.
	    # Word counts per line are recorded directly instead of tagging words
	    # with a '#' marker, which would clash with '#' already in the text.
	    line_lengths = [len(line.split()) for line in input_text.strip().split("\n")]
	    punctuated_words = output_text.split()

	    # The words can only be mapped back onto lines when none were added or
	    # dropped by the punctuation step.
	    source_word_count = sum(line_lengths)
	    if source_word_count != len(punctuated_words):
	        return (
	            "AssertError: The length of the transcript and the punctuated file "
	            f"should be the same: {source_word_count}, {len(punctuated_words)}"
	        )

	    return _regroup_words(punctuated_words, line_lengths)

	if __name__ == "__main__":
	    # Static text shown on the Gradio page. The lines of the description
	    # literal stay flush with the source because they are part of the string.
	    app_title = "Rpunct App"
	    app_description = """
	<b>Description</b>: <br>
	Model restores punctuation and case i.e. of the following punctuations -- [! ? . , - : ; ' ] and also the upper-casing of words. <br>
	"""
	    sample_inputs = ["my name is clara and i live in berkeley california"]

	    # One text box in, one text box out, wired to predict(); flagging is off.
	    demo = gr.Interface(
	        fn=predict,
	        inputs=["text"],
	        outputs=["text"],
	        title=app_title,
	        description=app_description,
	        examples=sample_inputs,
	        allow_flagging="never",
	    )

	    demo.launch()