Spaces:
Sleeping
Sleeping
| from myrpunct import RestorePuncts | |
| import gradio as gr | |
| import re | |
def _restore_line_breaks(original_text, punctuated_text):
    """Re-wrap ``punctuated_text`` onto the same lines as ``original_text``.

    Words are matched positionally: line *i* of the result receives as many
    whitespace-separated words as line *i* of ``original_text`` contains.
    Break positions are tracked as per-line word counts instead of an in-band
    marker character, so a literal ``#`` in the text is left untouched.

    Args:
        original_text: The unpunctuated transcript, one caption per line.
        punctuated_text: The punctuated version of the same transcript.

    Returns:
        The punctuated text with the original line structure, or a message
        starting with ``"AssertError"`` when the two texts do not contain the
        same number of words (the positional mapping is impossible then).
    """
    # Outer whitespace is dropped so leading/trailing blank lines do not
    # become empty captions; interior blank lines are kept (word count 0).
    source_lines = original_text.strip().split("\n")
    # str.split() without arguments also swallows "\r" and repeated spaces.
    words_per_line = [len(line.split()) for line in source_lines]
    punctuated_words = punctuated_text.split()

    expected = sum(words_per_line)
    actual = len(punctuated_words)
    if expected != actual:
        # Always return a single string: the UI shows exactly one text output.
        return (
            "AssertError: The length of the transcript and the punctuated file "
            f"should be the same: {expected}, {actual}"
        )

    rebuilt_lines = []
    start = 0
    for count in words_per_line:
        rebuilt_lines.append(" ".join(punctuated_words[start:start + count]))
        start += count
    return "\n".join(rebuilt_lines)


def predict(input_text, rpunct=None):
    """Punctuate a transcript while keeping its original line breaks.

    Each line of an SRT-style transcript corresponds to a video frame, so the
    punctuated result must have the same number of lines as the input.

    Args:
        input_text: Raw, unpunctuated transcript (may span several lines).
        rpunct: Optional object exposing ``punctuate(text) -> str``. When
            omitted, a new ``RestorePuncts()`` is created for this call.
            NOTE(review): constructing it presumably loads the model, so
            callers may want to pass a shared instance -- confirm.

    Returns:
        The punctuated text with the input's line structure restored, an
        empty string for blank input, or an ``"AssertError: ..."`` message
        when the punctuated text has a different number of words.
    """
    # Nothing to punctuate: skip the punctuation backend entirely.
    if not input_text or not input_text.strip():
        return ""

    if rpunct is None:
        rpunct = RestorePuncts()
    output_text = rpunct.punctuate(input_text)
    print("Punctuation finished...")

    return _restore_line_breaks(input_text, output_text)
if __name__ == "__main__":
    # Script entry point: wrap `predict` in a one-text-in / one-text-out
    # Gradio interface and start serving it.
    examples = ["my name is clara and i live in berkeley california"]
    title = "Rpunct App"
    # Rendered as HTML under the title of the demo page.
    description = """
<b>Description</b>: <br>
Model restores punctuation and case i.e. of the following punctuations -- [! ? . , - : ; ' ] and also the upper-casing of words. <br>
"""
    interface_options = dict(
        fn=predict,
        inputs=["text"],
        outputs=["text"],
        title=title,
        description=description,
        examples=examples,
        allow_flagging="never",
    )
    interface = gr.Interface(**interface_options)
    interface.launch()