from myrpunct import RestorePuncts
import gradio as gr
import re

def _restore_line_breaks(original, punctuated):
    """Re-apply the line structure of *original* to the words of *punctuated*.

    Both texts are compared word by word (words are separated by spaces;
    empty tokens caused by repeated spaces are ignored). Each line of the
    result contains as many words as the corresponding non-empty line of
    *original*.

    Args:
        original: The unpunctuated transcript, one caption per line.
        punctuated: The same words after punctuation/casing restoration.

    Returns:
        The punctuated text with lines separated by a newline, or an
        "AssertError: ..." message string when the two texts do not
        contain the same number of words.
    """
    # `\s*\n\s*` swallows blank lines and whitespace around each break
    # (including "\r"), so every resulting line holds at least one word.
    source_lines = re.split(r'\s*\n\s*', original.strip())
    words_per_line = [
        len([word for word in line.split(' ') if word])
        for line in source_lines
    ]
    punct_words = [word for word in punctuated.split(' ') if word]

    expected = sum(words_per_line)
    if expected != len(punct_words):
        # Reported as a single string so the text output can display it.
        return (
            "AssertError: The length of the transcript and the punctuated "
            f"file should be the same: {expected}, {len(punct_words)}"
        )

    # Slice the punctuated words by the original per-line word counts. No
    # sentinel character is involved, so '#' in either text is left alone.
    restored_lines = []
    start = 0
    for count in words_per_line:
        restored_lines.append(' '.join(punct_words[start:start + count]))
        start += count
    return '\n'.join(restored_lines)


def predict(input_text):
    """Punctuate *input_text* and keep its original line breaks.

    The text is passed to ``RestorePuncts.punctuate`` and the result is
    re-split so that it has the same number of lines as the input. This is
    needed because each line in the srt file corresponds to a frame from
    the video.

    Args:
        input_text: Transcript text, possibly spanning several lines.

    Returns:
        The punctuated text with the input's line breaks restored, an
        empty string for empty/whitespace-only input, or an
        "AssertError: ..." message string if the punctuated text does not
        have the same number of words as the input.
    """
    # Nothing to punctuate; skip the model call entirely.
    if not input_text or not input_text.strip():
        return ""

    # NOTE(review): a new RestorePuncts is built on every call; presumably
    # this reloads the model each time. Confirm whether an instance can be
    # reused safely before caching it.
    rpunct = RestorePuncts()
    output_text = rpunct.punctuate(input_text)
    print("Punctuation finished...")

    # Restore the carriage returns of the original transcript.
    return _restore_line_breaks(input_text, output_text)
 
if __name__ == "__main__":
    # Static metadata handed to the Gradio interface.
    app_title = "Rpunct App"
    app_description = """
<b>Description</b>: <br>
Model restores punctuation and case i.e. of the following punctuations -- [! ? . , - : ; ' ] and also the upper-casing of words. <br>
"""
    sample_inputs = ["my name is clara and i live in berkeley california"]

    # One text input wired to `predict`, one text output.
    demo = gr.Interface(
        fn=predict,
        inputs=["text"],
        outputs=["text"],
        title=app_title,
        description=app_description,
        examples=sample_inputs,
        allow_flagging="never",
    )

    demo.launch()