import gradio as gr
from transformers import T5Tokenizer, T5ForConditionalGeneration

# xl size runs out of memory on a 16GB VM.
# All of the FLAN-T5 checkpoints use an input length of 512 tokens and
# outputs of up to 512 tokens.
# Parameter counts: small 80M, base 250M, large 780M, xl 3B, xxl 11B.
model_name = "large"
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-" + model_name)
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-" + model_name)

title = ""


def get_examples():
    """Return (prompt, expected answer) pairs shown as clickable examples in the UI."""
    return [
        [
            "Peter goes to the store to buy a soda. The soda costs $.25 an ounce. "
            "He brought $2 with him and leaves with $.50. How many ounces of soda did he buy?",
            "How much did Peter spend on soda? ** He spend $1.5 on soda because 2 - .5 = <<2-.5=1.5>>1.5 "
            "How many ounces of soda did Peter buy? ** He bought 6 ounces of soda because 1.5 / .25 = <<6=6>>6 #### 6",
        ],
        [
            "Krystian works in the library. He borrows an average of 40 books every day. "
            "Every Friday, his number of borrowed books is about 40% higher than the daily average. "
            "How many books does he borrow in a week if the library is open from Monday to Friday?",
            "How many books does Krystian borrow on Friday? ** The number of books borrowed "
            "on Friday is higher by 40 * 40/100 = <<40*40/100=16>>16 books. "
            "How many books does Krystian borrow in a week? ** There are 5 days from Monday to Friday inclusive, "
            "so Krystian borrows an average of 5 * 40 = <<5*40=200>>200 books during that time. "
            "How many books does Krystian borrow in a week? ** With Friday's increase in borrowings, "
            "during one week Krystian borrows 200 + 16 = <<200+16=216>>216 books.",
        ],
        [
            "Jane had $60 but gave $30 to dave and went to movies and spend $2. "
            "How much money does Jane has left? Answer by reasoning step by step:",
            "$28",
        ],
        [
            "Cat is a friend of a Dog. Are cat and Dog friends?",
            "Yes",
        ],
    ]


def text2text(input_text):
    """Generate a continuation of ``input_text`` with FLAN-T5.

    Returns the original prompt with the model's generation appended, so
    repeatedly submitting the output box continues the "chat".

    :param input_text: the prompt (possibly including prior generations).
    :return: ``input_text`` plus one space plus the newly generated text.
    """
    # The model's input limit is 512 tokens; generate() does not raise on
    # over-long input, it silently degrades — so truncate explicitly here.
    input_ids = tokenizer(
        input_text, return_tensors="pt", truncation=True, max_length=512
    ).input_ids
    input_num_tokens = input_ids.shape[1]
    print(f"Number of input tokens: {input_num_tokens}")
    print(f"Length of input: {len(input_text)}")
    list_of_tokens = tokenizer.convert_ids_to_tokens(input_ids.view(-1).tolist())
    print("Tokens : " + " ".join(list_of_tokens))
    # Generate at most 100 tokens per click to keep the UI responsive.
    # do_sample: don't greedily take the highest-probability token every
    # time — helps it chat with some variation.
    # temperature: how random the sampling should be.
    # top_p: nucleus sampling — filters out low-probability tokens before sampling.
    outputs = model.generate(
        input_ids,
        max_new_tokens=100,
        do_sample=True,
        temperature=0.7,
        top_p=0.8,
    )
    # skip_special_tokens removes pad/eos tokens from the decoded text.
    model_output_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Number of output tokens: {outputs.shape[1]}")
    print(f"length of output: {len(model_output_text)}")
    print(f"Output: {model_output_text}")
    # A space is added because the model does not seem to add it automatically.
    output_text = input_text + " " + model_output_text
    return output_text


with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Flan T5 Large Demo (Chat Mode)
    780M parameter Large language model fine tuned on diverse tasks.
    Prompt the model in the Input box. Models output is appended to input. To get additional generation hit submit again.
    """
    )
    txt_in = gr.Textbox(label="Input", lines=8)
    correct_label = gr.Label(label="Correct")
    btn = gr.Button(value="Submit")
    # Feed the output back into the input box so submitting again continues
    # the generation chat-style.
    btn.click(text2text, inputs=[txt_in], outputs=[txt_in])
    gr.Examples(
        examples=get_examples(),
        inputs=[txt_in, correct_label],
    )

if __name__ == "__main__":
    demo.launch()