import sys
from typing import Union

import requests
import torch
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer


def main(
    load_8bit: bool = False,
    base_model: str = "decapoda-research/llama-7b-hf",
    lora_weights: str = "ohmreborn/llama-lora-7b",
):
    # Load the base LLaMA model on CPU and apply the LoRA adapter on top of it.
    device = "cpu"

    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=load_8bit,
        max_memory={"cpu": "15GiB"},
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    model = PeftModel.from_pretrained(
        model,
        lora_weights,
        device_map={"": device},
    )

    # Standard LLaMA special-token ids (same convention as alpaca-lora).
    model.config.pad_token_id = tokenizer.pad_token_id = 0  # pad with <unk>
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2

    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    return model, tokenizer


model, tokenizer = main()


class Prompter(object):
    def __init__(self):
        # Fetch the Alpaca prompt template used by alpaca-lora.
        url = "https://raw.githubusercontent.com/tloen/alpaca-lora/main/templates/alpaca.json"
        response = requests.request("GET", url)
        self.template = response.json()

    def generate_prompt(
        self,
        instruction: str,
        input: Union[None, str] = None,
        label: Union[None, str] = None,
    ) -> str:
        if input:
            res = self.template["prompt_input"].format(
                instruction=instruction, input=input
            )
        else:
            res = self.template["prompt_no_input"].format(instruction=instruction)
        if label:
            res = f"{res}{label}"
        return res

    def get_response(self, output: str) -> str:
        return output.split(self.template["response_split"])[1].strip()


def generate(
    input=None,
    # Temperature rescales the logits before softmax; lower values make the model
    # more confident in its top choices.
    # https://stackoverflow.com/questions/58764619/why-should-we-use-temperature-in-softmax/63471046#63471046
    temperature=0.75,
    # Nucleus sampling: keep the smallest set of most likely tokens whose cumulative
    # probability exceeds 0.95, then sample only from that set.
    # https://www.linkedin.com/pulse/text-generation-temperature-top-p-sampling-gpt-models-selvakumar
    top_p=0.95,
    # Keep only the 50 most likely tokens. When top_p is also set the two filters
    # are combined, e.g. if the top 50 tokens only cover 0.90 cumulative probability
    # (below the top_p threshold), sampling is still restricted to those 50 tokens.
    # https://docs.cohere.com/docs/controlling-generation-with-top-k-top-p#2-pick-from-amongst-the-top-tokens-top-k
    top_k=50,
    max_new_tokens=1024,
    instruction="Please create an inference question in the style of TOEFL reading comprehension section. Also provide an answer in the format",
    model=model,
    tokenizer=tokenizer,
):
    prompter = Prompter()
    prompt = prompter.generate_prompt(instruction, input)
    print(prompt)

    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]
    generation_config = GenerationConfig(
        do_sample=True,  # required so temperature/top_p/top_k actually take effect
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=1.2,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    return prompter.get_response(output)


import gradio as gr

example = """Education is the process of facilitating learning, or the acquisition of knowledge, skills, values, morals, beliefs, habits, and personal development. There are many types of potential educational aims and objectives, irrespective of the specific subject being learned. Some can cross multiple school disciplines.
"""

demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(value=example, label="inputs"),
        gr.Slider(0, 1, value=0.75, step=0.05, label="temperature"),
        gr.Slider(0, 1, value=0.95, step=0.05, label="top_p"),
        gr.Slider(0, 100, value=50, step=10, label="top_k"),
    ],
    outputs=["text"],
)
demo.launch(inline=False)