import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

HUGGING_FACE_USER_NAME = "indikamk"
model_name = "BLOOMZ_finetuned_Misconceptions"
peft_model_id = f"{HUGGING_FACE_USER_NAME}/{model_name}"

config = PeftConfig.from_pretrained(peft_model_id)

# Load the base model and tokenizer that the adapter was trained on
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_8bit=False,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the LoRA adapter on top of the base model
model = PeftModel.from_pretrained(model, peft_model_id)


def make_inference(sentence):
    prompt = (
        "### INSTRUCTION\n"
        "Below is a student response to a written question about an electrical "
        "circuit. Please identify whether there is a sequential misconception. "
        "A sequential misconception in terms of electric circuits is one in "
        "which it is believed that elements that are further “downstream” from "
        "a source (such as R2 and R3 in the example circuit of Figure 1) "
        "“receive” current after elements closer to the source (R1 in the "
        "example circuit). With such a misconception, it is likely that a "
        "student will think that changes in R2 have no effect on the potential "
        "difference and current associated with R1 or Vs.\n\n"
        f"### Sentence:\n{sentence}\n"
        "### Response:\n"
    )
    batch = tokenizer(prompt, return_tensors="pt")
    # device_map="auto" may place the model on GPU while the tokenizer returns
    # CPU tensors, so move the inputs to the model's device before generating
    batch = batch.to(model.device)
    with torch.cuda.amp.autocast():
        output_tokens = model.generate(**batch, max_new_tokens=200)
    return tokenizer.decode(output_tokens[0], skip_special_tokens=True)


if __name__ == "__main__":
    # Build a simple Gradio interface around make_inference
    import gradio as gr

    gr.Interface(
        make_inference,
        [
            gr.Textbox(lines=2, label="Sentence"),
        ],
        gr.Textbox(label="Response"),
        title="MisconAI",
        description=(
            "MisconAI is a tool that allows you to input a student response "
            "to a written question about an electrical circuit. It will "
            "identify whether there is a sequential misconception."
        ),
    ).launch()
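

# A minimal sketch of exercising make_inference directly, without launching the
# Gradio UI. The module name "app" is an assumption about where this script is
# saved, and the sample sentence is illustrative only:
#
#     from app import make_inference
#     print(make_inference("Changing R2 will not affect the current through R1."))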