from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

model_name = "microsoft/phi-2"

# Load the base model and tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True
)
# KV caching is disabled here (typically carried over from training config);
# it can be re-enabled for faster inference.
model.config.use_cache = False
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned adapter weights (requires `peft` to be installed).
adapter_path = "checkpoint-500"
model.load_adapter(adapter_path)

# Build the generation pipeline once at startup rather than on every request.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def generate_context(prompt, tokens=300):
    # Wrap the prompt in the instruction tags used during fine-tuning.
    sentence = "[INST] " + prompt + " [/INST]"
    result = pipe(sentence, max_length=tokens)
    text = result[0]["generated_text"]
    # Strip the echoed prompt so only the model's reply is returned.
    return text[len(sentence):]

examples = [
    ["What is a large language model?", 250],
    ["Explain the process of photosynthesis", 350],
]

demo = gr.Interface(
    fn=generate_context,
    inputs=[
        gr.Textbox(label="How may I help you? 🤖"),
        gr.Slider(200, 500, value=300, label="Sentence length", step=50),
    ],
    outputs="text",
    examples=examples,
)

demo.launch(debug=True)
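# Optional sanity check (a minimal sketch; the prompt and token budget below
# are arbitrary example values): call the generator directly, before
# demo.launch(), to confirm the adapter loaded and the prompt template is
# being stripped correctly.
#
#     print(generate_context("What is a large language model?", tokens=250))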