# Gradio demo: text generation with microsoft/phi-2 plus a LoRA adapter
# loaded from a local checkpoint directory.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, logging
import gradio as gr

model_name = "microsoft/phi-2"

# SECURITY NOTE: trust_remote_code=True lets the model repository run its own
# Python code at load time. Only keep this for repositories you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
)
# NOTE(review): turning off the KV cache is usually a training-time setting;
# confirm it is really wanted for an inference-only demo.
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# No dedicated pad token is configured here, so the EOS token is reused.
tokenizer.pad_token = tokenizer.eos_token

# Loading adapter (trained LoRA weights).
# Previous Google Drive location, kept for reference:
# ckpt = '/content/drive/MyDrive/S27/results/checkpoint-500'
# model.load_adapter(ckpt)
# NOTE(review): load_adapter presumably needs the `peft` package installed.
adapter_path = 'checkpoint-500'
model.load_adapter(adapter_path)

# Build the generation pipeline once at start-up instead of on every request;
# the arguments are the same ones the per-call construction used.
_GENERATOR = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)


def inference(prompt):
    """Generate text for a user prompt.

    The prompt is wrapped in ``[INST] ... [/INST]`` markers before it is
    passed to the text-generation pipeline.

    Args:
        prompt: Raw text entered by the user.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    result = _GENERATOR(f"[INST] {prompt} [/INST]")
    return result[0]['generated_text']


# INTERFACE holds the gr.Interface object itself; it is launched in a separate
# statement so the name does not end up bound to launch()'s return value.
INTERFACE = gr.Interface(
    fn=inference,
    inputs=[
        gr.Textbox(label="Prompt", value='Please write about Shakuntala Devi'),
    ],
    outputs=gr.Text(label="Generated Text"),
    title="Phi-2 fine-tuned with OpenAssistant/oasst-1 dataset using QLoRA strategy",
    examples=[['Write a note on Indiana Jones']],
)
INTERFACE.launch(debug=True)

# Alternative Blocks-based UI, currently disabled.
# NOTE: the `examples=` argument below is not valid Python as written
# (`[prompt = '...']`) and must be fixed before this is re-enabled.
#
# with gr.Blocks() as demo:
#     gr.Markdown(
#     """
#     # Phi2 trained on OpenAssistant/oasst1 dataset
#     Start typing below to see the output.
#     """)
#     prompt = gr.Textbox(label="Prompt")
#     output = gr.Textbox(label="Output Box")
#     greet_btn = gr.Button("Generate")
#     examples = gr.Examples(examples=[[prompt = 'Please write about Shakuntala Devi'], [prompt = 'Write a brief note on Indiana Jones']], cache_examples=False)
#     greet_btn.click(fn=inference, inputs=prompt, outputs=output)
# demo.launch(debug=True)