from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

model_name = "microsoft/phi-2"

# Load the base model and tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True
)
# KV caching is disabled here (typically carried over from training config);
# it can be re-enabled for faster inference.
model.config.use_cache = False
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned adapter weights (requires `peft` to be installed).
adapter_path = "checkpoint-500"
model.load_adapter(adapter_path)

# Build the generation pipeline once at startup rather than on every request.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

def generate_context(prompt, tokens=300):
    # Wrap the prompt in the instruction tags used during fine-tuning.
    sentence = "[INST] " + prompt + " [/INST]"
    result = pipe(sentence, max_length=tokens)
    text = result[0]["generated_text"]
    # Strip the echoed prompt so only the model's reply is returned.
    return text[len(sentence):]

examples = [
    ["What is a large language model?", 250],
    ["Explain the process of photosynthesis", 350],
]

demo = gr.Interface(
    fn=generate_context,
    inputs=[
        gr.Textbox(label="How may I help you? 🤖"),
        gr.Slider(200, 500, value=300, label="Sentence length", step=50),
    ],
    outputs="text",
    examples=examples,
)

demo.launch(debug=True)
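# Optional sanity check (a minimal sketch; the prompt and token budget below
# are arbitrary example values): call the generator directly, before
# demo.launch(), to confirm the adapter loaded and the prompt template is
# being stripped correctly.
#
#     print(generate_context("What is a large language model?", tokens=250))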