import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline
import gradio as gr

# Fine-tuned PEFT adapter hosted on the Hugging Face Hub
peft_model_id = "Pr123/TinyLlama-EA-Chat"

# Use the GPU when available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the base model with the PEFT adapter merged in, using bfloat16 to reduce memory
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    torch_dtype=torch.bfloat16,
).to(device)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

# Text-generation pipeline; max_length caps the total tokens (prompt + completion)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=100)


def chat_with_tinyllm(prompt):
    # Wrap the user question in the instruction template the model was fine-tuned with
    instruction = (
        "Answer the following question: if you don't know the answer, "
        "just say that you don't know; don't try to make up an answer."
    )
    prompt_content = f"[INST] <>{instruction}<>{prompt}[/INST]"
    result = pipe(prompt_content)
    # The pipeline returns the prompt plus the completion; keep only the text after [/INST]
    return result[0]["generated_text"].split("[/INST]")[-1]


def chat_interface():
    # Simple Gradio interface: a text box in, generated answer out
    iface = gr.Interface(
        fn=chat_with_tinyllm,
        inputs=gr.Textbox(lines=2, placeholder="Type your question here..."),
        outputs="text",
        title="Chat with TinyLlama",
        description=(
            "This is a simple chatbot fine-tuned on the TinyLlama-1.1B-Chat-v1.0 "
            "model from Hugging Face, designed to answer questions related to East Africa."
        ),
    )
    return iface


iface = chat_interface()
# share=True generates a temporary public link to the running app
iface.launch(share=True)