Spaces:

Pr123
/

TinyLlama-EA-Chat

Sleeping

File size: 1,319 Bytes

32a88a7
 
 
 
 
 
 
 
dd17305
32a88a7
 
68a378e
dd17305
 
32a88a7
2f781ea
32a88a7
 
 
 
 
 
 
 
 
 
 
 
8cbf8ae
32a88a7
 
1ea4428
32a88a7
 
 
6bd6e16

import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, pipeline
import gradio as gr


# Hub id of the PEFT checkpoint; the tokenizer is loaded from the same id.
peft_model_id = "Pr123/TinyLlama-EA-Chat"

# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoPeftModelForCausalLM.from_pretrained(
    peft_model_id,
    torch_dtype=torch.bfloat16,
).to(device)

tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# Cap only the newly generated tokens. `max_length` also counts the prompt
# tokens, so the fixed system instruction plus a long question could consume
# the whole budget and leave little or no room for the answer.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
)


def chat_with_tinyllm(prompt):
    """Generate an answer to a user question with the module-level ``pipe``.

    The question is embedded, together with a fixed system instruction, in an
    ``[INST]`` / ``<<SYS>>`` prompt template before being sent to the
    text-generation pipeline.

    Args:
        prompt: The user's question; interpolated verbatim into the template.

    Returns:
        The part of the first generated text that follows the last
        ``[/INST]`` marker, or the whole generated text if the marker does
        not occur in it.
    """
    instruction = "Answer the following question: if you don't know the answer, just say that you don't know; don't try to make up an answer."
    full_prompt = f"<s>[INST] <<SYS>>{instruction}<</SYS>>{prompt}[/INST]"

    generations = pipe(full_prompt)
    full_text = generations[0]['generated_text']

    # rpartition yields the tail after the last marker, and the entire string
    # as the tail when the marker is absent.
    _, _, reply = full_text.rpartition('[/INST]')
    return reply

def chat_interface():
    """Assemble the Gradio UI that wraps ``chat_with_tinyllm``.

    Returns:
        A ``gr.Interface`` with a two-line question textbox as input and a
        plain-text output. The interface is only constructed here, not
        launched.
    """
    question_box = gr.Textbox(lines=2, placeholder="Type your question here...")
    return gr.Interface(
        fn=chat_with_tinyllm,
        inputs=question_box,
        outputs="text",
        title="Chat with TinyLlama",
        description="This is a simple chatbot fine-tuned on the TinyLlama-1.1B-Chat-v1.0 model from Hugging Face, designed to answer questions related to East Africa.",
    )

# Build the interface when the module is executed and start serving it.
# NOTE(review): share=True requests a public share link from Gradio; this is
# presumably unnecessary when the app is hosted as a Space — confirm.
iface = chat_interface()
iface.launch(share=True)