File size: 1,447 Bytes
3e7fb54
7db3c78
3e7fb54
f3c977f
 
cd9b4c7
3e7fb54
f3c977f
 
 
 
 
 
 
3e7fb54
f3c977f
 
7db3c78
3e7fb54
f3c977f
 
7db3c78
3e7fb54
7db3c78
f3c977f
 
 
7db3c78
f3c977f
7db3c78
f3c977f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository that provides both the tokenizer and the weights.
model_id = "deepseek-ai/DeepSeek-V3"

# Loaded once at import time; predict() reuses these module-level objects.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # automatic device placement (GPU when one is available)
    trust_remote_code=True,  # permit the repository's custom model code to run
)

def predict(message, history):
    """Generate the assistant's reply to ``message`` given the prior chat turns.

    Args:
        message: Text of the newest user message.
        history: Earlier turns of the conversation. Each item is either a
            ``(user_msg, bot_response)`` pair or a dict carrying ``"role"``
            and ``"content"`` keys; ``None`` or an empty list means no
            earlier turns.

    Returns:
        The text generated for this turn only (the prompt is not echoed),
        with surrounding whitespace removed.
    """
    conversation = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Message-style history: the entry is already a role/content record.
            conversation.append({"role": turn["role"], "content": turn["content"]})
            continue
        user_msg, bot_response = turn
        conversation.append({"role": "user", "content": user_msg})
        if bot_response:  # Only add bot response if it exists
            conversation.append({"role": "assistant", "content": bot_response})
    conversation.append({"role": "user", "content": message})

    # return_dict=True yields a mapping (input_ids, attention_mask) so it can be
    # unpacked into generate(); add_generation_prompt=True ends the prompt with
    # the assistant header so the model answers rather than continuing the user.
    inputs = tokenizer.apply_chat_template(
        conversation=conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)  # keep the inputs on the same device as the model
    outputs = model.generate(**inputs, max_new_tokens=512)  # Adjust max_new_tokens as needed

    # generate() returns prompt + continuation; drop the prompt tokens so the
    # chat window shows only the new reply instead of the whole transcript.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # Defensive cleanup kept from the original: remove a literal marker if one
    # survives decoding.
    return response.replace("<|assistant|>", "").strip()

# Chat UI wired to predict(). gr.ChatInterface takes its widgets through the
# `chatbot` and `textbox` parameters (it has no `inputs`/`outputs`), and the
# message box is a gr.Textbox -- Gradio has no `Chatbox` component.
iface = gr.ChatInterface(
    fn=predict,
    chatbot=gr.Chatbot(),
    textbox=gr.Textbox(placeholder="Type a message..."),
    title="DeepSeek-V3 Chatbot",
    description="Chat with the DeepSeek-V3 model.",
)
iface.launch()