Spaces:

sapthesh
/

deepseekv3

Runtime error

File size: 1,447 Bytes

3e7fb54
7db3c78
3e7fb54
f3c977f
 
cd9b4c7
3e7fb54
f3c977f
 
 
 
 
 
 
3e7fb54
f3c977f
 
7db3c78
3e7fb54
f3c977f
 
7db3c78
3e7fb54
7db3c78
f3c977f
 
 
7db3c78
f3c977f
7db3c78
f3c977f

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository that supplies both the tokenizer and the weights.
model_id = "deepseek-ai/DeepSeek-V3"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="auto" places the model on a GPU when one is available;
# trust_remote_code=True is passed so the checkpoint's own model code can load.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
)

def predict(message, history):
    """Generate the assistant's reply to ``message`` given the prior chat turns.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation. Each item is either a
            ``(user_message, assistant_reply)`` pair or a dict with ``"role"``
            and ``"content"`` keys (Gradio's "messages" history format).

    Returns:
        The text newly generated by the model, without the echoed prompt.
    """
    conversation = []
    for turn in history:
        if isinstance(turn, dict):
            # "messages"-style history already carries role/content records.
            conversation.append(
                {"role": turn["role"], "content": turn["content"]}
            )
            continue
        user_msg, bot_response = turn
        conversation.append({"role": "user", "content": user_msg})
        if bot_response:  # Only add the bot response if it exists.
            conversation.append({"role": "assistant", "content": bot_response})
    conversation.append({"role": "user", "content": message})

    # return_dict=True yields a mapping (input_ids, attention_mask) that can be
    # unpacked into generate(); add_generation_prompt=True appends the marker
    # that tells the model to start an assistant turn.
    inputs = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)  # Keep the inputs on the same device as the model.

    outputs = model.generate(**inputs, max_new_tokens=512)  # Adjust as needed.

    # generate() returns the prompt tokens followed by the new tokens; decode
    # only the newly generated tail so the reply does not repeat the chat.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # Basic cleanup of a literal role marker that may survive decoding.
    return response.replace("<|assistant|>", "").strip()

# Build the chat UI around predict(). ChatInterface receives its widgets via
# the ``chatbot`` and ``textbox`` parameters; the message box is a gr.Textbox.
iface = gr.ChatInterface(
    fn=predict,
    chatbot=gr.Chatbot(),
    textbox=gr.Textbox(placeholder="Type a message..."),
    title="DeepSeek-V3 Chatbot",
    description="Chat with the DeepSeek-V3 model.",
)
iface.launch()