import os

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
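
# Cap the CUDA allocator's split size to reduce memory fragmentation while
# loading a very large checkpoint; 128 MB is a common starting point, not a
# tuned value.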
|
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
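
# DeepSeek-V3-0324 is a very large mixture-of-experts checkpoint; loading it
# like this assumes a multi-GPU machine with enough total memory. For a local
# smoke test, point model_name at a much smaller model instead.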
model_name = "deepseek-ai/DeepSeek-V3-0324"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
print("Loading model...") |
|
model = AutoModelForCausalLM.from_pretrained( |
|
model_name, |
|
torch_dtype=torch.float16, |
|
device_map="auto", |
|
trust_remote_code=True, |
|
|
|
low_cpu_mem_usage=True, |
|
|
|
use_flash_attention_2=True, |
|
use_cache=True |
|
) |

model.eval()  # inference mode: disables dropout


def generate_response(message, chat_history,
                      system_prompt="You are a helpful AI assistant.",
                      max_new_tokens=2048, temperature=0.7):
    try:
        # Flatten the (user, assistant) history into a plain-text transcript,
        # then append the new message with an open "Assistant:" turn.
        history_text = ""
        if chat_history:
            for user_msg, assistant_msg in chat_history:
                history_text += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        full_prompt = f"{system_prompt}\n\n{history_text}User: {message}\nAssistant:"

        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,  # pass attention_mask along with input_ids
                max_new_tokens=max_new_tokens,  # budget the reply, not prompt + reply
                temperature=temperature,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                top_p=0.9,
                repetition_penalty=1.1,
                use_cache=True,
                num_beams=1,
            )

        # Decode the full sequence, then keep only the text after the last
        # "Assistant:" marker, i.e. the newly generated reply.
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = response.split("Assistant:")[-1].strip()
        return response
    except Exception as e:
        return f"An error occurred: {e}"


with gr.Blocks(css="footer {visibility: hidden}") as demo:
    gr.Markdown("# DeepSeek V3 Chatbot")
    gr.Markdown("Welcome! This is a chatbot powered by the DeepSeek-V3-0324 model.")
    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(label="Message", placeholder="Type your message here...")
    clear = gr.Button("Clear Conversation")

    temperature = gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.7,
        step=0.1,
        label="Temperature",
        info="Higher = more creative, lower = more focused",
    )

    def user(user_message, history):
        # Append the user's message with an empty assistant slot and clear
        # the textbox; bot() fills the slot in afterwards.
        return "", history + [[user_message, None]]

    def bot(history, temp):
        user_message = history[-1][0]
        bot_message = generate_response(user_message, history[:-1], temperature=temp)
        history[-1][1] = bot_message
        return history

    # Two-step flow: user() echoes the message instantly (no queue), then
    # bot() runs the slow generation step through the queue.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, temperature], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()