|
|
import gradio as gr |
|
|
from llama_cpp import Llama |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Download the quantized GGUF model from the Hugging Face Hub (cached locally
# after the first run) and load it into llama.cpp.
print("⏳ Downloading & Loading Model... (This takes 1-2 mins on first run)")

llm = Llama.from_pretrained(
    repo_id="bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF",
    filename="DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf",
    verbose=True,  # print llama.cpp load/inference details to stdout
    n_ctx=4096     # context window in tokens shared by prompt + generation
)
|
|
|
|
|
|
|
|
def chat_with_deepseek(message, history):
    """Generate a reply from the local DeepSeek model for a gradio chat turn.

    Bug fix: the original ignored ``history`` entirely, so the model had no
    memory of earlier turns — every reply saw only the latest message. The
    prompt now includes the prior conversation.

    Parameters
    ----------
    message : str
        The user's newest message.
    history : list
        Prior turns from gradio's ChatInterface. Assumed to be the default
        "tuples" format of (user, assistant) pairs; dict-style
        ``{"role": ..., "content": ...}`` messages are tolerated too.
        TODO(review): confirm which format the installed gradio version sends.

    Returns
    -------
    str
        The model's generated reply text.
    """
    prompt = _build_prompt(message, history)
    output = llm(
        prompt,
        max_tokens=512,
        stop=["User:", "\n\n"],  # halt before the model hallucinates the next user turn
        echo=False               # return only the completion, not the prompt
    )
    return output['choices'][0]['text']


def _build_prompt(message, history):
    """Flatten prior turns plus the new message into a "User:/Assistant:" prompt."""
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            # openai-style message dict
            role = "User" if turn.get("role") == "user" else "Assistant"
            lines.append(f"{role}: {turn.get('content', '')}")
        else:
            # (user, assistant) pair — gradio's default tuples format
            user_msg, assistant_msg = turn
            lines.append(f"User: {user_msg}")
            lines.append(f"Assistant: {assistant_msg}")
    lines.append(f"User: {message}\nAssistant:")
    return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
# Wrap the chat function in gradio's stock chat UI and start the local server.
gr.ChatInterface(chat_with_deepseek).launch()
|
|
|