import gradio as gr
from llama_cpp import Llama
# 1. Load the Model
# This downloads the GGUF build of DeepSeek-R1-Distill-Llama-8B from the Hugging Face Hub.
# We use the Q4_K_M quantization because it fits in the free 16 GB RAM tier on Spaces.
print("⏳ Downloading & Loading Model... (This takes 1-2 mins on first run)")
llm = Llama.from_pretrained(
    repo_id="bartowski/DeepSeek-R1-Distill-Llama-8B-GGUF",
    filename="DeepSeek-R1-Distill-Llama-8B-Q4_K_M.gguf",
    verbose=True,
    n_ctx=4096,  # Context window (how many tokens of the conversation the model can see)
)
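# Optional sanity check (not in the original app): uncomment while debugging to
# confirm the weights loaded and the model generates before the UI starts.
# print(llm("User: Say hello.\nAssistant:", max_tokens=16)["choices"][0]["text"])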
# 2. Define the Chat Function
def chat_with_deepseek(message, history):
    # Rebuild the conversation as a plain "User:/Assistant:" transcript so the
    # model can use earlier turns (this is what n_ctx above is for). This
    # assumes ChatInterface's pair-style history: a list of (user, assistant) tuples.
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Generate the response
    output = llm(
        prompt,
        max_tokens=512,          # Cap on the length of each reply
        stop=["User:", "\n\n"],  # Stop before the model invents the next user turn
        echo=False,              # Return only the completion, not the prompt
    )
    return output['choices'][0]['text'].strip()
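
# Alternative sketch (not in the original app): llama-cpp-python can also apply
# the chat template embedded in the GGUF file via create_chat_completion, which
# matches DeepSeek's own prompt format more closely than the hand-rolled
# "User:/Assistant:" transcript above. To use it, pass this function to
# gr.ChatInterface below instead of chat_with_deepseek.
def chat_via_template(message, history):
    # Convert the (user, assistant) pairs into OpenAI-style message dicts.
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    result = llm.create_chat_completion(messages=messages, max_tokens=512)
    return result["choices"][0]["message"]["content"]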
# 3. Launch the Chat Interface
# We use ChatInterface because it handles the UI automatically
gr.ChatInterface(chat_with_deepseek).launch()
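
# Streaming sketch (not in the original app): ChatInterface also accepts a
# generator, and llama-cpp-python streams tokens with stream=True. Yielding the
# growing string makes the reply appear word by word. To try it, define this
# above and launch gr.ChatInterface(stream_with_deepseek) instead.
# def stream_with_deepseek(message, history):
#     prompt = f"User: {message}\nAssistant:"
#     partial = ""
#     for chunk in llm(prompt, max_tokens=512, stop=["User:", "\n\n"], stream=True):
#         partial += chunk["choices"][0]["text"]
#         yield partial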