import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import time
import os

MODEL_REPO_ID = "TheBloke/DeepSeek-Coder-6.7B-Instruct-GGUF"
MODEL_FILENAME = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"
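# Note: Q4_K_M is a 4-bit GGUF quantization (roughly 4 GB on disk), chosen here
# so the 6.7B model fits in the memory of a free CPU Space.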


def get_model_path():
    """Download the model from the Hugging Face Hub, falling back to a local copy on error."""
    try:
        # resume_download and local_dir_use_symlinks are deprecated in recent
        # huggingface_hub releases: downloads resume automatically, and files
        # are placed directly into local_dir by default.
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            local_dir="./models",
        )
        print(f"✅ Model downloaded to: {model_path}")
        return model_path
    except Exception as e:
        print(f"❌ Error downloading model: {e}")
        # Fall back to a copy that may already exist in the working directory.
        if os.path.exists(MODEL_FILENAME):
            return MODEL_FILENAME
        raise


MODEL_PATH = get_model_path()

llm = None
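# llm stays None until the first request: the file is downloaded at import time
# above, but load_model() creates the Llama instance lazily, so the Space can
# start serving the UI before the weights are loaded into memory.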


def load_model():
    """Lazy-load the model only when needed."""
    global llm
    if llm is None:
        print("⏳ Loading model... This may take 1-2 minutes on first run.")
        start_time = time.time()
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=2048,      # context window size; kept small to limit RAM use
            n_threads=2,     # match the vCPU count of the free-tier hardware
            n_gpu_layers=0,  # CPU-only: no layers offloaded to a GPU
            verbose=True,
        )
        load_time = time.time() - start_time
        print(f"✅ Model loaded in {load_time:.1f} seconds. Ready for inference.")
    return llm


def generate_code(prompt, max_tokens=256, temperature=0.7):
    """Format the prompt, run inference, and return the generated text."""
    try:
        model = load_model()

        # DeepSeek Coder instruct models expect an Alpaca-style template.
        formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

        output = model(
            formatted_prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=0.95,
            echo=False,              # return only the completion, not the prompt
            stop=["###", "\n\n\n"],  # cut generation off before a new turn
        )
        return output['choices'][0]['text'].strip()

    except Exception as e:
        return f"❌ Error: {str(e)}"


demo = gr.Interface(
    fn=generate_code,
    inputs=[
        gr.Textbox(
            label="Code Prompt",
            placeholder="Write a Python function to reverse a string...",
            lines=4,
        ),
        gr.Slider(
            minimum=32,
            maximum=512,
            value=256,
            step=32,
            label="Max Tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
    ],
    outputs=gr.Code(
        label="Generated Code",
        language="python",
    ),
    title="💻 DeepSeek Coder 6.7B Instruct - o87Dev",
    description="**CPU Deployment** - Running on Hugging Face Spaces free tier. ⚠️ **First request loads model (~1-2 min)**",
    examples=[
        ["Write a Python function to check if a number is prime"],
        ["Create a React component for a login form"],
        ["Explain binary search algorithm in Python"],
    ],
)
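
# Note: CPU inference is blocking, so overlapping requests contend for the single
# model instance; Gradio's demo.queue() can be called before launch() to process
# requests one at a time.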


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # bind to all interfaces inside the Space container
        server_port=7860,       # the port Hugging Face Spaces routes traffic to
        share=False,
    )