import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
print("Starting model download...")
# Download model file explicitly (better control)
try:
    # Try to find the GGUF file in the repo
    # If your repo has a different filename, change it here
    model_path = hf_hub_download(
        repo_id="TheBloke/CodeLlama-7B-Python-GGUF",  # Using TheBloke's reliable repo
        filename="codellama-7b-python.Q4_K_M.gguf",   # 4.08GB file
        cache_dir="./models"
    )
    print(f"✓ Model downloaded to: {model_path}")
except Exception as e:
    print(f"Error downloading model: {e}")
    raise
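
# Optional helper (a hedged sketch, not part of the original app): if you point
# the download at a different repo, huggingface_hub's list_repo_files() can show
# which .gguf files actually exist there. Guarded by a hypothetical LIST_GGUF
# env var so normal startup is unaffected.
if os.getenv("LIST_GGUF") == "1":
    from huggingface_hub import list_repo_files
    gguf_files = [f for f in list_repo_files("TheBloke/CodeLlama-7B-Python-GGUF")
                  if f.endswith(".gguf")]
    print(f"GGUF files available in repo: {gguf_files}")
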
# Load the GGUF model
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,                                  # Context window
    n_threads=int(os.getenv("N_THREADS", "2")),  # CPU threads
    n_batch=512,                                 # Batch size for prompt processing
    verbose=True
)
print("✓ Model loaded successfully!")
def generate_code(prompt, max_tokens=500, temperature=0.7):
"""Generate code from prompt"""
try:
response = llm(
prompt,
max_tokens=max_tokens,
temperature=temperature,
stop=["</s>", "###", "\n\n\n"], # Stop sequences
echo=False
)
return response['choices'][0]['text']
except Exception as e:
return f"Error generating code: {str(e)}"
# Create Gradio interface
with gr.Blocks(title="CodeLlama Assistant", theme=gr.themes.Soft()) as demo:
gr.Markdown("# πŸ¦™ CodeLlama-7B Python Assistant")
gr.Markdown("AI-powered code generation using CodeLlama-7B (4GB GGUF model)")
with gr.Row():
with gr.Column():
prompt_input = gr.Textbox(
label="Enter your coding question or task",
placeholder="Write a Python function to...",
lines=5
)
with gr.Row():
max_tokens = gr.Slider(
minimum=100,
maximum=1000,
value=500,
step=50,
label="Max Tokens"
)
temperature = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.7,
step=0.1,
label="Temperature"
)
submit_btn = gr.Button("πŸš€ Generate Code", variant="primary", size="lg")
clear_btn = gr.Button("πŸ—‘οΈ Clear", size="sm")
with gr.Column():
output = gr.Textbox(
label="Generated Code",
lines=15,
show_copy_button=True
)
# Button actions
submit_btn.click(
fn=generate_code,
inputs=[prompt_input, max_tokens, temperature],
outputs=output
)
clear_btn.click(
fn=lambda: ("", ""),
inputs=None,
outputs=[prompt_input, output]
)
# Example prompts
gr.Examples(
examples=[
["Write a Python function to calculate fibonacci numbers"],
["Create a binary search tree class with insert and search methods"],
["Write a function to reverse a linked list"],
["Implement quicksort algorithm in Python"],
["Create a decorator to measure function execution time"]
],
inputs=prompt_input
)
gr.Markdown("""
### πŸ’‘ Tips:
- Be specific in your prompts for better results
- Lower temperature (0.3-0.5) for more focused code
- Higher temperature (0.7-0.9) for more creative solutions
- Model works best for Python code generation
""")
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
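
# To try this locally (assumed invocation, matching the launch() call above):
#   python app.py
# then open http://localhost:7860 in a browser.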