import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

print("Starting model download...")
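
# Download the 4-bit (Q4_K_M) GGUF weights from the Hugging Face Hub, cached under ./models.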
try:
    model_path = hf_hub_download(
        repo_id="TheBloke/CodeLlama-7B-Python-GGUF",
        filename="codellama-7b-python.Q4_K_M.gguf",
        cache_dir="./models"
    )
    print(f"✅ Model downloaded to: {model_path}")
except Exception as e:
    print(f"Error downloading model: {e}")
    raise
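
# Load the model with llama-cpp-python: n_ctx is the context window, n_threads the CPU
# thread count (configurable via the N_THREADS env var), n_batch the prompt batch size.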
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=int(os.getenv("N_THREADS", "2")),
    n_batch=512,
    verbose=True
)
print("✅ Model loaded successfully!")
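
# Inference helper used by the Gradio UI below.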
def generate_code(prompt, max_tokens=500, temperature=0.7):
    """Generate code from prompt"""
    try:
        response = llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            stop=["</s>", "###", "\n\n\n"],
            echo=False
        )
        return response['choices'][0]['text']
    except Exception as e:
        return f"Error generating code: {str(e)}"
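
# Gradio UI: prompt and sampling controls on the left, generated code on the right.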
with gr.Blocks(title="CodeLlama Assistant", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🦙 CodeLlama-7B Python Assistant")
    gr.Markdown("AI-powered code generation using CodeLlama-7B (4GB GGUF model)")
    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(
                label="Enter your coding question or task",
                placeholder="Write a Python function to...",
                lines=5
            )
            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=100,
                    maximum=1000,
                    value=500,
                    step=50,
                    label="Max Tokens"
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature"
                )
            submit_btn = gr.Button("🚀 Generate Code", variant="primary", size="lg")
            clear_btn = gr.Button("🗑️ Clear", size="sm")

        with gr.Column():
            output = gr.Textbox(
                label="Generated Code",
                lines=15,
                show_copy_button=True
            )
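
    # Wire the buttons: generate code on submit, reset both text boxes on clear.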
    submit_btn.click(
        fn=generate_code,
        inputs=[prompt_input, max_tokens, temperature],
        outputs=output
    )

    clear_btn.click(
        fn=lambda: ("", ""),
        inputs=None,
        outputs=[prompt_input, output]
    )
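
    # One-click example prompts.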
    gr.Examples(
        examples=[
            ["Write a Python function to calculate fibonacci numbers"],
            ["Create a binary search tree class with insert and search methods"],
            ["Write a function to reverse a linked list"],
            ["Implement quicksort algorithm in Python"],
            ["Create a decorator to measure function execution time"]
        ],
        inputs=prompt_input
    )
    gr.Markdown("""
    ### 💡 Tips:
    - Be specific in your prompts for better results
    - Lower temperature (0.3-0.5) for more focused code
    - Higher temperature (0.7-0.9) for more creative solutions
    - Model works best for Python code generation
    """)
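
# Listen on all interfaces on port 7860, Gradio's default and the port Hugging Face Spaces expects.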
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)