# XZtar / app.py
# druvx13's picture
# Update app.py
# 0672ed5 verified
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os
# Model configuration
# Repository id passed to hf_hub_download as repo_id.
MODEL_REPO = "druvx13/Qwen3-0.6B-Q5_0-GGUF"
# Name of the GGUF file requested from MODEL_REPO.
MODEL_FILE = "qwen3-0.6b-q5_0.gguf"
# Local directory created at startup and handed to hf_hub_download as cache_dir.
CACHE_DIR = "./model_cache"
# Default value of generate_text's max_tokens parameter.
MAX_TOKENS = 200
# Initialize model (loads once at startup)
def load_model():
    """Fetch the GGUF file into the local cache and wrap it in a Llama instance.

    The cache directory is created if missing, MODEL_FILE is resolved from
    MODEL_REPO through hf_hub_download, and the returned local path is given
    to Llama as model_path.

    Returns:
        The constructed Llama object.
    """
    # Make sure the cache directory exists before asking for the download.
    os.makedirs(CACHE_DIR, exist_ok=True)

    # force_download=False lets an already cached copy be reused.
    gguf_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,
    )

    # Runtime settings for llama.cpp, kept together for easy tweaking.
    llama_options = {
        "n_ctx": 2048,      # context window size
        "n_threads": 4,     # CPU threads used for inference
        "verbose": False,   # keep llama.cpp debug logs off
    }
    return Llama(model_path=gguf_path, **llama_options)
# Load model at startup: load_model() runs once when this module is executed,
# and generate_text reuses the resulting module-level instance on every call.
llm = load_model()
# Generation function with parameters
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Generate a completion for *prompt* with the module-level GGUF model.

    Args:
        prompt: Text to complete. A missing or whitespace-only prompt is
            rejected without calling the model.
        max_tokens: Upper bound on generated tokens; coerced to ``int``
            because UI sliders may deliver the value as a float.
        temp: Sampling temperature, coerced to ``float``.
        top_p: Nucleus-sampling threshold, coerced to ``float``.

    Returns:
        The generated text, or a string starting with
        ``"Error generating text: "`` when the prompt is blank or the
        model call fails. This function never raises to the caller.
    """
    # Skip the inference call entirely when there is nothing to complete.
    if prompt is None or not str(prompt).strip():
        return "Error generating text: prompt is empty"

    try:
        # Coercions live inside the try so a bad value is reported through
        # the same error string as any other failure.
        output = llm(
            prompt=prompt,
            max_tokens=int(max_tokens),
            temperature=float(temp),
            top_p=float(top_p),
            echo=False,  # Don't repeat input in output
        )
        return output["choices"][0]["text"]
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure is
        # shown in the output box instead of surfacing as a traceback.
        return f"Error generating text: {e}"
# UI Components
# Layout: a two-column row. The left column holds the prompt and the sampling
# controls; the right column holds the output box and the generate button.
with gr.Blocks(theme="soft") as demo:
    # The title names the model family actually loaded via MODEL_REPO (Qwen3),
    # so the UI does not advertise a different model than the one in use.
    gr.Markdown(
        "# 🧠 Qwen3 Text Generator (GGUF Version)\n"
        "Enter a prompt and adjust parameters to generate AI text "
        "using the quantized Qwen3-0.6B model."
    )
    with gr.Row():
        with gr.Column():
            # Input components
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...",
                lines=5,
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=MAX_TOKENS,  # same default as generate_text
                step=50,
                label="Max Output Length",
            )
            temp = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Creativity (Temperature)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p Sampling",
            )
        with gr.Column():
            # Output and button
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")
    # Event handler: inputs are passed positionally, in generate_text's
    # parameter order (prompt, max_tokens, temp, top_p).
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output,
    )
# Launch app
demo.launch()