import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
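
# Runtime dependencies: pip install gradio llama-cpp-python huggingface_hub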

MODEL_REPO = "druvx13/Qwen3-0.6B-Q5_0-GGUF"  # Hugging Face repo hosting the GGUF file
MODEL_FILE = "qwen3-0.6b-q5_0.gguf"          # Q5_0 (5-bit) quantized Qwen3-0.6B weights
CACHE_DIR = "./model_cache"                  # local download cache
MAX_TOKENS = 200                             # default generation length


def load_model():
    """Download the GGUF model from the Hugging Face Hub and load it with llama.cpp."""
    os.makedirs(CACHE_DIR, exist_ok=True)

    # hf_hub_download returns the resolved local path; force_download=False
    # reuses the cached file on subsequent runs.
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,
    )

    return Llama(
        model_path=model_path,
        n_ctx=2048,    # context window in tokens
        n_threads=4,   # CPU threads used for inference
        verbose=False,
    )


# Load once at startup so every Gradio request reuses the same instance.
llm = load_model()


def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Generate text with the GGUF model, exposing the main sampling parameters."""
    try:
        output = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False,  # return only the completion, not the prompt
        )
        return output["choices"][0]["text"]
    except Exception as e:
        return f"Error generating text: {e}"


with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 Qwen3-0.6B Text Generator (GGUF Version)
    Enter a prompt and adjust the parameters to generate text with the quantized Qwen3-0.6B model.
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...",
                lines=5,
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=MAX_TOKENS,
                step=50,
                label="Max Output Length",
            )
            temp = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Creativity (Temperature)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p Sampling",
            )

        with gr.Column():
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output,
    )


demo.launch()
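# Note: demo.launch(share=True) would additionally expose a temporary public URL.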