import os

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
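
# Runtime dependencies: pip install gradio llama-cpp-python huggingface_hub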

MODEL_REPO = "druvx13/Qwen3-0.6B-Q5_0-GGUF"  # Hugging Face repo hosting the GGUF file
MODEL_FILE = "qwen3-0.6b-q5_0.gguf"          # Q5_0 (5-bit) quantized Qwen3-0.6B weights
CACHE_DIR = "./model_cache"                  # local download cache
MAX_TOKENS = 200                             # default generation length


def load_model():
    """Download the GGUF model from the Hugging Face Hub and load it with llama.cpp."""
    os.makedirs(CACHE_DIR, exist_ok=True)

    # hf_hub_download returns the resolved local path; force_download=False
    # reuses the cached file on subsequent runs.
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False,
    )

    return Llama(
        model_path=model_path,
        n_ctx=2048,    # context window in tokens
        n_threads=4,   # CPU threads used for inference
        verbose=False,
    )


# Load once at startup so every Gradio request reuses the same instance.
llm = load_model()


def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Generate text with the GGUF model, exposing the main sampling parameters."""
    try:
        output = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False,  # return only the completion, not the prompt
        )
        return output["choices"][0]["text"]
    except Exception as e:
        return f"Error generating text: {e}"


with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 Qwen3-0.6B Text Generator (GGUF Version)
    Enter a prompt and adjust the parameters to generate text with the quantized Qwen3-0.6B model.
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...",
                lines=5,
            )
            max_tokens = gr.Slider(
                minimum=50,
                maximum=500,
                value=MAX_TOKENS,
                step=50,
                label="Max Output Length",
            )
            temp = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.7,
                step=0.1,
                label="Creativity (Temperature)",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top-p Sampling",
            )

        with gr.Column():
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output,
    )


demo.launch()
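# Note: demo.launch(share=True) would additionally expose a temporary public URL.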