import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
# Hugging Face token from Space Secrets
HF_TOKEN = os.environ.get("HF_TOKEN")

# Model IDs
BASE_MODEL = "google/gemma-3-1b-it"
LORA_ADAPTER = "markredito/gemma-pip-finetuned-v2"  # Replace with your actual LoRA repo

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"
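# Note: `device` is informational only; device_map="auto" below handles weight placement.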
# Quantization config for 4-bit (recommended on T4 GPU)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
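# Caveat: T4 GPUs (Turing) have no native bfloat16 support, so if generation fails or
# is very slow on a T4, switching bnb_4bit_compute_dtype to torch.float16 may help.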
# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config,
    token=HF_TOKEN,
    attn_implementation="eager",  # Required for Gemma 3 + quantization
)
model = PeftModel.from_pretrained(model, LORA_ADAPTER, token=HF_TOKEN)
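# The LoRA weights are kept separate from the 4-bit base weights. PEFT's
# merge_and_unload() can fold an adapter into its base model, but merging into a
# quantized base is not always supported, so the adapter stays unmerged here.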
# Pad token fallback
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# Generation function
def generate_response(prompt, temperature, top_p, top_k):
    formatted = (
        "<start_of_turn>user\n"
        f"{prompt.strip()}\n"
        "<end_of_turn>\n"
        "<start_of_turn>model\n"
    )
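    # The manual string above follows Gemma's chat turn format. Assuming the tokenizer
    # ships a Gemma chat template, an equivalent prompt could also be built with:
    # tokenizer.apply_chat_template([{"role": "user", "content": prompt}],
    #                               tokenize=False, add_generation_prompt=True)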
    inputs = tokenizer(formatted, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens, then strip any leftover turn markers
    decoded = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    cleaned = decoded.split("<end_of_turn>")[0].replace("model\n", "").strip()
    return cleaned
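# Quick smoke test outside the UI (uncomment to try; reuses the placeholder prompt below):
# print(generate_response("What if gravity took a day off?", 0.7, 0.9, 50))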
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## ✨ Gemma Psychedelic Model Demo")
    gr.Markdown("Use your imagination or try one of the examples below to explore poetic and philosophical responses.")
    gr.Markdown("Note: this model intentionally hallucinates.")

    examples = [
        "Describe a world where clouds are solid and people walk on them",
        "Contrast quantum-reality phenomena from the perspective of a starship navigator, using a spiral into infinity.",
        "Dream up futuristic phenomena from the perspective of a timeless oracle, using a fractal blooming in chaos.",
    ]

    with gr.Row():
        with gr.Column():
            prompt_input = gr.Textbox(label="Enter your prompt", lines=4, placeholder="Try something like: What if gravity took a day off?")
            gr.Examples(
                examples=examples,
                inputs=prompt_input,
                label="Example Prompts",
            )
            temperature = gr.Slider(0.1, 1.5, value=0.7, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.9, label="Top-p (nucleus sampling)")
            top_k = gr.Slider(0, 100, step=1, value=50, label="Top-k")
            submit = gr.Button("Generate")
        with gr.Column():
            output = gr.Textbox(label="Model Response", lines=10)

    submit.click(fn=generate_response, inputs=[prompt_input, temperature, top_p, top_k], outputs=output)

demo.launch()