"""Dialectic Reasoning Chatbot — Gradio Space with ZeroGPU."""
import gc
import spaces
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
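# Display name -> Hugging Face Hub repos for the base model and its LoRA adapter.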
MODELS = {
"Qwen3-8B (recommended)": {
"base": "Qwen/Qwen3-8B",
"adapter": "hikewa/dialectic-qwen3-8b-lora",
},
"Qwen2.5-1.5B": {
"base": "Qwen/Qwen2.5-1.5B-Instruct",
"adapter": "hikewa/dialectic-qwen2.5-1.5b-lora",
},
}
DEFAULT_MODEL = "Qwen3-8B (recommended)"
SYSTEM_PROMPT = (
"You reason carefully through problems by considering competing "
"perspectives before reaching a conclusion. You identify genuine "
"tensions, engage with the strongest form of each argument, and "
"integrate insights rather than picking sides or hedging."
)
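# Single-slot cache: only one model/tokenizer pair is kept in memory at a time.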
loaded = {"name": None, "model": None, "tokenizer": None}
def load_model(model_name):
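    """Return the requested base model + LoRA adapter, freeing any previously loaded model."""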
    global loaded
    if loaded["name"] == model_name:
        return loaded["model"], loaded["tokenizer"]
    # Free previous model
    if loaded["model"] is not None:
        del loaded["model"]
        loaded["model"] = None
        gc.collect()
        torch.cuda.empty_cache()
    cfg = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(
        cfg["adapter"], trust_remote_code=True
    )
    base = AutoModelForCausalLM.from_pretrained(
        cfg["base"], torch_dtype=torch.float16, trust_remote_code=True
    )
    model = PeftModel.from_pretrained(base, cfg["adapter"])
    model = model.to("cuda")
    model.eval()
    loaded["name"] = model_name
    loaded["model"] = model
    loaded["tokenizer"] = tokenizer
    return model, tokenizer
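# ZeroGPU: a GPU is attached only while the decorated function runs.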
@spaces.GPU
def respond(message, history, model_name):
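    """Rebuild the full chat (system prompt + history + new message) and generate one reply."""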
    model, tokenizer = load_model(model_name)
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for msg in history:
        if isinstance(msg, dict):
            messages.append(msg)
        elif isinstance(msg, (list, tuple)) and len(msg) == 2:
            messages.append({"role": "user", "content": msg[0]})
            messages.append({"role": "assistant", "content": msg[1]})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt")
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.pad_token_id,
        )
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True).strip()
    return response
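# additional_inputs values are passed to respond() after (message, history).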
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=list(MODELS.keys()),
            value=DEFAULT_MODEL,
            label="Model",
        ),
    ],
    title="Dialectic Reasoning",
    description=(
        "Fine-tuned on 510 dialectic reasoning traces. "
        "Ask a question involving competing perspectives."
    ),
    examples=[
        ["Should AI systems be transparent about their reasoning, even when transparency reduces performance?"],
        ["Is it better to optimize for individual freedom or collective wellbeing?"],
        ["When does pragmatic compromise become unprincipled capitulation?"],
    ],
    cache_examples=False,
)
if __name__ == "__main__":
    demo.launch(ssr_mode=False)