"""Dialectic Reasoning Chatbot — Gradio Space with ZeroGPU."""
import gc
import spaces
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
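# Display name -> Hugging Face Hub repos for the base model and its LoRA adapter.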
MODELS = {
"Qwen3-8B (recommended)": {
"base": "Qwen/Qwen3-8B",
"adapter": "hikewa/dialectic-qwen3-8b-lora",
},
"Qwen2.5-1.5B": {
"base": "Qwen/Qwen2.5-1.5B-Instruct",
"adapter": "hikewa/dialectic-qwen2.5-1.5b-lora",
},
}
DEFAULT_MODEL = "Qwen3-8B (recommended)"
SYSTEM_PROMPT = (
"You reason carefully through problems by considering competing "
"perspectives before reaching a conclusion. You identify genuine "
"tensions, engage with the strongest form of each argument, and "
"integrate insights rather than picking sides or hedging."
)
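# Single-slot cache: only one model/tokenizer pair is kept in memory at a time.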
loaded = {"name": None, "model": None, "tokenizer": None}
def load_model(model_name):
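    """Return the requested base model + LoRA adapter, freeing any previously loaded model."""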
    global loaded
    if loaded["name"] == model_name:
        return loaded["model"], loaded["tokenizer"]
    # Free previous model
    if loaded["model"] is not None:
        del loaded["model"]
        loaded["model"] = None
        gc.collect()
        torch.cuda.empty_cache()
    cfg = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(
        cfg["adapter"], trust_remote_code=True
    )
    base = AutoModelForCausalLM.from_pretrained(
        cfg["base"], torch_dtype=torch.float16, trust_remote_code=True
    )
    model = PeftModel.from_pretrained(base, cfg["adapter"])
    model = model.to("cuda")
    model.eval()
    loaded["name"] = model_name
    loaded["model"] = model
    loaded["tokenizer"] = tokenizer
    return model, tokenizer
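# ZeroGPU: a GPU is attached only while the decorated function runs.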
@spaces.GPU
def respond(message, history, model_name):
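    """Rebuild the full chat (system prompt + history + new message) and generate one reply."""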
    model, tokenizer = load_model(model_name)
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for msg in history:
        if isinstance(msg, dict):
            messages.append(msg)
        elif isinstance(msg, (list, tuple)) and len(msg) == 2:
            messages.append({"role": "user", "content": msg[0]})
            messages.append({"role": "assistant", "content": msg[1]})
    messages.append({"role": "user", "content": message})
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt")
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.pad_token_id,
        )
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True).strip()
    return response
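# additional_inputs values are passed to respond() after (message, history).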
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=list(MODELS.keys()),
            value=DEFAULT_MODEL,
            label="Model",
        ),
    ],
    title="Dialectic Reasoning",
    description=(
        "Fine-tuned on 510 dialectic reasoning traces. "
        "Ask a question involving competing perspectives."
    ),
    examples=[
        ["Should AI systems be transparent about their reasoning, even when transparency reduces performance?"],
        ["Is it better to optimize for individual freedom or collective wellbeing?"],
        ["When does pragmatic compromise become unprincipled capitulation?"],
    ],
    cache_examples=False,
)
if __name__ == "__main__":
    demo.launch(ssr_mode=False)