Spaces:

anabury
/

CHAT_BOX

Runtime error

App Files Files Community

CHAT_BOX / app.py

anabury

Update app.py

9a972c0 verified about 1 month ago

raw

history blame contribute delete

1.86 kB

	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from peft import PeftModel

	# Hugging Face Hub repo that holds the LoRA adapter weights.
	ADAPTER_ID = "Anabury/My_Finetuned_Phi-4"

	# True when torch can see a CUDA device.
	USE_GPU = torch.cuda.is_available()

	# GPU: pre-quantized 4-bit checkpoint (fast); CPU: full-precision checkpoint.
	BASE_MODEL = "unsloth/phi-4-unsloth-bnb-4bit" if USE_GPU else "unsloth/phi-4"

	print(f"Loading base model: {BASE_MODEL} on {'GPU' if USE_GPU else 'CPU'}")

	# The tokenizer comes from the base checkpoint, not from the adapter repo.
	tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

	# Load the base weights with device-appropriate placement and precision.
	if USE_GPU:
	    # Let the library place the weights automatically; half precision on GPU.
	    base = AutoModelForCausalLM.from_pretrained(
	        BASE_MODEL,
	        device_map="auto",
	        torch_dtype=torch.float16,
	        trust_remote_code=True,
	    )
	else:
	    # No device map on CPU; keep the weights in float32.
	    base = AutoModelForCausalLM.from_pretrained(
	        BASE_MODEL,
	        device_map=None,
	        torch_dtype=torch.float32,
	        trust_remote_code=True,
	    )

	# Wrap the base model with the fine-tuned LoRA adapter and switch to eval mode.
	model = PeftModel.from_pretrained(base, ADAPTER_ID)
	model.eval()

	# Chat function
	def chat(message, history):
	    """Generate a reply to ``message`` and append the exchange to ``history``.

	    Args:
	        message: The user's text from the input box.
	        history: List of ``(user_message, bot_reply)`` tuples shown in the
	            Chatbot so far; ``None`` is treated as an empty conversation.

	    Returns:
	        A 2-tuple ``(history, history)`` containing the same updated list
	        twice, one value per output component the submit handler is wired to.
	    """
	    # Work on a copy so the caller's list is never mutated, and tolerate None.
	    history = list(history) if history else []

	    # Nothing to generate from: leave the conversation unchanged.
	    if not message or not message.strip():
	        return history, history

	    # Simple raw prompt; a chat template can be swapped in later.
	    inputs = tokenizer(message, return_tensors="pt").to(model.device)
	    prompt_len = inputs["input_ids"].shape[-1]

	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=256,
	            do_sample=True,
	            temperature=0.7,
	            top_p=0.9,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    # generate() returns prompt + continuation for causal LMs; decode only the
	    # newly generated tokens so the reply does not echo the user's message.
	    new_tokens = outputs[0][prompt_len:]
	    reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

	    history.append((message, reply))
	    return history, history

	# Gradio UI
	with gr.Blocks() as demo:
	    # Page title.
	    gr.Markdown(value="# 🧠 Phi-4 Chatbot (Fine-tuned)")

	    # Conversation view, free-text input, and a reset button.
	    chatbot = gr.Chatbot(height=420)
	    msg = gr.Textbox(placeholder="Ask me anything…")
	    clear = gr.Button(value="Clear")

	    # Pressing Enter sends the text plus the current history to chat();
	    # both returned values are routed to the Chatbot component.
	    msg.submit(fn=chat, inputs=[msg, chatbot], outputs=[chatbot, chatbot])

	    # "Clear" replaces the history with an empty list, bypassing the queue.
	    clear.click(fn=lambda: [], inputs=None, outputs=chatbot, queue=False)

	demo.launch()