# JAYmedalpaca / app.py
import gradio as gr
import torch
import json
import threading
import os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextIteratorStreamer
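# Assumed dependencies (not pinned here): gradio, torch, transformers, plus
# bitsandbytes for 4-bit loading and accelerate for device_map="auto".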
# ============================================================
# MODEL SETUP
# ============================================================
MODEL_ID = "augtoma/qCammel-13"
# 4-bit quantization (saves GPU memory)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
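# Load the tokenizer and the 4-bit quantized model; device_map="auto" lets
# accelerate place the weights on whatever GPU/CPU memory is available.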
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# ============================================================
# MEMORY HANDLING
# ============================================================
MEMORY_FILE = "chat_memory.json"
# Load or initialize chat memory
if os.path.exists(MEMORY_FILE):
    with open(MEMORY_FILE, "r") as f:
        try:
            chat_memory = json.load(f)
        except json.JSONDecodeError:
            chat_memory = []
else:
    chat_memory = []
def save_memory(history):
    """Save chat history persistently."""
    with open(MEMORY_FILE, "w") as f:
        json.dump(history, f, indent=2)
# ============================================================
# SYSTEM PROMPT (doctor personality)
# ============================================================
SYSTEM_PROMPT = (
    "You are Dr. Camel, a professional, empathetic, and helpful medical doctor. "
    "You will respond only when the patient speaks. "
    "Never start the conversation by yourself. "
    "Always reply as 'Doctor:' and never simulate the patient's responses. "
    "Your tone should be calm, supportive, and medically informative. "
    "If symptoms seem serious, politely suggest seeing a healthcare professional."
)
# ============================================================
# CONVERSATION PROMPT BUILDER
# ============================================================
def build_conversation_prompt(history):
    """Builds a memory-aware prompt (the doctor only replies after the patient)."""
    conversation = SYSTEM_PROMPT + "\n\n"
    # Keep only the last six turns so the prompt stays within the context window.
    for turn in history[-6:]:
        if turn["role"] == "user":
            conversation += f"Patient: {turn['content'].strip()}\n"
        elif turn["role"] == "assistant":
            conversation += f"Doctor: {turn['content'].strip()}\n"
    conversation += "Doctor:"
    return conversation
# ============================================================
# TEXT GENERATION (STREAMING)
# ============================================================
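# model.generate() blocks until generation finishes, so it runs in a background
# thread while the TextIteratorStreamer yields decoded text back to the caller.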
def generate_stream(history, max_new_tokens=512):
    prompt = build_conversation_prompt(history)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=max_new_tokens,
        repetition_penalty=1.05,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        streamer=streamer,
        pad_token_id=tokenizer.eos_token_id,
    )
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
# ============================================================
# RESPONSE LOGIC
# ============================================================
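# Gradio treats respond() as a generator: every yield pushes the partial
# assistant reply to the Chatbot, which produces the streaming effect.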
def respond(user_message, history):
    # Ignore empty submissions (yield instead of return so Gradio still receives outputs).
    if not user_message.strip():
        yield gr.update(), history
        return
    # Prevent the bot from talking first
    if len(history) == 0 and "Doctor" in user_message:
        yield gr.update(), history
        return
    history.append({"role": "user", "content": user_message})
    partial = ""
    for partial in generate_stream(history):
        yield history + [{"role": "assistant", "content": partial}], history
    history.append({"role": "assistant", "content": partial})
    save_memory(history)
    yield history, history
def clear_chat():
    global chat_memory
    chat_memory = []
    save_memory(chat_memory)
    return [], []
# ============================================================
# GRADIO UI
# ============================================================
with gr.Blocks(title="🩺 Dr. Camel — Medical Chatbot", css=".footer {display:none;}") as demo:
    gr.Markdown(
        """
        # 🩺 Dr. Camel — AI Medical Assistant
        Ask about your symptoms or medical concerns, and Dr. Camel will respond with care and clarity.
        *(For demo purposes only — not real medical advice.)*
        """
    )
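    # The Chatbot in "messages" format expects a list of {"role", "content"} dicts,
    # which matches the structure stored in chat_memory.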
    chatbot = gr.Chatbot(type="messages", elem_id="chatbot", height=520, value=chat_memory)
    with gr.Row():
        txt = gr.Textbox(show_label=False, placeholder="Describe your symptoms or ask a question...", lines=2)
        clear = gr.Button("🧹 Clear Chat")
    state = gr.State(chat_memory)
    txt.submit(respond, [txt, state], [chatbot, state])
    clear.click(clear_chat, None, [chatbot, state])
    gr.Markdown(
        "### ⚠️ Disclaimer: This chatbot does not replace a real medical consultation. "
        "Always seek professional medical help for health emergencies."
    )
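# Enable the request queue so the streaming (generator) callback works as expected.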
demo.queue()
demo.launch(server_name="0.0.0.0", server_port=7860)