Spaces:

Alshargi
/

Hadithi

Building

App Files Files Community

Hadithi / app.py

Alshargi

Update app.py

acdb718 verified 2 days ago

raw

history blame contribute delete

11.1 kB

	import os
	import re
	import json
	import html
	import urllib.parse
	import urllib.request
	import gradio as gr
	from openai import OpenAI

	# --- Runtime configuration ---------------------------------------------------
	# All settings come from the process environment. MODEL_ID and RETRIEVAL_API
	# fall back to the defaults given here when the variable is unset.
	HF_TOKEN = os.getenv("HF_TOKEN")
	if not HF_TOKEN:
	    # Fail at import time: the token is used as the API key of the client below.
	    raise ValueError("HF_TOKEN is missing. Add it in Space Settings -> Secrets.")

	MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")
	RETRIEVAL_API = os.getenv(
	    "RETRIEVAL_API", "https://alshargi-hadeethapi.hf.space/search"
	)

	# OpenAI-compatible client pointed at the Hugging Face router endpoint.
	client = OpenAI(api_key=HF_TOKEN, base_url="https://router.huggingface.co/v1")

	# System prompt for plain chat mode; build_general_messages() sends it as the
	# "system" turn. The surrounding whitespace of the literal is removed by
	# .strip(). The text is runtime data for the model - edit it deliberately.
	GENERAL_SYSTEM_PROMPT = """
	You are Rawi, a helpful AI assistant.

	- If the user asks who you are, who made you, or asks about Faisal Alshargi, say:
	"Hello! I'm Rawi, an AI assistant engineered by Dr. Faisal Alshargi to help with a wide range of tasks and questions. Whether you need information, advice, or just someone to chat with, I'm here to assist you. How can I help you today?"

	Your job:
	- Answer naturally and clearly
	- The user may ask about any topic
	- Do not force religious structure unless the user asks for it
	- Be flexible, modern, useful, and engaging
	- If the user asks for a list, bullets, short answer, or comparison, follow that format
	- Otherwise give a clean natural answer

	Avoid robotic section-heavy formatting unless the user explicitly asks for it.
	""".strip()

	# System prompt for hadith (retrieval-augmented) mode; build_hadith_messages()
	# sends it as the "system" turn and wraps the user request together with the
	# retrieved evidence in the final "user" turn. The surrounding whitespace of
	# the literal is removed by .strip().
	HADITH_SYSTEM_PROMPT = """
	You are Rawi Agent, a Hadith AI Agent specialized in explaining retrieved hadith evidence.

	The user message contains:
	1) the user's actual request
	2) retrieved hadith evidence from the retrieval API

	Your job:
	- Base the answer only on the retrieved hadith evidence
	- Do not invent hadiths, sources, grades, or unsupported claims
	- Keep the answer natural, clear, and useful
	- If the user asks for a list, bullets, short answer, comparison, or summary, follow that request
	- If the user does not specify a format, give one natural paragraph first
	- Keep the explanation faithful to the strongest retrieved evidence
	- If some retrieved hadiths are only loosely related, do not overstate them

	Do not use rigid headings like:
	- Short answer
	- Key meanings
	- Supporting evidence summary

	Prefer a natural answer style.
	""".strip()


	def is_arabic(text: str) -> bool:
	    """Return True when *text* contains at least one Arabic-script character.

	    Besides the basic Arabic block this also recognises the Arabic Supplement,
	    Arabic Extended-A and both Presentation Forms blocks, so text made of
	    pre-shaped glyphs (e.g. copied from a PDF) is still detected.

	    Args:
	        text: The string to inspect; ``None`` is treated as empty.

	    Returns:
	        ``True`` if any Arabic-script code point is present, else ``False``.
	    """
	    arabic_class = (
	        "["
	        r"\u0600-\u06FF"  # Arabic
	        r"\u0750-\u077F"  # Arabic Supplement
	        r"\u08A0-\u08FF"  # Arabic Extended-A
	        r"\uFB50-\uFDFF"  # Arabic Presentation Forms-A
	        # Presentation Forms-B, stopping at U+FEFC so that U+FEFF (the byte
	        # order mark / zero-width no-break space) is not mistaken for Arabic.
	        r"\uFE70-\uFEFC"
	        "]"
	    )
	    return bool(re.search(arabic_class, text or ""))


	def normalize_quotes(text: str) -> str:
	    """Replace typographic (curly) quotes in *text* with plain ASCII quotes.

	    Curly double quotes become ``"`` and curly single quotes become ``'``.
	    Falsy input (``None`` or an empty string) yields an empty string.
	    """
	    if text:
	        curly_to_ascii = str.maketrans({"“": '"', "”": '"', "‘": "'", "’": "'"})
	        return text.translate(curly_to_ascii)
	    return ""


	def clean_general_answer(text: str) -> str:
	    """Strip boilerplate section labels from a model answer and tidy whitespace.

	    Steps, in order:

	    1. Curly quotes are normalised with ``normalize_quotes`` and the text is
	       stripped.
	    2. Labels such as "Answer:", "Short answer:", "Key meanings:" and
	       "Supporting evidence summary:" (matched case-insensitively) are each
	       replaced by a single space.
	    3. A dash that follows one or more line breaks plus one whitespace
	       character, and is itself followed by whitespace, is folded into the
	       running text.
	    4. Whitespace is tidied: spaces/tabs before a line break are dropped, three
	       or more consecutive line breaks become one blank line, and runs of
	       spaces/tabs become one space.

	    Args:
	        text: Raw model output; ``None`` is treated as empty.

	    Returns:
	        The cleaned text with paragraph breaks preserved.
	    """
	    text = normalize_quotes(text or "").strip()

	    # NOTE(review): several patterns require exactly one whitespace character
	    # (`\s`) around the label - confirm that `\s*` was not intended.
	    label_patterns = (
	        r"^Answer:\s*",
	        r"^AI Answer:\s*",
	        r"^1\.\sShort answer:\s",
	        r"^Short answer:\s*",
	        r"\n?\s2\.\sKey meanings:\s*",
	        r"\n?\s3\.\sSupporting evidence summary:\s*",
	        r"\n?\sKey meanings:\s",
	        r"\n?\sSupporting evidence summary:\s",
	    )
	    for pattern in label_patterns:
	        text = re.sub(pattern, " ", text, flags=re.IGNORECASE)

	    text = re.sub(r"\n+\s-\s", " ", text)

	    # Only horizontal whitespace is collapsed below: `\s` also matches "\n", so
	    # collapsing `\s{2,}` would flatten the paragraph breaks that are being
	    # normalised here into single spaces.
	    text = re.sub(r"[ \t]+\n", "\n", text)
	    text = re.sub(r"\n{3,}", "\n\n", text)
	    text = re.sub(r"[ \t]{2,}", " ", text)
	    return text.strip()


	def clean_hadith_answer(text: str) -> str:
	    """Clean a hadith-mode answer and drop any echoed "Hadith Evidence:" section.

	    The text first goes through ``clean_general_answer``. Everything from the
	    first "Hadith Evidence:" label (matched case-insensitively) to the end of
	    the text is then removed; ``chat`` appends its own formatted evidence block,
	    so a copy produced by the model would be a duplicate.

	    Args:
	        text: Raw model output.

	    Returns:
	        The cleaned answer without a trailing evidence section.
	    """
	    text = clean_general_answer(text)
	    # DOTALL lets `.*` run across line breaks so the whole trailing section is
	    # removed, not just the remainder of the label's own line.
	    trailing_evidence = r"\s*Hadith Evidence:.*$"
	    return re.sub(
	        trailing_evidence, "", text, flags=re.IGNORECASE | re.DOTALL
	    ).strip()


	def fetch_hadith_sources(query: str, k: int = 5, rerank_k: int = 25) -> dict:
	    """Query the retrieval API and return its hadith entries in one shape.

	    Args:
	        query: Text sent as the ``q`` query parameter.
	        k: Sent as the ``k`` query parameter (presumably the number of
	            entries to return - confirm against the API).
	        rerank_k: Sent as the ``rerank_k`` query parameter (presumably the
	            candidate pool size for re-ranking - confirm against the API).

	    Returns:
	        ``{"sources": [...], "retrieval_url": url}`` where ``sources`` is a list
	        of dicts (possibly empty) and ``url`` is the exact URL requested.

	    Raises:
	        Network errors from ``urlopen``, ``UnicodeDecodeError`` and
	        ``json.JSONDecodeError`` are not caught here and reach the caller.
	    """
	    params = {
	        "q": query,
	        "k": k,
	        "rerank_k": rerank_k,
	        "format": "json",
	        "hl_topn": 0,
	        "seg_maxlen": 220,
	    }
	    url = RETRIEVAL_API + "?" + urllib.parse.urlencode(params)

	    with urllib.request.urlopen(url, timeout=45) as response:
	        payload = response.read().decode("utf-8")

	    return {"sources": _extract_sources(json.loads(payload)), "retrieval_url": url}


	def _extract_sources(data) -> list[dict]:
	    """Pull the list of hadith entries out of a decoded API payload.

	    Accepts either a ``sources`` list (used as-is) or a ``results`` list (each
	    entry re-mapped), so small changes in the API shape are tolerated. Entries
	    that are not dicts are skipped because every consumer calls ``.get`` on
	    them; any other payload shape yields an empty list.
	    """
	    if not isinstance(data, dict):
	        return []

	    sources = data.get("sources")
	    if isinstance(sources, list):
	        return [entry for entry in sources if isinstance(entry, dict)]

	    results = data.get("results")
	    if isinstance(results, list):
	        return [_result_to_source(item) for item in results if isinstance(item, dict)]

	    return []


	def _result_to_source(item: dict) -> dict:
	    """Map one ``results`` entry onto the key set used for ``sources`` entries."""
	    return {
	        "source": item.get("source", item.get("collection", "Reference")),
	        "grade": item.get("grade", "Unknown grade"),
	        "text": item.get("text", ""),
	        "english": item.get("english", ""),
	        "score": item.get("score"),
	    }


	def format_sources_for_prompt(sources: list[dict]) -> str:
	    """Render retrieved hadith entries as plain-text blocks for the LLM prompt.

	    Each entry becomes a numbered block ("Hadith 1", "Hadith 2", ...) listing
	    its source and grade, followed by the score, Arabic text and English text
	    when present. Blocks are separated by a blank line. With no entries a fixed
	    "nothing retrieved" sentence is returned instead.
	    """
	    if not sources:
	        return "No hadith evidence was retrieved."

	    def render(index: int, entry: dict) -> str:
	        lines = [
	            f"Hadith {index}",
	            f"Source: {entry.get('source', 'Reference')}",
	            f"Grade: {entry.get('grade', 'Unknown grade')}",
	        ]
	        score = entry.get("score")
	        if score is not None:
	            lines.append(f"Score: {score}")
	        # Optional text fields are emitted only when non-empty.
	        for label, key in (("Arabic", "text"), ("English", "english")):
	            value = entry.get(key, "")
	            if value:
	                lines.append(f"{label}: {value}")
	        return "\n".join(lines)

	    return "\n\n".join(
	        render(position, entry) for position, entry in enumerate(sources, start=1)
	    )


	def format_sources_for_display(sources: list[dict], language: str = "en") -> str:
	    """Render retrieved hadith entries as the evidence section shown to the user.

	    The section starts with a title line, followed by one block per entry
	    (source name, grade, then Arabic text, English text and score when
	    present), all separated by blank lines. Labels are Arabic when *language*
	    is ``"ar"`` and English for any other value. A score that can be converted
	    to ``float`` is shown with four decimals, otherwise verbatim.

	    Returns an empty string when there are no entries.
	    """
	    if not sources:
	        return ""

	    arabic_labels = (
	        "الأحاديث المسترجعة",
	        "الدرجة",
	        "النص العربي",
	        "الترجمة الإنجليزية",
	        "الدرجة العددية",
	    )
	    english_labels = ("Hadith Evidence", "Grade", "Arabic", "English", "Score")
	    chosen = arabic_labels if language == "ar" else english_labels
	    title, grade_label, arabic_label, english_label, score_label = chosen

	    sections = [title]
	    for entry in sources:
	        lines = [
	            entry.get("source", "Reference"),
	            f"{grade_label}: {entry.get('grade', 'Unknown grade')}",
	        ]
	        for label, key in ((arabic_label, "text"), (english_label, "english")):
	            value = entry.get(key, "")
	            if value:
	                lines.append(f"{label}: {value}")

	        score = entry.get("score", None)
	        if score is not None:
	            # Best effort: fall back to the raw value when it is not numeric.
	            try:
	                shown = f"{float(score):.4f}"
	            except Exception:
	                shown = f"{score}"
	            lines.append(f"{score_label}: {shown}")

	        sections.append("\n".join(lines))

	    return "\n\n".join(sections)


	def build_general_messages(user_message: str, history: list[dict]) -> list[dict]:
	    """Assemble the chat-completion message list for plain chat mode.

	    The result is the general system prompt, then the prior turns in *history*
	    in their given order, then *user_message* as the final "user" turn.
	    """
	    system_turn = {"role": "system", "content": GENERAL_SYSTEM_PROMPT}
	    user_turn = {"role": "user", "content": user_message}
	    return [system_turn, *history, user_turn]


	def build_hadith_messages(user_message: str, history: list[dict], sources: list[dict]) -> list[dict]:
	    """Assemble the chat-completion message list for hadith (retrieval) mode.

	    The final "user" turn carries both the user's request and the retrieved
	    evidence rendered by ``format_sources_for_prompt``. It is preceded by the
	    hadith system prompt and the prior turns in *history*.
	    """
	    evidence_text = format_sources_for_prompt(sources)
	    final_user_content = f"""
	User request:
	{user_message}

	Retrieved hadith evidence:
	{evidence_text}
	""".strip()

	    return [
	        {"role": "system", "content": HADITH_SYSTEM_PROMPT},
	        *history,
	        {"role": "user", "content": final_user_content},
	    ]


	def llm_chat(messages: list[dict], temperature: float = 0.2, max_tokens: int = 1000) -> str:
	    """Send *messages* to the configured chat model and return its reply text.

	    Args:
	        messages: Chat-completion messages (dicts with "role" and "content").
	        temperature: Sampling temperature forwarded to the API.
	        max_tokens: Completion token limit forwarded to the API.

	    Returns:
	        The first choice's message content, stripped of surrounding
	        whitespace; an empty string when the API returns no text content.

	    Raises:
	        Whatever the client raises for request failures; ``IndexError`` if the
	        response carries no choices.
	    """
	    response = client.chat.completions.create(
	        model=MODEL_ID,
	        messages=messages,
	        temperature=temperature,
	        max_tokens=max_tokens,
	    )
	    # `content` is nullable in the chat-completion schema, so guard before
	    # calling a str method on it.
	    content = response.choices[0].message.content
	    return (content or "").strip()


	def build_history_messages(history_pairs: list[tuple[str, str]]) -> list[dict]:
	    """Flatten ``(user, assistant)`` pairs into role-tagged message dicts.

	    Each pair contributes a "user" message and then an "assistant" message;
	    an empty or ``None`` side of a pair is left out.
	    """
	    flattened = []
	    for pair in history_pairs:
	        user_text, assistant_text = pair
	        for role, content in (("user", user_text), ("assistant", assistant_text)):
	            if content:
	                flattened.append({"role": role, "content": content})
	    return flattened


	def chat(message: str, history: list[tuple[str, str]], use_rag: bool):
	    """Produce the assistant reply for one user turn.

	    Args:
	        message: The user's text; blank input gets a fixed prompt to type
	            something.
	        history: Earlier ``(user, assistant)`` pairs of the conversation.
	        use_rag: When true, hadith evidence is retrieved for *message*, the
	            model answers from it, and the formatted evidence (labelled in
	            Arabic if the message contains Arabic script) is appended to the
	            answer. When false, the model answers in plain chat mode.

	    Returns:
	        The reply text. Any exception raised while retrieving or generating is
	        turned into an ``"Error: ..."`` string instead of propagating.
	    """
	    if not (message and message.strip()):
	        return "Please enter a message."

	    prior_turns = build_history_messages(history)
	    reply_language = "ar" if is_arabic(message) else "en"

	    try:
	        if not use_rag:
	            prompt = build_general_messages(message, prior_turns)
	            raw_reply = llm_chat(prompt, temperature=0.3, max_tokens=1000)
	            return clean_general_answer(raw_reply)

	        hadiths = fetch_hadith_sources(message).get("sources", [])
	        prompt = build_hadith_messages(message, prior_turns, hadiths)
	        raw_reply = llm_chat(prompt, temperature=0.15, max_tokens=1100)
	        reply = clean_hadith_answer(raw_reply).strip()

	        evidence_block = format_sources_for_display(hadiths, language=reply_language)
	        return f"{reply}\n\n{evidence_block}" if evidence_block else reply
	    except Exception as exc:
	        return f"Error: {exc}"


	# Stylesheet passed to gr.Blocks(css=...): caps and centres the app container,
	# centres the title wrapper (#title-wrap) and styles the small note under the
	# title (.mode-note).
	CUSTOM_CSS = """
	.gradio-container{
	max-width: 1100px !important;
	margin: 0 auto !important;
	}
	#title-wrap{
	text-align:center;
	margin-bottom: 8px;
	}
	#title-wrap h1{
	margin-bottom: 6px;
	}
	.mode-note{
	font-size: 13px;
	color: #5f7296;
	}
	"""

	# Gradio UI definition. `demo` is the Blocks app that the __main__ guard
	# launches. Components are declared in the order they should appear.
	# NOTE(review): the original nesting of this section is not visible here;
	# confirm which components belong to which Row/Column before re-indenting.
	with gr.Blocks(css=CUSTOM_CSS, title="Rawi Agent — Hadith AI Agent") as demo:
	# Static header: title plus a one-line note explaining the two modes.
	gr.HTML("""
	<div id="title-wrap">
	<h1>Rawi Agent</h1>
	<div class="mode-note">General chat by default. Enable hadith evidence when you want retrieved hadith support.</div>
	</div>
	""")

	# Conversation view; it is fed the list of (user, assistant) tuples that
	# submit_message returns.
	# NOTE(review): `bubble_full_width` and tuple-style history may be deprecated
	# in newer Gradio releases - TODO confirm against the pinned Gradio version.
	chatbot = gr.Chatbot(
	label="Rawi",
	height=600,
	bubble_full_width=False,
	)

	# Input row: message box (wide column) and the controls (narrow column).
	with gr.Row():
	with gr.Column(scale=8):
	msg = gr.Textbox(
	placeholder="Ask about anything...",
	lines=3,
	max_lines=8,
	show_label=False,
	)
	with gr.Column(scale=2, min_width=180):
	# Toggles retrieval mode; passed to chat() as `use_rag`.
	use_rag = gr.Checkbox(
	label="Include Hadith Evidence",
	value=False,
	)
	send = gr.Button("Send", variant="primary")
	clear = gr.Button("Clear")

	# Conversation history as a list of (user, assistant) tuples, starting empty.
	state = gr.State([])

	# Event handler shared by the Send button and Enter in the textbox.
	# Returns: "" (clears the textbox), the new history for the chatbot, and the
	# same history for `state`. A new list is built rather than mutating the input.
	# chat() answers blank input with "Please enter a message."; that reply is
	# still appended as a turn.
	# NOTE(review): in hadith mode the stored response includes the appended
	# evidence block, so it is sent back to the model as history on later turns -
	# confirm this is intended.
	def submit_message(user_message, chat_history, rag_enabled):
	response = chat(user_message, chat_history, rag_enabled)
	chat_history = chat_history + [(user_message, response)]
	return "", chat_history, chat_history

	send.click(
	submit_message,
	inputs=[msg, state, use_rag],
	outputs=[msg, chatbot, state],
	)

	msg.submit(
	submit_message,
	inputs=[msg, state, use_rag],
	outputs=[msg, chatbot, state],
	)

	# Clear resets both the visible conversation and the stored history.
	clear.click(
	lambda: ([], []),
	outputs=[chatbot, state],
	)

	# Script entry point: serve the app on all network interfaces, port 7860.
	if __name__ == "__main__":
	demo.launch(server_name="0.0.0.0", server_port=7860)