Hadithi / app.py
Alshargi's picture
Update app.py
acdb718 verified
import os
import re
import json
import html
import urllib.parse
import urllib.request
import gradio as gr
from openai import OpenAI
# --- Runtime configuration, read once from the environment at import time ---

# Hugging Face access token; it is passed to the client below as its API key.
HF_TOKEN = os.getenv("HF_TOKEN")

# Chat model identifier sent with every completion request (overridable).
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-7B-Instruct")

# Search endpoint queried by fetch_hadith_sources() (overridable).
RETRIEVAL_API = os.getenv(
    "RETRIEVAL_API",
    "https://alshargi-hadeethapi.hf.space/search",
)

# Fail fast at import: the client below is useless without an API key.
if not HF_TOKEN:
    raise ValueError("HF_TOKEN is missing. Add it in Space Settings -> Secrets.")

# OpenAI-compatible client pointed at the Hugging Face router endpoint.
client = OpenAI(base_url="https://router.huggingface.co/v1", api_key=HF_TOKEN)
# System prompt used for plain chat (no retrieval): defines the "Rawi" persona,
# a fixed self-introduction, and a preference for natural, lightly formatted
# answers. Surrounding whitespace is stripped before use.
GENERAL_SYSTEM_PROMPT = """
You are Rawi, a helpful AI assistant.
- If the user asks who you are, who made you, or asks about Faisal Alshargi, say:
"Hello! I'm Rawi, an AI assistant engineered by Dr. Faisal Alshargi to help with a wide range of tasks and questions. Whether you need information, advice, or just someone to chat with, I'm here to assist you. How can I help you today?"
Your job:
- Answer naturally and clearly
- The user may ask about any topic
- Do not force religious structure unless the user asks for it
- Be flexible, modern, useful, and engaging
- If the user asks for a list, bullets, short answer, or comparison, follow that format
- Otherwise give a clean natural answer
Avoid robotic section-heavy formatting unless the user explicitly asks for it.
""".strip()
# System prompt used when hadith retrieval is enabled: tells the model that the
# user turn carries both the request and the retrieved evidence, and that the
# answer must be based on that evidence only. Surrounding whitespace is
# stripped before use.
HADITH_SYSTEM_PROMPT = """
You are Rawi Agent, a Hadith AI Agent specialized in explaining retrieved hadith evidence.
The user message contains:
1) the user's actual request
2) retrieved hadith evidence from the retrieval API
Your job:
- Base the answer only on the retrieved hadith evidence
- Do not invent hadiths, sources, grades, or unsupported claims
- Keep the answer natural, clear, and useful
- If the user asks for a list, bullets, short answer, comparison, or summary, follow that request
- If the user does not specify a format, give one natural paragraph first
- Keep the explanation faithful to the strongest retrieved evidence
- If some retrieved hadiths are only loosely related, do not overstate them
Do not use rigid headings like:
- Short answer
- Key meanings
- Supporting evidence summary
Prefer a natural answer style.
""".strip()
def is_arabic(text: str) -> bool:
    """Report whether *text* contains a character in U+0600..U+06FF.

    That range is the Unicode "Arabic" block. ``None`` and the empty string
    yield ``False``.
    """
    arabic_block = r"[\u0600-\u06FF]"
    candidate = text or ""
    return re.search(arabic_block, candidate) is not None
# Typographic quotes mapped to ASCII. Code-point escapes are used on purpose so
# the table cannot be corrupted by a wrong file encoding (the previous literals
# were mojibake and therefore never matched real curly quotes).
_CURLY_QUOTE_TABLE = str.maketrans({
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark
})


def normalize_quotes(text: str) -> str:
    """Replace typographic (curly) quotes in *text* with plain ASCII quotes.

    Args:
        text: Input string; ``None`` or an empty string is accepted.

    Returns:
        The text with U+201C/U+201D replaced by ``"`` and U+2018/U+2019
        replaced by ``'``; an empty string when *text* is falsy.
    """
    if not text:
        return ""
    return text.translate(_CURLY_QUOTE_TABLE)
def clean_general_answer(text: str) -> str:
    """Strip boilerplate headings from a model answer and tidy its whitespace.

    Steps, in order:
      1. Normalize curly quotes and trim the text.
      2. Remove leading ``Answer:`` / ``Short answer:`` labels and the
         ``Key meanings:`` / ``Supporting evidence summary:`` headings
         (case-insensitive), replacing each with a single space.
      3. Flatten dash bullets that start on a new line into running text.
      4. Collapse any run of blank lines into exactly one blank line.
      5. Collapse runs of spaces/tabs into a single space.

    Args:
        text: Raw model output; ``None`` is treated as an empty string.

    Returns:
        The cleaned text with paragraph breaks preserved.
    """
    text = normalize_quotes(text or "").strip()

    # The "^"-anchored patterns only match at the very start of the answer
    # because re.MULTILINE is not set.
    heading_patterns = (
        r"^Answer:\s*",
        r"^AI Answer:\s*",
        r"^1\.\s*Short answer:\s*",
        r"^Short answer:\s*",
        r"\n?\s*2\.\s*Key meanings:\s*",
        r"\n?\s*3\.\s*Supporting evidence summary:\s*",
        r"\n?\s*Key meanings:\s*",
        r"\n?\s*Supporting evidence summary:\s*",
    )
    for pattern in heading_patterns:
        text = re.sub(pattern, " ", text, flags=re.IGNORECASE)

    # Dash bullets are merged into the preceding sentence.
    text = re.sub(r"\n+\s*-\s*", " ", text)

    # One blank line between paragraphs, however many (possibly
    # whitespace-only) lines separated them.
    text = re.sub(r"\n\s*\n", "\n\n", text)

    # Collapse only horizontal whitespace. The previous r"\s{2,}" also matched
    # the "\n\n" produced above, silently erasing every paragraph break.
    text = re.sub(r"[^\S\n]{2,}", " ", text)

    return text.strip()
def clean_hadith_answer(text: str) -> str:
    """Clean a hadith-mode answer and drop any trailing evidence section.

    The text is first passed through ``clean_general_answer``. Everything from
    the first ``Hadith Evidence:`` label (case-insensitive) to the end of the
    text is then removed, together with the whitespace before that label.
    """
    cleaned = clean_general_answer(text)
    evidence_tail = r"\s*Hadith Evidence:.*$"
    trimmed = re.sub(
        evidence_tail,
        "",
        cleaned,
        flags=re.IGNORECASE | re.DOTALL,
    )
    return trimmed.strip()
def fetch_hadith_sources(query: str, k: int = 5, rerank_k: int = 25) -> dict:
    """Query the retrieval API and return the hadith sources it reports.

    The request is a GET to ``RETRIEVAL_API`` with the query parameters
    ``q``, ``k``, ``rerank_k``, ``format=json``, ``hl_topn=0`` and
    ``seg_maxlen=220``, using a 45 second timeout.

    Two response shapes are accepted:
      * ``{"sources": [...]}`` - the entries are returned as they are.
      * ``{"results": [...]}`` - each entry is mapped onto the keys
        ``source``, ``grade``, ``text``, ``english`` and ``score``.

    Entries that are not dicts are skipped in both shapes, so one malformed
    item cannot break the formatters that later call ``.get`` on each source.

    Args:
        query: Search text sent as ``q``.
        k: Value sent as the ``k`` parameter.
        rerank_k: Value sent as the ``rerank_k`` parameter.

    Returns:
        ``{"sources": list_of_dicts, "retrieval_url": url}``; ``sources`` is
        empty when the payload matches neither shape.

    Raises:
        Network, decoding and JSON parsing errors are not caught here and
        propagate to the caller.
    """
    # NOTE(review): the meaning of hl_topn / seg_maxlen is defined by the
    # retrieval service; they are passed as fixed values.
    params = {
        "q": query,
        "k": k,
        "rerank_k": rerank_k,
        "format": "json",
        "hl_topn": 0,
        "seg_maxlen": 220,
    }
    url = RETRIEVAL_API + "?" + urllib.parse.urlencode(params)

    with urllib.request.urlopen(url, timeout=45) as response:
        payload = response.read().decode("utf-8")
    data = json.loads(payload)

    # Flexible parsing in case the API shape changes slightly.
    sources: list = []
    if isinstance(data, dict):
        listed = data.get("sources")
        results = data.get("results")
        if isinstance(listed, list):
            sources = [item for item in listed if isinstance(item, dict)]
        elif isinstance(results, list):
            sources = [
                {
                    "source": item.get("source", item.get("collection", "Reference")),
                    "grade": item.get("grade", "Unknown grade"),
                    "text": item.get("text", ""),
                    "english": item.get("english", ""),
                    "score": item.get("score"),
                }
                for item in results
                if isinstance(item, dict)
            ]

    return {"sources": sources, "retrieval_url": url}
def format_sources_for_prompt(sources: list[dict]) -> str:
    """Render retrieved sources as plain-text blocks for the LLM prompt.

    Each source becomes a block headed ``Hadith <n>`` (numbered from 1) with
    ``Source`` and ``Grade`` lines, followed by ``Score`` when the score is
    not ``None`` and by ``Arabic`` / ``English`` when those texts are
    non-empty. Blocks are separated by one blank line.

    Returns a fixed "no evidence" sentence when *sources* is empty.
    """
    if not sources:
        return "No hadith evidence was retrieved."

    rendered = []
    for number, entry in enumerate(sources, start=1):
        lines = [
            f"Hadith {number}",
            f"Source: {entry.get('source', 'Reference')}",
            f"Grade: {entry.get('grade', 'Unknown grade')}",
        ]

        # A score of 0 is still reported; only a missing/None score is omitted.
        score = entry.get("score", None)
        if score is not None:
            lines.append(f"Score: {score}")

        # Text fields are included only when they are non-empty.
        for label, key in (("Arabic", "text"), ("English", "english")):
            value = entry.get(key, "")
            if value:
                lines.append(f"{label}: {value}")

        rendered.append("\n".join(lines))

    return "\n\n".join(rendered)
def format_sources_for_display(sources: list[dict], language: str = "en") -> str:
    """Render retrieved sources as a text section appended to the chat answer.

    The section starts with a title line, followed by one block per source:
    the source name, its grade, then the Arabic text and English text when
    non-empty, and the score (four decimals when numeric, verbatim otherwise)
    when it is not ``None``. Blocks are separated by one blank line.

    Args:
        sources: Source dicts with optional ``source``, ``grade``, ``text``,
            ``english`` and ``score`` keys.
        language: ``"ar"`` selects Arabic labels; any other value selects
            English labels.

    Returns:
        The formatted section, or an empty string when *sources* is empty.
    """
    if not sources:
        return ""

    if language == "ar":
        # These literals were previously stored as mojibake (UTF-8 bytes read
        # with a Greek code page) and rendered as garbage in the UI.
        title = "الأحاديث المسترجعة"  # "Retrieved hadiths"
        grade_label = "الدرجة"  # "Grade"
        arabic_label = "النص العربي"  # "Arabic text"
        english_label = "الترجمة الإنجليزية"  # "English translation"
        score_label = "الدرجة العددية"  # "Numeric score"
    else:
        title = "Hadith Evidence"
        grade_label = "Grade"
        arabic_label = "Arabic"
        english_label = "English"
        score_label = "Score"

    parts = [title]
    for src in sources:
        source = src.get("source", "Reference")
        grade = src.get("grade", "Unknown grade")
        arabic_text = src.get("text", "")
        english = src.get("english", "")
        score = src.get("score", None)

        block = [source, f"{grade_label}: {grade}"]
        if arabic_text:
            block.append(f"{arabic_label}: {arabic_text}")
        if english:
            block.append(f"{english_label}: {english}")
        if score is not None:
            try:
                block.append(f"{score_label}: {float(score):.4f}")
            except (TypeError, ValueError, OverflowError):
                # Non-numeric score: show it as received instead of failing.
                block.append(f"{score_label}: {score}")
        parts.append("\n".join(block))

    return "\n\n".join(parts)
def build_general_messages(user_message: str, history: list[dict]) -> list[dict]:
    """Assemble the chat payload for general (non-retrieval) mode.

    The result is a new list: the general system prompt, then the prior
    *history* messages in order, then *user_message* as the final user turn.
    """
    system_turn = {"role": "system", "content": GENERAL_SYSTEM_PROMPT}
    user_turn = {"role": "user", "content": user_message}
    return [system_turn, *history, user_turn]
def build_hadith_messages(user_message: str, history: list[dict], sources: list[dict]) -> list[dict]:
    """Assemble the chat payload for hadith (retrieval) mode.

    The final user turn wraps the request together with the retrieved
    evidence, rendered by ``format_sources_for_prompt``, under two labelled
    sections. It is preceded by the hadith system prompt and the prior
    *history* messages.
    """
    evidence_text = format_sources_for_prompt(sources)

    # Built from adjacent literals; stripping the result matches stripping a
    # template that has a newline at both ends.
    wrapped_user_message = (
        "User request:\n"
        f"{user_message}\n"
        "Retrieved hadith evidence:\n"
        f"{evidence_text}"
    ).strip()

    return [
        {"role": "system", "content": HADITH_SYSTEM_PROMPT},
        *history,
        {"role": "user", "content": wrapped_user_message},
    ]
def llm_chat(messages: list[dict], temperature: float = 0.2, max_tokens: int = 1000) -> str:
    """Send *messages* to the configured chat model and return its reply text.

    Args:
        messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        removed; an empty string when the API returns no content.

    Raises:
        Errors raised by the client call are not caught here.
    """
    response = client.chat.completions.create(
        model=MODEL_ID,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # The content field may be None; calling .strip() on it directly would
    # raise AttributeError.
    content = response.choices[0].message.content
    return (content or "").strip()
def build_history_messages(history_pairs: list[tuple[str, str]]) -> list[dict]:
    """Flatten ``(user, assistant)`` pairs into role-tagged chat messages.

    Each pair contributes a ``user`` message followed by an ``assistant``
    message. A side of the pair that is empty or ``None`` is omitted.
    """
    return [
        {"role": role, "content": content}
        for user_msg, assistant_msg in history_pairs
        for role, content in (("user", user_msg), ("assistant", assistant_msg))
        if content
    ]
def chat(message: str, history: list[tuple[str, str]], use_rag: bool):
    """Produce the assistant's reply for one user turn.

    Args:
        message: The user's text. Blank input returns a prompt to type
            something.
        history: Earlier ``(user, assistant)`` pairs.
        use_rag: When true, hadith evidence is retrieved, passed to the
            model, and appended to the answer. Otherwise the model is
            queried in general mode.

    Returns:
        The cleaned answer text. In retrieval mode the formatted evidence
        section follows it when any sources were returned. Any exception
        raised while retrieving or generating is reported as
        ``"Error: <message>"`` instead of propagating.
    """
    if not (message and message.strip()):
        return "Please enter a message."

    history_messages = build_history_messages(history)
    # Evidence labels follow the language of the question.
    language = "ar" if is_arabic(message) else "en"

    try:
        if not use_rag:
            prompt = build_general_messages(message, history_messages)
            reply = llm_chat(prompt, temperature=0.3, max_tokens=1000)
            return clean_general_answer(reply)

        retrieval = fetch_hadith_sources(message)
        sources = retrieval.get("sources", [])
        prompt = build_hadith_messages(message, history_messages, sources)
        reply = llm_chat(prompt, temperature=0.15, max_tokens=1100)
        final = clean_hadith_answer(reply).strip()

        evidence = format_sources_for_display(sources, language=language)
        if not evidence:
            return final
        return f"{final}\n\n{evidence}"
    except Exception as e:
        return "Error: " + str(e)
# Custom CSS passed to gr.Blocks: caps the app container at 1100px and centers
# it, centers the title block, and styles the small "mode-note" caption.
CUSTOM_CSS = """
.gradio-container{
max-width: 1100px !important;
margin: 0 auto !important;
}
#title-wrap{
text-align:center;
margin-bottom: 8px;
}
#title-wrap h1{
margin-bottom: 6px;
}
.mode-note{
font-size: 13px;
color: #5f7296;
}
"""
# Gradio UI: a chat window, a message box, a toggle for hadith retrieval, and
# Send / Clear buttons. The title uses an explicit U+2014 escape for the em
# dash; the previous literal was mojibake and rendered as garbage.
with gr.Blocks(css=CUSTOM_CSS, title="Rawi Agent \u2014 Hadith AI Agent") as demo:
    gr.HTML("""
<div id="title-wrap">
<h1>Rawi Agent</h1>
<div class="mode-note">General chat by default. Enable hadith evidence when you want retrieved hadith support.</div>
</div>
""")

    chatbot = gr.Chatbot(
        label="Rawi",
        height=600,
        bubble_full_width=False,
    )

    with gr.Row():
        with gr.Column(scale=8):
            msg = gr.Textbox(
                placeholder="Ask about anything...",
                lines=3,
                max_lines=8,
                show_label=False,
            )
        # NOTE(review): the original nesting of the two buttons was lost;
        # they are assumed to sit in the narrow column with the checkbox.
        with gr.Column(scale=2, min_width=180):
            use_rag = gr.Checkbox(
                label="Include Hadith Evidence",
                value=False,
            )
            send = gr.Button("Send", variant="primary")
            clear = gr.Button("Clear")

    # Conversation history as a list of (user, assistant) tuples.
    state = gr.State([])

    def submit_message(user_message, chat_history, rag_enabled):
        """Run one chat turn and return updates for the textbox, chatbot and state.

        The textbox is cleared, and the new (user, assistant) pair is
        appended to a copy of the history, which feeds both the chatbot
        display and the stored state.
        """
        response = chat(user_message, chat_history, rag_enabled)
        chat_history = chat_history + [(user_message, response)]
        return "", chat_history, chat_history

    # The Send button and pressing Enter in the textbox trigger the same handler.
    send.click(
        submit_message,
        inputs=[msg, state, use_rag],
        outputs=[msg, chatbot, state],
    )
    msg.submit(
        submit_message,
        inputs=[msg, state, use_rag],
        outputs=[msg, chatbot, state],
    )

    # Clear resets both the visible conversation and the stored history.
    clear.click(
        lambda: ([], []),
        outputs=[chatbot, state],
    )

if __name__ == "__main__":
    # Listen on all interfaces on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)