Spaces:

sitayeb
/

Scientific_Paper_Discovery_Bot

Running

App Files Files Community

Scientific_Paper_Discovery_Bot / app.py

sitayeb

Update app.py

b54c8f9 verified 21 days ago

raw

history blame contribute delete

59.9 kB

	# ================================================================
	# Scientific Paper Discovery Bot v7.4 — SyntaxError FIXED
	# ================================================================
	import os, re, time, json, pickle, threading
	import requests
	import xml.etree.ElementTree as ET
	from datetime import datetime, timedelta
	from collections import Counter

	import numpy as np
	import faiss
	import pandas as pd
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import gradio as gr
	from sentence_transformers import SentenceTransformer
	from groq import Groq
	from gtts import gTTS
	from langdetect import detect, DetectorFactory
	from reportlab.lib.pagesizes import A4
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib.units import cm
	from reportlab.lib import colors
	from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, HRFlowable

	# Pin langdetect's seed to a constant (presumably so detection results are
	# repeatable between runs — confirm against the langdetect documentation).
	DetectorFactory.seed = 0

	# API credentials come from the environment; each falls back to "" when unset.
	GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
	S2_API_KEY = os.environ.get("S2_API_KEY", "")
	groq_client = Groq(api_key=GROQ_API_KEY)

	# The sentence-embedding model is loaded once at import time and run on a
	# single dummy input before anything else uses it.
	print("Loading embedder...")
	embedder = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
	_ = embedder.encode(["warmup"])
	print("Embedder ready!")

	# Mutable module-level state shared by the functions below:
	#   PAPERS         - list handed to build_papers_index (the FAISS-indexed set)
	#   ACTIVE_PAPERS  - list currently shown; reassigned by gr_fetch / gr_filter_papers
	#   FAISS_INDEX    - index built by build_papers_index, or None when empty
	#   AUTO_RUNNING   - flag toggled by start_auto_fetch / stop_auto_fetch
	#   AUTO_LOG       - log lines appended by auto_fetch_worker
	#   CURRENT_YEAR   - calendar year captured once, at import time
	PAPERS = []
	ACTIVE_PAPERS = []
	FAISS_INDEX = None
	AUTO_RUNNING = False
	AUTO_LOG = []
	CURRENT_YEAR = datetime.now().year

	# Directory used for every file this module writes (favorites, seen ids,
	# exports). NOTE(review): "/tmp" is typically not durable across restarts.
	PERSIST_DIR = "/tmp"
	FAVORITES_PATH = PERSIST_DIR + "/favorites.pkl"
	SEEN_IDS_PATH = PERSIST_DIR + "/seen_ids.json"
	os.makedirs(PERSIST_DIR, exist_ok=True)

	# UI category label -> value placed after "cat:" in the arXiv search query
	# ("" means no category filter).
	CATEGORIES = {
	"🌐 All": "",
	"📊 Economics": "econ",
	"💰 Quant Finance": "q-fin",
	"🤖 AI": "cs.AI",
	"🧠 Machine Learning":"cs.LG",
	"💬 NLP": "cs.CL",
	"📈 Statistics": "stat",
	"🔬 Biology": "q-bio",
	"⚛️ Physics": "physics",
	"📐 Mathematics": "math",
	"💻 Computer Science":"cs",
	}
	# Same UI labels -> keyword that fetch_crossref_papers appends to the query text.
	CROSSREF_SUBJECTS = {
	"🌐 All": "",
	"📊 Economics": "economics",
	"💰 Quant Finance": "finance",
	"🤖 AI": "artificial intelligence",
	"🧠 Machine Learning":"machine learning",
	"💬 NLP": "natural language processing",
	"📈 Statistics": "statistics",
	"🔬 Biology": "biology",
	"⚛️ Physics": "physics",
	"📐 Mathematics": "mathematics",
	"💻 Computer Science":"computer science",
	}
	# Option lists; SORT_CHOICES matches the sort_by values handled by
	# gr_filter_papers. LANG_CHOICES is presumably a UI choice list (its use is
	# not visible in this part of the file).
	LANG_CHOICES = ["Arabic", "English"]
	SORT_CHOICES = ["Newest", "Oldest", "Most Cited", "Least Cited"]
	# Formatting rules (written in Arabic) that are appended to Arabic prompts:
	# start every section with "##" surrounded by blank lines, write each section
	# as a 3-4 sentence paragraph in Modern Standard Arabic, and do not repeat the
	# section title inside the text.
	AR_RULES = """
	- ابدأ كل قسم بـ ## مع سطر فارغ قبله وبعده
	- اكتب كل قسم في فقرة 3-4 جمل بالعربية الفصحى
	- لا تكرر عنوان القسم داخل النص
	"""

	# ================================================================
	# HELPERS
	# ================================================================
	def detect_lang(text):
	    """Classify *text* as Arabic ("ar") or anything else ("en").

	    Only the first 300 characters of ``str(text)`` are handed to langdetect.
	    Any failure during detection falls back to "en".
	    """
	    try:
	        return "ar" if detect(str(text)[:300]).startswith("ar") else "en"
	    except Exception:
	        # Previously a bare ``except:``, which also trapped KeyboardInterrupt
	        # and SystemExit; detection errors are ordinary exceptions.
	        return "en"

	def clean_md(text):
	    """Strip Markdown punctuation from *text* and flatten it to one line.

	    Removes the characters ``# * ` > [ ] ! _ ~``, turns every run of newlines
	    into a single space, trims the ends and caps the result at 2500 characters.
	    """
	    without_markup = re.sub(r"[#*`>\[\]!_~]", "", text)
	    single_line = re.sub(r"\n+", " ", without_markup)
	    trimmed = single_line.strip()
	    return trimmed[:2500]

	def fix_ar_format(text):
	    """Normalise blank lines around ``##`` headings in LLM output.

	    Applies three substitutions in order: put a blank line before every
	    ``##`` that follows a newline, put a blank line between a ``## heading``
	    line and the text right after it, then collapse three or more newlines
	    to exactly two. The result is stripped of surrounding whitespace.
	    """
	    substitutions = (
	        (r"\n(##)", r"\n\n\1"),
	        (r"(## [^\n]+)\n([^\n#])", r"\1\n\n\2"),
	        (r"\n{3,}", "\n\n"),
	    )
	    for pattern, replacement in substitutions:
	        text = re.sub(pattern, replacement, text)
	    return text.strip()

	def cit_badge(n):
	    """Format a citation count as a short badge string.

	    ``None`` and ``""`` give "—". Otherwise *n* is converted with ``int`` and
	    mapped to a tier: >=1000, >=100, >=10, >0 (each with its own emoji and the
	    number), and "·" for zero or negative counts.
	    """
	    if n is None or n == "":
	        return "—"
	    count = int(n)
	    # Highest tier first; the first threshold reached wins.
	    tiers = ((1000, "🥇 "), (100, "🏆 "), (10, "⭐ "))
	    for threshold, prefix in tiers:
	        if count >= threshold:
	            return prefix + "{:,}".format(count)
	    if count > 0:
	        return "📄 " + str(count)
	    return "·"

	def build_table(papers_list):
	    """Render papers as a Markdown table and build matching dropdown labels.

	    Args:
	        papers_list: list of paper dicts; reads "title", "authors",
	            "published" and optionally "recent", "citations", "source".

	    Returns:
	        (table, choices): ``table`` is the Markdown table text (header,
	        separator, one row per paper, each newline-terminated);
	        ``choices`` is a list of "N. title" strings, numbered from 1.
	    """
	    def cell(value):
	        # An unescaped "|" or a newline inside a cell would split the row.
	        return str(value).replace("|", "\\|").replace("\n", " ")

	    # The scraped source carried Markdown-escaped pipes ("\|") inside these
	    # literals; a table needs plain "|" column separators.
	    lines = [
	        "| # | Title | Author | Date | Citations | Source |",
	        "|---|---|---|---|---|---|",
	    ]
	    choices = []
	    for number, p in enumerate(papers_list, 1):
	        first = p["authors"][0] if p["authors"] else "N/A"
	        badge = "NEW" if p.get("recent") else "📄"
	        lines.append("| {} | {} {} | {} | {} | {} | {} |".format(
	            number, badge, cell(p["title"]), cell(first),
	            cell(p["published"]), cit_badge(p.get("citations")),
	            cell(p.get("source", "arXiv"))))
	        # Dropdown labels keep the raw title; only table cells are escaped.
	        choices.append("{}. {}".format(number, p["title"]))
	    return "\n".join(lines) + "\n", choices

	def s2_headers():
	    """Return the HTTP headers used for Semantic Scholar requests.

	    Always carries the bot's User-Agent; adds ``x-api-key`` only when
	    S2_API_KEY is non-empty.
	    """
	    api_key_header = {"x-api-key": S2_API_KEY} if S2_API_KEY else {}
	    return {"User-Agent": "ScientificPaperBot/7.4", **api_key_header}

	def cr_headers():
	    """Return the HTTP headers used for CrossRef requests (User-Agent with a mailto contact)."""
	    user_agent = "ScientificPaperBot/7.4 (mailto:researcher@example.com)"
	    return dict([("User-Agent", user_agent)])

	# ================================================================
	# CrossRef date parser — rejects garbage years
	# ================================================================
	def parse_crossref_date(item):
	    """Return the first usable CrossRef date of *item* as "YYYY-MM-DD".

	    The fields "issued", "published", "published-print", "published-online"
	    and "created" are tried in that order. A field is skipped when its
	    "date-parts" is missing/empty, when a part is not an integer, or when the
	    year lies outside 1900..(current year + 1). A missing month or day
	    defaults to 1; out-of-range values are clamped.

	    Returns:
	        The formatted date, or "N/A" when no field qualifies.
	    """
	    import calendar

	    # Evaluated per call so a long-running process does not keep rejecting
	    # dates after a year rollover (the bound used to be frozen at import).
	    latest_year = datetime.now().year + 1
	    for field in ("issued", "published", "published-print", "published-online", "created"):
	        dp = (item.get(field) or {}).get("date-parts", [[]])
	        if not dp or not dp[0]:
	            continue
	        pts = dp[0]
	        try:
	            year = int(pts[0])
	            if not (1900 <= year <= latest_year):
	                continue
	            month = max(1, min(12, int(pts[1]) if len(pts) >= 2 else 1))
	            # Clamp to the real month length: a flat cap of 31 could emit
	            # impossible dates such as 2020-02-31.
	            last_day = calendar.monthrange(year, month)[1]
	            day = max(1, min(last_day, int(pts[2]) if len(pts) >= 3 else 1))
	        except (ValueError, TypeError, IndexError):
	            continue
	        return "{:04d}-{:02d}-{:02d}".format(year, month, day)
	    return "N/A"

	# ================================================================
	# SEEN / FAVORITES
	# ================================================================
	def load_seen_ids():
	    """Load the set of already-seen paper ids from SEEN_IDS_PATH.

	    Returns:
	        A set built from the JSON content, or an empty set when the file is
	        missing, unreadable, not valid JSON, or not a collection of hashable
	        values.
	    """
	    try:
	        with open(SEEN_IDS_PATH, encoding="utf-8") as f:
	            return set(json.load(f))
	    except (OSError, ValueError, TypeError):
	        # OSError: missing/unreadable file; ValueError: malformed JSON
	        # (JSONDecodeError subclasses it); TypeError: content not iterable
	        # or entries not hashable. Replaces a bare ``except:``.
	        return set()

	def save_seen_ids(ids):
	    """Overwrite SEEN_IDS_PATH with *ids* serialised as a JSON array."""
	    with open(SEEN_IDS_PATH, "w") as out:
	        id_list = list(ids)
	        json.dump(id_list, out)

	def load_favorites():
	    """Load the saved favorites list from FAVORITES_PATH.

	    Returns:
	        The unpickled object (save_favorite writes a list of paper dicts),
	        or an empty list when the file is missing or cannot be unpickled.
	    """
	    # SECURITY NOTE: pickle.load executes code embedded in the file. This is
	    # only acceptable while FAVORITES_PATH is written exclusively by this
	    # app; a JSON format would be safer if that ever changes.
	    try:
	        with open(FAVORITES_PATH, "rb") as f:
	            return pickle.load(f)
	    except Exception:
	        # Was a bare ``except:``; unpickling can raise many exception types,
	        # so the handler stays broad but no longer traps KeyboardInterrupt
	        # or SystemExit.
	        return []

	def save_favorite(paper):
	    """Append *paper* to the pickled favorites unless its "id" is already stored.

	    Returns:
	        "Saved: <title>" after writing the updated list to FAVORITES_PATH,
	        or "Already saved." when a favorite with the same id exists.
	    """
	    favorites = load_favorites()
	    paper_id = paper["id"]
	    known_ids = {fav["id"] for fav in favorites}
	    if paper_id in known_ids:
	        return "Already saved."
	    favorites.append(paper)
	    with open(FAVORITES_PATH, "wb") as out:
	        pickle.dump(favorites, out)
	    return "Saved: " + paper["title"]

	def export_favorites_csv():
	    """Write the saved favorites to ``favorites.csv`` under PERSIST_DIR.

	    Each row holds the title, up to three authors joined by ", ", the date,
	    the citation count ("N/A" when absent), the URL and the source
	    ("arXiv" when absent). The file is written as UTF-8 with a BOM.

	    Returns:
	        The CSV path, or None when there are no favorites.
	    """
	    favorites = load_favorites()
	    if not favorites:
	        return None
	    records = []
	    for fav in favorites:
	        records.append({
	            "Title": fav["title"],
	            "Authors": ", ".join(fav["authors"][:3]),
	            "Date": fav["published"],
	            "Citations": fav.get("citations", "N/A"),
	            "URL": fav["url"],
	            "Source": fav.get("source", "arXiv"),
	        })
	    csv_path = PERSIST_DIR + "/favorites.csv"
	    pd.DataFrame(records).to_csv(csv_path, index=False, encoding="utf-8-sig")
	    return csv_path

	def gr_export_fav():
	    """Gradio handler: export the favorites to CSV and return the file path (or None)."""
	    csv_path = export_favorites_csv()
	    return csv_path

	# ================================================================
	# PDF EXPORT
	# ================================================================
	def export_explanation_pdf(explanation_text, paper_title="paper"):
	    """Render a Markdown-style explanation to a PDF under PERSIST_DIR.

	    Lines starting with "# " or "## " become headings preceded by a rule,
	    lines starting with ">" become indented quotes, blank lines become
	    spacers and everything else is body text. Inline Markdown markers and a
	    fixed set of emoji are stripped first. A timestamped footer is appended.

	    Args:
	        explanation_text: text to render; falsy or shorter than 30
	            characters yields None.
	        paper_title: used, after sanitising, in the output file name.

	    Returns:
	        The PDF path, or None when the input is too short or the build fails.
	    """
	    from xml.sax.saxutils import escape

	    if not explanation_text or len(explanation_text) < 30:
	        return None
	    safe = re.sub(r"[^\w\s-]", "", paper_title)[:50].strip().replace(" ", "_")
	    path = PERSIST_DIR + "/explanation_" + safe + ".pdf"
	    # The scraped source read "2cm" (the "*" was eaten); margins are 2 * cm.
	    doc = SimpleDocTemplate(path, pagesize=A4,
	                            rightMargin=2 * cm, leftMargin=2 * cm,
	                            topMargin=2 * cm, bottomMargin=2 * cm)
	    styles = getSampleStyleSheet()
	    h2_style = ParagraphStyle("H2", parent=styles["Heading2"],
	                              fontSize=11, textColor=colors.HexColor("#2563eb"),
	                              spaceBefore=14, spaceAfter=6)
	    bd_style = ParagraphStyle("BD", parent=styles["Normal"],
	                              fontSize=10, leading=16, spaceAfter=8)
	    mt_style = ParagraphStyle("MT", parent=styles["Normal"],
	                              fontSize=9, textColor=colors.HexColor("#64748b"))
	    # Built once instead of once per quoted line.
	    q_style = ParagraphStyle("Q", parent=styles["Normal"],
	                             fontSize=9, leftIndent=20,
	                             textColor=colors.HexColor("#475569"), leading=14)
	    decoration = re.compile(r"[🎯❓🔧📊🌟🔗📄👥📅📡🤖#*_~]")

	    story = []
	    for raw_line in explanation_text.split("\n"):
	        line = raw_line.strip()
	        if not line:
	            story.append(Spacer(1, 6))
	            continue
	        # Unwrap **bold**, *italic* and `code`. The scraped patterns had lost
	        # their asterisks, leaving an unterminated raw string.
	        clean = re.sub(r"\*\*(.+?)\*\*", r"\1", line)
	        clean = re.sub(r"\*(.+?)\*", r"\1", clean)
	        clean = re.sub(r"`(.+?)`", r"\1", clean)
	        clean = re.sub(r"^#{1,6}\s*", "", clean)
	        clean = decoration.sub("", clean).strip()
	        if not clean:
	            continue
	        # Paragraph parses its text as XML-like markup, so a literal "<" or
	        # "&" (e.g. "p < 0.05") must be escaped or the build can fail.
	        if line.startswith("## ") or line.startswith("# "):
	            story.append(HRFlowable(width="100%", thickness=0.5,
	                                    color=colors.HexColor("#e2e8f0"), spaceAfter=4))
	            story.append(Paragraph(escape(clean), h2_style))
	        elif line.startswith(">"):
	            quote = decoration.sub("", line.lstrip(">").strip())
	            story.append(Paragraph(escape(quote), q_style))
	        else:
	            story.append(Paragraph(escape(clean), bd_style))
	    story += [
	        Spacer(1, 20),
	        HRFlowable(width="100%", thickness=0.5, color=colors.HexColor("#e2e8f0")),
	        Paragraph("Generated by Paper Discovery v7.4 — " +
	                  datetime.now().strftime("%Y-%m-%d %H:%M"), mt_style),
	    ]
	    try:
	        doc.build(story)
	        return path
	    except Exception as e:
	        print("PDF error: " + str(e))
	        return None

	def gr_export_pdf(explanation_text, choice):
	    """Gradio handler: export the current explanation as a PDF.

	    *choice* is a dropdown label of the form "N. title"; the part after the
	    first ". " is used as the title ("paper" when no choice is given).

	    Returns:
	        (path, status message); path is None when there is no explanation
	        of at least 50 characters or when PDF generation fails.
	    """
	    if not explanation_text or len(explanation_text) < 50:
	        return None, "Explain a paper first."
	    if choice:
	        title = choice.split(". ", 1)[-1]
	    else:
	        title = "paper"
	    pdf_path = export_explanation_pdf(explanation_text, title)
	    if pdf_path:
	        return (pdf_path, "PDF ready!")
	    return (None, "PDF failed.")

	# ================================================================
	# SOURCE 1 — arXiv
	# KEY FIX: sort_by parameter
	# Browse → "submittedDate" latest papers
	# Global → "relevance" exact title match
	# ================================================================
	def fetch_arxiv_papers(query, category, max_results=20, days_back=365,
	                       sort_by="submittedDate"):
	    """Query the arXiv export API and return a list of paper dicts.

	    Args:
	        query: free-text query. With three or more words and
	            ``sort_by == "relevance"`` it is sent as an exact title phrase
	            (``ti:"..."``); otherwise as ``all:<query>``.
	        category: arXiv category for a ``cat:`` filter; "" disables it.
	        max_results: number of entries requested.
	        days_back: entries published within this many days get
	            ``recent=True``; older entries are still returned.
	        sort_by: arXiv ``sortBy`` value (always descending).

	    Returns:
	        List of dicts with keys id, title, authors, abstract, published,
	        categories, citations (always None here), url, pdf_url, recent,
	        source. Empty on any request or XML-parsing failure.
	    """
	    text = query.strip()
	    parts = []
	    if len(text.split()) >= 3 and sort_by == "relevance":
	        parts.append('ti:"' + text + '"')
	    elif text:
	        parts.append("all:" + text)
	    if category.strip():
	        parts.append("cat:" + category.strip())
	    sq = " AND ".join(parts) if parts else "all:machine learning"
	    params = {
	        "search_query": sq,
	        "start": 0,
	        "max_results": max_results,
	        "sortBy": sort_by,
	        "sortOrder": "descending",
	    }
	    try:
	        resp = requests.get("http://export.arxiv.org/api/query", params=params, timeout=30)
	        resp.raise_for_status()
	        # Parsing used to sit outside the try, so a malformed body raised
	        # ParseError into the caller instead of yielding "no results".
	        root = ET.fromstring(resp.content)
	    except Exception as e:
	        print("arXiv error: " + str(e))
	        return []

	    atom = "{http://www.w3.org/2005/Atom}"
	    arxiv = "{http://arxiv.org/schemas/atom}"
	    cutoff = datetime.now() - timedelta(days=days_back)
	    papers = []
	    for entry in root.findall(atom + "entry"):
	        # A malformed entry (missing element -> AttributeError on .text, bad
	        # date, ...) is reported and skipped without dropping the rest.
	        try:
	            pid = entry.find(atom + "id").text.split("/abs/")[-1].strip()
	            title = entry.find(atom + "title").text.strip().replace("\n", " ")
	            abstract = entry.find(atom + "summary").text.strip().replace("\n", " ")
	            published = entry.find(atom + "published").text[:10]
	            authors = [a.find(atom + "name").text
	                       for a in entry.findall(atom + "author")]
	            cats = set()
	            primary = entry.find(arxiv + "primary_category")
	            if primary is not None:
	                cats.add(primary.get("term", ""))
	            for c in entry.findall(arxiv + "category"):
	                cats.add(c.get("term", ""))
	            cats.discard("")
	            papers.append({
	                "id": pid,
	                "title": title,
	                "authors": authors[:6],
	                "abstract": abstract[:1200],
	                "published": published,
	                "categories": list(cats)[:4],
	                "citations": None,
	                "url": "https://arxiv.org/abs/" + pid,
	                "pdf_url": "https://arxiv.org/pdf/" + pid,
	                "recent": datetime.strptime(published, "%Y-%m-%d") >= cutoff,
	                "source": "arXiv",
	            })
	        except Exception as e:
	            print("arXiv parse: " + str(e))
	    return papers

	# ================================================================
	# SOURCE 2 — CrossRef
	# ================================================================
	def fetch_crossref_papers(query, category_label="", max_results=20,
	                          days_back=365, use_title=False):
	    """Query the CrossRef works API and return a list of paper dicts.

	    Args:
	        query: search text.
	        category_label: UI label looked up in CROSSREF_SUBJECTS; a non-empty
	            subject keyword is appended to the query.
	        max_results: maximum papers returned (up to 3x as many rows, capped
	            at 200, are requested because items may be discarded).
	        days_back: papers dated within this many days get ``recent=True``.
	        use_title: send the text as ``query.title`` instead of ``query``.

	    Returns:
	        Paper dicts (same keys as the arXiv fetcher, ``source="CrossRef"``)
	        sorted by citation count, highest first. Empty when the request
	        fails, is rejected, or nothing usable comes back.
	    """
	    subject = CROSSREF_SUBJECTS.get(category_label, "")
	    full_query = (query + " " + subject).strip() if subject else query
	    params = {
	        "query.title" if use_title else "query": full_query,
	        "rows": min(max_results * 3, 200),
	        "sort": "relevance",
	        "select": ("title,author,abstract,published,published-print,"
	                   "published-online,issued,created,DOI,"
	                   "is-referenced-by-count,link,subject"),
	    }
	    items = []
	    # Up to three attempts: HTTP 429 backs off exponentially, a transport
	    # error waits one second, any other status aborts immediately.
	    for attempt in range(3):
	        try:
	            r = requests.get("https://api.crossref.org/works",
	                             params=params, headers=cr_headers(), timeout=30)
	            if r.status_code == 200:
	                items = r.json().get("message", {}).get("items", [])
	                break
	            if r.status_code == 429:
	                time.sleep(2 ** attempt)
	                continue
	            print("CrossRef " + str(r.status_code))
	            return []
	        except Exception as e:
	            print("CrossRef attempt " + str(attempt) + ": " + str(e))
	            time.sleep(1)

	    cutoff = datetime.now() - timedelta(days=days_back)
	    papers, seen_ids = [], set()
	    for item in items:
	        if len(papers) >= max_results:
	            break
	        title_list = item.get("title", [])
	        if not title_list:
	            continue
	        # Guard against a null title element, which used to crash .strip().
	        title = (title_list[0] or "").strip()
	        if not title or title.lower().startswith("title pending"):
	            continue
	        pub = parse_crossref_date(item)
	        if pub == "N/A":
	            continue
	        cit = int(item.get("is-referenced-by-count", 0) or 0)
	        authors = [
	            (a.get("given", "") + " " + a.get("family", "")).strip()
	            for a in item.get("author", [])[:6]
	        ]
	        authors = [a for a in authors if a.strip()] or ["Unknown"]
	        # Abstracts may carry markup tags; strip them.
	        abstract = re.sub(r"<[^>]+>", "",
	                          item.get("abstract", "No abstract.")).strip()[:1200]
	        doi = item.get("DOI", "")
	        url = "https://doi.org/" + doi if doi else "#"
	        pid = doi or re.sub(r"\W", "", title)[:40]
	        if pid in seen_ids:
	            continue
	        seen_ids.add(pid)
	        pdf_url = next((link.get("URL", "") for link in item.get("link", [])
	                        if "pdf" in link.get("content-type", "").lower()), "")
	        try:
	            recent = datetime.strptime(pub[:10], "%Y-%m-%d") >= cutoff
	        except ValueError:
	            # Only an unparsable date string is expected here (was a bare except).
	            recent = False
	        papers.append({
	            "id": pid,
	            "title": title,
	            "authors": authors,
	            "abstract": abstract,
	            "published": pub[:10],
	            "categories": item.get("subject", [])[:3],
	            "citations": cit,
	            "url": url,
	            "pdf_url": pdf_url,
	            "recent": recent,
	            "source": "CrossRef",
	        })
	    papers.sort(key=lambda x: x["citations"], reverse=True)
	    return papers

	# ================================================================
	# GLOBAL PAPER SEARCH — relevance sorted
	# ================================================================
	def global_paper_search(query, source_choice, max_results=10):
	    """Search arXiv and/or CrossRef by title/keywords and return Markdown.

	    Args:
	        query: title or keywords; blank input returns a prompt message.
	        source_choice: "arXiv", "CrossRef" or "Both".
	        max_results: per-source result cap (converted with ``int``).

	    Returns:
	        A Markdown document listing the de-duplicated results, ordered by
	        citation count (highest first), or a short message when the query
	        is empty or nothing is found.
	    """
	    if not query or not query.strip():
	        return "Enter a title or keywords."
	    q = query.strip()
	    papers = []
	    if source_choice in ("arXiv", "Both"):
	        papers += fetch_arxiv_papers(q, "", int(max_results), 3650,
	                                     sort_by="relevance")
	    if source_choice in ("CrossRef", "Both"):
	        papers += fetch_crossref_papers(q, "", int(max_results), 3650,
	                                        use_title=True)
	    if not papers:
	        return "No results for: " + q

	    # De-duplicate on the first 60 alphanumeric characters of the lowercased title.
	    seen, unique = set(), []
	    for p in papers:
	        key = re.sub(r"\W", "", p["title"].lower())[:60]
	        if key not in seen:
	            seen.add(key)
	            unique.append(p)
	    unique.sort(key=lambda x: x.get("citations") or 0, reverse=True)

	    # The scraped source had Markdown-escaped pipes ("\|") inside the
	    # literals below; the separator is a plain " | ".
	    sep = " | "
	    chunks = [
	        "## Search Results: " + q + "\n\n",
	        str(len(unique)) + " papers found\n\n---\n\n",
	    ]
	    for number, p in enumerate(unique, 1):
	        meta = [", ".join(p["authors"][:3]), p["published"]]
	        if p.get("citations"):
	            meta.append(cit_badge(p.get("citations")))
	        meta.append(p.get("source", ""))
	        cats = sep.join(p.get("categories", [])[:2])
	        if cats:
	            meta.append(cats)
	        links = "[View](" + p["url"] + ")"
	        if p.get("pdf_url"):
	            links += " [PDF](" + p["pdf_url"] + ")"
	        chunks.append(
	            "### " + str(number) + ". " + p["title"] + "\n\n" +
	            sep.join(meta) + "\n\n" +
	            "> " + p["abstract"][:450] + "...\n\n" +
	            links + "\n\n---\n\n")
	    return "".join(chunks)

	# ================================================================
	# CITATION ENGINE — 3-layer
	# ================================================================
	def enrich_citations(papers):
	    """Fill in citation counts for arXiv papers, mutating *papers* in place.

	    Only papers with ``source == "arXiv"`` and a missing or zero count are
	    looked up, in three layers:

	    1. Semantic Scholar batch endpoint (chunks of 500 ids).
	    2. Semantic Scholar single-paper endpoint for up to 15 papers still at zero.
	    3. CrossRef title search for papers still at zero; the top hit counts
	       only if at least two of the first five title words match.

	    Finally every remaining ``None`` count becomes 0.

	    Returns:
	        The same *papers* list.
	    """
	    version_suffix = re.compile(r"v\d+$")

	    def base_id(raw):
	        # arXiv id without any path prefix or trailing version ("v2").
	        return version_suffix.sub("", raw.split("/")[-1].strip())

	    def uncited(candidates):
	        return [x for x in candidates if (x.get("citations") or 0) == 0]

	    arxiv_papers = [p for p in papers
	                    if p.get("source") == "arXiv" and
	                    (p.get("citations") is None or p.get("citations") == 0)]
	    if not arxiv_papers:
	        for p in papers:
	            if p.get("citations") is None:
	                p["citations"] = 0
	        return papers

	    # Layer 1: Semantic Scholar batch lookup.
	    id_map, batch_ids = {}, []
	    for p in arxiv_papers:
	        clean = base_id(p["id"])
	        id_map[clean] = p
	        batch_ids.append("arXiv:" + clean)
	    for start in range(0, len(batch_ids), 500):
	        try:
	            r = requests.post(
	                "https://api.semanticscholar.org/graph/v1/paper/batch",
	                json={"ids": batch_ids[start:start + 500]},
	                params={"fields": "citationCount,externalIds"},
	                headers=s2_headers(), timeout=30)
	            if r.status_code == 200:
	                for item in r.json():
	                    if not item:
	                        continue
	                    ext = item.get("externalIds") or {}
	                    clean = base_id(ext.get("ArXiv", ""))
	                    if clean and clean in id_map:
	                        c = item.get("citationCount")
	                        if c is not None:
	                            id_map[clean]["citations"] = int(c)
	            elif r.status_code == 429:
	                time.sleep(4)
	        except Exception as e:
	            print("S2 batch: " + str(e))

	    # Layer 2: per-paper Semantic Scholar lookup, capped at 15 papers.
	    for p in uncited(arxiv_papers)[:15]:
	        clean = base_id(p["id"])
	        for attempt in range(2):
	            try:
	                r = requests.get(
	                    "https://api.semanticscholar.org/graph/v1/paper/arXiv:" + clean,
	                    params={"fields": "citationCount"},
	                    headers=s2_headers(), timeout=10)
	                if r.status_code == 200:
	                    c = r.json().get("citationCount")
	                    p["citations"] = int(c) if c else 0
	                    break
	                if r.status_code == 429:
	                    time.sleep(2 ** attempt)
	                    continue
	                p["citations"] = 0
	                break
	            except Exception as e:
	                # Was a bare ``except:`` that swallowed the error silently.
	                print("S2 single: " + str(e))
	                p["citations"] = 0
	                break
	        time.sleep(0.12)

	    # Layer 3: CrossRef title match.
	    for p in uncited(arxiv_papers):
	        try:
	            r = requests.get("https://api.crossref.org/works",
	                             params={"query.title": p["title"], "rows": 1,
	                                     "select": "is-referenced-by-count,title"},
	                             headers=cr_headers(), timeout=8)
	            if r.status_code == 200:
	                items = r.json().get("message", {}).get("items", [])
	                if items:
	                    found = (items[0].get("title") or [""])[0].lower()
	                    qw = set(p["title"].lower().split()[:5])
	                    fw = set(found.split()[:10])
	                    p["citations"] = (
	                        int(items[0].get("is-referenced-by-count", 0) or 0)
	                        if len(qw & fw) >= 2 else 0)
	                else:
	                    p["citations"] = 0
	            else:
	                p["citations"] = 0
	            time.sleep(0.12)
	        except Exception as e:
	            # Was a bare ``except:`` that swallowed the error silently.
	            print("CrossRef citations: " + str(e))
	            p["citations"] = 0

	    for p in papers:
	        if p.get("citations") is None:
	            p["citations"] = 0
	    return papers

	# ================================================================
	# FAISS
	# ================================================================
	def build_papers_index(papers):
	    """Store *papers* in PAPERS and (re)build the FAISS index over them.

	    Each paper is embedded as "<title> <abstract>" with normalised float32
	    vectors and added to an inner-product index. An empty list clears
	    FAISS_INDEX instead.
	    """
	    global FAISS_INDEX, PAPERS
	    PAPERS = papers
	    if not papers:
	        FAISS_INDEX = None
	        return
	    documents = []
	    for paper in papers:
	        documents.append(paper["title"] + " " + paper["abstract"])
	    vectors = embedder.encode(documents, convert_to_numpy=True,
	                              normalize_embeddings=True)
	    vectors = vectors.astype("float32")
	    index = faiss.IndexFlatIP(vectors.shape[1])
	    index.add(vectors)
	    FAISS_INDEX = index

	def search_papers(query, top_k=5):
	    """Return the indexed papers most similar to *query*.

	    Searches FAISS_INDEX for at most ``top_k`` neighbours and keeps hits
	    with a non-negative index and a score above 0.1.

	    Returns:
	        List of ``{"paper": <paper dict>, "score": <float>}`` in the order
	        FAISS returned them; empty when nothing is indexed.
	    """
	    if FAISS_INDEX is None or not PAPERS:
	        return []
	    query_vec = embedder.encode([query], convert_to_numpy=True,
	                                normalize_embeddings=True).astype("float32")
	    limit = min(top_k, len(PAPERS))
	    scores, ids = FAISS_INDEX.search(query_vec, limit)
	    hits = []
	    for score, idx in zip(scores[0], ids[0]):
	        if idx >= 0 and float(score) > 0.1:
	            hits.append({"paper": PAPERS[idx], "score": float(score)})
	    return hits

	# ================================================================
	# AUTO-FETCH
	# ================================================================
	def auto_fetch_worker(query, category, interval):
	    """Background loop that polls arXiv for unseen papers while AUTO_RUNNING is set.

	    Each cycle sleeps *interval* seconds, fetches up to 30 papers with a
	    one-day recency window, compares their ids with the persisted seen-id
	    set and, when new ids exist, saves the merged set and appends a
	    timestamped line to AUTO_LOG (kept to the latest 20 entries).

	    Args:
	        query: arXiv search text.
	        category: arXiv category code ("" for none).
	        interval: seconds to sleep between polls.
	    """
	    while AUTO_RUNNING:
	        time.sleep(interval)
	        if not AUTO_RUNNING:
	            break
	        # Without this guard one failing cycle (e.g. the seen-ids file cannot
	        # be written) would end the thread while AUTO_RUNNING stays True,
	        # leaving start_auto_fetch answering "Already running." forever.
	        try:
	            papers = fetch_arxiv_papers(query, category, 30, 1)
	            seen = load_seen_ids()
	            new_ps = [p for p in papers if p["id"] not in seen]
	            if new_ps:
	                # Set union; the scraped source showed it as the invalid "\|".
	                save_seen_ids(seen | {p["id"] for p in papers})
	                AUTO_LOG.append(
	                    "[" + datetime.now().strftime("%H:%M") + "] NEW " +
	                    str(len(new_ps)) + " — " + query)
	                if len(AUTO_LOG) > 20:
	                    AUTO_LOG.pop(0)
	        except Exception as e:
	            print("Auto-fetch error: " + str(e))

	def start_auto_fetch(query, cat_label, interval_min):
	    """Start the auto-fetch daemon thread unless one is already flagged as running.

	    Args:
	        query: search text handed to the worker.
	        cat_label: UI category label, mapped through CATEGORIES ("" if unknown).
	        interval_min: polling interval in minutes (converted with ``int``).

	    Returns:
	        A status message.
	    """
	    global AUTO_RUNNING
	    if AUTO_RUNNING:
	        return "Already running."
	    AUTO_RUNNING = True
	    category = CATEGORIES.get(cat_label, "")
	    interval_seconds = int(interval_min) * 60
	    worker = threading.Thread(
	        target=auto_fetch_worker,
	        args=(query, category, interval_seconds),
	        daemon=True)
	    worker.start()
	    return "Auto-fetch started every " + str(interval_min) + " min for: " + query

	def stop_auto_fetch():
	    """Clear the AUTO_RUNNING flag so the worker loop exits, and return "Stopped."."""
	    global AUTO_RUNNING
	    AUTO_RUNNING = False
	    return "Stopped."

	def get_auto_log():
	    """Return the last ten AUTO_LOG entries, newest first, separated by blank lines.

	    Returns "No log." when nothing has been logged.
	    """
	    if not AUTO_LOG:
	        return "No log."
	    newest_first = AUTO_LOG[-10:][::-1]
	    return "\n\n".join(newest_first)

	# ================================================================
	# TRENDS
	# ================================================================
	def analyze_trends(papers):
	    """Plot a 2x3 dashboard for *papers* and build a Markdown stats summary.

	    Panels: papers per month (with a linear trend line when there are more
	    than two months), top 15 title keywords, source distribution, top 10
	    cited papers, citation distribution, and top 10 authors (first three
	    authors of each paper are counted).

	    Args:
	        papers: list of paper dicts (reads "published", "title", "authors",
	            "url" and optionally "source", "citations", "recent").

	    Returns:
	        (image_path, stats_markdown), or ``(None, "No papers.")`` for an
	        empty list. The image is written to ``trends.png`` under PERSIST_DIR.
	    """
	    if not papers:
	        return None, "No papers."

	    # ---- aggregate ------------------------------------------------------
	    date_counts = Counter(p["published"][:7] for p in papers if p["published"] != "N/A")
	    stopwords = {"the","a","an","of","in","for","on","with","and","or","to","using",
	                 "based","via","from","by","is","are","our","we","this","that","which",
	                 "towards","approach","method","new","into","over","learning","deep",
	                 "model","models","data","neural","large","language","paper","study",
	                 "analysis","results","show","also","can","used","two","its","their"}
	    all_words = [w.lower() for p in papers
	                 for w in re.findall(r"[a-zA-Z]{4,}", p["title"])
	                 if w.lower() not in stopwords]
	    top_words = Counter(all_words).most_common(15)
	    sources = Counter(p.get("source", "arXiv") for p in papers)
	    cit_papers = [p for p in papers if (p.get("citations") or 0) > 0]
	    by_citations = sorted(cit_papers, key=lambda x: x["citations"], reverse=True)
	    top_cited = by_citations[:10]
	    all_auth = [a for p in papers for a in p["authors"][:3]]
	    top_authors = Counter(all_auth).most_common(10)
	    cvals = [p["citations"] for p in cit_papers]
	    # The last bound used to be 10000, silently dropping papers with 10,000+
	    # citations from the "500+" bucket; it is now open-ended.
	    # NOTE: cvals holds only positive counts, so the "0" bucket stays empty.
	    buckets = [0, 1, 5, 10, 50, 100, 500, float("inf")]
	    blabels = ["0", "1-4", "5-9", "10-49", "50-99", "100-499", "500+"]
	    bcounts = ([sum(1 for c in cvals if lo <= c < hi)
	                for lo, hi in zip(buckets, buckets[1:])] if cvals else [0] * 7)
	    avg_cit = round(sum(cvals) / max(len(cvals), 1), 1) if cvals else 0
	    total_cit = sum(p.get("citations") or 0 for p in papers)

	    # ---- figure ---------------------------------------------------------
	    C = ["#3b82f6","#8b5cf6","#10b981","#f59e0b","#ef4444","#06b6d4",
	         "#ec4899","#14b8a6","#f97316","#a855f7","#22d3ee","#84cc16",
	         "#fbbf24","#34d399","#f87171"]
	    BG, PNL, BR, W = "#0f172a", "#1e293b", "#334155", "white"
	    fig, axes = plt.subplots(2, 3, figsize=(20, 12))
	    fig.patch.set_facecolor(BG)
	    fig.suptitle("Research Trends", color=W, fontsize=16, fontweight="bold", y=1.01)

	    def style(ax):
	        # Shared dark-panel look for the bar-chart axes.
	        ax.set_facecolor(PNL)
	        for sp in ax.spines.values():
	            sp.set_edgecolor(BR)
	        ax.tick_params(colors=W, labelsize=8)

	    # Papers per month
	    ax = axes[0, 0]
	    style(ax)
	    if date_counts:
	        ms, cs = zip(*sorted(date_counts.items()))
	        ms, cs = list(ms), list(cs)
	        bars = ax.bar(ms, cs, color=C[0], edgecolor="#60a5fa", lw=0.8)
	        for b, c in zip(bars, cs):
	            ax.text(b.get_x() + b.get_width() / 2, b.get_height() + .05, str(c),
	                    ha="center", va="bottom", color=W, fontsize=8)
	        if len(cs) > 2:
	            z = np.polyfit(range(len(cs)), cs, 1)
	            ax.plot(ms, np.poly1d(z)(range(len(cs))), "--",
	                    color="#f59e0b", lw=1.5, alpha=.8, label="Trend")
	            ax.legend(fontsize=8, facecolor=PNL, labelcolor=W)
	    ax.set_title("Papers per Month", color=W, fontsize=12, fontweight="bold", pad=10)
	    ax.set_ylabel("Count", color=W, fontsize=9)
	    ax.tick_params(rotation=45)

	    # Top keywords
	    ax = axes[0, 1]
	    style(ax)
	    if top_words:
	        wds, wcts = zip(*top_words)
	        ax.barh(list(wds), list(wcts), color=C[:len(wds)], edgecolor="#475569", lw=.6)
	        for b, c in zip(ax.patches, wcts):
	            ax.text(b.get_width() + .1, b.get_y() + b.get_height() / 2, str(c),
	                    va="center", color=W, fontsize=8)
	    ax.set_title("Top Keywords", color=W, fontsize=12, fontweight="bold", pad=10)
	    ax.set_xlabel("Frequency", color=W, fontsize=9)

	    # Source distribution
	    ax = axes[0, 2]
	    ax.set_facecolor(PNL)
	    if sources:
	        sl, sv = zip(*sources.items())
	        _, txts, ats = ax.pie(sv, labels=sl, autopct="%1.0f%%",
	                              colors=C[:len(sl)], startangle=90,
	                              textprops={"color": W, "fontsize": 10},
	                              wedgeprops={"edgecolor": BR, "linewidth": 1.5})
	        for at in ats:
	            at.set_color(W)
	            at.set_fontsize(9)
	    ax.set_title("Source Distribution", color=W, fontsize=12, fontweight="bold", pad=10)

	    # Top 10 cited
	    ax = axes[1, 0]
	    style(ax)
	    if top_cited:
	        lbls = [(p["title"][:35] + "..." if len(p["title"]) > 35 else p["title"])
	                for p in top_cited]
	        cv = [p["citations"] for p in top_cited]
	        ax.barh(lbls[::-1], cv[::-1], color=C[1], edgecolor="#475569", lw=.6)
	        mx = max(cv) if cv else 1
	        for b, c in zip(ax.patches, cv[::-1]):
	            ax.text(b.get_width() + mx * .01, b.get_y() + b.get_height() / 2,
	                    "{:,}".format(c), va="center", color=W, fontsize=8)
	        ax.set_xlabel("Citations", color=W, fontsize=9)
	    else:
	        ax.text(.5, .5, "No citation data", ha="center", va="center",
	                color="#94a3b8", fontsize=11, transform=ax.transAxes)
	    ax.set_title("Top 10 Cited", color=W, fontsize=12, fontweight="bold", pad=10)

	    # Citation distribution
	    ax = axes[1, 1]
	    style(ax)
	    if any(bcounts):
	        ax.bar(blabels, bcounts, color=C[2], edgecolor="#475569", lw=.8)
	        for b, c in zip(ax.patches, bcounts):
	            if c > 0:
	                ax.text(b.get_x() + b.get_width() / 2, b.get_height() + .1, str(c),
	                        ha="center", va="bottom", color=W, fontsize=9)
	        ax.set_xlabel("Citation Range", color=W, fontsize=9)
	        ax.set_ylabel("Papers", color=W, fontsize=9)
	        ax.annotate("Avg " + str(avg_cit) + " | Total " + "{:,}".format(total_cit),
	                    xy=(.98, .96), xycoords="axes fraction",
	                    ha="right", va="top", color="#94a3b8", fontsize=8)
	    else:
	        ax.text(.5, .5, "No citation data", ha="center", va="center",
	                color="#94a3b8", fontsize=11, transform=ax.transAxes)
	    ax.set_title("Citation Distribution", color=W, fontsize=12, fontweight="bold", pad=10)

	    # Top authors
	    ax = axes[1, 2]
	    style(ax)
	    if top_authors:
	        an, ac = zip(*top_authors)
	        ax.barh(list(an)[::-1], list(ac)[::-1], color=C[3], edgecolor="#475569", lw=.6)
	        for b, c in zip(ax.patches, list(ac)[::-1]):
	            ax.text(b.get_width() + .05, b.get_y() + b.get_height() / 2, str(c),
	                    va="center", color=W, fontsize=8)
	        ax.set_xlabel("Papers", color=W, fontsize=9)
	    ax.set_title("Top Authors", color=W, fontsize=12, fontweight="bold", pad=10)

	    plt.tight_layout(pad=3)
	    path = PERSIST_DIR + "/trends.png"
	    fig.savefig(path, bbox_inches="tight", dpi=150, facecolor=BG)
	    # Close this specific figure rather than whatever pyplot considers
	    # "current", so figures cannot pile up across calls.
	    plt.close(fig)

	    # ---- stats ----------------------------------------------------------
	    # Table pipes are plain "|" (the scraped source had them as "\|").
	    stats = ("### Stats\n\n| Metric | Value |\n|---|---|\n" +
	             "| Total | " + str(len(papers)) + " |\n" +
	             "| New | " + str(sum(1 for p in papers if p.get("recent"))) + " |\n" +
	             "| Citations | " + "{:,}".format(total_cit) + " |\n" +
	             "| Average | " + str(avg_cit) + " |\n\n")
	    top5 = by_citations[:5]
	    if top5:
	        stats += "### Top Cited\n\n"
	        for i, p in enumerate(top5, 1):
	            stats += (str(i) + ". [" + p["title"] + "](" + p["url"] + ")" +
	                      " — " + "{:,}".format(p["citations"]) + "\n\n")
	    return path, stats

	# ================================================================
	# LLM
	# ================================================================
	def _llm(messages, max_tokens=1200):
	    """Send *messages* to the Groq chat-completions API and return the reply text.

	    Uses the "llama-3.3-70b-versatile" model at temperature 0.3. Any
	    exception is converted to the string "LLM Error: <message>" instead of
	    being raised.
	    """
	    request = {
	        "model": "llama-3.3-70b-versatile",
	        "messages": messages,
	        "temperature": 0.3,
	        "max_tokens": max_tokens,
	    }
	    try:
	        response = groq_client.chat.completions.create(**request)
	        reply = response.choices[0].message.content
	        return reply.strip()
	    except Exception as exc:
	        return "LLM Error: " + str(exc)

	def explain_paper(paper, lang="ar"):
	    """Ask the LLM for a sectioned explanation of one paper.

	    Args:
	        paper: paper dict (reads "title", "authors", "published",
	            "abstract" and optionally "citations").
	        lang: "ar" for an Arabic prompt (system message plus AR_RULES, reply
	            passed through fix_ar_format); anything else uses the English
	            prompt.

	    Returns:
	        The LLM reply text (or its "LLM Error: ..." string).
	    """
	    cit = paper.get("citations", "N/A")
	    authors = ", ".join(paper["authors"][:3])
	    # The metadata separator is a plain " | " (scraped source showed "\|").
	    if lang == "ar":
	        system_msg = "أنت خبير أكاديمي يشرح الأبحاث بالعربية الفصحى.\n" + AR_RULES
	        user_msg = (
	            "اشرح الورقة:\nالعنوان: " + paper["title"] + "\n" +
	            "المؤلفون: " + authors + "\n" +
	            "التاريخ: " + paper["published"] + " | الاقتباسات: " + str(cit) + "\n" +
	            "الملخص: " + paper["abstract"] + "\n\n" +
	            "## موضوع الورقة\n\n## المشكلة\n\n## المنهجية\n\n" +
	            "## النتائج\n\n## الأهمية\n\n## التطبيقات")
	        return fix_ar_format(_llm([
	            {"role": "system", "content": system_msg},
	            {"role": "user", "content": user_msg}]))
	    user_msg = (
	        "Explain:\nTitle: " + paper["title"] + "\nAuthors: " +
	        authors + "\nDate: " + paper["published"] +
	        " | Citations: " + str(cit) + "\nAbstract: " + paper["abstract"] + "\n\n" +
	        "## Topic\n## Problem\n## Methodology\n## Findings\n## Contribution\n## Applications")
	    return _llm([{"role": "user", "content": user_msg}])

	def compare_papers(pa, pb, lang="ar"):
	    """Ask the LLM for a sectioned comparison of two papers.

	    Each paper contributes its title, citation count ("N/A" when absent)
	    and the first 500 characters of its abstract.

	    Args:
	        pa, pb: paper dicts.
	        lang: "ar" for the Arabic prompt (with AR_RULES, reply passed through
	            fix_ar_format); anything else uses the English prompt.

	    Returns:
	        The LLM reply text, requested with a 1400-token limit.
	    """
	    def describe(label, paper):
	        # Plain " | " separator (scraped source showed "\|").
	        return (label + ": " + paper["title"] + " | Citations: " +
	                str(paper.get("citations", "N/A")) + "\n" + paper["abstract"][:500])

	    body = describe("Paper A", pa) + "\n\n" + describe("Paper B", pb)
	    if lang == "ar":
	        prompt = ("قارن بين الورقتين.\n" + AR_RULES + "\n\n" + body + "\n\n" +
	                  "## الهدف\n\n## المنهجية\n\n## النتائج\n\n" +
	                  "## القوة\n\n## القيود\n\n## الخلاصة")
	        return fix_ar_format(_llm([{"role": "user", "content": prompt}], 1400))
	    prompt = ("Compare:\n" + body + "\n\n" +
	              "## Topic\n## Methodology\n## Results\n## Strengths\n## Limits\n## Verdict")
	    return _llm([{"role": "user", "content": prompt}], 1400)

	def summarize_papers(papers, topic, lang="ar"):
	    """Ask the LLM for an academic overview of *topic* based on up to eight papers.

	    Each paper contributes a numbered line with its title, date and the
	    first 300 characters of its abstract.

	    Args:
	        papers: list of paper dicts; only the first eight are used.
	        topic: topic name quoted in the prompt.
	        lang: "ar" for the Arabic prompt (with AR_RULES, reply passed through
	            fix_ar_format); anything else uses the English prompt.

	    Returns:
	        The LLM reply text, requested with a 900-token limit.
	    """
	    digest_lines = []
	    for number, paper in enumerate(papers[:8], 1):
	        digest_lines.append(
	            str(number) + ". " + paper["title"] + " (" + paper["published"] + "): " +
	            paper["abstract"][:300] + "...\n\n")
	    text = "".join(digest_lines)
	    if lang != "ar":
	        prompt = ("Academic overview of \"" + topic + "\":\n" + text + "\n\n" +
	                  "## Trends\n## Key Papers\n## Themes\n## Gaps")
	        return _llm([{"role": "user", "content": prompt}], 900)
	    prompt = ("نظرة عامة أكاديمية حول \"" + topic + "\".\n" + AR_RULES +
	              "\n\n" + text + "\n\n" +
	              "## الاتجاهات\n\n## أبرز الأوراق\n\n" +
	              "## المواضيع المشتركة\n\n## الفجوات")
	    return fix_ar_format(_llm([{"role": "user", "content": prompt}], 900))

	def generate_bibliography(papers, style="APA"):
	    """Format *papers* as a bibliography and write it to a text file.

	    Args:
	        papers: list of paper dicts (reads "authors", "published", "title", "url").
	        style: "APA", "IEEE" or "Chicago"; any other value produces BibTeX
	            ``@article`` entries.

	    Returns:
	        (bibliography_text, file_path). Entries are separated by blank
	        lines; the file is ``bibliography_<style>.txt`` under PERSIST_DIR.
	    """
	    entries = []
	    for i, p in enumerate(papers, 1):
	        names = p["authors"]
	        auth = ", ".join(names[:6]) + (" et al." if len(names) > 6 else "")
	        year = p["published"][:4] if p["published"] not in ("N/A", "") else "n.d."
	        t, u = p["title"], p["url"]
	        if style == "APA":
	            entries.append(str(i) + ". " + auth + " (" + year + "). " + t + ". " + u)
	        elif style == "IEEE":
	            ae = " and ".join(names[:3]) + (" et al." if len(names) > 3 else "")
	            entries.append("[" + str(i) + "] " + ae + ', "' + t + '," ' + year + ". [Online]: " + u)
	        elif style == "Chicago":
	            entries.append(str(i) + ". " + auth + '. "' + t + '." (' + year + "). " + u)
	        else:
	            # A blank or whitespace-only first author used to raise
	            # IndexError on ``.split()[-1]``; fall back to "Auth".
	            first_tokens = names[0].split() if names else []
	            surname = first_tokens[-1] if first_tokens else "Auth"
	            key = re.sub(r"\W", "", surname) + year
	            # BibTeX separates authors with " and " (a comma splits one
	            # name into last/first) and writes "et al." as "and others".
	            bib_auth = " and ".join(names[:6]) + (" and others" if len(names) > 6 else "")
	            entries.append("@article{" + key + str(i) + ",\n title={" + t +
	                           "},\n author={" + bib_auth + "},\n year={" + year +
	                           "},\n url={" + u + "}\n}")
	    bib = "\n\n".join(entries)
	    # Keep only filename-safe characters so *style* cannot redirect the
	    # write outside PERSIST_DIR; the known style names are unaffected.
	    safe_style = re.sub(r"[^\w-]", "", str(style))
	    path = PERSIST_DIR + "/bibliography_" + safe_style + ".txt"
	    with open(path, "w", encoding="utf-8") as f:
	        f.write(bib)
	    return bib, path

	def chat_about_papers(question, history):
	    """Answer *question* using the fetched papers as retrieval context.

	    The question's language is detected; up to four semantically similar
	    papers are placed in the prompt together with the last four history
	    turns. Arabic answers are passed through fix_ar_format.

	    Args:
	        question: the user's question.
	        history: list of ``{"role": ..., "content": ...}`` dicts (may be
	            None or empty).

	    Returns:
	        The answer text, or a "fetch papers first" message in the
	        question's language when no papers are loaded.
	    """
	    lang = detect_lang(question)  # detected once and reused below
	    is_arabic = lang == "ar"
	    if not PAPERS:
	        return "يرجى جلب الأوراق أولاً." if is_arabic else "Fetch papers first."

	    relevant = search_papers(question, top_k=4)
	    context = ""
	    if relevant:
	        context = "الأوراق ذات الصلة:\n\n" if is_arabic else "Relevant papers:\n\n"
	        for r in relevant:
	            p = r["paper"]
	            # Plain " | " separator (scraped source showed "\|").
	            cit = (" | " + str(p["citations"]) + " citations") if p.get("citations") else ""
	            context += (p["title"] + " (" + p["published"] + ")" + cit +
	                        "\n" + p["abstract"][:400] + "\n🔗 " + p["url"] + "\n\n")

	    sys_msg = (("أنت مساعد بحثي. أجب بالعربية الفصحى.\n" + AR_RULES) if is_arabic
	               else "You are an academic assistant. Answer in English.")
	    msgs = [{"role": "system", "content": sys_msg}]
	    # Tolerate history=None (first turn) instead of failing on the slice.
	    for turn in (history or [])[-4:]:
	        msgs.append({"role": turn["role"], "content": turn["content"]})
	    # The question label now follows the detected language; the Arabic
	    # label used to be prepended to English questions as well.
	    label = "سؤال: " if is_arabic else "Question: "
	    msgs.append({"role": "user", "content":
	                 (context + "\n" + label + question) if context else question})
	    out = _llm(msgs, 800)
	    return fix_ar_format(out) if is_arabic else out

	def text_to_audio(text, lang="ar"):
	    """Synthesise *text* to an MP3 with gTTS and return the file path.

	    The text is first reduced with clean_md. The output file is
	    ``audio_<lang>.mp3`` under PERSIST_DIR.

	    Returns:
	        The MP3 path, or None when the cleaned text is empty or synthesis
	        fails (the error is printed).
	    """
	    spoken = clean_md(text)
	    if not spoken:
	        return None
	    try:
	        speech = gTTS(text=spoken, lang=lang, slow=False)
	        mp3_path = PERSIST_DIR + "/audio_" + lang + ".mp3"
	        speech.save(mp3_path)
	    except Exception as exc:
	        print("TTS: " + str(exc))
	        return None
	    return mp3_path

	# ================================================================
	# GRADIO HANDLERS
	# ================================================================
	def gr_fetch(query, category_label, max_results, days_back, source_choice,
	             progress=gr.Progress()):
	    """Gradio handler: fetch, de-duplicate, enrich and index papers.

	    Args:
	        query: search text.
	        category_label: UI category label (key of CATEGORIES / CROSSREF_SUBJECTS).
	        max_results, days_back: numeric UI values, converted with ``int``.
	        source_choice: "arXiv", "CrossRef" or "Both".
	        progress: Gradio progress tracker.

	    Returns:
	        A 6-tuple: summary Markdown (with the paper table), four dropdown
	        updates built from the same choices, and a short status string.
	    """
	    global ACTIVE_PAPERS
	    progress(0.05, desc="Connecting...")
	    papers, warn = [], ""
	    if source_choice in ("arXiv", "Both"):
	        progress(0.15, desc="Fetching arXiv...")
	        papers += fetch_arxiv_papers(query, CATEGORIES.get(category_label, ""),
	                                     int(max_results), int(days_back),
	                                     sort_by="submittedDate")
	    if source_choice in ("CrossRef", "Both"):
	        progress(0.35, desc="Fetching CrossRef...")
	        cr = fetch_crossref_papers(query, category_label, int(max_results), int(days_back))
	        if not cr:
	            warn = "\n\n> CrossRef: no results."
	        papers += cr

	    # De-duplicate on the first 60 alphanumeric characters of the lowercased title.
	    seen, unique = set(), []
	    for p in papers:
	        key = re.sub(r"\W", "", p["title"].lower())[:60]
	        if key not in seen:
	            seen.add(key)
	            unique.append(p)
	    papers = unique
	    if not papers:
	        empty = gr.update(choices=[], value=None)
	        return ("No results." + warn, empty, empty, empty, empty, "0 papers")

	    progress(0.60, desc="Fetching citations...")
	    papers = enrich_citations(papers)
	    progress(0.85, desc="FAISS indexing...")
	    build_papers_index(papers)
	    ACTIVE_PAPERS = list(papers)

	    tbl, choices = build_table(papers)
	    recent = sum(1 for p in papers if p.get("recent"))
	    tot_cit = sum(p.get("citations") or 0 for p in papers)
	    zero_cit = sum(1 for p in papers if (p.get("citations") or 0) == 0)
	    note = ("\n\n> " + str(zero_cit) + " papers with 0 citations (new/unindexed)."
	            if zero_cit else "")
	    # Plain " | " separators (the scraped source showed them as "\|").
	    md = ("## Fetched " + str(len(papers)) + " papers\n\n" +
	          "New: " + str(recent) + " | Citations: **" +
	          "{:,}".format(tot_cit) + "**" + warn + note +
	          "\n\n---\n\n" + tbl)
	    upd = gr.update(choices=choices, value=choices[0] if choices else None)
	    progress(1.0)
	    status = str(len(papers)) + " papers | " + "{:,}".format(tot_cit) + " cit."
	    return md, upd, upd, upd, upd, status

	def gr_filter_papers(year_from, year_to, cit_min, cit_max, sort_by):
	    """Filter and sort the fetched papers by year and citation count.

	    Args:
	        year_from, year_to: Inclusive publication-year bounds.
	        cit_min, cit_max: Inclusive citation-count bounds.
	        sort_by: ``"Newest"``, ``"Oldest"``, ``"Most Cited"`` or
	            ``"Least Cited"``; any other value keeps the current order.

	    Returns:
	        A 3-tuple: results Markdown, dropdown update, status-bar text.

	    Side effects:
	        Replaces ``ACTIVE_PAPERS`` with the filtered list (possibly empty).
	    """
	    global ACTIVE_PAPERS
	    if not PAPERS:
	        return "Fetch papers first.", gr.update(), "0"

	    y_lo, y_hi = int(year_from), int(year_to)
	    c_lo, c_hi = int(cit_min), int(cit_max)

	    filtered = []
	    for p in PAPERS:
	        try:
	            year = int(p["published"][:4])
	        except (KeyError, TypeError, ValueError):
	            # Papers without a parsable date are kept, as before, but only
	            # the date-parsing errors are swallowed now.
	            year = None
	        if year is not None and not (y_lo <= year <= y_hi):
	            continue
	        cit = int(p.get("citations") or 0)
	        if not (c_lo <= cit <= c_hi):
	            continue
	        filtered.append(p)

	    def _by_date(x):
	        # Undated papers pass the filter, so sorting must tolerate them.
	        return x.get("published") or ""

	    def _by_cit(x):
	        return x.get("citations") or 0

	    sorters = {
	        "Newest": (_by_date, True),
	        "Oldest": (_by_date, False),
	        "Most Cited": (_by_cit, True),
	        "Least Cited": (_by_cit, False),
	    }
	    if sort_by in sorters:
	        key_fn, descending = sorters[sort_by]
	        filtered.sort(key=key_fn, reverse=descending)

	    if not filtered:
	        ACTIVE_PAPERS = []
	        return "No matching papers.", gr.update(choices=[], value=None), "0"

	    ACTIVE_PAPERS = list(filtered)
	    tbl, choices = build_table(filtered)
	    tot = sum(p.get("citations") or 0 for p in filtered)
	    ratio = str(len(filtered)) + "/" + str(len(PAPERS))
	    # Plain "|" separators: "\|" is an invalid Python escape sequence.
	    md = ("## " + ratio + " papers" +
	          " | " + str(year_from) + "-" + str(year_to) +
	          " | cit " + str(cit_min) + "-" + str(cit_max) +
	          " | total " + "{:,}".format(tot) + "\n\n---\n\n" + tbl)
	    selector = gr.update(choices=choices, value=choices[0] if choices else None)
	    return md, selector, ratio

	def gr_search_fetched(query):
	    """Run a semantic search over the loaded papers and render Markdown.

	    Args:
	        query: Search text; blank or ``None`` yields a prompt message.

	    Returns:
	        A Markdown string with up to 8 results from ``search_papers``, or
	        a short message when there is nothing to search or show.
	    """
	    if not query or not query.strip():
	        return "Enter a query."
	    if not PAPERS:
	        return "Fetch papers first."
	    results = search_papers(query.strip(), top_k=8)
	    if not results:
	        return "No results for: " + query

	    sep = "\n\n"
	    chunks = ["## Search: " + query + " — " + str(len(results)) + " results" + sep]
	    for hit in results:
	        paper, score = hit["paper"], hit["score"]

	        # Meta line: authors | date | [citation badge] | source.
	        meta = [", ".join(paper["authors"][:2]), paper["published"]]
	        if paper.get("citations"):
	            meta.append(cit_badge(paper.get("citations")))
	        meta.append(paper.get("source", ""))

	        abstract = paper["abstract"]
	        # Only mark the excerpt as truncated when it actually is.
	        excerpt = abstract[:350] + ("..." if len(abstract) > 350 else "")

	        links = "[View](" + paper["url"] + ")"
	        if paper.get("pdf_url"):
	            links += " [PDF](" + paper["pdf_url"] + ")"

	        chunks.append(
	            "### " + "{:.0f}".format(score * 100) + "% — " + paper["title"] + sep +
	            " | ".join(meta) + sep +
	            "> " + excerpt + sep +
	            links + sep + "---" + sep)
	    return "".join(chunks)

	def _get_paper(choice):
	    """Resolve a dropdown choice such as ``"3. Title"`` to its paper dict.

	    The number before the first ``"."`` is a 1-based position in
	    ``ACTIVE_PAPERS`` (or ``PAPERS`` when no filtered view is active).

	    Returns:
	        The paper dict, or ``None`` when the choice cannot be parsed or
	        the position is out of range.
	    """
	    pool = ACTIVE_PAPERS if ACTIVE_PAPERS else PAPERS
	    try:
	        position = int(choice.split(".")[0])
	    except (AttributeError, ValueError):
	        return None
	    # Reject 0 and negatives explicitly: pool[position - 1] would wrap
	    # around and silently return a paper from the end of the list.
	    if not 1 <= position <= len(pool):
	        return None
	    return pool[position - 1]

	def gr_explain(choice, lang_choice):
	    """Build the Markdown explanation page for the selected paper.

	    Args:
	        choice: Dropdown value identifying the paper (see ``_get_paper``).
	        lang_choice: Language label; any label containing ``"Arabic"``
	            selects Arabic, everything else English.

	    Returns:
	        A Markdown header (title, authors, metadata, links, abstract)
	        followed by the text returned by ``explain_paper``; or a short
	        message when nothing is selected or the selection is invalid.
	    """
	    if not choice:
	        return "Fetch papers and select one."
	    paper = _get_paper(choice)
	    if not paper:
	        return "Selection error."
	    lang = "ar" if "Arabic" in lang_choice else "en"

	    pdf_link = (" [PDF](" + paper["pdf_url"] + ")") if paper.get("pdf_url") else ""
	    # Plain "|" separators: "\|" is an invalid Python escape sequence.
	    meta = " | ".join([
	        "Date: " + paper["published"],
	        "Citations: " + cit_badge(paper.get("citations")),
	        "Source: " + paper.get("source", "arXiv"),
	    ])
	    sections = [
	        "# " + paper["title"],
	        "Authors: " + ", ".join(paper["authors"]),
	        meta,
	        "[View Paper](" + paper["url"] + ")" + pdf_link,
	        "---",
	        "> " + paper["abstract"],
	        "---",
	        "## Explanation (Llama 3.3 70B)",
	    ]
	    header = "\n\n".join(sections) + "\n\n"
	    return header + explain_paper(paper, lang)

	def gr_audio(txt, lang_choice):
	    """Return an audio file path for the explanation text, or ``None``.

	    Text that is missing or shorter than 50 characters is not synthesised.
	    A label containing ``"Arabic"`` selects ``"ar"``; otherwise ``"en"``.
	    """
	    long_enough = bool(txt) and len(txt) >= 50
	    if not long_enough:
	        return None
	    tts_lang = "ar" if "Arabic" in lang_choice else "en"
	    return text_to_audio(txt, tts_lang)

	def gr_save_fav(choice):
	    """Save the selected paper to favorites and return a status message."""
	    if not choice:
	        return "Select a paper first."
	    selected = _get_paper(choice)
	    if selected:
	        return save_favorite(selected)
	    return "Error."

	def gr_show_favs():
	    """Render the saved favorites as Markdown.

	    Returns:
	        ``"No saved papers."`` when ``load_favorites`` returns nothing;
	        otherwise a heading followed by one entry per paper (title, then
	        first author | date | source | citation badge | link), separated
	        by horizontal rules.
	    """
	    favs = load_favorites()
	    if not favs:
	        return "No saved papers."

	    entries = []
	    for p in favs:
	        first_author = p["authors"][0] if p["authors"] else "N/A"
	        # Plain "|" separators: "\|" is an invalid Python escape sequence.
	        meta = " | ".join([
	            first_author,
	            p["published"],
	            p.get("source", ""),
	            cit_badge(p.get("citations")),
	            "[Link](" + p["url"] + ")",
	        ])
	        entries.append(p["title"] + "\n" + meta)

	    heading = "### Favorites — " + str(len(favs)) + " papers"
	    return heading + "\n\n" + "\n\n---\n\n".join(entries)

	def gr_compare(ca, cb, lc):
	    """Compare two selected papers and return the comparison text.

	    ``ca`` and ``cb`` are dropdown choices; ``lc`` is the language label
	    (a label containing ``"Arabic"`` selects ``"ar"``, otherwise ``"en"``).
	    Returns a short message instead when a selection is missing, cannot
	    be resolved, or both choices point at the same paper id.
	    """
	    if not (ca and cb):
	        return "Select two papers."
	    pa, pb = _get_paper(ca), _get_paper(cb)
	    if not (pa and pb):
	        return "Selection error."
	    if pa["id"] == pb["id"]:
	        return "Select two different papers."
	    lang = "ar" if "Arabic" in lc else "en"
	    return compare_papers(pa, pb, lang)

	def gr_overview(query, lc):
	    """Return a Markdown overview of the active (or all fetched) papers.

	    ``query`` falls back to ``"research"`` when empty; ``lc`` is the
	    language label (containing ``"Arabic"`` selects ``"ar"``).
	    """
	    if not PAPERS:
	        return "Fetch papers first."
	    papers = ACTIVE_PAPERS or PAPERS
	    topic = query or "research"
	    lang = "ar" if "Arabic" in lc else "en"
	    return "## Overview\n\n" + summarize_papers(papers, topic, lang)

	def gr_trends():
	    """Run ``analyze_trends`` on the active (or all fetched) papers.

	    Returns ``(None, "Fetch papers first.")`` when nothing is loaded.
	    """
	    if PAPERS:
	        return analyze_trends(ACTIVE_PAPERS or PAPERS)
	    return None, "Fetch papers first."

	def gr_bib(style, progress=gr.Progress()):
	    """Generate a bibliography for the active (or all fetched) papers.

	    Returns a 2-tuple: a fenced code block holding at most the first
	    3000 characters of the bibliography (with ``...`` when cut), and the
	    file path returned by ``generate_bibliography``.
	    """
	    if not PAPERS:
	        return "Fetch papers first.", None
	    progress(0.5, desc="Generating...")
	    text, path = generate_bibliography(ACTIVE_PAPERS or PAPERS, style)
	    progress(1.0)
	    preview = text[:3000]
	    if len(text) > 3000:
	        preview += "..."
	    return "```\n" + preview + "\n```", path

	def gr_chat_fn(message, history):
	    """Answer a chat message about the loaded papers.

	    Args:
	        message: The user's question; blank or ``None`` is ignored.
	        history: Chatbot history as ``(user, assistant)`` pairs, or
	            ``None`` for an empty chat.

	    Returns:
	        ``(updated_history, "")`` — the empty string clears the input box.
	        The caller's list is not mutated; a new list is returned.
	    """
	    turns = list(history or [])
	    if not message or not message.strip():
	        return turns, ""

	    # Flatten the pair history into role/content messages for the LLM,
	    # skipping empty sides of a pair.
	    context = []
	    for pair in turns:
	        if pair[0]:
	            context.append({"role": "user", "content": pair[0]})
	        if pair[1]:
	            context.append({"role": "assistant", "content": pair[1]})

	    turns.append((message, chat_about_papers(message, context)))
	    return turns, ""

	# ================================================================
	# UI
	# ================================================================
	# Custom stylesheet passed to gr.Blocks(css=CSS). It hides the footer,
	# centres h1 headings, and styles the elem_classes used by the layout
	# below: "status-bar", "legend", "filter-box" and "gs-box".
	CSS = """
	footer{display:none!important}
	h1{text-align:center}
	.status-bar{font-size:.85rem;color:#94a3b8;padding:2px 0}
	.legend{font-size:.8rem;color:#cbd5e1;background:#1e293b;
	border-radius:8px;padding:6px 14px;margin-bottom:6px}
	.filter-box{background:#1e293b;border-radius:10px;
	padding:12px 16px;margin-top:8px}
	.gs-box{background:#1e293b;border-radius:10px;padding:14px 18px;
	margin-bottom:10px;border:1px solid #334155}
	"""

	# About-tab content, built line by line so the Markdown never depends on
	# source indentation (indented lines would render as a code block) and
	# table pipes stay unescaped so the tables actually render as tables.
	_ABOUT_MD = "\n".join([
	    "# 🔬 Scientific Paper Discovery",
	    "### Version 7.4 — Intelligent Research Assistant",
	    "",
	    "---",
	    "",
	    "## 🧠 About This Tool",
	    "",
	    "Scientific Paper Discovery is an AI-powered academic research assistant that enables researchers, students, and scientists to discover, understand, and organize scientific literature with unprecedented ease. It combines state-of-the-art language models with multi-source academic APIs to deliver a seamless research experience.",
	    "",
	    "---",
	    "",
	    "## ⚙️ Core Technologies",
	    "",
	    "| Component | Technology | Role |",
	    "|---|---|---|",
	    "| 🤖 Language Model | Llama 3.3 70B via Groq API | Paper explanation, comparison & chat |",
	    "| 🔍 Semantic Search | FAISS + MiniLM-L12-v2 | Vector similarity search |",
	    "| 📡 Source 1 | arXiv API | Preprints across all sciences |",
	    "| 📚 Source 2 | CrossRef API | Peer-reviewed journal articles |",
	    "| 📊 Citations | Semantic Scholar (3-layer) | Real citation counts |",
	    "| 🎙️ Text-to-Speech | gTTS | Audio playback of explanations |",
	    "| 📄 PDF Export | ReportLab | Professional PDF generation |",
	    "",
	    "---",
	    "",
	    "## 🗂️ Feature Overview",
	    "",
	    "| Tab | Feature | Description |",
	    "|---|---|---|",
	    "| 🔍 Browse | Paper Fetching | Fetch latest papers by topic & category |",
	    "| 🌐 Global Search | Title Search | Find any paper by exact title (relevance-sorted) |",
	    "| 📖 Explain | AI Explanation | Full structured explanation in Arabic or English |",
	    "| ⚖️ Compare | Paper Comparison | Side-by-side AI comparison of two papers |",
	    "| 💬 Chat | Research Chat | Ask questions about loaded papers |",
	    "| 🌐 Overview | Batch Summary | Academic overview of all loaded papers |",
	    "| 📊 Trends | Analytics | Citation, keyword & author trend charts |",
	    "| 📚 Bibliography | Citation Export | APA, IEEE, Chicago, BibTeX formats |",
	    "| ⭐ Favorites | Saved Papers | Bookmark & export favorite papers |",
	    "| 🔔 Auto-Fetch | Monitoring | Automatic periodic paper discovery |",
	    "",
	    "---",
	    "",
	    "## 🔎 Search Mode Guide",
	    "",
	    "| Mode | Algorithm | Best For |",
	    "|---|---|---|",
	    "| Browse | `sortBy=submittedDate` | Discovering latest papers on a topic |",
	    '| 🌐 Global Search | `sortBy=relevance` + `ti:"..."` | Finding a specific paper by title |',
	    "| FAISS (internal) | Cosine similarity | Semantic search within loaded papers |",
	    "",
	    "---",
	    "",
	    "## 📌 Citation Badges",
	    "",
	    "| Badge | Meaning |",
	    "|---|---|",
	    "| 🥇 | ≥ 1,000 citations — Highly influential |",
	    "| 🏆 | ≥ 100 citations — Well-cited |",
	    "| ⭐ | ≥ 10 citations — Notable |",
	    "| 📄 | < 10 citations — Recent or niche |",
	    "| · | 0 citations — New or unindexed |",
	    "",
	    "---",
	    "",
	    "Built with ❤️ for the research community — v7.4",
	])

	# Upper bound of the year sliders: never below the previous hard-coded
	# 2026, but grows with the calendar so later papers stay selectable.
	_YEAR_MAX = max(2026, CURRENT_YEAR)

	with gr.Blocks(
	    theme=gr.themes.Soft(primary_hue="blue", secondary_hue="purple"),
	    title="Scientific Paper Discovery v7.4", css=CSS
	) as demo:

	    gr.Markdown("# Scientific Paper Discovery v7.4\narXiv · CrossRef · Llama-3.3-70B · FAISS")
	    gr.Markdown("Citations: 🥇 >=1000 | 🏆 >=100 | ⭐ >=10 | 📄 <10 | · = 0",
	                elem_classes="legend")
	    status_bar = gr.Markdown("No papers loaded yet.", elem_classes="status-bar")

	    with gr.Tabs():

	        # ── TAB 1: BROWSE ──────────────────────────────────
	        with gr.Tab("Browse / Search"):
	            with gr.Row():
	                with gr.Column(scale=3):
	                    t_query = gr.Textbox(label="Topic",
	                                         placeholder="ARIMA, inflation, LLM...",
	                                         value="economic forecasting")
	                    t_category = gr.Dropdown(label="Category",
	                                             choices=list(CATEGORIES.keys()),
	                                             value="📊 Economics")
	                    t_source = gr.Radio(label="Source",
	                                        choices=["arXiv", "CrossRef", "Both"],
	                                        value="arXiv")
	                with gr.Column(scale=1):
	                    t_max = gr.Slider(5, 50, value=15, step=5, label="Max papers")
	                    t_days = gr.Slider(1, 1500, value=365, step=30, label="Last N days")
	            btn_fetch = gr.Button("Fetch Papers", variant="primary", size="lg")
	            papers_table_md = gr.Markdown("Results appear here.")
	            paper_selector = gr.Dropdown(label="Select paper", choices=[], interactive=True)
	            with gr.Group(elem_classes="filter-box"):
	                gr.Markdown("### Filter & Sort")
	                with gr.Row():
	                    f_year_from = gr.Slider(2000, _YEAR_MAX, value=2020, step=1, label="Year from")
	                    f_year_to = gr.Slider(2000, _YEAR_MAX, value=_YEAR_MAX, step=1, label="Year to")
	                with gr.Row():
	                    f_cit_min = gr.Slider(0, 5000, value=0, step=5, label="Citations min")
	                    f_cit_max = gr.Slider(0, 5000, value=5000, step=5, label="Citations max")
	                with gr.Row():
	                    f_sort = gr.Dropdown(choices=SORT_CHOICES,
	                                         value="Most Cited", label="Sort", scale=3)
	                    btn_filter = gr.Button("Apply", variant="primary", scale=1)
	            gr.Markdown("---\n### Semantic Search (FAISS — in loaded papers)")
	            with gr.Row():
	                search_in_box = gr.Textbox(label="Search in loaded papers",
	                                           placeholder="ARIMA, transformer...", scale=5)
	                btn_search_in = gr.Button("Search", scale=1)
	            search_in_out = gr.Markdown()

	        # ── TAB 2: GLOBAL SEARCH ───────────────────────────
	        with gr.Tab("Global Search"):
	            gr.Markdown(
	                "### Search any paper by title or keywords\n\n"
	                "> Uses arXiv relevance sort + CrossRef title search.\n"
	                "> Example: `Attention is All You Need`"
	            )
	            with gr.Group(elem_classes="gs-box"):
	                with gr.Row():
	                    gs_query = gr.Textbox(
	                        label="Title or keywords",
	                        placeholder="Attention is All You Need | ARIMA forecasting ...",
	                        scale=4)
	                    gs_source = gr.Radio(label="Source",
	                                         choices=["arXiv", "CrossRef", "Both"],
	                                         value="Both", scale=2)
	                    gs_max = gr.Slider(5, 30, value=10, step=5, label="Max results", scale=1)
	                btn_gs = gr.Button("Search Now", variant="primary", size="lg")
	            gs_out = gr.Markdown("Enter a title or keywords...")

	        # ── TAB 3: EXPLAIN ─────────────────────────────────
	        with gr.Tab("Explain"):
	            with gr.Row():
	                paper_sel2 = gr.Dropdown(label="Select paper",
	                                         choices=[], interactive=True, scale=4)
	                lang_exp = gr.Radio(LANG_CHOICES, value="Arabic",
	                                    label="Language", scale=1)
	            with gr.Row():
	                btn_explain = gr.Button("Explain", variant="primary")
	                btn_fav = gr.Button("Save Fav")
	                btn_audio = gr.Button("Listen")
	                btn_export_pdf = gr.Button("Export PDF", variant="secondary")
	            with gr.Row():
	                fav_status = gr.Markdown()
	                pdf_status = gr.Markdown()
	            explanation_out = gr.Markdown("Fetch papers and select one.")
	            audio_out = gr.Audio(label="Audio", type="filepath")
	            pdf_out = gr.File(label="Download PDF")

	        # ── TAB 4: COMPARE ─────────────────────────────────
	        with gr.Tab("Compare"):
	            with gr.Row():
	                cmp_a = gr.Dropdown(label="Paper A", choices=[], interactive=True)
	                cmp_b = gr.Dropdown(label="Paper B", choices=[], interactive=True)
	                lang_cmp = gr.Radio(LANG_CHOICES, value="Arabic",
	                                    label="Language", scale=1)
	            btn_compare = gr.Button("Compare", variant="primary")
	            compare_out = gr.Markdown("Select two papers.")

	        # ── TAB 5: CHAT ────────────────────────────────────
	        with gr.Tab("Chat"):
	            chatbot_ui = gr.Chatbot(label="Research Assistant",
	                                    height=480, bubble_full_width=False)
	            with gr.Row():
	                chat_in = gr.Textbox(label="Question", scale=5,
	                                     placeholder="Key findings? | ما أبرز النتائج؟")
	                btn_send = gr.Button("Send", variant="primary", scale=1)
	            btn_clear = gr.Button("Clear", size="sm")

	        # ── TAB 6: OVERVIEW ────────────────────────────────
	        with gr.Tab("Overview"):
	            with gr.Row():
	                lang_ov = gr.Radio(LANG_CHOICES, value="Arabic",
	                                   label="Language", scale=1)
	                btn_overview = gr.Button("Generate Report", variant="primary", scale=3)
	            overview_out = gr.Markdown("Fetch papers first.")

	        # ── TAB 7: TRENDS ──────────────────────────────────
	        with gr.Tab("Trends"):
	            btn_trends = gr.Button("Analyze Trends", variant="primary", size="lg")
	            trend_chart = gr.Image(label="Trends Dashboard", type="filepath")
	            trend_stats = gr.Markdown("Fetch papers first.")

	        # ── TAB 8: BIBLIOGRAPHY ────────────────────────────
	        with gr.Tab("Bibliography"):
	            bib_style = gr.Radio(["APA", "IEEE", "Chicago", "BibTeX"],
	                                 value="APA", label="Style")
	            btn_bib = gr.Button("Generate Bibliography", variant="primary")
	            bib_out = gr.Markdown()
	            bib_file = gr.File(label="Download")

	        # ── TAB 9: FAVORITES ───────────────────────────────
	        with gr.Tab("Favorites"):
	            btn_show_fav = gr.Button("Show Favorites")
	            favs_md = gr.Markdown("Press to show.")
	            btn_export_fav = gr.Button("Export CSV", variant="secondary")
	            fav_csv_file = gr.File(label="CSV File")

	        # ── TAB 10: AUTO-FETCH ─────────────────────────────
	        with gr.Tab("Auto-Fetch"):
	            with gr.Row():
	                auto_q = gr.Textbox(label="Topic",
	                                    value="economic forecasting", scale=3)
	                auto_cat = gr.Dropdown(label="Category",
	                                       choices=list(CATEGORIES.keys()),
	                                       value="📊 Economics", scale=2)
	                auto_interval = gr.Slider(5, 120, value=60, step=5,
	                                          label="Every (min)", scale=1)
	            with gr.Row():
	                btn_start_auto = gr.Button("Start", variant="primary")
	                btn_stop_auto = gr.Button("Stop", variant="stop")
	                btn_refresh_log = gr.Button("Refresh Log")
	            auto_status = gr.Markdown()
	            auto_log_md = gr.Markdown("No log.")

	        # ── TAB 11: ABOUT ──────────────────────────────────
	        with gr.Tab("About"):
	            gr.Markdown(_ABOUT_MD)

	    # ── WIRING ──────────────────────────────────────────────
	    # Event listeners are registered inside the Blocks context.
	    # gr_fetch returns one value per component in FETCH_OUT, in this order.
	    FETCH_OUT = [papers_table_md, paper_selector, paper_sel2, cmp_a, cmp_b, status_bar]

	    btn_fetch.click(gr_fetch,
	                    inputs=[t_query, t_category, t_max, t_days, t_source],
	                    outputs=FETCH_OUT)
	    btn_filter.click(gr_filter_papers,
	                     inputs=[f_year_from, f_year_to, f_cit_min, f_cit_max, f_sort],
	                     outputs=[papers_table_md, paper_selector, status_bar])
	    # Mirror the Browse-tab selection into the Explain and Compare selectors.
	    paper_selector.change(lambda x: [gr.update(value=x)] * 3,
	                          inputs=[paper_selector],
	                          outputs=[paper_sel2, cmp_a, cmp_b])

	    btn_search_in.click(gr_search_fetched, inputs=[search_in_box], outputs=[search_in_out])
	    search_in_box.submit(gr_search_fetched, inputs=[search_in_box], outputs=[search_in_out])

	    btn_gs.click(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])
	    gs_query.submit(global_paper_search, inputs=[gs_query, gs_source, gs_max], outputs=[gs_out])

	    btn_explain.click(gr_explain, inputs=[paper_sel2, lang_exp], outputs=[explanation_out])
	    btn_fav.click(gr_save_fav, inputs=[paper_sel2], outputs=[fav_status])
	    btn_audio.click(gr_audio, inputs=[explanation_out, lang_exp], outputs=[audio_out])
	    btn_export_pdf.click(gr_export_pdf,
	                         inputs=[explanation_out, paper_sel2],
	                         outputs=[pdf_out, pdf_status])

	    btn_compare.click(gr_compare, inputs=[cmp_a, cmp_b, lang_cmp], outputs=[compare_out])
	    btn_overview.click(gr_overview, inputs=[t_query, lang_ov], outputs=[overview_out])
	    btn_trends.click(gr_trends, outputs=[trend_chart, trend_stats])
	    btn_bib.click(gr_bib, inputs=[bib_style], outputs=[bib_out, bib_file])

	    btn_show_fav.click(gr_show_favs, outputs=[favs_md])
	    btn_export_fav.click(gr_export_fav, outputs=[fav_csv_file])

	    btn_start_auto.click(start_auto_fetch,
	                         inputs=[auto_q, auto_cat, auto_interval],
	                         outputs=[auto_status])
	    btn_stop_auto.click(stop_auto_fetch, outputs=[auto_status])
	    btn_refresh_log.click(get_auto_log, outputs=[auto_log_md])

	    btn_send.click(gr_chat_fn, inputs=[chat_in, chatbot_ui], outputs=[chatbot_ui, chat_in])
	    chat_in.submit(gr_chat_fn, inputs=[chat_in, chatbot_ui], outputs=[chatbot_ui, chat_in])
	    btn_clear.click(lambda: ([], ""), outputs=[chatbot_ui, chat_in])

	# Launch the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()