| | import asyncio |
| | import json |
| | import os |
| | import time |
| | from typing import Any, Dict, Optional |
| |
|
| | import gradio as gr |
| | import httpx |
| |
|
| | from core.config import settings |
| | from core.rate_limit import check_and_increment_global_ai_cap |
| | from core.pdf_report import build_pdf |
| | from core.sources import pubchem, ntp, ctx as ctx_src, iarc, scholar, fema |
| |
|
| | |
# CDC ToxProfiles support is optional: if the module (or anything it imports)
# is unavailable, fall back to None and degrade gracefully at lookup time.
try:
    from core.sources import cdc
except Exception:
    cdc = None


# In-process, per-worker caches.
SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}  # evidence dicts keyed "search::<query-lowercased>"
AI_CACHE: Dict[str, str] = {}  # AI summary text keyed "ai::<cas>"
| |
|
| |
|
def json_pretty(obj: Any) -> str:
    """Serialize *obj* as human-readable JSON.

    Non-serializable values are stringified via ``default=str``; if even
    that fails, fall back to ``str(obj)`` so this helper never raises.
    """
    try:
        text = json.dumps(obj, indent=2, ensure_ascii=False, default=str)
    except Exception:
        text = str(obj)
    return text
| |
|
| |
|
def client() -> httpx.AsyncClient:
    """Return a fresh async HTTP client with the demo's user-agent header."""
    default_headers = {"user-agent": "toxrai-hf-demo"}
    return httpx.AsyncClient(headers=default_headers)
| |
|
| |
|
| | |
| | |
| | |
| |
|
def render_overview(data: Dict[str, Any]) -> str:
    """Build the markdown overview header (query, CAS, key identifiers)."""
    parts = [
        f"**Query:** `{data.get('query') or ''}`",
        f"**CAS used:** `{data.get('cas_used') or ''}`",
    ]

    pub = data.get("pubchem") or {}
    if pub.get("ok") and pub.get("cid"):
        parts.append(f"**PubChem CID:** `{pub.get('cid')}`")

    genetox = data.get("ctx_genetox") or {}
    if genetox.get("ok") and genetox.get("dtxsid"):
        parts.append(f"**EPA CompTox DTXSID:** `{genetox.get('dtxsid')}`")

    # Blank-line separation renders each item as its own markdown paragraph.
    return "\n\n".join(parts)
| |
|
| |
|
def render_pubchem_summary(pub: Dict[str, Any]) -> str:
    """Render the PubChem lookup result as a markdown summary panel.

    Returns a short "unavailable" message when the lookup failed or the
    input is not a successful result dict.
    """
    if not pub or not pub.get("ok"):
        err = pub.get("error") if isinstance(pub, dict) else "Unknown PubChem error"
        return f"PubChem unavailable: {err}"

    cid = pub.get("cid")
    resolved_cas = pub.get("resolved_cas") or "-"
    props = pub.get("props") or {}

    # Property keys vary by upstream response shape; fall back to "-".
    iupac_name = props.get("IUPACName") or props.get("iupac_name") or "-"
    formula = props.get("MolecularFormula") or "-"
    mw = props.get("MolecularWeight")
    mw_str = f"{mw}" if mw not in (None, "") else "-"
    smiles = props.get("CanonicalSMILES") or "-"

    lines = [
        f"**CID:** `{cid}`",
        f"**Resolved CAS (from synonyms):** `{resolved_cas}`",
        f"**IUPAC/Title:** {iupac_name}",
        "",
        f"**Molecular Formula:** `{formula}`",
        f"**Molecular Weight:** `{mw_str}`",
        f"**Canonical SMILES:** `{smiles}`",
    ]

    structure_png = pub.get("structure_png")
    if structure_png:
        lines.append("")
        lines.append("**Structure**")
        # BUG FIX: this previously appended an empty f-string, so the
        # "**Structure**" header was emitted with no image under it.
        lines.append(f"![Structure]({structure_png})")

    url = pub.get("url")
    if url:
        lines.append("")
        lines.append(f"[Open PubChem]({url})")

    hazards = pub.get("hazards") or []
    if hazards:
        lines.append("")
        lines.append("### Safety / Hazard Information")
        for h in hazards:
            name = (h or {}).get("name") or "Hazard"
            text = (h or {}).get("text") or ""
            if not text:
                continue
            lines.append(f"**{name}:** {text}")
            # Blank line between hazards; any trailing one is rstripped below.
            lines.append("")

    return "\n".join(lines).rstrip() + "\n"
| |
|
| |
|
def render_ctx_summary(ctx: Dict[str, Any]) -> str:
    """Render the EPA CompTox (CTX) genetox result as markdown.

    On failure, returns the error message, optionally with a dashboard
    search link. On success, shows the DTXSID/dashboard link plus a
    pruned JSON view of the genetox summary fields.
    """
    if not ctx or not ctx.get("ok"):
        search_url = ctx.get("dashboard_search") if isinstance(ctx, dict) else None
        err = ctx.get("error") if isinstance(ctx, dict) else "Unknown CTX error"
        if search_url:
            return f"{err}\n\n[Open CompTox Dashboard search]({search_url})"
        return str(err)

    dtxsid = ctx.get("dtxsid")
    dash = ctx.get("dashboard_url")
    summary = ctx.get("summary")

    lines = []
    if dtxsid:
        lines.append(f"**DTXSID:** `{dtxsid}`")
    if dash:
        lines.append(f"[Open CompTox Dashboard]({dash})")

    if isinstance(summary, dict):
        # Substring tokens matched against the LOWERCASED key. BUG FIX: the
        # old list also contained the mixed-case token "geneTox", which could
        # never match a lowercased key; all tokens are lowercase now.
        interesting_keys = [
            "genetox",
            "overall",
            "summary",
            "conclusion",
            "call",
            "result",
            "assessment",
        ]
        picked = {}
        for k in summary.keys():
            lk = k.lower()
            if any(tok in lk for tok in interesting_keys):
                picked[k] = summary[k]
        if not picked:
            # Nothing looked interesting: show the first few keys as-is.
            for k in list(summary.keys())[:8]:
                picked[k] = summary[k]

        lines.append("")
        lines.append("```json")
        txt = json_pretty(picked)
        # Keep the markdown panel readable.
        if len(txt) > 6000:
            txt = txt[:6000] + "\n... (truncated)"
        lines.append(txt)
        lines.append("```")

    return "\n".join(lines)
| |
|
| |
|
def render_ntp_summary(ntp_res: Dict[str, Any]) -> str:
    """Render NTP technical-report hits as a markdown bullet list."""
    if not ntp_res or not ntp_res.get("ok"):
        err = ntp_res.get("error") if isinstance(ntp_res, dict) else "Unknown NTP error"
        return f"NTP Technical Reports unavailable: {err}"

    items = ntp_res.get("items") or []
    if not items:
        return "No NTP Technical Reports found for this CAS."

    bullets = []
    for item in items:
        tr_num = item.get("tr") or item.get("num") or ""
        title = item.get("title") or "Report"
        link = item.get("report_page") or item.get("url") or ""
        if link:
            bullets.append(f"- **TR-{tr_num}** [{title}]({link})")
        else:
            bullets.append(f"- **TR-{tr_num}** {title}")
    return "\n".join(bullets)
| |
|
| |
|
def render_iarc_block(iarc_res: Dict[str, Any]) -> str:
    """Render the IARC Monographs lookup as markdown (link or bullet list)."""
    unavailable = "IARC link unavailable."
    if not iarc_res or not iarc_res.get("ok"):
        return unavailable

    # A direct Bookshelf search link takes priority over itemized results.
    url = iarc_res.get("url")
    if url:
        return f"[Search IARC Monographs (NCBI Bookshelf)]({url})"

    results = iarc_res.get("results") if isinstance(iarc_res, dict) else None
    if not (isinstance(results, list) and results):
        return unavailable

    bullets = []
    for entry in results:
        if not isinstance(entry, dict):
            continue
        title = entry.get("title") or "IARC Monographs"
        link = entry.get("url")
        year = entry.get("year")
        suffix = f" ({year})" if year else ""
        bullets.append(f"- [{title}]({link}){suffix}" if link else f"- {title}{suffix}")
    return "\n".join(bullets) if bullets else unavailable
| |
|
| |
|
def render_scholar_block(sch_res: Dict[str, Any]) -> str:
    """Render the Google Scholar search link as markdown."""
    fallback = "Google Scholar link unavailable."
    if not sch_res or not sch_res.get("ok"):
        return fallback
    url = sch_res.get("url")
    if not url:
        return fallback
    return f"[Open Google Scholar search]({url})"
| |
|
| |
|
def render_fema_block(fema_res: Dict[str, Any]) -> str:
    """Render FEMA risk-assessment search links as a markdown bullet list."""
    if not fema_res or not fema_res.get("ok"):
        err = fema_res.get("error") if isinstance(fema_res, dict) else "FEMA link unavailable."
        return str(err)

    # (label, url) pairs in display order; only populated urls are rendered.
    link_specs = [
        ("Search by CAS", fema_res.get("cas_url")),
        ("Search by Chemical Name", fema_res.get("name_url")),
        ("Search by CAS + Name", fema_res.get("combo_url")),
        ("Generic FEMA search (alt)", fema_res.get("search_api_url")),
        ("Generic FEMA search", fema_res.get("alt_url")),
    ]
    bullets = [f"- [{label}]({url})" for label, url in link_specs if url]
    if not bullets:
        return "FEMA link unavailable."
    return "\n".join(["A FEMA risk assessment for this chemical is available:"] + bullets)
| |
|
| |
|
def render_cdc_block(cdc_res: Any) -> str:
    """Render CDC ToxProfile result(s) as markdown; accepts dict, list, or None."""
    empty_msg = "No CDC ToxProfiles match."
    if not cdc_res:
        return empty_msg

    if isinstance(cdc_res, dict):
        name = cdc_res.get("name") or "CDC ToxProfile"
        url = cdc_res.get("url")
        return f"[{name}]({url})" if url else name

    if isinstance(cdc_res, list):
        bullets = []
        for item in cdc_res:
            if not isinstance(item, dict):
                continue
            label = item.get("name") or "CDC ToxProfile"
            link = item.get("url")
            bullets.append(f"- [{label}]({link})" if link else f"- {label}")
        return "\n".join(bullets) if bullets else empty_msg

    # Unknown shape: show it verbatim rather than hiding it.
    return str(cdc_res)
| |
|
| |
|
| | |
| | |
| | |
| |
|
async def run_search(query: str) -> Dict[str, Any]:
    """Fetch evidence for *query* (CAS preferred, name accepted) from all sources.

    Results are memoized in SEARCH_CACHE. BUG FIX: entries now expire after
    ``settings.cache_ttl_seconds`` — previously the cache never expired,
    contradicting the TTL advertised in the UI banner.

    Raises:
        gr.Error: if the query is empty/blank.
    """
    q = (query or "").strip()
    if not q:
        raise gr.Error("Enter a CAS number (preferred) or chemical name.")

    cache_key = f"search::{q.lower()}"
    cached = SEARCH_CACHE.get(cache_key)
    if cached and (time.time() - cached["ts"]) < settings.cache_ttl_seconds:
        return cached["data"]

    async with client() as http:
        # PubChem first: it can resolve a chemical name to a CAS via synonyms.
        pub = await pubchem.pubchem_by_query(q, http)

        cas = q
        if not pubchem.is_cas(cas):
            cas = pub.get("resolved_cas") or q

        # CTX prefers a DTXSID when PubChem supplied one; fall back to the query.
        pub_dtxsid = pub.get("dtxsid") if isinstance(pub, dict) else None
        ctx_query = pub_dtxsid or q
        ctx_task = ctx_src.fetch_ctx_genetox(ctx_query, http) if ctx_query else asyncio.sleep(0, result={"ok": False})
        ntp_task = ntp.search_technical_reports(cas, http, limit=8)

        # Run the independent lookups concurrently.
        ctx_res, ntp_res = await asyncio.gather(ctx_task, ntp_task)

        out: Dict[str, Any] = {
            "query": q,
            "cas_used": cas,
            "pubchem": pub,
            "ctx_genetox": ctx_res,
            "ntp_technical_reports": ntp_res,
            "iarc_monographs": iarc.bookshelf_link(cas),
            "google_scholar": {"ok": True, "url": scholar.scholar_link(cas)},
            "fema": fema.fema_link(cas if pubchem.is_cas(cas) else "", q),
        }

        # The cdc module is optional and its public API has varied; probe the
        # known entry points and fail soft on any error.
        if cdc is not None:
            try:
                if hasattr(cdc, "lookup"):
                    out["cdc_toxprofiles"] = cdc.lookup(cas)
                elif hasattr(cdc, "search"):
                    out["cdc_toxprofiles"] = cdc.search(cas)
                elif hasattr(cdc, "toxprofile_for"):
                    out["cdc_toxprofiles"] = cdc.toxprofile_for(cas)
                else:
                    out["cdc_toxprofiles"] = None
            except Exception:
                out["cdc_toxprofiles"] = None

    SEARCH_CACHE[cache_key] = {"ts": time.time(), "data": out}
    return out
| |
|
| |
|
def _prune_for_prompt(obj: Any, max_chars: int) -> str:
    """JSON-serialize *obj*, hard-truncating at *max_chars* characters."""
    text = json_pretty(obj)
    if len(text) > max_chars:
        text = text[:max_chars] + "\n... (truncated)"
    return text
| |
|
| |
|
def build_prompt(data: Dict[str, Any]) -> str:
    """Build a prompt that will not exceed model context.

    Key change vs earlier version: DO NOT dump full raw JSON from all sources.
    Only a curated slice of each source's result is serialized, then the
    whole evidence JSON is truncated to a fixed character budget.
    """
    pub = data.get("pubchem") or {}
    props = (pub.get("props") or {}) if isinstance(pub, dict) else {}
    hazards = (pub.get("hazards") or []) if isinstance(pub, dict) else []

    genetox = data.get("ctx_genetox") or {}

    pubchem_slice = {
        "cid": pub.get("cid"),
        "resolved_cas": pub.get("resolved_cas"),
        "iupac": props.get("IUPACName") or props.get("iupac_name"),
        "formula": props.get("MolecularFormula"),
        "molecular_weight": props.get("MolecularWeight"),
        "canonical_smiles": props.get("CanonicalSMILES"),
        "hazards": hazards[:10],  # cap hazard entries to keep the prompt small
    }
    prompt_obj = {
        "query": data.get("query"),
        "cas_used": data.get("cas_used"),
        "pubchem": pubchem_slice,
        "ctx_genetox": {
            "ok": genetox.get("ok"),
            "dtxsid": genetox.get("dtxsid"),
            "summary": genetox.get("summary"),
        },
        "ntp_technical_reports": (data.get("ntp_technical_reports") or {}).get("items", []),
        "cdc_toxprofiles": data.get("cdc_toxprofiles"),
    }

    body = _prune_for_prompt(prompt_obj, max_chars=12000)

    instructions = (
        "You are a toxicology regulatory assistant. "
        "Using ONLY the evidence JSON below, write a concise weight-of-evidence summary focused on mutagenicity/genotoxicity. "
        "If evidence is conflicting or absent, say so explicitly. "
        "Cite which source each statement comes from (PubChem hazards, CTX genetox summary, NTP TR titles, CDC ToxProfiles).\n\n"
    )
    return instructions + "EVIDENCE_JSON:\n" + body
| |
|
| |
|
def do_search(query: str):
    """Gradio handler: run the async search and fan results out to the UI.

    Output order must match the `outputs=` wiring: state, then the rendered
    markdown panels, then the raw JSON panes, then "" to clear the AI box.
    """
    data = asyncio.run(run_search(query))

    cdc_md_text = render_cdc_block(data.get("cdc_toxprofiles")) if "cdc_toxprofiles" in data else ""

    rendered = [
        render_overview(data),
        render_pubchem_summary(data.get("pubchem", {})),
        cdc_md_text,
        render_ctx_summary(data.get("ctx_genetox", {})),
        render_ntp_summary(data.get("ntp_technical_reports", {})),
        render_iarc_block(data.get("iarc_monographs", {})),
        render_scholar_block(data.get("google_scholar", {})),
        render_fema_block(data.get("fema", {})),
    ]

    raw_keys = (
        "pubchem",
        "ctx_genetox",
        "ntp_technical_reports",
        "iarc_monographs",
        "google_scholar",
        "fema",
    )
    raw = [json_pretty(data.get(key, {})) for key in raw_keys]

    # Final "" clears any stale AI summary from a previous search.
    return tuple([data] + rendered + raw + [""])
| |
|
| |
|
def generate_ai(data: dict):
    """Gradio handler: produce (and cache) the AI weight-of-evidence summary.

    Raises:
        gr.Error: if no search has been run yet.
    """
    if not data:
        raise gr.Error("Run a search first.")

    cas = data.get("cas_used") or data.get("query") or ""
    cache_key = f"ai::{cas}"
    cached = AI_CACHE.get(cache_key)
    if cached is not None:
        return cached

    # NOTE(review): the global daily cap is consumed before generation, so a
    # failed generation still counts against the quota — confirm intended.
    allowed, info = check_and_increment_global_ai_cap()
    if not allowed:
        return f"AI Summary capacity reached for today (limit {info.get('limit')}). Please try again tomorrow."

    # Imported lazily (presumably to defer the AI client's setup cost /
    # credentials until first use — confirm).
    from core.sources.ai_summary import generate_ai_summary

    resp = generate_ai_summary(build_prompt(data))
    if not resp.get("ok"):
        return f"**AI summary unavailable:** {resp.get('error')}"

    text = resp.get("text") or ""
    AI_CACHE[cache_key] = text
    return text
| |
|
| |
|
def download_report(data: dict, ai_text: str):
    """Gradio handler: build and return (pdf_path, json_path) for download.

    Raises:
        gr.Error: if no search has been run yet.
    """
    if not data:
        raise gr.Error("Run a search first.")

    label = data.get("cas_used") or data.get("query") or "unknown"
    summary = ai_text if ai_text else None
    return build_pdf(label, evidence=data, ai_summary=summary)
| |
|
| |
|
| | |
| | |
| | |
| |
|
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(title="ToxRAI (HF Demo)") as demo:
    gr.Markdown("# 🧪 ToxRAI — Demo (CAS-first)")
    gr.Markdown(
        f"Public demo • AI summaries/day global cap: **{settings.max_ai_summaries_per_day}** • Cache TTL: **{settings.cache_ttl_seconds}s**"
    )

    with gr.Tabs():
        with gr.Tab("Search"):
            # Holds the full evidence dict from the most recent search.
            state = gr.State(None)

            with gr.Row():
                query_in = gr.Textbox(
                    label="CAS (preferred) or Chemical name",
                    placeholder="e.g., 80-05-7 or bisphenol A",
                    scale=4,
                )
                search_btn = gr.Button("Search", variant="primary", scale=1)

            overview_md = gr.Markdown()

            with gr.Accordion("PubChem (summary)", open=False):
                pubchem_md = gr.Markdown()

            with gr.Accordion("CDC ToxProfiles", open=False):
                cdc_md = gr.Markdown()

            with gr.Accordion("EPA CompTox (CTX) — Genetox (full fields)", open=False):
                ctx_md = gr.Markdown()

            with gr.Accordion("NTP Technical Reports", open=False):
                ntp_md = gr.Markdown()

            with gr.Accordion("IARC Monographs", open=False):
                iarc_md = gr.Markdown()

            with gr.Accordion("Google Scholar", open=False):
                scholar_md = gr.Markdown()

            with gr.Accordion("FEMA Risk Assessment", open=False):
                fema_md = gr.Markdown()

            with gr.Accordion("Raw outputs (all sources)", open=False):
                raw_pubchem = gr.Code(label="PubChem (raw)", language="json")
                raw_ctx = gr.Code(label="CTX Genetox (raw)", language="json")
                raw_ntp = gr.Code(label="NTP TR (raw)", language="json")
                raw_iarc = gr.Code(label="IARC (raw)", language="json")
                raw_scholar = gr.Code(label="Scholar link (raw)", language="json")
                raw_fema = gr.Code(label="FEMA (raw)", language="json")

            with gr.Row():
                ai_btn = gr.Button("Generate AI Summary (GPT-4o)", variant="secondary")
                pdf_btn = gr.Button("Build PDF + JSON")

            ai_out = gr.Markdown()

            with gr.Row():
                pdf_file = gr.File(label="Download PDF")
                json_file = gr.File(label="Download JSON evidence packet")

            # One shared outputs list — previously this 16-element list was
            # duplicated verbatim for click and submit, inviting drift between
            # the two wirings. Order must match do_search's return tuple.
            search_outputs = [
                state,
                overview_md,
                pubchem_md,
                cdc_md,
                ctx_md,
                ntp_md,
                iarc_md,
                scholar_md,
                fema_md,
                raw_pubchem,
                raw_ctx,
                raw_ntp,
                raw_iarc,
                raw_scholar,
                raw_fema,
                ai_out,
            ]
            search_btn.click(fn=do_search, inputs=[query_in], outputs=search_outputs)
            query_in.submit(fn=do_search, inputs=[query_in], outputs=search_outputs)

            ai_btn.click(fn=generate_ai, inputs=[state], outputs=[ai_out])
            pdf_btn.click(fn=download_report, inputs=[state, ai_out], outputs=[pdf_file, json_file])
| |
|
| |
|
# Queue requests so concurrent users are bounded to 6 in-flight handlers.
demo.queue(default_concurrency_limit=6)
# `app` alias — presumably the hosting platform's expected entry-point name;
# confirm against the deployment config.
app = demo

if __name__ == "__main__":
    demo.launch()
| |
|