| |
|
| |
|
| | import re
|
| | import requests
|
| | import streamlit as st
|
| |
|
# Page title and wide layout for the Streamlit app.
st.set_page_config(page_title="Brain Trials Finder", layout="wide")

# Overall-status values passed to the study search as the status filter.
STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]

# Search synonyms for each diagnosis that has a curated list; a diagnosis
# without an entry here contributes no default terms to the query.
DEFAULT_DIAG_TERMS = {
    "Glioblastoma": [
        "glioblastoma",
        "GBM",
        "grade 4 astrocytoma",
    ],
    "Diffuse midline glioma": [
        "diffuse midline glioma",
        "DMG",
    ],
    "Anaplastic astrocytoma": [
        "anaplastic astrocytoma",
        "grade 3 astrocytoma",
    ],
}
|
| |
|
| |
|
def ensure_list(v):
    """Coerce ``v`` to a list.

    ``None`` becomes an empty list, an existing list is returned as-is (the
    same object, not a copy), and any other value is wrapped in a
    one-element list.
    """
    if v is None:
        return []
    return v if isinstance(v, list) else [v]
|
| |
|
| |
|
def mentions(text: str, needle: str) -> bool:
    """Return True when ``needle`` occurs in ``text``, compared in lower case.

    An empty or ``None`` ``text`` never matches.
    """
    if text:
        return needle.lower() in text.lower()
    return False
|
| |
|
| |
|
| | def _to_int(v):
|
| | try:
|
| | if v is None:
|
| | return None
|
| | if isinstance(v, (int, float)):
|
| | return int(v)
|
| |
|
| | m = re.search(r"(\d+)", str(v))
|
| | return int(m.group(1)) if m else None
|
| | except Exception:
|
| | return None
|
| |
|
| |
|
def build_terms(diagnosis: str, keywords: str):
    """Assemble the ordered, de-duplicated search terms for a query.

    The default terms registered for ``diagnosis`` in ``DEFAULT_DIAG_TERMS``
    come first, followed by the non-empty, whitespace-stripped pieces of the
    comma-separated ``keywords`` string. Exact duplicates keep their first
    position. When nothing is left, ``["brain tumor"]`` is returned.

    NOTE(review): the caller OR-joins these terms, so user keywords widen
    the search rather than narrow it -- confirm that is intended.
    """
    seen = {}
    for term in DEFAULT_DIAG_TERMS.get(diagnosis, []):
        seen.setdefault(term, None)
    for piece in (keywords or "").split(","):
        cleaned = piece.strip()
        if cleaned:
            seen.setdefault(cleaned, None)
    if not seen:
        return ["brain tumor"]
    return list(seen)
|
| |
|
| |
|
def build_expr(diagnosis: str, keywords: str) -> str:
    """Build the search expression sent to the study search.

    Terms from ``build_terms`` are joined with ``" OR "``; a term that
    contains a space is wrapped in double quotes, other terms are used bare.
    """

    def as_operand(term):
        # Only multi-word terms get quoted.
        if " " in term:
            return f'"{term}"'
        return term

    return " OR ".join(map(as_operand, build_terms(diagnosis, keywords)))
|
| |
|
| |
|
@st.cache_data(ttl=3600)
def ctgov_search(expr: str, statuses, page_size: int = 100, max_pages: int = 5):
    """Fetch matching studies from the ClinicalTrials.gov v2 ``/studies`` endpoint.

    ``expr`` is sent as ``query.term`` and the comma-joined ``statuses`` as
    ``filter.overallStatus``. Pages are followed through ``nextPageToken``
    until the response carries no token or ``max_pages`` requests have been
    made.

    Returns the concatenated ``studies`` entries of every page fetched.
    Exceptions from ``requests`` (including the HTTP error raised by
    ``raise_for_status``) propagate to the caller. Calls are cached through
    ``st.cache_data`` with ``ttl=3600``.
    """
    endpoint = "https://clinicaltrials.gov/api/v2/studies"
    collected = []
    next_token = None
    for _page in range(max_pages):
        query = {
            "query.term": expr,
            "pageSize": page_size,
            "filter.overallStatus": ",".join(statuses),
        }
        if next_token:
            query["pageToken"] = next_token
        response = requests.get(endpoint, params=query, timeout=30)
        response.raise_for_status()
        payload = response.json() or {}
        collected += payload.get("studies") or []
        next_token = payload.get("nextPageToken")
        if not next_token:
            # No further pages.
            return collected
    return collected
|
| |
|
| |
|
def _format_phase(p) -> str:
    """Turn one phase code into a display label.

    ``"PHASE1"`` -> ``"Phase 1"``, ``"PHASE1_PHASE2"`` -> ``"Phase 1/2"``,
    ``"EARLY_PHASE1"`` -> ``"Early Phase 1"``, ``"NA"`` -> ``"N/A"``.
    Any other code is shown title-cased with underscores as spaces; an empty
    value gives an empty string.
    """
    code = str(p or "").strip().upper()
    if not code:
        return ""
    if code == "NA":
        return "N/A"
    prefix = ""
    if code.startswith("EARLY_"):
        prefix = "Early "
        code = code[len("EARLY_"):]
    if code.startswith("PHASE"):
        number = code.replace("PHASE", "").replace("_", "/").strip("/ ")
        return f"{prefix}Phase {number}" if number else f"{prefix}Phase"
    return prefix + code.replace("_", " ").title()


def extract_row(study: dict) -> dict:
    """Flatten one study record into the display fields used by the UI.

    Reads the identification, status, design, conditions and
    sponsor/collaborators modules under ``protocolSection``; every missing
    module or field degrades to an empty string.

    Returns a dict with the keys ``title`` (official title, else brief
    title), ``nct``, ``status`` (e.g. ``"Not Yet Recruiting"``), ``phases``
    and ``conditions`` (comma-separated strings) and ``sponsor`` (lead
    sponsor name).
    """
    ps = study.get("protocolSection") or {}
    idm = ps.get("identificationModule") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    cdnm = ps.get("conditionsModule") or {}
    slm = ps.get("sponsorCollaboratorsModule") or {}

    title = str(idm.get("officialTitle") or idm.get("briefTitle") or "").strip()
    nct = str(idm.get("nctId") or "").strip()

    status_raw = str(scm.get("overallStatus") or "").strip()
    status = status_raw.replace("_", " ").title() if status_raw else ""

    phase_labels = (_format_phase(p) for p in ensure_list(dsm.get("phases")) if p)
    phases = ", ".join(label for label in phase_labels if label)

    # str() keeps a stray non-string entry from raising inside join(), which
    # would otherwise make the caller discard the whole study.
    conditions = ", ".join(
        str(c) for c in ensure_list(cdnm.get("conditions")) if c
    )

    sponsor = ""
    lead = slm.get("leadSponsor") or {}
    if isinstance(lead, dict):
        sponsor = str(lead.get("name") or "").strip()

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
    }
|
| |
|
| |
|
# Year-equivalents of the units that can follow the number in an eligibility
# age string such as "6 Months".
_AGE_UNIT_IN_YEARS = (
    ("year", 1.0),
    ("month", 1.0 / 12.0),
    ("week", 7.0 / 365.25),
    ("day", 1.0 / 365.25),
    ("hour", 1.0 / (365.25 * 24.0)),
    ("minute", 1.0 / (365.25 * 24.0 * 60.0)),
)


def _age_in_years(raw):
    """Convert an eligibility age such as ``"6 Months"`` to years (float).

    Returns ``None`` when no number can be read from ``raw``. Bare numbers
    and strings without a recognised unit are taken to be years.
    """
    count = _to_int(raw)
    if count is None:
        return None
    text = str(raw).lower()
    for unit, factor in _AGE_UNIT_IN_YEARS:
        if unit in text:
            return count * factor
    return float(count)


def _intake_int(intake: dict, key: str):
    """Read ``intake[key]`` as a whole number, or ``None`` if absent/invalid."""
    value = intake.get(key)
    if value is None or value == "":
        return None
    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return None


def score_trial(study: dict, intake: dict):
    """Score one study against the patient intake.

    Scoring, as implemented below:
      * status ``RECRUITING`` +15, ``NOT_YET_RECRUITING`` +8;
      * any phase containing ``PHASE3`` +12, any containing ``PHASE2`` +8
        (both can apply to the same study);
      * patient age outside the study's minimum/maximum age: -30 each;
      * eligibility text mentions "Karnofsky" and patient KPS < 70: -10;
      * +2 for every intake keyword found in the title or brief summary.

    Age limits are converted to years before comparison, so a limit such as
    ``"6 Months"`` is no longer read as six years. Age and KPS checks are
    skipped when the intake does not provide a usable number, instead of
    treating the missing value as 0.

    Args:
        study: a study record with a ``protocolSection`` mapping.
        intake: dict read for the keys ``age``, ``kps`` and ``keywords``.

    Returns:
        ``(score, reasons)`` where ``reasons`` lists the penalties applied.
    """
    ps = study.get("protocolSection") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    elm = ps.get("eligibilityModule") or {}
    idm = ps.get("identificationModule") or {}

    s = 0
    reasons = []

    status = scm.get("overallStatus") or ""
    if status == "RECRUITING":
        s += 15
    elif status == "NOT_YET_RECRUITING":
        s += 8

    phases = [str(p).upper() for p in ensure_list(dsm.get("phases"))]
    if any("PHASE3" in p for p in phases):
        s += 12
    if any("PHASE2" in p for p in phases):
        s += 8

    # Age limits: compare in years, and only when the patient age is known.
    min_age_raw = elm.get("minimumAge")
    max_age_raw = elm.get("maximumAge")
    age = _intake_int(intake, "age")
    if age is not None:
        min_age = _age_in_years(min_age_raw)
        max_age = _age_in_years(max_age_raw)
        if min_age is not None and age < min_age:
            reasons.append(f"Age below minimum ({min_age_raw}).")
            s -= 30
        if max_age is not None and age > max_age:
            reasons.append(f"Age above maximum ({max_age_raw}).")
            s -= 30

    # NOTE(review): the threshold of 70 is assumed; the number actually
    # stated in the eligibility text is not parsed.
    crit = elm.get("eligibilityCriteria") or ""
    kps = _intake_int(intake, "kps")
    if kps is not None and kps < 70 and mentions(crit, "Karnofsky"):
        s -= 10
        reasons.append("Requires KPS ≥70.")

    # Keyword boosts against the title and brief summary.
    title = idm.get("briefTitle") or idm.get("officialTitle") or ""
    summary = (ps.get("descriptionModule") or {}).get("briefSummary") or ""
    blob = " ".join([title, summary])
    for piece in (intake.get("keywords") or "").split(","):
        kw = piece.strip()
        if kw and mentions(blob, kw):
            s += 2

    return s, reasons
|
| |
|
| |
|
| |
|
st.title("Brain Cancer Trials Finder (MVP)")

# Sidebar: patient intake. The scoring step reads age, KPS and keywords; the
# other fields are carried in the intake dict as well.
with st.sidebar:
    diagnosis = st.selectbox(
        "Diagnosis",
        ["Glioblastoma", "Diffuse midline glioma", "Anaplastic astrocytoma", "Other"],
        index=0,
    )
    setting = st.selectbox("Setting", ["Newly diagnosed", "Recurrent"], index=1)
    age = st.number_input("Age", min_value=1, max_value=100, value=55)
    kps = st.slider("Karnofsky (KPS)", min_value=40, max_value=100, step=10, value=80)
    prior_bev = st.checkbox("Prior bevacizumab", value=False)
    keywords = st.text_input("Keywords (comma-separated)", value="immunotherapy,vaccine,device")
    do_search = st.button("Search", type="primary")


_MD_SPECIALS = "\\[]*_`"


def _md_escape(text) -> str:
    """Backslash-escape characters that would break a Markdown link label."""
    return "".join("\\" + ch if ch in _MD_SPECIALS else ch for ch in str(text or ""))


def _first_text(record: dict, *keys) -> str:
    """Return the first non-empty value among ``keys`` as a stripped string."""
    for key in keys:
        value = record.get(key)
        if value:
            return str(value).strip()
    return ""


def _joined_fields(record, *keys) -> str:
    """Join the non-empty ``keys`` of a contact-like dict with ' | '."""
    if not isinstance(record, dict):
        return ""
    return " | ".join(str(record[key]) for key in keys if record.get(key))


# Run a search on the first page load and whenever Search is clicked. The
# outcome is stored in session state so the list stays on screen when the
# script reruns for another widget interaction.
if do_search or "did_first" not in st.session_state:
    st.session_state["did_first"] = True
    expr = build_expr(diagnosis, keywords)
    try:
        studies = ctgov_search(expr, STATUSES, page_size=100, max_pages=5)
    except (requests.RequestException, ValueError) as exc:
        # Network/HTTP failure or an unparsable body: report it and keep
        # whatever results were already stored.
        st.error(f"Could not load studies from ClinicalTrials.gov: {exc}")
    else:
        intake = {
            "age": age,
            "kps": kps,
            "prior_bev": prior_bev,
            "setting": setting,
            "keywords": keywords,
            "diagnosis": diagnosis,
        }

        rows = []
        skipped = 0
        for sdict in studies:
            try:
                sc, reasons = score_trial(sdict, intake)
                row = extract_row(sdict)
            except Exception:
                # Best effort: one malformed record must not sink the whole
                # list, but the drop is counted and reported below.
                skipped += 1
                continue
            nct = row.get("nct") or ""
            url = f"https://clinicaltrials.gov/study/{nct}" if nct else ""
            rows.append(
                (
                    sc,
                    row.get("title", ""),
                    nct,
                    row.get("status", ""),
                    row.get("phases", ""),
                    row.get("conditions", ""),
                    row.get("sponsor", ""),
                    reasons,
                    url,
                    sdict,
                )
            )

        # Highest score first; ties keep the order returned by the API.
        rows.sort(key=lambda item: item[0], reverse=True)
        st.session_state["trial_results"] = {
            "found": len(studies),
            "rows": rows[:50],
            "skipped": skipped,
        }

results = st.session_state.get("trial_results")
if results:
    rows = results["rows"]
    st.caption(f"Found {results['found']} studies; showing top {len(rows)} by score.")
    if results["skipped"]:
        st.warning(f"{results['skipped']} studies could not be processed and were skipped.")

    for sc, title, nct, status, phases, conds, sponsor, reasons, url, study in rows:
        with st.container(border=True):
            # Titles can contain Markdown metacharacters such as [ and ].
            label = _md_escape(title or nct)
            if url:
                st.markdown(f"**[{label}]({url})**")
            else:
                st.markdown(f"**{label}**")
            meta = f"NCT: {nct or '—'} · Sponsor: {sponsor or '—'} · Status: {status or '—'} · Phases: {phases or '—'} · Score: {sc}"
            st.write(meta)
            if conds:
                st.write(f"Conditions: {conds}")

            with st.expander("Contacts and Locations"):
                ps = study.get("protocolSection") or {}
                clm = ps.get("contactsLocationsModule") or {}

                centrals = ensure_list(clm.get("centralContacts"))
                if centrals:
                    st.write("Central Contacts:")
                    for contact in centrals:
                        line = _joined_fields(contact, "name", "role", "phone", "email")
                        if line:
                            st.write(" - " + line)

                officials = ensure_list(clm.get("overallOfficials"))
                if officials:
                    st.write("Overall Officials:")
                    for official in officials:
                        line = _joined_fields(official, "name", "role", "affiliation")
                        if line:
                            st.write(" - " + line)

                locs = ensure_list(clm.get("locations"))
                if locs:
                    st.write("Locations:")
                    for loc in locs:
                        if not isinstance(loc, dict):
                            continue
                        # The v2 API names these facility/city/state/country;
                        # the location*-prefixed keys are kept as a fallback.
                        facility = _first_text(loc, "facility", "locationFacility")
                        city = _first_text(loc, "city", "locationCity")
                        state = _first_text(loc, "state", "locationState")
                        country = _first_text(loc, "country", "locationCountry")
                        status_l = _first_text(loc, "status")
                        site_line = ", ".join([p for p in [facility, city, state, country] if p])
                        if site_line:
                            st.write(f" - {site_line}" + (f" (status: {status_l})" if status_l else ""))
                        lcontacts = ensure_list(loc.get("contacts")) or ensure_list(loc.get("locationContacts"))
                        for lc in lcontacts:
                            line = _joined_fields(lc, "name", "role", "phone", "email")
                            if line:
                                st.write(" • " + line)

            if reasons:
                with st.expander("Why this score?"):
                    for r in reasons:
                        st.write(f"- {r}")
|
| |
|