Spaces:
Sleeping
Sleeping
File size: 9,178 Bytes
57d5f2c e2c04b6 6f23dc2 fa94666 875ce0d fa94666 57d5f2c 6f23dc2 1729902 57d5f2c 6f23dc2 875ce0d 47975fb fa94666 6f23dc2 7b0bd9a 57d5f2c 6f23dc2 57d5f2c 6f23dc2 57d5f2c 6f23dc2 fa94666 6f23dc2 5d480b1 875ce0d 7b0bd9a 57d5f2c fa94666 6f23dc2 fa94666 7b0bd9a 57d5f2c 6f23dc2 57d5f2c 6f23dc2 57d5f2c fa94666 6f23dc2 fa94666 7b0bd9a 57d5f2c 6f23dc2 57d5f2c 875ce0d 6f23dc2 875ce0d 6f23dc2 57d5f2c 6f23dc2 57d5f2c 6f23dc2 875ce0d 6f23dc2 875ce0d fa94666 6f23dc2 fa94666 57d5f2c 6f23dc2 875ce0d 6f23dc2 57d5f2c fa94666 6f23dc2 875ce0d 6f23dc2 875ce0d 6f23dc2 57d5f2c 875ce0d fa94666 6f23dc2 6ee935a bc27b65 6f23dc2 57d5f2c 6f23dc2 875ce0d fa94666 57d5f2c 6f23dc2 875ce0d 6f23dc2 57d5f2c 6f23dc2 875ce0d 7b0bd9a fa94666 6f23dc2 fa94666 6f23dc2 5d480b1 1729902 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 |
"""
GENESIS-AI — Multimodal Synthetic Biology Research Pipeline
Coordinates ontology expansion, literature review, AI summarization, image generation,
funding intelligence, safety/biosecurity checks, and report export.
"""
import os
import re
from datetime import datetime
from typing import Dict, Any, List
# API clients
from genesis.api_clients.pubmed_api import search_pubmed_literature
from genesis.api_clients.bioportal_api import expand_with_bioportal
from genesis.api_clients.umls_api import expand_with_umls
from genesis.api_clients.chembl_api import get_molecule_data
from genesis.structures import fetch_structures_for_terms
# Core logic providers
from genesis.providers import (
run_deepseek_summary,
run_gemini_summary,
run_openai_summary,
run_gemini_image,
run_openai_image,
run_hf_image,
narrate_text_elevenlabs
)
# Utility modules
from genesis.utils.pdf_export import export_report_to_pdf
from genesis.utils.graph_tools import write_topic_and_papers
# Visualizations
from genesis.visualization import generate_pathway_graph, generate_funding_network
# Data sources
from genesis.funding import fetch_funding_data
from genesis.trials import fetch_clinical_trials
from genesis.biosecurity import analyze_biosecurity_risks
from genesis.regulation import fetch_regulatory_info
from genesis.safety import analyze_safety_concerns
from genesis.ontology import merge_ontology_terms
# Environment vars (read once at import time; an unset variable yields None).
# ELEVEN_LABS_API_KEY gates optional narration and NEO4J_URI gates the
# optional graph write in multimodal_research().
ELEVEN_LABS_API_KEY = os.getenv("ELEVEN_LABS_API_KEY")
NEO4J_URI = os.getenv("NEO4J_URI")
SYNBIO_MODE = True  # Bias towards synthetic biology context

# Demo queries
DEMO_QUERIES = [
    "CRISPR living therapeutics in clinical trials since 2020",
    "AI-designed enzymes for plastic degradation — literature + pathways",
    "Synthetic biology startups in oncology — funding map",
    "Metabolic pathway for artemisinin biosynthesis in yeast",
    "Oncolytic virus engineering — biosecurity risk analysis",
]
# ---------- Helper Functions ----------
def extract_citations(text: str) -> List[Dict[str, str]]:
    """Extract DOI, PMID, and URL citations from free text.

    Args:
        text: Text to scan (for example a model-generated summary). ``None``
            or an empty string yields an empty list.

    Returns:
        A list of ``{"type", "id", "url"}`` dicts in discovery order: all
        DOIs first, then PMIDs, then bare URLs. Each distinct ``url`` appears
        at most once, so a DOI mentioned twice, or a ``https://doi.org/...``
        link to an already-found DOI, is not repeated.
    """
    if not text:
        return []

    doi_pattern = r"(10\.\d{4,9}/[-._;()/:A-Z0-9]+)"
    pmid_pattern = r"PMID:\s*(\d+)"
    url_pattern = r"(https?://[^\s)]+)"

    def _trim(token: str) -> str:
        # The DOI/URL character classes also accept '.', ',', ';', ':' and
        # ')', so punctuation that merely follows the identifier in prose
        # ("... 10.1000/xyz.") gets captured. Strip it, but keep balanced
        # parentheses because they are legal inside DOIs.
        token = token.rstrip(".,;:")
        while token.endswith(")") and token.count(")") > token.count("("):
            token = token[:-1].rstrip(".,;:")
        return token

    citations: List[Dict[str, str]] = []
    seen_urls = set()

    def _add(kind: str, ident: str, url: str) -> None:
        # Deduplicate on the resolved URL across all citation kinds.
        if url in seen_urls:
            return
        seen_urls.add(url)
        citations.append({"type": kind, "id": ident, "url": url})

    for match in re.finditer(doi_pattern, text, re.IGNORECASE):
        doi = _trim(match.group(1))
        _add("DOI", doi, f"https://doi.org/{doi}")
    for match in re.finditer(pmid_pattern, text, re.IGNORECASE):
        pmid = match.group(1)
        _add("PMID", pmid, f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/")
    for match in re.finditer(url_pattern, text, re.IGNORECASE):
        _add("URL", "", _trim(match.group(1)))
    return citations
def inject_synbio_context(query: str, expanded_terms: List[str]) -> str:
    """Build a prompt that prefixes the query with synthetic-biology expert framing.

    Args:
        query: The user's original query.
        expanded_terms: Ontology-expanded terms to list after the query.
            ``None`` or an empty collection is accepted and produces an empty
            term list; non-string items are rendered with ``str()``.

    Returns:
        The fixed expert context, a blank line, then ``Query: ...`` and
        ``Expanded terms: ...`` lines.
    """
    context = (
        "You are an expert in synthetic biology, biosecurity, and regulatory affairs. "
        "Provide literature review, molecular insights, market trends, and policy implications. "
        "Focus on CRISPR, metabolic engineering, living therapeutics, protein design, biosensors, and biosecurity. "
        "Be concise, factual, and provide citations."
    )
    # str.join raises on None and on non-string items; coerce so an odd
    # ontology result cannot break prompt construction.
    terms = ", ".join(str(term) for term in (expanded_terms or []))
    return f"{context}\n\nQuery: {query}\nExpanded terms: {terms}"
# ---------- Main Pipeline ----------
def _best_effort(label: str, fn, *args, default=None):
    """Call ``fn(*args)``; on any exception log it under ``label`` and return ``default``."""
    try:
        return fn(*args)
    except Exception as e:
        print(f"[Pipeline] {label} failed: {e}")
        return default


def multimodal_research(query: str, narration: bool = False, generate_pdf: bool = False) -> Dict[str, Any]:
    """Run the full GENESIS-AI research pipeline for one query.

    Every enrichment step is best-effort: a failing provider or data source
    is logged to stdout and replaced by an empty/None value so the remaining
    steps still run and a report dict is always returned.

    Args:
        query: Free-text research question.
        narration: If True and ``ELEVEN_LABS_API_KEY`` is set, narrate the
            summary and return the result under ``audio_url``.
        generate_pdf: If True, export a PDF report and return its location
            under ``pdf_path``.

    Returns:
        A dict with keys ``timestamp`` (naive UTC ISO-8601 string), ``query``,
        ``expanded_terms``, ``summary``, ``citations``, ``structures``,
        ``image_url``, ``funding_data``, ``trial_data``, ``regulation_data``,
        ``safety_data``, ``biosecurity_data``, ``pathway_graph``,
        ``funding_graph``, ``audio_url`` and ``pdf_path``.
    """
    # Local import: the module header only imports ``datetime`` itself.
    from datetime import timezone

    print(f"[Pipeline] Starting research for query: {query}")

    # 1 — Expand query with ontology. The UMLS/BioPortal lookups are guarded
    # as a unit so an outage degrades to "no expanded terms" instead of
    # aborting the whole run.
    try:
        expanded_terms = merge_ontology_terms(
            query,
            expand_with_umls(query),
            expand_with_bioportal(query),
        ) or []
    except Exception as e:
        print(f"[Pipeline] Ontology expansion failed: {e}")
        expanded_terms = []
    print(f"[Pipeline] Expanded terms: {expanded_terms}")

    # 2 — Enrich query with domain-specific context
    enriched_query = inject_synbio_context(query, expanded_terms) if SYNBIO_MODE else query

    # 3 — Summarization (fallback order): first provider returning a truthy
    # summary wins; provider errors are logged and the next one is tried.
    summary = None
    for summarizer in [run_deepseek_summary, run_gemini_summary, run_openai_summary]:
        try:
            summary = summarizer(enriched_query)
            if summary:
                print(f"[Pipeline] Summary generated by {summarizer.__name__}")
                break
        except Exception as e:
            print(f"[Pipeline] {summarizer.__name__} failed: {e}")
    if not summary:
        summary = "No summary generated — please refine your query."

    # 4 — Extract citations, fallback to PubMed if none found
    citations = extract_citations(summary)
    if not citations:
        print("[Pipeline] No citations in summary, querying PubMed...")
        citations = _best_effort("PubMed search", search_pubmed_literature, query) or []

    # 5 — Fetch related structures (NCBI, ChEMBL). The helper's return type
    # is not visible here, so a failure is reported as None rather than a
    # guessed empty container.
    structures = _best_effort("Structure lookup", fetch_structures_for_terms, expanded_terms)

    # 6 — Image generation with fallback (note: prompts with the raw query,
    # not the enriched one)
    image_url = None
    for img_fn in [run_gemini_image, run_openai_image, run_hf_image]:
        try:
            image_url = img_fn(query)
            if image_url:
                print(f"[Pipeline] Image generated by {img_fn.__name__}")
                break
        except Exception as e:
            print(f"[Pipeline] {img_fn.__name__} failed: {e}")

    # 7 — Funding, trials, regulations, safety, biosecurity
    funding_data = _best_effort("Funding lookup", fetch_funding_data, query) or []
    trial_data = _best_effort("Clinical trials lookup", fetch_clinical_trials, query) or []
    regulation_data = _best_effort("Regulatory lookup", fetch_regulatory_info, query) or []
    safety_data = _best_effort("Safety analysis", analyze_safety_concerns, query) or []
    biosecurity_data = _best_effort("Biosecurity analysis", analyze_biosecurity_risks, query) or []

    # 8 — Graph visualizations
    # NOTE(review): run_pathway_analysis() calls generate_pathway_graph with a
    # second (terms) argument while this call passes only the query — confirm
    # the signature and whether expanded_terms should be forwarded here.
    pathway_graph = _best_effort("Pathway graph", generate_pathway_graph, query) if expanded_terms else None
    funding_graph = _best_effort("Funding graph", generate_funding_network, query) if funding_data else None

    # 9 — Save to Neo4j if configured
    if NEO4J_URI:
        try:
            write_topic_and_papers(query, citations, expanded_terms)
            print("[Pipeline] Data saved to Neo4j")
        except Exception as e:
            print(f"[Pipeline] Neo4j save failed: {e}")

    # 10 — Narration (optional)
    audio_url = None
    if narration and ELEVEN_LABS_API_KEY:
        try:
            audio_url = narrate_text_elevenlabs(summary)
            print("[Pipeline] Narration generated")
        except Exception as e:
            print(f"[Pipeline] Narration failed: {e}")

    # 11 — PDF export (optional)
    pdf_path = None
    if generate_pdf:
        try:
            pdf_path = export_report_to_pdf(query, summary, citations, structures, funding_data, regulation_data)
            print("[Pipeline] PDF report generated")
        except Exception as e:
            print(f"[Pipeline] PDF generation failed: {e}")

    # datetime.utcnow() is deprecated; take an aware UTC "now" and drop the
    # tzinfo so the emitted string keeps the exact naive-ISO format callers
    # already receive.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    return {
        "timestamp": timestamp,
        "query": query,
        "expanded_terms": expanded_terms,
        "summary": summary,
        "citations": citations,
        "structures": structures,
        "image_url": image_url,
        "funding_data": funding_data,
        "trial_data": trial_data,
        "regulation_data": regulation_data,
        "safety_data": safety_data,
        "biosecurity_data": biosecurity_data,
        "pathway_graph": pathway_graph,
        "funding_graph": funding_graph,
        "audio_url": audio_url,
        "pdf_path": pdf_path,
    }
# ---------- Wrappers for app.py ----------
def research_once(topic: str) -> Dict[str, Any]:
    """Run the research pipeline once for ``topic`` with default options.

    Thin wrapper: forwards to ``multimodal_research`` without narration or
    PDF export and returns its report dict unchanged.
    """
    report = multimodal_research(topic)
    return report
def run_literature_review(query: str):
    """Back the literature review tab.

    Runs the full pipeline for ``query`` and returns only the
    ``(summary, citations)`` pair from its report.
    """
    report = multimodal_research(query)
    summary, citations = (report[key] for key in ("summary", "citations"))
    return summary, citations
def run_molecule_lookup(molecule_name: str):
    """Back the molecule lookup tab.

    Args:
        molecule_name: Name of the molecule to look up.

    Returns:
        A ``(details, image_url)`` tuple. ``details`` is the ``str()`` of a
        dict with ``chembl`` (the ChEMBL record, or ``{"error": message}`` if
        that lookup raised) and ``structures``. ``image_url`` is the first
        truthy result from the image providers, tried in order, or a falsy
        value when none produced an image.
    """
    try:
        chembl_data = get_molecule_data(molecule_name)
    except Exception as e:
        chembl_data = {"error": str(e)}
    structures = fetch_structures_for_terms([molecule_name])

    img_url = None
    for img_fn in [run_gemini_image, run_openai_image, run_hf_image]:
        try:
            img_url = img_fn(molecule_name)
        except Exception as e:
            # Was a bare ``except: pass``: that also trapped
            # KeyboardInterrupt/SystemExit and hid why a provider failed.
            print(f"[Molecule lookup] {img_fn.__name__} failed: {e}")
            continue
        if img_url:
            break
    return str({"chembl": chembl_data, "structures": structures}), img_url
def run_pathway_analysis(pathway_name: str):
    """Back the pathway analysis tab.

    Builds a pathway graph seeded with ``pathway_name`` as its only term and
    returns ``(headline, graph_data)``.
    """
    seed_terms = [pathway_name]
    graph_data = generate_pathway_graph(pathway_name, seed_terms)
    headline = f"Pathway analysis for {pathway_name}"
    return headline, graph_data
def run_funding_analysis(keyword: str):
    """Back the funding analysis tab.

    Looks up funding data for ``keyword`` and returns its ``str()`` rendering.
    """
    return str(fetch_funding_data(keyword))
def run_image_analysis(image_path: str):
    """Back the image analysis tab.

    Args:
        image_path: Path of the image to analyze.

    Returns:
        The first truthy response from the summarizers (Gemini, then OpenAI,
        then DeepSeek), or ``"Image analysis failed."`` if none produced one.
    """
    # NOTE(review): only the path string is interpolated into the prompt; the
    # image itself is not read here. Confirm the summarizers can actually
    # access the file at this path.
    prompt = f"Analyze this microscopy or biological image: {image_path}"
    analysis = None
    for summarizer in [run_gemini_summary, run_openai_summary, run_deepseek_summary]:
        try:
            analysis = summarizer(prompt)
        except Exception as e:
            # Was a bare ``except: pass``: that also trapped
            # KeyboardInterrupt/SystemExit and hid why a provider failed.
            print(f"[Image analysis] {summarizer.__name__} failed: {e}")
            continue
        if analysis:
            break
    return analysis or "Image analysis failed."
|