"""Cohere-backed chat and embeddings with an optional local-model fallback.

Network calls go through a small exponential-backoff retry helper; failures are
recorded via audit_log before returning a safe default (None or []).
"""

from typing import Optional, List
import time

import cohere

from settings import (
    COHERE_API_KEY,
    COHERE_API_URL,
    COHERE_MODEL_PRIMARY,
    COHERE_EMBED_MODEL,
    MODEL_SETTINGS,
    USE_OPEN_FALLBACKS,
    COHERE_TIMEOUT_S,
)

# The local model is optional; if it (or its dependencies) cannot be imported,
# open_fallback_chat() simply becomes a no-op.
try:
    from local_llm import LocalLLM
    _HAS_LOCAL = True
except Exception:
    _HAS_LOCAL = False

# Lazily constructed, module-level Cohere client, reused across calls.
_client: Optional[cohere.Client] = None


def _co_client() -> Optional[cohere.Client]:
    """Return a cached Cohere client, or None when no API key is configured."""
    global _client
    if _client is not None:
        return _client
    if not COHERE_API_KEY:
        return None
    kwargs = {"api_key": COHERE_API_KEY, "timeout": COHERE_TIMEOUT_S}
    if COHERE_API_URL:
        kwargs["base_url"] = COHERE_API_URL
    _client = cohere.Client(**kwargs)
    return _client


def _retry(fn, attempts=3, backoff=0.8):
    """Call fn up to `attempts` times with exponential backoff; re-raise the last error."""
    last = None
    for i in range(attempts):
        try:
            return fn()
        except Exception as e:
            last = e
            if i < attempts - 1:  # no point sleeping after the final failed attempt
                time.sleep(backoff * (2 ** i))
    raise last if last else RuntimeError("Unknown error")


def cohere_chat(prompt: str) -> Optional[str]:
    """Send a single-turn chat request to Cohere; return the reply text or None."""
    cli = _co_client()
    if not cli:
        return None

    def _call():
        resp = cli.chat(
            model=COHERE_MODEL_PRIMARY,
            message=prompt,
            temperature=MODEL_SETTINGS["temperature"],
            max_tokens=MODEL_SETTINGS["max_new_tokens"],
        )
        # Response shape varies across SDK versions; try the known attributes in order.
        return (
            getattr(resp, "text", None)
            or getattr(resp, "reply", None)
            or (resp.generations[0].text if getattr(resp, "generations", None) else None)
        )

    try:
        return _retry(_call, attempts=2)
    except Exception as e:
        from audit_log import log_event
        log_event("cohere_chat_error", None, {"err": str(e)})
        return None


def open_fallback_chat(prompt: str) -> Optional[str]:
    """Fall back to the local model when enabled and importable; otherwise None."""
    if not USE_OPEN_FALLBACKS or not _HAS_LOCAL:
        return None
    try:
        return LocalLLM().chat(prompt)
    except Exception:
        return None


def cohere_embed(texts: List[str]) -> List[List[float]]:
    """Embed a batch of texts; return [] when the client is missing or the call fails."""
    cli = _co_client()
    if not cli or not texts:
        return []

    def _call():
        resp = cli.embed(texts=texts, model=COHERE_EMBED_MODEL)
        return getattr(resp, "embeddings", None) or getattr(resp, "data", []) or []

    try:
        return _retry(_call, attempts=2)
    except Exception as e:
        from audit_log import log_event
        log_event("cohere_embed_error", None, {"err": str(e)})
        return []


def generate_narrative(scenario_text: str, structured_sections_md: str, rag_snippets: List[str]) -> str:
    """Compose the grounded prompt and generate a report: Cohere first, then the
    local fallback, then a fixed error message."""
    grounding = "\n\n".join(f"[RAG {i+1}]\n{t}" for i, t in enumerate(rag_snippets or []))
    prompt = f"""You are a Canadian healthcare operations copilot.
Follow the scenario's requested deliverables exactly.
Use the structured computations provided (already calculated deterministically) and the RAG snippets for grounding.

# Scenario
{scenario_text}

# Deterministic Results (already computed)
{structured_sections_md}

# Grounding (Canadian sources, snippets)
{grounding}

Write a concise, decision-ready report tailored to provincial operations leaders.
Do not invent numbers. If data are missing, say so clearly.
"""
    out = cohere_chat(prompt)
    if out:
        return out
    out = open_fallback_chat(prompt)
    if out:
        return out
    return "Unable to generate narrative at this time."
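

# --- Usage sketch (illustrative only) -----------------------------------------
# A minimal sketch of how this module might be exercised end to end, assuming
# settings.py provides the constants imported above. The scenario text,
# deterministic-results markdown, and RAG snippet below are placeholders for
# values an upstream pipeline would supply; they are not real data.
if __name__ == "__main__":
    demo_report = generate_narrative(
        scenario_text="(scenario description produced upstream would go here)",
        structured_sections_md="## Deterministic Results\n- (computed upstream)",
        rag_snippets=["(retrieved Canadian source snippet would go here)"],
    )
    print(demo_report)

    # Embeddings follow the same client/retry path as chat; an empty list
    # signals that the Cohere client is unconfigured or the call failed.
    vectors = cohere_embed(["example query one", "example query two"])
    print(f"embedded {len(vectors)} texts")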