| import os
|
| import base64
|
| import pickle
|
| import numpy as np
|
| import gradio as gr
|
| from perplexity import Perplexity
|
| import json
|
|
|
|
|
# Embedding model requested from the API for every query.
# NOTE(review): presumably this must match the model used by create_case_db.py
# to build the stored corpus embeddings -- confirm.
MODEL_NAME = "pplx-embed-v1-0.6b"

# Pickle file holding the pre-computed corpus (records, mapping, embeddings).
DB_FILE = "case_embeddings.pkl"

print(f"Loading vector database from {DB_FILE}...")

# Fail early with an actionable message instead of a bare open() error.
if not os.path.exists(DB_FILE):
    raise FileNotFoundError(
        f"{DB_FILE} not found. Please run create_case_db.py first! "
        "(Note: This takes ~12 hours for the full dataset)."
    )

# NOTE: pickle.load can execute arbitrary code from a crafted file; only load
# a database file that you generated yourself.
with open(DB_FILE, "rb") as f:
    database = pickle.load(f)

# Pull the four top-level entries out of the database dict, in this order.
cases, legislation, mapping, corpus_embeddings = (
    database[key] for key in ("cases", "legislation", "mapping", "embeddings")
)
|
|
|
|
|
# Scale every corpus row to unit length once at startup, so a plain dot product
# against a unit-length query vector yields cosine similarity. The 1e-8 floor
# prevents division by zero for all-zero rows.
corpus_norms = np.linalg.norm(corpus_embeddings, axis=1, keepdims=True)
corpus_embeddings_normalized = corpus_embeddings / np.maximum(corpus_norms, 1e-8)

# Row positions in the combined corpus for each document type. mapping[i]
# describes row i; its "type" entry selects the sub-corpus.
case_indices = [i for i, m in enumerate(mapping) if m["type"] == "case"]
leg_indices = [i for i, m in enumerate(mapping) if m["type"] == "legislation"]

# Per-type embedding matrices. Row j of case_embeddings corresponds to corpus
# row case_indices[j] (likewise for leg_embeddings / leg_indices).
case_embeddings = corpus_embeddings_normalized[case_indices]
leg_embeddings = corpus_embeddings_normalized[leg_indices]

# The status line is a single-line literal: the original was split in two by a
# mis-encoded check-mark character, leaving an unterminated string.
print(f"✅ Loaded {len(cases)} clinical cases and {len(legislation)} legislation sections.")
|
|
|
|
|
def decode_embedding(b64_string):
    """Convert a base64 string of signed 8-bit values into a float32 vector.

    Args:
        b64_string: Base64 text (or bytes) whose decoded payload is read as
            one int8 value per byte.

    Returns:
        A one-dimensional ``numpy`` array of dtype float32, one element per
        decoded byte.
    """
    raw_bytes = base64.b64decode(b64_string)
    quantized = np.frombuffer(raw_bytes, dtype=np.int8)
    # astype copies, so the result is writable and independent of raw_bytes.
    return quantized.astype(np.float32)
|
|
|
|
|
def format_nursing_reasoning(reasoning):
    """Render a nursing-reasoning record as markdown sections.

    Args:
        reasoning: Usually a dict keyed by section title. Anything that is not
            a dict is returned as ``str(reasoning)``.

    Returns:
        A markdown string with one bold heading per recognised section that is
        present, in a fixed order, each followed by its value and a blank
        line. Unrecognised keys are ignored; an empty string is returned when
        no recognised key is present.
    """
    if not isinstance(reasoning, dict):
        return str(reasoning)

    # Fixed display order; the heading text doubles as the dictionary key.
    section_order = (
        "Differential Diagnosis",
        "Diagnostic Tests",
        "Management Plan",
        "Underlying Mechanism",
        "Relevant Medications",
    )
    return "".join(
        f"**{heading}:**\n{reasoning[heading]}\n\n"
        for heading in section_order
        if heading in reasoning
    )
|
|
|
|
|
def _top_matches(embeddings, query_vector, top_k):
    """Return ``(row, score)`` pairs for the best ``top_k`` rows, best first."""
    similarities = np.dot(embeddings, query_vector)
    # int() guards against a float top_k, which cannot be used as a slice
    # bound. Clamping at zero avoids the ``[-0:]`` pitfall, where a request
    # for zero results would otherwise return the entire corpus.
    limit = max(int(top_k), 0)
    ranked_rows = np.argsort(similarities)[::-1][:limit]
    return [(int(row), float(similarities[row])) for row in ranked_rows]


def _render_case(rank, score, case_item):
    """Format one matched clinical case as a markdown result section."""
    vignette = case_item.get("original_vignette", "")
    reasoning = case_item.get("nursing_reasoning", {})
    case_id = case_item.get("id", "Unknown")
    return (
        f"#### Result {rank} — Relevance: {score:.3f} (Case #{case_id})\n"
        f"**Patient Presentation:**\n> {vignette}\n\n"
        "<details><summary><b>View Nursing Reasoning</b></summary>\n\n"
        f"{format_nursing_reasoning(reasoning)}\n</details>\n\n---\n"
    )


def _render_legislation(rank, score, leg_item):
    """Format one matched legislation section as a markdown result section."""
    title = leg_item.get("title", "")
    leg_id = leg_item.get("legislation_id", "")
    text = leg_item.get("text", "")
    return (
        f"#### Result {rank} — Relevance: {score:.3f} (Act: {leg_id})\n"
        f"⚖️ **{title}**\n\n"
        f"{text}\n\n---\n"
    )


def perform_search(api_key, query, top_k, search_type):
    """Embed ``query`` via the Perplexity API and rank the stored corpus.

    Args:
        api_key: Perplexity API key supplied by the user; surrounding
            whitespace is stripped before use.
        query: Free-text clinical presentation or legal question.
        top_k: Maximum number of results to return. Converted with ``int()``;
            values below one yield no result sections.
        search_type: ``"case"`` searches the clinical cases; any other value
            searches the legislation sections.

    Returns:
        A ``(markdown, cost)`` tuple of strings. ``markdown`` holds the
        rendered results or a user-facing validation/error message. ``cost``
        is ``"API Cost: $<amount>"`` when the response reports a total cost
        and an empty string otherwise (including on every error path).

    Exceptions raised while calling the API or rendering are not propagated:
    this function feeds UI callbacks, so failures are returned as a message.
    """
    if not api_key or not api_key.strip():
        return "⚠️ **Please enter your Perplexity API Key** above.", ""

    if not query or not query.strip():
        return "Please enter a clinical presentation or legal query.", ""

    try:
        client = Perplexity(api_key=api_key.strip())
        response = client.embeddings.create(input=[query], model=MODEL_NAME)
        query_embedding = decode_embedding(response.data[0].embedding)

        # Unit-normalise the query so the dot product against the already
        # normalised corpus rows is cosine similarity.
        query_norm = np.linalg.norm(query_embedding)
        query_embedding_normalized = query_embedding / max(query_norm, 1e-8)

        # Cost reporting is best-effort: a response without usage or cost
        # details must not turn a successful search into an error.
        usage = getattr(response, "usage", None)
        total_cost = getattr(getattr(usage, "cost", None), "total_cost", None)
        cost_str = f"API Cost: ${total_cost:.6f}" if total_cost is not None else ""

        sections = [f"### 🔍 Results for: *\"{query}\"*\n\n---\n"]

        if search_type == "case":
            matches = _top_matches(case_embeddings, query_embedding_normalized, top_k)
            for rank, (local_idx, score) in enumerate(matches, start=1):
                # local_idx indexes the case-only matrix; map it back to the
                # combined corpus row, then to the source record.
                global_idx = case_indices[local_idx]
                case_item = cases[mapping[global_idx]["source_idx"]]
                sections.append(_render_case(rank, score, case_item))
        else:
            matches = _top_matches(leg_embeddings, query_embedding_normalized, top_k)
            for rank, (local_idx, score) in enumerate(matches, start=1):
                global_idx = leg_indices[local_idx]
                leg_item = legislation[mapping[global_idx]["source_idx"]]
                sections.append(_render_legislation(rank, score, leg_item))

        return "".join(sections), cost_str

    except Exception as e:
        # UI boundary: report the failure in the results pane instead of
        # raising into the event handler.
        error_msg = str(e)
        if "401" in error_msg or "auth" in error_msg.lower():
            return "❌ **Invalid API Key.** Check your key and try again.", ""
        return f"❌ Error: {error_msg}", ""
|
|
|
|
|
def search_cases(api_key, query, top_k):
    """Search the clinical-case corpus; see ``perform_search`` for the result shape."""
    return perform_search(api_key, query, top_k, "case")
|
|
|
def search_legislation(api_key, query, top_k):
    """Search the legislation corpus; see ``perform_search`` for the result shape."""
    return perform_search(api_key, query, top_k, "legislation")
|
|
|
|
|
|
|
# Gradio UI: a shared API-key panel plus two tabs (clinical cases, legislation),
# each with a query box, a result-count slider, example queries and a results
# pane. Emoji and dashes in the labels are restored from mis-encoded text.
# NOTE(review): nesting was reconstructed from statement grouping because the
# original indentation was lost -- confirm the placement of api_cost.
with gr.Blocks(title="NurseLex-Match") as app:
    gr.Markdown(
        """
        # 🩺 NurseLex-Match
        ### Clinical Case Similarity & Legal Lookup
        *Powered by Perplexity Embeddings (`pplx-embed-v1-0.6b`)*

        Retrieve similar historical cases from the **NurseReason-Dataset** or search UK nursing law in **NurseLex**.
        """
    )

    with gr.Accordion("🔑 API Key (BYOK — Bring Your Own Key)", open=True):
        gr.Markdown(
            "Your key is **never stored** — it is used only for this session to query the embeddings API. "
        )
        api_key_input = gr.Textbox(
            label="Perplexity API Key",
            placeholder="pplx-...",
            type="password",
            lines=1
        )
        # Updated by every search with the cost string returned alongside the results.
        api_cost = gr.Markdown("*API Cost: $0.000000*")

    with gr.Tabs():

        with gr.TabItem("🏥 Clinical Case Matcher"):
            with gr.Row():
                with gr.Column(scale=3):
                    case_search_input = gr.Textbox(
                        label="Describe the patient presentation:",
                        placeholder="e.g., 72-year-old presenting with acute confusion and suspected UTI...",
                        lines=3
                    )
                with gr.Column(scale=1):
                    case_top_k = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Results")
                    case_search_btn = gr.Button("🔍 Find Similar Cases", variant="primary")

            gr.Examples(
                examples=[
                    "Patient is a 45-year-old female complaining of sharp left lower quadrant abdominal pain radiating to the back.",
                    "Elderly patient with a history of heart failure presenting with increased shortness of breath and pitting edema.",
                    "Post-operative patient day 2 showing signs of infection at the wound site with low-grade fever."
                ],
                inputs=case_search_input
            )
            case_results = gr.Markdown("*Similar cases will appear here...*")

        with gr.TabItem("⚖️ Legal Lookup"):
            with gr.Row():
                with gr.Column(scale=3):
                    leg_search_input = gr.Textbox(
                        label="Describe the legal context or policy question:",
                        placeholder="e.g., What are the rules regarding compulsory admission under the Mental Health Act?",
                        lines=3
                    )
                with gr.Column(scale=1):
                    leg_top_k = gr.Slider(minimum=1, maximum=10, step=1, value=3, label="Results")
                    leg_search_btn = gr.Button("🔍 Search Legislation", variant="primary")

            gr.Examples(
                examples=[
                    "When can a nurse legally refuse to participate in a procedure?",
                    "What is the required staffing ratio for an intensive care unit?",
                    "Regulations regarding the administration of controlled drugs."
                ],
                inputs=leg_search_input
            )
            leg_results = gr.Markdown("*Relevant legislation will appear here...*")

    # Clicking the button and submitting the textbox trigger the same search,
    # writing to the tab's results pane and the shared cost display.
    case_search_btn.click(
        fn=search_cases,
        inputs=[api_key_input, case_search_input, case_top_k],
        outputs=[case_results, api_cost]
    )
    case_search_input.submit(
        fn=search_cases,
        inputs=[api_key_input, case_search_input, case_top_k],
        outputs=[case_results, api_cost]
    )

    leg_search_btn.click(
        fn=search_legislation,
        inputs=[api_key_input, leg_search_input, leg_top_k],
        outputs=[leg_results, api_cost]
    )
    leg_search_input.submit(
        fn=search_legislation,
        inputs=[api_key_input, leg_search_input, leg_top_k],
        outputs=[leg_results, api_cost]
    )
|
|
|
if __name__ == "__main__":
    # Start the web server only when executed as a script, not on import.
    print("Starting NurseLex-Match...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "ssr_mode": False,
    }
    app.launch(**launch_options)
|
|
|