""" OnCall.ai Medical Advice Generation Module This module handles: 1. RAG prompt construction from retrieval results 2. Medical advice generation using Med42-70B 3. Response formatting and confidence assessment 4. Integration with multi-dataset architecture 5. Fallback generation mechanisms for reliability Author: OnCall.ai Team Date: 2025-07-31 """ import logging from typing import Dict, List, Optional, Any, Union from datetime import datetime import json import re # Import existing LLM client from llm_clients import llm_Med42_70BClient # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger(__name__) # Fallback Generation Configuration (Simplified Architecture) FALLBACK_TIMEOUTS = { "primary": 60.0, # Primary Med42-70B increased timeout for stable evaluation "fallback_1": 1.0, # RAG template generation (renamed from fallback_2) "fallback_2": 0.1 # Minimal template generation (instant) } FALLBACK_TOKEN_LIMITS = { "primary": 1600, # Full comprehensive medical advice (increased) "fallback_1": 0, # RAG template-based, no LLM tokens (renamed from fallback_2) "fallback_2": 0 # Minimal template-based, no LLM tokens } FALLBACK_CONFIDENCE_SCORES = { "fallback_1": 0.4, # RAG template only (renamed from fallback_2) "fallback_2": 0.2 # Minimal template only } FALLBACK_ERROR_TRIGGERS = { "timeout_errors": ["TimeoutError", "RequestTimeout"], "connection_errors": ["ConnectionError", "HTTPError", "APIError"], "processing_errors": ["TokenLimitError", "JSONDecodeError", "ValidationError"], "content_errors": ["EmptyResponse", "MalformedResponse"] } class MedicalAdviceGenerator: """ Core generation module for medical advice using RAG approach """ def __init__(self, llm_client: Optional[llm_Med42_70BClient] = None): """ Initialize medical advice generator Args: llm_client: Optional Med42-70B client, creates new if None """ self.llm_client = llm_client or llm_Med42_70BClient() # Dataset source priorities for different intentions self.dataset_priorities = { "treatment": { "emergency_subset": 2, "treatment_subset": 4, "symptom_subset": 0, # Reserved for Dataset B "diagnosis_subset": 0 # Reserved for Dataset B }, "diagnosis": { "emergency_subset": 4, "treatment_subset": 2, "symptom_subset": 0, # Reserved for Dataset B "diagnosis_subset": 0 # Reserved for Dataset B }, # "STAT": { # # NOTE: Use when query contains urgent indicators like "NOW", "STAT", "critical" # "emergency_subset": 5, # "treatment_subset": 1, # "symptom_subset": 0, # Reserved for Dataset B # "diagnosis_subset": 0 # Reserved for Dataset B # } } logger.info("MedicalAdviceGenerator initialized") def generate_medical_advice(self, user_query: str, retrieval_results: Dict[str, Any], intention: Optional[str] = None) -> Dict[str, Any]: """ Complete pipeline: construct prompt → generate advice → format response Args: user_query: Original user medical query retrieval_results: Results from BasicRetrievalSystem.search() intention: Optional query intention ('treatment', 'diagnosis', 'STAT'(tentative)) Returns: Dict containing formatted medical advice and metadata """ try: logger.info(f"Generating medical advice for query: '{user_query[:50]}...'") start_time = datetime.now() # Step 1: Extract and classify chunks from retrieval results classified_chunks = self._classify_retrieval_chunks(retrieval_results) # Step 2: Build RAG prompt based on intention and chunk classification rag_prompt = self.generate_prompt(user_query, classified_chunks, intention) # Step 3: 

class MedicalAdviceGenerator:
    """
    Core generation module for medical advice using RAG approach
    """

    def __init__(self, llm_client: Optional[llm_Med42_70BClient] = None):
        """
        Initialize medical advice generator

        Args:
            llm_client: Optional Med42-70B client, creates new if None
        """
        self.llm_client = llm_client or llm_Med42_70BClient()

        # Dataset source priorities for different intentions
        self.dataset_priorities = {
            "treatment": {
                "emergency_subset": 2,
                "treatment_subset": 4,
                "symptom_subset": 0,   # Reserved for Dataset B
                "diagnosis_subset": 0  # Reserved for Dataset B
            },
            "diagnosis": {
                "emergency_subset": 4,
                "treatment_subset": 2,
                "symptom_subset": 0,   # Reserved for Dataset B
                "diagnosis_subset": 0  # Reserved for Dataset B
            },
            # "STAT": {
            #     # NOTE: Use when query contains urgent indicators like "NOW", "STAT", "critical"
            #     "emergency_subset": 5,
            #     "treatment_subset": 1,
            #     "symptom_subset": 0,   # Reserved for Dataset B
            #     "diagnosis_subset": 0  # Reserved for Dataset B
            # }
        }

        logger.info("MedicalAdviceGenerator initialized")

    def generate_medical_advice(self, user_query: str, retrieval_results: Dict[str, Any],
                                intention: Optional[str] = None) -> Dict[str, Any]:
        """
        Complete pipeline: construct prompt → generate advice → format response

        Args:
            user_query: Original user medical query
            retrieval_results: Results from BasicRetrievalSystem.search()
            intention: Optional query intention ('treatment', 'diagnosis', 'STAT' (tentative))

        Returns:
            Dict containing formatted medical advice and metadata
        """
        try:
            logger.info(f"Generating medical advice for query: '{user_query[:50]}...'")
            start_time = datetime.now()

            # Step 1: Extract and classify chunks from retrieval results
            classified_chunks = self._classify_retrieval_chunks(retrieval_results)

            # Step 2: Build RAG prompt based on intention and chunk classification
            rag_prompt = self.generate_prompt(user_query, classified_chunks, intention)

            # Step 3: Generate medical advice using Med42-70B
            generation_result = self._generate_with_med42(rag_prompt)

            # Step 4: Format structured response
            formatted_response = self._format_medical_response(
                user_query=user_query,
                generated_advice=generation_result,
                chunks_used=classified_chunks,
                intention=intention,
                processing_time=(datetime.now() - start_time).total_seconds()
            )

            processing_duration = formatted_response.get('query_metadata', {}).get('processing_time_seconds', 0)
            logger.info(f"Medical advice generated successfully in {processing_duration:.3f}s")
            return formatted_response

        except Exception as e:
            logger.error(f"Medical advice generation failed: {e}")
            return self._generate_error_response(user_query, str(e))

    def generate_prompt(self, user_query: str, classified_chunks: Dict[str, List],
                        intention: Optional[str] = None) -> str:
        """
        Enhanced prompt generator with flexible dataset integration

        Args:
            user_query: User's medical query
            classified_chunks: Chunks classified by dataset source
            intention: Query intention if detected

        Returns:
            Structured RAG prompt for Med42-70B
        """
        logger.info(f"Generating prompt with intention: {intention}")

        # Extract chunks by dataset source
        emergency_chunks = classified_chunks.get("emergency_subset", [])
        treatment_chunks = classified_chunks.get("treatment_subset", [])
        symptom_chunks = classified_chunks.get("symptom_subset", [])      # Dataset B (future)
        diagnosis_chunks = classified_chunks.get("diagnosis_subset", [])  # Dataset B (future)
        hospital_custom_chunks = classified_chunks.get("hospital_custom", [])  # Hospital customization

        # Select chunks based on intention or intelligent defaults
        selected_chunks = self._select_chunks_by_intention(
            intention=intention,
            emergency_chunks=emergency_chunks,
            treatment_chunks=treatment_chunks,
            symptom_chunks=symptom_chunks,
            diagnosis_chunks=diagnosis_chunks,
            hospital_custom_chunks=hospital_custom_chunks
        )

        # Build context block from selected chunks
        context_block = self._build_context_block(selected_chunks)

        # Construct medical RAG prompt
        prompt = self._construct_medical_prompt(user_query, context_block, intention)

        logger.info(f"Generated prompt with {len(selected_chunks)} chunks, {len(context_block)} chars")
        return prompt

    def _classify_retrieval_chunks(self, retrieval_results: Dict[str, Any]) -> Dict[str, List]:
        """
        Classify retrieval chunks by dataset source

        Args:
            retrieval_results: Results from BasicRetrievalSystem.search()

        Returns:
            Dict mapping dataset sources to chunk lists
        """
        classified = {
            "emergency_subset": [],
            "treatment_subset": [],
            "symptom_subset": [],    # Reserved for Dataset B
            "diagnosis_subset": [],  # Reserved for Dataset B
            "hospital_custom": []    # Hospital-specific customization
        }

        # Process results from current dual-index system
        processed_results = retrieval_results.get('processed_results', [])

        for chunk in processed_results:
            chunk_type = chunk.get('type', 'unknown')

            # Map current system types to dataset sources
            if chunk_type == 'emergency':
                classified["emergency_subset"].append(chunk)
            elif chunk_type == 'treatment':
                classified["treatment_subset"].append(chunk)
            else:
                # Unknown type: default to the emergency subset so the chunk is not lost
                logger.warning(f"Unknown chunk type: {chunk_type}, defaulting to emergency_subset")
                classified["emergency_subset"].append(chunk)

        # Process hospital customization results if available
        customization_results = retrieval_results.get('customization_results', [])
        if customization_results:
            for custom_chunk in customization_results:
                # Convert customization format to standard chunk format
                standardized_chunk = {
                    'type': 'hospital_custom',
                    'text': custom_chunk.get('chunk_text', ''),
                    'distance': 1 - custom_chunk.get('score', 0),  # Convert similarity score to distance
                    'matched': f"Hospital Doc: {custom_chunk.get('document', 'Unknown')}",
                    'metadata': custom_chunk.get('metadata', {})
                }
                classified["hospital_custom"].append(standardized_chunk)
            logger.info(f"Added {len(customization_results)} hospital-specific chunks")

        # TODO: Future integration point for Dataset B
        # When Dataset B team provides symptom/diagnosis data:
        # classified["symptom_subset"] = process_dataset_b_symptoms(retrieval_results)
        # classified["diagnosis_subset"] = process_dataset_b_diagnosis(retrieval_results)

        logger.info(f"Classified chunks: Emergency={len(classified['emergency_subset'])}, "
                    f"Treatment={len(classified['treatment_subset'])}, "
                    f"Hospital Custom={len(classified['hospital_custom'])}")

        return classified

    def _select_chunks_by_intention(self, intention: Optional[str],
                                    emergency_chunks: List, treatment_chunks: List,
                                    symptom_chunks: List, diagnosis_chunks: List,
                                    hospital_custom_chunks: Optional[List] = None) -> List:
        """
        Select optimal chunk combination based on query intention

        Args:
            intention: Detected or specified intention
            *_chunks: Chunks from different dataset sources
            hospital_custom_chunks: Hospital-specific customization chunks

        Returns:
            List of selected chunks for prompt construction
            (see the worked example after this method)
        """
        hospital_custom_chunks = hospital_custom_chunks or []

        if intention and intention in self.dataset_priorities:
            # Use predefined priorities for known intentions
            priorities = self.dataset_priorities[intention]
            selected_chunks = []

            # Add chunks according to priority allocation
            selected_chunks.extend(emergency_chunks[:priorities["emergency_subset"]])
            selected_chunks.extend(treatment_chunks[:priorities["treatment_subset"]])

            # Add hospital custom chunks alongside (limit to top 3 for quality)
            selected_chunks.extend(hospital_custom_chunks[:3])

            # TODO: Future Dataset B integration
            # selected_chunks.extend(symptom_chunks[:priorities["symptom_subset"]])
            # selected_chunks.extend(diagnosis_chunks[:priorities["diagnosis_subset"]])

            logger.info(f"Selected chunks by intention '{intention}': {len(selected_chunks)} total")
        else:
            # No specific intention - let the LLM judge from the best available chunks
            all_chunks = (emergency_chunks + treatment_chunks + symptom_chunks
                          + diagnosis_chunks + hospital_custom_chunks)

            # Sort by relevance (ascending distance) and take the top 6
            all_chunks_sorted = sorted(all_chunks, key=lambda x: x.get("distance", 999))
            selected_chunks = all_chunks_sorted[:6]

            logger.info(f"Selected chunks by relevance (no intention): {len(selected_chunks)} total")

        return selected_chunks
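
    # Worked example (illustrative; counts are hypothetical):
    #   intention="treatment" uses priorities {"emergency_subset": 2, "treatment_subset": 4}.
    #   With 3 emergency, 5 treatment, and 4 hospital chunks available, selection keeps
    #   emergency[:2] + treatment[:4] + hospital_custom[:3] = 9 chunks, in that order.
    #   With intention=None, all chunks are pooled, sorted by ascending distance,
    #   and the 6 closest are kept regardless of source.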

    def _build_context_block(self, selected_chunks: List) -> str:
        """
        Build formatted context block from selected chunks

        Args:
            selected_chunks: List of selected chunks

        Returns:
            Formatted context string for prompt
        """
        if not selected_chunks:
            return "No relevant medical guidelines found."

        context_parts = []
        for i, chunk in enumerate(selected_chunks, 1):
            chunk_text = chunk.get("text", "").strip()
            chunk_type = chunk.get("type", "unknown")
            distance = chunk.get("distance", 0)

            # Format each chunk with metadata
            if chunk_type == 'hospital_custom':
                # Special formatting for hospital-specific guidelines
                source_label = "Hospital Protocol"
                context_part = f"""
[Guideline {i}] (Source: {source_label}, Relevance: {1 - distance:.3f})
📋 {chunk.get('matched', 'Hospital Document')}
{chunk_text}
""".strip()
            else:
                context_part = f"""
[Guideline {i}] (Source: {chunk_type.title()}, Angular Distance: {distance:.3f})
{chunk_text}
""".strip()

            context_parts.append(context_part)

        return "\n\n".join(context_parts)

    def _construct_medical_prompt(self, user_query: str, context_block: str,
                                  intention: Optional[str]) -> str:
        """
        Construct final medical RAG prompt with appropriate framing

        Args:
            user_query: Original user query
            context_block: Formatted context from selected chunks
            intention: Query intention if detected

        Returns:
            Complete RAG prompt for Med42-70B
        """
        # Customize prompt based on intention
        if intention == "treatment":
            focus_guidance = "Focus on providing specific treatment protocols, management steps, and therapeutic interventions."
        elif intention == "diagnosis":
            focus_guidance = "Focus on differential diagnosis, diagnostic criteria, and assessment approaches."
        elif intention == "STAT(tentative)":
            focus_guidance = "Focus on immediate emergency interventions and critical decision-making steps."
        else:
            focus_guidance = "Provide comprehensive medical guidance covering both diagnostic and treatment aspects as appropriate."

        prompt = f"""
You are an experienced attending physician providing guidance to a junior clinician in an emergency setting. A colleague is asking for your expert medical opinion.

Clinical Question: {user_query}

Relevant Medical Guidelines:
{context_block}

Instructions: {focus_guidance}

Provide guidance with:
• Prioritize information and evidence from above sources (PRIMARY)
• Use your medical knowledge to organize guidelines into actionable steps
• Numbered points (1. 2. 3.) for key steps
• Line breaks between major sections
• Highlight medications with dosages and routes
• Emphasize clinical judgment for individual patient factors (SECONDARY)

IMPORTANT: Keep response under 1000 words. Use concise numbered points. For complex cases with multiple conditions, address the most urgent condition first, then relevant comorbidities. Prioritize actionable clinical steps over theoretical explanations.

Your response should provide practical clinical guidance suitable for immediate bedside application with appropriate medical caution."""

        return prompt
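
    # Illustrative rendering of one entry of each kind (the emergency distance matches
    # the sample data in main(); the hospital values and document name are hypothetical,
    # assuming a customization score of 0.85):
    #
    #   [Guideline 1] (Source: Emergency, Angular Distance: 0.300)
    #   Acute myocardial infarction requires immediate assessment including ECG, ...
    #
    #   [Guideline 2] (Source: Hospital Protocol, Relevance: 0.850)
    #   📋 Hospital Doc: chest_pain_pathway.pdf
    #   ...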

    def _generate_with_med42(self, prompt: str) -> Dict[str, Any]:
        """
        Generate medical advice using Med42-70B with comprehensive fallback support

        This method implements the complete fallback chain:
        1. Primary: Med42-70B with full RAG context
        2. Fallback 1: RAG template response (no LLM call)
        3. Fallback 2: Minimal template response
        4. Final: Error response

        Args:
            prompt: Complete RAG prompt

        Returns:
            Generation result with metadata and fallback information
        """
        try:
            logger.info("🤖 GENERATION: Attempting Med42-70B with RAG context")

            result = self.llm_client.analyze_medical_query(
                query=prompt,
                max_tokens=FALLBACK_TOKEN_LIMITS["primary"],  # Use configured token limit
                timeout=FALLBACK_TIMEOUTS["primary"]          # Use configured timeout
            )

            # Check for API errors in response
            if result.get('error'):
                logger.warning(f"⚠️ Med42-70B returned error: {result['error']}")
                # Pass any available content for potential simplification
                primary_content = result.get('raw_response', '')
                return self._attempt_fallback_generation(prompt, result['error'], primary_content)

            # Check for empty response
            if not result.get('raw_response', '').strip():
                logger.warning("⚠️ Med42-70B returned empty response")
                return self._attempt_fallback_generation(prompt, "Empty response from primary generation")

            # Primary generation successful
            logger.info("✅ GENERATION: Med42-70B with RAG successful")
            # Mark as primary method for tracking
            result['fallback_method'] = 'primary'
            return result

        except Exception as e:
            logger.error(f"❌ GENERATION: Med42-70B with RAG failed: {e}")
            # Attempt fallback chain instead of raising exception
            return self._attempt_fallback_generation(prompt, str(e))

    def _format_medical_response(self, user_query: str, generated_advice: Dict[str, Any],
                                 chunks_used: Dict[str, List], intention: Optional[str],
                                 processing_time: float) -> Dict[str, Any]:
        """
        Format final medical response with metadata and confidence assessment

        Args:
            user_query: Original query
            generated_advice: Result from Med42-70B
            chunks_used: Classification of chunks used
            intention: Detected intention
            processing_time: Total processing time

        Returns:
            Structured medical advice response
        """
        # Extract generated content - use raw_response for complete medical advice
        advice_content = generated_advice.get('raw_response', '')
        if not advice_content:
            advice_content = generated_advice.get('extracted_condition', 'Unable to generate medical advice.')

        # Calculate confidence based on available factors
        confidence_score = self._calculate_confidence_score(generated_advice, chunks_used)

        # Count chunks used by source
        chunk_counts = {source: len(chunks) for source, chunks in chunks_used.items()}
        total_chunks = sum(chunk_counts.values())

        formatted_response = {
            "medical_advice": advice_content,
            "confidence_score": confidence_score,
            "query_metadata": {
                "original_query": user_query,
                "detected_intention": intention,
                "processing_time_seconds": processing_time,
                "total_chunks_used": total_chunks,
                "chunks_by_source": chunk_counts
            },
            "generation_metadata": {
                "model_used": "m42-health/Llama3-Med42-70B",
                "generation_time": generated_advice.get('latency', 0),
                "model_confidence": generated_advice.get('confidence', 'unknown'),
                "timestamp": datetime.now().isoformat()
            },
            "sources": {
                "emergency_sources": len(chunks_used.get("emergency_subset", [])),
                "treatment_sources": len(chunks_used.get("treatment_subset", [])),
                "total_sources": total_chunks
            },
            "disclaimer": ("This advice is for informational purposes only and should not replace "
                           "professional medical consultation. Always consult with qualified healthcare "
                           "providers for medical decisions.")
        }

        return formatted_response

    def _calculate_confidence_score(self, generated_advice: Dict[str, Any],
                                    chunks_used: Dict[str, List]) -> float:
        """
        Calculate confidence score based on generation quality and source reliability

        Args:
            generated_advice: Result from Med42-70B
            chunks_used: Chunks used in generation

        Returns:
            Confidence score between 0.0 and 1.0 (see the worked example below)
        """
        confidence_factors = []

        # Factor 1: Model confidence if available
        model_confidence = generated_advice.get('confidence', '0.5')
        try:
            model_conf_value = float(model_confidence)
            confidence_factors.append(model_conf_value)
        except (ValueError, TypeError):
            confidence_factors.append(0.5)  # Default neutral confidence

        # Factor 2: Number of sources used (more sources = higher confidence)
        total_chunks = sum(len(chunks) for chunks in chunks_used.values())
        source_confidence = min(total_chunks / 6.0, 1.0)  # Normalize to max 6 chunks
        confidence_factors.append(source_confidence)

        # Factor 3: Response length (reasonable length indicates comprehensive advice)
        response_length = len(generated_advice.get('raw_response', ''))
        length_confidence = min(response_length / 500.0, 1.0)  # Normalize to ~500 chars
        confidence_factors.append(length_confidence)

        # Factor 4: Processing success (no errors = higher confidence)
        if generated_advice.get('error'):
            confidence_factors.append(0.3)  # Lower confidence if errors occurred
        else:
            confidence_factors.append(0.8)  # Higher confidence for clean generation

        # Calculate the unweighted mean of all factors
        final_confidence = sum(confidence_factors) / len(confidence_factors)

        # Ensure confidence is within valid range
        return max(0.1, min(0.95, final_confidence))
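
    # Worked example: model confidence 0.7, 4 chunks used, an 800-character response,
    # and no errors gives factors [0.7, 4/6 ≈ 0.667, 1.0, 0.8]; their mean is ≈ 0.79,
    # which already lies inside the [0.1, 0.95] clamp, so the final score is ≈ 0.79.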

    def _generate_error_response(self, user_query: str, error_message: str) -> Dict[str, Any]:
        """
        Generate error response when generation fails

        Args:
            user_query: Original query
            error_message: Error details

        Returns:
            Error response in standard format
        """
        return {
            "medical_advice": ("I apologize, but I encountered an error while processing your medical query. "
                               "Please try rephrasing your question or contact technical support if the issue persists."),
            "confidence_score": 0.0,
            "query_metadata": {
                "original_query": user_query,
                "detected_intention": None,
                "processing_time_seconds": 0.0,
                "total_chunks_used": 0,
                "chunks_by_source": {}
            },
            "generation_metadata": {
                "model_used": "m42-health/Llama3-Med42-70B",
                "error": error_message,
                "timestamp": datetime.now().isoformat()
            },
            "sources": {
                "emergency_sources": 0,
                "treatment_sources": 0,
                "total_sources": 0
            },
            "disclaimer": ("This system experienced a technical error. Please consult with qualified "
                           "healthcare providers for medical decisions.")
        }

    def _attempt_fallback_generation(self, original_prompt: str, primary_error: str,
                                     primary_result: Optional[str] = None) -> Dict[str, Any]:
        """
        Orchestrate fallback generation attempts with detailed logging

        This function coordinates the fallback chain when primary Med42-70B generation fails.
        It attempts progressively simpler generation methods while maintaining medical value.

        Args:
            original_prompt: The complete RAG prompt that failed in primary generation
            primary_error: Error details from the primary generation attempt
            primary_result: Primary result content (if available) for simplification

        Returns:
            Dict containing successful fallback response or final error response
        """
        logger.info("🔄 FALLBACK: Attempting fallback generation strategies")

        # Fallback 1: RAG-only template response
        try:
            logger.info("📍 FALLBACK 1: RAG-only template response")
            fallback_1_result = self._attempt_rag_template(original_prompt, primary_error)

            if not fallback_1_result.get('error'):
                logger.info("✅ FALLBACK 1: Success - RAG template response")
                # Mark response as fallback method 1
                fallback_1_result['fallback_method'] = 'rag_template'
                fallback_1_result['primary_error'] = primary_error
                return fallback_1_result
            else:
                logger.warning(f"❌ FALLBACK 1: Failed - {fallback_1_result.get('error')}")

        except Exception as e:
            logger.error(f"❌ FALLBACK 1: Exception - {e}")

        # Fallback 2: Minimal template response
        try:
            logger.info("📍 FALLBACK 2: Minimal template response")
            user_query = self._extract_user_query_from_prompt(original_prompt)
            minimal_response = self._generate_minimal_template_response(user_query or "medical query")

            logger.info("✅ FALLBACK 2: Success - Minimal template response")
            return {
                'extracted_condition': 'minimal_template_response',
                'confidence': str(FALLBACK_CONFIDENCE_SCORES['fallback_2']),
                'raw_response': minimal_response,
                'fallback_method': 'minimal_template',
                'primary_error': primary_error,
                'latency': 0.1,
                'template_based': True
            }

        except Exception as e:
            logger.error(f"❌ FALLBACK 2: Exception - {e}")

        # All fallbacks failed - return comprehensive error response
        logger.error("🚫 ALL FALLBACKS FAILED: Returning final error response")
        return self._generate_final_error_response(original_prompt, primary_error)

    def _generate_final_error_response(self, original_prompt: str, primary_error: str) -> Dict[str, Any]:
        """
        Generate final error response when all fallback methods fail

        Args:
            original_prompt: Original RAG prompt that failed
            primary_error: Primary generation error details

        Returns:
            Comprehensive error response with fallback attempt details
        """
        return {
            'extracted_condition': '',
            'confidence': '0',
            'raw_response': ('All generation methods failed. Please try rephrasing your query '
                             'or contact technical support.'),
            'error': f"Primary: {primary_error}. All fallback methods failed.",
            'fallback_method': 'none',
            'latency': 0.0
        }
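
    # Fallback chain at a glance:
    #   primary    : Med42-70B with full RAG prompt   (confidence from model)
    #   fallback_1 : RAG template, no LLM call        (confidence capped at 0.4)
    #   fallback_2 : minimal template, no LLM call    (confidence 0.2)
    #   final      : error response via _generate_final_error_response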

    def _attempt_simplified_med42(self, original_prompt: str, primary_error: str) -> Dict[str, Any]:
        """
        Call Med42-70B with a simplified prompt, without RAG context

        NOTE: This tier is retained for reference only; the simplified fallback chain
        in _attempt_fallback_generation does not call it. It previously shared the
        name _attempt_rag_template with the template method below, which shadowed it
        entirely, so it carries its own name here.

        Args:
            original_prompt: Original RAG prompt that failed
            primary_error: Error from primary generation attempt

        Returns:
            Dict with generation result or error details
        """
        logger.info("📍 SIMPLIFIED: Med42-70B without RAG context")

        try:
            # Extract user query from complex RAG prompt
            user_query = self._extract_user_query_from_prompt(original_prompt)
            if not user_query:
                logger.error("❌ SIMPLIFIED: Failed to extract user query from prompt")
                return {
                    'error': 'Unable to extract user query from original prompt',
                    'fallback_method': 'med42_simplified'
                }

            # Create simplified prompt for Med42-70B
            simplified_prompt = f"As a medical professional, provide concise clinical guidance for the following case: {user_query}"

            logger.info(f"🔄 SIMPLIFIED: Calling Med42-70B with simplified prompt "
                        f"(max_tokens={FALLBACK_TOKEN_LIMITS['fallback_1']}, timeout={FALLBACK_TIMEOUTS['fallback_1']}s)")

            # Call Med42-70B with reduced parameters
            # NOTE: the current fallback_1 config values (0 tokens, 1.0s) belong to the
            # template-based tier; this path would need its own config entries if it
            # were ever re-enabled.
            result = self.llm_client.analyze_medical_query(
                query=simplified_prompt,
                max_tokens=FALLBACK_TOKEN_LIMITS["fallback_1"],
                timeout=FALLBACK_TIMEOUTS["fallback_1"]
            )

            # Check for API errors
            if result.get('error'):
                logger.warning(f"❌ SIMPLIFIED: API error - {result['error']}")
                return {
                    'error': f"Med42-70B API error: {result['error']}",
                    'fallback_method': 'med42_simplified',
                    'primary_error': primary_error
                }

            # Check for empty response
            raw_response = result.get('raw_response', '').strip()
            if not raw_response:
                logger.warning("❌ SIMPLIFIED: Empty response from Med42-70B")
                return {
                    'error': 'Empty response from simplified Med42-70B call',
                    'fallback_method': 'med42_simplified'
                }

            # Success - format response with fallback metadata
            logger.info("✅ SIMPLIFIED: Success - Med42-70B without RAG")

            # Adjust confidence score for fallback method
            original_confidence = float(result.get('confidence', '0.5'))
            fallback_confidence = min(original_confidence, FALLBACK_CONFIDENCE_SCORES['fallback_1'])

            return {
                'extracted_condition': result.get('extracted_condition', 'simplified_med42_response'),
                'confidence': str(fallback_confidence),
                'raw_response': raw_response,
                'fallback_method': 'med42_simplified',
                'primary_error': primary_error,
                'latency': result.get('latency', 0),
                'simplified_prompt_used': True
            }

        except Exception as e:
            logger.error(f"❌ SIMPLIFIED: Exception during simplified Med42-70B call - {e}")
            return {
                'error': f"Exception in simplified Med42-70B: {str(e)}",
                'fallback_method': 'med42_simplified',
                'primary_error': primary_error
            }

    def _attempt_rag_template(self, original_prompt: str, primary_error: str) -> Dict[str, Any]:
        """
        Generate template-based response using available RAG context (Fallback 1)

        This method creates a structured response using retrieved medical guidelines
        without LLM processing:
        - Instant response (no API calls)
        - Template-based formatting
        - Uses extracted RAG context from original prompt
        - Lower confidence score (0.4)

        Args:
            original_prompt: Original RAG prompt that failed
            primary_error: Error from primary generation attempt

        Returns:
            Dict with template response or error details
        """
logger.info("📍 FALLBACK 1: RAG-only template response") try: # Extract user query and RAG context from original prompt user_query = self._extract_user_query_from_prompt(original_prompt) rag_context = self._extract_rag_context_from_prompt(original_prompt) if not user_query: logger.error("❌ FALLBACK 1: Failed to extract user query") return { 'error': 'Unable to extract user query for template response', 'fallback_method': 'rag_template' } if not rag_context: logger.warning("⚠️ FALLBACK 2: No RAG context available, using minimal template") # Create minimal response without RAG context template_response = self._generate_minimal_template_response(user_query) else: # Create full template response with RAG context template_response = self._generate_rag_template_response(user_query, rag_context) logger.info("✅ FALLBACK 1: Success - RAG template response") return { 'extracted_condition': 'rag_template_response', 'confidence': str(FALLBACK_CONFIDENCE_SCORES['fallback_1']), # 0.4 (renamed) 'raw_response': template_response, 'fallback_method': 'rag_template', 'primary_error': primary_error, 'latency': 0.1, # Nearly instant 'template_based': True } except Exception as e: logger.error(f"❌ FALLBACK 2: Exception during template generation - {e}") return { 'error': f"Exception in RAG template generation: {str(e)}", 'fallback_method': 'rag_template', 'primary_error': primary_error } def _generate_rag_template_response(self, user_query: str, rag_context: str) -> str: """ Create structured template response from RAG content Args: user_query: Original user medical question rag_context: Retrieved medical guideline text Returns: Formatted template response string """ # Format RAG content for better readability formatted_context = self._format_rag_content(rag_context) template = f"""Based on available medical guidelines for your query: "{user_query}" CLINICAL GUIDANCE: {formatted_context} IMPORTANT CLINICAL NOTES: • This guidance is based on standard medical protocols and guidelines • Individual patient factors may require modifications to these recommendations • Consider patient-specific contraindications and comorbidities • Consult with senior physician or specialist for complex cases • Follow local institutional protocols and policies SYSTEM NOTE: This response was generated using medical guidelines only, without advanced clinical reasoning, due to technical limitations with the primary system. For complex cases requiring detailed clinical analysis, please consult directly with medical professionals. Please ensure appropriate clinical oversight and use professional medical judgment in applying these guidelines.""" return template def _generate_minimal_template_response(self, user_query: str) -> str: """ Create minimal template response when no RAG context is available Args: user_query: Original user medical question Returns: Minimal template response string """ template = f"""Regarding your medical query: "{user_query}" SYSTEM STATUS: Due to technical difficulties with our medical guidance system, we cannot provide specific clinical recommendations at this time. RECOMMENDED ACTIONS: • Please consult with qualified healthcare providers for immediate clinical guidance • Contact your primary care physician or relevant specialist • For emergency situations, seek immediate medical attention • Consider consulting medical literature or clinical decision support tools IMPORTANT: This system experienced technical limitations that prevented access to our medical guideline database. 

    def _generate_minimal_template_response(self, user_query: str) -> str:
        """
        Create minimal template response when no RAG context is available

        Args:
            user_query: Original user medical question

        Returns:
            Minimal template response string
        """
        template = f"""Regarding your medical query: "{user_query}"

SYSTEM STATUS: Due to technical difficulties with our medical guidance system, we cannot provide specific clinical recommendations at this time.

RECOMMENDED ACTIONS:
• Please consult with qualified healthcare providers for immediate clinical guidance
• Contact your primary care physician or relevant specialist
• For emergency situations, seek immediate medical attention
• Consider consulting medical literature or clinical decision support tools

IMPORTANT: This system experienced technical limitations that prevented access to our medical guideline database. Professional medical consultation is strongly recommended for this query.

Please try rephrasing your question or contact our technical support if the issue persists."""

        return template

    def _format_rag_content(self, rag_context: str) -> str:
        """
        Format RAG context content for better readability in template responses

        Args:
            rag_context: Raw RAG context text

        Returns:
            Formatted and structured RAG content
        """
        try:
            # Clean up the content
            lines = rag_context.split('\n')
            formatted_lines = []

            for line in lines:
                line = line.strip()
                if line and len(line) > 10:  # Skip very short lines
                    # Add bullet points for better structure
                    if not line.startswith(('•', '-', '*', '1.', '2.', '3.')):
                        line = f"• {line}"
                    formatted_lines.append(line)

            # Limit to reasonable length
            if len(formatted_lines) > 10:
                formatted_lines = formatted_lines[:10]
                formatted_lines.append("• [Additional guidelines available - truncated for brevity]")

            return '\n'.join(formatted_lines)

        except Exception as e:
            logger.error(f"Error formatting RAG content: {e}")
            return f"• {rag_context[:500]}..."  # Fallback formatting

    def _extract_user_query_from_prompt(self, rag_prompt: str) -> str:
        """
        Extract original user query from complex RAG prompt structure

        This function parses the structured RAG prompt to extract the original
        user medical query, which is needed for simplified Med42-70B calls.

        Args:
            rag_prompt: Complete RAG prompt with structure like:
                'You are an experienced physician...
                 Clinical Question: {user_query}
                 Relevant Medical Guidelines: {context}...'

        Returns:
            Extracted user query string, or empty string if extraction fails
        """
        try:
            # Method 1: Look for "Clinical Question:" section
            clinical_question_pattern = r"Clinical Question:\s*\n?\s*(.+?)(?:\n\s*\n|\nRelevant Medical Guidelines|$)"
            match = re.search(clinical_question_pattern, rag_prompt, re.DOTALL | re.IGNORECASE)

            if match:
                extracted_query = match.group(1).strip()
                logger.info(f"🎯 Extracted user query via 'Clinical Question' pattern: {extracted_query[:50]}...")
                return extracted_query

            # Method 2: Look for common medical query patterns at the start
            # This handles cases where the prompt might be simpler
            lines = rag_prompt.split('\n')
            for line in lines:
                line = line.strip()
                # Skip system instructions and headers
                if (line and
                        not line.startswith('You are') and
                        not line.startswith('Provide') and
                        not line.startswith('Instructions') and
                        not line.startswith('Relevant Medical') and
                        len(line) > 10):
                    logger.info(f"🎯 Extracted user query via line parsing: {line[:50]}...")
                    return line

            # Method 3: Fallback - return the first substantial line
            for line in lines:
                line = line.strip()
                if len(line) > 20 and not line.startswith(('You are', 'As a', 'Provide')):
                    logger.warning(f"⚠️ Using fallback extraction method: {line[:50]}...")
                    return line

            logger.error("❌ Failed to extract user query from prompt")
            return ""

        except Exception as e:
            logger.error(f"❌ Error extracting user query: {e}")
            return ""
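
    # Illustrative extraction (assuming the prompt shape built by _construct_medical_prompt):
    #   rag_prompt = "You are an experienced attending physician...\n\n" \
    #                "Clinical Question: How should I treat chest pain?\n\n" \
    #                "Relevant Medical Guidelines:\n..."
    #   → Method 1 captures "How should I treat chest pain?", stopping at the blank
    #     line before the "Relevant Medical Guidelines" delimiter.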

    def _extract_rag_context_from_prompt(self, rag_prompt: str) -> str:
        """
        Extract RAG context/guidelines from complex RAG prompt structure

        This function extracts the medical guideline content for use in
        template-based responses (Fallback 1).

        Args:
            rag_prompt: Complete RAG prompt containing medical guidelines

        Returns:
            Extracted RAG context string, or empty string if extraction fails
        """
        try:
            # Look for "Relevant Medical Guidelines:" section
            guidelines_pattern = r"Relevant Medical Guidelines:\s*\n?\s*(.+?)(?:\n\s*Instructions:|$)"
            match = re.search(guidelines_pattern, rag_prompt, re.DOTALL | re.IGNORECASE)

            if match:
                extracted_context = match.group(1).strip()
                logger.info(f"🎯 Extracted RAG context: {len(extracted_context)} characters")
                return extracted_context

            # Fallback: look for any substantial medical content
            lines = rag_prompt.split('\n')
            context_lines = []
            in_context_section = False

            for line in lines:
                line = line.strip()
                # Start collecting after finding medical content indicators
                if any(indicator in line.lower() for indicator in
                       ['guideline', 'protocol', 'treatment', 'management', 'clinical']):
                    in_context_section = True

                if in_context_section and len(line) > 20:
                    context_lines.append(line)

            if context_lines:
                extracted_context = '\n'.join(context_lines)
                logger.info(f"🎯 Extracted RAG context via fallback method: {len(extracted_context)} characters")
                return extracted_context

            logger.warning("⚠️ No RAG context found in prompt")
            return ""

        except Exception as e:
            logger.error(f"❌ Error extracting RAG context: {e}")
            return ""


# Example usage and testing
def main():
    """
    Test the medical advice generation system
    """
    # Initialize generator
    generator = MedicalAdviceGenerator()

    # Example retrieval results (simulated)
    example_retrieval_results = {
        "processed_results": [
            {
                "type": "emergency",
                "distance": 0.3,
                "text": ("Acute myocardial infarction requires immediate assessment including ECG, "
                         "cardiac enzymes, and chest X-ray. Time-sensitive condition requiring rapid intervention."),
                "matched": "MI|chest pain"
            },
            {
                "type": "treatment",
                "distance": 0.25,
                "text": ("Treatment protocol for STEMI includes aspirin 325mg, clopidogrel loading dose, "
                         "and urgent PCI within 90 minutes when available."),
                "matched_treatment": "aspirin|PCI|thrombolytic"
            }
        ]
    }

    # Test queries
    test_queries = [
        ("How should I treat a patient with chest pain?", "treatment"),
        ("What are the signs of acute MI?", "diagnosis"),
        # ("Emergency management of cardiac arrest", "STAT(tentative)")
    ]

    for query, intention in test_queries:
        print(f"\n{'='*60}")
        print(f"Testing: {query}")
        print(f"Intention: {intention}")

        try:
            result = generator.generate_medical_advice(
                user_query=query,
                retrieval_results=example_retrieval_results,
                intention=intention
            )
            print(f"✅ Success: {result['confidence_score']:.2f} confidence")
            print(f"Advice: {result['medical_advice'][:200]}...")
        except Exception as e:
            print(f"❌ Error: {e}")


if __name__ == "__main__":
    main()