Spaces:

DeadPool1236
/

Well-Being-Agent

Sleeping

App Files Files Community

Well-Being-Agent / Agent.py

DeadPool1236

Upload 11 files

cb55d04 verified 26 days ago

raw

history blame contribute delete

51.7 kB

	import os
	import json
	import requests
	import time
	import pickle
	import hashlib
	import random
	import re
	from typing import List, Optional, Any
	import logging
	from dotenv import load_dotenv
	from datetime import datetime

	# === LANGUAGE DETECTION IMPORTS ===
	from langdetect import detect, DetectorFactory
	DetectorFactory.seed = 0
	# ==================================

	# === OPENAI CLIENT IMPORT ===
	from openai import OpenAI
	# ============================

	# === HUGGING FACE DETECTION ===
	# A Docker marker file or a SPACE_ID variable is taken as the sign that the
	# app is running as a Hugging Face Space.
	IS_HUGGING_FACE = bool(os.path.exists('/.dockerenv') or 'SPACE_ID' in os.environ)

	if IS_HUGGING_FACE:
	    # On Spaces no .env file is read; secrets are expected in the environment.
	    print("🚀 Hugging Face Space detected")
	    os.environ['FORCE_FREE_MODEL'] = 'true'
	    print("✅ Hugging Face environment - using repository secrets")
	else:
	    # Local run: pick up variables from config/.env when that file is present.
	    env_path = os.path.join("config", ".env")
	    print(f"🔍 Looking for .env file at: {env_path}")
	    if not os.path.exists(env_path):
	        print(f"❌ .env file not found at: {env_path}")
	    else:
	        load_dotenv(env_path)
	        print("✅ .env file loaded successfully")

	# === CACHE CONFIGURATION ===
	# Directory (relative path) that holds on-disk cache files.
	CACHE_DIR = "cache"
	# Pickle file inside CACHE_DIR where ResponseCache persists its entries.
	RESPONSE_CACHE_FILE = os.path.join(CACHE_DIR, "response_cache.pkl")

	class ResponseCache:
	    """Simple response cache to avoid repeated LLM calls.

	    Entries are kept in memory as ``key -> (timestamp, response)`` and the
	    whole mapping is pickled to ``RESPONSE_CACHE_FILE`` after every ``set``.
	    Entries older than ``TTL_SECONDS`` are dropped when they are looked up.
	    Persistence is best-effort: disk problems are reported, never raised.
	    """

	    # Entries older than this many seconds are treated as missing.
	    TTL_SECONDS = 24 * 3600

	    def __init__(self):
	        self.cache = {}
	        self.load_cache()

	    def get_cache_key(self, query: str, context_chunks: List[Any]) -> str:
	        """Generate a cache key from the query and the retrieved context.

	        The key is ``<md5(query)>_<md5(concatenated chunk texts)>``; chunks
	        without a ``text`` attribute are ignored, and an empty chunk list
	        yields the suffix ``no_context``. MD5 is used only as a fingerprint.
	        """
	        query_hash = hashlib.md5(query.encode()).hexdigest()
	        if not context_chunks:
	            return f"{query_hash}_no_context"
	        context_text = "".join(
	            chunk.text for chunk in context_chunks if hasattr(chunk, 'text')
	        )
	        context_hash = hashlib.md5(context_text.encode()).hexdigest()
	        return f"{query_hash}_{context_hash}"

	    def get(self, key: str) -> Optional[str]:
	        """Return the cached response for *key*, or None if absent or expired."""
	        if key not in self.cache:
	            return None
	        try:
	            cached_time, response = self.cache[key]
	            is_fresh = time.time() - cached_time < self.TTL_SECONDS
	        except (TypeError, ValueError):
	            # Entry is not a (timestamp, response) pair, e.g. from a damaged
	            # cache file; treat it as expired instead of crashing the lookup.
	            is_fresh = False
	        if is_fresh:
	            return response
	        del self.cache[key]
	        return None

	    def set(self, key: str, response: str):
	        """Cache *response* under *key* with the current time and persist."""
	        self.cache[key] = (time.time(), response)
	        self.save_cache()

	    def save_cache(self):
	        """Write the cache to disk; failures are printed and otherwise ignored.

	        The data is written to a temporary file first and then moved into
	        place so an interrupted write cannot leave a truncated cache file.
	        """
	        tmp_path = f"{RESPONSE_CACHE_FILE}.tmp"
	        try:
	            os.makedirs(CACHE_DIR, exist_ok=True)
	            with open(tmp_path, 'wb') as f:
	                pickle.dump(self.cache, f)
	            os.replace(tmp_path, RESPONSE_CACHE_FILE)
	        except (OSError, pickle.PicklingError) as e:
	            # A cache that cannot be saved must not break answering.
	            print(f"⚠️ Could not save cache: {e}")

	    def load_cache(self):
	        """Load the cache from disk, falling back to an empty cache on error."""
	        try:
	            if os.path.exists(RESPONSE_CACHE_FILE):
	                # SECURITY: pickle.load can execute code embedded in the file.
	                # Only safe while this file is written solely by save_cache.
	                with open(RESPONSE_CACHE_FILE, 'rb') as f:
	                    loaded = pickle.load(f)
	                if not isinstance(loaded, dict):
	                    raise ValueError("cache file does not contain a dict")
	                self.cache = loaded
	                print(f"✅ Loaded response cache with {len(self.cache)} entries")
	        except Exception as e:
	            print(f"⚠️ Could not load cache: {e}")
	            self.cache = {}

	# Initialize cache globally
	# Module-level instance; constructing it loads any existing cache file from disk.
	response_cache = ResponseCache()

	# === Conversation Logger ===
	class ConversationLogger:
	    """JSON-based conversation logging system.

	    The log file holds one JSON array. Every call to ``log_conversation``
	    reads the array, appends one entry and rewrites the file. Logging is
	    best-effort: failures are printed and never raised to the caller.
	    """

	    def __init__(self, log_file="conversations.json"):
	        self.log_file = log_file
	        self.ensure_log_file()

	    def ensure_log_file(self):
	        """Create the log file with an empty JSON array if it does not exist."""
	        if not os.path.exists(self.log_file):
	            self._write_entries([])

	    def _write_entries(self, entries):
	        """Overwrite the log file with *entries* serialized as JSON."""
	        with open(self.log_file, 'w', encoding='utf-8') as f:
	            json.dump(entries, f, indent=2, ensure_ascii=False)

	    def _read_entries(self):
	        """Return the logged entries, recovering from a missing or corrupt file.

	        A file that is not a valid JSON array is moved to ``<log_file>.corrupt``
	        so its content is kept for inspection and logging can continue.
	        """
	        try:
	            with open(self.log_file, 'r', encoding='utf-8') as f:
	                entries = json.load(f)
	            if not isinstance(entries, list):
	                raise ValueError("log file does not contain a JSON array")
	            return entries
	        except FileNotFoundError:
	            # File was removed after start-up; begin a fresh log.
	            return []
	        except ValueError as e:
	            # Covers json.JSONDecodeError and UnicodeDecodeError as well.
	            backup_path = f"{self.log_file}.corrupt"
	            os.replace(self.log_file, backup_path)
	            print(f"⚠️ Conversation log was unreadable ({e}); moved to {backup_path}")
	            return []

	    def log_conversation(self, user_input: str, llm_response: str, language: str, response_type: str):
	        """Append one conversation turn, stamped with the current local time."""
	        try:
	            conversations = self._read_entries()
	            conversations.append({
	                "timestamp": datetime.now().isoformat(),
	                "user_input": user_input,
	                "llm_response": llm_response,
	                "language": language,
	                "response_type": response_type,
	            })
	            self._write_entries(conversations)
	            print(f"💾 Conversation logged to {self.log_file}")
	        except (OSError, TypeError, ValueError) as e:
	            print(f"❌ Error logging conversation: {e}")

	# Initialize global logger
	# Module-level instance using the default "conversations.json" path; constructing
	# it creates that file when it does not exist yet.
	conversation_logger = ConversationLogger()

	# === Centralized Configuration System ===
	class Config:
	    """Centralized configuration - loads ONLY from config/config.json for all environments.

	    Settings are the built-in defaults overridden by whatever keys
	    ``config/config.json`` provides. API keys are read from the environment
	    variables ``API_KEY`` and ``API_KEY_2`` .. ``API_KEY_5`` and can be
	    rotated with ``rotate_to_next_key``.
	    """

	    def __init__(self):
	        self.api_keys = []
	        self.current_key_index = 0
	        self.settings = self._load_config_file()
	        self._validate_and_correct_paths()

	        self.SUPPORTED_LANGUAGES = ["english", "urdu"]
	        self.DEFAULT_LANGUAGE = "english"

	        # Apply settings
	        self.MODEL_PROVIDER = self.settings["model_provider"]
	        self.MODEL_ID = self.settings["model_id"]
	        self.API_KEYS_FOLDER = self.settings["api_keys_folder"]
	        self.INDEX_PATH = self.settings["index_path"]
	        self.DATASET_PATH = self.settings["dataset_path"]
	        self.SIMILARITY_TOP_K = self.settings["similarity_top_k"]
	        self.TEMPERATURE = self.settings["temperature"]
	        self.MAX_TOKENS = self.settings["max_tokens"]
	        self.FALLBACK_MESSAGE = self.settings["fallback_message"]

	        self.api_keys = self._load_api_keys()
	        self.api_key = self._get_current_api_key()

	        self._validate_config()

	    def _load_config_file(self):
	        """Return the settings dict: defaults overridden by config/config.json.

	        If the file does not exist it is created with the defaults. Any error
	        while reading or writing falls back to the defaults.
	        """
	        config_file = os.path.join("config", "config.json")

	        # Default configuration as fallback
	        default_config = {
	            "model_provider": "openrouter",
	            "model_id": "meta-llama/llama-3.3-70b-instruct:free",
	            "api_keys_folder": "config",
	            "index_path": "cancer_index_store",
	            "dataset_path": "breast_cancer.json",
	            "similarity_top_k": 5,
	            "temperature": 0.2,
	            "max_tokens": 350,
	            "fallback_message": "Sorry, I don't know the answer."
	        }

	        try:
	            if os.path.exists(config_file):
	                print(f"✅ Loading configuration from: {config_file}")
	                with open(config_file, 'r', encoding='utf-8') as f:
	                    loaded_config = json.load(f)

	                if not isinstance(loaded_config, dict):
	                    raise ValueError("config.json must contain a JSON object")

	                # Merge with defaults for missing keys; values from the file win.
	                # (A set literal of the two dicts raised TypeError here, which
	                # silently discarded every setting from config.json.)
	                final_config = {**default_config, **loaded_config}

	                print("📋 Configuration loaded successfully from config.json")
	                return final_config

	            # Create directory and config file if it doesn't exist
	            os.makedirs(os.path.dirname(config_file), exist_ok=True)
	            with open(config_file, 'w', encoding='utf-8') as f:
	                json.dump(default_config, f, indent=4)
	            print(f"📁 Created default config file at: {config_file}")
	            return default_config

	        except Exception as e:
	            print(f"❌ Error loading config from {config_file}: {e}")
	            print("🔄 Using default configuration as fallback")
	            return default_config

	    def _validate_and_correct_paths(self):
	        """Point settings["dataset_path"] at an existing file when possible.

	        If the configured path does not exist, a few conventional locations
	        are tried in order and the first existing one is adopted.
	        """
	        original_dataset_path = self.settings["dataset_path"]
	        if os.path.exists(original_dataset_path):
	            print(f"✅ Dataset found at: {original_dataset_path}")
	            return

	        possible_paths = [
	            original_dataset_path,
	            f"DataSet/{original_dataset_path}",
	            f"data/{original_dataset_path}",
	            "DataSet/breast_cancer.json",
	            "breast_cancer.json"
	        ]

	        for path in possible_paths:
	            if os.path.exists(path):
	                if path != original_dataset_path:
	                    print(f"🔄 Using dataset at: {path}")
	                    self.settings["dataset_path"] = path
	                else:
	                    print(f"✅ Dataset found at: {path}")
	                return

	        print(f"❌ Dataset not found in any location")

	    def _load_api_keys(self) -> List[str]:
	        """Collect non-empty API keys from the API_KEY* environment variables."""
	        api_keys = []
	        print("🔍 Checking for API keys in environment variables...")

	        keys_to_check = ["API_KEY", "API_KEY_2", "API_KEY_3", "API_KEY_4", "API_KEY_5"]

	        for key_name in keys_to_check:
	            key_value = os.getenv(key_name)
	            if key_value and key_value.strip():
	                api_keys.append(key_value.strip())
	                print(f"✅ Found {key_name}")

	        return api_keys

	    def _get_current_api_key(self) -> str:
	        """Return the active API key, or "" when none is available."""
	        if self.api_keys and self.current_key_index < len(self.api_keys):
	            return self.api_keys[self.current_key_index]
	        return ""

	    def rotate_to_next_key(self) -> bool:
	        """Advance to the next API key; return False when already on the last one."""
	        if self.current_key_index < len(self.api_keys) - 1:
	            self.current_key_index += 1
	            self.api_key = self._get_current_api_key()
	            print(f"🔄 Rotated to API key {self.current_key_index + 1}")
	            return True

	        print("❌ No more API keys available")
	        return False

	    def _validate_config(self):
	        """Report missing API keys and print the effective configuration."""
	        if not self.api_keys:
	            print("❌ No API keys found in environment variables")
	            if IS_HUGGING_FACE:
	                print("💡 Please add API keys in Hugging Face Space Settings → Repository secrets")
	        else:
	            print(f"✅ Found {len(self.api_keys)} API key(s)")

	        # Print current configuration
	        print("📋 Current Configuration (from config.json):")
	        print(f" Model Provider: {self.MODEL_PROVIDER}")
	        print(f" Model ID: {self.MODEL_ID}")
	        print(f" Index Path: {self.INDEX_PATH}")
	        print(f" Dataset Path: {self.DATASET_PATH}")
	        print(f" Similarity Top K: {self.SIMILARITY_TOP_K}")
	        print(f" Temperature: {self.TEMPERATURE}")
	        print(f" Max Tokens: {self.MAX_TOKENS}")

	# Initialize configuration
	# Module-level instance used by the code below; constructing it reads
	# config/config.json, collects API keys from the environment and prints a summary.
	config = Config()

	# === Setup Logging ===
	# Console logging everywhere; outside Hugging Face also write to rag_system.log.
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    handlers=(
	        [logging.StreamHandler()]
	        if IS_HUGGING_FACE
	        else [logging.StreamHandler(), logging.FileHandler('rag_system.log')]
	    ),
	)

	# === FAST INDEX LOADING ===
	def load_index_fast():
	    """Load the persisted vector index and build a retriever over it.

	    Returns:
	        ``(index, retriever)`` on success, ``(None, None)`` when the index
	        directory is missing or loading fails for any reason.
	    """
	    try:
	        from llama_index.core import StorageContext, load_index_from_storage
	        from llama_index.embeddings.huggingface import HuggingFaceEmbedding

	        print(f"🔍 Loading index from: {config.INDEX_PATH}")
	        if not os.path.exists(config.INDEX_PATH):
	            print(f"❌ Index path doesn't exist: {config.INDEX_PATH}")
	            return None, None

	        embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
	        storage_context = StorageContext.from_defaults(persist_dir=config.INDEX_PATH)
	        # Load the index that was persisted in this directory. Building one
	        # with VectorStoreIndex.from_documents([]) creates a *new, empty*
	        # index structure instead of reusing the stored one.
	        # NOTE(review): if the store holds several indices, an index_id must
	        # be passed; that case currently ends in the except branch below.
	        index = load_index_from_storage(storage_context, embed_model=embed_model)
	        retriever = index.as_retriever(similarity_top_k=config.SIMILARITY_TOP_K)
	        print("✅ Index loaded successfully")
	        return index, retriever

	    except Exception as e:
	        print(f"❌ Failed to load index: {e}")
	        import traceback
	        traceback.print_exc()
	        return None, None

	def load_index():
	    """Delegate to load_index_fast and return its result unchanged."""
	    loaded = load_index_fast()
	    return loaded

	# === Enhanced RAG System Class ===
	class BreastCancerRAGSystem:
	"""Enhanced RAG system for breast cancer information with emotional support"""

	def __init__(self, index, retriever):
	    """Keep the index/retriever pair and start with an empty conversation history."""
	    self.index, self.retriever = index, retriever
	    self.conversation_history = []

	    # Missing API keys are only reported here; construction still succeeds.
	    if config.api_keys:
	        return
	    logging.error("🚫 System initialized without API key - LLM features will not work")

	def get_predefined_questions(self, language: str = "english") -> List[dict]:
	    """Return the suggested starter questions for the given language.

	    Each item is a dict with the keys "question", "category" and "icon".
	    The Urdu list is returned for ``language == "urdu"``; any other value
	    yields the English list.
	    """
	    field_names = ("question", "category", "icon")

	    english_rows = (
	        ("What are some gentle exercises I can do during recovery?", "exercise", "fas fa-walking"),
	        ("How do I deal with anxiety about my next treatment?", "emotional", "fas fa-heart"),
	        ("When can I expect my hair to grow back after treatment?", "appearance", "fas fa-user"),
	        ("How do I talk to my family about my diagnosis?", "emotional", "fas fa-users"),
	        ("What are the signs of infection I should watch for?", "symptoms", "fas fa-exclamation-triangle"),
	    )

	    urdu_rows = (
	        ("کیموتھراپی کے دوران تھکاوٹ کیسے کم کریں؟", "symptoms", "fas fa-bed"),
	        ("ریکوری کے دوران ہلکی پھلکی ورزشیں کون سی ہیں؟", "exercise", "fas fa-walking"),
	        ("اگلے علاج کے بارے میں پریشانی کیسے دور کریں؟", "emotional", "fas fa-heart"),
	        ("کیموتھراپی کے بعد متلی کے لیے کون سی غذائیں مفید ہیں؟", "nutrition", "fas fa-apple-alt"),
	        ("ماسٹکٹومی کے بعد درد کیسے منظم کریں؟", "pain", "fas fa-hand-holding-heart"),
	    )

	    selected_rows = urdu_rows if language == "urdu" else english_rows
	    return [dict(zip(field_names, row)) for row in selected_rows]

	def detect_language(self, text: str) -> str:
	    """Detect the language of a user query.

	    Returns 'urdu' when the text contains any character from the Arabic
	    script blocks checked below or when langdetect reports 'ur'; otherwise
	    'english'. Empty, blank or non-string input and detector failures all
	    yield 'english'.
	    """
	    if not isinstance(text, str) or not text.strip():
	        return 'english'

	    # NOTE(review): these ranges cover the Arabic script in general, so
	    # Arabic or Persian input is also classified as Urdu - confirm intended.
	    if re.search(r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]', text):
	        return 'urdu'

	    try:
	        return 'urdu' if detect(text) == 'ur' else 'english'
	    except Exception:
	        # langdetect raises when it cannot extract features (e.g. digits
	        # only). Unlike a bare except, this lets KeyboardInterrupt and
	        # SystemExit propagate.
	        return 'english'

	def _clean_urdu_text(self, text: str) -> str:
	"""Advanced cleaning for Urdu text with comprehensive spelling correction"""
	if not text or not text.strip():
	return text

	# Comprehensive spelling correction dictionary
	spelling_corrections = {
	# Character repetition fixes
	'مجہے': 'مجھے',
	'پروگرہوں': 'پروگرام',
	'کہےنسر': 'کینسر',
	'ڈڈاکٹر': 'ڈاکٹر',
	'ہےہ': 'ہے',
	'مہےں': 'میں',
	'ہےں': 'ہیں',
	'ھے': 'ہے',
	'ھوں': 'ہوں',
	'ھیں': 'ہیں',
	'ےے': 'ے',
	'ںں': 'ں',
	'ہہ': 'ہ',
	'یی': 'ی',

	# Common phrase corrections
	'ے لہےے': 'کے لیے',
	'کا ے لہےے': 'کے لیے',
	'و ہےہ': 'کو',
	'ہےقہےن': 'یقین',
	'اکہےلے': 'اکیلے',
	'نہہےں': 'نہیں',
	'ہہےں': 'ہیں',
	'کا ے': 'کے',
	'ساتھ ہہےں': 'ساتھ ہیں',
	'تجوہےز': 'تجویز',
	'ضرورہے': 'ضروری',
	'بارے مہےں': 'بارے میں',
	'کرہےں': 'کریں',
	'بہترہےن': 'بہترین',
	'ہے مدد': 'کی مدد',
	'خوشہے': 'خوشی',
	'ترجہےح': 'ترجیح',
	'جسے سے': 'جس سے',

	# Medical term corrections
	'برہےسٹ': 'بریسٹ',
	'کہےموتھراپہے': 'کیموتھراپی',
	'متلہے': 'متلی',
	'غذائہےں': 'غذائیں',
	'چربہے': 'چربی',
	'ہلکے': 'ہلکی',
	'آسانہے': 'آسانی',
	'ہائہےڈرہےٹنگ': 'ہائیڈریٹنگ',
	'ہائہےڈرہےٹڈ': 'ہائیڈریٹڈ',

	# Grammar and structure fixes
	'کرنےے': 'کرنے',
	'ہونےے': 'ہونے',
	'سکتےے': 'سکتے',
	'سکتیی': 'سکتی',
	'والےے': 'والے',
	'والیی': 'والی',
	'کہے': 'کے',
	'ہےے': 'ہے',

	# Common word fixes
	'ام ': 'ہوں ',
	'می ': 'میں ',
	'آپ ک': 'آپ کا ',
	'دوران ': 'دوران ',
	'عام ': 'عام ',
	'مسئل ': 'مسئلہ ',
	'اس ': 'اس ',
	'کو ': 'کو ',
	'کرن ': 'کرنے ',
	'س ': 'سے ',
	'طریق ': 'طریقے ',
	'بتا ': 'بتا ',
	'سکتی ': 'سکتی ',
	'اکٹر': 'ڈاکٹر',
	'اکیل': 'اکیلے',
	'میش': 'میں',
	'وتی': 'ہوتی',
	'لکی': 'ہلکی',
	'بتر': 'بہتر',
	'محفوظ ر': 'محفوظ رکھتی ہے',
	'رشت': 'رشتہ داروں',
	}

	# Apply spelling corrections iteratively
	cleaned_text = text
	for wrong, correct in spelling_corrections.items():
	cleaned_text = cleaned_text.replace(wrong, correct)

	# Fix common grammatical patterns using regex for better coverage
	import re

	# Fix character repetition patterns
	repetition_patterns = [
	(r'ہہ', 'ہ'),
	(r'یی', 'ی'),
	(r'ےے', 'ے'),
	(r'ںں', 'ں'),
	(r'کک', 'ک'),
	(r'گگ', 'گ'),
	]

	for pattern, replacement in repetition_patterns:
	cleaned_text = re.sub(pattern, replacement, cleaned_text)

	# Fix common grammatical patterns
	grammatical_fixes = [
	('ک دوران', 'کے دوران'),
	('ک بار', 'کے بارے'),
	('ک بعد', 'کے بعد'),
	('ک لی', 'کے لیے'),
	('ک ساتھ', 'کے ساتھ'),
	('ک طور', 'کے طور'),
	('ک ذریع', 'کے ذریعے'),
	('ک مطابق', 'کے مطابق'),
	]

	for wrong, correct in grammatical_fixes:
	cleaned_text = cleaned_text.replace(wrong, correct)

	# Fix spacing and punctuation issues
	cleaned_text = re.sub(r'\s+', ' ', cleaned_text) # Multiple spaces to single space
	cleaned_text = re.sub(r' \.', '.', cleaned_text) # Space before period
	cleaned_text = re.sub(r' ،', '،', cleaned_text) # Space before comma
	cleaned_text = re.sub(r' ', ' ', cleaned_text) # Double spaces
	cleaned_text = re.sub(r'۔۔', '۔', cleaned_text) # Double periods

	# Ensure sentence completion and structure
	sentences = cleaned_text.split('۔')
	cleaned_sentences = []

	for sentence in sentences:
	sentence = sentence.strip()
	if sentence and len(sentence.split()) >= 2: # At least 2 words
	# Ensure sentence starts properly (no hanging characters)
	if sentence and sentence[0] in [' ', '،', '۔']:
	sentence = sentence[1:].strip()
	if sentence:
	cleaned_sentences.append(sentence)

	# Reconstruct text with proper punctuation
	if cleaned_sentences:
	cleaned_text = '۔ '.join(cleaned_sentences) + '۔'
	else:
	cleaned_text = cleaned_text.strip()

	# Final normalization
	cleaned_text = cleaned_text.strip()

	return cleaned_text

	def _detect_emotional_needs(self, user_query: str, language: str = "english") -> dict:
	"""Enhanced emotional need detection with better Urdu support"""
	query_lower = user_query.lower()

	# Emotional triggers in both languages
	emotional_triggers_english = [
	"scared", "afraid", "worried", "anxious", "fear", "nervous", "stressed",
	"overwhelmed", "depressed", "sad", "lonely", "alone", "hopeless",
	"can't cope", "struggling", "difficult", "hard time", "suffering",
	"terrified", "panic", "breakdown", "crying", "tears", "misery"
	]

	emotional_triggers_urdu = [
	"خوف", "ڈر", "پریشانی", "فکر", "تنہائی", "اداسی", "مایوسی", "تکلیف",
	"گھبراہٹ", "بے چینی", "بے بسی", "رونا", "آنسو", "دکھ", "غم",
	"ہمت", "طاقت", "حوصلہ", "پرسکون", "سکون", "چین"
	]

	# Information triggers
	info_triggers_english = [
	"what", "how", "when", "where", "which", "why",
	"treatment", "medication", "exercise", "diet", "symptoms",
	"pain", "side effects", "recovery", "diagnosis", "procedure"
	]

	info_triggers_urdu = [
	"کیا", "کیسے", "کب", "کہاں", "کون سا", "کیوں", "کس طرح",
	"علاج", "دوا", "ورزش", "غذا", "علامات", "درد", "مراحل",
	"طریقہ", "عمل", "تفصیل", "معلومات"
	]

	if language == "urdu":
	emotional_triggers = emotional_triggers_urdu
	info_triggers = info_triggers_urdu
	else:
	emotional_triggers = emotional_triggers_english
	info_triggers = info_triggers_english

	# More sophisticated emotional detection
	emotional_score = 0
	for trigger in emotional_triggers:
	if trigger in query_lower:
	emotional_score += 1

	# Context-aware emotional detection
	negative_context_words = ["not", "don't", "no", "never", "n't"]
	has_negative_context = any(word in query_lower for word in negative_context_words)

	info_score = sum(1 for trigger in info_triggers if trigger in query_lower)

	return {
	"needs_emotional_support": emotional_score > 0 and not has_negative_context,
	"needs_information": info_score > 0,
	"emotional_score": emotional_score,
	"info_score": info_score
	}

	def _add_emotional_support(self, response: str, user_query: str, language: str = "english") -> str:
	"""Add natural emotional support integrated into the response"""
	emotional_needs = self._detect_emotional_needs(user_query, language)

	# Always add some level of emotional support, but more if detected
	if language == "urdu":
	if emotional_needs["needs_emotional_support"]:
	# Strong emotional support phrases
	support_phrases = [
	"آپ کی طاقت قابلِ تعریف ہے، اور میں آپ کے ساتھ ہوں۔",
	"یہ مشکل وقت ہے، لیکن آپ اکیلے نہیں ہیں۔ ہم مل کر اس کا سامنا کریں گے۔",
	"آپ کی ہمت اور صبر کو سلام، بہتر دن ضرور آئیں گے۔",
	]
	else:
	# Gentle emotional support phrases
	support_phrases = [
	"آپ کی صحت اور خوشی ہماری پہلی ترجیح ہے۔",
	"یقین رکھیں، ہر طوفان کے بعد سکون ضرور آتا ہے۔",
	"آپ جیسے بہادر لوگ ہی دنیا کو روشن کرتے ہیں۔",
	]
	else:
	if emotional_needs["needs_emotional_support"]:
	# Strong emotional support phrases
	support_phrases = [
	"Your strength is truly admirable, and I'm here with you every step of the way.",
	"This is a challenging time, but you're not alone. We'll face this together.",
	"I want you to know how much courage you're showing, and better days will come.",
	]
	else:
	# Gentle emotional support phrases
	support_phrases = [
	"Your wellbeing and happiness are my top priority right now.",
	"Please remember that after every storm comes calm.",
	"People like you, with such resilience, truly light up the world.",
	]

	# Choose a support phrase that fits naturally
	support_text = random.choice(support_phrases)

	# Integrate support naturally - for Urdu, place at beginning for impact
	if language == "urdu":
	if support_text not in response:
	# Check if response already has emotional content
	if not any(phrase in response for phrase in ['طاقت', 'ہمت', 'حوصلہ', 'سکون', 'خوشی']):
	return f"{support_text}\n\n{response}"
	else:
	if support_text not in response:
	# Check if response already has emotional content
	if not any(phrase in response for phrase in ['strength', 'courage', 'hope', 'together', 'proud']):
	return f"{support_text}\n\n{response}"

	return response

	def retrieve_relevant_chunks(self, user_query: str, language: str = "english") -> List[Any]:
	"""Retrieve relevant chunks with language-specific prioritization"""
	if not hasattr(self, 'retriever') or self.retriever is None:
	print("❌ Retriever not available")
	return []

	try:
	if language == "urdu":
	print("🔍 Prioritizing Urdu content for Urdu query...")
	from llama_index.core.vector_stores import MetadataFilter, MetadataFilters
	urdu_filter = MetadataFilter(key="language", value="urdu")
	urdu_results = self.retriever.retrieve(
	user_query,
	filters=MetadataFilters(filters=[urdu_filter])
	)

	quality_threshold = 0.5
	high_quality_urdu = [
	result for result in urdu_results
	if hasattr(result, 'score') and result.score >= quality_threshold
	]

	if high_quality_urdu:
	print(f"✅ Found {len(high_quality_urdu)} high-quality Urdu chunks")
	return high_quality_urdu[:5]
	elif urdu_results:
	print(f"⚠️ Using {len(urdu_results)} lower-confidence Urdu chunks")
	return urdu_results[:3]

	print("🔍 No Urdu content found, searching all content...")

	retrieval_results = self.retriever.retrieve(user_query)
	quality_threshold = 0.5
	high_quality_results = [
	result for result in retrieval_results
	if hasattr(result, 'score') and result.score >= quality_threshold
	]

	if not high_quality_results and retrieval_results:
	print("⚠️ Using lower confidence results")
	return retrieval_results[:3]

	print(f"✅ Retrieved {len(high_quality_results)} relevant chunks")
	return high_quality_results[:5]

	except Exception as e:
	print(f"❌ Retrieval error: {e}")
	return []

	def build_concise_prompt(self, user_query: str, context_chunks: List[Any], language: str = "english") -> str:
	"""Build prompt for concise, targeted responses with emotional intelligence.

	At most the first two context chunks are used, each cut to its first 100
	whitespace-separated words. The result of _detect_emotional_needs is
	embedded as YES/NO flags. The Urdu template is used when language is
	"urdu", otherwise the English one. The prompt is returned with leading
	and trailing whitespace stripped.
	"""

	context_text = ""
	if context_chunks:
	context_parts = []
	# Only the first two chunks are included to keep the prompt short.
	for i, chunk in enumerate(context_chunks[:2]):
	# Objects without a .text attribute are rendered with str().
	chunk_text = chunk.text if hasattr(chunk, 'text') else str(chunk)
	# Keep the first 100 words of the chunk.
	key_points = " ".join(chunk_text.split()[:100])
	context_parts.append(f"CONTEXT {i+1}: {key_points}")
	context_text = "\n".join(context_parts)

	# Analyze emotional and information needs
	needs_analysis = self._detect_emotional_needs(user_query, language)

	# The user query is inserted verbatim between double quotes in both templates.
	if language == "urdu":
	prompt = f"""
	# WELL BEING AGENT - BREAST CANCER SUPPORT
	# CRITICAL: RESPOND ONLY IN URDU LANGUAGE USING CORRECT URDU SPELLING AND GRAMMAR
	# ABSOLUTELY NO HINDI, ARABIC, OR OTHER LANGUAGES - PURE URDU ONLY

	## PATIENT'S QUERY:
	"{user_query}"

	## EMOTIONAL ANALYSIS:
	- Needs Emotional Support: {'YES' if needs_analysis['needs_emotional_support'] else 'NO'}
	- Needs Information: {'YES' if needs_analysis['needs_information'] else 'NO'}

	## CONTEXT (USE IF RELEVANT):
	{context_text if context_text else "General breast cancer knowledge"}

	## CRITICAL SPELLING RULES - MUST FOLLOW:
	1. ✅ "مجھے" ❌ "مجہے"
	2. ✅ "پروگرام" ❌ "پروگرہوں"
	3. ✅ "کینسر" ❌ "کہےنسر"
	4. ✅ "ڈاکٹر" ❌ "ڈڈاکٹر"
	5. ✅ "ہے" ❌ "ہےہ"
	6. ✅ "میں" ❌ "مہےں"
	7. ✅ "کے لیے" ❌ "کا ے لہےے"
	8. ✅ "جس سے" ❌ "جسے سے"

	## RESPONSE REQUIREMENTS - URDU:
	1. LANGUAGE: صرف اردو میں جواب دیں، درست ہجے اور قواعد کا استعمال کریں
	2. EMOTIONAL TONE: ہمدردانہ، گرمجوش، اور امید بخش انداز اپنائیں
	3. CONTENT: اگر معلومات درکار ہوں تو واضح، درست معلومات دیں
	4. SUPPORT: جذباتی مدد قدرتی طور پر پیش کریں، الگ سے ذکر نہ کریں
	5. LENGTH: 4-6 جملے، مختصر مگر جامع
	6. SPELLING: درست اردو ہجے استعمال کریں، غلط ہجے سے پرہیز کریں
	7. COMPLETENESS: مکمل جملے لکھیں، ادھورے جملے نہ چھوڑیں

	## آپ کا گرمجوش، درست اردو میں اور مکمل جواب:
	"""
	else:
	prompt = f"""
	# WELL BEING AGENT - BREAST CANCER SUPPORT

	## PATIENT'S QUERY:
	"{user_query}"

	## EMOTIONAL ANALYSIS:
	- Needs Emotional Support: {'YES' if needs_analysis['needs_emotional_support'] else 'NO'}
	- Needs Information: {'YES' if needs_analysis['needs_information'] else 'NO'}

	## CONTEXT (USE IF RELEVANT):
	{context_text if context_text else "General breast cancer knowledge"}

	## RESPONSE REQUIREMENTS:

	1. TONE: Warm, compassionate, and hopeful
	2. CONTENT: Provide accurate information if needed
	3. SUPPORT: Integrate emotional support naturally without explicitly stating it
	4. LENGTH: 4-6 sentences, concise but comprehensive
	5. FOCUS: Be caring and present with the patient
	6. COMPLETENESS: Write complete sentences, no incomplete thoughts

	## YOUR COMPASSIONATE RESPONSE:
	"""

	# Remove the leading/trailing newlines introduced by the triple-quoted template.
	return prompt.strip()

	def build_urdu_prompt(self, user_query: str, context_chunks: List[Any]) -> str:
	"""Build detailed prompt for Urdu responses with strong language enforcement.

	Up to the first three context chunks are included in full, each labelled
	with its metadata 'topic' (or 'General Information' when the chunk has no
	metadata attribute or no 'topic' key). The prompt is returned with leading
	and trailing whitespace stripped.
	"""
	context_text = ""
	if context_chunks:
	context_parts = []
	# Only the first three chunks are included.
	for i, chunk in enumerate(context_chunks[:3]):
	# Objects without a .text attribute are rendered with str().
	chunk_text = chunk.text if hasattr(chunk, 'text') else str(chunk)
	# NOTE(review): assumes chunk.metadata is dict-like (supports .get) - confirm.
	source_topic = chunk.metadata.get('topic', 'General Information') if hasattr(chunk, 'metadata') else 'General Information'
	context_parts.append(f"SOURCE {i+1} - {source_topic}:\n{chunk_text}")
	context_text = "\n\n".join(context_parts)

	# The user query is inserted verbatim between double quotes.
	urdu_prompt = f"""
	# WELL BEING AGENT - BREAST CANCER SUPPORT
	# CRITICAL: RESPOND ONLY IN URDU LANGUAGE WITH PERFECT SPELLING
	# ABSOLUTELY NO HINDI, ARABIC, OR ENGLISH - PURE URDU ONLY

	## YOUR ROLE IN URDU:
	آپ بریسٹ کینسر کی سپیشلائزڈ ویل بینگ ایجنٹ ہیں۔ آپ مریضوں کو نہ صرف طبی معلومات بلکہ قدرتی طور پر جذباتی مدد اور ہمت بھی فراہم کرتی ہیں۔

	## AVAILABLE CONTEXT:
	{context_text if context_text else "General breast cancer knowledge"}

	## USER'S QUESTION (IN URDU):
	"{user_query}"

	## CRITICAL SPELLING RULES - MUST FOLLOW:
	1. ✅ "مجھے" ❌ "مجہے"
	2. ✅ "پروگرام" ❌ "پروگرہوں"
	3. ✅ "کینسر" ❌ "کہےنسر"
	4. ✅ "ڈاکٹر" ❌ "ڈڈاکٹر"
	5. ✅ "ہے" ❌ "ہےہ"
	6. ✅ "میں" ❌ "مہےں"
	7. ✅ "کے لیے" ❌ "کا ے لہےے"
	8. ✅ "جس سے" ❌ "جسے سے"

	## RESPONSE REQUIREMENTS - URDU:
	1. LANGUAGE ENFORCEMENT: صرف اور صرف اردو میں جواب دیں
	2. SPELLING ACCURACY: درست اردو ہجے استعمال کریں، عام غلطیوں سے پرہیز کریں
	3. EMOTIONAL INTEGRATION: جذباتی مدد کو قدرتی انداز میں پیش کریں
	4. COMPASSIONATE TONE: گرمجوش، ہمدردانہ، اور امید بخش انداز
	5. INFORMATION ACCURACY: سیاق و سباق کے مطابق درست معلومات دیں
	6. COMPLETE SENTENCES: مکمل جملے لکھیں، ادھورے جملے نہ چھوڑیں

	## EXAMPLES OF CORRECT URDU:
	- ✅ "بریسٹ کینسر کے بارے میں معلومات حاصل کرنا ایک اہم قدم ہے۔"
	- ✅ "میں آپ کو درست معلومات فراہم کرنے کی کوشش کروں گی۔"
	- ✅ "آپ کے سوال کا جواب دینے میں مجھے خوشی ہو رہی ہے۔"

	## آپ کا درست ہجے، مکمل جملوں اور ہمدردانہ انداز میں جواب:
	"""
	# Remove the leading/trailing newlines introduced by the triple-quoted template.
	return urdu_prompt.strip()

	def build_enhanced_prompt(self, user_query: str, context_chunks: List[Any]) -> str:
	"""Build prompt for English responses with emotional intelligence.

	Up to the first three context chunks are included in full, each labelled
	with its metadata 'topic' (or 'General Information' when unavailable).
	The result of _detect_emotional_needs (evaluated as English) is embedded
	as YES/NO flags. The prompt is returned with leading and trailing
	whitespace stripped.
	"""
	context_text = ""
	if context_chunks:
	context_parts = []
	# Only the first three chunks are included.
	for i, chunk in enumerate(context_chunks[:3]):
	# Objects without a .text attribute are rendered with str().
	chunk_text = chunk.text if hasattr(chunk, 'text') else str(chunk)
	# NOTE(review): assumes chunk.metadata is dict-like (supports .get) - confirm.
	source_topic = chunk.metadata.get('topic', 'General Information') if hasattr(chunk, 'metadata') else 'General Information'
	context_parts.append(f"SOURCE {i+1} - {source_topic}:\n{chunk_text}")
	context_text = "\n\n".join(context_parts)

	# Analyze emotional needs
	needs_analysis = self._detect_emotional_needs(user_query, "english")

	# The user query is inserted verbatim between double quotes.
	prompt = f"""
	# WELL BEING AGENT - BREAST CANCER SUPPORT

	## YOUR ROLE
	You are a compassionate Well Being Agent specializing in breast cancer support. You provide supportive information, emotional comfort, and evidence-based guidance.

	## EMOTIONAL ANALYSIS:
	- Patient Needs Emotional Support: {'YES' if needs_analysis['needs_emotional_support'] else 'NO'}
	- Patient Needs Information: {'YES' if needs_analysis['needs_information'] else 'NO'}

	## RESPONSE GUIDELINES
	- Tone: Warm, supportive, compassionate, and hopeful
	- Emotional Integration: Naturally incorporate emotional support without explicitly stating it
	- Information: Provide evidence-based guidance when needed
	- Presence: Be fully present and caring with the patient
	- Completeness: Write complete sentences, no incomplete thoughts

	## AVAILABLE CONTEXT
	{context_text if context_text else "General breast cancer knowledge"}

	## USER'S QUESTION
	"{user_query}"

	## RESPONSE REQUIREMENTS
	1. If emotional support is needed: Integrate comfort and hope naturally into your response
	2. If information is needed: Provide clear, accurate guidance
	3. Always acknowledge the patient's strength implicitly
	4. Maintain a caring, present tone throughout
	5. Keep response concise but comprehensive (4-6 complete sentences)

	## YOUR COMPASSIONATE RESPONSE:
	"""
	# Remove the leading/trailing newlines introduced by the triple-quoted template.
	return prompt.strip()

	def query_llm_with_retry(self, prompt: str, language: str = "english", max_retries: int = 3) -> str:
	    """Send the prompt to the configured chat model, retrying on failure.

	    Failure handling per attempt:
	      * error text containing "429": wait 2**attempt seconds and retry;
	        on the final attempt, fall through to key rotation instead.
	      * error text containing "401" or "402": rotate to the next API key
	        and retry, or return the fallback message when no key is left.
	      * any other error, including an empty completion: wait one second
	        and retry; on the final attempt rotate keys and start over, or
	        return the fallback message when no key is left.

	    Returns:
	        The model's reply text (never empty), or config.FALLBACK_MESSAGE.
	    """
	    if not config.api_key:
	        print("❌ No API key available")
	        return config.FALLBACK_MESSAGE

	    # Enhanced system message with Urdu-specific instructions
	    if language == "urdu":
	        system_message = """آپ بریسٹ کینسر کی سپیشلائزڈ ویل بینگ ایجنٹ ہیں۔

	CRITICAL URDU LANGUAGE RULES:
	1. صرف اور صرف اردو میں جواب دیں
	2. ہر لفظ کے ہجے درست ہوں
	3. مکمل اور واضح جملے استعمال کریں
	4. غلط ہجے اور ادھورے جملوں سے پرہیز کریں
	5. طبی معلومات درست اور واضح ہوں

	مثال کے طور پر:
	✅ "بریسٹ کینسر کے علاج کے مختلف طریقے ہیں۔"
	❌ "برہےسٹ کہےنسر کا علاچ کہے طرح ہےہ۔"

	جذباتی مدد قدرتی طور پر پیش کریں اور مریض کی طاقت کو تسلیم کریں۔"""
	    else:
	        system_message = """You are a compassionate Well Being Agent for breast cancer support. Provide direct, helpful information while naturally integrating emotional support. Always maintain a warm, hopeful, and caring tone. Ensure complete sentences and clear information."""

	    for attempt in range(max_retries):
	        is_last_attempt = attempt == max_retries - 1
	        try:
	            # Initialize OpenAI client with OpenRouter configuration. Built
	            # per attempt so a rotated key is picked up.
	            client = OpenAI(
	                base_url="https://openrouter.ai/api/v1",
	                api_key=config.api_key,
	            )

	            # Adjust parameters for better Urdu quality
	            # NOTE(review): config.TEMPERATURE is not used here - confirm intended.
	            temperature = 0.2 if language == "urdu" else 0.3
	            max_tokens = 500 if language == "urdu" else config.MAX_TOKENS

	            print(f"🔄 Sending request to {config.MODEL_PROVIDER} (attempt {attempt + 1})")

	            completion = client.chat.completions.create(
	                extra_headers={
	                    "HTTP-Referer": "https://huggingface.co",
	                    "X-Title": "Well Being Agent",
	                },
	                extra_body={},
	                model=config.MODEL_ID,
	                messages=[
	                    {"role": "system", "content": system_message},
	                    {"role": "user", "content": prompt},
	                ],
	                temperature=temperature,
	                max_tokens=max_tokens,
	            )

	            # The message content may be None or blank; callers expect a
	            # non-empty str, so treat that as a failed attempt.
	            response_text = completion.choices[0].message.content if completion.choices else None
	            if not response_text or not response_text.strip():
	                raise ValueError("LLM returned an empty response")
	            print("✅ LLM response received")

	            # For Urdu, do immediate quality check (only reported here; the
	            # text itself is returned unchanged).
	            if language == "urdu" and self._is_urdu_response_corrupted(response_text):
	                print("⚠️ Urdu response appears corrupted, applying enhanced cleaning")

	            return response_text

	        except Exception as e:
	            print(f"❌ Request failed: {e}")
	            # Status codes are recognised by searching the error text.
	            error_text = str(e)
	            if "429" in error_text and not is_last_attempt:
	                wait_time = 2 ** attempt
	                print(f"⏳ Rate limited. Waiting {wait_time} seconds...")
	                time.sleep(wait_time)
	                continue
	            if "401" in error_text or "402" in error_text:
	                print(f"❌ API key issue")
	                if config.rotate_to_next_key():
	                    continue
	                return config.FALLBACK_MESSAGE

	            if is_last_attempt:
	                # Out of attempts for this key: start over with the next
	                # key if there is one (recursion depth <= number of keys).
	                if config.rotate_to_next_key():
	                    return self.query_llm_with_retry(prompt, language, max_retries)
	                return config.FALLBACK_MESSAGE
	            time.sleep(1)

	    return config.FALLBACK_MESSAGE

	def _is_urdu_response_corrupted(self, text: str) -> bool:
	"""Check if Urdu response has common corruption patterns"""
	corruption_indicators = [
	'ہےہ', 'مہےں', 'کہے', 'پروگرہوں', 'ڈڈاکٹر', 'کا ے لہےے', 'جسے سے'
	]

	for indicator in corruption_indicators:
	if indicator in text:
	return True

	# Check for excessive character repetition
	import re
	if re.search(r'(.)\1\1', text): # Three repeated characters
	return True

	return False

	def _verify_language_compliance(self, text: str, expected_language: str) -> str:
	"""Verify and correct language compliance"""
	if expected_language == "urdu":
	# Check for common incorrect language patterns
	hindi_pattern = re.compile(r'[\u0900-\u097F]+') # Hindi characters
	arabic_pattern = re.compile(r'[\uFE70-\uFEFF]+') # Arabic specific characters

	if hindi_pattern.search(text):
	print("⚠️ Hindi detected in Urdu response, applying correction...")
	# Add Urdu language reminder
	return text + "\n\nبراہ کرم صرف اردو میں جواب دیں۔"

	if arabic_pattern.search(text):
	print("⚠️ Arabic detected in Urdu response, applying correction...")
	# Add Urdu language reminder
	return text + "\n\nبراہ کرم صرف اردو میں جواب دیں۔"

	return text

	def format_final_response(self, llm_answer: str, language: str = "english") -> str:
	    """Tidy a raw model answer and make sure it carries the care-team reminder.

	    The answer is stripped, passed through ``_clean_urdu_text`` when
	    ``language`` is ``'urdu'``, run through ``_verify_language_compliance``,
	    and then given a closing reminder (Urdu or English) unless that exact
	    text is already present.
	    """
	    is_urdu = language == 'urdu'
	    answer = llm_answer.strip()

	    # Enhanced Urdu text cleaning
	    if is_urdu:
	        print("🧹 Applying advanced Urdu text cleaning...")
	        answer = self._clean_urdu_text(answer)

	    # Verify language compliance
	    answer = self._verify_language_compliance(answer, language)

	    gentle_reminder = (
	        "\n\nاپنی صحت کی دیکھ بھال ٹیم سے اپنے خدشات پر بات کرنا یاد رکھیں۔"
	        if is_urdu
	        else "\n\nRemember to discuss any concerns with your healthcare team."
	    )

	    # Skip the append when the reminder text is already in the answer.
	    if gentle_reminder in answer:
	        return answer.strip()
	    return (answer + gentle_reminder).strip()

	def get_enhanced_answer(self, user_query: str, language: Optional[str] = None, response_type: str = "text") -> str:
	    """Answer a user query end to end: retrieve, prompt, call the LLM, format, log.

	    Args:
	        user_query: The user's question.
	        language: Language label such as ``"urdu"`` or ``"english"``. When
	            ``None`` it is obtained from ``self.detect_language(user_query)``.
	        response_type: Label stored with the conversation log and the
	            history entry.

	    Returns:
	        The final answer text, either taken from the response cache or
	        freshly generated, formatted and extended with emotional support.
	    """
	    print(f"🔍 Processing query: '{user_query}' (Type: {response_type})")

	    if language is None:
	        language = self.detect_language(user_query)
	        print(f"🌐 Detected language: {language}")

	    # Special handling for problematic Urdu queries (currently diagnostic only)
	    if language == "urdu":
	        problematic_patterns = ['اوج ایک انسر', 'اصلاح ملکم', 'نعم']
	        if any(pattern in user_query for pattern in problematic_patterns):
	            print("⚠️ Detected problematic query pattern, applying enhanced Urdu handling")

	    chunks = self.retrieve_relevant_chunks(user_query, language)

	    cache_key = response_cache.get_cache_key(user_query, chunks)
	    cached_response = response_cache.get(cache_key)

	    if cached_response:
	        print("✅ Using cached response")
	        final_answer = cached_response
	    else:
	        # Enhanced prompt selection with quality focus
	        query_lower = user_query.lower()
	        wants_details = any(phrase in query_lower for phrase in [
	            "give details", "more detail", "explain more", "tell me more",
	            "elaborate", "in detail", "detailed", "comprehensive"
	        ])

	        if wants_details and language == 'urdu':
	            prompt = self.build_urdu_prompt(user_query, chunks)
	        elif wants_details:
	            prompt = self.build_enhanced_prompt(user_query, chunks)
	        else:
	            prompt = self.build_concise_prompt(user_query, chunks, language)

	        llm_answer = self.query_llm_with_retry(prompt, language)

	        # A chat completion may carry no text content; treat a missing or
	        # blank answer like a failed request instead of crashing on .strip().
	        if not llm_answer or not llm_answer.strip():
	            llm_answer = config.FALLBACK_MESSAGE
	        is_fallback = llm_answer == config.FALLBACK_MESSAGE

	        # Urdu quality diagnostics. Word counts are taken on the cleaned
	        # text itself, before the care-team reminder is appended, so the
	        # "too short" check can actually trigger.
	        if language == 'urdu':
	            original_length = len(llm_answer.split())
	            cleaned_length = len(self._clean_urdu_text(llm_answer.strip()).split())

	            if cleaned_length < 5:  # Too short
	                print("⚠️ Urdu response too short, may be incomplete")
	            elif cleaned_length < original_length * 0.7:  # Significant reduction
	                print("⚠️ Significant text reduction during cleaning")

	        # Format exactly once: a second pass re-ran the compliance check and
	        # could append its Urdu-only reminder a second time.
	        final_answer = self.format_final_response(llm_answer, language)

	        # Always add emotional support naturally
	        final_answer = self._add_emotional_support(final_answer, user_query, language)

	        if is_fallback:
	            # Never cache the error fallback, or a transient API failure
	            # would be replayed for every later identical query.
	            print("⚠️ Fallback response not cached")
	        else:
	            response_cache.set(cache_key, final_answer)
	            print("💾 Response cached for future use")

	    # Log conversation to JSON
	    conversation_logger.log_conversation(
	        user_input=user_query,
	        llm_response=final_answer,
	        language=language,
	        response_type=response_type
	    )

	    self.conversation_history.append({
	        "query": user_query,
	        "answer": final_answer,
	        "language": language,
	        "response_type": response_type,
	        "timestamp": time.time()
	    })

	    return final_answer

	# === Pre-load Index at Module Level ===
	# The statements below are plain module-level code, so the index is loaded
	# as soon as this file is run or imported.
	print("🚀 Starting Well Being Agent with optimized loading...")
	# Wall-clock start, used only to report how long loading took.
	_start_time = time.time()

	print("🔄 Loading vector index...")
	# load_index_fast() is defined outside this section; its result is unpacked
	# into the (index, retriever) pair that is handed to the RAG system below.
	index, retriever = load_index_fast()

	# Elapsed seconds between the timer start above and the end of loading.
	_load_time = time.time() - _start_time
	print(f"✅ System ready in {_load_time:.2f} seconds")

	# Create global RAG system instance
	# (interactive_chat() calls rag_system.get_enhanced_answer on it).
	rag_system = BreastCancerRAGSystem(index, retriever)

	# === Interactive Chat Mode ===
	def interactive_chat():
	    """Run a blocking console chat loop against the global ``rag_system``.

	    Commands: ``quit``/``exit``/``q`` leave the loop, ``topics`` prints a
	    topic hint, ``cache`` prints the number of cached responses, and blank
	    input is ignored. Anything else is sent to
	    ``rag_system.get_enhanced_answer`` and printed with its response time.

	    End-of-input (closed or exhausted stdin) or Ctrl-C at the prompt ends
	    the session cleanly instead of surfacing a traceback.
	    """
	    print("💬 Well Being Agent - Breast Cancer Support")
	    print("=" * 50)
	    print("Type 'quit' to exit, 'topics' to see available topics, 'cache' for cache stats")
	    print("=" * 50)

	    while True:
	        try:
	            user_input = input("\n❓ Your question: ").strip()
	        except (EOFError, KeyboardInterrupt):
	            # No more input is coming (piped/non-interactive stdin) or the
	            # user interrupted the prompt: finish the line and stop.
	            print()
	            break

	        command = user_input.lower()
	        if command in ('quit', 'exit', 'q'):
	            break
	        if command == 'topics':
	            print("\n📚 Available topics: Fertility, Treatment, Symptoms, Diagnosis, etc.")
	            continue
	        if command == 'cache':
	            print(f"\n📊 Cache stats: {len(response_cache.cache)} cached responses")
	            continue
	        if not user_input:
	            continue

	        print("🤔 Thinking...")
	        start_time = time.time()
	        answer = rag_system.get_enhanced_answer(user_input)
	        response_time = time.time() - start_time
	        print(f"\n💡 {answer}")
	        print(f"⏱️ Response time: {response_time:.2f} seconds")

	# === Main Function ===
	def main():
	    """Print the startup banner and configuration, then start the console chat.

	    When ``config.api_keys`` is falsy, setup guidance is printed and the
	    chat loop is not started.
	    """
	    divider = "=" * 50

	    print("🏥 Well Being Agent - Breast Cancer Support System")
	    print(divider)
	    print("📋 Current Configuration:")
	    print(f" Model: {config.MODEL_ID}")
	    print(f" Provider: {config.MODEL_PROVIDER}")
	    print(f" Index: {config.INDEX_PATH}")
	    print(f" Cache: {len(response_cache.cache)} responses loaded")
	    print(divider)

	    if config.api_keys:
	        interactive_chat()
	        return

	    # Without keys no request can succeed, so explain how to add them.
	    print("❌ API keys not configured.")
	    if IS_HUGGING_FACE:
	        print("💡 Add API keys in Space Settings → Repository secrets")


	# Start the console application only when this file is executed directly.
	if __name__ == "__main__":
	    main()