""" Check RAG System Status - Verify all vector stores Checks all 6 specialized ChromaDB databases """ from pathlib import Path import sys # Add parent directory to path sys.path.insert(0, str(Path(__file__).parent.parent)) # Vector store definitions VECTOR_STORES = { 'medical_diseases': { 'name': 'ViMedical Diseases', 'path': 'rag/vector_store/medical_diseases', 'expected_size': 50, # MB 'test_query': 'đau đầu triệu chứng' }, 'mental_health': { 'name': 'Mental Health', 'path': 'rag/vector_store/mental_health', 'expected_size': 80, 'test_query': 'stress anxiety depression' }, 'nutrition': { 'name': 'Nutrition Plans', 'path': 'rag/vector_store/nutrition', 'expected_size': 20, 'test_query': 'diet meal plan calories' }, 'vietnamese_nutrition': { 'name': 'Vietnamese Food', 'path': 'rag/vector_store/vietnamese_nutrition', 'expected_size': 5, 'test_query': 'phở cơm nutrition' }, 'fitness': { 'name': 'Fitness Exercises', 'path': 'rag/vector_store/fitness', 'expected_size': 10, 'test_query': 'gym workout exercise' }, 'symptom_qa': { 'name': 'Medical Q&A', 'path': 'rag/vector_store/symptom_qa', 'expected_size': 8, 'test_query': 'triệu chứng bệnh' }, 'general_health_qa': { 'name': 'General Health Q&A', 'path': 'rag/vector_store/general_health_qa', 'expected_size': 7, 'test_query': 'sức khỏe tổng quát' } } def check_vector_store(store_info): """Check individual vector store""" print(f"\n📦 {store_info['name']}") print("-" * 50) store_path = Path(store_info['path']) # Check existence if not store_path.exists(): print(f"❌ Not found: {store_info['path']}") print(f" Reason: Directory does not exist") return {'status': False, 'reason': 'Directory not found'} print(f"✅ Exists: {store_info['path']}") # Check size total_size = sum(f.stat().st_size for f in store_path.rglob('*') if f.is_file()) size_mb = total_size / (1024 * 1024) expected = store_info['expected_size'] print(f"📊 Size: {size_mb:.1f} MB (expected ~{expected} MB)") if size_mb < 0.1: print("⚠️ Database seems empty") print(" Reason: Database size < 0.1 MB (likely not built)") return {'status': False, 'reason': 'Database empty or not built'} # Try to load and query try: import chromadb client = chromadb.PersistentClient(path=str(store_path)) collections = client.list_collections() if not collections: print("⚠️ No collections found") print(" Reason: ChromaDB has no collections") return {'status': False, 'reason': 'No collections in database'} collection = collections[0] count = collection.count() print(f"📚 Documents: {count:,} chunks") if count == 0: print("⚠️ Collection is empty") print(" Reason: Collection exists but has 0 documents") return {'status': False, 'reason': 'Collection is empty (0 documents)'} # Test query try: results = collection.query( query_texts=[store_info['test_query']], n_results=1 ) if results and results['documents'] and results['documents'][0]: print("✅ Query test passed") return {'status': True, 'reason': None} else: print("⚠️ Query returned no results") print(" Reason: Query executed but found no matching documents") return {'status': False, 'reason': 'Query returned no results'} except Exception as e: print(f"⚠️ Query test failed: {e}") print(f" Reason: {str(e)}") return {'status': False, 'reason': f'Query failed: {str(e)}'} except ImportError: print("⚠️ ChromaDB not installed") print(" Reason: pip install chromadb") return {'status': False, 'reason': 'ChromaDB package not installed'} except Exception as e: print(f"⚠️ Error: {e}") print(f" Reason: {str(e)}") return {'status': False, 'reason': f'Error loading database: {str(e)}'} def check_rag_status(): """Check all RAG vector stores""" print("="*60) print("🔍 RAG System Status Check") print("="*60) # Check base directory base_path = Path('rag/vector_store') if not base_path.exists(): print("\n❌ Vector store directory not found!") print(f" Expected: {base_path}") print("\n💡 Solution:") print(" bash scripts/setup_rag.sh") return False print(f"\n✅ Base directory exists: {base_path}") # Check each vector store results = {} for store_id, store_info in VECTOR_STORES.items(): results[store_id] = check_vector_store(store_info) # Summary print("\n" + "="*60) print("📊 Summary") print("="*60) total = len(results) passed = sum(1 for v in results.values() if v['status']) for store_id, result in results.items(): status = "✅" if result['status'] else "❌" name = VECTOR_STORES[store_id]['name'] print(f"{status} {name}") if not result['status'] and result['reason']: print(f" └─ {result['reason']}") print("\n" + "="*60) print(f"Result: {passed}/{total} databases OK") if passed == total: print("\n🎉 All vector stores are ready!") print("\nNext steps:") print(" python app.py") print(" Open http://localhost:7860") print("="*60) return True else: print("\n⚠️ Some databases are missing or have issues") print("\n💡 Solutions:") print("\n1️⃣ Quick fix (rebuild all):") print(" bash scripts/setup_rag.sh") print("\n2️⃣ Rebuild specific databases:") # Map store_id to script script_map = { 'medical_diseases': 'python data_mining/mining_vimedical.py', 'mental_health': 'python data_mining/mining_mentalchat.py', 'nutrition': 'python data_mining/mining_nutrition.py', 'vietnamese_nutrition': 'python data_mining/mining_vietnamese_food.py', 'fitness': 'python data_mining/mining_fitness.py', 'symptom_qa': 'python data_mining/mining_medical_qa.py', 'general_health_qa': 'python data_mining/mining_medical_qa.py' } for store_id, result in results.items(): if not result['status']: name = VECTOR_STORES[store_id]['name'] script = script_map.get(store_id, 'Unknown') print(f"\n ❌ {name}:") print(f" Reason: {result['reason']}") print(f" Fix: {script}") print("\n" + "="*60) return False if __name__ == '__main__': success = check_rag_status() exit(0 if success else 1)