Spaces:

Blaiseboy
/

BioGPT-chatbot

Sleeping

App Files Files Community

Blaiseboy committed on Aug 5

Commit

02a841e

verified ·

1 Parent(s): d85cddc

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -638

app.py DELETED Viewed

@@ -1,638 +0,0 @@
-import gradio as gr
-import os
-import torch
-import re
-import json
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import logging
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-class StandaloneMedicalChatbot:
-    def __init__(self, use_gpu=False):
-        """Standalone medical chatbot with built-in ML improvements"""
-        self.use_gpu = use_gpu and torch.cuda.is_available()
-        self.device = "cuda" if self.use_gpu else "cpu"
-        # Initialize model components
-        self.model = None
-        self.tokenizer = None
-        self.knowledge_chunks = []
-        # Setup medical intelligence
-        self.setup_medical_keywords()
-        self.load_biomedical_model()
-    def setup_medical_keywords(self):
-        """Setup comprehensive medical keyword dictionaries"""
-        self.medical_keywords = {
-            'symptoms': [
-                'fever', 'temperature', 'hot', 'cough', 'cold', 'headache', 'pain', 'ache',
-                'sore', 'swelling', 'rash', 'vomiting', 'nausea', 'diarrhea', 'fatigue',
-                'tired', 'breathing', 'breath', 'wheeze', 'sneeze', 'runny nose', 'congestion',
-                'itchy', 'burning', 'stinging', 'cramping', 'bloating', 'dizzy', 'weakness'
-            ],
-            'conditions': [
-                'asthma', 'pneumonia', 'bronchitis', 'allergy', 'allergic', 'infection',
-                'flu', 'influenza', 'covid', 'coronavirus', 'strep', 'throat', 'ear infection',
-                'gastroenteritis', 'dehydration', 'constipation', 'reflux', 'eczema',
-                'dermatitis', 'chickenpox', 'measles', 'mumps', 'rubella', 'rsv'
-            ],
-            'treatments': [
-                'medicine', 'medication', 'drug', 'antibiotic', 'paracetamol', 'acetaminophen',
-                'ibuprofen', 'aspirin', 'treatment', 'therapy', 'dose', 'dosage', 'prescription',
-                'vaccine', 'vaccination', 'immunization', 'shot', 'injection', 'remedy',
-                'cure', 'heal', 'recover', 'rest', 'fluids', 'hydration'
-            ],
-            'anatomy': [
-                'head', 'chest', 'stomach', 'belly', 'throat', 'ear', 'eye', 'nose', 'mouth',
-                'skin', 'lung', 'lungs', 'heart', 'brain', 'kidney', 'liver', 'blood',
-                'bones', 'muscle', 'joint', 'neck', 'back', 'arm', 'leg', 'hand', 'foot'
-            ],
-            'age_groups': [
-                'baby', 'babies', 'infant', 'toddler', 'child', 'children', 'pediatric',
-                'newborn', 'kid', 'kids', 'teenager', 'adolescent', 'preschooler',
-                'months', 'years', 'old', 'age', 'young', 'little'
-            ],
-            'medical_context': [
-                'doctor', 'physician', 'pediatrician', 'hospital', 'clinic', 'nurse',
-                'diagnosis', 'symptom', 'symptoms', 'health', 'medical', 'care', 'urgent',
-                'emergency', 'when to call', 'normal', 'abnormal', 'healthy', 'sick',
-                'illness', 'disease', 'condition', 'problem', 'concern', 'help'
-            ],
-            'emergency': [
-                'emergency', 'urgent', 'critical', 'severe', 'serious', 'dangerous',
-                'life threatening', 'call doctor', 'hospital', '911', 'ambulance',
-                'immediate', 'right away', 'cannot breathe', 'unconscious', 'seizure'
-            ]
-        }
-        # Non-medical keywords
-        self.non_medical_keywords = [
-            'weather', 'cooking', 'recipe', 'food', 'restaurant', 'movie', 'film',
-            'music', 'song', 'sports', 'football', 'basketball', 'game', 'play',
-            'travel', 'vacation', 'holiday', 'work', 'job', 'school', 'homework',
-            'money', 'shopping', 'buy', 'sell', 'car', 'vehicle', 'computer',
-            'phone', 'technology', 'politics', 'news', 'celebrity', 'entertainment'
-        ]
-        # Create flat list of medical keywords
-        self.all_medical_keywords = []
-        for category_keywords in self.medical_keywords.values():
-            self.all_medical_keywords.extend(category_keywords)
-    def load_biomedical_model(self):
-        """Load a lightweight biomedical model"""
-        try:
-            logger.info("Loading biomedical language model...")
-            # Use a lightweight medical model or fallback to general model
-            model_name = "microsoft/DialoGPT-medium"  # Lightweight conversational model
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            self.model = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                torch_dtype=torch.float16 if self.use_gpu else torch.float32,
-                device_map="auto" if self.use_gpu else None
-            )
-            # Add padding token if not present
-            if self.tokenizer.pad_token is None:
-                self.tokenizer.pad_token = self.tokenizer.eos_token
-            logger.info(f"Model loaded successfully on {self.device}")
-        except Exception as e:
-            logger.error(f"Error loading model: {e}")
-            logger.info("Model loading failed, will use template-based responses")
-            self.model = None
-            self.tokenizer = None
-    def load_medical_data(self, file_path):
-        """Load and chunk medical data"""
-        try:
-            if not os.path.exists(file_path):
-                logger.error(f"Medical data file not found: {file_path}")
-                return False
-            with open(file_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-            # Simple chunking by paragraphs or sentences
-            chunks = []
-            # Split by double newlines (paragraphs)
-            paragraphs = content.split('\n\n')
-            for i, paragraph in enumerate(paragraphs):
-                if len(paragraph.strip()) > 50:  # Minimum chunk size
-                    chunks.append({
-                        'id': i,
-                        'text': paragraph.strip(),
-                        'source': file_path
-                    })
-            # If no paragraphs, split by sentences
-            if len(chunks) < 10:
-                sentences = re.split(r'[.!?]+', content)
-                chunks = []
-                for i, sentence in enumerate(sentences):
-                    if len(sentence.strip()) > 30:
-                        chunks.append({
-                            'id': i,
-                            'text': sentence.strip(),
-                            'source': file_path
-                        })
-            self.knowledge_chunks = chunks
-            logger.info(f"Loaded {len(self.knowledge_chunks)} knowledge chunks")
-            return True
-        except Exception as e:
-            logger.error(f"Error loading medical data: {e}")
-            return False
-    def calculate_medical_relevance(self, query):
-        """Calculate medical relevance score"""
-        if not query:
-            return 0.0
-        query_lower = query.lower()
-        words = re.findall(r'\b\w+\b', query_lower)
-        if not words:
-            return 0.0
-        # Count medical keyword matches
-        medical_matches = 0
-        for word in words:
-            for keyword in self.all_medical_keywords:
-                if word == keyword or keyword in word or word in keyword:
-                    medical_matches += 1
-                    break
-        # Count non-medical keywords
-        non_medical_matches = sum(1 for word in words if word in self.non_medical_keywords)
-        # Calculate score
-        medical_score = medical_matches / len(words)
-        non_medical_penalty = non_medical_matches / len(words)
-        relevance_score = medical_score - (non_medical_penalty * 0.7)
-        return max(0.0, min(1.0, relevance_score))
-    def classify_medical_intent(self, query):
-        """Classify medical intent with improved fever detection"""
-        query_lower = query.lower()
-        intent_scores = {}
-        # Special handling for fever-related queries
-        fever_keywords = ['fever', 'temperature', 'hot', 'warm', 'degrees', 'thermometer']
-        if any(keyword in query_lower for keyword in fever_keywords):
-            # This is definitely a fever/symptom query
-            return "fever_management", 0.9
-        for category, keywords in self.medical_keywords.items():
-            score = 0
-            for keyword in keywords:
-                if keyword in query_lower:
-                    # Give higher weight to exact matches
-                    if f" {keyword} " in f" {query_lower} ":
-                        score += 2
-                    else:
-                        score += 1
-            intent_scores[category] = score
-        if max(intent_scores.values()) > 0:
-            best_intent = max(intent_scores, key=intent_scores.get)
-            max_score = intent_scores[best_intent]
-            confidence = min(1.0, max_score / 3)  # Normalize confidence
-            return best_intent, confidence
-        return "general", 0.1
-    def search_relevant_chunks(self, query, top_k=5):
-        """Enhanced search for relevant knowledge chunks"""
-        if not self.knowledge_chunks:
-            return []
-        query_lower = query.lower()
-        query_words = set(re.findall(r'\b\w+\b', query_lower))
-        # Get intent for boosting
-        intent, _ = self.classify_medical_intent(query)
-        intent_keywords = set(self.medical_keywords.get(intent, []))
-        # Special fever keywords for boosting fever-related content
-        fever_keywords = {'fever', 'temperature', 'hot', 'warm', 'degrees', 'thermometer'}
-        is_fever_query = bool(fever_keywords.intersection(query_words))
-        scored_chunks = []
-        for chunk in self.knowledge_chunks:
-            chunk_text = chunk['text'].lower()
-            chunk_words = set(re.findall(r'\b\w+\b', chunk_text))
-            # Calculate different similarity scores
-            # 1. Exact word matches
-            exact_matches = len(query_words.intersection(chunk_words))
-            exact_score = exact_matches / len(query_words) if query_words else 0
-            # 2. Partial matches
-            partial_matches = 0
-            for q_word in query_words:
-                for c_word in chunk_words:
-                    if q_word in c_word or c_word in q_word:
-                        partial_matches += 0.5
-                        break
-            partial_score = partial_matches / len(query_words) if query_words else 0
-            # 3. Intent keyword matches
-            intent_matches = len(intent_keywords.intersection(chunk_words))
-            intent_score = intent_matches * 0.2
-            # 4. Special fever boost
-            fever_boost = 0
-            if is_fever_query:
-                fever_matches = len(fever_keywords.intersection(chunk_words))
-                fever_boost = fever_matches * 0.5
-            # 5. Medical keyword density
-            medical_words_in_chunk = sum(1 for word in chunk_words if word in self.all_medical_keywords)
-            medical_density = medical_words_in_chunk / len(chunk_words) if chunk_words else 0
-            # Combine scores
-            final_score = (
-                exact_score * 0.4 +
-                partial_score * 0.3 +
-                intent_score +
-                fever_boost +
-                medical_density * 0.1
-            )
-            # Length bonus for substantial chunks
-            if len(chunk['text']) > 100:
-                final_score *= 1.1
-            scored_chunks.append((final_score, chunk))
-        # Sort and return top chunks
-        scored_chunks.sort(key=lambda x: x[0], reverse=True)
-        return [chunk for score, chunk in scored_chunks[:top_k] if score > 0.1]
-    def generate_response_with_model(self, query, context):
-        """Generate response using the language model"""
-        if not self.model or not self.tokenizer:
-            return None
-        try:
-            # Prepare prompt with better structure
-            prompt = f"Medical Question: {query}\n\nRelevant Medical Information:\n{context[:800]}\n\nProvide a helpful medical response:"
-            # Tokenize
-            inputs = self.tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
-            if self.use_gpu:
-                inputs = inputs.to(self.device)
-            # Generate
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    inputs,
-                    max_length=inputs.shape[1] + 150,
-                    num_return_sequences=1,
-                    temperature=0.7,
-                    do_sample=True,
-                    pad_token_id=self.tokenizer.eos_token_id
-                )
-            # Decode response
-            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            # Extract just the answer part
-            if "Provide a helpful medical response:" in response:
-                answer = response.split("Provide a helpful medical response:")[-1].strip()
-            elif "Medical response:" in response.lower():
-                answer = response.split("Medical response:")[-1].strip()
-            else:
-                # Take the part after the original prompt
-                answer = response[len(prompt):].strip()
-            return answer if len(answer) > 10 else None
-        except Exception as e:
-            logger.error(f"Error generating response with model: {e}")
-            return None
-    def generate_template_response(self, query, context, intent):
-        """Generate template-based response with improved fever handling"""
-        # Check if this is a fever query
-        query_lower = query.lower()
-        if any(keyword in query_lower for keyword in ['fever', 'temperature', 'hot', 'warm']):
-            # Generate fever-specific response
-            if context:
-                fever_response = self.create_fever_specific_response(context)
-                if fever_response:
-                    return fever_response
-        # Template responses based on intent
-        templates = {
-            'fever_management': [
-                f"For fever management in children:\n\n{context[:400]}...\n\n⚠️ Always monitor your child closely and consult a doctor if fever is high or persistent."
-            ],
-            'symptoms': [
-                f"Regarding symptoms in children:\n\n{context[:400]}...\n\nIf symptoms persist or worsen, please consult a healthcare professional."
-            ],
-            'treatments': [
-                f"Treatment information from medical sources:\n\n{context[:400]}...\n\n⚠️ Always consult a doctor before starting any treatment."
-            ],
-            'emergency': [
-                f"⚠️ For emergency situations:\n\n{context[:300]}...\n\n🚨 IMPORTANT: If this is a medical emergency, call emergency services immediately!"
-            ],
-            'conditions': [
-                f"Medical information about this condition:\n\n{context[:400]}...\n\nConsult your pediatrician for proper diagnosis and treatment."
-            ]
-        }
-        # Select appropriate template
-        if intent in templates:
-            template_list = templates[intent]
-            template = template_list[0]  # Use first template for now
-        else:
-            template = f"Based on medical information:\n\n{context[:400]}...\n\nPlease consult with a healthcare professional for personalized advice."
-        return template
-    def create_fever_specific_response(self, context):
-        """Create fever-specific response from context"""
-        if not context:
-            return None
-        # Look for fever-related sentences in the context
-        sentences = re.split(r'[.!?]+', context)
-        fever_sentences = []
-        fever_keywords = ['fever', 'temperature', 'hot', 'warm', 'degrees', 'thermometer', 'acetaminophen', 'ibuprofen', 'fluids']
-        for sentence in sentences:
-            sentence = sentence.strip()
-            if len(sentence) > 20:
-                sentence_lower = sentence.lower()
-                if any(keyword in sentence_lower for keyword in fever_keywords):
-                    fever_sentences.append(sentence)
-        if fever_sentences:
-            # Take the most relevant fever sentences
-            response = "For managing fever in children:\n\n" + '. '.join(fever_sentences[:3]) + '.'
-            response += "\n\n⚠️ Monitor your child's temperature regularly and seek medical attention if fever is high (>104°F/40°C) or if your child appears very ill."
-            return response
-        return None
-    def chat(self, user_input):
-        """Main chat method with enhanced fever handling"""
-        try:
-            # Check medical relevance
-            relevance_score = self.calculate_medical_relevance(user_input)
-            if relevance_score < 0.2:
-                return ("I'm designed to help with pediatric medical questions. Your query doesn't seem to be medical-related. "
-                       "Please ask about children's health, symptoms, treatments, or medical conditions.\n\n"
-                       "Examples:\n"
-                       "• 'What should I do if my child has a fever?'\n"
-                       "• 'How to treat a child's cough?'\n"
-                       "• 'When should I call the doctor for my baby?'")
-            # Classify intent with special fever handling
-            intent, intent_confidence = self.classify_medical_intent(user_input)
-            # Search for relevant information
-            relevant_chunks = self.search_relevant_chunks(user_input, top_k=4)
-            if not relevant_chunks:
-                # Provide helpful response even without chunks
-                if 'fever' in user_input.lower():
-                    return ("For fever in children, general guidelines include:\n\n"
-                           "• Monitor temperature regularly\n"
-                           "• Ensure adequate fluid intake\n"
-                           "• Consider age-appropriate fever reducers (consult your pediatrician)\n"
-                           "• Watch for signs of dehydration or severe illness\n"
-                           "• Seek medical attention if fever is very high or persistent\n\n"
-                           "⚠️ Always consult with your pediatrician for specific advice about your child's fever.")
-                else:
-                    return (f"I understand you're asking about {intent}-related medical information, "
-                           f"but I couldn't find specific details in my knowledge base. "
-                           f"Please consult with a pediatrician for personalized medical advice about your child's condition.")
-            # Prepare context
-            context = "\n\n".join([chunk['text'] for chunk in relevant_chunks])
-            # Try to generate response with model
-            response = self.generate_response_with_model(user_input, context)
-            # Fallback to template response
-            if not response or len(response) < 20:
-                response = self.generate_template_response(user_input, context, intent)
-            # Add helpful footer
-            footer = "\n\n💡 This information is for educational purposes only. Always consult with a qualified pediatrician for medical advice, diagnosis, and treatment."
-            if intent == "emergency" or any(word in user_input.lower() for word in ['urgent', 'emergency', 'serious']):
-                footer = "\n\n🚨 IMPORTANT: For medical emergencies, contact emergency services immediately (911 in the US, 999 in the UK, etc.)"
-            return response + footer
-        except Exception as e:
-            logger.error(f"Error in chat: {e}")
-            return f"I encountered an error processing your question: {str(e)}. Please try rephrasing your medical question."
-def initialize_standalone_chatbot():
-    """Initialize the standalone chatbot"""
-    try:
-        print("🚀 Initializing Standalone Medical Chatbot...")
-        use_gpu = torch.cuda.is_available()
-        chatbot = StandaloneMedicalChatbot(use_gpu=use_gpu)
-        # Load medical data
-        medical_file = "Pediatric_cleaned.txt"
-        if os.path.exists(medical_file):
-            success = chatbot.load_medical_data(medical_file)
-            if success:
-                status = f"✅ Standalone Medical Chatbot loaded! Medical file '{medical_file}' processed with {len(chatbot.knowledge_chunks)} knowledge chunks."
-                return chatbot, status, True
-            else:
-                status = f"⚠️ Chatbot initialized but failed to load medical data from '{medical_file}'."
-                return chatbot, status, False
-        else:
-            status = f"⚠️ Medical file '{medical_file}' not found. Chatbot will work with basic medical knowledge only."
-            return chatbot, status, False
-    except Exception as e:
-        error_msg = f"❌ Failed to initialize chatbot: {str(e)}"
-        print(error_msg)
-        return None, error_msg, False
-# Initialize chatbot
-print("🏥 Starting Standalone Pediatric Medical Assistant...")
-chatbot, startup_status, medical_file_loaded = initialize_standalone_chatbot()
-def generate_response(user_input, history):
-    """Generate response"""
-    if not chatbot:
-        return history + [("System Error", "❌ Chatbot failed to initialize. Please refresh the page and try again.")], ""
-    if not user_input.strip():
-        return history, ""
-    try:
-        bot_response = chatbot.chat(user_input)
-        history = history + [(user_input, bot_response)]
-        return history, ""
-    except Exception as e:
-        error_response = f"⚠️ Sorry, I encountered an error: {str(e)}. Please try rephrasing your question."
-        history = history + [(user_input, error_response)]
-        return history, ""
-# Custom CSS
-custom_css = """
-.gradio-container {
-    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-}
-.chatbot {
-    height: 500px !important;
-}
-.standalone-badge {
-    background: linear-gradient(90deg, #2196f3, #1976d2);
-    color: white;
-    padding: 6px 15px;
-    border-radius: 25px;
-    font-size: 0.9em;
-    font-weight: bold;
-    display: inline-block;
-    margin-bottom: 15px;
-    box-shadow: 0 2px 4px rgba(0,0,0,0.2);
-}
-"""
-# Create Gradio interface
-with gr.Blocks(css=custom_css, title="Standalone Medical Assistant") as demo:
-    gr.Markdown(
-        """
-        # 🩺 Standalone Pediatric Medical Assistant
-        <div class="standalone-badge">🚀 FULLY STANDALONE - NO EXTERNAL DEPENDENCIES</div>
-        This is a complete standalone medical chatbot that includes:
-        - **🧠 Built-in Medical Intelligence** - No external ML libraries needed
-        - **🎯 Smart Relevance Detection** - Filters medical vs non-medical queries
-        - **🔍 Advanced Search** - Multi-factor similarity scoring
-        - **💬 Conversational AI** - Powered by transformer models
-        - **📚 Medical Knowledge Base** - Pediatric-focused information
-        **⚠️ Medical Disclaimer:** This tool provides educational information only.
-        Always consult qualified healthcare professionals for medical diagnosis, treatment, and personalized advice.
-        """
-    )
-    # Status display
-    gr.Markdown(f"**System Status:** {startup_status}")
-    # Main chat interface
-    with gr.Row():
-        with gr.Column(scale=4):
-            chatbot_ui = gr.Chatbot(
-                label="💬 Standalone Medical AI Chat",
-                height=500,
-                show_label=True,
-                avatar_images=("👤", "🤖")
-            )
-            with gr.Row():
-                user_input = gr.Textbox(
-                    placeholder="Ask any pediatric medical question... (e.g., 'What should I do if my child has a fever?')",
-                    lines=2,
-                    max_lines=5,
-                    show_label=False,
-                    scale=4
-                )
-                submit_btn = gr.Button("Send 📤", variant="primary", scale=1)
-        with gr.Column(scale=1):
-            gr.Markdown(
-                """
-                ### ✨ Key Features:
-                **🎯 Intelligent Filtering**
-                - Detects medical relevance
-                - Redirects non-medical queries
-                - Provides helpful suggestions
-                **🧠 Medical Understanding**
-                - Symptom recognition
-                - Treatment information
-                - Emergency detection
-                - Age-appropriate advice
-                **🔍 Smart Search**
-                - Multi-factor scoring
-                - Intent-based boosting
-                - Context-aware matching
-                ### 💡 Try These Questions:
-                - "My 2-year-old has a fever of 101°F, what should I do?"
-                - "How do I know if my baby's cough is serious?"
-                - "What are normal sleep patterns for toddlers?"
-                - "When should I call the doctor for vomiting?"
-                - "How to treat diaper rash naturally?"
-                - "Signs of allergic reactions in children"
-                ### 🔧 Technical Info:
-                - **Base:** Transformer language model
-                - **Search:** Multi-factor similarity
-                - **Knowledge:** Pediatric medical database
-                - **Relevance:** Keyword-based ML
-                """
-            )
-    # Event handlers
-    user_input.submit(
-        fn=generate_response,
-        inputs=[user_input, chatbot_ui],
-        outputs=[chatbot_ui, user_input],
-        show_progress=True
-    )
-    submit_btn.click(
-        fn=generate_response,
-        inputs=[user_input, chatbot_ui],
-        outputs=[chatbot_ui, user_input],
-        show_progress=True
-    )
-    # Footer
-    gr.Markdown(
-        """
-        ---
-        **🏥 Standalone Medical AI** | Self-Contained System | For Educational Purposes Only
-        **No External Dependencies:** This system runs entirely standalone without requiring
-        external ML libraries like NLTK, scikit-learn, or sentence-transformers.
-        **Always Remember:** Consult healthcare professionals for medical emergencies and personalized advice.
-        """
-    )
-if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        share=False
-    )