"""Gradio chat app that answers general health questions with DialoGPT on CPU.

Importing this module loads the model (a network/disk operation) and builds
the Gradio ``demo`` object; running it as a script launches the web server.
"""

import gc
import logging
import os
import warnings

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Log in only when a token is actually configured. Calling login(token=None)
# falls back to an interactive prompt, which cannot be answered on a server.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)

# Suppress warnings
warnings.filterwarnings("ignore")
logging.getLogger("transformers").setLevel(logging.ERROR)

# Configuration for optimized performance
MODEL_NAME = "microsoft/DialoGPT-medium"
MAX_NEW_TOKENS = 150
TEMPERATURE = 0.8
TOP_P = 0.9

# Used only if the model config does not expose its context window size.
_FALLBACK_CONTEXT_WINDOW = 1024

# Markers that delimit the turns in the text prompt sent to the model.
_USER_MARKER = "User:"
_ASSISTANT_MARKER = "Assistant:"

# Medical enhancement prompt - detailed CareConnect specifications
MEDICAL_CONTEXT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.
Your answers should:
- Stay focused. No long essays or extra fluff.
- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
- For any serious or unclear issues, remind the user to see a doctor — but do it briefly and naturally.
- Keep responses concise and under 4 sentences when possible.
Tone:
- Friendly, supportive, and calm.
- No robotic warnings unless needed. Keep it real and human.
- Use emojis like 😊 or 👍 occasionally to appear friendly.
Important rules:
- NEVER include text in parentheses in your responses.
- NEVER include any meta-instructions in your responses.
- NEVER include reminders about what you should do in future responses.
- DO NOT include phrases like "We're here to help" or "I'm just an AI".
- DO NOT include any text that instructs you what to do or how to behave.
- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
- DO NOT include "(smile)" - instead, use actual emojis like 😊 or 👍 when appropriate.
- DO NOT include numbered references like [1], [2], etc. in your responses.
- DO NOT include any text that explains what your response is doing.
- DO NOT include "user:" or "assistant:" prefixes in your responses.
- DO NOT include hypothetical user questions in your responses.
- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
- Don't give exact dosages or diagnoses.
- Be consistent in your responses regardless of the user's role."""

# Global variables (populated by load_model)
model = None
tokenizer = None


def load_model():
    """Load the tokenizer and model named by MODEL_NAME into the module globals.

    Returns:
        True when both were loaded, False when loading raised. The error is
        printed rather than propagated so the UI can still start.
    """
    global model, tokenizer

    try:
        print(f"🏥 Loading medical chatbot model: {MODEL_NAME}")

        # Load tokenizer
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side="left")
        if tokenizer.pad_token is None:
            # No dedicated pad token is defined; reuse EOS for padding.
            tokenizer.pad_token = tokenizer.eos_token

        # Load model with CPU optimization.
        # trust_remote_code is intentionally NOT enabled: this checkpoint loads
        # through the classes bundled with transformers, so there is no reason
        # to allow execution of code downloaded from the model repository.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,  # Use float32 for CPU
            low_cpu_mem_usage=True,
        )

        print("✅ Model loaded successfully!")
        return True

    except Exception as e:  # startup boundary: report and let the app come up
        print(f"❌ Failed to load model: {str(e)}")
        return False


def _input_token_limit():
    """Return how many prompt tokens fit once MAX_NEW_TOKENS are reserved."""
    window = getattr(model.config, "max_position_embeddings", None)
    if not window:
        window = _FALLBACK_CONTEXT_WINDOW
    return max(window - MAX_NEW_TOKENS, 1)


def _encode_prompt(prompt):
    """Build the model input ids for *prompt*, trimming only the user question.

    The instruction context and the trailing assistant cue are kept whole; when
    the total would exceed the input limit, the user question is shortened.

    Returns:
        A ``(1, sequence_length)`` tensor of token ids.
    """
    limit = _input_token_limit()

    # The leading space / newline is kept with the following piece so that the
    # pieces tokenize the same way the single joined string would.
    head_ids = tokenizer.encode(f"{MEDICAL_CONTEXT}\n\n{_USER_MARKER}")
    question_ids = tokenizer.encode(f" {prompt}")
    tail_ids = tokenizer.encode(f"\n{_ASSISTANT_MARKER}")

    room = limit - len(head_ids) - len(tail_ids)
    if room > 0:
        question_ids = question_ids[:room]

    token_ids = head_ids + question_ids + tail_ids
    # Last resort when the context alone overflows: keep the END of the
    # sequence so the question and the assistant cue are never cut off.
    token_ids = token_ids[-limit:]

    return torch.tensor([token_ids], dtype=torch.long)


def _extract_reply(generated_text):
    """Return the assistant's reply from the newly generated text.

    Anything from the first invented "User:" turn onward is discarded, and a
    leading "Assistant:" echo is removed.
    """
    reply = generated_text.split(_USER_MARKER, 1)[0].strip()
    if reply.startswith(_ASSISTANT_MARKER):
        reply = reply[len(_ASSISTANT_MARKER):].strip()
    return reply


def generate_medical_response(prompt):
    """Generate a reply to *prompt* using the loaded model.

    Args:
        prompt: The user's question as plain text.

    Returns:
        The reply text. A fixed message is returned instead when the model is
        not loaded, when the reply is shorter than 10 characters, or when
        generation raises.
    """
    global model, tokenizer

    if model is None or tokenizer is None:
        return "❌ Model not loaded. Please wait for initialization."

    try:
        print(f"🔄 Processing: {prompt[:50]}{'...' if len(prompt) > 50 else ''}")

        # Tokenize
        input_ids = _encode_prompt(prompt)
        # Explicit mask: pad and EOS share an id, so it cannot be inferred.
        attention_mask = torch.ones_like(input_ids)

        # Generate with optimized parameters
        with torch.no_grad():
            output_ids = model.generate(
                input_ids,
                attention_mask=attention_mask,
                max_new_tokens=MAX_NEW_TOKENS,
                temperature=TEMPERATURE,
                top_p=TOP_P,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
                num_return_sequences=1,
            )

        # Decode only the tokens produced after the prompt. Slicing by token
        # count is exact, unlike searching the decoded text for the prompt.
        new_token_ids = output_ids[0][input_ids.shape[-1]:]
        response = _extract_reply(
            tokenizer.decode(new_token_ids, skip_special_tokens=True)
        )

        # Clean up response - keep it natural as per prompt guidelines
        if len(response) < 10:
            response = "I'd be happy to help with your medical question. Could you please provide more specific details? 😊"

        print(f"✅ Response generated: {len(response)} characters")
        return response

    except Exception as e:  # request boundary: never surface a traceback to the user
        print(f"❌ Generation error: {str(e)}")
        return "I encountered a technical issue. Please try rephrasing your question. For immediate medical concerns, please consult a healthcare professional."

    finally:
        # Memory cleanup, also after a failed generation.
        gc.collect()


def chat_interface(message, history):
    """Gradio chat callback: validate *message* and return the model's reply.

    Args:
        message: Text typed by the user.
        history: Prior conversation supplied by Gradio; not used.

    Returns:
        The reply text, or a prompt to enter a question when *message* is blank.
    """
    if not message or not message.strip():
        return "Please enter a medical question."

    # Generate response
    return generate_medical_response(message.strip())


# Load model on startup
print("🏥 Initializing Medical Chatbot...")
model_loaded = load_model()

if not model_loaded:
    print("⚠️ WARNING: Model failed to load. Responses may be limited.")

# Create Gradio interface
demo = gr.ChatInterface(
    chat_interface,
    type="messages",
    title="🏥 Medical Information Assistant",
    # Written without leading indentation so the Markdown is not rendered as
    # an indented code block.
    description=(
        "A medical information chatbot powered by AI. This assistant provides educational health information.\n"
        "\n"
        "⚠️ **Important Disclaimer**: This chatbot provides general health information for educational purposes only. "
        "It should not replace professional medical advice, diagnosis, or treatment. "
        "Always consult qualified healthcare professionals for medical concerns."
    ),
    examples=[
        "What are the symptoms of diabetes?",
        "How can I maintain a healthy heart?",
        "What should I know about high blood pressure?",
        "Tell me about the importance of regular exercise",
        "What are common causes of headaches?",
        "How can I improve my sleep quality?",
    ],
    cache_examples=False,
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 800px !important;
        margin: auto !important;
    }
    .message {
        border-radius: 10px !important;
    }
    """,
)

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        debug=True,
    )