# =================================================================================
# rag_pipeline.py: Create the Gemini model and the RAG chain
# =================================================================================
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.llms.gemini import Gemini
from llama_index.core.prompts.base import PromptTemplate
from llama_index.core.prompts import ChatPromptTemplate
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from google.generativeai.types import HarmCategory, HarmBlockThreshold
import config
import os


def initialize_llm_and_embed_model():
    """
    Initializes and sets the global LLM and embedding model for LlamaIndex.
    """
    print(f"Initializing Gemini model: {config.LLM_MODEL_ID}...")

    # Define safety settings to be less restrictive, especially for medical content
    safety_settings = {
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    }

    # System instruction for Gemini (if supported by your version)
    system_instruction = (
        "You are PharmaBot, an AI pharmaceutical information assistant. "
        "You provide accurate information from FDA drug labels but never give medical advice or diagnose conditions. "
        "You always respond in the user's language and maintain conversation context throughout the session."
    )

    llm = Gemini(
        model_name=config.LLM_MODEL_ID,
        temperature=0.3,
        safety_settings=safety_settings,
        generation_config={"candidate_count": 1},
        system_instruction=system_instruction,  # Add system instruction
    )

    print(f"Loading embedding model: {config.EMBEDDING_MODEL_NAME}...")

    # Get the token from environment variables
    hf_token = os.getenv("HUGGING_FACE_TOKEN")
    if not hf_token:
        print("Warning: HUGGING_FACE_TOKEN environment variable not set.")

    embed_model = HuggingFaceEmbedding(
        model_name=config.EMBEDDING_MODEL_NAME,
        token=hf_token,
    )

    # Set the global models for LlamaIndex
    Settings.llm = llm
    Settings.embed_model = embed_model


def load_vector_index():
    """
    Loads the LlamaIndex vector index from storage.
    """
    if not os.path.exists(config.LLAMA_INDEX_STORE_PATH):
        raise FileNotFoundError(
            f"LlamaIndex store not found at {config.LLAMA_INDEX_STORE_PATH}. "
            "Please run build_knowledge_base.py first."
        )

    print("Loading LlamaIndex vector store...")
    storage_context = StorageContext.from_defaults(persist_dir=config.LLAMA_INDEX_STORE_PATH)
    index = load_index_from_storage(storage_context)
    return index


def build_query_engine(index):
    """
    Builds a query engine from the LlamaIndex vector index.
    """
    # Condensed, action-oriented prompt that guides behavior without being conversational
    qa_template_str = (
        "Context information from FDA drug labels:\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n\n"
        "Instructions:\n"
        "1. LANGUAGE: Respond entirely in the same language as the query. Detect: English, Turkish, Spanish, French, German, Arabic, etc.\n"
        "2. QUERY TYPE:\n"
        "   - Medical/Drug query (medications, symptoms, dosages, interactions) → Use context to provide structured response\n"
        "   - General conversation (greetings, small talk) → Respond conversationally, no context needed\n"
        "3. CONTEXT CHECK:\n"
        "   - If context is empty/irrelevant → State you couldn't find information, ask for clarification\n"
        "   - If context is relevant → Proceed with response\n"
        "4. RESPONSE FORMAT FOR DRUG QUERIES:\n"
        "   **Drug Name:** [from brand_name/generic_name]\n"
        "   **What It's Used For:** [summarize indications_and_usage]\n"
        "   **How to Take It:** [summarize dosage_and_administration]\n"
        "   **Important Warnings:** [list 4-5 critical points from warnings/adverse_reactions/contraindications]\n"
        "   **Drug Interactions:** [if available from drug_interactions]\n"
        "5. RESPONSE FORMAT FOR DRUG INTERACTIONS:\n"
        "   **Drug Interaction: [Drug A] and [Drug B]**\n"
        "   **Interaction Found:** [describe]\n"
        "   **Clinical Significance:** [explain risks]\n"
        "   **Recommendation:** [FDA guidance]\n"
        "6. RESPONSE FORMAT FOR SYMPTOM QUERIES (first ask):\n"
        "   Ask 5 clarifying questions: duration, severity, prior medications, current medications, allergies\n"
        "7. RESPONSE FORMAT FOR SYMPTOM QUERIES (after details):\n"
        "   Present 2-3 FDA-approved medication options with: Type, Used For, Dosage, Key Warning\n"
        "8. SAFETY:\n"
        "   - Only use info from context for medical responses\n"
        "   - If details missing from context, state explicitly\n"
        "   - ALWAYS end medical responses with:\n"
        "     ⚠️ Disclaimer: I am an AI assistant, not a medical professional. This information is from FDA labels and is for educational purposes only. Always consult your doctor or pharmacist before taking any medication.\n"
        "9. MEMORY: Reference previous drugs/symptoms/allergies mentioned in conversation\n\n"
        "Query: {query_str}\n\n"
        "Answer (in same language as query):"
    )
    qa_template = PromptTemplate(qa_template_str)

    print("Building query engine...")

    memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

    # Use the "context" chat mode to avoid condense_question_prompt issues;
    # conversation history is still maintained through the memory buffer.
    query_engine = index.as_chat_engine(
        chat_mode="context",  # Changed from "condense_question" to "context"
        memory=memory,
        system_prompt=(
            "You are PharmaBot, an AI pharmaceutical information assistant. "
            "Always respond in the user's language. Use FDA drug label data to answer medical queries. "
            "Never diagnose or prescribe. Include disclaimers on medical responses."
        ),
        context_template=qa_template,  # Use our custom template
        similarity_top_k=5,
        verbose=True,
    )

    return query_engine
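

# ---------------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the pipeline). It assumes the
# index has already been built by build_knowledge_base.py and that the credentials
# referenced above (e.g. the Gemini API key and HUGGING_FACE_TOKEN) are available in
# the environment; adapt it to your actual entry point (e.g. a CLI or web app).
# ---------------------------------------------------------------------------------
if __name__ == "__main__":
    initialize_llm_and_embed_model()              # register the global LLM and embedder
    vector_index = load_vector_index()            # load the persisted vector store
    chat_engine = build_query_engine(vector_index)

    # Single-turn example; the ChatMemoryBuffer keeps context across further calls.
    response = chat_engine.chat("What is ibuprofen used for?")
    print(response.response)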