import gradio as gr
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from huggingface_hub import login

# --- Configuration ---
# Using a smaller, faster-loading model for a better initial experience.
# Change this back if you have the resources.
BASE_MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
# WARNING: A LoRA adapter only works with the exact base model it was trained on.
# If you change BASE_MODEL_ID, you must also swap in a compatible adapter.
ADAPTER_PATH = "echarif/lora_adapter_llama3.2_1B"

# --- Helper Functions ---
def authenticate_huggingface():
    """Authenticate with HuggingFace using an environment variable."""
    token = os.getenv('HF_TOKEN')
    if token:
        try:
            login(token=token, add_to_git_credential=False)
            return True, "✅ Successfully authenticated with HuggingFace."
        except Exception as e:
            return False, f"❌ HuggingFace authentication failed: {e}"
    return False, "⚠️ HuggingFace token (HF_TOKEN) not found in environment variables."

# --- Core Application Logic ---
def load_chatbot_model():
    """
    Generator function that loads the model and tokenizer, yielding status updates.
    This non-blocking approach provides a better user experience.
    """
    # Step 1: Authentication
    authenticated, auth_message = authenticate_huggingface()
    yield auth_message, None, None, gr.update(interactive=False), gr.update(interactive=False)
    if not authenticated:
        return

    # Step 2: Model loading
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # bfloat16 needs compute capability >= 8.0 (Ampere or newer); otherwise fall back to float32.
        torch_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float32

        # FIX: Use no-op gr.update() placeholders so intermediate yields don't
        # overwrite the textbox and button outputs with None.
        yield f"🔄 Loading base model ({BASE_MODEL_ID}) on {device}...", None, None, gr.update(), gr.update()
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=torch_dtype,
            device_map="auto",
        )
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # The adapter load is commented out in case it does not match the base model above.
        # To use your adapter, you MUST load the base model it was trained on.
        # yield f"🔄 Loading LoRA adapter ({ADAPTER_PATH})...", None, None, gr.update(), gr.update()
        # model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
        model = base_model  # Using the base model directly
        model.eval()

        yield "✅ Model loaded successfully! You can now ask questions.", model, tokenizer, gr.update(interactive=True), gr.update(interactive=True)

    except Exception as e:
        error_msg = f"❌ Error loading model: {str(e)}"
        yield error_msg, None, None, gr.update(interactive=False), gr.update(interactive=False)

def generate_answer(model, tokenizer, messages, generation_params):
    """Generate a response from the model."""
    if model is None or tokenizer is None:
        return "Error: Model is not loaded. Please wait for loading to complete."
    try:
        # Llama 3 requires explicit termination tokens for the assistant role.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]

        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(generation_params['max_new_tokens']),
                temperature=float(generation_params['temperature']),
                top_p=float(generation_params['top_p']),
                repetition_penalty=float(generation_params['repetition_penalty']),
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=terminators,
            )

        # Drop the prompt tokens so only the newly generated text is decoded.
        response_ids = outputs[0][inputs.input_ids.shape[1]:]
        response = tokenizer.decode(response_ids, skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        return f"❌ Error during generation: {e}"

# --- Gradio UI Event Handlers ---
def chat_interface(message, history, model, tokenizer, max_tokens, temp, top_p, rep_penalty):
    if not message.strip():
        return history
    if model is None or tokenizer is None:
        history.append((message, "❌ **Model not loaded!** Please wait or check the logs for errors."))
        return history

    # FIX: Gradio's tuple-format history stores (user_message, assistant_message)
    # pairs, not (role, content) pairs, so each pair expands to two chat messages.
    messages = []
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    history.append((message, ""))
    generation_params = {'max_new_tokens': max_tokens, 'temperature': temp, 'top_p': top_p, 'repetition_penalty': rep_penalty}
    response = generate_answer(model, tokenizer, messages, generation_params)
    history[-1] = (message, response)
    return history

def clear_chat_history():
    return [], ""

# FIX: The function now correctly updates the state variable as well as the component visibility.
def toggle_settings_panel(is_visible):
    """Toggles the visibility of the settings panel and its state."""
    return gr.update(visible=not is_visible), not is_visible
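# For reference, the message list built above follows the standard chat-template
# structure expected by `tokenizer.apply_chat_template`. A sketch, with
# illustrative contents only:
#
#   messages = [
#       {"role": "user", "content": "What is a LoRA adapter?"},
#       {"role": "assistant", "content": "A small set of low-rank weights trained on top of a frozen base model."},
#       {"role": "user", "content": "How do I load one with peft?"},
#   ]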
# --- Professional CSS ---
professional_css = """
/* Import Font */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

/* Base Styles */
.gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif !important;
    background-color: var(--background-fill-primary) !important;
}

/* Main Layout */
.main-container {
    display: flex;
    height: 100vh;
    max-width: 100%;
    padding: 0 !important;
    margin: 0 !important;
}
.sidebar {
    width: 280px;
    background-color: var(--background-fill-secondary) !important;
    border-right: 1px solid var(--border-color-primary) !important;
    display: flex;
    flex-direction: column;
    padding: 1rem;
    gap: 1rem;
}
.chat-area {
    flex: 1;
    display: flex;
    flex-direction: column;
    background-color: var(--background-fill-primary) !important;
}

/* Sidebar Elements */
.sidebar-button {
    background: var(--button-secondary-background-fill) !important;
    color: var(--button-secondary-text-color) !important;
    border: 1px solid var(--border-color-primary) !important;
    transition: all 0.2s ease !important;
}
.sidebar-button:hover {
    border-color: var(--color-accent) !important;
    background: var(--button-secondary-background-fill-hover) !important;
}
.new-chat-btn {
    background: var(--button-primary-background-fill) !important;
    color: var(--button-primary-text-color) !important;
    border: none !important;
}
.new-chat-btn:hover {
    background: var(--button-primary-background-fill-hover) !important;
}
.status-heading {
    margin-top: auto !important;
}
.status-markdown {
    background-color: var(--background-fill-secondary) !important;
    border: 1px solid var(--border-color-primary) !important;
    padding: 0.75rem !important;
    border-radius: var(--radius-lg) !important;
    font-size: var(--text-sm) !important;
    line-height: 1.4 !important;
    color: var(--body-text-color-subdued) !important;
}
.status-markdown p {
    margin: 0 !important;
}

/* Chat Area */
.chat-header {
    padding: 1rem 1.5rem;
    border-bottom: 1px solid var(--border-color-primary) !important;
    min-height: 64px;
}
.chat-title {
    font-size: 1.25rem;
    font-weight: 600;
    color: var(--text-color-primary);
}
.gradio-chatbot {
    flex: 1;
    overflow-y: auto;
    background-color: var(--background-fill-primary) !important;
    border: none !important;
    box-shadow: none !important;
}
.gradio-chatbot .message {
    padding: 1.5rem !important;
    margin: 0 !important;
    border-radius: 0 !important;
    border-bottom: 1px solid var(--border-color-primary) !important;
}
.gradio-chatbot .message .content {
    max-width: 800px !important;
    margin: 0 auto !important;
    font-size: 1rem !important;
    line-height: 1.6 !important;
}

/* Input Area */
.chat-input-area {
    padding: 1.5rem;
    border-top: 1px solid var(--border-color-primary) !important;
    background-color: var(--background-fill-primary) !important;
}
.input-container {
    max-width: 800px;
    margin: 0 auto;
}

/* Settings Panel */
.settings-panel {
    position: absolute !important;
    top: 1rem;
    right: 1rem;
    width: 340px;
    z-index: 100;
    background-color: var(--background-fill-secondary) !important;
    border: 1px solid var(--border-color-primary) !important;
    border-radius: var(--radius-xl) !important;
    box-shadow: var(--shadow-drop-lg) !important;
    padding: 1.5rem !important;
}
.settings-panel-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 1rem;
}
.settings-title {
    font-size: 1.1rem;
    font-weight: 600;
    color: var(--text-color-primary);
    margin: 0;
}
.close-button {
    min-width: 30px !important;
    max-width: 30px !important;
    height: 30px !important;
    font-size: 1rem !important;
}

/* Hide default Gradio padding and footer */
.gradio-container .gap {
    gap: 0 !important;
}
footer {
    display: none !important;
}
"""

# --- Gradio UI Layout ---
def create_interface():
    with gr.Blocks(css=professional_css, title="Professional Student Assistant", theme=gr.themes.Soft(primary_hue="slate", secondary_hue="gray")) as app:
        model_state = gr.State(None)
        tokenizer_state = gr.State(None)
        settings_panel_visible = gr.State(False)

        with gr.Row(elem_classes="main-container"):
            with gr.Column(scale=0, min_width=280, elem_classes="sidebar"):
                new_chat_btn = gr.Button("➕ New Chat", elem_classes=["sidebar-button", "new-chat-btn"])
                gr.Markdown("---")
                settings_btn = gr.Button("⚙️ Generation Settings", elem_classes="sidebar-button")
                gr.Markdown("### Status", elem_classes="status-heading")
                model_status_md = gr.Markdown("🔄 Model is loading automatically...", elem_classes="status-markdown")

            with gr.Column(scale=4, elem_classes="chat-area"):
                gr.HTML(f'<div class="chat-header"><div class="chat-title">Student Assistant ({BASE_MODEL_ID.split("/")[-1]})</div></div>')
                chatbot = gr.Chatbot(
                    elem_id="chatbot",
                    label="Student Assistant",
                    show_label=False,
                    bubble_full_width=False,
                    avatar_images=("🧑‍💻", "🤖"),
                    layout="panel",
                    height=700,
                    # FIX: Tuple-format history is a list of (user, assistant) pairs;
                    # None on the user side renders a standalone assistant greeting.
                    value=[(None, "Hi there! I'm loading the AI model now. I'll let you know when I'm ready to answer your questions.")]
                )
                with gr.Row(elem_classes="chat-input-area"):
                    with gr.Column(elem_classes="input-container"):
                        msg_textbox = gr.Textbox(placeholder="Please wait for the model to load...", show_label=False, container=False, scale=8, lines=1, max_lines=5, interactive=False)
                        send_btn = gr.Button("Send", scale=1, interactive=False)

        with gr.Group(elem_classes="settings-panel", visible=False) as settings_panel:
            with gr.Row(elem_classes="settings-panel-header"):
                gr.HTML('<h3 class="settings-title">Generation Settings</h3>')
                close_settings_btn = gr.Button("✕", elem_classes="close-button")
            max_tokens_slider = gr.Slider(256, 4096, value=1024, step=128, label="Max New Tokens")
            temp_slider = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
            rep_penalty_slider = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition Penalty")

        # --- Event Handler Wiring ---
        # NEW: Automatic model loading on page load
        app.load(
            fn=load_chatbot_model,
            outputs=[model_status_md, model_state, tokenizer_state, msg_textbox, send_btn],
            queue=True
        )

        # Chat logic
        chat_inputs = [msg_textbox, chatbot, model_state, tokenizer_state, max_tokens_slider, temp_slider, top_p_slider, rep_penalty_slider]
        chat_outputs = [chatbot]
        msg_textbox.submit(chat_interface, inputs=chat_inputs, outputs=chat_outputs).then(lambda: "", outputs=msg_textbox)
        send_btn.click(chat_interface, inputs=chat_inputs, outputs=chat_outputs).then(lambda: "", outputs=msg_textbox)

        # UI control logic
        new_chat_btn.click(clear_chat_history, outputs=[chatbot, msg_textbox])
        # FIX: The click events now update both the panel and its state tracker
        settings_btn.click(toggle_settings_panel, inputs=[settings_panel_visible], outputs=[settings_panel, settings_panel_visible])
        close_settings_btn.click(toggle_settings_panel, inputs=[settings_panel_visible], outputs=[settings_panel, settings_panel_visible])

    return app

# --- Launch the App ---
if __name__ == "__main__":
    print("🚀 Launching Professional Student Assistant Interface...")
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, debug=True)
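# Usage sketch (the filename `app.py` and the token value are assumptions):
#
#   export HF_TOKEN=hf_xxxxxxxxxxxxxxxx   # the account must have access to the gated Llama weights
#   python app.py
#
# The server binds to 0.0.0.0:7860, so the UI is reachable at
# http://localhost:7860 locally, or via the machine's IP on the network.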