import gradio as gr
import torch
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
from huggingface_hub import login

# --- Configuration ---
# Using a smaller, faster-loading model for a better initial experience.
# Change this back if you have the resources.
BASE_MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
# WARNING: A LoRA adapter only works with the exact base model it was trained on.
# If you change BASE_MODEL_ID, you must also swap in a compatible adapter.
ADAPTER_PATH = "echarif/lora_adapter_llama3.2_1B"

# --- Helper Functions ---
def authenticate_huggingface():
    """Authenticate with HuggingFace using an environment variable."""
    token = os.getenv('HF_TOKEN')
    if token:
        try:
            login(token=token, add_to_git_credential=False)
            return True, "✅ Successfully authenticated with HuggingFace."
        except Exception as e:
            return False, f"❌ HuggingFace authentication failed: {e}"
    return False, "⚠️ HuggingFace token (HF_TOKEN) not found in environment variables."

# --- Core Application Logic ---
def load_chatbot_model():
    """
    Generator function that loads the model and tokenizer, yielding status updates.
    This non-blocking approach provides a better user experience.
    """
    # Step 1: Authentication
    authenticated, auth_message = authenticate_huggingface()
    yield auth_message, None, None, gr.update(interactive=False), gr.update(interactive=False)
    if not authenticated:
        return

    # Step 2: Model loading
    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # bfloat16 needs compute capability >= 8.0 (Ampere or newer); otherwise fall back to float32.
        torch_dtype = torch.bfloat16 if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8 else torch.float32

        # FIX: Use no-op gr.update() placeholders so intermediate yields don't
        # overwrite the textbox and button outputs with None.
        yield f"🔄 Loading base model ({BASE_MODEL_ID}) on {device}...", None, None, gr.update(), gr.update()
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=torch_dtype,
            device_map="auto",
        )
        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        # The adapter load is commented out in case it does not match the base model above.
        # To use your adapter, you MUST load the base model it was trained on.
        # yield f"🔄 Loading LoRA adapter ({ADAPTER_PATH})...", None, None, gr.update(), gr.update()
        # model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
        model = base_model  # Using the base model directly
        model.eval()

        yield "✅ Model loaded successfully! You can now ask questions.", model, tokenizer, gr.update(interactive=True), gr.update(interactive=True)

    except Exception as e:
        error_msg = f"❌ Error loading model: {str(e)}"
        yield error_msg, None, None, gr.update(interactive=False), gr.update(interactive=False)

def generate_answer(model, tokenizer, messages, generation_params):
    """Generate a response from the model."""
    if model is None or tokenizer is None:
        return "Error: Model is not loaded. Please wait for loading to complete."
    try:
        # Llama 3 requires explicit termination tokens for the assistant role.
        terminators = [
            tokenizer.eos_token_id,
            tokenizer.convert_tokens_to_ids("<|eot_id|>")
        ]

        prompt = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=int(generation_params['max_new_tokens']),
                temperature=float(generation_params['temperature']),
                top_p=float(generation_params['top_p']),
                repetition_penalty=float(generation_params['repetition_penalty']),
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=terminators,
            )

        # Drop the prompt tokens so only the newly generated text is decoded.
        response_ids = outputs[0][inputs.input_ids.shape[1]:]
        response = tokenizer.decode(response_ids, skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        return f"❌ Error during generation: {e}"

# --- Gradio UI Event Handlers ---
def chat_interface(message, history, model, tokenizer, max_tokens, temp, top_p, rep_penalty):
    if not message.strip():
        return history
    if model is None or tokenizer is None:
        history.append((message, "❌ **Model not loaded!** Please wait or check the logs for errors."))
        return history

    # FIX: Gradio's tuple-format history stores (user_message, assistant_message)
    # pairs, not (role, content) pairs, so each pair expands to two chat messages.
    messages = []
    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    history.append((message, ""))
    generation_params = {'max_new_tokens': max_tokens, 'temperature': temp, 'top_p': top_p, 'repetition_penalty': rep_penalty}
    response = generate_answer(model, tokenizer, messages, generation_params)
    history[-1] = (message, response)
    return history

def clear_chat_history():
    return [], ""

# FIX: The function now correctly updates the state variable as well as the component visibility.
def toggle_settings_panel(is_visible):
    """Toggles the visibility of the settings panel and its state."""
    return gr.update(visible=not is_visible), not is_visible
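# For reference, the message list built above follows the standard chat-template
# structure expected by `tokenizer.apply_chat_template`. A sketch, with
# illustrative contents only:
#
#   messages = [
#       {"role": "user", "content": "What is a LoRA adapter?"},
#       {"role": "assistant", "content": "A small set of low-rank weights trained on top of a frozen base model."},
#       {"role": "user", "content": "How do I load one with peft?"},
#   ]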
# --- Professional CSS ---
professional_css = """
/* Import Font */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

/* Base Styles */
.gradio-container {
    font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif !important;
    background-color: var(--background-fill-primary) !important;
}

/* Main Layout */
.main-container {
    display: flex;
    height: 100vh;
    max-width: 100%;
    padding: 0 !important;
    margin: 0 !important;
}
.sidebar {
    width: 280px;
    background-color: var(--background-fill-secondary) !important;
    border-right: 1px solid var(--border-color-primary) !important;
    display: flex;
    flex-direction: column;
    padding: 1rem;
    gap: 1rem;
}
.chat-area {
    flex: 1;
    display: flex;
    flex-direction: column;
    background-color: var(--background-fill-primary) !important;
}

/* Sidebar Elements */
.sidebar-button {
    background: var(--button-secondary-background-fill) !important;
    color: var(--button-secondary-text-color) !important;
    border: 1px solid var(--border-color-primary) !important;
    transition: all 0.2s ease !important;
}
.sidebar-button:hover {
    border-color: var(--color-accent) !important;
    background: var(--button-secondary-background-fill-hover) !important;
}
.new-chat-btn {
    background: var(--button-primary-background-fill) !important;
    color: var(--button-primary-text-color) !important;
    border: none !important;
}
.new-chat-btn:hover {
    background: var(--button-primary-background-fill-hover) !important;
}
.status-heading {
    margin-top: auto !important;
}
.status-markdown {
    background-color: var(--background-fill-secondary) !important;
    border: 1px solid var(--border-color-primary) !important;
    padding: 0.75rem !important;
    border-radius: var(--radius-lg) !important;
    font-size: var(--text-sm) !important;
    line-height: 1.4 !important;
    color: var(--body-text-color-subdued) !important;
}
.status-markdown p {
    margin: 0 !important;
}

/* Chat Area */
.chat-header {
    padding: 1rem 1.5rem;
    border-bottom: 1px solid var(--border-color-primary) !important;
    min-height: 64px;
}
.chat-title {
    font-size: 1.25rem;
    font-weight: 600;
    color: var(--text-color-primary);
}
.gradio-chatbot {
    flex: 1;
    overflow-y: auto;
    background-color: var(--background-fill-primary) !important;
    border: none !important;
    box-shadow: none !important;
}
.gradio-chatbot .message {
    padding: 1.5rem !important;
    margin: 0 !important;
    border-radius: 0 !important;
    border-bottom: 1px solid var(--border-color-primary) !important;
}
.gradio-chatbot .message .content {
    max-width: 800px !important;
    margin: 0 auto !important;
    font-size: 1rem !important;
    line-height: 1.6 !important;
}

/* Input Area */
.chat-input-area {
    padding: 1.5rem;
    border-top: 1px solid var(--border-color-primary) !important;
    background-color: var(--background-fill-primary) !important;
}
.input-container {
    max-width: 800px;
    margin: 0 auto;
}

/* Settings Panel */
.settings-panel {
    position: absolute !important;
    top: 1rem;
    right: 1rem;
    width: 340px;
    z-index: 100;
    background-color: var(--background-fill-secondary) !important;
    border: 1px solid var(--border-color-primary) !important;
    border-radius: var(--radius-xl) !important;
    box-shadow: var(--shadow-drop-lg) !important;
    padding: 1.5rem !important;
}
.settings-panel-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 1rem;
}
.settings-title {
    font-size: 1.1rem;
    font-weight: 600;
    color: var(--text-color-primary);
    margin: 0;
}
.close-button {
    min-width: 30px !important;
    max-width: 30px !important;
    height: 30px !important;
    font-size: 1rem !important;
}

/* Hide default Gradio padding and footer */
.gradio-container .gap {
    gap: 0 !important;
}
footer {
    display: none !important;
}
"""

# --- Gradio UI Layout ---
def create_interface():
    with gr.Blocks(css=professional_css, title="Professional Student Assistant", theme=gr.themes.Soft(primary_hue="slate", secondary_hue="gray")) as app:
        model_state = gr.State(None)
        tokenizer_state = gr.State(None)
        settings_panel_visible = gr.State(False)

        with gr.Row(elem_classes="main-container"):
            with gr.Column(scale=0, min_width=280, elem_classes="sidebar"):
                new_chat_btn = gr.Button("➕ New Chat", elem_classes=["sidebar-button", "new-chat-btn"])
                gr.Markdown("---")
                settings_btn = gr.Button("⚙️ Generation Settings", elem_classes="sidebar-button")
                gr.Markdown("### Status", elem_classes="status-heading")
                model_status_md = gr.Markdown("🔄 Model is loading automatically...", elem_classes="status-markdown")

            with gr.Column(scale=4, elem_classes="chat-area"):
                gr.HTML(f'<div class="chat-header"><div class="chat-title">Student Assistant ({BASE_MODEL_ID.split("/")[-1]})</div></div>')
                chatbot = gr.Chatbot(
                    elem_id="chatbot",
                    label="Student Assistant",
                    show_label=False,
                    bubble_full_width=False,
                    avatar_images=("🧑‍💻", "🤖"),
                    layout="panel",
                    height=700,
                    # FIX: Tuple-format history is a list of (user, assistant) pairs;
                    # None on the user side renders a standalone assistant greeting.
                    value=[(None, "Hi there! I'm loading the AI model now. I'll let you know when I'm ready to answer your questions.")]
                )
                with gr.Row(elem_classes="chat-input-area"):
                    with gr.Column(elem_classes="input-container"):
                        msg_textbox = gr.Textbox(placeholder="Please wait for the model to load...", show_label=False, container=False, scale=8, lines=1, max_lines=5, interactive=False)
                        send_btn = gr.Button("Send", scale=1, interactive=False)

        with gr.Group(elem_classes="settings-panel", visible=False) as settings_panel:
            with gr.Row(elem_classes="settings-panel-header"):
                gr.HTML('<h3 class="settings-title">Generation Settings</h3>')
                close_settings_btn = gr.Button("✕", elem_classes="close-button")
            max_tokens_slider = gr.Slider(256, 4096, value=1024, step=128, label="Max New Tokens")
            temp_slider = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
            top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
            rep_penalty_slider = gr.Slider(1.0, 2.0, value=1.1, step=0.05, label="Repetition Penalty")

        # --- Event Handler Wiring ---
        # NEW: Automatic model loading on page load
        app.load(
            fn=load_chatbot_model,
            outputs=[model_status_md, model_state, tokenizer_state, msg_textbox, send_btn],
            queue=True
        )

        # Chat logic
        chat_inputs = [msg_textbox, chatbot, model_state, tokenizer_state, max_tokens_slider, temp_slider, top_p_slider, rep_penalty_slider]
        chat_outputs = [chatbot]
        msg_textbox.submit(chat_interface, inputs=chat_inputs, outputs=chat_outputs).then(lambda: "", outputs=msg_textbox)
        send_btn.click(chat_interface, inputs=chat_inputs, outputs=chat_outputs).then(lambda: "", outputs=msg_textbox)

        # UI control logic
        new_chat_btn.click(clear_chat_history, outputs=[chatbot, msg_textbox])
        # FIX: The click events now update both the panel and its state tracker
        settings_btn.click(toggle_settings_panel, inputs=[settings_panel_visible], outputs=[settings_panel, settings_panel_visible])
        close_settings_btn.click(toggle_settings_panel, inputs=[settings_panel_visible], outputs=[settings_panel, settings_panel_visible])

    return app

# --- Launch the App ---
if __name__ == "__main__":
    print("🚀 Launching Professional Student Assistant Interface...")
    app = create_interface()
    app.launch(server_name="0.0.0.0", server_port=7860, debug=True)
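# Usage sketch (the filename `app.py` and the token value are assumptions):
#
#   export HF_TOKEN=hf_xxxxxxxxxxxxxxxx   # the account must have access to the gated Llama weights
#   python app.py
#
# The server binds to 0.0.0.0:7860, so the UI is reachable at
# http://localhost:7860 locally, or via the machine's IP on the network.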