Spaces:

cosmosai471
/

come_onnn

Running

App Files Community

cosmosai471 committed 27 days ago

Commit

6ac02a9

verified ·

1 Parent(s): c2a3849

Update app.py

Browse files

Files changed (1) hide show

app.py +185 -122

app.py CHANGED Viewed

@@ -17,18 +17,22 @@ from io import BytesIO
 import numpy as np
 # --- CONFIGURATION & INITIALIZATION ---
-STT_DEVICE = "cpu"
 os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
 AUDIO_DIR = "audio_outputs"
-DOC_DIR = "doc_outputs"
 if not os.path.exists(AUDIO_DIR):
     os.makedirs(AUDIO_DIR)
 if not os.path.exists(DOC_DIR):
     os.makedirs(DOC_DIR)
 REPO_ID = "cosmosai471/Luna-v3"
 MODEL_FILE = "luna.gguf"
-LOCAL_MODEL_PATH = MODEL_FILE
-SYSTEM_PROMPT = "You are Luna, a helpful and friendly AI assistant. Your response must begin with two separate tags: an **Intent** tag and a **Confidence** tag (0-100). Example: '[Intent: qa_general][Confidence: 85]'. Your full response must follow these tags."
 # Configuration: confidence threshold for triggering web search fallback
 CONFIDENCE_THRESHOLD = 30  # only trigger web-search fallback if confidence is less than this
@@ -51,10 +55,10 @@ try:
     print("Initializing Llama...")
     llm = Llama(
         model_path=LOCAL_MODEL_PATH,
-        n_ctx=8192,
-        n_threads=4,
-        n_batch=256,
-        n_gpu_layers=0,
         verbose=False
     )
     print("✅ Luna Model loaded successfully!")
@@ -62,6 +66,7 @@ except Exception as e:
     print(f"❌ Error loading Luna model: {e}")
     class DummyLLM:
         def create_completion(self, *args, **kwargs):
             yield {'choices': [{'text': '[Intent: qa_general][Confidence: 0] ERROR: Luna model failed to load. Check logs and resources.'}]}
     llm = DummyLLM()
@@ -74,7 +79,7 @@ except Exception as e:
 image_pipe = None
 try:
-    VLM_MODEL_ID = "llava-hf/llava-1.5-7b-hf"
     image_pipe = pipeline("image-to-text", model=VLM_MODEL_ID, device=STT_DEVICE)
     print(f"✅ Loaded {VLM_MODEL_ID} for image processing.")
 except Exception as e:
@@ -83,7 +88,7 @@ except Exception as e:
 img_gen_pipe = None
 try:
     img_gen_pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32)
-    img_gen_pipe.to(STT_DEVICE)
     print("✅ Loaded Stable Diffusion (v1-5) for image generation.")
 except Exception as e:
     print(f"⚠️ Could not load Image Generation pipeline. Image generation disabled. Error: {e}")
@@ -93,65 +98,58 @@ except Exception as e:
 def simulate_recording_delay():
     time.sleep(3)
-    return None
 def clean_response_stream(raw_text: str) -> str:
     """Cleans up raw response text by removing tags and repeats.
-    NOTE: do NOT split on the plain word 'Intent' or 'Action' because that would
-    chop off the tags and/or the rest of the response in many outputs.
     """
-    # Safely cut at common separators that mark model output boundaries
     clean_text = re.split(r'\nUser:|\nAssistant:|</s>', raw_text, 1)[0].strip()
     # Remove bracketed instruction tokens and inline actions
     clean_text = re.sub(r'\[/?INST\]|\[/?s\]|\s*<action>.*?</action>\s*', '', clean_text, flags=re.DOTALL).strip()
-    # Remove explicit tags if they are present (we remove them from visible output)
-    clean_text = re.sub(r'\[Intent:\s*[\w\-]+\]|\[Confidence:\s*\d{1,3}\]', '', clean_text, flags=re.IGNORECASE).strip()
-    # Remove repeated trailing words (simple dedupe heuristic)
     words = clean_text.split()
-    if len(words) > 4 and words[-2:] == words[-4:-2]:
         clean_text = ' '.join(words[:-2])
     return clean_text
 def web_search_tool(query: str) -> str:
-    time.sleep(1.5)
     print(f"Simulating Google Search fallback for: {query}")
     return f"\n\n🌐 **Web Search Results for '{query}':** I've gathered information from external sources to supplement my knowledge."
 def check_confidence_and_augment(raw_response_with_tags: str, prompt: str) -> str:
     """Checks confidence from the raw response tag and triggers fallback if very low.
-    Improvements:
-    - Uses a robust regex for confidence.
-    - If the response lacks a confidence tag, uses a simple length-based heuristic
-      to decide whether to consider confidence low or high (avoids defaulting to 0).
-    - Only triggers the web-search fallback when confidence is < CONFIDENCE_THRESHOLD.
     """
-    # Try to extract explicit confidence tag
     confidence_match = re.search(r'\[Confidence:\s*([0-9]{1,3})\]', raw_response_with_tags, flags=re.IGNORECASE)
     cleaned_response = clean_response_stream(raw_response_with_tags)
     if confidence_match:
         try:
             confidence_score = int(confidence_match.group(1))
-            # clamp to 0-100
             confidence_score = max(0, min(confidence_score, 100))
         except Exception:
             confidence_score = 0
     else:
-        # heuristic: if the cleaned response is short/empty -> likely low-confidence output
         if not cleaned_response or len(cleaned_response.strip()) < 30:
-            confidence_score = 10  # very low: trigger fallback
         else:
-            confidence_score = 85  # assume decent confidence when there's a substantial response
-    # Decide whether to invoke web search fallback
     if confidence_score < CONFIDENCE_THRESHOLD:
-        print(f"Low confidence ({confidence_score}%) detected (threshold={CONFIDENCE_THRESHOLD}). Triggering Google Search fallback.")
         search_snippet = web_search_tool(prompt)
         if "error" in cleaned_response.lower() or confidence_score <= 5:
-             final_response = f"I apologize for the limited response (Confidence: {confidence_score}%). {search_snippet} I will use this to generate a more comprehensive answer."
         else:
-            # keep whatever content exists, then add web results to supplement
             final_response = f"{cleaned_response} {search_snippet} I can elaborate further based on this."
     else:
         final_response = cleaned_response
@@ -159,7 +157,10 @@ def check_confidence_and_augment(raw_response_with_tags: str, prompt: str) -> st
     return final_response
 def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
-    """Uses the VLM pipeline (LLaVA) for VQA."""
     global image_pipe
     success = False
     if image_pipe is None:
@@ -169,25 +170,33 @@ def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
     try:
         if isinstance(image_data_or_path, str):
             image = Image.open(image_data_or_path).convert("RGB")
-        elif isinstance(image_data_or_path, np.ndarray):
             image = Image.fromarray(image_data_or_path).convert("RGB")
         if image:
             vqa_prompt = f"USER: <image>\n{message}\nASSISTANT:"
             results = image_pipe(image, prompt=vqa_prompt, generate_kwargs={"max_new_tokens": 1024})
-            raw_vlm_output = results[0]['generated_text'] if results else "Error: VLM did not return text."
-            vqa_response = raw_vlm_output.split("ASSISTANT:")[-1].strip()
-            if not vqa_response: vqa_response = "VLM analysis failed or returned empty."
             del image
-            success = True
             prompt_injection = f"**VQA Analysis:** {vqa_response}\n\n**User Query:** {message}"
             return prompt_injection, success
     except Exception as e:
         print(f"Image Pipeline Error: {e}")
         return f"[Image Processing Error: {e}] **User Query:** {message}", success
     return f"[Image Processing Error: Could not load image data.] **User Query:** {message}", success
 def transcribe_audio(audio_file_path: str) -> Tuple[str, str, gr.update, gr.update, bool, gr.update]:
@@ -198,11 +207,11 @@ def transcribe_audio(audio_file_path: str) -> Tuple[str, str, gr.update, gr.upda
         transcribed_text = stt_pipe(audio_file_path)["text"]
         new_button_update = gr.update(value="↑", interactive=True, elem_classes=["circle-btn", "send-mode"])
         return (
-            transcribed_text.strip(),
-            f"🎙️ Transcribed: '{transcribed_text.strip()}'",
-            gr.update(interactive=True),
-            new_button_update,
-            True,
             gr.update(visible=False)
         )
     except Exception as e:
@@ -211,14 +220,14 @@ def transcribe_audio(audio_file_path: str) -> Tuple[str, str, gr.update, gr.upda
 def text_to_audio(text: str, is_voice_chat: bool) -> str or None:
     if not is_voice_chat:
-        return None
     clean_text = re.sub(r'```.*?```|\[Image Processing Error:.*?\]|\*\*Web Search Results:.*?$|\(file=.*?\)', '', text, flags=re.DOTALL | re.MULTILINE)
     if len(clean_text.strip()) > 5:
         try:
             audio_output_path = os.path.join(AUDIO_DIR, f"luna_response_{random.randint(1000, 9999)}.mp3")
             tts = gTTS(text=clean_text.strip(), lang='en')
             tts.save(audio_output_path)
-            return audio_output_path
         except Exception as e:
             print(f"gTTS Error: {e}")
             return None
@@ -239,22 +248,35 @@ INTENT_STATUS_MAP = {
 }
 def get_intent_status(raw_response: str, is_vqa_flow: bool) -> Tuple[str, str, str]:
-    """Parses intent/confidence, returns intent, status, cleaned text."""
-    match = re.search(r'\[Intent:\s*([\w\-]+)\]', raw_response, re.IGNORECASE)
     intent = match.group(1).lower() if match else "default"
     if is_vqa_flow:
         intent = "vqa"
-    cleaned_text = re.sub(r'\[Intent:\s*[\w\-]+\]\s*', '', raw_response, count=1, flags=re.IGNORECASE).strip()
     cleaned_text = re.sub(r'\[Confidence:\s*\d{1,3}\]\s*', '', cleaned_text, count=1, flags=re.IGNORECASE).strip()
     status = INTENT_STATUS_MAP.get(intent, INTENT_STATUS_MAP["default"])
     return intent, status, cleaned_text
 def generate_file_content(content: str, history: List[Dict[str, str]], file_type: str):
-    """Generates a file (Image, DOCX, PPTX) and returns the file path for download."""
     file_path = None
     try:
         if file_type == "image":
-            if img_gen_pipe is None: raise RuntimeError("Image generation model not loaded.")
             image = img_gen_pipe(content).images[0]
             file_filename = f"generated_img_{random.randint(1000, 9999)}.png"
             file_path = os.path.join(DOC_DIR, file_filename)
@@ -272,13 +294,18 @@ def generate_file_content(content: str, history: List[Dict[str, str]], file_type
             prs = Presentation()
             slide = prs.slides.add_slide(prs.slide_layouts[0])
             slide.shapes.title.text = "Luna Generated Presentation"
-            slide.placeholders[1].text = content[:100] + "..."
             file_filename = f"generated_ppt_{random.randint(1000, 9999)}.pptx"
             file_path = os.path.join(DOC_DIR, file_filename)
             prs.save(file_path)
             display_content = f"📊 **Presentation Generated!** Summary:\n\n{content[:200]}...\n\n[Download {file_filename}](file={file_path})"
         else:
             raise ValueError(f"Unknown file type: {file_type}")
         history[-1]['content'] = display_content
     except Exception as e:
         error_msg = f"❌ **Error generating {file_type.upper()}:** {e}. Check logs/libs."
@@ -288,102 +315,134 @@ def generate_file_content(content: str, history: List[Dict[str, str]], file_type
 # --- CORE GENERATOR FUNCTION ---
 def chat_generator(message_from_input: str, image_input_data: Any, history: List[Dict[str, str]], stop_signal: bool, is_voice_chat: bool) -> Any:
-    # Component Outputs: [chatbot, stop_signal, hint_box, txt, combined_btn, audio_output, is_voice_chat, fact_check_btn_row, staged_image, file_input, file_download_output (INVISIBLE)]
-    if len(history) < 2 or history[-1]['role'] != 'assistant' or history[-1]['content'] != "":
-        yield history, False, "Error: Generator called in unexpected state.", gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
         return
-    last_user_index = len(history) - 2
-    original_message = history[last_user_index]['content']
     is_vqa_flow = False
-    if isinstance(image_input_data, str):
         is_vqa_flow = image_input_data != ""
-    elif isinstance(image_input_data, np.ndarray):
-        is_vqa_flow = image_input_data.size > 0
-    else:
         is_vqa_flow = image_input_data is not None
     vqa_success = False
     if is_vqa_flow:
         processed_message, vqa_success = process_image(image_input_data, original_message)
         history[last_user_index]['content'] = f"[IMAGE RECEIVED] {original_message}"
         llm_input_message = processed_message
-    else:
-        llm_input_message = original_message
-        image_input_data = None
     prompt = f"SYSTEM: {SYSTEM_PROMPT}\n"
-    for item in history[:-1]:
         role = item['role'].upper()
         content = item['content'] if item['content'] is not None else ""
-        if role == "ASSISTANT": prompt += f"LUNA: {content}\n"
-        elif role == "USER": prompt += f"USER: {content}\n"
     prompt += f"USER: {llm_input_message}\nLUNA: "
-    hint_text = "✨ Luna is starting to think..."
-    history[-1]['content'] = ""
-    yield history, stop_signal, hint_text, gr.update(value="", interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
-    time.sleep(0.5)
     full_response = ""
-    current_intent = "default"
     try:
         stream = llm.create_completion(
-            prompt=prompt, max_tokens=8192,
             stop=["USER:", "SYSTEM:", "</s>"],
             echo=False, stream=True, temperature=0.7
         )
     except Exception as e:
         error_text = f"❌ Error generating response: {e}"
         history[-1]['content'] = error_text
         yield history, False, error_text, gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
         return
     try:
         for output in stream:
             token = output["choices"][0].get("text", "")
             full_response += token
             current_intent, current_hint, display_text = get_intent_status(full_response, is_vqa_flow and vqa_success)
-            history[-1]['content'] = display_text
             yield history, stop_signal, current_hint, gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
     except Exception as e:
         _, _, final_response_text = get_intent_status(full_response, is_vqa_flow and vqa_success)
         error_msg = f"⚠️ Streaming interrupted: {e}"
-        history[-1]['content'] = final_response_text
         yield history, False, error_msg, gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=True), image_input_data, gr.update(), gr.update()
         return
-    # 5. POST-PROCESSING & TOOL EXECUTION
     file_download_path = None
     _, _, content_for_tool = get_intent_status(full_response, is_vqa_flow and vqa_success)
     if current_intent == "image_generate":
-        yield history, stop_signal, INTENT_STATUS_MAP[current_intent], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
-        history, file_download_path = generate_file_content(content_for_tool, history, "image")
     elif current_intent == "doc_generate":
-        yield history, stop_signal, INTENT_STATUS_MAP[current_intent], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
-        history, file_download_path = generate_file_content(content_for_tool, history, "doc")
     elif current_intent == "ppt_generate":
-        yield history, stop_signal, INTENT_STATUS_MAP[current_intent], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
-        history, file_download_path = generate_file_content(content_for_tool, history, "ppt")
     elif current_intent == "open_google":
         final_cleaned_response = content_for_tool + "\n\n🔗 **Action:** [Search Google](https://www.google.com/search?q=open+google+simulated+search)"
         history[-1]['content'] = final_cleaned_response
     elif current_intent == "open_camera":
         final_cleaned_response = content_for_tool + "\n\n📸 **Action:** Use the 'Google Lens' button to capture an image."
         history[-1]['content'] = final_cleaned_response
-    TOOL_EXECUTION_INTENTS = ["image_generate", "doc_generate", "ppt_generate", "open_google", "open_camera", "vqa"]
-    if current_intent not in TOOL_EXECUTION_INTENTS:
         final_response_content = check_confidence_and_augment(full_response, original_message)
         history[-1]['content'] = final_response_content
-    else:
-        final_response_content = history[-1]['content']
-    audio_file_path = text_to_audio(final_response_content, is_voice_chat)
     hint = "✅ Response generated."
     yield history, False, hint, gr.update(interactive=True), gr.update(value="↑", interactive=True), audio_file_path, False, gr.update(visible=True), gr.update(value=None), gr.update(), file_download_path
@@ -392,12 +451,16 @@ def chat_generator(message_from_input: str, image_input_data: Any, history: List
 # --- GRADIO WRAPPERS FOR UI ACTIONS ---
 def toggle_menu(current_visibility: bool) -> Tuple[bool, gr.update, gr.update, gr.update]:
-    new_visibility = not current_visibility
     return new_visibility, gr.update(visible=new_visibility), gr.update(visible=False), gr.update(value="⬇️" if new_visibility else "➕")
 def user_turn(user_message: str, chat_history: List[Dict[str, str]], staged_image_input: Any) -> Tuple[str, List[Dict[str, str]]]:
-    """Appends the user message to the chat history if text or image is provided."""
-    has_text = bool(user_message)
     has_image = False
     if isinstance(staged_image_input, str):
         has_image = staged_image_input != ""
@@ -409,17 +472,17 @@ def user_turn(user_message: str, chat_history: List[Dict[str, str]], staged_imag
     if not has_text and not has_image:
         return user_message, chat_history
-    if chat_history and chat_history[-1]['role'] == 'assistant' and chat_history[-1]['content'] == "":
-         return user_message, chat_history
     if not has_text and has_image:
         user_message_to_add = "Analyzing Staged Media."
     else:
-        user_message_to_add = user_message
-    chat_history.append({"role": "user", "content": user_message_to_add})
-    chat_history.append({"role": "assistant", "content": ""})
     return "", chat_history
 def stage_file_upload(file_path: str) -> Tuple[Any, str, gr.update, gr.update]:
@@ -439,7 +502,8 @@ def manual_fact_check(history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str
         if item['role'] == 'user' and item['content']:
             last_user_prompt = item['content'].split("**User Query:**")[-1].strip().replace("[IMAGE RECEIVED]", "").strip()
             break
-    if not last_user_prompt: return history, "Error: Could not find query.", gr.update(visible=False)
     web_results = web_search_tool(last_user_prompt)
     new_history = list(history)
     new_history[-1]['content'] += web_results
@@ -455,32 +519,32 @@ def auto_capture_camera(user_message: str, chat_history: List[Dict[str, str]], s
 # --- GRADIO INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
     # --- State Components ---
     stop_signal = gr.State(value=False)
-    is_voice_chat = gr.State(value=False)
-    staged_image = gr.State(value=None)
     menu_visible_state = gr.State(value=False)
     gr.HTML("<h1 style='text-align: center; color: #4B0082;'>🌙 Luna Chat Space</h1>")
-    hint_box = gr.Textbox(value="Ask anything", lines=1, show_label=False, interactive=False, placeholder="Luna's Action...", visible=True)
-    file_download_output = gr.File(label="Generated File", visible=False)
     with gr.Row(visible=False) as fact_check_btn_row:
         gr.Column(min_width=1); btn_fact_check = gr.Button("Fact Check 🔎"); gr.Column(min_width=1)
-    chatbot = gr.Chatbot(label="Luna", height=500, type='messages')
     with gr.Row(visible=False) as webcam_capture_row:
         webcam_capture_component = gr.Image(sources=["webcam"], type="numpy", show_label=False)
         close_webcam_btn = gr.Button("✅ Use this image")
     with gr.Row(visible=False) as audio_record_row:
         audio_input = gr.Audio(sources=["microphone"], type="filepath", show_label=False)
     with gr.Column(visible=False, elem_id="menu_options_row") as menu_options_row:
-        file_input = gr.File(type="filepath", label="File Uploader", interactive=False)
         btn_take_photo = gr.Button("📸 Google Lens (Take Photo)")
         btn_add_files = gr.Button("📎 Upload File")
@@ -489,20 +553,19 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
         txt = gr.Textbox(placeholder="Ask anything", show_label=False, lines=1, autofocus=True)
         mic_btn = gr.Button("🎙️", interactive=True, size="sm")
         combined_btn = gr.Button("✈️", variant="primary", size="sm")
-    audio_output = gr.Audio(visible=False)
     output_components = [chatbot, stop_signal, hint_box, txt, combined_btn, audio_output, is_voice_chat, fact_check_btn_row, staged_image, file_input, file_download_output]
     # --- WIRE EVENTS ---
     btn_menu.click(
         fn=toggle_menu, inputs=[menu_visible_state], outputs=[menu_visible_state, menu_options_row, fact_check_btn_row, btn_menu], queue=False
     )
     def prepare_file_upload(): return gr.update(visible=False), gr.update(value="➕"), gr.update(visible=False), gr.update(interactive=True), gr.update(value="")
     btn_add_files.click(fn=prepare_file_upload, inputs=[], outputs=[menu_options_row, btn_menu, fact_check_btn_row, file_input, txt], queue=False)
     file_input.change(
         fn=stage_file_upload, inputs=[file_input], outputs=[staged_image, hint_box, txt, file_input], queue=False
     )
@@ -511,12 +574,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
         fn=lambda: (gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), "📸 Camera Active. Capture an image.", gr.update(value="➕")),
         inputs=[], outputs=[menu_options_row, webcam_capture_row, input_row, hint_box, btn_menu], queue=False
     )
     close_webcam_btn.click(
         fn=lambda img: (gr.update(visible=True), gr.update(visible=False), img, f"📸 Photo staged: Click send (✈️).", gr.update(value="")),
         inputs=[webcam_capture_component], outputs=[input_row, webcam_capture_row, staged_image, hint_box, txt], queue=False
     )
     mic_btn.click(
         fn=lambda: (gr.update(visible=False), gr.update(visible=True), "🎙️ Recording..."),
         inputs=[], outputs=[input_row, audio_record_row, hint_box], queue=False
@@ -536,7 +599,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
     )
     generator_inputs = [txt, staged_image, chatbot, stop_signal, is_voice_chat]
     # Text submit (Enter key)
     txt.submit(
         fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False
@@ -545,7 +608,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
     ).then(
         fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False
     )
     # Send button click
     combined_btn.click(
         fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False
@@ -554,7 +617,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
     ).then(
         fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False
     )
     btn_fact_check.click(
         fn=manual_fact_check, inputs=[chatbot], outputs=[chatbot, hint_box, fact_check_btn_row], queue=True
     )

 import numpy as np
 # --- CONFIGURATION & INITIALIZATION ---
+STT_DEVICE = "cpu"
 os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
 AUDIO_DIR = "audio_outputs"
+DOC_DIR = "doc_outputs"
 if not os.path.exists(AUDIO_DIR):
     os.makedirs(AUDIO_DIR)
 if not os.path.exists(DOC_DIR):
     os.makedirs(DOC_DIR)
 REPO_ID = "cosmosai471/Luna-v3"
 MODEL_FILE = "luna.gguf"
+LOCAL_MODEL_PATH = MODEL_FILE
+SYSTEM_PROMPT = (
+    "You are Luna, a helpful and friendly AI assistant. Your response must begin with two separate "
+    "tags: an **Intent** tag and a **Confidence** tag (0-100). Example: '[Intent: qa_general][Confidence: 85]'. "
+    "Your full response must follow these tags."
+)
 # Configuration: confidence threshold for triggering web search fallback
 CONFIDENCE_THRESHOLD = 30  # only trigger web-search fallback if confidence is less than this
     print("Initializing Llama...")
     llm = Llama(
         model_path=LOCAL_MODEL_PATH,
+        n_ctx=8192,
+        n_threads=4,
+        n_batch=256,
+        n_gpu_layers=0,
         verbose=False
     )
     print("✅ Luna Model loaded successfully!")
     print(f"❌ Error loading Luna model: {e}")
     class DummyLLM:
         def create_completion(self, *args, **kwargs):
+            # yield one piece to mimic streaming
             yield {'choices': [{'text': '[Intent: qa_general][Confidence: 0] ERROR: Luna model failed to load. Check logs and resources.'}]}
     llm = DummyLLM()
 image_pipe = None
 try:
+    VLM_MODEL_ID = "llava-hf/llava-1.5-7b-hf"
     image_pipe = pipeline("image-to-text", model=VLM_MODEL_ID, device=STT_DEVICE)
     print(f"✅ Loaded {VLM_MODEL_ID} for image processing.")
 except Exception as e:
 img_gen_pipe = None
 try:
     img_gen_pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32)
+    img_gen_pipe.to(STT_DEVICE)
     print("✅ Loaded Stable Diffusion (v1-5) for image generation.")
 except Exception as e:
     print(f"⚠️ Could not load Image Generation pipeline. Image generation disabled. Error: {e}")
 def simulate_recording_delay():
     time.sleep(3)
+    return None
 def clean_response_stream(raw_text: str) -> str:
     """Return model output with control tokens and tags stripped for display.

     Processing order:
       1. Keep only the text before the first turn boundary (a newline followed
          by 'User:' or 'Assistant:') or end-of-sequence marker ('</s>').
       2. Drop [INST], [/INST], [s], [/s] tokens and <action>...</action> spans.
       3. Drop every [Intent: ...] and [Confidence: N] tag (case-insensitive).
       4. If the last two words repeat the two words before them, drop the repeat.

     The plain words 'Intent' and 'Action' are deliberately NOT used as split
     points, so ordinary prose that mentions them is left intact.

     Args:
         raw_text: Raw text accumulated from the model, possibly still
             containing tags and control tokens.

     Returns:
         The cleaned text, stripped of leading/trailing whitespace.
     """
     # Cut at the first boundary marker. maxsplit is passed by keyword because
     # the positional form is deprecated since Python 3.13.
     clean_text = re.split(r'\nUser:|\nAssistant:|</s>', raw_text, maxsplit=1)[0].strip()
     # Remove bracketed instruction tokens.
     clean_text = re.sub(r'\[/?INST\]|\[/?s\]', '', clean_text)
     # Remove inline actions together with the whitespace in front of them only;
     # keeping the whitespace behind them stops "Hi <action>x</action> there"
     # from collapsing into "Hithere".
     clean_text = re.sub(r'\s*<action>.*?</action>', '', clean_text, flags=re.DOTALL).strip()
     # Remove the display tags; \w already covers '_' so no extra escape is needed.
     clean_text = re.sub(
         r'\[Intent:\s*[\w\-]+\]|\[Confidence:\s*\d{1,3}\]', '', clean_text, flags=re.IGNORECASE
     ).strip()
     # Simple heuristic against a stuttering tail: "... a b a b" -> "... a b".
     # Note that rejoining on single spaces also collapses other whitespace,
     # but only when the repeat is actually detected.
     words = clean_text.split()
     if len(words) > 4 and words[-2:] == words[-4:-2]:
         clean_text = ' '.join(words[:-2])
     return clean_text
 def web_search_tool(query: str) -> str:
+    time.sleep(1.5)
     print(f"Simulating Google Search fallback for: {query}")
     return f"\n\n🌐 **Web Search Results for '{query}':** I've gathered information from external sources to supplement my knowledge."
 def check_confidence_and_augment(raw_response_with_tags: str, prompt: str) -> str:
     """Checks confidence from the raw response tag and triggers fallback if very low.
+    - If explicit [Confidence: N] exists, use it.
+    - Otherwise fall back to heuristic based on cleaned response length.
+    - Only triggers web search if below CONFIDENCE_THRESHOLD.
     """
     confidence_match = re.search(r'\[Confidence:\s*([0-9]{1,3})\]', raw_response_with_tags, flags=re.IGNORECASE)
     cleaned_response = clean_response_stream(raw_response_with_tags)
     if confidence_match:
         try:
             confidence_score = int(confidence_match.group(1))
             confidence_score = max(0, min(confidence_score, 100))
         except Exception:
             confidence_score = 0
     else:
+        # heuristic: very short or empty cleaned response -> low confidence
         if not cleaned_response or len(cleaned_response.strip()) < 30:
+            confidence_score = 10
         else:
+            confidence_score = 85
     if confidence_score < CONFIDENCE_THRESHOLD:
+        print(f"Low confidence ({confidence_score}%) detected (threshold={CONFIDENCE_THRESHOLD}). Triggering web-search fallback.")
         search_snippet = web_search_tool(prompt)
         if "error" in cleaned_response.lower() or confidence_score <= 5:
+            final_response = f"I apologize for the limited response (Confidence: {confidence_score}%). {search_snippet} I will use this to generate a more comprehensive answer."
         else:
             final_response = f"{cleaned_response} {search_snippet} I can elaborate further based on this."
     else:
         final_response = cleaned_response
     return final_response
 def process_image(image_data_or_path: Any, message: str) -> Tuple[str, bool]:
+    """Perform VQA via the image_pipe. Returns a prompt-injection string for the LLM and success flag.
+    If the VLM fails or returns nothing meaningful, return helpful instructions to the LLM rather than empty.
+    """
     global image_pipe
     success = False
     if image_pipe is None:
     try:
         if isinstance(image_data_or_path, str):
             image = Image.open(image_data_or_path).convert("RGB")
+        elif isinstance(image_data_or_path, np.ndarray):
             image = Image.fromarray(image_data_or_path).convert("RGB")
         if image:
             vqa_prompt = f"USER: <image>\n{message}\nASSISTANT:"
             results = image_pipe(image, prompt=vqa_prompt, generate_kwargs={"max_new_tokens": 1024})
+            raw_vlm_output = results[0].get('generated_text', "") if results and isinstance(results, list) else ""
+            vqa_response = raw_vlm_output.split("ASSISTANT:")[-1].strip() if raw_vlm_output else ""
+            # If empty or nonsense, produce a friendly fallback message
+            if not vqa_response:
+                vqa_response = (
+                    "VQA analysis returned no clear answer. Possible reasons: image unreadable, wrong crop, or "
+                    "ambiguous content. Please re-upload a clearer image or provide more context about what you want."
+                )
+                success = False
+            else:
+                success = True
             del image
             prompt_injection = f"**VQA Analysis:** {vqa_response}\n\n**User Query:** {message}"
             return prompt_injection, success
     except Exception as e:
         print(f"Image Pipeline Error: {e}")
         return f"[Image Processing Error: {e}] **User Query:** {message}", success
     return f"[Image Processing Error: Could not load image data.] **User Query:** {message}", success
 def transcribe_audio(audio_file_path: str) -> Tuple[str, str, gr.update, gr.update, bool, gr.update]:
         transcribed_text = stt_pipe(audio_file_path)["text"]
         new_button_update = gr.update(value="↑", interactive=True, elem_classes=["circle-btn", "send-mode"])
         return (
+            transcribed_text.strip(),
+            f"🎙️ Transcribed: '{transcribed_text.strip()}'",
+            gr.update(interactive=True),
+            new_button_update,
+            True,
             gr.update(visible=False)
         )
     except Exception as e:
 def text_to_audio(text: str, is_voice_chat: bool) -> "str | None":
     """Convert an assistant reply to speech when voice chat is active.
     Fenced code blocks, '[Image Processing Error: ...]' tags, text from
     '**Web Search Results:' to the end of that line, and '(file=...)' links are
     removed before synthesis so they are not read aloud.
     Args:
         text: Assistant reply as shown in the chat (markdown allowed).
         is_voice_chat: Synthesize only when True.
     Returns:
         Path of the MP3 written inside AUDIO_DIR, or None when voice chat is off,
         the text is empty, five or fewer characters remain after cleaning, or
         gTTS fails.
     """
     # The old annotation `str or None` evaluated to plain `str`; a falsy text
     # (None / "") used to reach re.sub and raise instead of returning None.
     if not is_voice_chat or not text:
         return None
     clean_text = re.sub(r'```.*?```|\[Image Processing Error:.*?\]|\*\*Web Search Results:.*?$|\(file=.*?\)', '', text, flags=re.DOTALL | re.MULTILINE).strip()
     if len(clean_text) <= 5:
         return None
     try:
         audio_output_path = os.path.join(AUDIO_DIR, f"luna_response_{random.randint(1000, 9999)}.mp3")
         tts = gTTS(text=clean_text, lang='en')
         tts.save(audio_output_path)
     except Exception as e:
         # Best effort: a TTS failure must not break the chat turn, so log and carry on without audio.
         print(f"gTTS Error: {e}")
         return None
     return audio_output_path
 }
 def get_intent_status(raw_response: str, is_vqa_flow: bool) -> Tuple[str, str, str]:
     """Extract the intent tag from a raw model response and prepare display text.
     The first '[Intent: ...]' tag names the intent ("default" when absent); a true
     is_vqa_flow overrides it with "vqa". The first '[Intent: ...]' and
     '[Confidence: N]' tags are removed and the remainder is passed through
     clean_response_stream (defined elsewhere in this file).
     Returns:
         (intent, status, cleaned_text): the lowercase intent, its entry in
         INTENT_STATUS_MAP (the "default" entry for unknown intents), and the
         text to show in the chat.
     """
     tag_hit = re.search(r'\[Intent:\s*([\w\-\_]+)\]', raw_response, re.IGNORECASE)
     if is_vqa_flow:
         detected_intent = "vqa"
     elif tag_hit:
         detected_intent = tag_hit.group(1).lower()
     else:
         detected_intent = "default"
     # Strip at most one occurrence of each tag; raw_response itself is left untouched.
     without_intent = re.sub(r'\[Intent:\s*[\w\-\_]+\]\s*', '', raw_response, count=1, flags=re.IGNORECASE).strip()
     without_tags = re.sub(r'\[Confidence:\s*\d{1,3}\]\s*', '', without_intent, count=1, flags=re.IGNORECASE).strip()
     display_text = clean_response_stream(without_tags)
     status_hint = INTENT_STATUS_MAP.get(detected_intent, INTENT_STATUS_MAP["default"])
     return detected_intent, status_hint, display_text
 def generate_file_content(content: str, history: List[Dict[str, str]], file_type: str):
+    """Generates a file (Image, DOCX, PPTX) and returns the file path for download.
+    If content is too short or missing, ask the user to clarify instead of producing empty files.
+    """
     file_path = None
     try:
+        if not content or len(content.strip()) < 20:
+            history[-1]['content'] = (
+                f"⚠️ I was instructed to generate a {file_type}, but I don't have enough details. "
+                "Could you please provide a short description or title for the file (what should it contain)?"
+            )
+            return history, None
         if file_type == "image":
+            if img_gen_pipe is None:
+                raise RuntimeError("Image generation model not loaded.")
             image = img_gen_pipe(content).images[0]
             file_filename = f"generated_img_{random.randint(1000, 9999)}.png"
             file_path = os.path.join(DOC_DIR, file_filename)
             prs = Presentation()
             slide = prs.slides.add_slide(prs.slide_layouts[0])
             slide.shapes.title.text = "Luna Generated Presentation"
+            try:
+                slide.placeholders[1].text = content[:200] + "..."
+            except Exception:
+                # fallback if layout mismatch
+                pass
             file_filename = f"generated_ppt_{random.randint(1000, 9999)}.pptx"
             file_path = os.path.join(DOC_DIR, file_filename)
             prs.save(file_path)
             display_content = f"📊 **Presentation Generated!** Summary:\n\n{content[:200]}...\n\n[Download {file_filename}](file={file_path})"
         else:
             raise ValueError(f"Unknown file type: {file_type}")
         history[-1]['content'] = display_content
     except Exception as e:
         error_msg = f"❌ **Error generating {file_type.upper()}:** {e}. Check logs/libs."
 # --- CORE GENERATOR FUNCTION ---
 def chat_generator(message_from_input: str, image_input_data: Any, history: List[Dict[str, str]], stop_signal: bool, is_voice_chat: bool) -> Any:
+    """
+    Returns: [chatbot, stop_signal, hint_box, txt, combined_btn, audio_output, is_voice_chat, fact_check_btn_row, staged_image, file_input, file_download_output]
+    Changes made:
+    - user_turn will now only append the user message. We add the assistant entry here once generation starts,
+      so there's no empty assistant box created prematurely.
+    """
+    # Validate that last item is a USER (we expect user_turn to add only the user record)
+    if not history or history[-1]['role'] != 'user':
+        yield history, False, "Error: Generator called in unexpected state (no user message found).", gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
         return
     # The user's text is read from the last history entry; message_from_input is not referenced in this body.
+    last_user_index = len(history) - 1
+    original_message = history[last_user_index]['content'] if history[last_user_index]['content'] is not None else ""
+    # Detect VQA flow
     is_vqa_flow = False
+    if isinstance(image_input_data, str):
         is_vqa_flow = image_input_data != ""
+    elif isinstance(image_input_data, np.ndarray):
+        is_vqa_flow = image_input_data.size > 0
+    else:
         is_vqa_flow = image_input_data is not None
+    # Process image if present (returns prompt injection for LLM)
     vqa_success = False
+    llm_input_message = original_message
     if is_vqa_flow:
         processed_message, vqa_success = process_image(image_input_data, original_message)
+        # Replace the user's content with tag for logging while preserving original_message separately
         history[last_user_index]['content'] = f"[IMAGE RECEIVED] {original_message}"
         llm_input_message = processed_message
+    # Build prompt (system + conversation)
     prompt = f"SYSTEM: {SYSTEM_PROMPT}\n"
+    for item in history[:-1]:  # all conversation before last user
         role = item['role'].upper()
         content = item['content'] if item['content'] is not None else ""
         # Entries whose role is neither assistant nor user are left out of the prompt.
+        if role == "ASSISTANT":
+            prompt += f"LUNA: {content}\n"
+        elif role == "USER":
+            prompt += f"USER: {content}\n"
     # The prompt ends with an open "LUNA: " turn for the model to complete.
     prompt += f"USER: {llm_input_message}\nLUNA: "
+    # Now create assistant entry only when we begin generation (avoids empty assistant box)
+    assistant_initial_text = "✨ Luna is starting to think..."
+    history.append({"role": "assistant", "content": assistant_initial_text})
+    # Early UI update to show the thinking state (assistant box will appear now)
+    yield history, stop_signal, assistant_initial_text, gr.update(value="", interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
+    time.sleep(0.2)
     full_response = ""
     # Remains "default" if the stream yields no chunks.
+    current_intent = "default"
     try:
         # Streaming completion; generation halts at any of the listed stop strings.
         stream = llm.create_completion(
+            prompt=prompt, max_tokens=8192,
             stop=["USER:", "SYSTEM:", "</s>"],
             echo=False, stream=True, temperature=0.7
         )
     except Exception as e:
         error_text = f"❌ Error generating response: {e}"
+        # update assistant with error
         history[-1]['content'] = error_text
         yield history, False, error_text, gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=False), image_input_data, gr.update(), gr.update()
         return
+    # Stream tokens and update assistant content incrementally (without exposing tags)
     try:
         for output in stream:
             token = output["choices"][0].get("text", "")
             full_response += token
             # The whole accumulated text is re-parsed on every chunk; the intent is forced to "vqa"
             # only when an image was supplied and process_image reported success.
             current_intent, current_hint, display_text = get_intent_status(full_response, is_vqa_flow and vqa_success)
+            # display_text is cleaned (no [Intent] or [Confidence])
+            # Ensure we never set assistant content to empty — if cleaned is empty, show a small typing indicator
+            history[-1]['content'] = display_text if display_text.strip() else "✨ Luna is forming a reply..."
             yield history, stop_signal, current_hint, gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
     except Exception as e:
+        # Stream interruption — salvage what we have
         _, _, final_response_text = get_intent_status(full_response, is_vqa_flow and vqa_success)
         error_msg = f"⚠️ Streaming interrupted: {e}"
+        history[-1]['content'] = final_response_text if final_response_text.strip() else error_msg
         # Unlike the other error exits, this one makes the fact-check row visible.
         yield history, False, error_msg, gr.update(interactive=True), gr.update(value="↑", interactive=True), None, False, gr.update(visible=True), image_input_data, gr.update(), gr.update()
         return
+    # POST-PROCESSING & TOOL EXECUTION
     file_download_path = None
     # Tool branches work on the tag-free text; current_intent is the value parsed from the last streamed chunk.
     _, _, content_for_tool = get_intent_status(full_response, is_vqa_flow and vqa_success)
+    # If model wants to run a tool but content is weak, ask for clarification instead of generating empty files
     if current_intent == "image_generate":
+        if not content_for_tool or len(content_for_tool.strip()) < 20:
+            history[-1]['content'] = "I detected a request to generate an image but I don't have enough prompt details. Please give a short description: e.g. 'sunset over mountains, vibrant colors'."
+        else:
+            history[-1]['content'] = INTENT_STATUS_MAP[current_intent]
+            yield history, stop_signal, history[-1]['content'], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
+            history, file_download_path = generate_file_content(content_for_tool, history, "image")
     elif current_intent == "doc_generate":
+        if not content_for_tool or len(content_for_tool.strip()) < 20:
+            history[-1]['content'] = "I was asked to generate a document but I need more details — what's the document about? (1–2 sentences.)"
+        else:
+            history[-1]['content'] = INTENT_STATUS_MAP[current_intent]
+            yield history, stop_signal, history[-1]['content'], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
+            history, file_download_path = generate_file_content(content_for_tool, history, "doc")
     elif current_intent == "ppt_generate":
+        if not content_for_tool or len(content_for_tool.strip()) < 20:
+            history[-1]['content'] = "I can make a short presentation, but please give me a title and 3–5 bullet points to include."
+        else:
+            history[-1]['content'] = INTENT_STATUS_MAP[current_intent]
+            yield history, stop_signal, history[-1]['content'], gr.update(interactive=False), gr.update(value="Stop ⏹️", interactive=True), None, is_voice_chat, gr.update(visible=False), image_input_data, gr.update(), gr.update()
+            history, file_download_path = generate_file_content(content_for_tool, history, "ppt")
     elif current_intent == "open_google":
         final_cleaned_response = content_for_tool + "\n\n🔗 **Action:** [Search Google](https://www.google.com/search?q=open+google+simulated+search)"
         history[-1]['content'] = final_cleaned_response
     elif current_intent == "open_camera":
         final_cleaned_response = content_for_tool + "\n\n📸 **Action:** Use the 'Google Lens' button to capture an image."
         history[-1]['content'] = final_cleaned_response
+    else:
+        # Normal response path — check confidence and maybe augment with web-search snippet
         # Note: this helper receives the raw response (tags included), not the cleaned text.
         final_response_content = check_confidence_and_augment(full_response, original_message)
         history[-1]['content'] = final_response_content
+    # If after all processing the assistant content is empty (defensive), fill a friendly fallback
+    if not history[-1]['content'] or not str(history[-1]['content']).strip():
+        history[-1]['content'] = "Sorry — I couldn't produce a good response. Can you rephrase or give more details?"
+    audio_file_path = text_to_audio(history[-1]['content'], is_voice_chat)
     hint = "✅ Response generated."
     # Final update: re-enable the input controls, reset the voice-chat flag and staged image,
     # show the fact-check row, and pass along the audio path and any generated file path.
     yield history, False, hint, gr.update(interactive=True), gr.update(value="↑", interactive=True), audio_file_path, False, gr.update(visible=True), gr.update(value=None), gr.update(), file_download_path
 # --- GRADIO WRAPPERS FOR UI ACTIONS ---
 def toggle_menu(current_visibility: bool) -> Tuple[bool, gr.update, gr.update, gr.update]:
     """Flip the menu's visibility flag and build the matching component updates.
     Returns the new flag, an update showing or hiding the menu, an update that
     always hides its target (wired to the fact-check row), and an update that
     swaps the toggle button's icon.
     """
     is_open = not current_visibility
     if is_open:
         button_icon = "⬇️"
     else:
         button_icon = "➕"
     menu_update = gr.update(visible=is_open)
     hide_update = gr.update(visible=False)
     icon_update = gr.update(value=button_icon)
     return is_open, menu_update, hide_update, icon_update
 def user_turn(user_message: str, chat_history: List[Dict[str, str]], staged_image_input: Any) -> Tuple[str, List[Dict[str, str]]]:
     """Append the user's message to the chat history.
     Only the USER record is added here; the assistant entry is appended by
     chat_generator when generation begins, so the UI never shows an empty
     assistant box right after the user sends a message.
     Args:
         user_message: Raw textbox value.
         chat_history: Message dicts with 'role' and 'content'; mutated in place.
         staged_image_input: Staged media, e.g. a file path string, a numpy image
             from the webcam, or None.
     Returns:
         (textbox_value, chat_history). The textbox value is "" when the message
         was accepted and the unchanged input when the call was ignored.
     """
     has_text = bool(user_message and user_message.strip())
     # Same staged-media test as chat_generator, so a webcam capture (numpy array)
     # sent without text is no longer silently dropped.
     if isinstance(staged_image_input, str):
         has_image = staged_image_input != ""
     elif isinstance(staged_image_input, np.ndarray):
         has_image = staged_image_input.size > 0
     else:
         has_image = staged_image_input is not None
     if not has_text and not has_image:
         return user_message, chat_history
     # Prevent double-sending while the assistant is still generating. Match the exact
     # placeholder strings chat_generator writes: the previous substring test for
     # "thinking" never matched them, yet blocked every send after a finished reply
     # that merely contained that word.
     generation_placeholders = ("✨ Luna is starting to think...", "✨ Luna is forming a reply...")
     if chat_history:
         last_entry = chat_history[-1]
         if last_entry['role'] == 'assistant' and last_entry['content'] in generation_placeholders:
             return user_message, chat_history
     # Reaching here means there is text, an image, or both.
     user_message_to_add = user_message.strip() if has_text else "Analyzing Staged Media."
     chat_history.append({"role": "user", "content": user_message_to_add})
     # do NOT append assistant here — chat_generator will append assistant entry when it starts
     return "", chat_history
 def stage_file_upload(file_path: str) -> Tuple[Any, str, gr.update, gr.update]:
         if item['role'] == 'user' and item['content']:
             last_user_prompt = item['content'].split("**User Query:**")[-1].strip().replace("[IMAGE RECEIVED]", "").strip()
             break
+    if not last_user_prompt:
+        return history, "Error: Could not find query.", gr.update(visible=False)
     web_results = web_search_tool(last_user_prompt)
     new_history = list(history)
     new_history[-1]['content'] += web_results
 # --- GRADIO INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:
     # --- State Components ---
     stop_signal = gr.State(value=False)
+    is_voice_chat = gr.State(value=False)
+    staged_image = gr.State(value=None)
     menu_visible_state = gr.State(value=False)
     gr.HTML("<h1 style='text-align: center; color: #4B0082;'>🌙 Luna Chat Space</h1>")
+    hint_box = gr.Textbox(value="Ask anything", lines=1, show_label=False, interactive=False, placeholder="Luna's Action...", visible=True)
+    file_download_output = gr.File(label="Generated File", visible=False)
     with gr.Row(visible=False) as fact_check_btn_row:
         gr.Column(min_width=1); btn_fact_check = gr.Button("Fact Check 🔎"); gr.Column(min_width=1)
+    chatbot = gr.Chatbot(label="Luna", height=500, type='messages')
     with gr.Row(visible=False) as webcam_capture_row:
         webcam_capture_component = gr.Image(sources=["webcam"], type="numpy", show_label=False)
         close_webcam_btn = gr.Button("✅ Use this image")
     with gr.Row(visible=False) as audio_record_row:
         audio_input = gr.Audio(sources=["microphone"], type="filepath", show_label=False)
     with gr.Column(visible=False, elem_id="menu_options_row") as menu_options_row:
+        file_input = gr.File(type="filepath", label="File Uploader", interactive=False)
         btn_take_photo = gr.Button("📸 Google Lens (Take Photo)")
         btn_add_files = gr.Button("📎 Upload File")
         txt = gr.Textbox(placeholder="Ask anything", show_label=False, lines=1, autofocus=True)
         mic_btn = gr.Button("🎙️", interactive=True, size="sm")
         combined_btn = gr.Button("✈️", variant="primary", size="sm")
+    audio_output = gr.Audio(visible=False)
     output_components = [chatbot, stop_signal, hint_box, txt, combined_btn, audio_output, is_voice_chat, fact_check_btn_row, staged_image, file_input, file_download_output]
     # --- WIRE EVENTS ---
     btn_menu.click(
         fn=toggle_menu, inputs=[menu_visible_state], outputs=[menu_visible_state, menu_options_row, fact_check_btn_row, btn_menu], queue=False
     )
     def prepare_file_upload(): return gr.update(visible=False), gr.update(value="➕"), gr.update(visible=False), gr.update(interactive=True), gr.update(value="")
     btn_add_files.click(fn=prepare_file_upload, inputs=[], outputs=[menu_options_row, btn_menu, fact_check_btn_row, file_input, txt], queue=False)
     file_input.change(
         fn=stage_file_upload, inputs=[file_input], outputs=[staged_image, hint_box, txt, file_input], queue=False
     )
         fn=lambda: (gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), "📸 Camera Active. Capture an image.", gr.update(value="➕")),
         inputs=[], outputs=[menu_options_row, webcam_capture_row, input_row, hint_box, btn_menu], queue=False
     )
     close_webcam_btn.click(
         fn=lambda img: (gr.update(visible=True), gr.update(visible=False), img, f"📸 Photo staged: Click send (✈️).", gr.update(value="")),
         inputs=[webcam_capture_component], outputs=[input_row, webcam_capture_row, staged_image, hint_box, txt], queue=False
     )
     mic_btn.click(
         fn=lambda: (gr.update(visible=False), gr.update(visible=True), "🎙️ Recording..."),
         inputs=[], outputs=[input_row, audio_record_row, hint_box], queue=False
     )
     generator_inputs = [txt, staged_image, chatbot, stop_signal, is_voice_chat]
     # Text submit (Enter key)
     txt.submit(
         fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False
     ).then(
         fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False
     )
     # Send button click
     combined_btn.click(
         fn=user_turn, inputs=[txt, chatbot, staged_image], outputs=[txt, chatbot], queue=False
     ).then(
         fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False
     )
     btn_fact_check.click(
         fn=manual_fact_check, inputs=[chatbot], outputs=[chatbot, hint_box, fact_check_btn_row], queue=True
     )