cosmosai471 committed on
Commit
8d1858d
Β·
verified Β·
1 Parent(s): 4d02bbf

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +684 -0
app.py ADDED
@@ -0,0 +1,684 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import random
import re
import time
from io import BytesIO
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import torch
from diffusers import StableDiffusionPipeline
from docx import Document
from gtts import gTTS
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from PIL import Image
from pptx import Presentation
from transformers import pipeline
17
+
18
+ # --- CONFIGURATION & INITIALIZATION ---
19
+ # Set device for pipelines (STT/VQA/ImageGen). Use "cpu" for compatibility.
20
# Device handed to every Hugging Face pipeline below (STT / VQA / image
# generation). "cpu" is used for compatibility.
STT_DEVICE = "cpu"
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'

# Output folders for synthesized speech and generated files.
AUDIO_DIR = "audio_outputs"
DOC_DIR = "doc_outputs"
# exist_ok=True replaces the racy "check, then create" pair: the call is a
# no-op when the directory already exists.
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(DOC_DIR, exist_ok=True)

# Hugging Face Model Info
REPO_ID = "cosmosai471/Luna-v3"
MODEL_FILE = "luna.gguf"
LOCAL_MODEL_PATH = MODEL_FILE

# System prompt: asks the model to prefix each answer with an [Intent: ...]
# tag, which get_intent_status() parses to pick a status hint / tool.
SYSTEM_PROMPT = "You are Luna, a helpful and friendly AI assistant. When responding, start your response with an **Intent** tag based on the user's request, such as '[Intent: code_generate]', '[Intent: code_explain]', '[Intent: qa_general]', '[Intent: image_generate]', '[Intent: doc_generate]', '[Intent: ppt_generate]', '[Intent: open_camera]', '[Intent: open_google]', or '[Intent: greeting]'. Your response must be complete."
36
+
37
+ # Helper to safely delete Llama instance (prevents resource leaks)
38
def safe_del(self):
    """Best-effort finalizer: call ``self.close()`` if the object has one.

    Intended to be installed as ``__del__``. Every exception is swallowed on
    purpose, because raising from a finalizer only produces noise.
    """
    try:
        if not hasattr(self, "close"):
            return
        if callable(self.close):
            self.close()
    except Exception:
        # Deliberately ignored: cleanup must never raise.
        return
44
# Install the tolerant finalizer so Llama teardown never raises.
Llama.__del__ = safe_del

# --- MODEL LOADING ---
# `llm` ends up as either a real Llama instance or, when anything in the
# download/initialisation fails, a stub whose stream yields one error chunk.
llm = None
try:
    print(f"Downloading {MODEL_FILE} from {REPO_ID}...")
    hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE, local_dir=".")

    if os.path.exists(LOCAL_MODEL_PATH):
        print("Initializing Llama...")
        llm = Llama(
            model_path=LOCAL_MODEL_PATH,
            n_ctx=8192,
            n_threads=4,
            n_batch=256,
            n_gpu_layers=0,
            verbose=False,
        )
        print("βœ… Luna Model loaded successfully!")
    else:
        raise FileNotFoundError(f"Download failed for {MODEL_FILE}")
except Exception as load_error:
    print(f"❌ Error loading Luna model: {load_error}")

    class DummyLLM:
        """Stand-in used when the real model could not be loaded."""

        def create_completion(self, *args, **kwargs):
            # Generator, so callers that iterate a stream keep working.
            yield {'choices': [{'text': 'ERROR: Luna model failed to load. Check logs and resources.'}]}

    llm = DummyLLM()
70
+
71
+ # --- MULTIMODAL PIPELINE LOADING ---
72
# Each optional pipeline stays None when loading fails; the functions that
# use it check for None and degrade gracefully.

# Speech-to-text (Whisper).
stt_pipe = None
try:
    stt_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=STT_DEVICE)
    print(f"βœ… Loaded Whisper-base on device: {STT_DEVICE}")
except Exception as stt_error:
    print(f"⚠️ Could not load Whisper. Voice chat disabled. Error: {stt_error}")

# Vision-language model used for image question answering.
image_pipe = None
VLM_MODEL_ID = "llava-hf/llava-1.5-7b-hf"
try:
    image_pipe = pipeline("image-to-text", model=VLM_MODEL_ID, device=STT_DEVICE)
    print(f"βœ… Loaded {VLM_MODEL_ID} for image processing.")
except Exception as vlm_error:
    print(f"⚠️ Could not load VLM ({VLM_MODEL_ID}). Image chat disabled. Error: {vlm_error}")

# Text-to-image (Stable Diffusion).
img_gen_pipe = None
try:
    img_gen_pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float32)
    img_gen_pipe.to(STT_DEVICE)
    print("βœ… Loaded Stable Diffusion (v1-5) for image generation.")
except Exception as sd_error:
    print(f"⚠️ Could not load Image Generation pipeline. Image generation disabled. Error: {sd_error}")
94
+
95
+
96
+ # --- UTILITY FUNCTIONS ---
97
+
98
def simulate_recording_delay(seconds: float = 3.0) -> None:
    """Block for ``seconds`` to give the UI a fixed "recording" window.

    Args:
        seconds: How long to wait. Defaults to the original 3 seconds;
            negative values are treated as 0 so ``time.sleep`` never raises.

    Returns:
        Always ``None`` (the Gradio step that calls this has no outputs).
    """
    time.sleep(max(0.0, seconds))
    return None
102
+
103
# Markers after which the model output is discarded. The Intent/Action
# alternatives are anchored to tag/line syntax: matching the bare words
# truncated any answer that merely contained "Intent..." or "Action...".
_STOP_MARKERS = re.compile(r'\nUser:|\nAssistant:|</s>|\[Intent:|\nAction:')
# Instruction wrappers and <action>...</action> spans removed from the text.
_INSTRUCTION_MARKERS = re.compile(r'\[/?INST\]|\[/?s\]|\s*<action>.*?</action>\s*', flags=re.DOTALL)


def clean_response_stream(raw_text: str) -> str:
    """Clean raw LLaMA-style output.

    Steps:
      1. Keep only the text before the first stop marker.
      2. Remove ``[INST]``/``[s]`` wrappers and ``<action>`` spans.
      3. If the final two words repeat the two words before them, drop the
         repeat (note: this re-joins the text with single spaces).

    Args:
        raw_text: Text as produced by the model.

    Returns:
        The cleaned, stripped text.
    """
    # 1. Strip stop tokens (maxsplit by keyword; positional is deprecated).
    clean_text = _STOP_MARKERS.split(raw_text, maxsplit=1)[0].strip()

    # 2. Remove instruction/action markers
    clean_text = _INSTRUCTION_MARKERS.sub('', clean_text).strip()

    # 3. Simple word-repeat check
    words = clean_text.split()
    if len(words) > 4 and words[-2:] == words[-4:-2]:
        clean_text = ' '.join(words[:-2])

    return clean_text
117
+
118
def web_search_tool(query: str, delay: float = 1.5) -> str:
    """Simulated Google Search fallback.

    No network request is made: the function waits, logs the query and
    returns a canned Markdown snippet that mentions it.

    Args:
        query: The text the "search" is for.
        delay: Seconds of simulated latency. Defaults to the original 1.5;
            zero or negative skips the wait.

    Returns:
        The snippet, starting with a blank line so it can be appended to an
        existing answer.
    """
    if delay > 0:
        time.sleep(delay)
    print(f"Simulating Google Search fallback for: {query}")
    return f"\n\n🌐 **Web Search Results for '{query}':** I've gathered information from external sources to supplement my knowledge."
123
+
124
def check_confidence_and_augment(raw_response: str, prompt: str) -> str:
    """Return the cleaned response, augmented by the search fallback if weak.

    A response counts as weak when, after cleaning, it contains the word
    "error" (any case) or has fewer than 10 words. In that case the simulated
    web search is run for ``prompt`` and its snippet is woven into the reply.
    """
    answer = clean_response_stream(raw_response)

    mentions_error = "error" in answer.lower()
    too_short = len(answer.split()) < 10
    if not (mentions_error or too_short):
        return answer

    print("Low confidence/short response detected. Triggering Google Search fallback.")
    snippet = web_search_tool(prompt)

    if mentions_error:
        # The original text is dropped entirely when it reports an error.
        return f"I apologize for the limited response. {snippet} I will use this to generate a more comprehensive answer."
    return f"{answer} {snippet} I can elaborate further based on this."
140
+
141
def process_image(image_path: str, message: str) -> str:
    """Run visual question answering on an image and fold it into the query.

    Args:
        image_path: Path of the staged image; falsy means "no image".
        message: The user's question about the image.

    Returns:
        ``message`` unchanged when there is no image or no VLM pipeline;
        otherwise a Markdown block with the VLM's answer followed by the
        user query. On failure the error is embedded in the returned text
        instead of being raised.
    """
    # image_path is tested first so the pipeline global is only consulted
    # when there is actually an image to process.
    if not image_path or not image_pipe:
        return message

    try:
        # The context manager closes the underlying file handle; convert()
        # returns an independent in-memory copy.
        with Image.open(image_path) as source:
            image = source.convert("RGB")

        # LLaVA-1.5 chat format: the <image> placeholder marks where the
        # image features are spliced into the prompt.
        vqa_prompt = f"USER: <image>\n{message}\nASSISTANT:"

        results = image_pipe(image, prompt=vqa_prompt)
        if results:
            # The pipeline may echo the prompt; keep only the text after the
            # final ASSISTANT: marker (a no-op if there is no echo).
            vqa_response = results[0]['generated_text'].split("ASSISTANT:")[-1].strip()
        else:
            vqa_response = "The image could not be processed."

        return f"**Image Analysis (VQA):** {vqa_response}\n\n**User Query:** {message}"
    except Exception as e:
        print(f"Image Pipeline Error: {e}")
        return f"[Image Processing Error: {e}] **User Query:** {message}"
160
+
161
def transcribe_audio(audio_file_path: str) -> Tuple[str, str, gr.update, gr.update, bool, gr.update]:
    """Transcribe a recorded audio file with the Whisper pipeline.

    Returns a 6-tuple for the UI: (transcript, hint text, textbox update,
    send-button update, voice-chat flag, fact-check-row update). On any
    failure the transcript is empty and the voice-chat flag is False.
    """

    def _ui_result(text: str, hint: str, voice_chat: bool):
        # Everything except text/hint/flag is identical on all paths.
        send_button = gr.update(value="↑", interactive=True, elem_classes=["circle-btn", "send-mode"])
        return text, hint, gr.update(interactive=True), send_button, voice_chat, gr.update(visible=False)

    if stt_pipe is None or audio_file_path is None:
        return _ui_result("", "Error: Whisper model failed to load or no audio recorded.", False)

    try:
        spoken = stt_pipe(audio_file_path)["text"].strip()
        return _ui_result(spoken, f"πŸŽ™οΈ Transcribed: '{spoken}'", True)
    except Exception as stt_error:
        return _ui_result("", f"Transcription Error: {stt_error}", False)
182
+
183
+
184
# Parts of a reply that should not be read aloud: fenced code blocks, image
# error notes, and everything from the web-search banner onwards. The banner
# emitted by the app reads "**Web Search Results for '...':**", so the
# pattern must not require a colon directly after "Results".
_SPEECH_EXCLUDED = re.compile(
    r'```.*?```|\[Image Processing Error:.*?\]|\*\*Web Search Results.*',
    flags=re.DOTALL,
)


def text_to_audio(text: str, is_voice_chat: bool) -> Optional[str]:
    """Convert the final response text to an MP3 file using gTTS.

    Args:
        text: The assistant's final reply (Markdown).
        is_voice_chat: Speech is only synthesized for voice-initiated turns.

    Returns:
        Path of the saved MP3 inside ``AUDIO_DIR``, or ``None`` when speech
        is not requested, the speakable text is 5 characters or fewer, or
        synthesis fails (the error is printed, not raised).
    """
    if not is_voice_chat:
        return None

    speakable = _SPEECH_EXCLUDED.sub('', text).strip()
    if len(speakable) <= 5:
        return None

    try:
        audio_output_path = os.path.join(AUDIO_DIR, f"luna_response_{random.randint(1000, 9999)}.mp3")
        gTTS(text=speakable, lang='en').save(audio_output_path)
        return audio_output_path
    except Exception as e:
        print(f"gTTS Error: {e}")
        return None
201
+
202
+
203
+ # Intent and Dynamic Hint Logic
204
# Status hint shown in the UI for each intent the model may announce.
INTENT_STATUS_MAP = {
    "code_generate": "Analyzing requirements and drafting code πŸ’»...",
    "code_explain": "Reviewing code logic and writing explanation πŸ’‘...",
    "qa_general": "Drafting comprehensive general answer ✍️...",
    "greeting": "Replying to greeting πŸ‘‹...",
    "vqa": "Analyzing VQA results and forming a final response 🧠...",
    "image_generate": "Generating image using Stable Diffusion (This may be slow on CPU) πŸ–ΌοΈ...",
    "doc_generate": "Generating content and formatting DOCX file πŸ“„...",
    "ppt_generate": "Generating content and formatting PPTX file πŸ“Š...",
    "open_camera": "Activating camera for image capture πŸ“Έ...",
    "open_google": "Simulating external search link generation πŸ”—...",
    "default": "Luna is thinking...",
}

# One pattern is used for both detecting and removing the tag, so the two
# can never disagree (previously detection ignored case but removal did not,
# leaving e.g. "[intent: greeting]" in the displayed text).
_INTENT_TAG = re.compile(r'\[Intent:\s*(\w+)\]\s*', re.IGNORECASE)


def get_intent_status(raw_response: str, is_vqa: bool) -> Tuple[str, str, str]:
    """Parse the ``[Intent: name]`` tag from the model's raw response.

    Args:
        raw_response: Model output accumulated so far.
        is_vqa: Whether the current turn carries an image.

    Returns:
        ``(intent, status, text)``: the lower-cased intent name ("default"
        when no tag is found), its status hint (the default hint for unknown
        intents), and the response with the first tag removed and stripped.
        For VQA turns whose text contains "Image Analysis (VQA)" the text is
        returned untouched with intent "vqa".
    """
    if is_vqa and "Image Analysis (VQA)" in raw_response:
        return "vqa", INTENT_STATUS_MAP["vqa"], raw_response

    match = _INTENT_TAG.search(raw_response)
    intent = match.group(1).lower() if match else "default"

    cleaned_text = _INTENT_TAG.sub('', raw_response, count=1).strip()

    status = INTENT_STATUS_MAP.get(intent, INTENT_STATUS_MAP["default"])
    return intent, status, cleaned_text
230
+
231
+
232
+ # --- NEW GENERATOR FUNCTIONS FOR UPGRADES ---
233
+
234
def generate_image_and_update_history(prompt_text: str, history: List[Dict[str, str]]):
    """Generate an image with Stable Diffusion and record the outcome.

    The last entry of ``history`` is overwritten with either a success note
    or an error message.

    Args:
        prompt_text: Text prompt passed to the image pipeline.
        history: Chat history in "messages" format; mutated in place.

    Returns:
        ``(history, image_path)`` where ``image_path`` is the saved PNG
        inside ``DOC_DIR``, or ``None`` if nothing was saved.
    """
    if img_gen_pipe is None:
        history[-1]['content'] = f"{prompt_text}\n\n❌ **Error:** Image generation model is not loaded (CPU/RAM constraint). Please check logs."
        return history, None

    try:
        print(f"Generating image for prompt: {prompt_text}")
        image = img_gen_pipe(prompt_text).images[0]

        image_filename = f"generated_img_{random.randint(1000, 9999)}.png"
        candidate_path = os.path.join(DOC_DIR, image_filename)
        image.save(candidate_path)
    except Exception as e:
        history[-1]['content'] = f"{prompt_text}\n\n❌ **Error generating image:** {e}"
        # Never hand back a path when generation or saving failed.
        return history, None

    history[-1]['content'] = f"{prompt_text}\n\nπŸ–ΌοΈ **Image Generated:**"
    return history, candidate_path
253
+
254
def generate_doc_and_update_history(content: str, history: List[Dict[str, str]]):
    """Write ``content`` into a DOCX file and record the outcome.

    The last entry of ``history`` is overwritten with a success note (plus
    the first 200 characters of the content) or an error message.

    Args:
        content: Body text for the document.
        history: Chat history in "messages" format; mutated in place.

    Returns:
        ``(history, docx_file_path)`` where the path points inside
        ``DOC_DIR``, or is ``None`` if the file could not be written.
    """
    try:
        doc = Document()
        doc.add_heading('Luna Generated Document', 0)
        doc.add_paragraph(content)

        doc_filename = f"generated_doc_{random.randint(1000, 9999)}.docx"
        candidate_path = os.path.join(DOC_DIR, doc_filename)
        doc.save(candidate_path)
    except Exception as e:
        history[-1]['content'] = f"❌ **Error generating DOCX:** {e}. Please ensure the `python-docx` library is installed."
        # Never hand back a path when saving failed.
        return history, None

    history[-1]['content'] = f"πŸ“„ **Document Generated!** You can download the file below. Content summary:\n\n{content[:200]}..."
    return history, candidate_path
272
+
273
def generate_ppt_and_update_history(content: str, history: List[Dict[str, str]]):
    """Build a PPTX deck from ``content`` and record the outcome.

    The deck has a title slide plus up to three content slides, one per
    blank-line-separated section of ``content`` that has more than five
    non-blank characters. The last entry of ``history`` is overwritten with
    a success note or an error message.

    Args:
        content: Source text; sections are separated by blank lines.
        history: Chat history in "messages" format; mutated in place.

    Returns:
        ``(history, pptx_file_path)`` where the path points inside
        ``DOC_DIR``, or is ``None`` if the file could not be written.
    """
    try:
        prs = Presentation()
        title_slide = prs.slides.add_slide(prs.slide_layouts[0])
        title_slide.shapes.title.text = "Luna Generated Presentation"

        # Filter before truncating so short fragments do not use up one of
        # the three slide slots, and number the slides consecutively.
        substantive = [s for s in content.split('\n\n') if len(s.strip()) > 5]
        for number, section in enumerate(substantive[:3], start=1):
            slide = prs.slides.add_slide(prs.slide_layouts[1])
            slide.shapes.title.text = f"Section {number}"
            text_frame = slide.shapes.placeholders[1].text_frame

            lines = [line.strip() for line in section.split('\n')]
            # The placeholder already holds one empty paragraph; fill it
            # first instead of leaving a blank bullet at the top.
            text_frame.text = lines[0]
            for line in lines[1:]:
                text_frame.add_paragraph().text = line

        ppt_filename = f"generated_ppt_{random.randint(1000, 9999)}.pptx"
        candidate_path = os.path.join(DOC_DIR, ppt_filename)
        prs.save(candidate_path)
    except Exception as e:
        history[-1]['content'] = f"❌ **Error generating PPTX:** {e}. Please ensure the `python-pptx` library is installed."
        # Never hand back a path when saving failed.
        return history, None

    history[-1]['content'] = f"πŸ“Š **Presentation Generated!** You can download the file below. Summary:\n\n{content[:200]}..."
    return history, candidate_path
307
+
308
+
309
+ # --- CORE GENERATOR FUNCTION ---
310
+
311
def _chat_idle_outputs(history, hint, *, show_fact_check=False, audio=None, download=None):
    """Build the 11 outputs that return the UI to its idle ("send") state."""
    return (
        history,                                  # chatbot
        False,                                    # stop_signal
        hint,                                     # hint_box
        gr.update(interactive=True),              # txt
        gr.update(value="↑", interactive=True, elem_classes=["circle-btn", "send-mode"]),  # combined_btn
        audio,                                    # audio_output
        False,                                    # is_voice_chat
        gr.update(visible=show_fact_check),       # fact_check_btn_row
        gr.update(value=None),                    # staged_image
        gr.update(value=None),                    # file_input
        gr.update(value=None) if download is None else download,  # file_download_output
    )


def _chat_busy_outputs(history, stop_signal, hint, is_voice_chat, *, clear_text=False):
    """Build the 11 outputs used while a response is being produced."""
    if clear_text:
        txt_update = gr.update(value="", interactive=False)
    else:
        txt_update = gr.update(interactive=False)
    return (
        history,                                  # chatbot
        stop_signal,                              # stop_signal
        hint,                                     # hint_box
        txt_update,                               # txt
        gr.update(value="Stop ⏹️", interactive=True, elem_classes=["circle-btn", "stop-mode"]),  # combined_btn
        None,                                     # audio_output
        is_voice_chat,                            # is_voice_chat
        gr.update(visible=False),                 # fact_check_btn_row
        gr.update(value=None),                    # staged_image
        gr.update(value=None),                    # file_input
        gr.update(value=None),                    # file_download_output
    )


def chat_generator(message: str, image_path: str, history: List[Dict[str, str]], stop_signal: bool, is_voice_chat: bool) -> Any:
    """Stream the LLM response for the latest user turn.

    Expects ``history`` to end with a user message followed by an assistant
    placeholder whose content is ``None`` (as appended by ``user_turn``).

    Every yield is an 11-tuple matching the UI outputs:
    [chatbot, stop_signal, hint_box, txt, combined_btn, audio_output,
     is_voice_chat, fact_check_btn_row, staged_image, file_input,
     file_download_output].

    Args:
        message: Unused on entry; the text is taken from ``history``.
        image_path: Path of a staged image, or a falsy value for text turns.
        history: Chat history in "messages" format; mutated in place.
        stop_signal: Passed through to the UI while streaming.
        is_voice_chat: When True, the final reply is also synthesized.
    """
    from urllib.parse import quote_plus

    if not history or len(history) < 2 or history[-1]['content'] is not None:
        yield _chat_idle_outputs(history, "Error: Generator called without a recent user message in history.")
        return

    # 1. PRE-PROCESSING & CONTEXT
    # The last user message sits just before the assistant placeholder.
    last_user_index = len(history) - 2
    original_message = history[last_user_index]['content']

    # Type check first: bool() on a non-string payload (e.g. an image array)
    # can raise, whereas isinstance() cannot.
    is_vqa_flow = isinstance(image_path, str) and bool(image_path)

    if is_vqa_flow:
        message = process_image(image_path, original_message)
        # Mark the stored user turn so later context shows an image was sent.
        history[last_user_index]['content'] = f"[IMAGE RECEIVED] {original_message}"
    else:
        message = original_message
        image_path = None

    # Build the prompt from the earlier turns only. The current user turn is
    # added once below (possibly VQA-enriched); including it here as well
    # would duplicate it in the prompt.
    prompt_parts = [f"SYSTEM: {SYSTEM_PROMPT}\n"]
    for item in history[:last_user_index]:
        role = item['role'].upper()
        content = item['content'] if item['content'] is not None else ""
        if role == "ASSISTANT":
            prompt_parts.append(f"LUNA: {content}\n")
        elif role == "USER":
            prompt_parts.append(f"USER: {content}\n")
    prompt_parts.append(f"USER: {message}\nLUNA: ")
    prompt = "".join(prompt_parts)

    # 2. HINT BOX & STREAM START
    history[-1]['content'] = ""
    yield _chat_busy_outputs(history, stop_signal, "✨ Luna is starting to think...", is_voice_chat, clear_text=True)
    time.sleep(0.5)

    # 3. DIRECT STREAMING
    full_response = ""
    current_intent = "default"

    try:
        stream = llm.create_completion(
            prompt=prompt,
            max_tokens=8192,
            # NOTE(review): "\n\n" ends generation at the first blank line,
            # so multi-paragraph answers are cut short - confirm intended.
            stop=["USER:", "SYSTEM:", "\n\n", "</s>"],
            echo=False,
            stream=True,
            temperature=0.7
        )
    except Exception as e:
        error_text = f"❌ Error generating response: {e}"
        history[-1]['content'] = error_text
        yield _chat_idle_outputs(history, error_text)
        return

    try:
        for output in stream:
            full_response += output["choices"][0].get("text", "")

            # Re-parse on every token so the hint follows the intent tag as
            # soon as it is complete.
            current_intent, current_hint, display_text = get_intent_status(full_response, is_vqa_flow)
            history[-1]['content'] = display_text

            yield _chat_busy_outputs(history, stop_signal, current_hint, is_voice_chat)
    except Exception as e:
        _, _, partial_text = get_intent_status(full_response, is_vqa_flow)
        history[-1]['content'] = partial_text
        yield _chat_idle_outputs(history, f"⚠️ Streaming interrupted: {e}", show_fact_check=True)
        return

    # 4. POST-PROCESSING & TOOL EXECUTION
    _, _, final_response = get_intent_status(full_response, is_vqa_flow)
    file_download_path = None
    action_note = ""

    file_tools = {
        "image_generate": generate_image_and_update_history,
        "doc_generate": generate_doc_and_update_history,
        "ppt_generate": generate_ppt_and_update_history,
    }
    tool = file_tools.get(current_intent)

    if tool is not None:
        yield _chat_busy_outputs(history, stop_signal, INTENT_STATUS_MAP[current_intent], is_voice_chat)
        history, file_download_path = tool(final_response, history)
        # The tool wrote its own success/error message; keep it verbatim
        # (it must not be replaced by the low-confidence fallback).
        final_response = history[-1]['content']
    else:
        if current_intent == "open_google":
            search_url = f"https://www.google.com/search?q={quote_plus(str(original_message))}"
            action_note = f"\n\nπŸ”— **Action:** Since I cannot open a window for you, click here to search Google for this topic: [Google Search Link]({search_url})"
        elif current_intent == "open_camera":
            action_note = "\n\nπŸ“Έ **Action:** I cannot directly open the camera within this chat stream, but I will prepare the UI for you to use the 'Google Lens' button if you click 'Send' now!"

        # Confidence check runs on the model text only; the action note is
        # appended afterwards so the response cleaner cannot strip it.
        final_response = check_confidence_and_augment(final_response, original_message) + action_note

    audio_file_path = text_to_audio(final_response, is_voice_chat)
    history[-1]['content'] = final_response

    # 5. FINAL YIELD
    yield _chat_idle_outputs(
        history,
        "βœ… Response generated.",
        show_fact_check=True,
        audio=audio_file_path,
        download=file_download_path,
    )
440
+
441
+ # --- GRADIO WRAPPERS FOR UI ACTIONS ---
442
+
443
def toggle_menu(current_visibility: bool) -> Tuple[bool, gr.update, gr.update, gr.update]:
    """Flip the media-options menu between shown and hidden.

    Returns the new visibility flag, the menu update, an update that hides
    the fact-check row, and the menu button's new icon.
    """
    now_visible = not current_visibility
    if now_visible:
        button_icon = "⬇️"
    else:
        button_icon = "βž•"
    menu_update = gr.update(visible=now_visible)
    fact_check_update = gr.update(visible=False)
    return now_visible, menu_update, fact_check_update, gr.update(value=button_icon)
447
+
448
def user_turn(user_message: str, chat_history: List[Dict[str, str]]) -> Tuple[str, List[Dict[str, str]]]:
    """Append the user message to the history and clear the input box.

    Uses the "messages" format. A non-empty message adds two entries: the
    user turn and an assistant placeholder with ``content=None`` that the
    generator fills in. An empty message leaves the history untouched.

    Args:
        user_message: Text from the input box (may be empty or ``None``).
        chat_history: Existing history; mutated in place. ``None`` is
            treated as an empty history.

    Returns:
        ``("", chat_history)`` - the empty string clears the textbox.
    """
    if chat_history is None:
        chat_history = []

    if user_message:
        chat_history.append({"role": "user", "content": user_message})
        # Placeholder required for streaming/generation.
        chat_history.append({"role": "assistant", "content": None})

    return "", chat_history
464
+
465
def stage_file_upload(file_path: str) -> Tuple[str, str, gr.update, gr.update]:
    """Stage an uploaded file and describe the result in the hint box.

    Returns the staged path (``None`` when cleared), the hint text, an update
    that empties and enables the textbox, and an update that locks the
    uploader again.
    """
    if file_path:
        staged = file_path
        hint = f"πŸ“Ž File staged: {os.path.basename(file_path)}. Click send (✈️) to analyze."
    else:
        staged = None
        hint = "File upload cancelled/cleared."
    return staged, hint, gr.update(value="", interactive=True), gr.update(interactive=False)
470
+
471
def clear_staged_media() -> gr.update:
    """Reset the staged-media state once a turn has been sent or cancelled."""
    cleared = gr.update(value=None)
    return cleared
474
+
475
def manual_fact_check(history: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], str, gr.update]:
    """Append simulated web-search results to the latest response.

    Uses the "messages" format. The query is the most recent non-empty user
    message, with any "**User Query:**" preamble and "[IMAGE RECEIVED]"
    marker removed.

    Args:
        history: Chat history; it is not modified.

    Returns:
        ``(history, hint, fact_check_row_update)``. On success the history is
        a new list whose last message is a new dict with the search snippet
        appended; on failure the original history is returned with an error
        hint. The fact-check row is hidden in every case.
    """
    if not history or not history[-1]['content']:
        return history, "Error: No final response to check.", gr.update(visible=False)

    # Most recent user prompt, i.e. the one that produced the last response.
    last_user_prompt = ""
    for item in reversed(history):
        if item['role'] == 'user' and item['content']:
            last_user_prompt = item['content'].split("**User Query:**")[-1].strip()
            break

    # Image turns are stored with this marker; it is not part of the query.
    image_marker = "[IMAGE RECEIVED]"
    if last_user_prompt.startswith(image_marker):
        last_user_prompt = last_user_prompt[len(image_marker):].strip()

    if not last_user_prompt:
        return history, "Error: Could not find the original user query.", gr.update(visible=False)

    web_results = web_search_tool(last_user_prompt)

    # list() alone is a shallow copy; rebuild the final message too so the
    # caller's dict is not mutated through the shared reference.
    new_history = list(history)
    new_history[-1] = {**history[-1], 'content': history[-1]['content'] + web_results}

    return new_history, "βœ… Double-checked with web facts.", gr.update(visible=False)
496
+
497
+ # UPGRADE 3: Automatic Camera Capture Function (Simplified)
498
def auto_capture_camera(user_message: str, chat_history: List[Dict[str, str]]) -> Tuple[str, List[Dict[str, str]], gr.update, gr.update, gr.update, gr.update, gr.update]:
    """Prepare the UI for a simulated automatic camera capture.

    The user message is recorded through ``user_turn``; if that leaves an
    empty assistant placeholder at the end of the history, it is filled with
    a status line. The remaining return values are five UI updates:
    hidden, visible, hidden, a disabled "capturing" notice, and the menu
    icon reset.
    """
    # Reuse user_turn so the history has the shape the intent flow expects.
    _, chat_history = user_turn(user_message, chat_history)

    if chat_history:
        placeholder = chat_history[-1]
        if placeholder['role'] == 'assistant' and placeholder['content'] is None:
            placeholder['content'] = "πŸ“Έ Preparing camera capture..."

    capture_notice = gr.update(value="πŸ“Έ Capturing in 3 seconds...", interactive=False)
    menu_icon = gr.update(value="βž•")
    return (
        "",
        chat_history,
        gr.update(visible=False),
        gr.update(visible=True),
        gr.update(visible=False),
        capture_notice,
        menu_icon,
    )
512
+
513
+
514
+ # --- GRADIO INTERFACE ---
515
+
516
with gr.Blocks(theme=gr.themes.Soft(), title="Luna Coding Partner") as demo:

    # --- State Components ---
    stop_signal = gr.State(value=False)
    is_voice_chat = gr.State(value=False)
    staged_image = gr.State(value=None)        # path of the staged photo/file
    menu_visible_state = gr.State(value=False)

    gr.HTML("<h1 style='text-align: center; color: #4B0082;'>πŸŒ™ Luna Chat Space</h1>")

    # Hint Box: one-line status area driven by the handlers below.
    hint_box = gr.Textbox(value="Ask anything", lines=1, show_label=False, interactive=False, placeholder="Luna's Action...", visible=True)

    # Download Box
    file_download_output = gr.File(label="Generated File", visible=True)

    # Fact Check button row (the empty columns are placed on both sides of the button)
    with gr.Row(visible=False) as fact_check_btn_row:
        gr.Column(min_width=1)
        btn_fact_check = gr.Button("Fact Check πŸ”Ž")
        gr.Column(min_width=1)

    # Chatbot Area ("messages" format: list of {"role", "content"} dicts)
    chatbot = gr.Chatbot(label="Luna", height=500, type='messages')

    # Webcam Capture Area (Hidden)
    with gr.Row(visible=False) as webcam_capture_row:
        # type="filepath": chat_generator only treats a non-empty *string*
        # as a staged image, so the capture must arrive as a file path
        # rather than the default numpy array.
        webcam_capture_component = gr.Image(sources=["webcam"], type="filepath", show_label=False)
        close_webcam_btn = gr.Button("βœ… Use this image")

    # Audio Recording Row (Hidden)
    with gr.Row(visible=False) as audio_record_row:
        audio_input = gr.Audio(sources=["microphone"], type="filepath", show_label=False)

    # Option Menu (Hidden)
    with gr.Column(visible=False, elem_id="menu_options_row") as menu_options_row:
        file_input = gr.File(type="filepath", label="File Uploader", interactive=False)
        btn_take_photo = gr.Button("πŸ“Έ Google Lens (Take Photo)")
        btn_add_files = gr.Button("πŸ“Ž Upload File")

    # Fixed Input Row (Footer)
    with gr.Row(variant="panel") as input_row:
        btn_menu = gr.Button("βž•", interactive=True, size="sm")
        txt = gr.Textbox(placeholder="Ask anything", show_label=False, lines=1, autofocus=True)
        mic_btn = gr.Button("πŸŽ™οΈ", interactive=True, size="sm")
        combined_btn = gr.Button("✈️", variant="primary", size="sm")

    audio_output = gr.Audio(visible=False)

    # Order must match the 11-tuples yielded by chat_generator.
    output_components = [chatbot, stop_signal, hint_box, txt, combined_btn, audio_output, is_voice_chat, fact_check_btn_row, staged_image, file_input, file_download_output]
    generator_inputs = [txt, staged_image, chatbot, stop_signal, is_voice_chat]

    def _then_generate(event):
        """Chain response generation, then clear the staged media."""
        return event.then(
            fn=chat_generator,
            inputs=generator_inputs,
            outputs=output_components,
            queue=True,
        ).then(
            fn=clear_staged_media, inputs=[], outputs=[staged_image], queue=False
        )

    def _then_submit(event):
        """Chain: record the user turn, generate, clear staged media."""
        return _then_generate(event.then(
            fn=user_turn,
            inputs=[txt, chatbot],
            outputs=[txt, chatbot],
            queue=False,
        ))

    # --- WIRE EVENTS ---

    # 1. Menu Button
    btn_menu.click(
        fn=toggle_menu,
        inputs=[menu_visible_state],
        outputs=[menu_visible_state, menu_options_row, fact_check_btn_row, btn_menu],
        queue=False
    )

    # 2. File Upload
    def prepare_file_upload():
        """Close the menu, unlock the uploader and clear the textbox."""
        return gr.update(visible=False), gr.update(value="βž•"), gr.update(visible=False), gr.update(interactive=True), gr.update(value="")

    btn_add_files.click(fn=prepare_file_upload, inputs=[], outputs=[menu_options_row, btn_menu, fact_check_btn_row, file_input, txt], queue=False)

    file_input.change(
        fn=stage_file_upload,
        inputs=[file_input],
        outputs=[staged_image, hint_box, txt, file_input],
        queue=False
    )

    # 3. 'Take photo' (Webcam): swap the input row for the webcam row.
    btn_take_photo.click(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), "πŸ“Έ Camera Active. Capture an image.", gr.update(value="βž•")),
        inputs=[],
        outputs=[menu_options_row, webcam_capture_row, input_row, hint_box, btn_menu],
        queue=False
    )

    # 4. Webcam Close: stage the captured image and restore the input row.
    close_webcam_btn.click(
        fn=lambda img: (gr.update(visible=True), gr.update(visible=False), img, "πŸ“Έ Photo staged: Click send (✈️) to process.", gr.update(value="")),
        inputs=[webcam_capture_component],
        outputs=[input_row, webcam_capture_row, staged_image, hint_box, txt],
        queue=False
    )

    # 5. Mic wiring: show recorder -> fixed wait -> hide recorder ->
    #    transcribe -> record user turn -> generate.
    mic_transcribed = mic_btn.click(
        fn=lambda: (gr.update(visible=False), gr.update(visible=True), "πŸŽ™οΈ Recording..."),
        inputs=[],
        outputs=[input_row, audio_record_row, hint_box],
        queue=False
    ).then(
        fn=simulate_recording_delay,
        inputs=[],
        outputs=[],
        queue=False,
    ).then(
        fn=lambda: (gr.update(visible=True), gr.update(visible=False), "πŸŽ™οΈ Processing recording..."),
        inputs=[],
        outputs=[input_row, audio_record_row, hint_box],
        queue=False,
    ).then(
        fn=transcribe_audio,
        inputs=audio_input,
        # txt appears twice on purpose: transcribe_audio returns the text
        # value and, separately, an interactivity update for the same box.
        outputs=[txt, hint_box, txt, combined_btn, is_voice_chat, fact_check_btn_row],
        queue=False
    )
    _then_submit(mic_transcribed)

    # 6. Main Submission Logic (Enter key and Send button)
    _then_generate(txt.submit(
        fn=user_turn,
        inputs=[txt, chatbot],
        outputs=[txt, chatbot],
        queue=False
    ))

    _then_generate(combined_btn.click(
        fn=user_turn,
        inputs=[txt, chatbot],
        outputs=[txt, chatbot],
        queue=False
    ))

    # 7. Fact Check Button
    btn_fact_check.click(
        fn=manual_fact_check,
        inputs=[chatbot],
        outputs=[chatbot, hint_box, fact_check_btn_row],
        queue=True
    )

demo.queue(max_size=20).launch(server_name="0.0.0.0")