jesusvilela committed on
Commit
862dde4
·
verified ·
1 Parent(s): 3d6f41a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -15
app.py CHANGED
@@ -39,13 +39,13 @@ try: import whisper; WHISPER_AVAILABLE = True
39
  except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
40
 
41
  # Google GenAI (Used by LangChain integration AND direct client)
42
- from google.genai.types import HarmCategory, HarmBlockThreshold # CORRECTED IMPORT
43
  from google.ai import generativelanguage as glm # For FileState enum
44
 
45
  # LangChain
46
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
47
  from langchain.prompts import PromptTemplate
48
- from langchain.tools import BaseTool, tool as lc_tool_decorator # Use langchain.tools.tool
49
  from langchain_google_genai import ChatGoogleGenerativeAI
50
  from langchain.agents import AgentExecutor, create_react_agent
51
  from langchain_community.tools import DuckDuckGoSearchRun
@@ -261,7 +261,6 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
261
  logger.error(f"Download error for {file_url_to_try}: {e}", exc_info=True); return f"Error: {str(e)[:100]}"
262
 
263
  # --- Tool Function Definitions ---
264
- # Corrected: Removed 'description' from @lc_tool_decorator, use docstring
265
  @lc_tool_decorator
266
  def read_pdf_tool(action_input_json_str: str) -> str:
267
  """Reads text content from a PDF file. Input: JSON '{\"file_identifier\": \"FILENAME_OR_URL\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\"}'. Returns extracted text."""
@@ -326,7 +325,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
326
  if not file_identifier: return "Error: 'file_identifier' for image missing."
327
  logger.info(f"Direct Multimodal Tool: Processing image '{file_identifier}' with prompt '{text_prompt}'")
328
  local_image_path = _download_file(file_identifier, task_id)
329
- if local_image_path.startswith("Error:"): return f"Error downloading image for Direct Multimodal Tool: {local_image_path}"
330
  try:
331
  pil_image = Image.open(local_image_path)
332
  except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
@@ -337,7 +336,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
337
  )
338
  logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
339
  return response.text[:40000]
340
- except json.JSONDecodeError as e_json_mm: return f"Error parsing JSON input for Direct Multimodal Tool: {str(e_json_mm)}. Input: {action_input_json_str}"
341
  except Exception as e_tool_mm:
342
  logger.error(f"Error in direct_multimodal_gemini_tool: {e_tool_mm}", exc_info=True)
343
  return f"Error executing Direct Multimodal Tool: {str(e_tool_mm)}"
@@ -381,8 +380,7 @@ def initialize_agent_and_tools(force_reinit=False):
381
  logger.info("Initializing agent and tools...")
382
  if not GOOGLE_API_KEY: raise ValueError("GOOGLE_API_KEY not set for LangChain LLM.")
383
 
384
- # Corrected safety_settings format for ChatGoogleGenerativeAI
385
- # Using INTEGER VALUES for HarmCategory keys and HarmBlockThreshold enum members for values.
386
  llm_safety_settings_corrected_final = {
387
  HarmCategory.HARM_CATEGORY_HARASSMENT.value: HarmBlockThreshold.BLOCK_NONE.value,
388
  HarmCategory.HARM_CATEGORY_HATE_SPEECH.value: HarmBlockThreshold.BLOCK_NONE.value,
@@ -395,9 +393,9 @@ def initialize_agent_and_tools(force_reinit=False):
395
  model=GEMINI_MODEL_NAME,
396
  google_api_key=GOOGLE_API_KEY,
397
  temperature=0.0,
398
- #safety_settings=llm_safety_settings_corrected_final,
399
  timeout=120,
400
- convert_system_message_to_human=True
401
  )
402
  logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME}")
403
  except Exception as e:
@@ -429,8 +427,17 @@ def initialize_agent_and_tools(force_reinit=False):
429
  )
430
  def agent_node(state: AgentState):
431
  current_input = state.get('input', '')
432
- formatted_system_prompt = prompt_content_lg_init.replace("{input}", current_input)
433
- messages_for_llm = [SystemMessage(content=formatted_system_prompt)] + state.get('messages', [])
 
 
 
 
 
 
 
 
 
434
  bound_llm = LLM_INSTANCE.bind_tools(TOOLS) # type: ignore
435
  response = bound_llm.invoke(messages_for_llm)
436
  return {"messages": [response]}
@@ -506,8 +513,8 @@ def get_agent_response(prompt: str, task_id: Optional[str]=None, thread_id: Opti
506
  try:
507
  if is_langgraph_agent_get:
508
  logger.debug(f"Using LangGraph agent (Memory: {LANGGRAPH_MEMORY_SAVER is not None}) for thread: {thread_id_to_use}")
509
- initial_messages_lg_get = []
510
- input_for_lg_get = {"input": prompt, "messages": initial_messages_lg_get}
511
  final_state_lg_get = AGENT_INSTANCE.invoke(input_for_lg_get, {"configurable": {"thread_id": thread_id_to_use}}) # type: ignore
512
  if not final_state_lg_get or 'messages' not in final_state_lg_get or not final_state_lg_get['messages']:
513
  logger.error("LangGraph: No final state/messages."); return "[ERROR] LangGraph: No final state/messages."
@@ -600,7 +607,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
600
  gr.LoginButton()
601
  run_button = gr.Button("Run Evaluation & Submit All Answers")
602
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
603
- results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True) # Removed height
604
 
605
  run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
606
 
@@ -626,7 +633,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
626
  demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
627
 
628
  if __name__ == "__main__":
629
- logger.info(f"Application starting up (v7 - Final SafetySettings Fix)...")
630
  if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
631
  if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
632
  if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
 
39
  except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
40
 
41
  # Google GenAI (Used by LangChain integration AND direct client)
42
+ from google.genai.types import HarmCategory, HarmBlockThreshold
43
  from google.ai import generativelanguage as glm # For FileState enum
44
 
45
  # LangChain
46
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
47
  from langchain.prompts import PromptTemplate
48
+ from langchain.tools import BaseTool, tool as lc_tool_decorator
49
  from langchain_google_genai import ChatGoogleGenerativeAI
50
  from langchain.agents import AgentExecutor, create_react_agent
51
  from langchain_community.tools import DuckDuckGoSearchRun
 
261
  logger.error(f"Download error for {file_url_to_try}: {e}", exc_info=True); return f"Error: {str(e)[:100]}"
262
 
263
  # --- Tool Function Definitions ---
 
264
  @lc_tool_decorator
265
  def read_pdf_tool(action_input_json_str: str) -> str:
266
  """Reads text content from a PDF file. Input: JSON '{\"file_identifier\": \"FILENAME_OR_URL\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\"}'. Returns extracted text."""
 
325
  if not file_identifier: return "Error: 'file_identifier' for image missing."
326
  logger.info(f"Direct Multimodal Tool: Processing image '{file_identifier}' with prompt '{text_prompt}'")
327
  local_image_path = _download_file(file_identifier, task_id)
328
+ if local_image_path.startswith("Error:"): return f"Error downloading image for Direct MM Tool: {local_image_path}"
329
  try:
330
  pil_image = Image.open(local_image_path)
331
  except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
 
336
  )
337
  logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
338
  return response.text[:40000]
339
+ except json.JSONDecodeError as e_json_mm: return f"Error parsing JSON input for Direct MM Tool: {str(e_json_mm)}. Input: {action_input_json_str}"
340
  except Exception as e_tool_mm:
341
  logger.error(f"Error in direct_multimodal_gemini_tool: {e_tool_mm}", exc_info=True)
342
  return f"Error executing Direct Multimodal Tool: {str(e_tool_mm)}"
 
380
  logger.info("Initializing agent and tools...")
381
  if not GOOGLE_API_KEY: raise ValueError("GOOGLE_API_KEY not set for LangChain LLM.")
382
 
383
+ # Using INTEGER VALUES for HarmCategory keys and HarmBlockThreshold enum .value for values.
 
384
  llm_safety_settings_corrected_final = {
385
  HarmCategory.HARM_CATEGORY_HARASSMENT.value: HarmBlockThreshold.BLOCK_NONE.value,
386
  HarmCategory.HARM_CATEGORY_HATE_SPEECH.value: HarmBlockThreshold.BLOCK_NONE.value,
 
393
  model=GEMINI_MODEL_NAME,
394
  google_api_key=GOOGLE_API_KEY,
395
  temperature=0.0,
396
+ safety_settings=llm_safety_settings_corrected_final,
397
  timeout=120,
398
+ convert_system_message_to_human=True # This flag might be interacting with how system prompts are handled
399
  )
400
  logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME}")
401
  except Exception as e:
 
427
  )
428
  def agent_node(state: AgentState):
429
  current_input = state.get('input', '')
430
+ # The LANGGRAPH_PROMPT_TEMPLATE_STR serves as the system message, with the current task input.
431
+ system_message_content = prompt_content_lg_init.replace("{input}", current_input)
432
+
433
+ # Messages for LLM: System prompt followed by history
434
+ messages_for_llm = [SystemMessage(content=system_message_content)] + state.get('messages', [])
435
+
436
+ logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")
437
+ if not messages_for_llm or (isinstance(messages_for_llm[0], SystemMessage) and not messages_for_llm[0].content.strip()):
438
+ logger.error("LLM call would fail: first message is SystemMessage with no/empty content or messages_for_llm is empty.")
439
+ return {"messages": [AIMessage(content="[ERROR] Agent node: System message content is empty.")]}
440
+
441
  bound_llm = LLM_INSTANCE.bind_tools(TOOLS) # type: ignore
442
  response = bound_llm.invoke(messages_for_llm)
443
  return {"messages": [response]}
 
513
  try:
514
  if is_langgraph_agent_get:
515
  logger.debug(f"Using LangGraph agent (Memory: {LANGGRAPH_MEMORY_SAVER is not None}) for thread: {thread_id_to_use}")
516
+ # The 'input' for LangGraph state is the fully constructed prompt for the task
517
+ input_for_lg_get = {"input": prompt, "messages": []}
518
  final_state_lg_get = AGENT_INSTANCE.invoke(input_for_lg_get, {"configurable": {"thread_id": thread_id_to_use}}) # type: ignore
519
  if not final_state_lg_get or 'messages' not in final_state_lg_get or not final_state_lg_get['messages']:
520
  logger.error("LangGraph: No final state/messages."); return "[ERROR] LangGraph: No final state/messages."
 
607
  gr.LoginButton()
608
  run_button = gr.Button("Run Evaluation & Submit All Answers")
609
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
610
+ results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True)
611
 
612
  run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
613
 
 
633
  demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
634
 
635
  if __name__ == "__main__":
636
+ logger.info(f"Application starting up (v7 - Corrected HarmCategory/BlockThreshold Import & SafetySettings format)...")
637
  if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
638
  if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
639
  if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")