m-ric HF Staff committed on
Commit
4e27b40
·
1 Parent(s): 2cc10ab

Working Ctrl F tool

Browse files
Files changed (2) hide show
  1. app.py +9 -4
  2. e2bqwen.py +19 -0
app.py CHANGED
@@ -585,6 +585,7 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
585
  examples=[
586
  "Check the commuting time between Bern and Zurich on Google maps",
587
  "Write 'Hello World' in a text editor",
 
588
  "Search a flight Rome - Berlin for tomorrow",
589
  "What' s the name of the pond just south of Château de Fontainebleau in Google maps?",
590
  "Go generate a picture of the Golden Gate bridge on a FLUX1.dev space",
@@ -657,7 +658,6 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
657
  label="Header"
658
  )
659
 
660
- stop_btn = gr.Button("Stop the agent!", variant="stop")
661
 
662
  chatbot_display = gr.Chatbot(
663
  elem_id="chatbot",
@@ -672,6 +672,8 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
672
 
673
  agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=None, name="ok", description="ok"))
674
 
 
 
675
  def read_log_content(log_file, tail=4):
676
  """Read the contents of a log file for a specific session"""
677
  if not log_file:
@@ -696,7 +698,7 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
696
  return update_html(True, request)
697
 
698
  def reactivate_stop_btn():
699
- return gr.Button("Stop the agent!", variant="stop")
700
 
701
  is_interactive = gr.Checkbox(value=True, visible=False)
702
 
@@ -718,8 +720,11 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js) as demo:
718
  )
719
 
720
  def interrupt_agent(session_state):
721
- session_state["agent"].interrupt()
722
- return gr.Button("Stopping agent... (could take time)", variant="secondary")
 
 
 
723
 
724
  stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[stop_btn])
725
 
 
585
  examples=[
586
  "Check the commuting time between Bern and Zurich on Google maps",
587
  "Write 'Hello World' in a text editor",
588
+ "When was Temple Grandin introduced to the American Academy of Arts and Sciences, according to Wikipedia?",
589
  "Search a flight Rome - Berlin for tomorrow",
590
  "What' s the name of the pond just south of Château de Fontainebleau in Google maps?",
591
  "Go generate a picture of the Golden Gate bridge on a FLUX1.dev space",
 
658
  label="Header"
659
  )
660
 
 
661
 
662
  chatbot_display = gr.Chatbot(
663
  elem_id="chatbot",
 
672
 
673
  agent_ui = EnrichedGradioUI(CodeAgent(tools=[], model=None, name="ok", description="ok"))
674
 
675
+ stop_btn = gr.Button("Stop the agent!", variant="huggingface")
676
+
677
  def read_log_content(log_file, tail=4):
678
  """Read the contents of a log file for a specific session"""
679
  if not log_file:
 
698
  return update_html(True, request)
699
 
700
  def reactivate_stop_btn():
701
+ return gr.Button("Stop the agent!", variant="huggingface")
702
 
703
  is_interactive = gr.Checkbox(value=True, visible=False)
704
 
 
720
  )
721
 
722
  def interrupt_agent(session_state):
723
+ if not session_state["agent"].interrupt_switch:
724
+ session_state["agent"].interrupt()
725
+ return gr.Button("Stopping agent... (could take time)", variant="secondary")
726
+ else:
727
+ return gr.Button("Stop the agent!", variant="huggingface")
728
 
729
  stop_btn.click(fn=interrupt_agent, inputs=[session_state], outputs=[stop_btn])
730
 
e2bqwen.py CHANGED
@@ -356,6 +356,24 @@ class E2BVisionAgent(CodeAgent):
356
  self.logger.log(f"Opening URL: {url}")
357
  return f"Opened URL: {url}"
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
  # Register the tools
361
  self.tools["click"] = click
@@ -369,6 +387,7 @@ class E2BVisionAgent(CodeAgent):
369
  self.tools["open_url"] = open_url
370
  self.tools["go_back"] = go_back
371
  self.tools["drag_and_drop"] = drag_and_drop
 
372
 
373
 
374
  def take_screenshot_callback(self, memory_step: ActionStep, agent=None) -> None:
 
356
  self.logger.log(f"Opening URL: {url}")
357
  return f"Opened URL: {url}"
358
 
359
+ @tool
360
+ def find_on_page_ctrl_f(search_string: str) -> str:
361
+ """
362
+ Scroll the viewport to the first occurrence of the search string. This is equivalent to Ctrl+F.
363
+ Args:
364
+ search_string: The string to search for on the page.
365
+ """
366
+ self.desktop.press(["ctrl", "f"])
367
+ time.sleep(0.3)
368
+ clean_text = normalize_text(search_string)
369
+ self.desktop.write(clean_text, delay_in_ms=75)
370
+ time.sleep(0.3)
371
+ self.desktop.press("enter")
372
+ time.sleep(0.3)
373
+ self.desktop.press("esc")
374
+ output_message = f"Scrolled to the first occurrence of '{clean_text}'"
375
+ self.logger.log(output_message)
376
+ return output_message
377
 
378
  # Register the tools
379
  self.tools["click"] = click
 
387
  self.tools["open_url"] = open_url
388
  self.tools["go_back"] = go_back
389
  self.tools["drag_and_drop"] = drag_and_drop
390
+ self.tools["find_on_page_ctrl_f"] = find_on_page_ctrl_f
391
 
392
 
393
  def take_screenshot_callback(self, memory_step: ActionStep, agent=None) -> None: