Spaces:

tfrere
/

leaderboard-parser-agent

Build error

App Files Files Community

leaderboard-parser-agent / src /agents /tools.py

tfrere HF Staff

first commit

0821095 8 months ago

raw

history blame

17.5 kB

	"""
	Tools for the leaderboard agent.
	"""
	from selenium import webdriver
	from selenium.webdriver.common.by import By
	from selenium.webdriver.common.keys import Keys
	from selenium.webdriver.common.action_chains import ActionChains
	import re
	import time
	import helium

	from smolagents import tool


	@tool
	def search_item_ctrl_f(text: str, nth_result: int = 1) -> str:
	    """
	    Searches for text on the current page (Ctrl + F style) and jumps to the nth occurrence.

	    Matching elements are found with an XPath `contains(text(), ...)` query and the
	    selected one is scrolled into view with `scrollIntoView`.

	    Args:
	        text: The text to search for
	        nth_result: Which occurrence to jump to, 1-based (default: 1)

	    Returns:
	        A message giving the number of matches and which one was focused.

	    Raises:
	        ValueError: If nth_result is smaller than 1.
	        Exception: If the page has fewer than nth_result matches.
	    """
	    from src.agents.browser import driver

	    if nth_result < 1:
	        # 0 or a negative value would otherwise wrap around to the end of the list.
	        raise ValueError(f"nth_result must be >= 1 (got {nth_result})")

	    # XPath 1.0 has no escape character inside string literals, so choose the
	    # quote style that does not clash with the text, or fall back to concat().
	    if "'" not in text:
	        literal = f"'{text}'"
	    elif '"' not in text:
	        literal = f'"{text}"'
	    else:
	        pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
	        literal = f"concat({pieces})"

	    elements = driver.find_elements(By.XPATH, f"//*[contains(text(), {literal})]")
	    if nth_result > len(elements):
	        raise Exception(f"Match n°{nth_result} not found (only {len(elements)} matches found)")

	    elem = elements[nth_result - 1]
	    driver.execute_script("arguments[0].scrollIntoView(true);", elem)
	    return (
	        f"Found {len(elements)} matches for '{text}'. "
	        f"Focused on element {nth_result} of {len(elements)}"
	    )


	@tool
	def go_back() -> str:
	    """
	    Return to the previous entry in the browser history.

	    Returns:
	        A confirmation message, sent after a fixed two-second pause.
	    """
	    from src.agents.browser import driver

	    settle_seconds = 2  # fixed pause so the previous page has time to load
	    driver.back()
	    time.sleep(settle_seconds)

	    confirmation = "Navigated back to previous page"
	    return confirmation


	@tool
	def close_popups() -> str:
	    """
	    Dismisses popup/modal dialogs that may be covering the page.
	    Handy when cookie banners, login prompts and similar overlays block interaction.

	    Returns:
	        "Closed popup dialogs" if at least one candidate button was clicked,
	        otherwise "No popup dialogs found".
	    """
	    from src.agents.browser import driver

	    # XPath queries for buttons that commonly dismiss an overlay.
	    popup_selectors = (
	        "//button[contains(text(), 'Accept')]",
	        "//button[contains(text(), 'Close')]",
	        "//button[contains(text(), 'Fermer')]",
	        "//button[contains(text(), 'OK')]",
	        "//button[contains(text(), 'Got it')]",
	        "//button[contains(@class, 'close')]",
	        "//div[contains(@class, 'popup')]//button",
	        "//div[contains(@class, 'modal')]//button",
	        "//div[contains(@class, 'dialog')]//button",
	    )

	    clicked_any = False
	    for xpath in popup_selectors:
	        try:
	            for candidate in driver.find_elements(By.XPATH, xpath):
	                if not candidate.is_displayed():
	                    continue
	                candidate.click()
	                clicked_any = True
	                time.sleep(0.5)  # give the popup time to disappear
	        except Exception:
	            # Best effort: any failure abandons this selector and moves on to the next.
	            continue

	    if clicked_any:
	        return "Closed popup dialogs"
	    return "No popup dialogs found"


	@tool
	def extract_table_data(table_caption: str = None, table_index: int = 1) -> str:
	    """
	    Extracts a preview of one table on the page: headers, row count and the first rows.
	    The table is picked by caption text when one is given, otherwise by index.

	    Args:
	        table_caption: Text contained in the table, caption included (default: None - will use index)
	        table_index: The index of the table if caption is not provided (1-based)

	    Returns:
	        A text report with the number of tables on the page followed by the headers,
	        the row count and up to five sample rows of the selected table, or a message
	        explaining why no table could be selected.
	    """
	    from src.agents.browser import driver

	    tables = driver.find_elements(By.TAG_NAME, "table")
	    if not tables:
	        return "No tables found on the page."

	    report = [f"Found {len(tables)} table(s) on the page.\n"]

	    if table_caption:
	        # Case-insensitive match against the visible text of each table.
	        needle = table_caption.lower()
	        position = next(
	            (i for i, candidate in enumerate(tables, start=1) if needle in candidate.text.lower()),
	            None,
	        )
	        if position is None:
	            report.append(f"No table containing '{table_caption}' was found.")
	            return "".join(report)
	    else:
	        if not 1 <= table_index <= len(tables):
	            report.append(f"Table index {table_index} is out of range (1-{len(tables)}).")
	            return "".join(report)
	        position = table_index

	    table = tables[position - 1]
	    report.append(f"\nTable {position}:\n")

	    # Header cells, when the table has any
	    header_texts = [header.text for header in table.find_elements(By.TAG_NAME, "th")]
	    if header_texts:
	        report.append(f"Headers: {', '.join(header_texts)}\n")

	    rows = table.find_elements(By.TAG_NAME, "tr")
	    report.append(f"Found {len(rows)} rows.\n")

	    # Only the first 5 rows are sampled; rows without <td> cells (e.g. header rows) are skipped.
	    separator = " | "
	    for row_number, row in enumerate(rows[:5], start=1):
	        cell_texts = [cell.text for cell in row.find_elements(By.TAG_NAME, "td")]
	        if cell_texts:
	            report.append(f"Row {row_number}: {separator.join(cell_texts)}\n")

	    return "".join(report)


	@tool
	def find_leaderboard_elements() -> str:
	    """
	    Scan the page for structures that commonly hold a leaderboard.
	    Counts tables, ordered lists, grid/flex-style divs and rank-like elements.

	    Returns:
	        One line per kind of structure that was found, or a hint to scroll or
	        navigate when nothing was found.
	    """
	    from src.agents.browser import driver

	    def count(by, locator):
	        # Number of elements currently matching the locator.
	        return len(driver.find_elements(by, locator))

	    findings = []

	    # Plain HTML tables
	    table_count = count(By.TAG_NAME, "table")
	    if table_count:
	        findings.append(f"Found {table_count} table(s) that might contain leaderboard data.\n")

	    # Ordered lists
	    ol_count = count(By.TAG_NAME, "ol")
	    if ol_count:
	        findings.append(f"Found {ol_count} ordered list(s) that might contain rankings.\n")

	    # Divs whose class name hints at a custom grid/flex leaderboard
	    grid_count = count(
	        By.XPATH,
	        "//div[contains(@class, 'grid') or contains(@class, 'flex') or contains(@class, 'table') or contains(@class, 'rank') or contains(@class, 'leaderboard')]",
	    )
	    if grid_count:
	        findings.append(f"Found {grid_count} div elements with grid/flex/table classes that might be custom leaderboards.\n")

	    # Any element whose class name hints at a rank or position indicator
	    rank_count = count(
	        By.XPATH,
	        "//*[contains(@class, 'rank') or contains(@class, 'position') or contains(@class, 'standing')]",
	    )
	    if rank_count:
	        findings.append(f"Found {rank_count} elements with rank/position classes.\n")

	    if not findings:
	        return "Could not find any obvious leaderboard elements. Try scrolling or navigating to the correct section."

	    return "".join(findings)

	@tool
	def map_clickable_elements(keyword: str = None) -> str:
	    """
	    Displays a list of all clickable elements on the page with their coordinates.

	    Each visible element with a non-empty label is listed once, even when it matches
	    several selectors, together with the centre point of its `rect`.

	    Args:
	        keyword: Optional keyword to filter elements. If specified, only elements containing this keyword will be displayed.

	    Returns:
	        A string listing all clickable elements with their coordinates.
	    """
	    from src.agents.browser import driver

	    clickable_selectors = [
	        "a", "button", "input[type='button']", "input[type='submit']",
	        ".clickable", "[role='button']", "[onclick]"
	    ]

	    needle = keyword.lower() if keyword else None
	    seen_ids = set()  # Selenium element ids already listed
	    lines = []

	    for selector in clickable_selectors:
	        for element in driver.find_elements(By.CSS_SELECTOR, selector):
	            try:
	                if element.id in seen_ids:
	                    continue

	                # Fall back to the "value" attribute for inputs that have no text.
	                text = element.text.strip() or element.get_attribute("value")

	                # Skip empty or invisible elements
	                if not text or not element.is_displayed():
	                    continue

	                # Apply the keyword filter when one was given
	                if needle and needle not in text.lower():
	                    continue

	                rect = element.rect
	                x = int(rect['x'] + rect['width'] / 2)
	                y = int(rect['y'] + rect['height'] / 2)

	                lines.append(f"{len(lines) + 1}. '{text}' ({selector}) - coords: x={x}, y={y}\n")
	                seen_ids.add(element.id)
	            except Exception:
	                # Elements can go stale while the page changes; skip them, but do not
	                # swallow KeyboardInterrupt/SystemExit as the bare except did.
	                continue

	    result = "Éléments cliquables détectés:\n" + "".join(lines)
	    result += f"\nTotal: {len(lines)} éléments cliquables" + (" contenant '" + keyword + "'" if keyword else "")
	    return result

	@tool
	def copy_link_from_element(text_to_find: str, link_position: int = 1) -> str:
	    """
	    Find elements with specified text and return the URL if it's a link or has a parent link.

	    The selected match is checked for an `href` on the element itself, then on its
	    `<a>` ancestors, then on its `<a>` descendants. As a last resort a context click
	    is attempted and a "copy link" entry is looked up in the page DOM.

	    Args:
	        text_to_find: Text to search for
	        link_position: If multiple matches, which one to use (1-based)

	    Returns:
	        A message containing the URL that was found, or a message explaining that no
	        link could be found or that an error occurred.
	    """
	    from src.agents.browser import driver

	    try:
	        # XPath 1.0 has no escape character inside string literals, so choose the
	        # quote style that does not clash with the text, or fall back to concat().
	        if "'" not in text_to_find:
	            literal = f"'{text_to_find}'"
	        elif '"' not in text_to_find:
	            literal = f'"{text_to_find}"'
	        else:
	            pieces = ", \"'\", ".join(f"'{piece}'" for piece in text_to_find.split("'"))
	            literal = f"concat({pieces})"

	        # find_elements returns an empty list instead of raising when nothing matches.
	        matches = driver.find_elements(By.XPATH, f"//*[contains(text(), {literal})]")
	        if not matches:
	            return f"No element containing the text '{text_to_find}' was found."
	        if not 1 <= link_position <= len(matches):
	            return f"Match n°{link_position} not found (only {len(matches)} matches found)"
	        element = matches[link_position - 1]

	        # Method 1: the element itself carries the URL
	        href = element.get_attribute("href")
	        if href:
	            return f"URL found: {href}"

	        # Method 2: an enclosing link
	        for parent in element.find_elements(By.XPATH, "./ancestor::a"):
	            href = parent.get_attribute("href")
	            if href:
	                return f"URL found in parent element: {href}"

	        # Method 3: a link nested inside the element
	        for child in element.find_elements(By.XPATH, ".//a"):
	            href = child.get_attribute("href")
	            if href:
	                return f"URL found in child element: {href}"

	        # Method 4: right-click and look for a "copy link address" entry
	        ActionChains(driver).context_click(element).perform()

	        # Give the context menu a moment to appear
	        time.sleep(1)

	        # NOTE: this part depends heavily on the browser and on its UI language.
	        copy_link_texts = ["Copy link address", "Copier l'adresse du lien", "Copy Link", "Copier le lien"]

	        for label in copy_link_texts:
	            # Some labels contain an apostrophe, so pick the delimiter accordingly.
	            quote = '"' if "'" in label else "'"
	            try:
	                link_option = driver.find_element(By.XPATH, f"//div[contains(text(), {quote}{label}{quote})]")
	                link_option.click()
	                return f"Action 'Copier l'adresse du lien' effectuée pour '{text_to_find}'"
	            except Exception:
	                continue

	        # Dismiss the context menu
	        ActionChains(driver).send_keys(Keys.ESCAPE).perform()

	        return f"Impossible de trouver un lien pour l'élément '{text_to_find}' avec les méthodes disponibles."

	    except Exception as e:
	        return f"Erreur lors de la recherche du lien: {str(e)}"

	@tool
	def validate_json_results(result: dict) -> tuple[bool, str]:
	    """
	    Checks that the results do not contain generic placeholders.

	    The checks run in this order and stop at the first failure: at least three models,
	    no duplicate names, no generic names, no trailing parenthetical suffix in a name,
	    no generic URLs, a shared substring between each name and its URL, and an
	    evaluation criterion of at least 10 characters.

	    Args:
	        result: The result to validate

	    Returns:
	        A tuple containing a boolean indicating if the result is valid and a message
	        explaining why the result is invalid if it is not valid.
	    """
	    if not result or not isinstance(result, dict):
	        return False, "Invalid result"

	    # A missing key and an explicit null are treated the same way.
	    top_models = result.get("top_models") or []
	    if len(top_models) < 3:
	        return False, "Less than 3 models found"
	    if not all(isinstance(model, dict) for model in top_models):
	        return False, "Invalid result"

	    # Lower-cased names, tolerating a missing or null "name"
	    names = [str(model.get("name") or "").lower() for model in top_models]

	    # Check for duplicate models
	    seen_models = set()
	    for model, model_name in zip(top_models, names):
	        if model_name in seen_models:
	            return False, f"Duplicate model '{model.get('name')}' found. Please ensure each model is unique."
	        seen_models.add(model_name)

	    # Check for generic names
	    generic_names = {"model a", "model b", "model c", "model 1", "model 2", "model 3", "model name", "unavailable"}
	    if any(model_name in generic_names for model_name in names):
	        return False, "Generic model names detected"

	    # Check for unwanted suffixes in model names: a parenthesised group at the very end.
	    # (The previous pattern r"$.*$$" matched every string, so nothing could ever pass.)
	    unwanted_suffix_pattern = re.compile(r"\(.*\)$")
	    for model in top_models:
	        if unwanted_suffix_pattern.search(str(model.get("name") or "")):
	            return False, f"Model name '{model.get('name')}' contains unwanted suffixes. Please remove them if you think they are not part of the model name. If it's a version number or a date, keep it."

	    # Check for generic URLs
	    generic_urls = ["example.com", "example.org"]
	    model_urls = [str(model.get("url")).lower() for model in top_models if model.get("url") is not None]
	    if any(generic in url for url in model_urls for generic in generic_urls):
	        return False, "Generic URLs detected"

	    # Check for submatch between model name and URL
	    for model, model_name in zip(top_models, names):
	        url = model.get("url")

	        # Skip validation if URL is None or empty - this is acceptable, so no warning
	        if not url:
	            continue

	        url = str(url).lower()
	        # Slide a 4-character window over the name; names shorter than 4 characters
	        # are compared as a whole instead of failing unconditionally.
	        window = min(4, len(model_name))
	        has_submatch = window > 0 and any(
	            model_name[i:i + window] in url for i in range(len(model_name) - window + 1)
	        )
	        if not has_submatch:
	            return False, f"URL for model '{model.get('name')}' does not have a valid submatch with the name. This is probably a wrong URL. Please check the URL and try again."

	    # Check the evaluation criterion
	    criteria = result.get("evaluation_criteria") or ""
	    if len(criteria) < 10:
	        return False, "Evaluation criterion missing or too short"

	    return True, "Valid results"

	@tool
	def find_model_links(model_name: str) -> str:
	    """
	    Search for links that might point to a model based on their URL
	    and their match with the model name.

	    Only visible links whose URL looks like a Hugging Face or GitHub repository are
	    considered. A link is kept when some window of the lower-cased model name
	    (5 characters, or the whole name when it is shorter) occurs in both the link URL
	    and the link text.

	    Args:
	        model_name: The name of the model to search for

	    Returns:
	        A list of potential links to the model
	    """
	    from src.agents.browser import driver

	    try:
	        # 1. Retrieve all links on the page
	        all_links = driver.find_elements(By.TAG_NAME, "a")
	        if not all_links:
	            return "No links were found on the page."

	        # 2. Known patterns for model repositories
	        model_url_patterns = [
	            re.compile(r'huggingface\.co/[^/]+/[^/]+', re.IGNORECASE),  # Hugging Face model repo
	            re.compile(r'github\.com/[^/]+/[^/]+', re.IGNORECASE),  # GitHub repo
	        ]
	        ignored_suffixes = ('.png', '.jpg', '.jpeg', '.gif', '.svg', '.webp', '.ico', '.css', '.js')

	        model_name_lower = model_name.lower()
	        # Every window of the name that may serve as evidence. Names shorter than
	        # 5 characters use the whole name; they could never match before.
	        window = min(5, len(model_name_lower))
	        name_chunks = set()
	        if window:
	            name_chunks = {
	                model_name_lower[i:i + window]
	                for i in range(len(model_name_lower) - window + 1)
	            }

	        model_links = []
	        for link in all_links:
	            try:
	                # Check if the link is visible and has an href attribute
	                if not link.is_displayed():
	                    continue
	                link_url = link.get_attribute('href')
	                if not link_url:
	                    continue

	                link_text = link.text.strip()

	                # Ignore links to non-relevant resources
	                if link_url.endswith(ignored_suffixes):
	                    continue

	                # Keep only URLs that match a known repository pattern
	                if not any(pattern.search(link_url) for pattern in model_url_patterns):
	                    continue

	                # Require a name window that is present in both the URL and the text
	                url_lower = link_url.lower()
	                text_lower = link_text.lower()
	                if not any(chunk in url_lower and chunk in text_lower for chunk in name_chunks):
	                    continue

	                # Confidence: number of name characters that occur in the link text
	                confidence = sum(1 for c in model_name_lower if c in text_lower)
	                model_links.append({
	                    'url': link_url,
	                    'text': link_text,
	                    'confidence': confidence
	                })
	            except Exception:
	                continue  # Ignore links that fail (e.g. gone stale) and keep going

	        # 3. Format the result
	        if not model_links:
	            return f"No potential links to the model '{model_name}' were found."

	        parts = [f"Found {len(model_links)} potential links for the model '{model_name}':\n\n"]
	        for position, candidate in enumerate(model_links, start=1):
	            parts.append(f"Candidate {position}:\n")
	            parts.append(f"URL: {candidate['url']}\n")
	            parts.append(f"Text: {candidate['text']}\n")
	            parts.append(f"Confidence: {candidate['confidence']}\n\n")

	        # 4. Suggest the best candidate (highest confidence, first one wins ties)
	        best_candidate = max(model_links, key=lambda x: x['confidence'])
	        parts.append(f"Best candidate for '{model_name}':\nURL: {best_candidate['url']}\nText: {best_candidate['text']} ")

	        return "".join(parts)
	    except Exception as e:
	        return f"Error while searching for links for the model '{model_name}': {str(e)}"

	@tool
	def click_at_coordinates(x: int, y: int) -> str:
	    """
	    Clicks at the specified x,y coordinates on the page.
	    This is useful when other targeting methods fail or when dealing with complex UI elements.

	    The pointer is moved to an absolute location before clicking, so repeated calls
	    do not accumulate offsets. NOTE(review): Selenium documents `move_to_location`
	    as relative to the viewport origin - confirm callers pass viewport coordinates.

	    Args:
	        x: The x-coordinate to click at
	        y: The y-coordinate to click at

	    Returns:
	        A message confirming the click action
	    """
	    from selenium.webdriver.common.actions.action_builder import ActionBuilder
	    from src.agents.browser import driver

	    try:
	        # ActionChains.move_by_offset is relative to the current pointer position and
	        # reset_actions() does not move the pointer back, so use an absolute move.
	        builder = ActionBuilder(driver)
	        builder.pointer_action.move_to_location(x, y)
	        builder.pointer_action.click()
	        builder.perform()

	        time.sleep(1)  # Wait a moment for any reactions to the click
	        return f"Successfully clicked at coordinates ({x}, {y})"
	    except Exception as e:
	        return f"Failed to click at coordinates ({x}, {y}): {str(e)}"