# cisarik — commit 661e8f7: Reorder imports in app.py for better readability
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool
import datetime
import requests
import pytz
import yaml
from tools.final_answer import FinalAnswerTool
from bs4 import BeautifulSoup
import time
import re
from urllib.parse import quote
from Gradio_UI import GradioUI
def search_internet_archive(query):
    """
    Query the Internet Archive advanced-search endpoint for an exact phrase.

    The phrase is wrapped in double quotes, URL-encoded and sent to
    ``advancedsearch.php``. Every returned ``identifier`` that starts with
    14 digits is split into (timestamp, remainder) and turned into a
    ``web.archive.org/web/...`` URL.

    Returns a list of (timestamp, snapshot_url) tuples ordered by timestamp.
    Any failure (network, HTTP status, JSON decoding, unexpected payload)
    is printed and results in an empty list.
    """
    # Quote the phrase for an exact match, then percent-encode it for the URL
    quoted_phrase = quote(f'"{query}"')
    search_url = f"https://archive.org/advancedsearch.php?q={quoted_phrase}&fl[]=identifier&sort[]=publicdate&output=json"
    try:
        reply = requests.get(search_url, timeout=10)
        reply.raise_for_status()
        docs = reply.json().get('response', {}).get('docs', [])

        found = []
        for doc in docs:
            # NOTE(review): only identifiers shaped like
            # [14-digit timestamp][original_url] are kept — confirm that the
            # search API actually returns identifiers in this form.
            parsed = re.match(r'(\d{14})(.+)', doc.get('identifier', ''))
            if parsed is None:
                continue
            stamp, remainder = parsed.groups()
            found.append((stamp, f"https://web.archive.org/web/{stamp}/{remainder}"))

        # Fixed-width digit strings order chronologically when compared as text
        found.sort(key=lambda pair: pair[0])
        return found
    except Exception as e:
        print(f"Error searching Internet Archive: {e}")
        return []
def check_snapshot_for_phrase(snapshot_url, phrase):
    """
    Download a snapshot and report whether it contains the phrase.

    The comparison is case-insensitive (both sides are lower-cased).
    Returns True when the phrase occurs in the response text; returns False
    when it does not or when anything goes wrong (the error is printed).
    """
    try:
        page = requests.get(snapshot_url, timeout=10)
        page.raise_for_status()
        needle = phrase.lower()
        haystack = page.text.lower()
        # Kept inside the try so a type mismatch is reported like any other error
        present = needle in haystack
    except Exception as e:
        print(f"Error retrieving snapshot {snapshot_url}: {e}")
        return False
    return present
def find_first_mention(phrase):
    """
    Locate the earliest archived snapshot whose content contains the phrase.

    Candidates come from search_internet_archive() and are checked in the
    order returned, one by one, with check_snapshot_for_phrase().

    Returns (timestamp, snapshot_url) for the first hit, or (None, None)
    when there are no candidates or none of them contains the phrase.
    """
    print(f"Searching for '{phrase}'...")
    candidates = search_internet_archive(phrase)

    if candidates:
        for stamp, url in candidates:
            print(f"Checking snapshot from {stamp}...")
            if check_snapshot_for_phrase(url, phrase):
                return stamp, url
            # Pause between requests to be polite to the server
            time.sleep(1)

    return None, None
@tool
def first_mention(phrase: str) -> str:
    # Keep this format for the description / args / args description but feel free to modify the tool
    """A tool that finds the earliest archived mention of a phrase on the internet, using the Internet Archive.

    Args:
        phrase: The exact phrase whose first archived mention should be found.
    """
    timestamp, snapshot_url = find_first_mention(phrase)

    # find_first_mention() yields (None, None) both when nothing matched and
    # when the lookup failed, so the two cases share one message.
    if not (timestamp and snapshot_url):
        return "No mentions found or an error occurred."

    return f"First mention found at {timestamp}: {snapshot_url}"
@tool
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that fetches the current local time in a specified timezone.
    Args:
        timezone: A string representing a valid timezone (e.g., 'America/New_York').
    """
    # Docstring wording is left as-is: the @tool decorator uses it as the
    # tool's description and argument documentation.
    try:
        # Resolve the zone name, then format "now" as seen in that zone
        zone = pytz.timezone(timezone)
        now_in_zone = datetime.datetime.now(zone)
        local_time = now_in_zone.strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {local_time}"
    except Exception as e:
        # Report the problem as text instead of raising
        reason = str(e)
        return f"Error fetching time for timezone '{timezone}': {reason}"
# Final-answer tool; it is registered in the agent's tool list below and must stay there.
final_answer = FinalAnswerTool()

# If the agent does not answer, the model is overloaded; please use another model or the
# following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # it is possible that this model may be overloaded
    custom_role_conversions=None,
)

# Import tool from Hub
# NOTE(review): this tool is loaded but not included in the agent's `tools` list below.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's locale default.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

agent = CodeAgent(
    model=model,
    tools=[final_answer, first_mention, get_current_time_in_timezone],  # add your tools here (don't remove final answer)
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

# Start the Gradio web UI for the agent.
GradioUI(agent).launch()