Spaces:
Build error
Build error
Artem Zhirkevich
committed on
Commit
·
f590bb2
1
Parent(s):
81917a3
new version
Browse files- .gitignore +6 -0
- agent.py +334 -0
- app.py +52 -47
- dry_run.py +60 -0
- evaluation_api.py +30 -0
- requirements.txt +29 -1
- system_prompt.txt +8 -0
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
venv/
|
2 |
+
__pycache__/
|
3 |
+
.env
|
4 |
+
|
5 |
+
# https://huggingface.co/datasets/gaia-benchmark/GAIA/tree/main/2023/validation
|
6 |
+
gaia_2023_set/
|
agent.py
ADDED
@@ -0,0 +1,334 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import tempfile
|
4 |
+
import requests
|
5 |
+
import pytesseract
|
6 |
+
import wikipedia
|
7 |
+
import mwclient
|
8 |
+
import pandas as pd
|
9 |
+
import easyocr
|
10 |
+
from typing import List, Optional, Dict, Any
|
11 |
+
from urllib.parse import urlparse
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
from PIL import Image
|
14 |
+
from tavily import TavilyClient
|
15 |
+
from arxiv import Search, Client, SortCriterion, SortOrder
|
16 |
+
|
17 |
+
from langgraph.graph.state import CompiledStateGraph
|
18 |
+
from langgraph.graph import START, StateGraph, MessagesState
|
19 |
+
from langgraph.prebuilt import tools_condition
|
20 |
+
from langgraph.prebuilt import ToolNode
|
21 |
+
|
22 |
+
from langchain_groq import ChatGroq
|
23 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
24 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
25 |
+
from langchain.memory import ConversationBufferMemory
|
26 |
+
from langchain.tools import Tool, tool
|
27 |
+
from langchain.callbacks.tracers import ConsoleCallbackHandler
|
28 |
+
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
|
29 |
+
from langchain_community.utilities import WikipediaAPIWrapper
|
30 |
+
from langchain_experimental.utilities import PythonREPL
|
31 |
+
from langchain_community.document_loaders import WebBaseLoader
|
32 |
+
|
33 |
+
|
34 |
+
load_dotenv()
|
35 |
+
|
36 |
+
vision_llm = ChatGroq(model="meta-llama/llama-4-scout-17b-16e-instruct", groq_api_key=os.getenv('GROQ_API_KEY'))
|
37 |
+
|
38 |
+
|
39 |
+
@tool
def web_search(query: str, domain: Optional[str] = None) -> str:
    """
    Perform a web search and return the raw results as a string.

    Args:
        query (str): The search query.
        domain (Optional[str]): If provided, restricts the search to this domain.

    Returns:
        str: Raw search results concatenated into a string.
    """
    # NOTE: the docstring above is what the `@tool` decorator exposes to the
    # model, so it is kept short; maintainer notes live in comments instead.
    try:
        # Fixed pause before every outbound search request.
        time.sleep(2)
        search = DuckDuckGoSearchAPIWrapper()
        if domain:
            query = f"{query} site:{domain}"
        results = search.results(query, max_results=3)

        if not results:
            return "No results found."

        # Format into simple title + URL + snippet entries. `.get` is used so
        # that one hit lacking a field does not discard every other result.
        entries = [
            f"Title: {hit.get('title', '')}\n"
            f"URL: {hit.get('link', '')}\n"
            f"Snippet: {hit.get('snippet', '')}"
            for hit in results
        ]
        return "\n\n".join(entries).strip()

    except Exception as e:
        # Tools report failures as text so the calling model can react to them.
        return f"Search error: {e}"
|
69 |
+
|
70 |
+
|
71 |
+
@tool
def visit_webpage(url: str) -> str:
    """
    Fetches and loads the content of a webpage given its URL.

    Parameters:
        url (str): The URL of the webpage to be visited.

    Returns:
        str: A string containing the loaded content of the webpage, or a
            message starting with "Error" if the page could not be loaded.
    """
    try:
        # Initialize a WebBaseLoader with the provided URL
        loader = WebBaseLoader(url)

        # SECURITY NOTE(review): TLS certificate verification is disabled here
        # to get past sites with broken certificates. This was a deliberate
        # choice in the original code and is preserved, but it allows
        # man-in-the-middle tampering with fetched pages — reconsider before
        # using this tool on anything sensitive.
        loader.requests_kwargs = {'verify': False}

        # Load the webpage content using the loader
        docs = loader.load()
    except Exception as e:
        # Match the sibling tools: report failures as text instead of raising.
        return f"Error loading webpage: {e}"

    # Return the page text itself rather than the repr() of the Document list,
    # which escapes newlines and adds wrapper noise.
    page_text = "\n\n".join(doc.page_content for doc in docs)
    return f"Page content: {page_text}"
|
95 |
+
|
96 |
+
|
97 |
+
@tool
def wikipedia_search(query: str, max_docs: int = 1) -> str:
    """
    Search Wikipedia using mwclient and return up to `max_docs` results.

    Args:
        query (str): The search query.
        max_docs (int): Maximum number of results to return. Default is 1.
    """
    # Local import: the file only imports `urlparse` from urllib.parse.
    from urllib.parse import quote

    # A non-positive limit makes no sense for a search; fall back to one result.
    max_docs = max(1, max_docs)

    try:
        # Fixed pause before every outbound request.
        time.sleep(2)
        site = mwclient.Site("en.wikipedia.org")
        results = site.search(query, limit=max_docs)

        entries = []

        for page_info in results:
            title = page_info["title"]
            try:
                content = site.pages[title].text()
            except Exception as page_error:
                # Best effort: skip pages that cannot be fetched, but leave a
                # trace instead of swallowing the failure silently.
                print(f"Skipping Wikipedia page '{title}': {page_error}")
                continue

            # Text up to the first blank line of the page source.
            first_paragraph = content.split('\n\n')[0]

            # Percent-encode the title so characters such as '?', '#' or
            # non-ASCII letters still produce a working link.
            url = f"https://en.wikipedia.org/wiki/{quote(title.replace(' ', '_'))}"

            entries.append(
                f"--- Result {len(entries) + 1} ---\n"
                f"Title: {title}\n"
                f"Summary: {first_paragraph}...\n"
                f"URL: {url}"
            )
            # Stop right after the last wanted page so the search iterator is
            # not advanced (and no further request issued) needlessly.
            if len(entries) >= max_docs:
                break

        return "\n\n".join(entries).strip() or "No valid matching pages found."

    except Exception as e:
        return f"Wikipedia search error: {str(e)}"
|
140 |
+
|
141 |
+
|
142 |
+
@tool
def extract_text_from_image(image_path: str) -> str:
    """
    Extracts text from an image file.

    Args:
        image_path (str): The file path to the image
            (e.g., '/path/to/document.png').

    Returns:
        str: The text (or explanation of the image) produced by the vision
            model. Returns an error message string starting with 'Error'
            on failure.
    """
    # `base64` was used without ever being imported, so every call failed with
    # a NameError that the handler below turned into an empty string.
    import base64

    try:
        # Fixed pause before every model request.
        time.sleep(2)

        with open(image_path, "rb") as image_file:
            image_bytes = image_file.read()

        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Single multimodal message: the instruction plus the image inlined as
        # a base64 data URL.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract text or provide explanation of this image"
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        response = vision_llm.invoke(message)

        return response.content.strip()
    except Exception as e:
        # Report the failure to the caller (as the docstring promises) rather
        # than returning an empty string that looks like "no text found".
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return error_msg
|
194 |
+
|
195 |
+
|
196 |
+
|
197 |
+
@tool
def analyze_file(file_path: str) -> str:
    """
    Load and analyze a CSV or Excel file using pandas.

    Provides basic metadata and summary statistics for numeric columns.

    Args:
        file_path (str): Path to the CSV or Excel file.

    Returns:
        str: Summary statistics and metadata about the file data.
    """
    try:
        # Determine file type from the (case-insensitive) extension
        _, ext = os.path.splitext(file_path.lower())

        if ext == '.csv':
            df = pd.read_csv(file_path)
        elif ext in ['.xls', '.xlsx']:
            df = pd.read_excel(file_path)
        else:
            return f"Error: Unsupported file extension '{ext}'. Supported: .csv, .xls, .xlsx"

        # Column labels are not guaranteed to be strings (e.g. numeric or date
        # headers in a spreadsheet); str.join would raise TypeError on those.
        column_names = ', '.join(str(column) for column in df.columns)

        sections = [
            "Summary statistics for numeric columns:\n",
            str(df.describe()),
            "\n\n",
            f"Columns: {column_names}\n\n",
            "Content:\n",
            # Cap the dump at the first 1000 rows to bound the output size.
            df.astype(str).head(1000).to_string(index=False),
        ]

        return "".join(sections)

    except ImportError:
        return "Error: Required libraries are not installed. Install with 'pip install pandas openpyxl'."
    except FileNotFoundError:
        return f"Error: File not found at path '{file_path}'."
    except Exception as e:
        return f"Error analyzing file: {str(e)}"
|
237 |
+
|
238 |
+
|
239 |
+
class Agent:
    """
    Tool-using question-answering agent.

    Wraps a Gemini chat model bound to the module's tools in a two-node
    LangGraph ("assistant" and "tools") and exposes a single `run` entry point.
    """

    _api_key: str
    _model_name: str
    _tools: List[Tool]
    # Declared only; nothing in this class ever assigns `_memory`.
    _memory: ConversationBufferMemory
    _llm: ChatGoogleGenerativeAI
    _graph: CompiledStateGraph

    def __init__(self):
        """Read the API key from the environment and build tools, LLM and graph."""
        self._api_key = os.getenv('GOOGLE_API_KEY')
        self._model_name = "gemini-2.0-flash"

        # Order matters: the graph needs both the tools and the LLM.
        self._tools = self._setup_tools()
        self._llm = self._setup_llm()
        self._graph = self._setup_graph()

    def run(self, query: str) -> Dict[str, Any]:
        """
        Run the agent graph on a single query, retrying on failure.

        Args:
            query (str): The user question, passed as the human message.

        Returns:
            Dict[str, Any]: The final graph state; callers read the answer
                from ``result['messages'][-1].content``.

        Raises:
            RuntimeError: If all attempts fail. The last underlying error is
                chained as the cause.
        """
        max_retries: int = 3

        # The prompt is read relative to the current working directory.
        with open('system_prompt.txt', encoding='utf-8') as file:
            system_prompt = SystemMessage(content=file.read())

        # `except ... as e` unbinds `e` when the clause ends, so the last
        # failure has to be kept in a separate variable for the final report.
        last_error: Optional[Exception] = None

        for attempt in range(max_retries):
            try:
                return self._graph.invoke(
                    {
                        "messages": [
                            system_prompt,
                            HumanMessage(content=query),
                        ]
                    },
                    config={'callbacks': [ConsoleCallbackHandler()]},
                )
            except Exception as e:
                last_error = e
                print(f"Error: {str(e)}")

                # Linear back-off (3s, 6s, ...); no point sleeping after the
                # final attempt.
                if attempt < max_retries - 1:
                    sleep_time = (attempt + 1) * 3
                    print(f"Attempt {attempt + 1} failed. Retrying in {sleep_time} seconds...")
                    time.sleep(sleep_time)

        # Callers index the result as a dict, so a returned error string would
        # only resurface as an unrelated TypeError; raise the real cause.
        raise RuntimeError(
            f"Error processing query after {max_retries} attempts: {last_error}"
        ) from last_error

    def _setup_llm(self) -> ChatGoogleGenerativeAI:
        """Create the Gemini chat model with temperature 0."""
        return ChatGoogleGenerativeAI(
            model=self._model_name,
            google_api_key=self._api_key,
            temperature=0,
        )

    def _setup_tools(self) -> List[Tool]:
        """Return the tools the model is allowed to call."""
        return [
            web_search,
            visit_webpage,
            wikipedia_search,
            extract_text_from_image,
            analyze_file,
        ]

    def _setup_graph(self) -> CompiledStateGraph:
        """Build and compile the assistant/tools graph."""
        llm_with_tools = self._llm.bind_tools(self._tools)

        def assistant(state: MessagesState):
            # One model step over the conversation so far; the reply is
            # returned as the node's "messages" update.
            return {
                "messages": [
                    llm_with_tools.invoke(state["messages"])
                ]
            }

        builder = StateGraph(MessagesState)

        builder.add_node("assistant", assistant)
        builder.add_node("tools", ToolNode(self._tools))

        builder.add_edge(START, "assistant")
        # `tools_condition` picks the next step after each assistant turn.
        builder.add_conditional_edges(
            "assistant",
            tools_condition,
        )
        # Tool output always goes back to the assistant.
        builder.add_edge("tools", "assistant")

        return builder.compile()
|
334 |
+
|
app.py
CHANGED
@@ -1,23 +1,27 @@
|
|
1 |
import os
|
|
|
|
|
2 |
import gradio as gr
|
3 |
import requests
|
4 |
import inspect
|
5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
-
# (Keep Constants as is)
|
8 |
-
# --- Constants ---
|
9 |
-
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
10 |
-
|
11 |
-
# --- Basic Agent Definition ---
|
12 |
-
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
13 |
-
class BasicAgent:
|
14 |
-
def __init__(self):
|
15 |
-
print("BasicAgent initialized.")
|
16 |
-
def __call__(self, question: str) -> str:
|
17 |
-
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
18 |
-
fixed_answer = "This is a default answer."
|
19 |
-
print(f"Agent returning fixed answer: {fixed_answer}")
|
20 |
-
return fixed_answer
|
21 |
|
22 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
23 |
"""
|
@@ -31,48 +35,35 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
31 |
username= f"{profile.username}"
|
32 |
print(f"User logged in: {username}")
|
33 |
else:
|
34 |
-
print("User not logged in.")
|
35 |
return "Please Login to Hugging Face with the button.", None
|
36 |
|
37 |
-
|
38 |
-
questions_url = f"{api_url}/questions"
|
39 |
-
submit_url = f"{api_url}/submit"
|
40 |
|
41 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
42 |
try:
|
43 |
-
agent =
|
44 |
except Exception as e:
|
45 |
-
print(f"Error instantiating agent: {e}")
|
46 |
return f"Error initializing agent: {e}", None
|
|
|
47 |
# In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
|
48 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
49 |
-
print(agent_code)
|
50 |
|
51 |
# 2. Fetch Questions
|
52 |
-
print(f"Fetching questions from: {questions_url}")
|
53 |
try:
|
54 |
-
|
55 |
-
response.raise_for_status()
|
56 |
-
questions_data = response.json()
|
57 |
if not questions_data:
|
58 |
-
print("Fetched questions list is empty.")
|
59 |
return "Fetched questions list is empty or invalid format.", None
|
60 |
-
print(f"Fetched {len(questions_data)} questions.")
|
61 |
except requests.exceptions.RequestException as e:
|
62 |
-
print(f"Error fetching questions: {e}")
|
63 |
return f"Error fetching questions: {e}", None
|
64 |
except requests.exceptions.JSONDecodeError as e:
|
65 |
-
print(f"Error decoding JSON response from questions endpoint: {e}")
|
66 |
-
print(f"Response text: {response.text[:500]}")
|
67 |
return f"Error decoding server response for questions: {e}", None
|
68 |
except Exception as e:
|
69 |
-
print(f"An unexpected error occurred fetching questions: {e}")
|
70 |
return f"An unexpected error occurred fetching questions: {e}", None
|
71 |
|
72 |
# 3. Run your Agent
|
73 |
results_log = []
|
74 |
answers_payload = []
|
75 |
-
|
76 |
for item in questions_data:
|
77 |
task_id = item.get("task_id")
|
78 |
question_text = item.get("question")
|
@@ -80,28 +71,40 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
80 |
print(f"Skipping item with missing task_id or question: {item}")
|
81 |
continue
|
82 |
try:
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
|
|
85 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
86 |
except Exception as e:
|
87 |
-
print(f"
|
88 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
89 |
|
90 |
if not answers_payload:
|
91 |
-
print("Agent did not produce any answers to submit.")
|
92 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
93 |
|
94 |
# 4. Prepare Submission
|
95 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
96 |
-
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
97 |
-
print(status_update)
|
98 |
|
99 |
# 5. Submit
|
100 |
-
print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
|
101 |
try:
|
102 |
-
|
103 |
-
response.raise_for_status()
|
104 |
-
result_data = response.json()
|
105 |
final_status = (
|
106 |
f"Submission Successful!\n"
|
107 |
f"User: {result_data.get('username')}\n"
|
@@ -109,34 +112,36 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
109 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
110 |
f"Message: {result_data.get('message', 'No message received.')}"
|
111 |
)
|
112 |
-
print("Submission successful.")
|
113 |
results_df = pd.DataFrame(results_log)
|
|
|
114 |
return final_status, results_df
|
115 |
except requests.exceptions.HTTPError as e:
|
116 |
error_detail = f"Server responded with status {e.response.status_code}."
|
|
|
117 |
try:
|
118 |
error_json = e.response.json()
|
119 |
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
120 |
except requests.exceptions.JSONDecodeError:
|
121 |
error_detail += f" Response: {e.response.text[:500]}"
|
|
|
122 |
status_message = f"Submission Failed: {error_detail}"
|
123 |
-
print(status_message)
|
124 |
results_df = pd.DataFrame(results_log)
|
|
|
125 |
return status_message, results_df
|
126 |
except requests.exceptions.Timeout:
|
127 |
status_message = "Submission Failed: The request timed out."
|
128 |
-
print(status_message)
|
129 |
results_df = pd.DataFrame(results_log)
|
|
|
130 |
return status_message, results_df
|
131 |
except requests.exceptions.RequestException as e:
|
132 |
status_message = f"Submission Failed: Network error - {e}"
|
133 |
-
print(status_message)
|
134 |
results_df = pd.DataFrame(results_log)
|
|
|
135 |
return status_message, results_df
|
136 |
except Exception as e:
|
137 |
status_message = f"An unexpected error occurred during submission: {e}"
|
138 |
-
print(status_message)
|
139 |
results_df = pd.DataFrame(results_log)
|
|
|
140 |
return status_message, results_df
|
141 |
|
142 |
|
@@ -193,4 +198,4 @@ if __name__ == "__main__":
|
|
193 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
194 |
|
195 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
196 |
-
demo.launch(debug=True, share=False)
|
|
|
1 |
import os
|
2 |
+
import tempfile
|
3 |
+
import time
|
4 |
import gradio as gr
|
5 |
import requests
|
6 |
import inspect
|
7 |
import pandas as pd
|
8 |
+
from agent import Agent
|
9 |
+
from evaluation_api import EvaluationApi
|
10 |
+
|
11 |
+
|
12 |
+
def save_tmp_file(file_name: str, content: bytes) -> str:
    """
    Write `content` to a file in the system temp directory and return its path.

    Args:
        file_name (str): Desired file name. Only its final path component is
            used. If it is None or empty (or reduces to "." / ".."), a unique
            temporary name is generated instead.
        content (bytes): Raw bytes to write.

    Returns:
        str: Path of the written file inside the temp directory.
    """
    temp_dir = tempfile.gettempdir()

    # The name comes from a remote API; keep only the last path component so a
    # value like "../../x" or "/etc/x" cannot write outside the temp directory.
    safe_name = os.path.basename(file_name) if file_name else ""
    if safe_name in (".", ".."):
        safe_name = ""

    if not safe_name:
        # `with` closes the handle (the original leaked it); delete=False keeps
        # the file on disk for the caller.
        with tempfile.NamedTemporaryFile(delete=False, dir=temp_dir) as temp_file:
            temp_file.write(content)
            return temp_file.name

    file_path = os.path.join(temp_dir, safe_name)
    with open(file_path, "wb") as file:
        file.write(content)

    return file_path
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
27 |
"""
|
|
|
35 |
username= f"{profile.username}"
|
36 |
print(f"User logged in: {username}")
|
37 |
else:
|
|
|
38 |
return "Please Login to Hugging Face with the button.", None
|
39 |
|
40 |
+
evaluation_api = EvaluationApi()
|
|
|
|
|
41 |
|
42 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
43 |
try:
|
44 |
+
agent = Agent()
|
45 |
except Exception as e:
|
|
|
46 |
return f"Error initializing agent: {e}", None
|
47 |
+
|
48 |
# In the case of an app running as a Hugging Face Space, this link points toward your codebase (useful for others, so please keep it public)
|
49 |
agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
|
|
|
50 |
|
51 |
# 2. Fetch Questions
|
|
|
52 |
try:
|
53 |
+
questions_data = evaluation_api.get_questions()
|
|
|
|
|
54 |
if not questions_data:
|
|
|
55 |
return "Fetched questions list is empty or invalid format.", None
|
|
|
56 |
except requests.exceptions.RequestException as e:
|
|
|
57 |
return f"Error fetching questions: {e}", None
|
58 |
except requests.exceptions.JSONDecodeError as e:
|
|
|
|
|
59 |
return f"Error decoding server response for questions: {e}", None
|
60 |
except Exception as e:
|
|
|
61 |
return f"An unexpected error occurred fetching questions: {e}", None
|
62 |
|
63 |
# 3. Run your Agent
|
64 |
results_log = []
|
65 |
answers_payload = []
|
66 |
+
|
67 |
for item in questions_data:
|
68 |
task_id = item.get("task_id")
|
69 |
question_text = item.get("question")
|
|
|
71 |
print(f"Skipping item with missing task_id or question: {item}")
|
72 |
continue
|
73 |
try:
|
74 |
+
time.sleep(10)
|
75 |
+
|
76 |
+
print(f"Agent received question (first 50 chars): {question_text[:50]}...")
|
77 |
+
|
78 |
+
question = f"Question: `{question_text}`"
|
79 |
+
if file_name := item.get("file_name"):
|
80 |
+
print('question has file')
|
81 |
+
file_content = evaluation_api.get_file(task_id)
|
82 |
+
file_path = save_tmp_file(file_name, file_content)
|
83 |
+
question = f"{question} File path: `{file_path}`"
|
84 |
+
|
85 |
+
messages = agent.run(question)
|
86 |
+
|
87 |
+
final_answer = messages['messages'][-1].content
|
88 |
+
print(f"final_answer: {final_answer}")
|
89 |
+
submitted_answer = final_answer.split('ANSWER: ')[-1]
|
90 |
+
|
91 |
+
|
92 |
answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
|
93 |
+
|
94 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
|
95 |
except Exception as e:
|
96 |
+
print(f"AGENT ERROR: {e}")
|
97 |
results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
|
98 |
|
99 |
if not answers_payload:
|
|
|
100 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
101 |
|
102 |
# 4. Prepare Submission
|
103 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
|
|
|
|
104 |
|
105 |
# 5. Submit
|
|
|
106 |
try:
|
107 |
+
result_data = evaluation_api.submit(submission_data)
|
|
|
|
|
108 |
final_status = (
|
109 |
f"Submission Successful!\n"
|
110 |
f"User: {result_data.get('username')}\n"
|
|
|
112 |
f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
|
113 |
f"Message: {result_data.get('message', 'No message received.')}"
|
114 |
)
|
|
|
115 |
results_df = pd.DataFrame(results_log)
|
116 |
+
|
117 |
return final_status, results_df
|
118 |
except requests.exceptions.HTTPError as e:
|
119 |
error_detail = f"Server responded with status {e.response.status_code}."
|
120 |
+
|
121 |
try:
|
122 |
error_json = e.response.json()
|
123 |
error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
|
124 |
except requests.exceptions.JSONDecodeError:
|
125 |
error_detail += f" Response: {e.response.text[:500]}"
|
126 |
+
|
127 |
status_message = f"Submission Failed: {error_detail}"
|
|
|
128 |
results_df = pd.DataFrame(results_log)
|
129 |
+
|
130 |
return status_message, results_df
|
131 |
except requests.exceptions.Timeout:
|
132 |
status_message = "Submission Failed: The request timed out."
|
|
|
133 |
results_df = pd.DataFrame(results_log)
|
134 |
+
|
135 |
return status_message, results_df
|
136 |
except requests.exceptions.RequestException as e:
|
137 |
status_message = f"Submission Failed: Network error - {e}"
|
|
|
138 |
results_df = pd.DataFrame(results_log)
|
139 |
+
|
140 |
return status_message, results_df
|
141 |
except Exception as e:
|
142 |
status_message = f"An unexpected error occurred during submission: {e}"
|
|
|
143 |
results_df = pd.DataFrame(results_log)
|
144 |
+
|
145 |
return status_message, results_df
|
146 |
|
147 |
|
|
|
198 |
print("-"*(60 + len(" App Starting ")) + "\n")
|
199 |
|
200 |
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
201 |
+
demo.launch(debug=True, share=False)
|
dry_run.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import random
|
2 |
+
import tempfile
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
|
6 |
+
from agent import Agent
|
7 |
+
|
8 |
+
random.seed(1)
|
9 |
+
|
10 |
+
def get_question(file_path: str) -> str:
    """
    Pick one random record from a JSON-lines file.

    Args:
        file_path (str): Path to a file holding one JSON document per line.

    Returns:
        str: The raw text of the chosen line (trailing newline included).

    Raises:
        ValueError: If the file contains no non-blank lines.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        # Drop blank lines (e.g. a trailing newline at the end of the file):
        # choosing one would make the caller's json.loads fail.
        lines = [line for line in file if line.strip()]

    if not lines:
        raise ValueError(f"No questions found in '{file_path}'.")

    line_number = random.randrange(len(lines))

    return lines[line_number]
|
16 |
+
|
17 |
+
|
18 |
+
def get_file(file_name: str) -> bytes:
    """Return the raw bytes of `file_name` from the local ./gaia_2023_set/ folder."""
    source_path = "./gaia_2023_set/{}".format(file_name)
    with open(source_path, "rb") as handle:
        payload = handle.read()
    return payload
|
21 |
+
|
22 |
+
|
23 |
+
def save_tmp_file(file_name: str, content: bytes) -> str:
    """
    Write `content` to a file in the system temp directory and return its path.

    Args:
        file_name (str): Desired file name. Only its final path component is
            used. If it is None or empty (or reduces to "." / ".."), a unique
            temporary name is generated instead.
        content (bytes): Raw bytes to write.

    Returns:
        str: Path of the written file inside the temp directory.
    """
    temp_dir = tempfile.gettempdir()

    # Keep only the last path component so a name such as "../../x" or
    # "/etc/x" cannot place the file outside the temp directory.
    safe_name = os.path.basename(file_name) if file_name else ""
    if safe_name in (".", ".."):
        safe_name = ""

    if not safe_name:
        # `with` closes the handle (the original leaked it); delete=False keeps
        # the file on disk for the caller.
        with tempfile.NamedTemporaryFile(delete=False, dir=temp_dir) as temp_file:
            temp_file.write(content)
            return temp_file.name

    file_path = os.path.join(temp_dir, safe_name)
    with open(file_path, "wb") as file:
        file.write(content)

    return file_path
|
35 |
+
|
36 |
+
|
37 |
+
# Dry run: pick one random question from the local GAIA validation metadata,
# send it through the agent and print both the full reply and the extracted
# final answer.
question_json: str = get_question('./gaia_2023_set/metadata.jsonl')
question = json.loads(question_json)

print(json.dumps(question, indent=2))

# Attachment handling is currently disabled; to try a question together with
# its file, enable the lines below and use the alternative `agent.run` call.
# file_name = question["file_name"]
# file_content = get_file(question["file_name"])
# file_path = save_tmp_file(file_name, file_content)

# print(file_path)

agent = Agent()

# Bind the text first: reusing double quotes inside a double-quoted f-string
# (f"...{question["Question"]}...") is a SyntaxError before Python 3.12.
question_text = question["Question"]

# messages = agent.run(f"Question: `{question_text}` File path: {file_path}")
messages = agent.run(f"Question: `{question_text}`")

final_answer = messages['messages'][-1].content
# The system prompt asks the model to finish with "FINAL ANSWER: ...".
submitted_answer = final_answer.split('FINAL ANSWER: ')[-1]

print(final_answer)

print(submitted_answer)
|
59 |
+
|
60 |
+
|
evaluation_api.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
|
3 |
+
class EvaluationApi:
    """
    Small HTTP client for the scoring service at `api_url`.

    Every method raises `requests.exceptions.HTTPError` (via
    `raise_for_status`) on a non-success response.
    """

    # Class-level defaults; an instance may override them through __init__.
    api_url: str = "https://agents-course-unit4-scoring.hf.space"
    timeout: int = 30

    def __init__(self, api_url: "str | None" = None, timeout: "int | None" = None):
        """
        Args:
            api_url: Base URL of the service. Defaults to the class attribute.
                A trailing slash is removed so paths can be appended safely.
            timeout: Per-request timeout in seconds. Defaults to the class
                attribute.
        """
        if api_url is not None:
            self.api_url = api_url.rstrip("/")
        if timeout is not None:
            self.timeout = timeout

    def _get(self, path: str) -> "requests.Response":
        """GET `path` relative to the base URL and fail on an error status."""
        response = requests.get(f"{self.api_url}{path}", timeout=self.timeout)
        response.raise_for_status()

        return response

    def get_questions(self) -> list[dict]:
        """Return the decoded JSON body of `/questions`."""
        return self._get("/questions").json()

    def get_file(self, task_id: str) -> bytes:
        """Return the raw bytes served at `/files/{task_id}`."""
        return self._get(f"/files/{task_id}").content

    def get_random_question(self) -> dict:
        """Return the decoded JSON body of `/random-question`."""
        return self._get("/random-question").json()

    def submit(self, data: dict) -> dict:
        """POST `data` as JSON to `/submit` and return the decoded JSON reply."""
        response = requests.post(f"{self.api_url}/submit", json=data, timeout=self.timeout)
        response.raise_for_status()

        return response.json()
|
requirements.txt
CHANGED
@@ -1,2 +1,30 @@
|
|
1 |
gradio
|
2 |
-
requests
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
gradio
|
2 |
+
requests
|
3 |
+
pandas
|
4 |
+
openpyxl
|
5 |
+
openai
|
6 |
+
google-genai
|
7 |
+
google-generativeai
|
8 |
+
langchain
|
9 |
+
langchain-community
|
10 |
+
langchain-core
|
11 |
+
langchain-google-genai
|
12 |
+
langgraph
|
13 |
+
huggingface_hub
|
14 |
+
python-dotenv
|
15 |
+
wikipedia-api
|
16 |
+
wikipedia
|
17 |
+
arxiv
|
18 |
+
datasets
|
19 |
+
yt-dlp
|
20 |
+
google-cloud-speech
|
21 |
+
google-api-python-client
|
22 |
+
duckduckgo-search
|
23 |
+
pytesseract
|
24 |
+
tavily-python
|
25 |
+
langchain_groq
|
26 |
+
langchain-tavily
|
27 |
+
mwclient
|
28 |
+
langchain_experimental
|
29 |
+
easyocr
|
30 |
+
smolagents
|
system_prompt.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
You are a rapid, concise AI assistant. Your primary goal is to provide quick and direct answers according to the specified format.
|
2 |
+
Briefly report your essential reasoning steps before the final answer.
|
3 |
+
Deliver your final answer strictly following this template: FINAL ANSWER: [YOUR FINAL ANSWER].
|
4 |
+
YOUR FINAL ANSWER must be the most direct response, limited to a single number, the absolute fewest necessary words, or a comma-separated list of numbers and/or strings.
|
5 |
+
Adhere to these strict formatting requirements for the content of the FINAL ANSWER:
|
6 |
+
- **Numbers:** Do not use commas for digit grouping (e.g., use 1000, not 1,000). Exclude units like '$' or '%' unless they are explicitly requested as part of the answer.
|
7 |
+
- **Strings:** Do not include articles (a, an, the). Do not use abbreviations. Any digits that are part of a string must be written out in plain text (e.g., "level two" instead of "level 2"), unless you are specifically instructed to use numerals.
|
8 |
+
- **Lists:** Apply the above formatting rules for numbers and strings to each corresponding element within the comma-separated list.
|