magdap116 committed
Commit 3d32e7d · verified · 1 Parent(s): 6530149

New tooling for math, YouTube and Wikipedia.


Added math, Wikipedia and YouTube tools. Testing indexing of questions.

Files changed (4)
  1. app.py +287 -301
  2. requirements.txt +10 -7
  3. tooling.py +125 -65
  4. wikipedia_utils.py +4 -12
app.py CHANGED
@@ -1,301 +1,287 @@
- import os
- import gradio as gr
- import requests
- import inspect
- import pandas as pd
- from smolagents import DuckDuckGoSearchTool,HfApiModel,load_tool,PythonInterpreterTool,VisitWebpageTool,Tool, CodeAgent
- import hashlib
- import json
- from transformers import AutoTokenizer, AutoModelForCausalLM , pipeline, TransformersEngine
- import wikipedia
- import torch
- from tooling import ModelMathTool, WikipediaTool
-
- # (Keep Constants as is)
- # --- Constants ---
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-
- # --- Basic Agent Definition ---
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
- import os
-
- cache= {}
-
- web_search = DuckDuckGoSearchTool()
- python_interpreter = PythonInterpreterTool()
- visit_webpage_tool = VisitWebpageTool()
- wiki_tool = WikipediaTool()
-
- #model_math_tool = ModelMathTool()
-
- # If the agent does not answer, the model is overloaded, please use another model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
- # model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
-
-
- def load_cached_answer(question_id: str) -> str:
-     if question_id in cache.keys():
-         return cache[question_id]
-     else:
-         return None
-
- def cache_answer(question_id: str, answer: str):
-     cache[question_id] = answer
-
-
- # --- Model Setup ---
- MODEL_NAME = 'Qwen/Qwen2.5-3B-Instruct'#'meta-llama/Llama-3.2-3B-Instruct'
- #"Qwen/Qwen2.5-VL-3B-Instruct"#'meta-llama/Llama-2-7b-hf'#'meta-llama/Llama-3.1-8B-Instruct'#'TinyLlama/TinyLlama-1.1B-Chat-v1.0'#'mistralai/Mistral-7B-Instruct-v0.2'#'microsoft/DialoGPT-small'# 'EleutherAI/gpt-neo-2.7B'#'distilbert/distilgpt2'#'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'#'mistralai/Mistral-7B-Instruct-v0.2'
-
-
- def load_model(model_name):
-     """Download and load the model and tokenizer."""
-     try:
-         print(f"Loading model {MODEL_NAME}...")
-         model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
-         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-
-         if tokenizer.pad_token is None:
-             tokenizer.pad_token = tokenizer.eos_token
-
-         print(f"Model {MODEL_NAME} loaded successfully.")
-
-         transformers_engine = TransformersEngine(pipeline("text-generation", model=model, tokenizer=tokenizer))
-
-         return transformers_engine, model
-     except Exception as e:
-         print(f"Error loading model: {e}")
-         raise
-
- # Load the model and tokenizer locally
- #model, tokenizer = load_model()
- model = HfApiModel()#model_id=MODEL_NAME, max_tokens=512)
-
- from smolagents import TransformersModel
-
-
- model_id ="meta-llama/Llama-3.2-3B-Instruct"#"microsoft/phi-2"# not working out of the box"google/gemma-2-2b-it" #toobig"Qwen/Qwen1.5-7B-Chat"#working but stupid: "meta-llama/Llama-3.2-3B-Instruct"
- #model = TransformersModel(
- #    model_id=model_id,
- #    max_new_tokens=256)
-
- #model = HfApiModel()
-
-
- class BasicAgent:
-     def __init__(self):
-         print("BasicAgent initialized.")
-         self.agent = CodeAgent(
-             model=model,
-             tools=[web_search,python_interpreter,visit_webpage_tool,wiki_tool],
-             max_steps=5,
-             verbosity_level=1,
-             grammar=None,
-             planning_interval=3,
-             add_base_tools=True,
-             additional_authorized_imports=['requests','wikipedia']
-         )
-
-     def __call__(self, question: str) -> str:
-         print(f"Agent received question (first 50 chars): {question[:50]}...")
-         answer = self.agent.run(question)
-         return answer
-
-
- def run_and_submit_all( profile: gr.OAuthProfile | None):
-     """
-     Fetches all questions, runs the BasicAgent on them, submits all answers,
-     and displays the results.
-     """
-     # --- Determine HF Space Runtime URL and Repo URL ---
-     space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
-
-     if profile:
-         username= f"{profile.username}"
-         print(f"User logged in: {username}")
-     else:
-         print("User not logged in.")
-         return "Please Login to Hugging Face with the button.", None
-
-     api_url = DEFAULT_API_URL
-     questions_url = f"{api_url}/questions"
-     submit_url = f"{api_url}/submit"
-
-     # 1. Instantiate Agent ( modify this part to create your agent)
-     try:
-         agent = BasicAgent()
-     except Exception as e:
-         print(f"Error instantiating agent: {e}")
-         return f"Error initializing agent: {e}", None
-     # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
-     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
-     print(agent_code)
-
-     # 2. Fetch Questions
-     print(f"Fetching questions from: {questions_url}")
-     try:
-         response = requests.get(questions_url, timeout=15)
-         response.raise_for_status()
-         questions_data = response.json()
-         if not questions_data:
-             print("Fetched questions list is empty.")
-             return "Fetched questions list is empty or invalid format.", None
-         print(f"Fetched {len(questions_data)} questions.")
-     except requests.exceptions.RequestException as e:
-         print(f"Error fetching questions: {e}")
-         return f"Error fetching questions: {e}", None
-     except requests.exceptions.JSONDecodeError as e:
-         print(f"Error decoding JSON response from questions endpoint: {e}")
-         print(f"Response text: {response.text[:500]}")
-         return f"Error decoding server response for questions: {e}", None
-     except Exception as e:
-         print(f"An unexpected error occurred fetching questions: {e}")
-         return f"An unexpected error occurred fetching questions: {e}", None
-
-     # 3. Run your Agent
-     results_log = []
-     answers_payload = []
-     print(f"Running agent on {len(questions_data)} questions...")
-     for item in questions_data:
-         task_id = item.get("task_id")
-         question_text = item.get("question")
-         if not task_id or question_text is None:
-             print(f"Skipping item with missing task_id or question: {item}")
-             continue
-         try:
-             cached = load_cached_answer(task_id)
-             if cached:
-                 submitted_answer = cached
-                 print(f"Loaded cached answer for task {task_id}")
-             else:
-                 submitted_answer = agent(question_text)
-                 cache_answer(task_id, submitted_answer)
-                 print(f"Generated and cached answer for task {task_id}")
-
-             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
-         except Exception as e:
-             print(f"Error running agent on task {task_id}: {e}")
-             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
-
-     if not answers_payload:
-         print("Agent did not produce any answers to submit.")
-         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
-
-     # 4. Prepare Submission
-     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
-     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
-     print(status_update)
-
-     # 5. Submit
-     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
-     try:
-         response = requests.post(submit_url, json=submission_data, timeout=60)
-         response.raise_for_status()
-         result_data = response.json()
-         final_status = (
-             f"Submission Successful!\n"
-             f"User: {result_data.get('username')}\n"
-             f"Overall Score: {result_data.get('score', 'N/A')}% "
-             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
-             f"Message: {result_data.get('message', 'No message received.')}"
-         )
-         print("Submission successful.")
-         results_df = pd.DataFrame(results_log)
-         return final_status, results_df
-     except requests.exceptions.HTTPError as e:
-         error_detail = f"Server responded with status {e.response.status_code}."
-         try:
-             error_json = e.response.json()
-             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
-         except requests.exceptions.JSONDecodeError:
-             error_detail += f" Response: {e.response.text[:500]}"
-         status_message = f"Submission Failed: {error_detail}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.Timeout:
-         status_message = "Submission Failed: The request timed out."
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except requests.exceptions.RequestException as e:
-         status_message = f"Submission Failed: Network error - {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-     except Exception as e:
-         status_message = f"An unexpected error occurred during submission: {e}"
-         print(status_message)
-         results_df = pd.DataFrame(results_log)
-         return status_message, results_df
-
-
- # --- Build Gradio Interface using Blocks ---
- with gr.Blocks() as demo:
-     gr.Markdown("# Basic Agent Evaluation Runner")
-     gr.Markdown(
-         """
-         **Instructions:**
-
-         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
-         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
-         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
-
-         ---
-         **Disclaimers:**
-         Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
-         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
-         """
-     )
-
-     gr.LoginButton()
-
-     run_button = gr.Button("Run Evaluation & Submit All Answers")
-
-     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
-     # Removed max_rows=10 from DataFrame constructor
-     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
-
-     run_button.click(
-         fn=run_and_submit_all,
-         outputs=[status_output, results_table]
-     )
-
- if __name__ == "__main__":
-     print("\n" + "-"*30 + " App Starting " + "-"*30)
-     # Check for SPACE_HOST and SPACE_ID at startup for information
-     space_host_startup = os.getenv("SPACE_HOST")
-     space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
-
-     if space_host_startup:
-         print(f"✅ SPACE_HOST found: {space_host_startup}")
-         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
-     else:
-         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
-
-     if space_id_startup: # Print repo URLs if SPACE_ID is found
-         print(f"✅ SPACE_ID found: {space_id_startup}")
-         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
-         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
-     else:
-         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
-
-     print("-"*(60 + len(" App Starting ")) + "\n")
-
-     print("Launching Gradio Interface for Basic Agent Evaluation...")
-     demo.launch(debug=True, share=False)
 
+ import os
+ import gradio as gr
+ import requests
+ import inspect
+ import pandas as pd
+ from smolagents import DuckDuckGoSearchTool, HfApiModel, PythonInterpreterTool, VisitWebpageTool, CodeAgent
+ import hashlib
+ import json
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TransformersEngine
+ import wikipedia
+ import torch
+ from tooling import MathModelQuerer, WikipediaPageFetcher, YoutubeTranscriptFetcher, CodeModelQuerer
+
+ # (Keep Constants as is)
+ # --- Constants ---
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+
+ # --- Basic Agent Definition ---
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
+
+ cache = {}
+
+ web_search = DuckDuckGoSearchTool()
+ python_interpreter = PythonInterpreterTool()
+ visit_webpage_tool = VisitWebpageTool()
+ wiki_tool = WikipediaPageFetcher()
+ yt_transcript_fetcher = YoutubeTranscriptFetcher()
+ math_model_querer = MathModelQuerer()
+ code_model_querer = CodeModelQuerer()
+
+
+ def load_cached_answer(question_id: str) -> str:
+     if question_id in cache.keys():
+         return cache[question_id]
+     else:
+         return None
+
+
+ def cache_answer(question_id: str, answer: str):
+     cache[question_id] = answer
+
+
+ # --- Model Setup ---
+ MODEL_NAME = 'Qwen/Qwen2.5-3B-Instruct'  # 'meta-llama/Llama-3.2-3B-Instruct'
+
+ # Other models tried: "Qwen/Qwen2.5-VL-3B-Instruct", 'meta-llama/Llama-2-7b-hf', 'meta-llama/Llama-3.1-8B-Instruct',
+ # 'TinyLlama/TinyLlama-1.1B-Chat-v1.0', 'mistralai/Mistral-7B-Instruct-v0.2', 'microsoft/DialoGPT-small',
+ # 'EleutherAI/gpt-neo-2.7B', 'distilbert/distilgpt2', 'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B'
+
+
+ def load_model(model_name):
+     """Download and load the model and tokenizer."""
+     try:
+         print(f"Loading model {MODEL_NAME}...")
+         model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+         tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+         if tokenizer.pad_token is None:
+             tokenizer.pad_token = tokenizer.eos_token
+
+         print(f"Model {MODEL_NAME} loaded successfully.")
+
+         transformers_engine = TransformersEngine(pipeline("text-generation", model=model, tokenizer=tokenizer))
+
+         return transformers_engine, model
+     except Exception as e:
+         print(f"Error loading model: {e}")
+         raise
+
+
+ # Load the model and tokenizer locally
+ # model, tokenizer = load_model()
+ # model = HfApiModel()  # default API model (model_id=MODEL_NAME, max_tokens=512); superseded by the explicit model_id below
+ model_id = "reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B"  # also tried: "microsoft/phi-2"; "google/gemma-2-2b-it" (not working out of the box); "Qwen/Qwen1.5-7B-Chat" (too big); "meta-llama/Llama-3.2-3B-Instruct" (working but stupid)
+ model = HfApiModel(model_id)
+ # from smolagents import TransformersModel
+ # model = TransformersModel(
+ #     model_id=model_id,
+ #     max_new_tokens=256)
+
+
+ class BasicAgent:
+     def __init__(self):
+         print("BasicAgent initialized.")
+         self.agent = CodeAgent(
+             model=model,
+             tools=[web_search, python_interpreter, visit_webpage_tool, wiki_tool, code_model_querer, math_model_querer],
+             max_steps=3,
+             verbosity_level=1,
+             grammar=None,
+             planning_interval=3,
+             add_base_tools=True,
+             additional_authorized_imports=['requests', 'wikipedia', 'pandas']
+         )
+
+     def __call__(self, question: str) -> str:
+         print(f"Agent received question (first 50 chars): {question[:50]}...")
+         answer = self.agent.run(question)
+         return answer
+
+
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
+     """
+     Fetches all questions, runs the BasicAgent on them, submits all answers,
+     and displays the results.
+     """
+     # --- Determine HF Space Runtime URL and Repo URL ---
+     space_id = os.getenv("SPACE_ID")  # Get the SPACE_ID for sending link to the code
+
+     if profile:
+         username = f"{profile.username}"
+         print(f"User logged in: {username}")
+     else:
+         print("User not logged in.")
+         return "Please Login to Hugging Face with the button.", None
+
+     api_url = DEFAULT_API_URL
+     questions_url = f"{api_url}/questions"
+     submit_url = f"{api_url}/submit"
+
+     # 1. Instantiate Agent (modify this part to create your agent)
+     try:
+         agent = BasicAgent()
+     except Exception as e:
+         print(f"Error instantiating agent: {e}")
+         return f"Error initializing agent: {e}", None
+     # In the case of an app running as a Hugging Face space, this link points toward your codebase (useful for others, so please keep it public)
+     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
+     print(agent_code)
+
+     # 2. Fetch Questions
+     print(f"Fetching questions from: {questions_url}")
+     try:
+         response = requests.get(questions_url, timeout=15)
+         response.raise_for_status()
+         questions_data = response.json()
+         if not questions_data:
+             print("Fetched questions list is empty.")
+             return "Fetched questions list is empty or invalid format.", None
+         print(f"Fetched {len(questions_data)} questions.")
+     except requests.exceptions.RequestException as e:
+         print(f"Error fetching questions: {e}")
+         return f"Error fetching questions: {e}", None
+     except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from questions endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+         return f"Error decoding server response for questions: {e}", None
+     except Exception as e:
+         print(f"An unexpected error occurred fetching questions: {e}")
+         return f"An unexpected error occurred fetching questions: {e}", None
+
+     # 3. Run your Agent
+     results_log = []
+     answers_payload = []
+     print(f"Running agent on {len(questions_data)} questions...")
+     for item in questions_data[:1]:  # NOTE: only the first question is processed for now (testing indexing of questions)
+         task_id = item.get("task_id")
+         question_text = item.get("question")
+         if not task_id or question_text is None:
+             print(f"Skipping item with missing task_id or question: {item}")
+             continue
+         try:
+             cached = load_cached_answer(task_id)
+             if cached:
+                 submitted_answer = cached
+                 print(f"Loaded cached answer for task {task_id}")
+             else:
+                 submitted_answer = agent(question_text)
+                 cache_answer(task_id, submitted_answer)
+                 print(f"Generated and cached answer for task {task_id}")
+
+             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
+         except Exception as e:
+             print(f"Error running agent on task {task_id}: {e}")
+             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
+
+     if not answers_payload:
+         print("Agent did not produce any answers to submit.")
+         return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
+
+     # 4. Prepare Submission
+     submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
+     status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
+     print(status_update)
+
+     # 5. Submit
+     print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
+     try:
+         response = requests.post(submit_url, json=submission_data, timeout=60)
+         response.raise_for_status()
+         result_data = response.json()
+         final_status = (
+             f"Submission Successful!\n"
+             f"User: {result_data.get('username')}\n"
+             f"Overall Score: {result_data.get('score', 'N/A')}% "
+             f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
+             f"Message: {result_data.get('message', 'No message received.')}"
+         )
+         print("Submission successful.")
+         results_df = pd.DataFrame(results_log)
+         return final_status, results_df
+     except requests.exceptions.HTTPError as e:
+         error_detail = f"Server responded with status {e.response.status_code}."
+         try:
+             error_json = e.response.json()
+             error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
+         except requests.exceptions.JSONDecodeError:
+             error_detail += f" Response: {e.response.text[:500]}"
+         status_message = f"Submission Failed: {error_detail}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.Timeout:
+         status_message = "Submission Failed: The request timed out."
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except requests.exceptions.RequestException as e:
+         status_message = f"Submission Failed: Network error - {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+     except Exception as e:
+         status_message = f"An unexpected error occurred during submission: {e}"
+         print(status_message)
+         results_df = pd.DataFrame(results_log)
+         return status_message, results_df
+
+
+ # --- Build Gradio Interface using Blocks ---
+ with gr.Blocks() as demo:
+     gr.Markdown("# Basic Agent Evaluation Runner")
+     gr.Markdown(
+         """
+         **Instructions:**
+
+         1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc.
+         2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
+         3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
+
+         ---
+         **Disclaimers:**
+         Once you click the submit button, it can take quite some time (this is the time the agent needs to go through all the questions).
+         This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to avoid the long wait on the submit button, you could cache the answers and submit them in a separate action, or even answer the questions asynchronously.
+         """
+     )
+
+     gr.LoginButton()
+
+     run_button = gr.Button("Run Evaluation & Submit All Answers")
+
+     status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
+     # Removed max_rows=10 from DataFrame constructor
+     results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
+
+     run_button.click(
+         fn=run_and_submit_all,
+         outputs=[status_output, results_table]
+     )
+
+ if __name__ == "__main__":
+     print("\n" + "-" * 30 + " App Starting " + "-" * 30)
+     # Check for SPACE_HOST and SPACE_ID at startup for information
+     space_host_startup = os.getenv("SPACE_HOST")
+     space_id_startup = os.getenv("SPACE_ID")  # Get SPACE_ID at startup
+
+     if space_host_startup:
+         print(f"✅ SPACE_HOST found: {space_host_startup}")
+         print(f"   Runtime URL should be: https://{space_host_startup}.hf.space")
+     else:
+         print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
+
+     if space_id_startup:  # Print repo URLs if SPACE_ID is found
+         print(f"✅ SPACE_ID found: {space_id_startup}")
+         print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
+         print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
+     else:
+         print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
+
+     print("-" * (60 + len(" App Starting ")) + "\n")
+
+     print("Launching Gradio Interface for Basic Agent Evaluation...")
+     demo.launch(debug=True, share=False)
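
Note on the cache: the in-memory `cache` dict above is lost whenever the Space restarts, and `hashlib`/`json` are imported but never used. A minimal sketch of a disk-backed replacement for `load_cached_answer`/`cache_answer` (the file name `answer_cache.json` is an assumption, not part of this commit):

    import json
    import os

    CACHE_PATH = "answer_cache.json"  # hypothetical file name, not in the commit

    def _read_cache() -> dict:
        # Return the persisted cache, or an empty dict on first run.
        if os.path.exists(CACHE_PATH):
            with open(CACHE_PATH, "r", encoding="utf-8") as f:
                return json.load(f)
        return {}

    def load_cached_answer(question_id: str):
        return _read_cache().get(question_id)

    def cache_answer(question_id: str, answer: str):
        cached = _read_cache()
        cached[question_id] = answer
        with open(CACHE_PATH, "w", encoding="utf-8") as f:
            json.dump(cached, f)

This would also make the "cache the answers and submit in a separate action" suggestion from the disclaimer workable across restarts.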
requirements.txt CHANGED
@@ -1,7 +1,10 @@
- gradio
- requests
- smolagents
- transformers
- wikipedia
- torch
- accelerate
+ gradio
+ requests
+ smolagents
+ transformers
+ wikipedia
+ torch
+ accelerate
+ youtube_transcript_api
+ spacy
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl
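
The new pins pull in `youtube_transcript_api` for the transcript tool and spaCy plus a pinned `en_core_web_sm` wheel for the entity extraction in `wikipedia_utils.py`. A quick sanity check that the pinned model resolves (standard spaCy API; the sample sentence is illustrative):

    import spacy

    # Load the small English pipeline installed from the pinned wheel.
    nlp = spacy.load("en_core_web_sm")
    doc = nlp("How many albums did Amy Winehouse publish?")
    print([(ent.text, ent.label_) for ent in doc.ents])  # expect something like [('Amy Winehouse', 'PERSON')]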
tooling.py CHANGED
@@ -1,65 +1,125 @@
- from smolagents import Tool
- from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
- import torch
-
-
- class ModelMathTool(Tool):
-     name = "math_model"
-     description = "Answers advanced math questions using a pretrained math model."
-
-     inputs = {
-         "problem": {
-             "type": "string",
-             "description": "Math problem to solve.",
-         }
-     }
-
-     output_type = "string"
-
-     def __init__(self, model_name= "deepseek-ai/deepseek-math-7b-base"):
-         print(f"Loading math model: {model_name}")
-
-         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-         print("loaded tokenizer")
-         self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
-         print("loaded auto model")
-
-         self.model.generation_config = GenerationConfig.from_pretrained(model_name)
-         print("loaded coonfig")
-
-         self.model.generation_config.pad_token_id = self.model.generation_config.eos_token_id
-         print("loaded pad token")
-
-     def forward(self, problem: str) -> str:
-         print(f"[MathModelTool] Question: {problem}")
-
-         inputs = self.tokenizer(problem, return_tensors="pt")
-         outputs =self.model.generate(**inputs, max_new_tokens=100)
-
-         result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-         return result
-
- class WikipediaTool(Tool):
-     name = "wikip_tool"
-     description = "Searches Wikipedia and provides summary about the queried topic."
-
-     inputs = {
-         "query": {
-             "type": "string",
-             "description": "Topic of wikipedia search",
-         }
-     }
-
-     output_type = "string"
-
-     def __init__(self):
-         import wikipedia
-
-     def forward(self, query: str) -> str:
-         return wikipedia.summary(query, sentences=3)
+ from smolagents import Tool
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
+ import torch
+ from wikipedia_utils import *
+ from youtube_utils import *
+
+
+ class MathModelQuerer(Tool):
+     name = "math_model"
+     description = "Answers advanced math questions using a pretrained math model."
+
+     inputs = {
+         "problem": {
+             "type": "string",
+             "description": "Math problem to solve.",
+         }
+     }
+
+     output_type = "string"
+
+     def __init__(self, model_name="deepseek-ai/deepseek-math-7b-base"):
+         print(f"Loading math model: {model_name}")
+
+         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+         print("loaded tokenizer")
+         self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+         print("loaded auto model")
+
+         self.model.generation_config = GenerationConfig.from_pretrained(model_name)
+         print("loaded config")
+
+         self.model.generation_config.pad_token_id = self.model.generation_config.eos_token_id
+         print("loaded pad token")
+
+     def forward(self, problem: str) -> str:
+         try:
+             print(f"[MathModelTool] Question: {problem}")
+
+             inputs = self.tokenizer(problem, return_tensors="pt")
+             outputs = self.model.generate(**inputs, max_new_tokens=100)
+
+             result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+             return result
+         except Exception:
+             return f"Failed using the tool {self.name}"
+
+
+ class CodeModelQuerer(Tool):
+     name = "code_querer"
+     description = "Given a problem description, generates a piece of code using a specialized LLM model. Returns the output of the model."
+
+     inputs = {
+         "problem": {
+             "type": "string",
+             "description": "Description of a code sample to be generated",
+         }
+     }
+
+     output_type = "string"
+
+     def __init__(self, model_name="Qwen/Qwen2.5-Coder-32B-Instruct"):
+         from smolagents import HfApiModel
+         print(f"Loading llm for Code tool: {model_name}")
+         self.model = HfApiModel(model_id=model_name)
+
+     def forward(self, problem: str) -> str:
+         try:
+             return self.model.generate(problem, max_new_tokens=512)
+         except Exception:
+             return f"Failed using the tool {self.name}"
+
+
+ class WikipediaPageFetcher(Tool):
+     name = "wiki_page_fetcher"
+     description = "Searches Wikipedia and provides summary about the queried topic as a string."
+
+     inputs = {
+         "query": {
+             "type": "string",
+             "description": "Topic of wikipedia search",
+         }
+     }
+
+     output_type = "string"
+
+     def forward(self, query: str) -> str:
+         try:
+             wiki_query = get_wiki_query(query)
+             wiki_page = fetch_wikipedia_page(wiki_query)
+             return wiki_page
+         except Exception:
+             return f"Failed using the tool {self.name}"
+
+
+ class YoutubeTranscriptFetcher(Tool):
+     name = "youtube_transcript_fetcher"
+     description = "Attempts to fetch a youtube transcript in english, if provided with a query " \
+                   "that contains a youtube link with a video id. Returns the transcript content as a string. " \
+                   "Alternatively, if the tool is provided with a youtube video id, it can fetch the transcript directly."
+
+     inputs = {
+         "query": {
+             "type": "string",
+             "description": "A query that includes a youtube id."
+         },
+         "video_id": {
+             "type": "string",
+             "description": "Optional string with video id from youtube.",
+             "nullable": True
+         }
+     }
+
+     output_type = "string"
+
+     def forward(self, query: str, video_id=None) -> str:
+         try:
+             if video_id is None:
+                 video_id = get_youtube_video_id(query)
+
+             fetched_transcript = fetch_transcript_english(video_id)
+
+             return post_process_transcript(fetched_transcript)
+         except Exception:
+             return f"Failed using the tool {self.name}"
wikipedia_utils.py CHANGED
@@ -1,6 +1,7 @@
 import wikipedia
 import spacy
 
+
 def get_wiki_query(query):
     try:
         ### spacy code
@@ -11,11 +12,10 @@ def get_wiki_query(query):
         # Parse the sentence
         doc = nlp(query)
 
-
         # Entity path (people, events, books)
         entities_components = [entity_substring.text for entity_substring in doc.ents]
         if len(entities_components) > 0:
-            subject_of_the_query= ""
+            subject_of_the_query = ""
             for substrings in entities_components:
                 subject_of_the_query = subject_of_the_query + substrings
@@ -34,7 +34,7 @@ def get_wiki_query(query):
 
 
     except Exception as e:
-        print("Failed parsing a query subject from query" ,query)
+        print("Failed parsing a query subject from query", query)
         print(e)
@@ -43,18 +43,10 @@ def fetch_wikipedia_page(wiki_query):
         matched_articles = wikipedia.search(wiki_query)
         if len(matched_articles) > 0:
             used_article = matched_articles[0]
-            page_content = wikipedia.page(used_article,auto_suggest=False)
+            page_content = wikipedia.page(used_article, auto_suggest=False)
             return page_content.content
         else:
             return ""
     except Exception as e:
         print("Could not fetch the wikipedia article using ", wiki_query)
         print(e)
-
- test_queries = ["How many albums did Amy Winehouse publish?", "Who is Evora Cesaria?","Is cat an animal?"]
- wiki_queries = []
- wiki_pages= []
- for tq in test_queries:
-     wiki_queries.append(get_wiki_query(tq))
- for wq in wiki_queries:
-     wiki_pages.append(fetch_wikipedia_page(wq))
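
The ad-hoc test loop at the bottom of wikipedia_utils.py was removed in this commit; an equivalent check can still be run by hand (queries borrowed from the deleted block):

    from wikipedia_utils import get_wiki_query, fetch_wikipedia_page

    for question in ["How many albums did Amy Winehouse publish?", "Is cat an animal?"]:
        wiki_query = get_wiki_query(question)
        page = fetch_wikipedia_page(wiki_query)
        print(question, "->", (page or "")[:80])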