Rounak Sen committed on
Commit
19efa7d
·
1 Parent(s): 81917a3

created agent

Browse files
Files changed (7) hide show
  1. .gitignore +2 -0
  2. .python-version +1 -0
  3. app.py +395 -78
  4. pyproject.toml +32 -0
  5. requirements.txt +291 -2
  6. temp.ipynb +352 -0
  7. uv.lock +0 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ .env
2
+ *.m4a
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
app.py CHANGED
@@ -1,38 +1,314 @@
1
  import os
 
2
  import gradio as gr
3
  import requests
4
- import inspect
5
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
  # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
- if profile:
31
- username= f"{profile.username}"
32
- print(f"User logged in: {username}")
33
- else:
34
- print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
 
36
 
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
@@ -40,7 +316,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -55,16 +331,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
55
  response.raise_for_status()
56
  questions_data = response.json()
57
  if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
  print(f"Fetched {len(questions_data)} questions.")
61
  except requests.exceptions.RequestException as e:
62
  print(f"Error fetching questions: {e}")
63
  return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
  except Exception as e:
69
  print(f"An unexpected error occurred fetching questions: {e}")
70
  return f"An unexpected error occurred fetching questions: {e}", None
@@ -76,22 +348,59 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
  question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
82
  try:
83
- submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
 
 
 
89
 
90
  if not answers_payload:
91
  print("Agent did not produce any answers to submit.")
92
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
 
94
- # 4. Prepare Submission
 
 
 
95
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
@@ -110,8 +419,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
110
  f"Message: {result_data.get('message', 'No message received.')}"
111
  )
112
  print("Submission successful.")
 
 
113
  results_df = pd.DataFrame(results_log)
114
  return final_status, results_df
 
115
  except requests.exceptions.HTTPError as e:
116
  error_detail = f"Server responded with status {e.response.status_code}."
117
  try:
@@ -123,16 +435,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
123
  print(status_message)
124
  results_df = pd.DataFrame(results_log)
125
  return status_message, results_df
 
126
  except requests.exceptions.Timeout:
127
  status_message = "Submission Failed: The request timed out."
128
  print(status_message)
129
  results_df = pd.DataFrame(results_log)
130
  return status_message, results_df
 
131
  except requests.exceptions.RequestException as e:
132
  status_message = f"Submission Failed: Network error - {e}"
133
  print(status_message)
134
  results_df = pd.DataFrame(results_log)
135
  return status_message, results_df
 
136
  except Exception as e:
137
  status_message = f"An unexpected error occurred during submission: {e}"
138
  print(status_message)
@@ -141,56 +456,58 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
141
 
142
 
143
  # --- Build Gradio Interface using Blocks ---
144
- with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
- gr.Markdown(
147
- """
148
- **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
 
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
- """
159
- )
160
 
161
- gr.LoginButton()
162
 
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
169
- run_button.click(
170
- fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
- space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
-
180
- if space_host_startup:
181
- print(f"βœ… SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
- else:
184
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
-
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
- print(f"βœ… SPACE_ID found: {space_id_startup}")
188
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
- else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
-
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
-
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
  import os
2
+ import io
3
  import gradio as gr
4
  import requests
 
5
  import pandas as pd
6
+ from time import sleep
7
+ from PIL import Image
8
+ import helium
9
+ from selenium import webdriver
10
+ from selenium.webdriver.common.by import By
11
+ from selenium.webdriver.common.keys import Keys
12
+ from selenium.webdriver.remote.webelement import WebElement
13
+ from smolagents import (
14
+ LiteLLMModel,
15
+ InferenceClientModel,
16
+ CodeAgent,
17
+ tool,
18
+ )
19
+ from yt_dlp import YoutubeDL
20
+ from pprint import pprint
21
+ from markdownify import markdownify as md
22
+ import urllib
23
+ from unstructured.partition.auto import partition
24
+ import whisper
25
+ from helium import *
26
+ from dotenv import load_dotenv
27
+ from phoenix.otel import register
28
+ from openinference.instrumentation.smolagents import SmolagentsInstrumentor
29
+
30
+
31
+ register()
32
+ SmolagentsInstrumentor().instrument()
33
+ audio_model = whisper.load_model("turbo")
34
+
35
+ load_dotenv()
36
 
37
  # (Keep Constants as is)
38
  # --- Constants ---
39
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
40
 
41
  # --- Basic Agent Definition ---
42
+ # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
43
+
44
+
45
def _xpath_literal(text: str) -> str:
    """Return *text* as an XPath 1.0 string literal, whatever quotes it contains."""
    if "'" not in text:
        return f"'{text}'"
    if '"' not in text:
        return f'"{text}"'
    # XPath 1.0 has no escape syntax: split on the apostrophes and glue the
    # pieces back together with concat().
    pieces = ", \"'\", ".join(f"'{piece}'" for piece in text.split("'"))
    return f"concat({pieces})"


def get_agent(headless: bool = False, model_id: str = "gemini/gemini-2.0-flash-lite") -> "CodeAgent":
    """Start a Chrome session and build the browsing/parsing CodeAgent around it.

    Every tool below is a closure over the single Selenium ``driver`` created
    here, so one call to this function owns exactly one browser window.

    Args:
        headless: Start Chrome without a visible window. Defaults to False,
            which is what the original hard-coded.
        model_id: LiteLLM model identifier handed to ``LiteLLMModel``.

    Returns:
        A ``CodeAgent`` whose system prompt has the helium usage notes appended
        and whose Python executor has already run ``from helium import *``.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--force-device-scale-factor=1")
    # chrome_options.add_argument("--window-size=1000,1350")
    # chrome_options.add_argument("--disable-pdf-viewer")
    chrome_options.add_argument("--window-position=0,0")

    # Initialize the browser
    driver = helium.start_chrome(headless=headless, options=chrome_options)

    # Appended verbatim to the agent's system prompt. The click_element example
    # passes a single argument because the tool accepts only the element.
    helium_instructions = """
You can use helium to access websites. Don't bother about the helium driver, it's already managed.
We've already ran "from helium import *"
Then you can go to pages!
Code:
```py
go_to('github.com/trending')
```<end_code>

You can directly click clickable elements by inputting the text that appears on them using the tool `click_element` with element as an argument.
This element is retrieved using the tool `get_element_by_text`.
Code:
```py
click_element(get_element_by_text("Top products"))
```<end_code>

If you try to interact with an element and it's not found, you'll get a LookupError.
Never try to login in a page.

You can search for a text on the page using the tool `search_item_ctrl_f` with text as an argument and the index of the element as an optional argument.
Code:
```py
search_item_ctrl_f("Top products")
```<end_code>

When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
Just use your built-in tool `close_popups` to close them:
Code:
```py
close_popups()
```<end_code>

You can use .exists() to check for the existence of an element. For example:
Code:
```py
if Text('Accept cookies?').exists():
    click('I accept')
```<end_code>
"""

    @tool
    def search_item_ctrl_f(text: str, nth_result: int | None = None) -> str:
        """
        Searches for text on the current page via Ctrl + F and jumps to the nth occurrence and scroll into view.
        Args:
            text: The text to search for
            nth_result: Which occurrence to jump to, counted from 1 (default: None)
        """
        elements = driver.find_elements(
            By.XPATH, f"//*[contains(text(), {_xpath_literal(text)})]"
        )
        # Occurrences are 1-based. Without the lower bound, 0 or a negative
        # value would silently index from the end of the match list.
        if nth_result is not None and not 1 <= nth_result <= len(elements):
            raise Exception(
                f"Match n°{nth_result} not found (only {len(elements)} matches found)"
            )
        result = f"Found {len(elements)} matches for '{text}'."
        if nth_result is None:
            # No index requested: report the context of every match.
            return (
                result
                + "\n"
                + "\n".join(get_surrounding_elements(element) for element in elements)
            )
        elem = elements[nth_result - 1]
        driver.execute_script("arguments[0].scrollIntoView(true);", elem)
        return (
            result
            + "\n"
            + f"This is the element : {nth_result}"
            + "\n"
            + get_surrounding_elements(elem)
        )

    @tool
    def go_back() -> None:
        """Goes back to previous page."""
        driver.back()

    @tool
    def close_popups() -> str:
        """
        Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows!
        This does not work on cookie consent banners.
        """
        webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
        # The signature promises a string; give the agent an observation to read.
        return "Sent ESCAPE to the page to dismiss pop-ups."

    @tool
    def scroll_into_view(element: WebElement) -> None:
        """Scrolls an element into view.
        Args:
            element: The element to scroll into view.
        """
        driver.execute_script("arguments[0].scrollIntoView(true);", element)

    @tool
    def click_element(element: WebElement) -> None:
        """Clicks an element.
        Args:
            element: The element to click.
        """
        element.click()

    @tool
    def get_element_by_text(text: str) -> WebElement:
        """Returns an element with the specified text.
        Args:
            text: The text of the element to return.
        """
        return driver.find_element(
            By.XPATH, f"//*[contains(text(), {_xpath_literal(text)})]"
        )

    @tool
    def visit_webpage_in_markdown(url: str) -> str:
        """Visits a webpage. Returns the markdown content of the page.
        Args:
            url: The URL of the webpage to visit.
        """
        driver.get(url)
        return md(driver.page_source)

    @tool
    def visit_webpage_in_html(url: str) -> str:
        """Visits a webpage. Returns the HTML content of the page.
        Args:
            url: The URL of the webpage to visit.
        """
        driver.get(url)
        return driver.page_source

    @tool
    def get_surrounding_elements(element: WebElement, num_elements: int = 50) -> str:
        """Returns the surrounding elements of an element.
        Args:
            element: The element to return the surrounding elements of.
            num_elements: The number of elements to return. Default is 50.
        """
        target = md(element.get_attribute("outerHTML") or "").strip()
        # Non-blank lines of the whole page rendered as markdown.
        page_lines = [line for line in md(driver.page_source).split("\n") if line.strip()]
        if target:
            for i, line in enumerate(page_lines):
                if line in target or target in line:
                    # Clamp the start: a negative index would wrap around to the
                    # end of the list and return the wrong (often empty) window.
                    start = max(0, i - num_elements)
                    return "\n".join(page_lines[start : i + num_elements])
        # No line could be matched to the element: fall back to the page top.
        return "\n".join(page_lines[:num_elements])

    @tool
    def web_search(query: str) -> str:
        """Searches for a query on the web and returns the markdown content of the page.
        Args:
            query: The query to search for.
        """
        # Import the submodule explicitly; a bare `import urllib` does not
        # guarantee that `urllib.parse` is loaded.
        from urllib.parse import quote

        go_to(f"https://duckduckgo.com/?q={quote(query)}&ia=web")
        return md(driver.page_source)

    @tool
    def transcribe_youtube_video(video_url: str) -> str:
        """Transcribe a YouTube video using yt-dlp and Whisper.
        Args:
            video_url: The URL of the YouTube video to transcribe.
        """
        import tempfile

        # A fresh directory per call: with a fixed output name yt-dlp may skip
        # the download and Whisper would transcribe a previous video's audio.
        with tempfile.TemporaryDirectory() as workdir:
            ydl_opts = {
                "format": "m4a/bestaudio/best",
                "outtmpl": os.path.join(workdir, "audio.%(ext)s"),
                # Audio extraction is a post-processor; the same keys placed at
                # the top level of the options dict are ignored.
                "postprocessors": [
                    {"key": "FFmpegExtractAudio", "preferredcodec": "m4a"}
                ],
            }
            with YoutubeDL(ydl_opts) as ydl:
                # Metadata only: nothing is downloaded if captions suffice.
                info = ydl.extract_info(video_url, download=False) or {}
                captions = info.get("automatic_captions") or {}
                # Only look at the English track; iterating the mapping itself
                # would yield language codes rather than caption entries.
                # NOTE(review): only the "srt" format is accepted, as before —
                # confirm yt-dlp actually offers it for automatic captions.
                for caption in captions.get("en") or []:
                    if caption.get("ext", "") == "srt" and caption.get("url"):
                        try:
                            response = requests.get(caption["url"], timeout=30)
                            response.raise_for_status()
                            return response.text
                        except requests.RequestException:
                            # Captions unreachable: fall back to Whisper.
                            break

                ydl.download([video_url])

            downloaded = os.listdir(workdir)
            if not downloaded:
                raise FileNotFoundError(f"yt-dlp produced no audio file for {video_url}")
            transcript = audio_model.transcribe(os.path.join(workdir, downloaded[0]))
            return transcript["text"]

    @tool
    def parse_doc_file(file_url: str) -> str:
        """
        Parse any document type file like pdf, docx, xls, xlsx, etc and return its content in markdown format.
        Args:
            file_url: The URL of the document file to parse.
        """
        try:
            response = requests.get(file_url, timeout=60)
            response.raise_for_status()
            elements = partition(file=io.BytesIO(response.content), include_page_breaks=True)
            return "\n\n".join(str(el) for el in elements)
        except Exception as e:
            # Returned rather than raised so the agent sees the failure as text.
            return f"Failed to fetch file: {e}"

    @tool
    def parse_audio_file(file_url: str) -> str:
        """
        Parse an audio file and return its content in markdown format.
        Args:
            file_url: The URL of the audio file to parse.
        """
        import tempfile

        try:
            response = requests.get(file_url, timeout=60)
            response.raise_for_status()
            # Whisper takes a file path (or an array), not a file-like object,
            # so the bytes are spooled to disk first.
            with tempfile.TemporaryDirectory() as workdir:
                audio_path = os.path.join(workdir, "audio_input")
                with open(audio_path, "wb") as audio_file:
                    audio_file.write(response.content)
                return audio_model.transcribe(audio_path)["text"]
        except Exception as e:
            # Returned rather than raised so the agent sees the failure as text.
            return f"Failed to fetch file: {e}"

    # think_agent = CodeAgent(
    #     model=LiteLLMModel("gemini/gemini-2.5-flash-preview-05-20"),
    #     tools=[web_search],
    #     additional_authorized_imports="*",
    #     name="Think Agent",
    #     description="You are the thinking agent who will think step by step to solve the problem."
    # )

    agent = CodeAgent(
        tools=[
            web_search,
            visit_webpage_in_markdown,
            visit_webpage_in_html,
            scroll_into_view,
            click_element,
            get_element_by_text,
            get_surrounding_elements,
            go_back,
            close_popups,
            search_item_ctrl_f,
            parse_doc_file,
            parse_audio_file,
            transcribe_youtube_video,
        ],
        model=LiteLLMModel(model_id),
        # model=InferenceClientModel(),
        # SECURITY: "*" authorizes every import in model-generated code; only
        # run this agent in an environment you are prepared to sandbox.
        additional_authorized_imports=["*"],
        # managed_agents=[think_agent],
    )
    agent.prompt_templates["system_prompt"] += helium_instructions
    # Pre-load helium's helpers (go_to, click, Text, ...) for generated code.
    agent.python_executor("from helium import *")

    return agent
295
+
296
+
297
+ def run_and_submit_all(profile: gr.OAuthProfile | None):
298
  """
299
  Fetches all questions, runs the BasicAgent on them, submits all answers,
300
  and displays the results.
301
  """
302
  # --- Determine HF Space Runtime URL and Repo URL ---
303
+ space_id = os.getenv("SPACE_ID", "rony000013/hf_agent_course") # Get the SPACE_ID for sending link to the code
304
 
305
+ # if profile:
306
+ # username= f"{profile.username}"
307
+ # print(f"User logged in: {username}")
308
+ # else:
309
+ # print("User not logged in.")
310
+ # return "Please Login to Hugging Face with the button.", None
311
+ username = "rony000013"
312
 
313
  api_url = DEFAULT_API_URL
314
  questions_url = f"{api_url}/questions"
 
316
 
317
  # 1. Instantiate Agent ( modify this part to create your agent)
318
  try:
319
+ agent = get_agent()
320
  except Exception as e:
321
  print(f"Error instantiating agent: {e}")
322
  return f"Error initializing agent: {e}", None
 
331
  response.raise_for_status()
332
  questions_data = response.json()
333
  if not questions_data:
334
+ print("Fetched questions list is empty.")
335
+ return "Fetched questions list is empty or invalid format.", None
336
  print(f"Fetched {len(questions_data)} questions.")
337
  except requests.exceptions.RequestException as e:
338
  print(f"Error fetching questions: {e}")
339
  return f"Error fetching questions: {e}", None
 
 
 
 
340
  except Exception as e:
341
  print(f"An unexpected error occurred fetching questions: {e}")
342
  return f"An unexpected error occurred fetching questions: {e}", None
 
348
  for item in questions_data:
349
  task_id = item.get("task_id")
350
  question_text = item.get("question")
351
+ file_name = item.get("file_name")
352
  if not task_id or question_text is None:
353
  print(f"Skipping item with missing task_id or question: {item}")
354
  continue
355
  try:
356
+ if file_name != "" and file_name is not None:
357
+ if (
358
+ file_name.endswith(".png")
359
+ or file_name.endswith(".jpg")
360
+ or file_name.endswith(".jpeg")
361
+ ):
362
+ image_url = f"{api_url}/files/{task_id}"
363
+ image_response = requests.get(image_url)
364
+ image_response.raise_for_status()
365
+ image_data = image_response.content
366
+ image = Image.open(io.BytesIO(image_data))
367
+ submitted_answer = agent.run(question_text, images=[image], reset=True)
368
+ else:
369
+ submitted_answer = agent.run(
370
+ f"{question_text}\n\nFile name: {file_name}\n\nFile URL: {api_url}/files/{task_id}", reset=True
371
+ )
372
+ else:
373
+ submitted_answer = agent.run(question_text)
374
+ answers_payload.append(
375
+ {"task_id": task_id, "submitted_answer": submitted_answer}
376
+ )
377
+ results_log.append(
378
+ {
379
+ "Task ID": task_id,
380
+ "Question": question_text,
381
+ "Submitted Answer": submitted_answer,
382
+ }
383
+ )
384
  except Exception as e:
385
+ print(f"Error running agent on task {task_id}: {e}")
386
+ results_log.append(
387
+ {
388
+ "Task ID": task_id,
389
+ "Question": question_text,
390
+ "Submitted Answer": f"AGENT ERROR: {e}",
391
+ }
392
+ )
393
+
394
+ sleep(30)
395
 
396
  if not answers_payload:
397
  print("Agent did not produce any answers to submit.")
398
  return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
399
 
400
+ print("Agent produced answers to submit.")
401
+ print(answers_payload)
402
+
403
+ # 4. Prepare Submission
404
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
405
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
406
  print(status_update)
 
419
  f"Message: {result_data.get('message', 'No message received.')}"
420
  )
421
  print("Submission successful.")
422
+ pprint(result_data)
423
+ pprint(final_status)
424
  results_df = pd.DataFrame(results_log)
425
  return final_status, results_df
426
+
427
  except requests.exceptions.HTTPError as e:
428
  error_detail = f"Server responded with status {e.response.status_code}."
429
  try:
 
435
  print(status_message)
436
  results_df = pd.DataFrame(results_log)
437
  return status_message, results_df
438
+
439
  except requests.exceptions.Timeout:
440
  status_message = "Submission Failed: The request timed out."
441
  print(status_message)
442
  results_df = pd.DataFrame(results_log)
443
  return status_message, results_df
444
+
445
  except requests.exceptions.RequestException as e:
446
  status_message = f"Submission Failed: Network error - {e}"
447
  print(status_message)
448
  results_df = pd.DataFrame(results_log)
449
  return status_message, results_df
450
+
451
  except Exception as e:
452
  status_message = f"An unexpected error occurred during submission: {e}"
453
  print(status_message)
 
456
 
457
 
458
  # --- Build Gradio Interface using Blocks ---
459
+ # with gr.Blocks() as demo:
460
+ # gr.Markdown("# Basic Agent Evaluation Runner")
461
+ # gr.Markdown(
462
+ # """
463
+ # **Instructions:**
464
 
465
+ # 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
466
+ # 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
467
+ # 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
468
 
469
+ # ---
470
+ # **Disclaimers:**
471
+ # Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
472
+ # This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
473
+ # """
474
+ # )
475
 
476
+ # gr.LoginButton()
477
 
478
+ # run_button = gr.Button("Run Evaluation & Submit All Answers")
479
 
480
+ # status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
481
+ # # Removed max_rows=10 from DataFrame constructor
482
+ # results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
483
 
484
+ # run_button.click(
485
+ # fn=run_and_submit_all,
486
+ # outputs=[status_output, results_table]
487
+ # )
488
+
489
+ # if __name__ == "__main__":
490
+ # print("\n" + "-"*30 + " App Starting " + "-"*30)
491
+ # # Check for SPACE_HOST and SPACE_ID at startup for information
492
+ # space_host_startup = os.getenv("SPACE_HOST")
493
+ # space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
494
+
495
+ # if space_host_startup:
496
+ # print(f"✅ SPACE_HOST found: {space_host_startup}")
497
+ # print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
498
+ # else:
499
+ # print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
500
+
501
+ # if space_id_startup: # Print repo URLs if SPACE_ID is found
502
+ # print(f"✅ SPACE_ID found: {space_id_startup}")
503
+ # print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
504
+ # print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
505
+ # else:
506
+ # print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
507
+
508
+ # print("-"*(60 + len(" App Starting ")) + "\n")
509
+
510
+ # print("Launching Gradio Interface for Basic Agent Evaluation...")
511
+ # demo.launch(debug=True, share=False)
512
 
513
+ run_and_submit_all(None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pyproject.toml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "hf-agents-course-final-assignment"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "arize-phoenix>=10.11.0",
9
+ "arize-phoenix-otel>=0.10.3",
10
+ "gradio>=5.33.0",
11
+ "helium>=5.1.1",
12
+ "huggingface-hub>=0.32.4",
13
+ "itsdangerous>=2.2.0",
14
+ "jupyter>=1.1.1",
15
+ "langchain-community>=0.3.24",
16
+ "markdownify>=1.1.0",
17
+ "openai-whisper>=20240930",
18
+ "openinference-instrumentation-litellm>=0.1.22",
19
+ "openinference-instrumentation-smolagents>=0.1.13",
20
+ "opentelemetry-exporter-otlp>=1.34.1",
21
+ "opentelemetry-sdk>=1.34.1",
22
+ "pandas>=2.3.0",
23
+ "pillow>=11.2.1",
24
+ "polars>=1.30.0",
25
+ "requests>=2.32.3",
26
+ "selenium>=4.33.0",
27
+ "smolagents[litellm,telemetry,toolkit]>=1.17.0",
28
+ "unstructured[all-docs]>=0.17.2",
29
+ "whisper>=1.1.10",
30
+ "wikipedia>=1.4.0",
31
+ "yt-dlp>=2025.6.9",
32
+ ]
requirements.txt CHANGED
@@ -1,2 +1,291 @@
1
- gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.7.0
2
+ aiofiles==24.1.0
3
+ aiohappyeyeballs==2.6.1
4
+ aiohttp==3.12.9
5
+ aioitertools==0.12.0
6
+ aiosignal==1.3.2
7
+ aiosqlite==0.21.0
8
+ alembic==1.16.1
9
+ annotated-types==0.7.0
10
+ antlr4-python3-runtime==4.9.3
11
+ anyio==4.9.0
12
+ argon2-cffi==25.1.0
13
+ argon2-cffi-bindings==21.2.0
14
+ arize-phoenix==10.11.0
15
+ arize-phoenix-client==1.10.0
16
+ arize-phoenix-evals==0.20.8
17
+ arize-phoenix-otel==0.10.3
18
+ arrow==1.3.0
19
+ asttokens==3.0.0
20
+ async-lru==2.0.5
21
+ attrs==25.3.0
22
+ authlib==1.6.0
23
+ babel==2.17.0
24
+ backoff==2.2.1
25
+ beautifulsoup4==4.13.4
26
+ bleach==6.2.0
27
+ cachetools==6.0.0
28
+ certifi==2025.4.26
29
+ cffi==1.17.1
30
+ chardet==5.2.0
31
+ charset-normalizer==3.4.2
32
+ click==8.2.1
33
+ colorama==0.4.6
34
+ coloredlogs==15.0.1
35
+ comm==0.2.2
36
+ contourpy==1.3.2
37
+ cryptography==45.0.4
38
+ cycler==0.12.1
39
+ dataclasses-json==0.6.7
40
+ debugpy==1.8.14
41
+ decorator==5.2.1
42
+ defusedxml==0.7.1
43
+ deprecated==1.2.18
44
+ distro==1.9.0
45
+ dnspython==2.7.0
46
+ duckduckgo-search==8.0.2
47
+ effdet==0.4.1
48
+ email-validator==2.2.0
49
+ emoji==2.14.1
50
+ et-xmlfile==2.0.0
51
+ executing==2.2.0
52
+ fastapi==0.115.12
53
+ fastjsonschema==2.21.1
54
+ ffmpy==0.6.0
55
+ filelock==3.18.0
56
+ filetype==1.2.0
57
+ flatbuffers==25.2.10
58
+ fonttools==4.58.2
59
+ fqdn==1.5.1
60
+ frozenlist==1.6.2
61
+ fsspec==2025.5.1
62
+ google-api-core==1.16.0
63
+ google-auth==1.6.3
64
+ google-cloud-vision==1.0.0
65
+ googleapis-common-protos==1.70.0
66
+ gradio==5.33.0
67
+ gradio-client==1.10.2
68
+ graphql-core==3.2.6
69
+ greenlet==3.2.2
70
+ groovy==0.1.2
71
+ grpc-interceptor==0.15.4
72
+ grpcio==1.73.0
73
+ h11==0.16.0
74
+ helium==5.1.1
75
+ html5lib==1.1
76
+ httpcore==1.0.9
77
+ httpx==0.28.1
78
+ httpx-sse==0.4.0
79
+ huggingface-hub==0.32.4
80
+ humanfriendly==10.0
81
+ idna==3.10
82
+ importlib-metadata==8.7.0
83
+ ipykernel==6.29.5
84
+ ipython==9.3.0
85
+ ipython-pygments-lexers==1.1.1
86
+ ipywidgets==8.1.7
87
+ isoduration==20.11.0
88
+ itsdangerous==2.2.0
89
+ jedi==0.19.2
90
+ jinja2==3.1.6
91
+ jiter==0.10.0
92
+ joblib==1.5.1
93
+ json5==0.12.0
94
+ jsonpatch==1.33
95
+ jsonpointer==3.0.0
96
+ jsonschema==4.24.0
97
+ jsonschema-specifications==2025.4.1
98
+ jupyter==1.1.1
99
+ jupyter-client==8.6.3
100
+ jupyter-console==6.6.3
101
+ jupyter-core==5.8.1
102
+ jupyter-events==0.12.0
103
+ jupyter-lsp==2.2.5
104
+ jupyter-server==2.16.0
105
+ jupyter-server-terminals==0.5.3
106
+ jupyterlab==4.4.3
107
+ jupyterlab-pygments==0.3.0
108
+ jupyterlab-server==2.27.3
109
+ jupyterlab-widgets==3.0.15
110
+ kiwisolver==1.4.8
111
+ langchain==0.3.25
112
+ langchain-community==0.3.24
113
+ langchain-core==0.3.63
114
+ langchain-text-splitters==0.3.8
115
+ langdetect==1.0.9
116
+ langsmith==0.3.45
117
+ litellm==1.72.1
118
+ llvmlite==0.44.0
119
+ lxml==5.4.0
120
+ mako==1.3.10
121
+ markdown==3.8
122
+ markdown-it-py==3.0.0
123
+ markdownify==1.1.0
124
+ markupsafe==3.0.2
125
+ marshmallow==3.26.1
126
+ matplotlib==3.10.3
127
+ matplotlib-inline==0.1.7
128
+ mdurl==0.1.2
129
+ mistune==3.1.3
130
+ more-itertools==10.7.0
131
+ mpmath==1.3.0
132
+ multidict==6.4.4
133
+ mypy-extensions==1.1.0
134
+ nbclient==0.10.2
135
+ nbconvert==7.16.6
136
+ nbformat==5.10.4
137
+ nest-asyncio==1.6.0
138
+ networkx==3.5
139
+ nltk==3.9.1
140
+ notebook==7.4.3
141
+ notebook-shim==0.2.4
142
+ numba==0.61.2
143
+ numpy==2.2.6
144
+ olefile==0.47
145
+ omegaconf==2.3.0
146
+ onnx==1.18.0
147
+ onnxruntime==1.22.0
148
+ openai==1.84.0
149
+ openai-whisper==20240930
150
+ opencv-python==4.11.0.86
151
+ openinference-instrumentation==0.1.33
152
+ openinference-instrumentation-litellm==0.1.22
153
+ openinference-instrumentation-smolagents==0.1.13
154
+ openinference-semantic-conventions==0.1.20
155
+ openpyxl==3.1.5
156
+ opentelemetry-api==1.34.1
157
+ opentelemetry-exporter-otlp==1.34.1
158
+ opentelemetry-exporter-otlp-proto-common==1.34.1
159
+ opentelemetry-exporter-otlp-proto-grpc==1.34.1
160
+ opentelemetry-exporter-otlp-proto-http==1.34.1
161
+ opentelemetry-instrumentation==0.55b1
162
+ opentelemetry-proto==1.34.1
163
+ opentelemetry-sdk==1.34.1
164
+ opentelemetry-semantic-conventions==0.55b1
165
+ orjson==3.10.18
166
+ outcome==1.3.0.post0
167
+ overrides==7.7.0
168
+ packaging==24.2
169
+ pandas==2.3.0
170
+ pandocfilters==1.5.1
171
+ parso==0.8.4
172
+ pdf2image==1.17.0
173
+ pdfminer-six==20250506
174
+ pi-heif==0.22.0
175
+ pikepdf==9.8.1
176
+ pillow==11.2.1
177
+ platformdirs==4.3.8
178
+ polars==1.30.0
179
+ primp==0.15.0
180
+ prometheus-client==0.22.1
181
+ prompt-toolkit==3.0.51
182
+ propcache==0.3.1
183
+ protobuf==5.29.5
184
+ psutil==7.0.0
185
+ pure-eval==0.2.3
186
+ pyarrow==20.0.0
187
+ pyasn1==0.6.1
188
+ pyasn1-modules==0.4.2
189
+ pycocotools==2.0.10
190
+ pycparser==2.22
191
+ pydantic==2.11.5
192
+ pydantic-core==2.33.2
193
+ pydantic-settings==2.9.1
194
+ pydub==0.25.1
195
+ pygments==2.19.1
196
+ pypandoc==1.15
197
+ pyparsing==3.2.3
198
+ pypdf==5.6.0
199
+ pypdfium2==4.30.1
200
+ pyreadline3==3.5.4
201
+ pysocks==1.7.1
202
+ python-dateutil==2.9.0.post0
203
+ python-docx==1.1.2
204
+ python-dotenv==1.1.0
205
+ python-iso639==2025.2.18
206
+ python-json-logger==3.3.0
207
+ python-magic==0.4.27
208
+ python-multipart==0.0.20
209
+ python-oxmsg==0.0.2
210
+ python-pptx==1.0.2
211
+ pytz==2025.2
212
+ pywin32==310
213
+ pywinpty==2.0.15
214
+ pyyaml==6.0.2
215
+ pyzmq==26.4.0
216
+ rapidfuzz==3.13.0
217
+ referencing==0.36.2
218
+ regex==2024.11.6
219
+ requests==2.32.3
220
+ requests-toolbelt==1.0.0
221
+ rfc3339-validator==0.1.4
222
+ rfc3986-validator==0.1.1
223
+ rich==14.0.0
224
+ rpds-py==0.25.1
225
+ rsa==4.9.1
226
+ ruff==0.11.12
227
+ safehttpx==0.1.6
228
+ safetensors==0.5.3
229
+ scikit-learn==1.7.0
230
+ scipy==1.15.3
231
+ selenium==4.33.0
232
+ semantic-version==2.10.0
233
+ send2trash==1.8.3
234
+ setuptools==80.9.0
235
+ shellingham==1.5.4
236
+ six==1.17.0
237
+ smolagents==1.17.0
238
+ sniffio==1.3.1
239
+ sortedcontainers==2.4.0
240
+ soupsieve==2.7
241
+ sqlalchemy==2.0.41
242
+ sqlean-py==3.49.1
243
+ stack-data==0.6.3
244
+ starlette==0.46.2
245
+ strawberry-graphql==0.270.1
246
+ sympy==1.14.0
247
+ tenacity==9.1.2
248
+ terminado==0.18.1
249
+ threadpoolctl==3.6.0
250
+ tiktoken==0.9.0
251
+ timm==1.0.15
252
+ tinycss2==1.4.0
253
+ tokenizers==0.21.1
254
+ tomlkit==0.13.2
255
+ torch==2.7.1
256
+ torchvision==0.22.1
257
+ tornado==6.5.1
258
+ tqdm==4.67.1
259
+ traitlets==5.14.3
260
+ transformers==4.52.4
261
+ trio==0.30.0
262
+ trio-websocket==0.12.2
263
+ typer==0.16.0
264
+ types-python-dateutil==2.9.0.20250516
265
+ typing-extensions==4.13.2
266
+ typing-inspect==0.9.0
267
+ typing-inspection==0.4.1
268
+ tzdata==2025.2
269
+ unstructured==0.17.2
270
+ unstructured-client==0.36.0
271
+ unstructured-inference==1.0.5
272
+ unstructured-pytesseract==0.3.15
273
+ uri-template==1.3.0
274
+ urllib3==2.4.0
275
+ uvicorn==0.34.3
276
+ wcwidth==0.2.13
277
+ webcolors==24.11.1
278
+ webencodings==0.5.1
279
+ websocket-client==1.8.0
280
+ websockets==15.0.1
281
+ whisper==1.1.10
282
+ widgetsnbextension==4.0.14
283
+ wikipedia==1.4.0
284
+ wrapt==1.17.2
285
+ wsproto==1.2.0
286
+ xlrd==2.0.1
287
+ xlsxwriter==3.2.3
288
+ yarl==1.20.0
289
+ yt-dlp==2025.6.9
290
+ zipp==3.22.0
291
+ zstandard==0.23.0
temp.ipynb ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 10,
6
+ "id": "8473efba",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "from helium import *\n",
11
+ "from selenium import webdriver\n",
12
+ "from selenium.webdriver.common.by import By\n",
13
+ "from selenium.webdriver.common.keys import Keys\n",
14
+ "from markdownify import markdownify as md\n",
15
+ "import urllib\n",
16
+ "import requests\n",
17
+ "from unstructured.partition.auto import partition\n",
18
+ "import io\n",
19
+ "import whisper\n",
20
+ "from pprint import pprint\n",
21
+ "audio_model = whisper.load_model(\"turbo\")"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 6,
27
+ "id": "a03c6324",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "chrome_options = webdriver.ChromeOptions()\n",
32
+ "chrome_options.add_argument(\"--force-device-scale-factor=1\")\n",
33
+ "# chrome_options.add_argument(\"--window-size=1000,1350\")\n",
34
+ "# chrome_options.add_argument(\"--disable-pdf-viewer\")\n",
35
+ "chrome_options.add_argument(\"--window-position=0,0\")\n",
36
+ "\n",
37
+ "# Initialize the browser\n",
38
+ "driver = helium.start_chrome(headless=False, options=chrome_options)"
39
+ ]
40
+ },
41
+ {
42
+ "cell_type": "code",
43
+ "execution_count": null,
44
+ "id": "3088cb0a",
45
+ "metadata": {},
46
+ "outputs": [],
47
+ "source": [
48
+ "def web_search(query: str) -> str:\n",
49
+ " query = urllib.parse.quote(query)\n",
50
+ " go_to(f\"https://duckduckgo.com/?q={query}&ia=web\")\n",
51
+ " return md(driver.page_source)"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 8,
57
+ "id": "9b8c0eb9",
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/plain": [
63
+ "'What is the capital of France? at DuckDuckGo\\n\\n[DuckDuckGo](/)\\n\\nShortcuts to other sites to search off DuckDuckGo[Learn More](/bang)\\n\\nOpen menu\\n\\n* [All](/?q=What+is+the+capital+of+France%3F&ia=web)\\n* [Images](/?q=What+is+the+capital+of+France%3F&ia=images&iax=images)\\n* [Videos](/?q=What+is+the+capital+of+France%3F&ia=videos&iax=videos)\\n* [Q&A](/?q=What+is+the+capital+of+France%3F&ia=qa)\\n* More\\n\\n [News](/?q=What+is+the+capital+of+France%3F&ia=news&iar=news)\\n\\n [Maps](/?q=What+is+the+capital+of+France%3F&iaxm=maps)\\n\\n* [Assist](/?q=What+is+the+capital+of+France%3F&ia=web&assist=false)\\n* [Duck.ai](/?q=What+is+the+capital+of+France%3F&ia=chat)\\n* Search Settings\\n\\nYou are being redirected to the non-JavaScript site.\\n\\nClick [here](/html/?q=What%20is%20the%20capital%20of%20France%3F) if it doesn\\'t happen automatically.\\n\\nAlways protected\\n\\nDuckDuckGo never tracks your searches.\\n\\n[Learn More](https://duckduckgo.com/duckduckgo-help-pages/search-privacy/)\\n\\nYou can hide this reminder in [Search Settings](/settings#appearance)\\n\\nIndia (en)\\n\\nRecent:\\n\\nClear All\\n\\nIndia (en)\\n\\nAll regions\\n\\nArgentina\\n\\nAustralia\\n\\nAustria\\n\\nBelgium (fr)\\n\\nBelgium (nl)\\n\\nBrazil\\n\\nBulgaria\\n\\nCanada (en)\\n\\nCanada (fr)\\n\\nCatalonia\\n\\nChile\\n\\nChina\\n\\nColombia\\n\\nCroatia\\n\\nCzechia\\n\\nDenmark\\n\\nEstonia\\n\\nFinland\\n\\nFrance\\n\\nGermany\\n\\nGreece\\n\\nHong Kong\\n\\nHungary\\n\\nIceland\\n\\nIndonesia (en)\\n\\nIreland\\n\\nIsrael (en)\\n\\nItaly\\n\\nJapan\\n\\nKorea\\n\\nLatvia\\n\\nLithuania\\n\\nMalaysia (en)\\n\\nMexico\\n\\nNetherlands\\n\\nNew Zealand\\n\\nNorway\\n\\nPakistan (en)\\n\\nPeru\\n\\nPhilippines (en)\\n\\nPoland\\n\\nPortugal\\n\\nRomania\\n\\nRussia\\n\\nSaudi Arabia\\n\\nSingapore\\n\\nSlovakia\\n\\nSlovenia\\n\\nSouth Africa\\n\\nSpain (ca)\\n\\nSpain (es)\\n\\nSweden\\n\\nSwitzerland (de)\\n\\nSwitzerland (fr)\\n\\nTaiwan\\n\\nThailand 
(en)\\n\\nTurkey\\n\\nUkraine\\n\\nUnited Kingdom\\n\\nUS (English)\\n\\nUS (Spanish)\\n\\nVietnam (en)\\n\\nSafe search: moderate\\n\\nStrict\\n\\nModerate\\n\\nOff\\n\\nAny time\\n\\nAny time\\n\\nPast day\\n\\nPast week\\n\\nPast month\\n\\nPast year\\n\\nCustom date range\\n\\n1. Assist\\n\\n The capital of France is Paris. It is the largest city in the country and a major center for culture, finance, and diplomacy.\\n\\n [![](/assets/icons/favicons/wikipedia.white.png) Wikipedia](https://en.wikipedia.org/wiki/Paris)[![](//external-content.duckduckgo.com/ip3/www.britannica.com.ico) Encyclopedia Britannica](https://www.britannica.com/video/video-production-overview-city-Paris-Encyclopaedia-Britannica-1994/-68351)\\n\\n Auto-generated based on listed sources. May contain inaccuracies.\\n\\n ShowNeverSometimesOften\\n\\n Chat\\n\\n Was this helpful?\\n2. en.wikipedia.org\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Aen.wikipedia.org)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Aen.wikipedia.org)\\n\\n Share feedback about this site\\n\\n [![](/assets/icons/favicons/wikipedia.white.png)](/?q=What%20is%20the%20capital%20of%20France%3F+site:en.wikipedia.org \"Search domain en.wikipedia.org\")\\n\\n Wikipedia\\n\\n [https://en.wikipedia.org\\xa0β€Ί\\xa0wiki β€Ί Paris](https://en.wikipedia.org/wiki/Paris)\\n\\n [Paris - Wikipedia](https://en.wikipedia.org/wiki/Paris)\\n --------------------------------------------------------\\n\\n Paris (French pronunciation: [paʁi] **β“˜)** **is** **the** **capital** and largest city of **France**. With an estimated population of 2,048,472 residents in January 2025 [3] in an area of more than 105 km 2 (41 sq mi), [4] Paris is the fourth-most populous city in the European Union and the 30th most densely populated city in the world in 2022. [5] Since the 17th century, Paris has been one of the world\\'s ...\\n3. 
britannica.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.britannica.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.britannica.com)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/www.britannica.com.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.britannica.com \"Search domain britannica.com\")\\n\\n Britannica\\n\\n [https://www.britannica.com\\xa0β€Ί\\xa0place β€Ί Paris](https://www.britannica.com/place/Paris)\\n\\n [Paris | Definition, Map, Population, Facts, & History | Britannica](https://www.britannica.com/place/Paris)\\n ------------------------------------------------------------------------------------------------------------\\n\\n Jun 5, 2025Paris, city and **capital** **of** **France**, located along the Seine River, in the north-central part of the country. Paris is one of the world\\'s most important and attractive cities, famed for its gastronomy, haute couture, painting, literature, and intellectual community. Learn more about Paris in this article.\\n4. worldatlas.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.worldatlas.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.worldatlas.com)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/www.worldatlas.com.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.worldatlas.com \"Search domain worldatlas.com\")\\n\\n WorldAtlas\\n\\n [https://www.worldatlas.com\\xa0β€Ί\\xa0articles β€Ί what-is-the-capital-of-france.html](https://www.worldatlas.com/articles/what-is-the-capital-of-france.html)\\n\\n [What is the Capital of France? 
- WorldAtlas](https://www.worldatlas.com/articles/what-is-the-capital-of-france.html)\\n ---------------------------------------------------------------------------------------------------------------------\\n\\n Learn about Paris, the largest and most populous city in **France**, and its history, geography, economy, tourism, and administration. Find out why Paris is called the City of Light and the City of Love.\\n5. mappr.co\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.mappr.co)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.mappr.co)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/www.mappr.co.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.mappr.co \"Search domain mappr.co\")\\n\\n Mappr\\n\\n [https://www.mappr.co\\xa0β€Ί\\xa0capital-cities β€Ί france](https://www.mappr.co/capital-cities/france/)\\n\\n [What is the Capital of France? - Mappr](https://www.mappr.co/capital-cities/france/)\\n -------------------------------------------------------------------------------------\\n\\n Learn why Paris is the **capital** **of** **France** and how it became a global city with a rich cultural heritage. Discover its geography, climate, population, landmarks, and industries.\\n6. 
wikiwand.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.wikiwand.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.wikiwand.com)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/www.wikiwand.com.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.wikiwand.com \"Search domain wikiwand.com\")\\n\\n Wikiwand\\n\\n [https://www.wikiwand.com\\xa0β€Ί\\xa0en β€Ί articles β€Ί Paris](https://www.wikiwand.com/en/articles/Paris)\\n\\n [Paris - Wikiwand](https://www.wikiwand.com/en/articles/Paris)\\n --------------------------------------------------------------\\n\\n Paris is the **capital** and largest city of **France**. With an estimated population of 2,048,472 residents in January 2025 in an area of more than 105 km2 (41 sq mi),...\\n7. theworldcountries.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Atheworldcountries.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Atheworldcountries.com)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/theworldcountries.com.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:theworldcountries.com \"Search domain theworldcountries.com\")\\n\\n The World Countries\\n\\n [https://theworldcountries.com\\xa0β€Ί\\xa0place β€Ί paris](https://theworldcountries.com/place/paris/)\\n\\n [Paris - capital city of France - The World Countries](https://theworldcountries.com/place/paris/)\\n --------------------------------------------------------------------------------------------------\\n\\n Learn about the history, geography, culture, and attractions of Paris, the city and **capital** **of** **France**. Find out why Paris is called the City of Light and explore its landmarks, museums, and parks.\\n8. 
simple.wikipedia.org\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Asimple.wikipedia.org)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Asimple.wikipedia.org)\\n\\n Share feedback about this site\\n\\n [![](/assets/icons/favicons/wikipedia.white.png)](/?q=What%20is%20the%20capital%20of%20France%3F+site:simple.wikipedia.org \"Search domain simple.wikipedia.org\")\\n\\n Wikipedia\\n\\n [https://simple.wikipedia.org\\xa0β€Ί\\xa0wiki β€Ί Paris](https://simple.wikipedia.org/wiki/Paris)\\n\\n [Paris - Simple English Wikipedia, the free encyclopedia](https://simple.wikipedia.org/wiki/Paris)\\n --------------------------------------------------------------------------------------------------\\n\\n Paris is the **capital** city of **France** and the largest city in **France**. It has a rich history, many art museums, historical buildings, and a famous landmark, the Eiffel Tower.\\n9. countryaah.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.countryaah.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.countryaah.com)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/www.countryaah.com.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.countryaah.com \"Search domain countryaah.com\")\\n\\n Countryaah.com\\n\\n [https://www.countryaah.com\\xa0β€Ί\\xa0france-faqs](https://www.countryaah.com/france-faqs/)\\n\\n [What is the Capital of France? Paris - Countryaah.com](https://www.countryaah.com/france-faqs/)\\n ------------------------------------------------------------------------------------------------\\n\\n Learn about Paris, the **capital** city of **France**, and its rich history, culture, and landmarks. Find out how Paris became the political and administrative center of **France** and why it is called \"**The** City of Light\".\\n10. 
newworldencyclopedia.org\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.newworldencyclopedia.org)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.newworldencyclopedia.org)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/www.newworldencyclopedia.org.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.newworldencyclopedia.org \"Search domain newworldencyclopedia.org\")\\n\\n New World Encyclopedia\\n\\n [https://www.newworldencyclopedia.org\\xa0β€Ί\\xa0entry β€Ί Paris,\\\\_France](https://www.newworldencyclopedia.org/entry/Paris,_France)\\n\\n [Paris, France - New World Encyclopedia](https://www.newworldencyclopedia.org/entry/Paris,_France)\\n --------------------------------------------------------------------------------------------------\\n\\n Paris is the **capital** city of **France**, situated on the River Seine, in northern **France**, at the heart of the Île-de-**France** region. Learn about its history, culture, landmarks, and attractions in this comprehensive article.\\n11. isolatedtraveller.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Aisolatedtraveller.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Aisolatedtraveller.com)\\n\\n Share feedback about this site\\n\\n [![](//external-content.duckduckgo.com/ip3/isolatedtraveller.com.ico)](/?q=What%20is%20the%20capital%20of%20France%3F+site:isolatedtraveller.com \"Search domain isolatedtraveller.com\")\\n\\n Isolated Traveller\\n\\n [https://isolatedtraveller.com\\xa0β€Ί\\xa0what-is-the-capital-city-of-france](https://isolatedtraveller.com/what-is-the-capital-city-of-france/)\\n\\n [What Is The Capital City Of France? 
| Isolated Traveller](https://isolatedtraveller.com/what-is-the-capital-city-of-france/)\\n -----------------------------------------------------------------------------------------------------------------------------\\n\\n Learn about the history, population, landmarks and role of Paris as the **capital** city of **France**. Find out how Paris became the French **capital** in 1944 and what international organizations have their headquarters there.\\n\\nMore results\\n\\n1. ![map](//external-content.duckduckgo.com/ssv2/?scale=1&lang=en-US&colorScheme=dark&format=png&size=253x157&spn=0.0899%2C0.1367&center=48.8567%2C2.3522&annotations=%5B%7B%22point%22%3A%2248.8567%2C2.3522%22%2C%22color%22%3A%2266ABFF%22%7D%5D)\\n\\n Directions[Paris\\n -----](https://en.wikipedia.org/wiki/Paris)\\n\\n Capital city and largest city of France\\n\\n β€’ [paris.fr](https://paris.fr)\\n\\n Paris is the capital and largest city of France. With an estimated population of 2,048,472 residents in January 2025 in an area of more than 105 kmΒ², Paris is the fourth-most populous city in the European Union and the 30th most densely populated city in the world in 2022. Since the 17th century, Paris has been one of the world\\'s major centres of finance, diplomacy, commerce, culture, fashion, and gastronomy. Because of its leading role in the arts and sciences and its early adaptation of extensive street lighting, Paris became known as the City of Light in the 19th century. The City of Paris is the centre of the Île-de-France region, or Paris Region, with an official estimated population of 12,271,794 inhabitants in January 2023, or about 19% of the population of France. The Paris Region had a nominal GDP of €765 billion in 2021, the highest in the European Union. 
[Wikipedia](https://en.wikipedia.org/wiki/Paris)\\n\\n | | |\\n | --- | --- |\\n | Country | France |\\n | Arrondissement | None |\\n | Intercommunality | MΓ©tropole du Grand Paris |\\n\\n [![](/assets/icons/thirdparty/globe_dark.svg)Website](https://paris.fr)[![](/assets/icons/thirdparty/wikipedia28px_dark.svg)Wikipedia](https://en.wikipedia.org/wiki/Paris)[![](/assets/icons/thirdparty/instagram28px.svg)Instagram](https://instagram.com/paris_maville)[![](/assets/icons/thirdparty/facebook28px.svg)Facebook](https://facebook.com/paris)\\n\\n Was this helpful?\\n2. Searches related to **What is the capital of France?**\\n\\n Related Searches\\n\\n 1. [**explain** capital of france\\u200b](?q=explain%20capital%20of%20france)\\n 2. [capital of france **during** **ww2**\\u200b](?q=capital%20of%20france%20during%20ww2)\\n 3. [capital **city** of france **facts**\\u200b](?q=capital%20city%20of%20france%20facts)\\n 4. [**biggest** **city** **in** france capital\\u200b](?q=biggest%20city%20in%20france%20capital)\\n 1. [what is france\\'**s** capital **city**\\u200b](?q=what%20is%20france%27s%20capital%20city)\\n 2. [what **region** is **paris** **located**\\u200b](?q=what%20region%20is%20paris%20located)\\n 3. [**largest** **city** **in** france capital\\u200b](?q=largest%20city%20in%20france%20capital)\\n 4. [capital of france **in** **french**\\u200b](?q=capital%20of%20france%20in%20french)\\n\\nClose menu\\n\\nUpgrade to our Private Browser\\n\\nFast. Secure. 
Free.\\n\\n[Install Windows Browser](/windows)\\n\\n+ Search\\n+ [Homepage](https://start.duckduckgo.com/)\\n+ [Themes](/settings#appearance)\\n+ [Settings](/settings)\\n\\n+ Share Feedback\\n\\n+ Downloads\\n+ [iOS Browser](https://apps.apple.com/app/duckduckgo-private-browser/id663592361?platform=iphone&pt=866401&mt=8&ct=serp-atb-serp)\\n+ [Android Browser](https://play.google.com/store/apps/details?id=com.duckduckgo.mobile.android&referrer=utm_campaign%3Dserp-atb-serp%26origin%3Dfunnel_playstore_searchresults)\\n+ [Mac Browser](/mac?origin=funnel_browser_searchresults)\\n+ [Windows Browser](/windows?origin=funnel_browser_searchresults)\\n+ [Browser Extensions](/duckduckgo-help-pages/desktop/adding-duckduckgo-to-your-browser/)\\n\\n+ More From DuckDuckGo\\n+ [Duck.ai](https://duck.ai)\\n\\n NEW\\n+ [Email Protection](/email)\\n+ [Newsletter](/newsletter)\\n+ [Blog](/blog)\\n\\n+ Learn More\\n+ [What’s New](/updates)\\n+ [Compare Privacy](/compare-privacy)\\n+ [About Our Browser](/app)\\n+ [About DuckDuckGo](/about)\\n\\n+ Other Resources\\n+ [Help](/duckduckgo-help-pages)\\n+ [Community](https://www.reddit.com/r/duckduckgo/)\\n+ [Careers](/careers)\\n+ [Privacy Policy](/privacy)\\n+ [Terms of Service](/terms)\\n+ [Press Kit](/press)\\n+ [Advertise on Search](/duckduckgo-help-pages/company/advertise-on-duckduckgo-search)\\n\\n### Get Our Windows Browser\\n\\nProtect your data as you search and browse.\\n\\n[Download](https://duckduckgo.com/windows?origin=funnel_browser_searchresults__footercard)\\n\\nShare Feedback\\n\\nCustom date rangeX'"
64
+ ]
65
+ },
66
+ "execution_count": 8,
67
+ "metadata": {},
68
+ "output_type": "execute_result"
69
+ }
70
+ ],
71
+ "source": [
72
+ "web_search(\"What is the capital of France?\")"
73
+ ]
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "execution_count": null,
78
+ "id": "7d282ad1",
79
+ "metadata": {},
80
+ "outputs": [],
81
+ "source": [
82
+ "import yt_dlp\n",
83
+ "def transcribe_youtube_video(video_url: str) -> str:\n",
84
+ " \"\"\"Transcribe a YouTube video using yt-dlp and Whisper.\"\"\"\n",
85
+ " ydl_opts = {'format': 'm4a/bestaudio/best',\n",
86
+ " 'outtmpl': 'audio.m4a',\n",
87
+ " 'key': 'FFmpegExtractAudio',\n",
88
+ " 'preferredcodec': 'm4a',\n",
89
+ " }\n",
90
+ " with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
91
+ " info = ydl.extract_info(video_url)\n",
92
+ " captions = info.get(\"automatic_captions\", {})\n",
93
+ " if \"en\" in captions:\n",
94
+ " captions = captions[\"en\"]\n",
95
+ " for caption in captions:\n",
96
+ " if caption.get(\"ext\", \"\") == \"srt\":\n",
97
+ " url = caption.get(\"url\", \"\")\n",
98
+ " return requests.get(url).text\n",
99
+ "\n",
100
+ " ydl.download(video_url)\n",
101
+ "\n",
102
+ " transcript = audio_model.transcribe(\"audio.m4a\")\n",
103
+ " return transcript[\"text\"]"
104
+ ]
105
+ },
106
+ {
107
+ "cell_type": "code",
108
+ "execution_count": 24,
109
+ "id": "4304e783",
110
+ "metadata": {},
111
+ "outputs": [
112
+ {
113
+ "name": "stdout",
114
+ "output_type": "stream",
115
+ "text": [
116
+ "[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n",
117
+ "[youtube] 1htKBjuUWec: Downloading webpage\n",
118
+ "[youtube] 1htKBjuUWec: Downloading tv client config\n",
119
+ "[youtube] 1htKBjuUWec: Downloading tv player API JSON\n",
120
+ "[youtube] 1htKBjuUWec: Downloading ios player API JSON\n",
121
+ "[youtube] 1htKBjuUWec: Downloading m3u8 information\n",
122
+ "[info] 1htKBjuUWec: Downloading 1 format(s): 140\n",
123
+ "[download] audio.m4a has already been downloaded\n",
124
+ "[download] 100% of 463.20KiB\n",
125
+ "[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n",
126
+ "[youtube] 1htKBjuUWec: Downloading webpage\n",
127
+ "[youtube] 1htKBjuUWec: Downloading tv client config\n",
128
+ "[youtube] 1htKBjuUWec: Downloading tv player API JSON\n",
129
+ "[youtube] 1htKBjuUWec: Downloading ios player API JSON\n",
130
+ "[youtube] 1htKBjuUWec: Downloading m3u8 information\n",
131
+ "[info] 1htKBjuUWec: Downloading 1 format(s): 140\n",
132
+ "[download] audio.m4a has already been downloaded\n",
133
+ "[download] 100% of 463.20KiB\n"
134
+ ]
135
+ },
136
+ {
137
+ "name": "stderr",
138
+ "output_type": "stream",
139
+ "text": [
140
+ "x:\\Python Projects\\HF_Agents_Course_Final_Assignment\\.venv\\Lib\\site-packages\\whisper\\transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
141
+ " warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n"
142
+ ]
143
+ },
144
+ {
145
+ "data": {
146
+ "text/plain": [
147
+ "\" Wow, this coffee's great. I was just thinking that. Yeah, is that cinnamon? It's chicory. Chicory. Teal'c? Isn't that hot? Extremely.\""
148
+ ]
149
+ },
150
+ "execution_count": 24,
151
+ "metadata": {},
152
+ "output_type": "execute_result"
153
+ }
154
+ ],
155
+ "source": [
156
+ "transcribe_youtube_video(\"https://www.youtube.com/watch?v=1htKBjuUWec\")"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": null,
162
+ "id": "d650487e",
163
+ "metadata": {},
164
+ "outputs": [
165
+ {
166
+ "name": "stderr",
167
+ "output_type": "stream",
168
+ "text": [
169
+ "100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.51G/1.51G [13:03<00:00, 2.07MiB/s]\n"
170
+ ]
171
+ }
172
+ ],
173
+ "source": [
174
+ "def parse_doc_file(file_url: str) -> str:\n",
175
+ " \"\"\"\n",
176
+ " Parse any file and return its content.\n",
177
+ " \"\"\"\n",
178
+ " try:\n",
179
+ " response = requests.get(file_url)\n",
180
+ " response.raise_for_status()\n",
181
+ " elements = partition(file=io.BytesIO(response.content), include_page_breaks=True)\n",
182
+ " return \"\\n\\n\".join([str(el) for el in elements])\n",
183
+ " except Exception as e:\n",
184
+ " return f\"Failed to fetch file: {e}\"\n",
185
+ "\n",
186
+ "\n",
187
+ "def parse_audio_file(file_url: str) -> str:\n",
188
+ " \"\"\"\n",
189
+ " Parse an audio file and return its content.\n",
190
+ " \"\"\"\n",
191
+ " try:\n",
192
+ " response = requests.get(file_url)\n",
193
+ " response.raise_for_status()\n",
194
+ " return audio_model.transcribe(io.BytesIO(response.content))['text']\n",
195
+ " except Exception as e:\n",
196
+ " return f\"Failed to fetch file: {e}\""
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "code",
201
+ "execution_count": 3,
202
+ "id": "4000380f",
203
+ "metadata": {},
204
+ "outputs": [
205
+ {
206
+ "ename": "NameError",
207
+ "evalue": "name 'parse_audio_file' is not defined",
208
+ "output_type": "error",
209
+ "traceback": [
210
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
211
+ "\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
212
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mparse_audio_file\u001b[49m(\u001b[33m\"\u001b[39m\u001b[33mhttps://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3\u001b[39m\u001b[33m\"\u001b[39m)\n",
213
+ "\u001b[31mNameError\u001b[39m: name 'parse_audio_file' is not defined"
214
+ ]
215
+ }
216
+ ],
217
+ "source": [
218
+ "parse_audio_file(\"https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3\")"
219
+ ]
220
+ },
221
+ {
222
+ "cell_type": "code",
223
+ "execution_count": null,
224
+ "id": "9670ed13",
225
+ "metadata": {},
226
+ "outputs": [
227
+ {
228
+ "data": {
229
+ "text/plain": [
230
+ "161996"
231
+ ]
232
+ },
233
+ "execution_count": 15,
234
+ "metadata": {},
235
+ "output_type": "execute_result"
236
+ }
237
+ ],
238
+ "source": [
239
+ "len(text)"
240
+ ]
241
+ },
242
+ {
243
+ "cell_type": "code",
244
+ "execution_count": null,
245
+ "id": "c8f56eff",
246
+ "metadata": {},
247
+ "outputs": [
248
+ {
249
+ "name": "stdout",
250
+ "output_type": "stream",
251
+ "text": [
252
+ "115 \n",
253
+ "116 Toggle Discography subsection\n",
254
+ "117 + [5.1\n",
255
+ "118 Studio albums](#Studio_albums)\n",
256
+ "119 + [5.2\n",
257
+ "120 EPs](#EPs)\n",
258
+ "121 + [5.3\n",
259
+ "122 Live albums](#Live_albums)\n",
260
+ "123 + [5.4\n",
261
+ "124 Compilation albums](#Compilation_albums)\n",
262
+ "125 * [6\n",
263
+ "126 Filmography](#Filmography)\n",
264
+ "127 * [7\n",
265
+ "128 Further reading](#Further_reading)\n",
266
+ "\n",
267
+ "341 \n",
268
+ "342 Sosa recorded forty albums.[[4]](#cite_note-Legendary_folk_singer_Mercedes_Sosa_dies_at_74-4)[[9]](#cite_note-Latin_artist_Mercedes_Sosa_dies-9)\n",
269
+ "343 \n",
270
+ "344 ### Studio albums\n",
271
+ "345 \n",
272
+ "346 [[edit](/w/index.php?title=Mercedes_Sosa&action=edit&section=6 \"Edit section: Studio albums\")]\n",
273
+ "347 \n",
274
+ "348 | Year | Album details |\n",
275
+ "349 | --- | --- |\n",
276
+ "350 | 1962 | [La Voz De La Zafra](/wiki/La_Voz_De_La_Zafra \"La Voz De La Zafra\") * Label: RCA |\n",
277
+ "351 | 1965 | Canciones Con Fundamento * Label: El Grillo |\n",
278
+ "352 | 1966 | Hermano * Label: Philips |\n",
279
+ "353 | 1966 | Yo No Canto Por Cantar * Label: Philips |\n",
280
+ "354 | 1967 | Para Cantarle A Mi Gente * Label: Philips |\n",
281
+ "\n",
282
+ "343 \n",
283
+ "344 ### Studio albums\n",
284
+ "345 \n",
285
+ "346 [[edit](/w/index.php?title=Mercedes_Sosa&action=edit&section=6 \"Edit section: Studio albums\")]\n",
286
+ "347 \n",
287
+ "348 | Year | Album details |\n",
288
+ "349 | --- | --- |\n",
289
+ "350 | 1962 | [La Voz De La Zafra](/wiki/La_Voz_De_La_Zafra \"La Voz De La Zafra\") * Label: RCA |\n",
290
+ "351 | 1965 | Canciones Con Fundamento * Label: El Grillo |\n",
291
+ "352 | 1966 | Hermano * Label: Philips |\n",
292
+ "353 | 1966 | Yo No Canto Por Cantar * Label: Philips |\n",
293
+ "354 | 1967 | Para Cantarle A Mi Gente * Label: Philips |\n",
294
+ "355 | 1968 | Con Sabor A Mercedes Sosa * Label: Philips |\n",
295
+ "356 | 1969 | Mujeres Argentinas * Label: Philips |\n",
296
+ "\n"
297
+ ]
298
+ }
299
+ ],
300
+ "source": [
301
+ "import re\n",
302
+ "lines = text.splitlines()\n",
303
+ "for i, line in enumerate(lines):\n",
304
+ " if re.search(r'Studio albums', line):\n",
305
+ " print(i-3, lines[i-3])\n",
306
+ " print(i-2, lines[i-2])\n",
307
+ " print(i-1, lines[i-1])\n",
308
+ " print(i, line)\n",
309
+ " print(i+1, lines[i+1])\n",
310
+ " print(i+2, lines[i+2])\n",
311
+ " print(i+3, lines[i+3])\n",
312
+ " print(i+4, lines[i+4])\n",
313
+ " print(i+5, lines[i+5])\n",
314
+ " print(i+6, lines[i+6])\n",
315
+ " print(i+7, lines[i+7])\n",
316
+ " print(i+8, lines[i+8])\n",
317
+ " print(i+9, lines[i+9])\n",
318
+ " print(i+10, lines[i+10])\n",
319
+ " print()"
320
+ ]
321
+ },
322
+ {
323
+ "cell_type": "code",
324
+ "execution_count": null,
325
+ "id": "a904e623",
326
+ "metadata": {},
327
+ "outputs": [],
328
+ "source": []
329
+ }
330
+ ],
331
+ "metadata": {
332
+ "kernelspec": {
333
+ "display_name": ".venv",
334
+ "language": "python",
335
+ "name": "python3"
336
+ },
337
+ "language_info": {
338
+ "codemirror_mode": {
339
+ "name": "ipython",
340
+ "version": 3
341
+ },
342
+ "file_extension": ".py",
343
+ "mimetype": "text/x-python",
344
+ "name": "python",
345
+ "nbconvert_exporter": "python",
346
+ "pygments_lexer": "ipython3",
347
+ "version": "3.12.4"
348
+ }
349
+ },
350
+ "nbformat": 4,
351
+ "nbformat_minor": 5
352
+ }
uv.lock ADDED
The diff for this file is too large to render. See raw diff