Rounak Sen committed on
Commit ·
19efa7d
1
Parent(s): 81917a3
created agent
Browse files- .gitignore +2 -0
- .python-version +1 -0
- app.py +395 -78
- pyproject.toml +32 -0
- requirements.txt +291 -2
- temp.ipynb +352 -0
- uv.lock +0 -0
.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
*.m4a
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
app.py
CHANGED
|
@@ -1,38 +1,314 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import requests
|
| 4 |
-
import inspect
|
| 5 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# (Keep Constants as is)
|
| 8 |
# --- Constants ---
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 10 |
|
| 11 |
# --- Basic Agent Definition ---
|
| 12 |
-
# ----- THIS IS
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
"""
|
| 24 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 25 |
and displays the results.
|
| 26 |
"""
|
| 27 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 28 |
-
space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
|
| 29 |
|
| 30 |
-
if profile:
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
else:
|
| 34 |
-
|
| 35 |
-
|
|
|
|
| 36 |
|
| 37 |
api_url = DEFAULT_API_URL
|
| 38 |
questions_url = f"{api_url}/questions"
|
|
@@ -40,7 +316,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 40 |
|
| 41 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
| 42 |
try:
|
| 43 |
-
agent =
|
| 44 |
except Exception as e:
|
| 45 |
print(f"Error instantiating agent: {e}")
|
| 46 |
return f"Error initializing agent: {e}", None
|
|
@@ -55,16 +331,12 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 55 |
response.raise_for_status()
|
| 56 |
questions_data = response.json()
|
| 57 |
if not questions_data:
|
| 58 |
-
|
| 59 |
-
|
| 60 |
print(f"Fetched {len(questions_data)} questions.")
|
| 61 |
except requests.exceptions.RequestException as e:
|
| 62 |
print(f"Error fetching questions: {e}")
|
| 63 |
return f"Error fetching questions: {e}", None
|
| 64 |
-
except requests.exceptions.JSONDecodeError as e:
|
| 65 |
-
print(f"Error decoding JSON response from questions endpoint: {e}")
|
| 66 |
-
print(f"Response text: {response.text[:500]}")
|
| 67 |
-
return f"Error decoding server response for questions: {e}", None
|
| 68 |
except Exception as e:
|
| 69 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 70 |
return f"An unexpected error occurred fetching questions: {e}", None
|
|
@@ -76,22 +348,59 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 76 |
for item in questions_data:
|
| 77 |
task_id = item.get("task_id")
|
| 78 |
question_text = item.get("question")
|
|
|
|
| 79 |
if not task_id or question_text is None:
|
| 80 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 81 |
continue
|
| 82 |
try:
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
except Exception as e:
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
if not answers_payload:
|
| 91 |
print("Agent did not produce any answers to submit.")
|
| 92 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 93 |
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
| 95 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 96 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 97 |
print(status_update)
|
|
@@ -110,8 +419,11 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 110 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 111 |
)
|
| 112 |
print("Submission successful.")
|
|
|
|
|
|
|
| 113 |
results_df = pd.DataFrame(results_log)
|
| 114 |
return final_status, results_df
|
|
|
|
| 115 |
except requests.exceptions.HTTPError as e:
|
| 116 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 117 |
try:
|
|
@@ -123,16 +435,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 123 |
print(status_message)
|
| 124 |
results_df = pd.DataFrame(results_log)
|
| 125 |
return status_message, results_df
|
|
|
|
| 126 |
except requests.exceptions.Timeout:
|
| 127 |
status_message = "Submission Failed: The request timed out."
|
| 128 |
print(status_message)
|
| 129 |
results_df = pd.DataFrame(results_log)
|
| 130 |
return status_message, results_df
|
|
|
|
| 131 |
except requests.exceptions.RequestException as e:
|
| 132 |
status_message = f"Submission Failed: Network error - {e}"
|
| 133 |
print(status_message)
|
| 134 |
results_df = pd.DataFrame(results_log)
|
| 135 |
return status_message, results_df
|
|
|
|
| 136 |
except Exception as e:
|
| 137 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 138 |
print(status_message)
|
|
@@ -141,56 +456,58 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 141 |
|
| 142 |
|
| 143 |
# --- Build Gradio Interface using Blocks ---
|
| 144 |
-
with gr.Blocks() as demo:
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
|
| 161 |
-
|
| 162 |
|
| 163 |
-
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
-
|
| 175 |
-
print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 176 |
-
# Check for SPACE_HOST and SPACE_ID at startup for information
|
| 177 |
-
space_host_startup = os.getenv("SPACE_HOST")
|
| 178 |
-
space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
| 179 |
-
|
| 180 |
-
if space_host_startup:
|
| 181 |
-
print(f"β
SPACE_HOST found: {space_host_startup}")
|
| 182 |
-
print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
| 183 |
-
else:
|
| 184 |
-
print("βΉοΈ SPACE_HOST environment variable not found (running locally?).")
|
| 185 |
-
|
| 186 |
-
if space_id_startup: # Print repo URLs if SPACE_ID is found
|
| 187 |
-
print(f"β
SPACE_ID found: {space_id_startup}")
|
| 188 |
-
print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 189 |
-
print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 190 |
-
else:
|
| 191 |
-
print("βΉοΈ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 192 |
-
|
| 193 |
-
print("-"*(60 + len(" App Starting ")) + "\n")
|
| 194 |
-
|
| 195 |
-
print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 196 |
-
demo.launch(debug=True, share=False)
|
|
|
|
| 1 |
import os
|
| 2 |
+
import io
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
|
|
|
| 5 |
import pandas as pd
|
| 6 |
+
from time import sleep
|
| 7 |
+
from PIL import Image
|
| 8 |
+
import helium
|
| 9 |
+
from selenium import webdriver
|
| 10 |
+
from selenium.webdriver.common.by import By
|
| 11 |
+
from selenium.webdriver.common.keys import Keys
|
| 12 |
+
from selenium.webdriver.remote.webelement import WebElement
|
| 13 |
+
from smolagents import (
|
| 14 |
+
LiteLLMModel,
|
| 15 |
+
InferenceClientModel,
|
| 16 |
+
CodeAgent,
|
| 17 |
+
tool,
|
| 18 |
+
)
|
| 19 |
+
from yt_dlp import YoutubeDL
|
| 20 |
+
from pprint import pprint
|
| 21 |
+
from markdownify import markdownify as md
|
| 22 |
+
import urllib
|
| 23 |
+
from unstructured.partition.auto import partition
|
| 24 |
+
import whisper
|
| 25 |
+
from helium import *
|
| 26 |
+
from dotenv import load_dotenv
|
| 27 |
+
from phoenix.otel import register
|
| 28 |
+
from openinference.instrumentation.smolagents import SmolagentsInstrumentor
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# Load variables from .env before anything else reads the process environment.
# NOTE(review): the tracing setup below presumably picks up its collector
# settings from environment variables; loading .env afterwards (as before)
# would make values stored only in .env invisible to it - confirm.
load_dotenv()

# Tracing: register the Phoenix OpenTelemetry provider, then instrument
# smolagents so agent runs are traced.
register()
SmolagentsInstrumentor().instrument()

# Whisper speech-to-text model shared by the audio/YouTube tools in get_agent().
# Loaded once at import time.
audio_model = whisper.load_model("turbo")
|
| 36 |
|
| 37 |
# (Keep Constants as is)
|
| 38 |
# --- Constants ---
|
| 39 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 40 |
|
| 41 |
# --- Basic Agent Definition ---
|
| 42 |
+
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def get_agent(headless: bool = False, model_id: str = "gemini/gemini-2.0-flash-lite"):
    """Build a smolagents ``CodeAgent`` that drives a Chrome browser via helium.

    A Chrome session is started immediately, and every tool defined below is a
    closure over that single ``driver``. The helium usage instructions are
    appended to the agent's system prompt, and ``from helium import *`` is
    executed inside the agent's Python executor so generated code can call
    helium helpers directly.

    Args:
        headless: Start Chrome without a visible window. Defaults to ``False``
            (the original behaviour).
        model_id: LiteLLM model identifier used by the agent.

    Returns:
        The configured ``CodeAgent``.
    """
    # Function-scope imports: only the tools below need them, and the file's
    # top-level ``import urllib`` does not guarantee ``urllib.parse`` is loaded.
    import tempfile
    import urllib.parse

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--force-device-scale-factor=1")
    chrome_options.add_argument("--window-position=0,0")

    # Initialize the browser
    driver = helium.start_chrome(headless=headless, options=chrome_options)

    # Appended verbatim to the system prompt. The click_element example now
    # passes a single argument, matching the tool's actual signature.
    helium_instructions = """
You can use helium to access websites. Don't bother about the helium driver, it's already managed.
We've already ran "from helium import *"
Then you can go to pages!
Code:
```py
go_to('github.com/trending')
```<end_code>

You can directly click clickable elements by inputting the text that appears on them using the tool `click_element` with element as an argument.
This element is retrieved using the tool `get_element_by_text`.
Code:
```py
click_element(get_element_by_text("Top products"))
```<end_code>

If you try to interact with an element and it's not found, you'll get a LookupError.
Never try to login in a page.

You can search for a text on the page using the tool `search_item_ctrl_f` with text as an argument and the index of the element as an optional argument.
Code:
```py
search_item_ctrl_f("Top products")
```<end_code>

When you have pop-ups with a cross icon to close, don't try to click the close icon by finding its element or targeting an 'X' element (this most often fails).
Just use your built-in tool `close_popups` to close them:
Code:
```py
close_popups()
```<end_code>

You can use .exists() to check for the existence of an element. For example:
Code:
```py
if Text('Accept cookies?').exists():
    click('I accept')
```<end_code>
"""

    def _xpath_literal(value: str) -> str:
        """Quote ``value`` as an XPath 1.0 string literal.

        XPath 1.0 has no escape character, so text containing both quote
        kinds has to be assembled with ``concat()``.
        """
        if "'" not in value:
            return f"'{value}'"
        if '"' not in value:
            return f'"{value}"'
        pieces = value.split("'")
        return "concat(" + ", \"'\", ".join(f"'{piece}'" for piece in pieces) + ")"

    @tool
    def get_surrounding_elements(element: WebElement, num_elements: int = 50) -> str:
        """Returns the surrounding elements of an element.
        Args:
            element: The element to return the surrounding elements of.
            num_elements: The number of elements to return. Default is 50.
        """
        target = md(element.get_attribute("outerHTML"))
        page_lines = [
            line for line in md(driver.page_source).split("\n") if line.strip()
        ]
        for position, line in enumerate(page_lines):
            if line in target or target in line:
                # Clamp the start: a negative start index would count from the
                # end of the list and return the wrong (often empty) window.
                start = max(0, position - num_elements)
                return "\n".join(page_lines[start : position + num_elements])
        # No line matched the element: fall back to the top of the page.
        return "\n".join(page_lines[:num_elements])

    @tool
    def search_item_ctrl_f(text: str, nth_result: int | None = None) -> str:
        """
        Searches for text on the current page via Ctrl + F and jumps to the nth occurrence and scroll into view.
        Args:
            text: The text to search for
            nth_result: Which occurrence to jump to, counted from 1 (default: None, which returns the context of every match)
        """
        elements = driver.find_elements(
            By.XPATH, f"//*[contains(text(), {_xpath_literal(text)})]"
        )
        result = f"Found {len(elements)} matches for '{text}'."
        if nth_result is None:
            return (
                result
                + "\n"
                + "\n".join(get_surrounding_elements(match) for match in elements)
            )
        # Occurrences are 1-based; zero or negative values used to wrap around
        # silently through Python's negative indexing.
        if not 1 <= nth_result <= len(elements):
            raise Exception(
                f"Match n°{nth_result} not found (only {len(elements)} matches found)"
            )
        elem = elements[nth_result - 1]
        driver.execute_script("arguments[0].scrollIntoView(true);", elem)
        return (
            result
            + "\n"
            + f"This is the element : {nth_result}"
            + "\n"
            + get_surrounding_elements(elem)
        )

    @tool
    def go_back() -> None:
        """Goes back to previous page."""
        driver.back()

    @tool
    def close_popups() -> str:
        """
        Closes any visible modal or pop-up on the page. Use this to dismiss pop-up windows!
        This does not work on cookie consent banners.
        """
        webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
        # The signature promises a string; previously nothing was returned.
        return "Sent the ESCAPE key to the page to dismiss any open pop-up."

    @tool
    def scroll_into_view(element: WebElement) -> None:
        """Scrolls an element into view.
        Args:
            element: The element to scroll into view.
        """
        driver.execute_script("arguments[0].scrollIntoView(true);", element)

    @tool
    def click_element(element: WebElement) -> None:
        """Clicks an element.
        Args:
            element: The element to click.
        """
        element.click()

    @tool
    def get_element_by_text(text: str) -> WebElement:
        """Returns an element with the specified text.
        Args:
            text: The text of the element to return.
        """
        return driver.find_element(
            By.XPATH, f"//*[contains(text(), {_xpath_literal(text)})]"
        )

    @tool
    def visit_webpage_in_markdown(url: str) -> str:
        """Visits a webpage. Returns the markdown content of the page.
        Args:
            url: The URL of the webpage to visit.
        """
        driver.get(url)
        return md(driver.page_source)

    @tool
    def visit_webpage_in_html(url: str) -> str:
        """Visits a webpage. Returns the HTML content of the page.
        Args:
            url: The URL of the webpage to visit.
        """
        driver.get(url)
        return driver.page_source

    @tool
    def web_search(query: str) -> str:
        """Searches for a query on the web and returns the markdown content of the page.
        Args:
            query: The query to search for.
        """
        encoded_query = urllib.parse.quote(query)
        helium.go_to(f"https://duckduckgo.com/?q={encoded_query}&ia=web")
        return md(driver.page_source)

    @tool
    def transcribe_youtube_video(video_url: str) -> str:
        """Transcribe a YouTube video using yt-dlp and Whisper.
        Args:
            video_url: The URL of the YouTube video to transcribe.
        """
        # Use a fresh directory per call: with a fixed "audio.m4a" in the
        # working directory, a file left by an earlier video could be
        # transcribed instead of the requested one.
        with tempfile.TemporaryDirectory() as tmp_dir:
            audio_path = os.path.join(tmp_dir, "audio.m4a")
            # NOTE(review): the previous top-level "key"/"preferredcodec"
            # entries were not nested under "postprocessors", so they appear
            # to have had no effect; they are omitted here.
            ydl_opts = {
                "format": "m4a/bestaudio/best",
                # "%" starts a template field in yt-dlp output templates.
                "outtmpl": audio_path.replace("%", "%%"),
            }
            with YoutubeDL(ydl_opts) as ydl:
                # Metadata only: downloading is deferred until we know that no
                # usable caption track exists.
                info = ydl.extract_info(video_url, download=False)
                english_tracks = (info.get("automatic_captions") or {}).get("en", [])
                # NOTE(review): only "srt" tracks are accepted, as before;
                # confirm which caption formats are actually offered.
                for caption in english_tracks:
                    caption_url = caption.get("url", "")
                    if caption.get("ext", "") == "srt" and caption_url:
                        return requests.get(caption_url, timeout=60).text

                ydl.download([video_url])
            transcript = audio_model.transcribe(audio_path)
            return transcript["text"]

    @tool
    def parse_doc_file(file_url: str) -> str:
        """
        Parse any document type file like pdf, docx, xls, xlsx, etc and return its content in markdown format.
        Args:
            file_url: The URL of the document file to parse.
        """
        try:
            response = requests.get(file_url, timeout=60)
            response.raise_for_status()
            elements = partition(
                file=io.BytesIO(response.content), include_page_breaks=True
            )
            return "\n\n".join(str(el) for el in elements)
        except Exception as e:
            # Best effort: the agent receives the error text as the tool result.
            return f"Failed to fetch file: {e}"

    @tool
    def parse_audio_file(file_url: str) -> str:
        """
        Parse an audio file and return its content in markdown format.
        Args:
            file_url: The URL of the audio file to parse.
        """
        try:
            response = requests.get(file_url, timeout=60)
            response.raise_for_status()
            # Whisper's transcribe() takes a file path (or an array), not a
            # BytesIO, so the download is written to a temporary file first.
            with tempfile.TemporaryDirectory() as tmp_dir:
                audio_path = os.path.join(tmp_dir, "audio_input")
                with open(audio_path, "wb") as audio_file:
                    audio_file.write(response.content)
                return audio_model.transcribe(audio_path)["text"]
        except Exception as e:
            # Best effort: the agent receives the error text as the tool result.
            return f"Failed to fetch file: {e}"

    agent = CodeAgent(
        tools=[
            web_search,
            visit_webpage_in_markdown,
            visit_webpage_in_html,
            scroll_into_view,
            click_element,
            get_element_by_text,
            get_surrounding_elements,
            go_back,
            close_popups,
            search_item_ctrl_f,
            parse_doc_file,
            parse_audio_file,
            transcribe_youtube_video,
        ],
        model=LiteLLMModel(model_id),
        # A list rather than the bare string "*".
        additional_authorized_imports=["*"],
    )
    agent.prompt_templates["system_prompt"] += helium_instructions
    agent.python_executor("from helium import *")

    return agent
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def run_and_submit_all(profile: gr.OAuthProfile | None):
|
| 298 |
"""
|
| 299 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 300 |
and displays the results.
|
| 301 |
"""
|
| 302 |
# --- Determine HF Space Runtime URL and Repo URL ---
|
| 303 |
+
space_id = os.getenv("SPACE_ID", "rony000013/hf_agent_course") # Get the SPACE_ID for sending link to the code
|
| 304 |
|
| 305 |
+
# if profile:
|
| 306 |
+
# username= f"{profile.username}"
|
| 307 |
+
# print(f"User logged in: {username}")
|
| 308 |
+
# else:
|
| 309 |
+
# print("User not logged in.")
|
| 310 |
+
# return "Please Login to Hugging Face with the button.", None
|
| 311 |
+
username = "rony000013"
|
| 312 |
|
| 313 |
api_url = DEFAULT_API_URL
|
| 314 |
questions_url = f"{api_url}/questions"
|
|
|
|
| 316 |
|
| 317 |
# 1. Instantiate Agent ( modify this part to create your agent)
|
| 318 |
try:
|
| 319 |
+
agent = get_agent()
|
| 320 |
except Exception as e:
|
| 321 |
print(f"Error instantiating agent: {e}")
|
| 322 |
return f"Error initializing agent: {e}", None
|
|
|
|
| 331 |
response.raise_for_status()
|
| 332 |
questions_data = response.json()
|
| 333 |
if not questions_data:
|
| 334 |
+
print("Fetched questions list is empty.")
|
| 335 |
+
return "Fetched questions list is empty or invalid format.", None
|
| 336 |
print(f"Fetched {len(questions_data)} questions.")
|
| 337 |
except requests.exceptions.RequestException as e:
|
| 338 |
print(f"Error fetching questions: {e}")
|
| 339 |
return f"Error fetching questions: {e}", None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
except Exception as e:
|
| 341 |
print(f"An unexpected error occurred fetching questions: {e}")
|
| 342 |
return f"An unexpected error occurred fetching questions: {e}", None
|
|
|
|
| 348 |
for item in questions_data:
|
| 349 |
task_id = item.get("task_id")
|
| 350 |
question_text = item.get("question")
|
| 351 |
+
file_name = item.get("file_name")
|
| 352 |
if not task_id or question_text is None:
|
| 353 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 354 |
continue
|
| 355 |
try:
|
| 356 |
+
if file_name != "" and file_name is not None:
|
| 357 |
+
if (
|
| 358 |
+
file_name.endswith(".png")
|
| 359 |
+
or file_name.endswith(".jpg")
|
| 360 |
+
or file_name.endswith(".jpeg")
|
| 361 |
+
):
|
| 362 |
+
image_url = f"{api_url}/files/{task_id}"
|
| 363 |
+
image_response = requests.get(image_url)
|
| 364 |
+
image_response.raise_for_status()
|
| 365 |
+
image_data = image_response.content
|
| 366 |
+
image = Image.open(io.BytesIO(image_data))
|
| 367 |
+
submitted_answer = agent.run(question_text, images=[image], reset=True)
|
| 368 |
+
else:
|
| 369 |
+
submitted_answer = agent.run(
|
| 370 |
+
f"{question_text}\n\nFile name: {file_name}\n\nFile URL: {api_url}/files/{task_id}", reset=True
|
| 371 |
+
)
|
| 372 |
+
else:
|
| 373 |
+
submitted_answer = agent.run(question_text)
|
| 374 |
+
answers_payload.append(
|
| 375 |
+
{"task_id": task_id, "submitted_answer": submitted_answer}
|
| 376 |
+
)
|
| 377 |
+
results_log.append(
|
| 378 |
+
{
|
| 379 |
+
"Task ID": task_id,
|
| 380 |
+
"Question": question_text,
|
| 381 |
+
"Submitted Answer": submitted_answer,
|
| 382 |
+
}
|
| 383 |
+
)
|
| 384 |
except Exception as e:
|
| 385 |
+
print(f"Error running agent on task {task_id}: {e}")
|
| 386 |
+
results_log.append(
|
| 387 |
+
{
|
| 388 |
+
"Task ID": task_id,
|
| 389 |
+
"Question": question_text,
|
| 390 |
+
"Submitted Answer": f"AGENT ERROR: {e}",
|
| 391 |
+
}
|
| 392 |
+
)
|
| 393 |
+
|
| 394 |
+
sleep(30)
|
| 395 |
|
| 396 |
if not answers_payload:
|
| 397 |
print("Agent did not produce any answers to submit.")
|
| 398 |
return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
|
| 399 |
|
| 400 |
+
print("Agent produced answers to submit.")
|
| 401 |
+
print(answers_payload)
|
| 402 |
+
|
| 403 |
+
# 4. Prepare Submission
|
| 404 |
submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
|
| 405 |
status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
|
| 406 |
print(status_update)
|
|
|
|
| 419 |
f"Message: {result_data.get('message', 'No message received.')}"
|
| 420 |
)
|
| 421 |
print("Submission successful.")
|
| 422 |
+
pprint(result_data)
|
| 423 |
+
pprint(final_status)
|
| 424 |
results_df = pd.DataFrame(results_log)
|
| 425 |
return final_status, results_df
|
| 426 |
+
|
| 427 |
except requests.exceptions.HTTPError as e:
|
| 428 |
error_detail = f"Server responded with status {e.response.status_code}."
|
| 429 |
try:
|
|
|
|
| 435 |
print(status_message)
|
| 436 |
results_df = pd.DataFrame(results_log)
|
| 437 |
return status_message, results_df
|
| 438 |
+
|
| 439 |
except requests.exceptions.Timeout:
|
| 440 |
status_message = "Submission Failed: The request timed out."
|
| 441 |
print(status_message)
|
| 442 |
results_df = pd.DataFrame(results_log)
|
| 443 |
return status_message, results_df
|
| 444 |
+
|
| 445 |
except requests.exceptions.RequestException as e:
|
| 446 |
status_message = f"Submission Failed: Network error - {e}"
|
| 447 |
print(status_message)
|
| 448 |
results_df = pd.DataFrame(results_log)
|
| 449 |
return status_message, results_df
|
| 450 |
+
|
| 451 |
except Exception as e:
|
| 452 |
status_message = f"An unexpected error occurred during submission: {e}"
|
| 453 |
print(status_message)
|
|
|
|
| 456 |
|
| 457 |
|
| 458 |
# --- Build Gradio Interface using Blocks ---
|
| 459 |
+
# with gr.Blocks() as demo:
|
| 460 |
+
# gr.Markdown("# Basic Agent Evaluation Runner")
|
| 461 |
+
# gr.Markdown(
|
| 462 |
+
# """
|
| 463 |
+
# **Instructions:**
|
| 464 |
|
| 465 |
+
# 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
|
| 466 |
+
# 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
|
| 467 |
+
# 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
|
| 468 |
|
| 469 |
+
# ---
|
| 470 |
+
# **Disclaimers:**
|
| 471 |
+
# Once you click the "submit" button, it can take quite some time (this is the time the agent needs to go through all the questions).
|
| 472 |
+
# This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance, to address the delay after pressing the submit button, a solution could be to cache the answers and submit them in a separate action, or even to answer the questions asynchronously.
|
| 473 |
+
# """
|
| 474 |
+
# )
|
| 475 |
|
| 476 |
+
# gr.LoginButton()
|
| 477 |
|
| 478 |
+
# run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 479 |
|
| 480 |
+
# status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
|
| 481 |
+
# # Removed max_rows=10 from DataFrame constructor
|
| 482 |
+
# results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
|
| 483 |
|
| 484 |
+
# run_button.click(
|
| 485 |
+
# fn=run_and_submit_all,
|
| 486 |
+
# outputs=[status_output, results_table]
|
| 487 |
+
# )
|
| 488 |
+
|
| 489 |
+
# if __name__ == "__main__":
|
| 490 |
+
# print("\n" + "-"*30 + " App Starting " + "-"*30)
|
| 491 |
+
# # Check for SPACE_HOST and SPACE_ID at startup for information
|
| 492 |
+
# space_host_startup = os.getenv("SPACE_HOST")
|
| 493 |
+
# space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
|
| 494 |
+
|
| 495 |
+
# if space_host_startup:
|
| 496 |
+
# print(f"✅ SPACE_HOST found: {space_host_startup}")
|
| 497 |
+
# print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
|
| 498 |
+
# else:
|
| 499 |
+
# print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
|
| 500 |
+
|
| 501 |
+
# if space_id_startup: # Print repo URLs if SPACE_ID is found
|
| 502 |
+
# print(f"✅ SPACE_ID found: {space_id_startup}")
|
| 503 |
+
# print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
|
| 504 |
+
# print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
|
| 505 |
+
# else:
|
| 506 |
+
# print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
|
| 507 |
+
|
| 508 |
+
# print("-"*(60 + len(" App Starting ")) + "\n")
|
| 509 |
+
|
| 510 |
+
# print("Launching Gradio Interface for Basic Agent Evaluation...")
|
| 511 |
+
# demo.launch(debug=True, share=False)
|
| 512 |
|
| 513 |
+
# Run the full fetch -> answer -> submit cycle only when executed as a script,
# so that importing this module does not start a browser session or submit
# answers as a side effect.
if __name__ == "__main__":
    run_and_submit_all(None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
name = "hf-agents-course-final-assignment"
version = "0.1.0"
description = "Browser-driving agent for the Hugging Face Agents Course final assignment"
readme = "README.md"
requires-python = ">=3.12"
# NOTE: the PyPI distribution named "whisper" (Graphite's round-robin database
# library) is deliberately not listed. It installs a top-level `whisper` module
# that collides with the one provided by "openai-whisper", which is the package
# app.py relies on for `whisper.load_model`.
dependencies = [
    "arize-phoenix>=10.11.0",
    "arize-phoenix-otel>=0.10.3",
    "gradio>=5.33.0",
    "helium>=5.1.1",
    "huggingface-hub>=0.32.4",
    "itsdangerous>=2.2.0",
    "jupyter>=1.1.1",
    "langchain-community>=0.3.24",
    "markdownify>=1.1.0",
    "openai-whisper>=20240930",
    "openinference-instrumentation-litellm>=0.1.22",
    "openinference-instrumentation-smolagents>=0.1.13",
    "opentelemetry-exporter-otlp>=1.34.1",
    "opentelemetry-sdk>=1.34.1",
    "pandas>=2.3.0",
    "pillow>=11.2.1",
    "polars>=1.30.0",
    "requests>=2.32.3",
    "selenium>=4.33.0",
    "smolagents[litellm,telemetry,toolkit]>=1.17.0",
    "unstructured[all-docs]>=0.17.2",
    "wikipedia>=1.4.0",
    "yt-dlp>=2025.6.9",
]
|
requirements.txt
CHANGED
|
@@ -1,2 +1,291 @@
|
|
| 1 |
-
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate==1.7.0
|
| 2 |
+
aiofiles==24.1.0
|
| 3 |
+
aiohappyeyeballs==2.6.1
|
| 4 |
+
aiohttp==3.12.9
|
| 5 |
+
aioitertools==0.12.0
|
| 6 |
+
aiosignal==1.3.2
|
| 7 |
+
aiosqlite==0.21.0
|
| 8 |
+
alembic==1.16.1
|
| 9 |
+
annotated-types==0.7.0
|
| 10 |
+
antlr4-python3-runtime==4.9.3
|
| 11 |
+
anyio==4.9.0
|
| 12 |
+
argon2-cffi==25.1.0
|
| 13 |
+
argon2-cffi-bindings==21.2.0
|
| 14 |
+
arize-phoenix==10.11.0
|
| 15 |
+
arize-phoenix-client==1.10.0
|
| 16 |
+
arize-phoenix-evals==0.20.8
|
| 17 |
+
arize-phoenix-otel==0.10.3
|
| 18 |
+
arrow==1.3.0
|
| 19 |
+
asttokens==3.0.0
|
| 20 |
+
async-lru==2.0.5
|
| 21 |
+
attrs==25.3.0
|
| 22 |
+
authlib==1.6.0
|
| 23 |
+
babel==2.17.0
|
| 24 |
+
backoff==2.2.1
|
| 25 |
+
beautifulsoup4==4.13.4
|
| 26 |
+
bleach==6.2.0
|
| 27 |
+
cachetools==6.0.0
|
| 28 |
+
certifi==2025.4.26
|
| 29 |
+
cffi==1.17.1
|
| 30 |
+
chardet==5.2.0
|
| 31 |
+
charset-normalizer==3.4.2
|
| 32 |
+
click==8.2.1
|
| 33 |
+
colorama==0.4.6
|
| 34 |
+
coloredlogs==15.0.1
|
| 35 |
+
comm==0.2.2
|
| 36 |
+
contourpy==1.3.2
|
| 37 |
+
cryptography==45.0.4
|
| 38 |
+
cycler==0.12.1
|
| 39 |
+
dataclasses-json==0.6.7
|
| 40 |
+
debugpy==1.8.14
|
| 41 |
+
decorator==5.2.1
|
| 42 |
+
defusedxml==0.7.1
|
| 43 |
+
deprecated==1.2.18
|
| 44 |
+
distro==1.9.0
|
| 45 |
+
dnspython==2.7.0
|
| 46 |
+
duckduckgo-search==8.0.2
|
| 47 |
+
effdet==0.4.1
|
| 48 |
+
email-validator==2.2.0
|
| 49 |
+
emoji==2.14.1
|
| 50 |
+
et-xmlfile==2.0.0
|
| 51 |
+
executing==2.2.0
|
| 52 |
+
fastapi==0.115.12
|
| 53 |
+
fastjsonschema==2.21.1
|
| 54 |
+
ffmpy==0.6.0
|
| 55 |
+
filelock==3.18.0
|
| 56 |
+
filetype==1.2.0
|
| 57 |
+
flatbuffers==25.2.10
|
| 58 |
+
fonttools==4.58.2
|
| 59 |
+
fqdn==1.5.1
|
| 60 |
+
frozenlist==1.6.2
|
| 61 |
+
fsspec==2025.5.1
|
| 62 |
+
google-api-core==1.16.0
|
| 63 |
+
google-auth==1.6.3
|
| 64 |
+
google-cloud-vision==1.0.0
|
| 65 |
+
googleapis-common-protos==1.70.0
|
| 66 |
+
gradio==5.33.0
|
| 67 |
+
gradio-client==1.10.2
|
| 68 |
+
graphql-core==3.2.6
|
| 69 |
+
greenlet==3.2.2
|
| 70 |
+
groovy==0.1.2
|
| 71 |
+
grpc-interceptor==0.15.4
|
| 72 |
+
grpcio==1.73.0
|
| 73 |
+
h11==0.16.0
|
| 74 |
+
helium==5.1.1
|
| 75 |
+
html5lib==1.1
|
| 76 |
+
httpcore==1.0.9
|
| 77 |
+
httpx==0.28.1
|
| 78 |
+
httpx-sse==0.4.0
|
| 79 |
+
huggingface-hub==0.32.4
|
| 80 |
+
humanfriendly==10.0
|
| 81 |
+
idna==3.10
|
| 82 |
+
importlib-metadata==8.7.0
|
| 83 |
+
ipykernel==6.29.5
|
| 84 |
+
ipython==9.3.0
|
| 85 |
+
ipython-pygments-lexers==1.1.1
|
| 86 |
+
ipywidgets==8.1.7
|
| 87 |
+
isoduration==20.11.0
|
| 88 |
+
itsdangerous==2.2.0
|
| 89 |
+
jedi==0.19.2
|
| 90 |
+
jinja2==3.1.6
|
| 91 |
+
jiter==0.10.0
|
| 92 |
+
joblib==1.5.1
|
| 93 |
+
json5==0.12.0
|
| 94 |
+
jsonpatch==1.33
|
| 95 |
+
jsonpointer==3.0.0
|
| 96 |
+
jsonschema==4.24.0
|
| 97 |
+
jsonschema-specifications==2025.4.1
|
| 98 |
+
jupyter==1.1.1
|
| 99 |
+
jupyter-client==8.6.3
|
| 100 |
+
jupyter-console==6.6.3
|
| 101 |
+
jupyter-core==5.8.1
|
| 102 |
+
jupyter-events==0.12.0
|
| 103 |
+
jupyter-lsp==2.2.5
|
| 104 |
+
jupyter-server==2.16.0
|
| 105 |
+
jupyter-server-terminals==0.5.3
|
| 106 |
+
jupyterlab==4.4.3
|
| 107 |
+
jupyterlab-pygments==0.3.0
|
| 108 |
+
jupyterlab-server==2.27.3
|
| 109 |
+
jupyterlab-widgets==3.0.15
|
| 110 |
+
kiwisolver==1.4.8
|
| 111 |
+
langchain==0.3.25
|
| 112 |
+
langchain-community==0.3.24
|
| 113 |
+
langchain-core==0.3.63
|
| 114 |
+
langchain-text-splitters==0.3.8
|
| 115 |
+
langdetect==1.0.9
|
| 116 |
+
langsmith==0.3.45
|
| 117 |
+
litellm==1.72.1
|
| 118 |
+
llvmlite==0.44.0
|
| 119 |
+
lxml==5.4.0
|
| 120 |
+
mako==1.3.10
|
| 121 |
+
markdown==3.8
|
| 122 |
+
markdown-it-py==3.0.0
|
| 123 |
+
markdownify==1.1.0
|
| 124 |
+
markupsafe==3.0.2
|
| 125 |
+
marshmallow==3.26.1
|
| 126 |
+
matplotlib==3.10.3
|
| 127 |
+
matplotlib-inline==0.1.7
|
| 128 |
+
mdurl==0.1.2
|
| 129 |
+
mistune==3.1.3
|
| 130 |
+
more-itertools==10.7.0
|
| 131 |
+
mpmath==1.3.0
|
| 132 |
+
multidict==6.4.4
|
| 133 |
+
mypy-extensions==1.1.0
|
| 134 |
+
nbclient==0.10.2
|
| 135 |
+
nbconvert==7.16.6
|
| 136 |
+
nbformat==5.10.4
|
| 137 |
+
nest-asyncio==1.6.0
|
| 138 |
+
networkx==3.5
|
| 139 |
+
nltk==3.9.1
|
| 140 |
+
notebook==7.4.3
|
| 141 |
+
notebook-shim==0.2.4
|
| 142 |
+
numba==0.61.2
|
| 143 |
+
numpy==2.2.6
|
| 144 |
+
olefile==0.47
|
| 145 |
+
omegaconf==2.3.0
|
| 146 |
+
onnx==1.18.0
|
| 147 |
+
onnxruntime==1.22.0
|
| 148 |
+
openai==1.84.0
|
| 149 |
+
openai-whisper==20240930
|
| 150 |
+
opencv-python==4.11.0.86
|
| 151 |
+
openinference-instrumentation==0.1.33
|
| 152 |
+
openinference-instrumentation-litellm==0.1.22
|
| 153 |
+
openinference-instrumentation-smolagents==0.1.13
|
| 154 |
+
openinference-semantic-conventions==0.1.20
|
| 155 |
+
openpyxl==3.1.5
|
| 156 |
+
opentelemetry-api==1.34.1
|
| 157 |
+
opentelemetry-exporter-otlp==1.34.1
|
| 158 |
+
opentelemetry-exporter-otlp-proto-common==1.34.1
|
| 159 |
+
opentelemetry-exporter-otlp-proto-grpc==1.34.1
|
| 160 |
+
opentelemetry-exporter-otlp-proto-http==1.34.1
|
| 161 |
+
opentelemetry-instrumentation==0.55b1
|
| 162 |
+
opentelemetry-proto==1.34.1
|
| 163 |
+
opentelemetry-sdk==1.34.1
|
| 164 |
+
opentelemetry-semantic-conventions==0.55b1
|
| 165 |
+
orjson==3.10.18
|
| 166 |
+
outcome==1.3.0.post0
|
| 167 |
+
overrides==7.7.0
|
| 168 |
+
packaging==24.2
|
| 169 |
+
pandas==2.3.0
|
| 170 |
+
pandocfilters==1.5.1
|
| 171 |
+
parso==0.8.4
|
| 172 |
+
pdf2image==1.17.0
|
| 173 |
+
pdfminer-six==20250506
|
| 174 |
+
pi-heif==0.22.0
|
| 175 |
+
pikepdf==9.8.1
|
| 176 |
+
pillow==11.2.1
|
| 177 |
+
platformdirs==4.3.8
|
| 178 |
+
polars==1.30.0
|
| 179 |
+
primp==0.15.0
|
| 180 |
+
prometheus-client==0.22.1
|
| 181 |
+
prompt-toolkit==3.0.51
|
| 182 |
+
propcache==0.3.1
|
| 183 |
+
protobuf==5.29.5
|
| 184 |
+
psutil==7.0.0
|
| 185 |
+
pure-eval==0.2.3
|
| 186 |
+
pyarrow==20.0.0
|
| 187 |
+
pyasn1==0.6.1
|
| 188 |
+
pyasn1-modules==0.4.2
|
| 189 |
+
pycocotools==2.0.10
|
| 190 |
+
pycparser==2.22
|
| 191 |
+
pydantic==2.11.5
|
| 192 |
+
pydantic-core==2.33.2
|
| 193 |
+
pydantic-settings==2.9.1
|
| 194 |
+
pydub==0.25.1
|
| 195 |
+
pygments==2.19.1
|
| 196 |
+
pypandoc==1.15
|
| 197 |
+
pyparsing==3.2.3
|
| 198 |
+
pypdf==5.6.0
|
| 199 |
+
pypdfium2==4.30.1
|
| 200 |
+
pyreadline3==3.5.4
|
| 201 |
+
pysocks==1.7.1
|
| 202 |
+
python-dateutil==2.9.0.post0
|
| 203 |
+
python-docx==1.1.2
|
| 204 |
+
python-dotenv==1.1.0
|
| 205 |
+
python-iso639==2025.2.18
|
| 206 |
+
python-json-logger==3.3.0
|
| 207 |
+
python-magic==0.4.27
|
| 208 |
+
python-multipart==0.0.20
|
| 209 |
+
python-oxmsg==0.0.2
|
| 210 |
+
python-pptx==1.0.2
|
| 211 |
+
pytz==2025.2
|
| 212 |
+
pywin32==310
|
| 213 |
+
pywinpty==2.0.15
|
| 214 |
+
pyyaml==6.0.2
|
| 215 |
+
pyzmq==26.4.0
|
| 216 |
+
rapidfuzz==3.13.0
|
| 217 |
+
referencing==0.36.2
|
| 218 |
+
regex==2024.11.6
|
| 219 |
+
requests==2.32.3
|
| 220 |
+
requests-toolbelt==1.0.0
|
| 221 |
+
rfc3339-validator==0.1.4
|
| 222 |
+
rfc3986-validator==0.1.1
|
| 223 |
+
rich==14.0.0
|
| 224 |
+
rpds-py==0.25.1
|
| 225 |
+
rsa==4.9.1
|
| 226 |
+
ruff==0.11.12
|
| 227 |
+
safehttpx==0.1.6
|
| 228 |
+
safetensors==0.5.3
|
| 229 |
+
scikit-learn==1.7.0
|
| 230 |
+
scipy==1.15.3
|
| 231 |
+
selenium==4.33.0
|
| 232 |
+
semantic-version==2.10.0
|
| 233 |
+
send2trash==1.8.3
|
| 234 |
+
setuptools==80.9.0
|
| 235 |
+
shellingham==1.5.4
|
| 236 |
+
six==1.17.0
|
| 237 |
+
smolagents==1.17.0
|
| 238 |
+
sniffio==1.3.1
|
| 239 |
+
sortedcontainers==2.4.0
|
| 240 |
+
soupsieve==2.7
|
| 241 |
+
sqlalchemy==2.0.41
|
| 242 |
+
sqlean-py==3.49.1
|
| 243 |
+
stack-data==0.6.3
|
| 244 |
+
starlette==0.46.2
|
| 245 |
+
strawberry-graphql==0.270.1
|
| 246 |
+
sympy==1.14.0
|
| 247 |
+
tenacity==9.1.2
|
| 248 |
+
terminado==0.18.1
|
| 249 |
+
threadpoolctl==3.6.0
|
| 250 |
+
tiktoken==0.9.0
|
| 251 |
+
timm==1.0.15
|
| 252 |
+
tinycss2==1.4.0
|
| 253 |
+
tokenizers==0.21.1
|
| 254 |
+
tomlkit==0.13.2
|
| 255 |
+
torch==2.7.1
|
| 256 |
+
torchvision==0.22.1
|
| 257 |
+
tornado==6.5.1
|
| 258 |
+
tqdm==4.67.1
|
| 259 |
+
traitlets==5.14.3
|
| 260 |
+
transformers==4.52.4
|
| 261 |
+
trio==0.30.0
|
| 262 |
+
trio-websocket==0.12.2
|
| 263 |
+
typer==0.16.0
|
| 264 |
+
types-python-dateutil==2.9.0.20250516
|
| 265 |
+
typing-extensions==4.13.2
|
| 266 |
+
typing-inspect==0.9.0
|
| 267 |
+
typing-inspection==0.4.1
|
| 268 |
+
tzdata==2025.2
|
| 269 |
+
unstructured==0.17.2
|
| 270 |
+
unstructured-client==0.36.0
|
| 271 |
+
unstructured-inference==1.0.5
|
| 272 |
+
unstructured-pytesseract==0.3.15
|
| 273 |
+
uri-template==1.3.0
|
| 274 |
+
urllib3==2.4.0
|
| 275 |
+
uvicorn==0.34.3
|
| 276 |
+
wcwidth==0.2.13
|
| 277 |
+
webcolors==24.11.1
|
| 278 |
+
webencodings==0.5.1
|
| 279 |
+
websocket-client==1.8.0
|
| 280 |
+
websockets==15.0.1
|
| 281 |
+
whisper==1.1.10
|
| 282 |
+
widgetsnbextension==4.0.14
|
| 283 |
+
wikipedia==1.4.0
|
| 284 |
+
wrapt==1.17.2
|
| 285 |
+
wsproto==1.2.0
|
| 286 |
+
xlrd==2.0.1
|
| 287 |
+
xlsxwriter==3.2.3
|
| 288 |
+
yarl==1.20.0
|
| 289 |
+
yt-dlp==2025.6.9
|
| 290 |
+
zipp==3.22.0
|
| 291 |
+
zstandard==0.23.0
|
temp.ipynb
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": 10,
|
| 6 |
+
"id": "8473efba",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"from helium import *\n",
|
| 11 |
+
"from selenium import webdriver\n",
|
| 12 |
+
"from selenium.webdriver.common.by import By\n",
|
| 13 |
+
"from selenium.webdriver.common.keys import Keys\n",
|
| 14 |
+
"from markdownify import markdownify as md\n",
|
| 15 |
+
"import urllib\n",
|
| 16 |
+
"import requests\n",
|
| 17 |
+
"from unstructured.partition.auto import partition\n",
|
| 18 |
+
"import io\n",
|
| 19 |
+
"import whisper\n",
|
| 20 |
+
"from pprint import pprint\n",
|
| 21 |
+
"audio_model = whisper.load_model(\"turbo\")"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"cell_type": "code",
|
| 26 |
+
"execution_count": 6,
|
| 27 |
+
"id": "a03c6324",
|
| 28 |
+
"metadata": {},
|
| 29 |
+
"outputs": [],
|
| 30 |
+
"source": [
|
| 31 |
+
"chrome_options = webdriver.ChromeOptions()\n",
|
| 32 |
+
"chrome_options.add_argument(\"--force-device-scale-factor=1\")\n",
|
| 33 |
+
"# chrome_options.add_argument(\"--window-size=1000,1350\")\n",
|
| 34 |
+
"# chrome_options.add_argument(\"--disable-pdf-viewer\")\n",
|
| 35 |
+
"chrome_options.add_argument(\"--window-position=0,0\")\n",
|
| 36 |
+
"\n",
|
| 37 |
+
"# Initialize the browser\n",
|
| 38 |
+
"driver = helium.start_chrome(headless=False, options=chrome_options)"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": null,
|
| 44 |
+
"id": "3088cb0a",
|
| 45 |
+
"metadata": {},
|
| 46 |
+
"outputs": [],
|
| 47 |
+
"source": [
|
| 48 |
+
"def web_search(query: str) -> str:\n",
|
| 49 |
+
" query = urllib.parse.quote(query)\n",
|
| 50 |
+
" go_to(f\"https://duckduckgo.com/?q={query}&ia=web\")\n",
|
| 51 |
+
" return md(driver.page_source)"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"cell_type": "code",
|
| 56 |
+
"execution_count": 8,
|
| 57 |
+
"id": "9b8c0eb9",
|
| 58 |
+
"metadata": {},
|
| 59 |
+
"outputs": [
|
| 60 |
+
{
|
| 61 |
+
"data": {
|
| 62 |
+
"text/plain": [
|
| 63 |
+
"'What is the capital of France? at DuckDuckGo\\n\\n[DuckDuckGo](/)\\n\\nShortcuts to other sites to search off DuckDuckGo[Learn More](/bang)\\n\\nOpen menu\\n\\n* [All](/?q=What+is+the+capital+of+France%3F&ia=web)\\n* [Images](/?q=What+is+the+capital+of+France%3F&ia=images&iax=images)\\n* [Videos](/?q=What+is+the+capital+of+France%3F&ia=videos&iax=videos)\\n* [Q&A](/?q=What+is+the+capital+of+France%3F&ia=qa)\\n* More\\n\\n [News](/?q=What+is+the+capital+of+France%3F&ia=news&iar=news)\\n\\n [Maps](/?q=What+is+the+capital+of+France%3F&iaxm=maps)\\n\\n* [Assist](/?q=What+is+the+capital+of+France%3F&ia=web&assist=false)\\n* [Duck.ai](/?q=What+is+the+capital+of+France%3F&ia=chat)\\n* Search Settings\\n\\nYou are being redirected to the non-JavaScript site.\\n\\nClick [here](/html/?q=What%20is%20the%20capital%20of%20France%3F) if it doesn\\'t happen automatically.\\n\\nAlways protected\\n\\nDuckDuckGo never tracks your searches.\\n\\n[Learn More](https://duckduckgo.com/duckduckgo-help-pages/search-privacy/)\\n\\nYou can hide this reminder in [Search Settings](/settings#appearance)\\n\\nIndia (en)\\n\\nRecent:\\n\\nClear All\\n\\nIndia (en)\\n\\nAll regions\\n\\nArgentina\\n\\nAustralia\\n\\nAustria\\n\\nBelgium (fr)\\n\\nBelgium (nl)\\n\\nBrazil\\n\\nBulgaria\\n\\nCanada (en)\\n\\nCanada (fr)\\n\\nCatalonia\\n\\nChile\\n\\nChina\\n\\nColombia\\n\\nCroatia\\n\\nCzechia\\n\\nDenmark\\n\\nEstonia\\n\\nFinland\\n\\nFrance\\n\\nGermany\\n\\nGreece\\n\\nHong Kong\\n\\nHungary\\n\\nIceland\\n\\nIndonesia (en)\\n\\nIreland\\n\\nIsrael (en)\\n\\nItaly\\n\\nJapan\\n\\nKorea\\n\\nLatvia\\n\\nLithuania\\n\\nMalaysia (en)\\n\\nMexico\\n\\nNetherlands\\n\\nNew Zealand\\n\\nNorway\\n\\nPakistan (en)\\n\\nPeru\\n\\nPhilippines (en)\\n\\nPoland\\n\\nPortugal\\n\\nRomania\\n\\nRussia\\n\\nSaudi Arabia\\n\\nSingapore\\n\\nSlovakia\\n\\nSlovenia\\n\\nSouth Africa\\n\\nSpain (ca)\\n\\nSpain (es)\\n\\nSweden\\n\\nSwitzerland (de)\\n\\nSwitzerland (fr)\\n\\nTaiwan\\n\\nThailand 
(en)\\n\\nTurkey\\n\\nUkraine\\n\\nUnited Kingdom\\n\\nUS (English)\\n\\nUS (Spanish)\\n\\nVietnam (en)\\n\\nSafe search: moderate\\n\\nStrict\\n\\nModerate\\n\\nOff\\n\\nAny time\\n\\nAny time\\n\\nPast day\\n\\nPast week\\n\\nPast month\\n\\nPast year\\n\\nCustom date range\\n\\n1. Assist\\n\\n The capital of France is Paris. It is the largest city in the country and a major center for culture, finance, and diplomacy.\\n\\n [ Wikipedia](https://en.wikipedia.org/wiki/Paris)[ Encyclopedia Britannica](https://www.britannica.com/video/video-production-overview-city-Paris-Encyclopaedia-Britannica-1994/-68351)\\n\\n Auto-generated based on listed sources. May contain inaccuracies.\\n\\n ShowNeverSometimesOften\\n\\n Chat\\n\\n Was this helpful?\\n2. en.wikipedia.org\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Aen.wikipedia.org)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Aen.wikipedia.org)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:en.wikipedia.org \"Search domain en.wikipedia.org\")\\n\\n Wikipedia\\n\\n [https://en.wikipedia.org\\xa0βΊ\\xa0wiki βΊ Paris](https://en.wikipedia.org/wiki/Paris)\\n\\n [Paris - Wikipedia](https://en.wikipedia.org/wiki/Paris)\\n --------------------------------------------------------\\n\\n Paris (French pronunciation: [paΚi] **β)** **is** **the** **capital** and largest city of **France**. With an estimated population of 2,048,472 residents in January 2025 [3] in an area of more than 105 km 2 (41 sq mi), [4] Paris is the fourth-most populous city in the European Union and the 30th most densely populated city in the world in 2022. [5] Since the 17th century, Paris has been one of the world\\'s ...\\n3. 
britannica.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.britannica.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.britannica.com)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.britannica.com \"Search domain britannica.com\")\\n\\n Britannica\\n\\n [https://www.britannica.com\\xa0βΊ\\xa0place βΊ Paris](https://www.britannica.com/place/Paris)\\n\\n [Paris | Definition, Map, Population, Facts, & History | Britannica](https://www.britannica.com/place/Paris)\\n ------------------------------------------------------------------------------------------------------------\\n\\n Jun 5, 2025Paris, city and **capital** **of** **France**, located along the Seine River, in the north-central part of the country. Paris is one of the world\\'s most important and attractive cities, famed for its gastronomy, haute couture, painting, literature, and intellectual community. Learn more about Paris in this article.\\n4. worldatlas.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.worldatlas.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.worldatlas.com)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.worldatlas.com \"Search domain worldatlas.com\")\\n\\n WorldAtlas\\n\\n [https://www.worldatlas.com\\xa0βΊ\\xa0articles βΊ what-is-the-capital-of-france.html](https://www.worldatlas.com/articles/what-is-the-capital-of-france.html)\\n\\n [What is the Capital of France? 
- WorldAtlas](https://www.worldatlas.com/articles/what-is-the-capital-of-france.html)\\n ---------------------------------------------------------------------------------------------------------------------\\n\\n Learn about Paris, the largest and most populous city in **France**, and its history, geography, economy, tourism, and administration. Find out why Paris is called the City of Light and the City of Love.\\n5. mappr.co\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.mappr.co)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.mappr.co)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.mappr.co \"Search domain mappr.co\")\\n\\n Mappr\\n\\n [https://www.mappr.co\\xa0βΊ\\xa0capital-cities βΊ france](https://www.mappr.co/capital-cities/france/)\\n\\n [What is the Capital of France? - Mappr](https://www.mappr.co/capital-cities/france/)\\n -------------------------------------------------------------------------------------\\n\\n Learn why Paris is the **capital** **of** **France** and how it became a global city with a rich cultural heritage. Discover its geography, climate, population, landmarks, and industries.\\n6. wikiwand.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.wikiwand.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.wikiwand.com)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.wikiwand.com \"Search domain wikiwand.com\")\\n\\n Wikiwand\\n\\n [https://www.wikiwand.com\\xa0βΊ\\xa0en βΊ articles βΊ Paris](https://www.wikiwand.com/en/articles/Paris)\\n\\n [Paris - Wikiwand](https://www.wikiwand.com/en/articles/Paris)\\n --------------------------------------------------------------\\n\\n Paris is the **capital** and largest city of **France**. 
With an estimated population of 2,048,472 residents in January 2025 in an area of more than 105 km2 (41 sq mi),...\\n7. theworldcountries.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Atheworldcountries.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Atheworldcountries.com)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:theworldcountries.com \"Search domain theworldcountries.com\")\\n\\n The World Countries\\n\\n [https://theworldcountries.com\\xa0βΊ\\xa0place βΊ paris](https://theworldcountries.com/place/paris/)\\n\\n [Paris - capital city of France - The World Countries](https://theworldcountries.com/place/paris/)\\n --------------------------------------------------------------------------------------------------\\n\\n Learn about the history, geography, culture, and attractions of Paris, the city and **capital** **of** **France**. Find out why Paris is called the City of Light and explore its landmarks, museums, and parks.\\n8. simple.wikipedia.org\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Asimple.wikipedia.org)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Asimple.wikipedia.org)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:simple.wikipedia.org \"Search domain simple.wikipedia.org\")\\n\\n Wikipedia\\n\\n [https://simple.wikipedia.org\\xa0βΊ\\xa0wiki βΊ Paris](https://simple.wikipedia.org/wiki/Paris)\\n\\n [Paris - Simple English Wikipedia, the free encyclopedia](https://simple.wikipedia.org/wiki/Paris)\\n --------------------------------------------------------------------------------------------------\\n\\n Paris is the **capital** city of **France** and the largest city in **France**. 
It has a rich history, many art museums, historical buildings, and a famous landmark, the Eiffel Tower.\\n9. countryaah.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.countryaah.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.countryaah.com)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.countryaah.com \"Search domain countryaah.com\")\\n\\n Countryaah.com\\n\\n [https://www.countryaah.com\\xa0βΊ\\xa0france-faqs](https://www.countryaah.com/france-faqs/)\\n\\n [What is the Capital of France? Paris - Countryaah.com](https://www.countryaah.com/france-faqs/)\\n ------------------------------------------------------------------------------------------------\\n\\n Learn about Paris, the **capital** city of **France**, and its rich history, culture, and landmarks. Find out how Paris became the political and administrative center of **France** and why it is called \"**The** City of Light\".\\n10. 
newworldencyclopedia.org\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Awww.newworldencyclopedia.org)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Awww.newworldencyclopedia.org)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:www.newworldencyclopedia.org \"Search domain newworldencyclopedia.org\")\\n\\n New World Encyclopedia\\n\\n [https://www.newworldencyclopedia.org\\xa0βΊ\\xa0entry βΊ Paris,\\\\_France](https://www.newworldencyclopedia.org/entry/Paris,_France)\\n\\n [Paris, France - New World Encyclopedia](https://www.newworldencyclopedia.org/entry/Paris,_France)\\n --------------------------------------------------------------------------------------------------\\n\\n Paris is the **capital** city of **France**, situated on the River Seine, in northern **France**, at the heart of the Γle-de-**France** region. Learn about its history, culture, landmarks, and attractions in this comprehensive article.\\n11. isolatedtraveller.com\\n\\n [Only include results for this site](?q=What%20is%20the%20capital%20of%20France%3F%20site%3Aisolatedtraveller.com)[Hide site from these results](?q=What%20is%20the%20capital%20of%20France%3F%20-site%3Aisolatedtraveller.com)\\n\\n Share feedback about this site\\n\\n [](/?q=What%20is%20the%20capital%20of%20France%3F+site:isolatedtraveller.com \"Search domain isolatedtraveller.com\")\\n\\n Isolated Traveller\\n\\n [https://isolatedtraveller.com\\xa0βΊ\\xa0what-is-the-capital-city-of-france](https://isolatedtraveller.com/what-is-the-capital-city-of-france/)\\n\\n [What Is The Capital City Of France? 
| Isolated Traveller](https://isolatedtraveller.com/what-is-the-capital-city-of-france/)\\n -----------------------------------------------------------------------------------------------------------------------------\\n\\n Learn about the history, population, landmarks and role of Paris as the **capital** city of **France**. Find out how Paris became the French **capital** in 1944 and what international organizations have their headquarters there.\\n\\nMore results\\n\\n1. \\n\\n Directions[Paris\\n -----](https://en.wikipedia.org/wiki/Paris)\\n\\n Capital city and largest city of France\\n\\n β’ [paris.fr](https://paris.fr)\\n\\n Paris is the capital and largest city of France. With an estimated population of 2,048,472 residents in January 2025 in an area of more than 105 kmΒ², Paris is the fourth-most populous city in the European Union and the 30th most densely populated city in the world in 2022. Since the 17th century, Paris has been one of the world\\'s major centres of finance, diplomacy, commerce, culture, fashion, and gastronomy. Because of its leading role in the arts and sciences and its early adaptation of extensive street lighting, Paris became known as the City of Light in the 19th century. The City of Paris is the centre of the Γle-de-France region, or Paris Region, with an official estimated population of 12,271,794 inhabitants in January 2023, or about 19% of the population of France. The Paris Region had a nominal GDP of β¬765 billion in 2021, the highest in the European Union. [Wikipedia](https://en.wikipedia.org/wiki/Paris)\\n\\n | | |\\n | --- | --- |\\n | Country | France |\\n | Arrondissement | None |\\n | Intercommunality | MΓ©tropole du Grand Paris |\\n\\n [Website](https://paris.fr)[Wikipedia](https://en.wikipedia.org/wiki/Paris)[Instagram](https://instagram.com/paris_maville)[Facebook](https://facebook.com/paris)\\n\\n Was this helpful?\\n2. Searches related to **What is the capital of France?**\\n\\n Related Searches\\n\\n 1. 
[**explain** capital of france\\u200b](?q=explain%20capital%20of%20france)\\n 2. [capital of france **during** **ww2**\\u200b](?q=capital%20of%20france%20during%20ww2)\\n 3. [capital **city** of france **facts**\\u200b](?q=capital%20city%20of%20france%20facts)\\n 4. [**biggest** **city** **in** france capital\\u200b](?q=biggest%20city%20in%20france%20capital)\\n 1. [what is france\\'**s** capital **city**\\u200b](?q=what%20is%20france%27s%20capital%20city)\\n 2. [what **region** is **paris** **located**\\u200b](?q=what%20region%20is%20paris%20located)\\n 3. [**largest** **city** **in** france capital\\u200b](?q=largest%20city%20in%20france%20capital)\\n 4. [capital of france **in** **french**\\u200b](?q=capital%20of%20france%20in%20french)\\n\\nClose menu\\n\\nUpgrade to our Private Browser\\n\\nFast. Secure. Free.\\n\\n[Install Windows Browser](/windows)\\n\\n+ Search\\n+ [Homepage](https://start.duckduckgo.com/)\\n+ [Themes](/settings#appearance)\\n+ [Settings](/settings)\\n\\n+ Share Feedback\\n\\n+ Downloads\\n+ [iOS Browser](https://apps.apple.com/app/duckduckgo-private-browser/id663592361?platform=iphone&pt=866401&mt=8&ct=serp-atb-serp)\\n+ [Android Browser](https://play.google.com/store/apps/details?id=com.duckduckgo.mobile.android&referrer=utm_campaign%3Dserp-atb-serp%26origin%3Dfunnel_playstore_searchresults)\\n+ [Mac Browser](/mac?origin=funnel_browser_searchresults)\\n+ [Windows Browser](/windows?origin=funnel_browser_searchresults)\\n+ [Browser Extensions](/duckduckgo-help-pages/desktop/adding-duckduckgo-to-your-browser/)\\n\\n+ More From DuckDuckGo\\n+ [Duck.ai](https://duck.ai)\\n\\n NEW\\n+ [Email Protection](/email)\\n+ [Newsletter](/newsletter)\\n+ [Blog](/blog)\\n\\n+ Learn More\\n+ [Whatβs New](/updates)\\n+ [Compare Privacy](/compare-privacy)\\n+ [About Our Browser](/app)\\n+ [About DuckDuckGo](/about)\\n\\n+ Other Resources\\n+ [Help](/duckduckgo-help-pages)\\n+ [Community](https://www.reddit.com/r/duckduckgo/)\\n+ [Careers](/careers)\\n+ 
[Privacy Policy](/privacy)\\n+ [Terms of Service](/terms)\\n+ [Press Kit](/press)\\n+ [Advertise on Search](/duckduckgo-help-pages/company/advertise-on-duckduckgo-search)\\n\\n### Get Our Windows Browser\\n\\nProtect your data as you search and browse.\\n\\n[Download](https://duckduckgo.com/windows?origin=funnel_browser_searchresults__footercard)\\n\\nShare Feedback\\n\\nCustom date rangeX'"
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
"execution_count": 8,
|
| 67 |
+
"metadata": {},
|
| 68 |
+
"output_type": "execute_result"
|
| 69 |
+
}
|
| 70 |
+
],
|
| 71 |
+
"source": [
|
| 72 |
+
"web_search(\"What is the capital of France?\")"
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"cell_type": "code",
|
| 77 |
+
"execution_count": null,
|
| 78 |
+
"id": "7d282ad1",
|
| 79 |
+
"metadata": {},
|
| 80 |
+
"outputs": [],
|
| 81 |
+
"source": [
|
| 82 |
+
"import yt_dlp\n",
|
| 83 |
+
"def transcribe_youtube_video(video_url: str) -> str:\n",
|
| 84 |
+
" \"\"\"Transcribe a YouTube video using yt-dlp and Whisper.\"\"\"\n",
|
| 85 |
+
" ydl_opts = {'format': 'm4a/bestaudio/best',\n",
|
| 86 |
+
" 'outtmpl': 'audio.m4a',\n",
|
| 87 |
+
" 'key': 'FFmpegExtractAudio',\n",
|
| 88 |
+
" 'preferredcodec': 'm4a',\n",
|
| 89 |
+
" }\n",
|
| 90 |
+
" with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n",
|
| 91 |
+
" info = ydl.extract_info(video_url)\n",
|
| 92 |
+
" captions = info.get(\"automatic_captions\", {})\n",
|
| 93 |
+
" if \"en\" in captions:\n",
|
| 94 |
+
" captions = captions[\"en\"]\n",
|
| 95 |
+
" for caption in captions:\n",
|
| 96 |
+
" if caption.get(\"ext\", \"\") == \"srt\":\n",
|
| 97 |
+
" url = caption.get(\"url\", \"\")\n",
|
| 98 |
+
" return requests.get(url).text\n",
|
| 99 |
+
"\n",
|
| 100 |
+
" ydl.download(video_url)\n",
|
| 101 |
+
"\n",
|
| 102 |
+
" transcript = audio_model.transcribe(\"audio.m4a\")\n",
|
| 103 |
+
" return transcript[\"text\"]"
|
| 104 |
+
]
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"cell_type": "code",
|
| 108 |
+
"execution_count": 24,
|
| 109 |
+
"id": "4304e783",
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [
|
| 112 |
+
{
|
| 113 |
+
"name": "stdout",
|
| 114 |
+
"output_type": "stream",
|
| 115 |
+
"text": [
|
| 116 |
+
"[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n",
|
| 117 |
+
"[youtube] 1htKBjuUWec: Downloading webpage\n",
|
| 118 |
+
"[youtube] 1htKBjuUWec: Downloading tv client config\n",
|
| 119 |
+
"[youtube] 1htKBjuUWec: Downloading tv player API JSON\n",
|
| 120 |
+
"[youtube] 1htKBjuUWec: Downloading ios player API JSON\n",
|
| 121 |
+
"[youtube] 1htKBjuUWec: Downloading m3u8 information\n",
|
| 122 |
+
"[info] 1htKBjuUWec: Downloading 1 format(s): 140\n",
|
| 123 |
+
"[download] audio.m4a has already been downloaded\n",
|
| 124 |
+
"[download] 100% of 463.20KiB\n",
|
| 125 |
+
"[youtube] Extracting URL: https://www.youtube.com/watch?v=1htKBjuUWec\n",
|
| 126 |
+
"[youtube] 1htKBjuUWec: Downloading webpage\n",
|
| 127 |
+
"[youtube] 1htKBjuUWec: Downloading tv client config\n",
|
| 128 |
+
"[youtube] 1htKBjuUWec: Downloading tv player API JSON\n",
|
| 129 |
+
"[youtube] 1htKBjuUWec: Downloading ios player API JSON\n",
|
| 130 |
+
"[youtube] 1htKBjuUWec: Downloading m3u8 information\n",
|
| 131 |
+
"[info] 1htKBjuUWec: Downloading 1 format(s): 140\n",
|
| 132 |
+
"[download] audio.m4a has already been downloaded\n",
|
| 133 |
+
"[download] 100% of 463.20KiB\n"
|
| 134 |
+
]
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"name": "stderr",
|
| 138 |
+
"output_type": "stream",
|
| 139 |
+
"text": [
|
| 140 |
+
"x:\\Python Projects\\HF_Agents_Course_Final_Assignment\\.venv\\Lib\\site-packages\\whisper\\transcribe.py:126: UserWarning: FP16 is not supported on CPU; using FP32 instead\n",
|
| 141 |
+
" warnings.warn(\"FP16 is not supported on CPU; using FP32 instead\")\n"
|
| 142 |
+
]
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"data": {
|
| 146 |
+
"text/plain": [
|
| 147 |
+
"\" Wow, this coffee's great. I was just thinking that. Yeah, is that cinnamon? It's chicory. Chicory. Teal'c? Isn't that hot? Extremely.\""
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
"execution_count": 24,
|
| 151 |
+
"metadata": {},
|
| 152 |
+
"output_type": "execute_result"
|
| 153 |
+
}
|
| 154 |
+
],
|
| 155 |
+
"source": [
|
| 156 |
+
"transcribe_youtube_video(\"https://www.youtube.com/watch?v=1htKBjuUWec\")"
|
| 157 |
+
]
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"cell_type": "code",
|
| 161 |
+
"execution_count": null,
|
| 162 |
+
"id": "d650487e",
|
| 163 |
+
"metadata": {},
|
| 164 |
+
"outputs": [
|
| 165 |
+
{
|
| 166 |
+
"name": "stderr",
|
| 167 |
+
"output_type": "stream",
|
| 168 |
+
"text": [
|
| 169 |
+
"100%|βββββββββββββββββββββββββββββββββββββ| 1.51G/1.51G [13:03<00:00, 2.07MiB/s]\n"
|
| 170 |
+
]
|
| 171 |
+
}
|
| 172 |
+
],
|
| 173 |
+
"source": [
|
| 174 |
+
"def parse_doc_file(file_url: str) -> str:\n",
|
| 175 |
+
" \"\"\"\n",
|
| 176 |
+
" Parse any file and return its content.\n",
|
| 177 |
+
" \"\"\"\n",
|
| 178 |
+
" try:\n",
|
| 179 |
+
" response = requests.get(file_url)\n",
|
| 180 |
+
" response.raise_for_status()\n",
|
| 181 |
+
" elements = partition(file=io.BytesIO(response.content), include_page_breaks=True)\n",
|
| 182 |
+
" return \"\\n\\n\".join([str(el) for el in elements])\n",
|
| 183 |
+
" except Exception as e:\n",
|
| 184 |
+
" return f\"Failed to fetch file: {e}\"\n",
|
| 185 |
+
"\n",
|
| 186 |
+
"\n",
|
| 187 |
+
"def parse_audio_file(file_url: str) -> str:\n",
|
| 188 |
+
" \"\"\"\n",
|
| 189 |
+
" Parse an audio file and return its content.\n",
|
| 190 |
+
" \"\"\"\n",
|
| 191 |
+
" try:\n",
|
| 192 |
+
" response = requests.get(file_url)\n",
|
| 193 |
+
" response.raise_for_status()\n",
|
| 194 |
+
" return audio_model.transcribe(io.BytesIO(response.content))['text']\n",
|
| 195 |
+
" except Exception as e:\n",
|
| 196 |
+
" return f\"Failed to fetch file: {e}\""
|
| 197 |
+
]
|
| 198 |
+
},
|
| 199 |
+
{
|
| 200 |
+
"cell_type": "code",
|
| 201 |
+
"execution_count": 3,
|
| 202 |
+
"id": "4000380f",
|
| 203 |
+
"metadata": {},
|
| 204 |
+
"outputs": [
|
| 205 |
+
{
|
| 206 |
+
"ename": "NameError",
|
| 207 |
+
"evalue": "name 'parse_audio_file' is not defined",
|
| 208 |
+
"output_type": "error",
|
| 209 |
+
"traceback": [
|
| 210 |
+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
| 211 |
+
"\u001b[31mNameError\u001b[39m Traceback (most recent call last)",
|
| 212 |
+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mparse_audio_file\u001b[49m(\u001b[33m\"\u001b[39m\u001b[33mhttps://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3\u001b[39m\u001b[33m\"\u001b[39m)\n",
|
| 213 |
+
"\u001b[31mNameError\u001b[39m: name 'parse_audio_file' is not defined"
|
| 214 |
+
]
|
| 215 |
+
}
|
| 216 |
+
],
|
| 217 |
+
"source": [
|
| 218 |
+
"parse_audio_file(\"https://agents-course-unit4-scoring.hf.space/files/1f975693-876d-457b-a649-393859e79bf3\")"
|
| 219 |
+
]
|
| 220 |
+
},
|
| 221 |
+
{
|
| 222 |
+
"cell_type": "code",
|
| 223 |
+
"execution_count": null,
|
| 224 |
+
"id": "9670ed13",
|
| 225 |
+
"metadata": {},
|
| 226 |
+
"outputs": [
|
| 227 |
+
{
|
| 228 |
+
"data": {
|
| 229 |
+
"text/plain": [
|
| 230 |
+
"161996"
|
| 231 |
+
]
|
| 232 |
+
},
|
| 233 |
+
"execution_count": 15,
|
| 234 |
+
"metadata": {},
|
| 235 |
+
"output_type": "execute_result"
|
| 236 |
+
}
|
| 237 |
+
],
|
| 238 |
+
"source": [
|
| 239 |
+
"len(text)"
|
| 240 |
+
]
|
| 241 |
+
},
|
| 242 |
+
{
|
| 243 |
+
"cell_type": "code",
|
| 244 |
+
"execution_count": null,
|
| 245 |
+
"id": "c8f56eff",
|
| 246 |
+
"metadata": {},
|
| 247 |
+
"outputs": [
|
| 248 |
+
{
|
| 249 |
+
"name": "stdout",
|
| 250 |
+
"output_type": "stream",
|
| 251 |
+
"text": [
|
| 252 |
+
"115 \n",
|
| 253 |
+
"116 Toggle Discography subsection\n",
|
| 254 |
+
"117 + [5.1\n",
|
| 255 |
+
"118 Studio albums](#Studio_albums)\n",
|
| 256 |
+
"119 + [5.2\n",
|
| 257 |
+
"120 EPs](#EPs)\n",
|
| 258 |
+
"121 + [5.3\n",
|
| 259 |
+
"122 Live albums](#Live_albums)\n",
|
| 260 |
+
"123 + [5.4\n",
|
| 261 |
+
"124 Compilation albums](#Compilation_albums)\n",
|
| 262 |
+
"125 * [6\n",
|
| 263 |
+
"126 Filmography](#Filmography)\n",
|
| 264 |
+
"127 * [7\n",
|
| 265 |
+
"128 Further reading](#Further_reading)\n",
|
| 266 |
+
"\n",
|
| 267 |
+
"341 \n",
|
| 268 |
+
"342 Sosa recorded forty albums.[[4]](#cite_note-Legendary_folk_singer_Mercedes_Sosa_dies_at_74-4)[[9]](#cite_note-Latin_artist_Mercedes_Sosa_dies-9)\n",
|
| 269 |
+
"343 \n",
|
| 270 |
+
"344 ### Studio albums\n",
|
| 271 |
+
"345 \n",
|
| 272 |
+
"346 [[edit](/w/index.php?title=Mercedes_Sosa&action=edit§ion=6 \"Edit section: Studio albums\")]\n",
|
| 273 |
+
"347 \n",
|
| 274 |
+
"348 | Year | Album details |\n",
|
| 275 |
+
"349 | --- | --- |\n",
|
| 276 |
+
"350 | 1962 | [La Voz De La Zafra](/wiki/La_Voz_De_La_Zafra \"La Voz De La Zafra\") * Label: RCA |\n",
|
| 277 |
+
"351 | 1965 | Canciones Con Fundamento * Label: El Grillo |\n",
|
| 278 |
+
"352 | 1966 | Hermano * Label: Philips |\n",
|
| 279 |
+
"353 | 1966 | Yo No Canto Por Cantar * Label: Philips |\n",
|
| 280 |
+
"354 | 1967 | Para Cantarle A Mi Gente * Label: Philips |\n",
|
| 281 |
+
"\n",
|
| 282 |
+
"343 \n",
|
| 283 |
+
"344 ### Studio albums\n",
|
| 284 |
+
"345 \n",
|
| 285 |
+
"346 [[edit](/w/index.php?title=Mercedes_Sosa&action=edit§ion=6 \"Edit section: Studio albums\")]\n",
|
| 286 |
+
"347 \n",
|
| 287 |
+
"348 | Year | Album details |\n",
|
| 288 |
+
"349 | --- | --- |\n",
|
| 289 |
+
"350 | 1962 | [La Voz De La Zafra](/wiki/La_Voz_De_La_Zafra \"La Voz De La Zafra\") * Label: RCA |\n",
|
| 290 |
+
"351 | 1965 | Canciones Con Fundamento * Label: El Grillo |\n",
|
| 291 |
+
"352 | 1966 | Hermano * Label: Philips |\n",
|
| 292 |
+
"353 | 1966 | Yo No Canto Por Cantar * Label: Philips |\n",
|
| 293 |
+
"354 | 1967 | Para Cantarle A Mi Gente * Label: Philips |\n",
|
| 294 |
+
"355 | 1968 | Con Sabor A Mercedes Sosa * Label: Philips |\n",
|
| 295 |
+
"356 | 1969 | Mujeres Argentinas * Label: Philips |\n",
|
| 296 |
+
"\n"
|
| 297 |
+
]
|
| 298 |
+
}
|
| 299 |
+
],
|
| 300 |
+
"source": [
|
| 301 |
+
"import re\n",
|
| 302 |
+
"lines = text.splitlines()\n",
|
| 303 |
+
"for i, line in enumerate(lines):\n",
|
| 304 |
+
" if re.search(r'Studio albums', line):\n",
|
| 305 |
+
" print(i-3, lines[i-3])\n",
|
| 306 |
+
" print(i-2, lines[i-2])\n",
|
| 307 |
+
" print(i-1, lines[i-1])\n",
|
| 308 |
+
" print(i, line)\n",
|
| 309 |
+
" print(i+1, lines[i+1])\n",
|
| 310 |
+
" print(i+2, lines[i+2])\n",
|
| 311 |
+
" print(i+3, lines[i+3])\n",
|
| 312 |
+
" print(i+4, lines[i+4])\n",
|
| 313 |
+
" print(i+5, lines[i+5])\n",
|
| 314 |
+
" print(i+6, lines[i+6])\n",
|
| 315 |
+
" print(i+7, lines[i+7])\n",
|
| 316 |
+
" print(i+8, lines[i+8])\n",
|
| 317 |
+
" print(i+9, lines[i+9])\n",
|
| 318 |
+
" print(i+10, lines[i+10])\n",
|
| 319 |
+
" print()"
|
| 320 |
+
]
|
| 321 |
+
},
|
| 322 |
+
{
|
| 323 |
+
"cell_type": "code",
|
| 324 |
+
"execution_count": null,
|
| 325 |
+
"id": "a904e623",
|
| 326 |
+
"metadata": {},
|
| 327 |
+
"outputs": [],
|
| 328 |
+
"source": []
|
| 329 |
+
}
|
| 330 |
+
],
|
| 331 |
+
"metadata": {
|
| 332 |
+
"kernelspec": {
|
| 333 |
+
"display_name": ".venv",
|
| 334 |
+
"language": "python",
|
| 335 |
+
"name": "python3"
|
| 336 |
+
},
|
| 337 |
+
"language_info": {
|
| 338 |
+
"codemirror_mode": {
|
| 339 |
+
"name": "ipython",
|
| 340 |
+
"version": 3
|
| 341 |
+
},
|
| 342 |
+
"file_extension": ".py",
|
| 343 |
+
"mimetype": "text/x-python",
|
| 344 |
+
"name": "python",
|
| 345 |
+
"nbconvert_exporter": "python",
|
| 346 |
+
"pygments_lexer": "ipython3",
|
| 347 |
+
"version": "3.12.4"
|
| 348 |
+
}
|
| 349 |
+
},
|
| 350 |
+
"nbformat": 4,
|
| 351 |
+
"nbformat_minor": 5
|
| 352 |
+
}
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|