Ali Abid
first commit
0534c31
"""HTML processing functions"""
from __future__ import annotations
from bs4 import BeautifulSoup
from requests.compat import urljoin
def extract_hyperlinks(soup: BeautifulSoup, base_url: str) -> list[tuple[str, str]]:
"""Extract hyperlinks from a BeautifulSoup object
Args:
soup (BeautifulSoup): The BeautifulSoup object
base_url (str): The base URL
Returns:
List[Tuple[str, str]]: The extracted hyperlinks
"""
return [
(link.text, urljoin(base_url, link["href"]))
for link in soup.find_all("a", href=True)
]
def format_hyperlinks(hyperlinks: list[tuple[str, str]]) -> list[str]:
"""Format hyperlinks to be displayed to the user
Args:
hyperlinks (List[Tuple[str, str]]): The hyperlinks to format
Returns:
List[str]: The formatted hyperlinks
"""
return [f"{link_text} ({link_url})" for link_text, link_url in hyperlinks]