|
|
from dotenv import load_dotenv |
|
|
|
|
|
import httpx |
|
|
import urllib.parse |
|
|
from typing import Self |
|
|
from src.libs.helper_functions import convert_to_snakecase |
|
|
from src.databases.redis import REDIS_CACHED |
|
|
from src.libs.constants import ONE_MINUTE_IN_SECONDS |
|
|
from src.libs.constants import JINA_READER_BASE_ENDPOINT, JINA_SEARCH_BASE_ENDPOINT, ONE_MINUTE_IN_SECONDS |
|
|
|
|
|
# Load variables from a local .env file into the process environment at import time.
load_dotenv()

# Lower-case alias for the imported REDIS_CACHED object; it is applied below as
# the decorator factory `@redis_cache(ttl=...)` on the JinaAI request methods.
redis_cache = REDIS_CACHED
|
|
|
|
|
class JinaAI:
    """
    A client for Jina AI's web search and website reader services.

    Both public methods issue a blocking HTTP GET with a 30 second timeout,
    ask for a JSON response, and pass the decoded payload through
    ``convert_to_snakecase``. Request failures are not propagated: they are
    printed and reported to the caller as ``None``.

    Attributes:
        JINA_SEARCH_BASE_ENDPOINT (str): Base URL the URL-encoded search query
            is appended to.
        JINA_READER_BASE_ENDPOINT (str): Base URL the target website URL is
            appended to.

    Methods:
        search_web_with_jina(search_query): Search the web and return the
            results, or None on failure.
        read_website_with_jina(website_url): Read a website and return its
            content, or None on failure.
    """

    def __init__(self, search_base_url: str | None = None, reader_base_url: str | None = None) -> None:
        """
        Initialize the JinaAI instance.

        Args:
            search_base_url (str | None): Override for the search base URL.
                Any falsy value (None or an empty string) falls back to the
                module-level JINA_SEARCH_BASE_ENDPOINT constant.
            reader_base_url (str | None): Override for the reader base URL.
                Any falsy value falls back to JINA_READER_BASE_ENDPOINT.
        """
        self.JINA_SEARCH_BASE_ENDPOINT = search_base_url or JINA_SEARCH_BASE_ENDPOINT
        self.JINA_READER_BASE_ENDPOINT = reader_base_url or JINA_READER_BASE_ENDPOINT

    def _get_json(self, request_url: str) -> dict | None:
        """
        GET ``request_url`` and return its JSON body with snake_case keys.

        Shared by the public methods so both handle failures the same way.

        Args:
            request_url (str): The fully assembled URL to request.

        Returns:
            dict | None: The converted payload, or None when the request
            fails, the server answers with an error status, or the body is
            not valid JSON.
        """
        headers = {"Accept": "application/json"}

        try:
            with httpx.Client(timeout=30.0) as client:
                response = client.get(request_url, headers=headers)
                response.raise_for_status()
        except (httpx.HTTPError, httpx.InvalidURL) as e:
            # httpx.InvalidURL is not a subclass of httpx.HTTPError, so it is
            # listed explicitly; a malformed URL is reported like any other failure.
            print(f"An error occurred: {e}")
            return None

        try:
            # The body was fully read by client.get(), so decoding after the
            # client has been closed is safe.
            payload = response.json()
        except ValueError as e:
            # A 2xx response with a non-JSON body would otherwise escape as an
            # exception and break the "dict | None" contract.
            print(f"An error occurred: {e}")
            return None

        return convert_to_snakecase(payload)

    # NOTE(review): presumably caches the return value in Redis for one minute;
    # confirm key construction and whether None results are cached too.
    @redis_cache(ttl=ONE_MINUTE_IN_SECONDS)
    def search_web_with_jina(self, search_query: str) -> dict | None:
        """
        Search the web using Jina AI.

        The query is percent-encoded and appended directly to the search base
        endpoint.

        Args:
            search_query (str): The query to be searched on the web.

        Returns:
            dict | None: A dictionary containing the search results if
            successful, otherwise None. HTTP and decoding errors are caught,
            printed, and reported as None rather than raised.

        Usage:
            jina_ai_instance.search_web_with_jina(search_query)

        Example:
            jina_ai_instance.search_web_with_jina("example search query")
        """
        # NOTE(review): urllib.parse.quote leaves "/" unescaped by default, so a
        # query containing "/" adds path segments to the request URL. Confirm
        # against the search endpoint whether safe="" is wanted here.
        encoded_search_query = urllib.parse.quote(search_query)
        return self._get_json(f"{self.JINA_SEARCH_BASE_ENDPOINT}{encoded_search_query}")

    # NOTE(review): presumably caches the return value in Redis for one minute;
    # confirm key construction and whether None results are cached too.
    @redis_cache(ttl=ONE_MINUTE_IN_SECONDS)
    def read_website_with_jina(self, website_url: str) -> dict | None:
        """
        Read a website using Jina AI.

        The website URL is appended verbatim (not percent-encoded) to the
        reader base endpoint.

        Args:
            website_url (str): The URL of the website to be read.

        Returns:
            dict | None: A dictionary containing the content of the website if
            successful, otherwise None. HTTP and decoding errors are caught,
            printed, and reported as None rather than raised.

        Usage:
            jina_ai_instance.read_website_with_jina(website_url)

        Example:
            jina_ai_instance.read_website_with_jina("https://example.com")
        """
        return self._get_json(f"{self.JINA_READER_BASE_ENDPOINT}{website_url}")