# app/src/search_services/jina_ai.py
# Source-viewer residue: uploaded by "lemdaddy", commit "first commit" (9c400b9), 3.73 kB.
from dotenv import load_dotenv
import httpx
import urllib.parse
from typing import Self
from src.libs.helper_functions import convert_to_snakecase
from src.databases.redis import REDIS_CACHED
from src.libs.constants import ONE_MINUTE_IN_SECONDS
from src.libs.constants import JINA_READER_BASE_ENDPOINT, JINA_SEARCH_BASE_ENDPOINT, ONE_MINUTE_IN_SECONDS
# Load variables from a .env file into the process environment at import time.
load_dotenv()
# Module-level alias for the project's REDIS_CACHED object; it is used below
# as a decorator factory: @redis_cache(ttl=...).
redis_cache = REDIS_CACHED
class JinaAI:
    """
    A class for interacting with Jina AI's search and reader services.

    Attributes:
        JINA_SEARCH_BASE_ENDPOINT (str): Base URL to which the URL-encoded
            search query is appended.
        JINA_READER_BASE_ENDPOINT (str): Base URL to which the target website
            URL is appended.

    Methods:
        __init__(self, search_base_url: str | None = None, reader_base_url: str | None = None) -> None:
            Initialize the JinaAI instance with optional search and reader base URLs.
        search_web_with_jina(self, search_query: str) -> dict | None:
            Search the web using Jina AI and return the search results as a dictionary.
        read_website_with_jina(self, website_url: str) -> dict | None:
            Read a website using Jina AI and return the website content as a dictionary.
    """

    def __init__(
        self,
        search_base_url: str | None = None,
        reader_base_url: str | None = None,
    ) -> None:
        """
        Store the service endpoints used by this instance.

        Args:
            search_base_url (str | None): Override for the search endpoint.
                Any falsy value (None or "") selects the module default.
            reader_base_url (str | None): Override for the reader endpoint.
                Any falsy value (None or "") selects the module default.
        """
        self.JINA_SEARCH_BASE_ENDPOINT = search_base_url or JINA_SEARCH_BASE_ENDPOINT
        self.JINA_READER_BASE_ENDPOINT = reader_base_url or JINA_READER_BASE_ENDPOINT

    @staticmethod
    def _fetch_json(request_url: str) -> dict | None:
        """
        GET ``request_url`` asking for JSON and return the snake-cased payload.

        Shared by the public methods so that both handle failures identically.

        Args:
            request_url (str): Fully built URL to request.

        Returns:
            dict | None: ``convert_to_snakecase`` applied to the decoded JSON
            body, or None when the request or the JSON decoding fails.
        """
        headers = {"Accept": "application/json"}
        try:
            with httpx.Client(timeout=30.0) as client:
                response = client.get(request_url, headers=headers)
                response.raise_for_status()
                payload = response.json()
        except (httpx.HTTPError, httpx.InvalidURL, ValueError) as e:
            # httpx.InvalidURL does not derive from httpx.HTTPError, and a
            # non-JSON body makes response.json() raise a ValueError subclass;
            # both must be caught to honour the "None on failure" contract.
            print(f"An error occurred: {e}")
            return None
        # Conversion is kept outside the try block so that a bug in the helper
        # is not silently reported as a network failure.
        return convert_to_snakecase(payload)

    @redis_cache(ttl=ONE_MINUTE_IN_SECONDS)
    def search_web_with_jina(self, search_query: str) -> dict | None:
        """
        Search the web using Jina AI.

        The query is percent-encoded and appended directly to the search base
        endpoint. The call is wrapped by ``redis_cache`` with a TTL of
        ``ONE_MINUTE_IN_SECONDS``.
        NOTE(review): whether a None (failed) result is cached as well depends
        on the ``REDIS_CACHED`` implementation - confirm.

        Args:
            search_query (str): The query to be searched on the web.

        Returns:
            dict | None: A dictionary containing the search results if
            successful, otherwise None. HTTP, transport and JSON decoding
            errors are caught, printed and reported as None rather than raised.

        Usage:
            jina_ai_instance.search_web_with_jina(search_query)

        Example:
            jina_ai_instance.search_web_with_jina("example search query")
        """
        encoded_search_query = urllib.parse.quote(search_query)
        return self._fetch_json(f"{self.JINA_SEARCH_BASE_ENDPOINT}{encoded_search_query}")

    @redis_cache(ttl=ONE_MINUTE_IN_SECONDS)
    def read_website_with_jina(self, website_url: str) -> dict | None:
        """
        Read a website using Jina AI.

        The website URL is appended verbatim (not percent-encoded) to the
        reader base endpoint. The call is wrapped by ``redis_cache`` with a
        TTL of ``ONE_MINUTE_IN_SECONDS``.
        NOTE(review): whether a None (failed) result is cached as well depends
        on the ``REDIS_CACHED`` implementation - confirm.

        Args:
            website_url (str): The URL of the website to be read.

        Returns:
            dict | None: A dictionary containing the content of the website if
            successful, otherwise None. HTTP, transport and JSON decoding
            errors are caught, printed and reported as None rather than raised.

        Usage:
            jina_ai_instance.read_website_with_jina(website_url)

        Example:
            jina_ai_instance.read_website_with_jina("https://example.com")
        """
        return self._fetch_json(f"{self.JINA_READER_BASE_ENDPOINT}{website_url}")