Spaces:
Runtime error
Runtime error
File size: 6,830 Bytes
129cd69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
"""Util that calls Tavily Search API.
In order to set this up, follow instructions at:
"""
import json
from typing import Dict, List, Optional
import aiohttp
import requests
from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator
from langchain.utils import get_from_dict_or_env
TAVILY_API_URL = "https://api.tavily.com"
class TavilySearchAPIWrapper(BaseModel):
"""Wrapper for Tavily Search API."""
tavily_api_key: str
class Config:
"""Configuration for this pydantic object."""
extra = Extra.forbid
@root_validator(pre=True)
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that api key and endpoint exists in environment."""
tavily_api_key = get_from_dict_or_env(
values, "tavily_api_key", "TAVILY_API_KEY"
)
values["tavily_api_key"] = tavily_api_key
return values
def raw_results(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> Dict:
params = {
"api_key": self.tavily_api_key,
"query": query,
"max_results": max_results,
"search_depth": search_depth,
"include_domains": include_domains,
"exclude_domains": exclude_domains,
"include_answer": include_answer,
"include_raw_content": include_raw_content,
"include_images": include_images,
}
response = requests.post(
# type: ignore
f"{TAVILY_API_URL}/search",
json=params,
)
response.raise_for_status()
return response.json()
def results(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> List[Dict]:
"""Run query through Tavily Search and return metadata.
Args:
query: The query to search for.
max_results: The maximum number of results to return.
search_depth: The depth of the search. Can be "basic" or "advanced".
include_domains: A list of domains to include in the search.
exclude_domains: A list of domains to exclude from the search.
include_answer: Whether to include the answer in the results.
include_raw_content: Whether to include the raw content in the results.
include_images: Whether to include images in the results.
Returns:
query: The query that was searched for.
follow_up_questions: A list of follow up questions.
response_time: The response time of the query.
answer: The answer to the query.
images: A list of images.
results: A list of dictionaries containing the results:
title: The title of the result.
url: The url of the result.
content: The content of the result.
score: The score of the result.
raw_content: The raw content of the result.
""" # noqa: E501
raw_search_results = self.raw_results(
query,
max_results=max_results,
search_depth=search_depth,
include_domains=include_domains,
exclude_domains=exclude_domains,
include_answer=include_answer,
include_raw_content=include_raw_content,
include_images=include_images,
)
return self.clean_results(raw_search_results["results"])
async def raw_results_async(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> Dict:
"""Get results from the Tavily Search API asynchronously."""
# Function to perform the API call
async def fetch() -> str:
params = {
"api_key": self.tavily_api_key,
"query": query,
"max_results": max_results,
"search_depth": search_depth,
"include_domains": include_domains,
"exclude_domains": exclude_domains,
"include_answer": include_answer,
"include_raw_content": include_raw_content,
"include_images": include_images,
}
async with aiohttp.ClientSession() as session:
async with session.post(f"{TAVILY_API_URL}/search", json=params) as res:
if res.status == 200:
data = await res.text()
return data
else:
raise Exception(f"Error {res.status}: {res.reason}")
results_json_str = await fetch()
return json.loads(results_json_str)
async def results_async(
self,
query: str,
max_results: Optional[int] = 5,
search_depth: Optional[str] = "advanced",
include_domains: Optional[List[str]] = [],
exclude_domains: Optional[List[str]] = [],
include_answer: Optional[bool] = False,
include_raw_content: Optional[bool] = False,
include_images: Optional[bool] = False,
) -> List[Dict]:
results_json = await self.raw_results_async(
query=query,
max_results=max_results,
search_depth=search_depth,
include_domains=include_domains,
exclude_domains=exclude_domains,
include_answer=include_answer,
include_raw_content=include_raw_content,
include_images=include_images,
)
return self.clean_results(results_json["results"])
def clean_results(self, results: List[Dict]) -> List[Dict]:
"""Clean results from Tavily Search API."""
clean_results = []
for result in results:
clean_results.append(
{
"url": result["url"],
"content": result["content"],
}
)
return clean_results
|