# -*- coding: utf-8 -*- | |
""" | |
@author:XuMing(xuming624@qq.com) | |
@description: | |
""" | |
import json | |
from itertools import islice | |
import requests | |
from fastapi import HTTPException | |
from loguru import logger | |
# Search engine related. You don't really need to change this.
BING_SEARCH_V7_ENDPOINT = "https://api.bing.microsoft.com/v7.0/search"
# Bing market code (language/region) used for every query.
BING_MKT = "en-US"
GOOGLE_SEARCH_ENDPOINT = "https://customsearch.googleapis.com/customsearch/v1"
SERPER_SEARCH_ENDPOINT = "https://google.serper.dev/search"
SEARCHAPI_SEARCH_ENDPOINT = "https://www.searchapi.io/api/v1/search"
# Specify the number of references from the search engine you want to use.
# 8 is usually a good number.
REFERENCE_COUNT = 8
# Specify the default timeout for the search engine. If the search engine
# does not respond within this time, we will return an error.
DEFAULT_SEARCH_ENGINE_TIMEOUT = 5
def search_with_bing(query: str, subscription_key: str):
    """
    Query the Bing Web Search v7 API and return the matching contexts.

    Returns the first REFERENCE_COUNT entries of "webPages"/"value" from the
    response, or an empty list if those keys are absent.  Raises HTTPException
    on a non-2xx HTTP status.
    """
    response = requests.get(
        BING_SEARCH_V7_ENDPOINT,
        headers={"Ocp-Apim-Subscription-Key": subscription_key},
        params={"q": query, "mkt": BING_MKT},
        timeout=DEFAULT_SEARCH_ENGINE_TIMEOUT,
    )
    if not response.ok:
        logger.error(f"{response.status_code} {response.text}")
        raise HTTPException(response.status_code, "Search engine error.")
    json_content = response.json()
    try:
        return json_content["webPages"]["value"][:REFERENCE_COUNT]
    except KeyError:
        logger.error(f"Error encountered: {json_content}")
        return []
def search_with_google(query: str, subscription_key: str, cx: str):
    """
    Query the Google Custom Search API and return the matching contexts.

    Returns the first REFERENCE_COUNT entries of "items" from the response,
    or an empty list if that key is absent.  Raises HTTPException on a
    non-2xx HTTP status.
    """
    query_params = {
        "key": subscription_key,
        "cx": cx,
        "q": query,
        "num": REFERENCE_COUNT,
    }
    response = requests.get(
        GOOGLE_SEARCH_ENDPOINT,
        params=query_params,
        timeout=DEFAULT_SEARCH_ENGINE_TIMEOUT,
    )
    if not response.ok:
        logger.error(f"{response.status_code} {response.text}")
        raise HTTPException(response.status_code, "Search engine error.")
    json_content = response.json()
    try:
        return json_content["items"][:REFERENCE_COUNT]
    except KeyError:
        logger.error(f"Error encountered: {json_content}")
        return []
def search_with_serper(query: str, subscription_key: str):
    """
    Search with serper and return the contexts.

    :param query: search query string.
    :param subscription_key: serper.dev API key.
    :return: list of dicts with "name", "url", "snippet" keys (knowledge
        graph and answer box entries first, then organic results, capped at
        REFERENCE_COUNT); empty list if the response lacks "organic".
    :raises HTTPException: if the search engine returns a non-2xx status.
    """
    payload = json.dumps({
        "q": query,
        # Serper returns results in pages of 10, so round the requested
        # count up to the next multiple of 10.
        "num": (
            REFERENCE_COUNT
            if REFERENCE_COUNT % 10 == 0
            else (REFERENCE_COUNT // 10 + 1) * 10
        ),
    })
    headers = {"X-API-KEY": subscription_key, "Content-Type": "application/json"}
    # Fix: the previous log line included `headers` and `subscription_key`,
    # writing the API secret into the logs.  Log only non-sensitive fields.
    logger.info(f"{payload} {query} {SERPER_SEARCH_ENDPOINT}")
    response = requests.post(
        SERPER_SEARCH_ENDPOINT,
        headers=headers,
        data=payload,
        timeout=DEFAULT_SEARCH_ENGINE_TIMEOUT,
    )
    if not response.ok:
        logger.error(f"{response.status_code} {response.text}")
        raise HTTPException(response.status_code, "Search engine error.")
    json_content = response.json()
    try:
        # convert to the same format as bing/google
        contexts = []
        if json_content.get("knowledgeGraph"):
            url = json_content["knowledgeGraph"].get("descriptionUrl") or json_content["knowledgeGraph"].get("website")
            snippet = json_content["knowledgeGraph"].get("description")
            # Only keep the entry when both a URL and a snippet are present.
            if url and snippet:
                contexts.append({
                    "name": json_content["knowledgeGraph"].get("title", ""),
                    "url": url,
                    "snippet": snippet
                })
        if json_content.get("answerBox"):
            url = json_content["answerBox"].get("url")
            snippet = json_content["answerBox"].get("snippet") or json_content["answerBox"].get("answer")
            if url and snippet:
                contexts.append({
                    "name": json_content["answerBox"].get("title", ""),
                    "url": url,
                    "snippet": snippet
                })
        contexts += [
            {"name": c["title"], "url": c["link"], "snippet": c.get("snippet", "")}
            for c in json_content["organic"]
        ]
        return contexts[:REFERENCE_COUNT]
    except KeyError:
        logger.error(f"Error encountered: {json_content}")
        return []
def search_with_searchapi(query: str, subscription_key: str):
    """
    Search with SearchApi.io and return the contexts.

    :param query: search query string.
    :param subscription_key: SearchApi.io API key.
    :return: list of dicts with "name", "url", "snippet" keys (answer box,
        knowledge graph, organic results, then related questions, capped at
        REFERENCE_COUNT); empty list if the response lacks "organic_results".
    :raises HTTPException: if the search engine returns a non-2xx status.
    """
    payload = {
        "q": query,
        "engine": "google",
        # SearchApi returns results in pages of 10, so round the requested
        # count up to the next multiple of 10.
        "num": (
            REFERENCE_COUNT
            if REFERENCE_COUNT % 10 == 0
            else (REFERENCE_COUNT // 10 + 1) * 10
        ),
    }
    headers = {"Authorization": f"Bearer {subscription_key}", "Content-Type": "application/json"}
    # Fix: the previous log line included `headers` and `subscription_key`,
    # writing the API secret into the logs.  Log only non-sensitive fields.
    logger.info(f"{payload} {query} {SEARCHAPI_SEARCH_ENDPOINT}")
    response = requests.get(
        SEARCHAPI_SEARCH_ENDPOINT,
        headers=headers,
        params=payload,
        timeout=30,  # deliberately longer than DEFAULT_SEARCH_ENGINE_TIMEOUT
    )
    if not response.ok:
        logger.error(f"{response.status_code} {response.text}")
        raise HTTPException(response.status_code, "Search engine error.")
    json_content = response.json()
    try:
        # convert to the same format as bing/google
        contexts = []
        if json_content.get("answer_box"):
            answer_box = json_content["answer_box"]
            # Fix: the original assigned title/url from organic_result and
            # population_graph and then unconditionally overwrote both with
            # answer_box["title"]/["link"], making those branches dead code.
            # Treat the top-level fields as fallbacks instead (presumed
            # original intent -- verify against live SearchApi responses).
            title = ""
            url = ""
            if answer_box.get("organic_result"):
                title = answer_box["organic_result"].get("title", "")
                url = answer_box["organic_result"].get("link", "")
            if answer_box.get("type") == "population_graph":
                title = answer_box.get("place", "")
                url = answer_box.get("explore_more_link", "")
            title = title or answer_box.get("title", "")
            url = url or answer_box.get("link")
            snippet = answer_box.get("answer") or answer_box.get("snippet")
            # Only keep the entry when both a URL and a snippet are present.
            if url and snippet:
                contexts.append({
                    "name": title,
                    "url": url,
                    "snippet": snippet
                })
        if json_content.get("knowledge_graph"):
            knowledge_graph = json_content["knowledge_graph"]
            # Fix: the original assigned the source link and then
            # unconditionally overwrote it with "website"; prefer the website
            # and fall back to the source link when the website is missing.
            url = knowledge_graph.get("website", "")
            if not url and knowledge_graph.get("source"):
                url = knowledge_graph["source"].get("link", "")
            snippet = knowledge_graph.get("description")
            if url and snippet:
                contexts.append({
                    "name": knowledge_graph.get("title", ""),
                    "url": url,
                    "snippet": snippet
                })
        contexts += [
            {"name": c["title"], "url": c["link"], "snippet": c.get("snippet", "")}
            for c in json_content["organic_results"]
        ]
        if json_content.get("related_questions"):
            for question in json_content["related_questions"]:
                # Keep only questions that carry both a source link and an
                # answer snippet.
                url = (question.get("source") or {}).get("link", "")
                snippet = question.get("answer", "")
                if url and snippet:
                    contexts.append({
                        "name": question.get("question", ""),
                        "url": url,
                        "snippet": snippet
                    })
        return contexts[:REFERENCE_COUNT]
    except KeyError:
        logger.error(f"Error encountered: {json_content}")
        return []
def search_with_duckduckgo(query: str):
    """
    Query DuckDuckGo (lite backend) and return up to REFERENCE_COUNT
    contexts as dicts with "name", "url", "snippet" keys.

    Raises ImportError when the optional duckduckgo-search package is
    not installed.
    """
    try:
        from duckduckgo_search import DDGS
    except ImportError:
        raise ImportError("Please install duckduckgo-search to use this search engine.")
    with DDGS() as ddgs:
        results = islice(ddgs.text(query, backend="lite"), REFERENCE_COUNT)
        return [
            {"name": r['title'], "url": r['href'], "snippet": r['body']}
            for r in results
        ]