# Source: Hugging Face Space "Bing" (user: rkihacker), revision 333f8d9 (verified).
# main.py
import json
import logging
import time
from typing import Dict, List, Optional
from urllib.parse import urlencode, urlparse, parse_qs

from bs4 import BeautifulSoup
from curl_cffi.requests import Session
from fastapi import FastAPI, HTTPException, Query, Request
from pydantic import BaseModel, Field
# 1. Pydantic Models for API Responses (Unchanged)
class BingSearchResult(BaseModel):
    """A single organic (web-page) result returned by the /search endpoint."""

    url: str = Field(..., description="The URL of the search result.")
    title: str = Field(..., description="The title of the search result.")
    description: str = Field(..., description="A brief description or snippet from the result page.")
class BingImageResult(BaseModel):
    """A single image hit returned by the /images endpoint."""

    title: str = Field(..., description="The title or caption of the image.")
    image: str = Field(..., description="The direct URL to the full-resolution image.")
    thumbnail: str = Field(..., description="The URL to the thumbnail of the image.")
    url: str = Field(..., description="The URL of the webpage where the image was found.")
    source: str = Field(..., description="The source domain of the image.")
class BingNewsResult(BaseModel):
    """A single article returned by the /news endpoint."""

    title: str = Field(..., description="The title of the news article.")
    url: str = Field(..., description="The URL to the full news article.")
    description: str = Field(..., description="A snippet from the news article.")
    source: str = Field(..., description="The publisher or source of the news article.")
# 2. FastAPI application instance; title/description/version feed the OpenAPI docs.
app = FastAPI(
    title="Definitive Fast Bing Search API",
    description="Returns correct, non-localized search results from Bing using advanced techniques.",
    version="9.0.0-complete"
)
# 3. HTTP middleware: stamp timing and branding headers onto every response.
@app.middleware("http")
async def add_custom_headers(request: Request, call_next):
    """Measure wall-clock handling time and expose it via X-Process-Time."""
    started = time.time()
    response = await call_next(request)
    elapsed = time.time() - started
    response.headers["X-Process-Time"] = f"{elapsed:.4f} seconds"
    response.headers["X-Powered-By"] = "NiansuhAI"
    return response
# 4. The Definitive Bing Search Class
class BingSearch:
    """The definitive Bing search scraper that counters aggressive localization.

    A single impersonating curl_cffi session is shared by all endpoints; a
    market cookie is refreshed before every query so Bing serves results for
    the requested region rather than the caller's apparent location.
    """

    # One shared mapping from the public safesearch values to Bing's
    # query-string vocabulary (previously duplicated in three methods).
    _SAFE_LEVELS: Dict[str, str] = {"on": "Strict", "moderate": "Moderate", "off": "Off"}

    def __init__(
        self,
        proxies: Optional[Dict[str, str]] = None,
        timeout: int = 15,
        impersonate: str = "chrome110"
    ):
        """Create the scraping session.

        Args:
            proxies: Optional mapping of scheme -> proxy URL.
            timeout: Per-request timeout in seconds.
            impersonate: curl_cffi browser fingerprint to present.
        """
        # NOTE(review): verify=False disables TLS certificate verification;
        # kept for parity with the original behaviour, but consider enabling
        # verification unless an intercepting proxy requires it.
        self.session = Session(
            proxies=proxies or {},
            timeout=timeout,
            impersonate=impersonate,
            verify=False
        )
        self._base_url = "https://www.bing.com"
        self.session.headers.update({
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
        })

    def _update_session_for_region(self, region: str = "en-US"):
        """THE CRUCIAL FIX: set a cookie that explicitly tells Bing our preferred market."""
        self.session.cookies.set("SRCHHPGUSR", f"SRCHLANG=en&MKT={region}", domain=".bing.com")

    def _safe_level(self, safesearch: str) -> str:
        """Translate 'on'/'moderate'/'off' (any case) to Bing's safeSearch value."""
        return self._SAFE_LEVELS.get(safesearch.lower(), "Moderate")

    def _get_soup(self, path: str, params: Dict) -> BeautifulSoup:
        """Fetch a Bing HTML page and return it parsed; raises on HTTP errors."""
        resp = self.session.get(self._base_url + path, params=params)
        resp.raise_for_status()
        return BeautifulSoup(resp.text, "html.parser")

    def text(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingSearchResult]:
        """Scrape organic web results, paging until max_results or exhaustion."""
        self._update_session_for_region(region)
        safe = self._safe_level(safesearch)
        fetched_results: List[BingSearchResult] = []
        page = 1
        while len(fetched_results) < max_results:
            # 'first' is Bing's 1-based offset of the first result on the page.
            params = {"q": keywords, "first": (page - 1) * 10 + 1, "safeSearch": safe}
            try:
                soup = self._get_soup("/search", params)
            except Exception as e:
                logging.getLogger(__name__).warning("Error fetching text search page: %s", e)
                break
            result_blocks = soup.select("li.b_algo")
            if not result_blocks:
                break
            count_before = len(fetched_results)
            for result in result_blocks:
                link_tag = result.select_one("h2 a")
                desc_tag = result.select_one(".b_caption p")
                if link_tag and desc_tag and link_tag.get('href'):
                    fetched_results.append(BingSearchResult(
                        url=link_tag['href'],
                        title=link_tag.get_text(strip=True),
                        description=desc_tag.get_text(strip=True)))
                if len(fetched_results) >= max_results:
                    break
            # Safety guard: if a page yielded blocks but no usable results
            # (e.g. Bing ignores the offset and replays the same page),
            # stop instead of looping forever.
            if len(fetched_results) == count_before:
                break
            page += 1
        return fetched_results[:max_results]

    def images(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingImageResult]:
        """Scrape image results from the first page of Bing image search."""
        self._update_session_for_region(region)
        params = {"q": keywords, "safeSearch": self._safe_level(safesearch), "form": "HDRSC2"}
        try:
            soup = self._get_soup("/images/search", params)
        except Exception as e:
            raise Exception(f"Bing image search failed: {e}") from e
        results: List[BingImageResult] = []
        for item in soup.select("a.iusc"):
            if len(results) >= max_results:
                break
            try:
                # Each result anchor carries a JSON payload in its 'm' attribute.
                m_data = json.loads(item.get("m", "{}"))
                if m_data and m_data.get("murl"):
                    results.append(BingImageResult(
                        title=m_data.get("t", ""),
                        image=m_data.get("murl"),
                        thumbnail=m_data.get("turl", ""),
                        url=m_data.get("purl", ""),
                        source=m_data.get("surl", "")))
            except Exception:
                # Best-effort scraping: skip tiles with malformed metadata.
                continue
        return results

    def news(
        self, keywords: str, max_results: int, region: str, safesearch: str
    ) -> List[BingNewsResult]:
        """Scrape news articles from the first page of Bing news search."""
        self._update_session_for_region(region)
        params = {"q": keywords, "safeSearch": self._safe_level(safesearch), "form": "QBNH"}
        try:
            soup = self._get_soup("/news/search", params)
        except Exception as e:
            raise Exception(f"Bing news search failed: {e}") from e
        results: List[BingNewsResult] = []
        for item in soup.select("div.news-card"):
            if len(results) >= max_results:
                break
            a_tag = item.find("a", class_="title")
            snippet = item.find("div", class_="snippet")
            source = item.find("div", class_="source")
            if a_tag and a_tag.get('href'):
                results.append(BingNewsResult(
                    title=a_tag.get_text(strip=True),
                    url=a_tag['href'],
                    description=snippet.get_text(strip=True) if snippet else "",
                    source=source.get_text(strip=True) if source else ""))
        return results

    def suggestions(self, query: str, region: str = "en-US") -> List[str]:
        """Return autocomplete suggestions; degrades to [] on any failure."""
        # The suggestions endpoint is a real API and correctly uses the 'mkt' parameter.
        params = {"query": query, "mkt": region}
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
        except Exception:
            # Suggestions are non-critical; never let them raise.
            return []
        return data[1] if isinstance(data, list) and len(data) > 1 else []
# 5. API Endpoints
# IMPORTANT: For guaranteed results from a specific country (e.g., en-US),
# you MUST use a proxy server from that country — the market cookie alone
# may not override Bing's IP-based localization.
#
# Example proxy setup:
# proxies = {
#     "http": "http://USERNAME:PASSWORD@us-residential-proxy.com:PORT",
#     "https": "http://USERNAME:PASSWORD@us-residential-proxy.com:PORT",
# }
# bing = BingSearch(proxies=proxies)

# Shared scraper instance used by every endpoint below.
bing = BingSearch()  # Without a proxy, results may still be localized.
@app.get("/search", response_model=List[BingSearchResult], summary="Perform a Bing text search")
async def text_search(
    keywords: str = Query(..., description="The search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    """Thin HTTP wrapper around BingSearch.text; scraper errors become HTTP 500."""
    try:
        results = bing.text(keywords, max_results, region, safesearch)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return results
@app.get("/images", response_model=List[BingImageResult], summary="Perform a Bing image search")
async def image_search(
    keywords: str = Query(..., description="The image search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of image results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    """Thin HTTP wrapper around BingSearch.images; scraper errors become HTTP 500."""
    try:
        results = bing.images(keywords, max_results, region, safesearch)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return results
@app.get("/news", response_model=List[BingNewsResult], summary="Perform a Bing news search")
async def news_search(
    keywords: str = Query(..., description="The news search query."),
    max_results: int = Query(10, ge=1, le=50, description="Maximum number of news results."),
    region: str = Query("en-US", description="Market to search in (e.g., 'en-US'). A proxy is recommended."),
    safesearch: str = Query("moderate", description="Safe search level: 'on', 'moderate', or 'off'.")
):
    """Thin HTTP wrapper around BingSearch.news; scraper errors become HTTP 500."""
    try:
        articles = bing.news(keywords, max_results, region, safesearch)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return articles
@app.get("/suggestions", response_model=List[str], summary="Get Bing search suggestions")
async def get_suggestions(
    query: str = Query(..., description="The query to get suggestions for."),
    region: str = Query("en-US", description="Market for suggestions (e.g., 'en-US').")
):
    """Thin HTTP wrapper around BingSearch.suggestions; errors become HTTP 500."""
    try:
        suggestions = bing.suggestions(query, region)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return suggestions
if __name__ == "__main__":
    # Development entry point: run the app directly with `python main.py`.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)