Spaces:

andronasef
/

googleit-server

Sleeping

App Files Files

googleit-server / searchlib.py

andronasef

limit results to 10

38b2348 over 2 years ago

raw

history blame

2.07 kB

	from bs4 import BeautifulSoup
	from requests import get

	# Request headers passed to every call made by _req; the value identifies
	# the client as a desktop Chrome browser.
	usr_agent = {
	    'User-Agent': (
	        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
	        'AppleWebKit/537.36 (KHTML, like Gecko) '
	        'Chrome/61.0.3163.100 Safari/537.36'
	    ),
	}


	def _req(term, results, lang, start, proxies, timeout=10):
	    """Send one search request to Google and return the HTTP response.

	    Args:
	        term: Query text, sent as the ``q`` parameter.
	        results: Number of results still wanted. Two more than this are
	            requested (``num``) to reduce the chance of needing a
	            follow-up request.
	        lang: Interface language, sent as the ``hl`` parameter.
	        start: Zero-based offset of the first result, sent as ``start``.
	        proxies: Proxy mapping handed to ``requests`` unchanged, or None.
	        timeout: Seconds to wait on the connection before giving up.
	            Without a timeout ``requests`` waits indefinitely, so a
	            stalled connection would hang the caller forever.

	    Returns:
	        The ``requests`` response object for a successful request.

	    Raises:
	        requests.HTTPError: If the server answers with an error status.
	        requests.Timeout: If the server does not respond within ``timeout``.
	    """
	    query = {
	        'q': term,
	        'num': results + 2,  # Prevents multiple requests
	        'hl': lang,
	        'start': start,
	    }
	    resp = get(
	        url="https://www.google.com/search",
	        headers=usr_agent,
	        params=query,
	        proxies=proxies,
	        timeout=timeout,
	    )
	    # Surface 4xx/5xx answers (e.g. rate limiting) instead of parsing them.
	    resp.raise_for_status()
	    return resp


	class SearchResult:
	    """A single search hit: its target URL, title text and description text."""

	    def __init__(self, url, title, description):
	        self.url, self.title, self.description = url, title, description

	    def __repr__(self):
	        # Values are interpolated with str(), not repr(), so they appear unquoted.
	        fields = f"url={self.url}, title={self.title}, description={self.description}"
	        return f"SearchResult({fields})"


	def search(term, num_results=10, lang="en", proxy=None, advanced=False):
	    """Yield Google search results for ``term``.

	    Result pages are fetched with ``_req`` and parsed with BeautifulSoup
	    until ``num_results`` hits have been yielded or a page contains no
	    usable hit.

	    Args:
	        term: Query text. It is passed on unescaped because ``_req`` sends
	            it through the ``params`` argument of ``requests``, which
	            URL-encodes it; escaping spaces here as well would turn them
	            into literal ``+`` characters in the query.
	        num_results: Maximum number of results to yield.
	        lang: Interface language forwarded to ``_req``.
	        proxy: Optional proxy URL used for the requests.
	        advanced: If true, yield ``SearchResult`` objects; otherwise yield
	            only the result URL strings.

	    Yields:
	        ``SearchResult`` instances or URL strings, at most ``num_results``.

	    Raises:
	        Whatever ``_req`` raises, e.g. on an HTTP error status.
	    """
	    # requests selects a proxy by the scheme of the *target* URL, which is
	    # always https here, so register the proxy for both schemes. Keying it
	    # by the proxy URL's own scheme silently bypassed http:// proxies.
	    proxies = {"http": proxy, "https": proxy} if proxy else None

	    start = 0
	    while start < num_results:
	        # Send request
	        resp = _req(term, num_results - start, lang, start, proxies)

	        # Parse
	        soup = BeautifulSoup(resp.text, 'html.parser')
	        found_on_page = 0
	        for result in soup.find_all('div', attrs={'class': 'g'}):
	            # Find link, title, description
	            link = result.find('a', href=True)
	            title = result.find('h3')
	            description_box = result.find(
	                'div', {'style': '-webkit-line-clamp:2'})
	            # Rebound for every result so a hit without a description box
	            # is skipped instead of reusing a value from an earlier hit.
	            description = (
	                description_box.find('span') if description_box else None)
	            if not (link and title and description):
	                continue

	            start += 1
	            found_on_page += 1
	            if advanced:
	                yield SearchResult(link['href'], title.text, description.text)
	            else:
	                yield link['href']

	            # _req asks for a few extra results; never yield more than the
	            # caller requested.
	            if start >= num_results:
	                return

	        # No usable hit on this page (end of results or unrecognised
	        # markup): stop, otherwise the same request would repeat forever.
	        if not found_on_page:
	            break