web-search-api / networks /google_searcher.py
Hansimov's picture
:recycle: [Refactor] Replace output_path with html_path to avoid confusion
8c0b736
raw
history blame
1.77 kB
import requests
from pathlib import Path
from utils.enver import enver
from utils.logger import logger
from networks.filepath_converter import QueryToFilepathConverter
from networks.network_configs import REQUESTS_HEADERS
class GoogleSearcher:
    """Fetch a Google results page for a query and store the raw HTML on disk.

    Reference:
    https://github.com/Nv7-GitHub/googlesearch/blob/master/googlesearch/__init__.py
    """

    # Seconds to wait on the connection/response before giving up. Without a
    # timeout, ``requests`` may block indefinitely on a stalled server or proxy.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Set the search endpoint, the shared env helper and the path converter."""
        self.url = "https://www.google.com/search"
        self.enver = enver
        # NOTE(review): presumably prepares the proxy settings later read from
        # ``enver.requests_proxies`` -- confirm in utils.enver.
        self.enver.set_envs(proxies=True)
        self.filepath_converter = QueryToFilepathConverter()

    def send_request(self, result_num=10, safe=False):
        """Send the search request for ``self.query``.

        The response is stored on ``self.request_response``.

        Args:
            result_num: Value sent as Google's ``num`` query parameter.
            safe: When truthy, ask for SafeSearch by sending ``safe=active``.
                When falsy (the default) no ``safe`` parameter is sent.

        Raises:
            requests.exceptions.Timeout: If no response arrives within
                ``REQUEST_TIMEOUT`` seconds.
        """
        params = {
            "q": self.query,
            "num": result_num,
        }
        if safe:
            params["safe"] = "active"
        self.request_response = requests.get(
            url=self.url,
            headers=REQUESTS_HEADERS,
            params=params,
            proxies=self.enver.requests_proxies,
            timeout=self.REQUEST_TIMEOUT,
        )

    def save_response(self):
        """Write the body of the last response to ``self.html_path``."""
        # ``exist_ok=True`` already makes this a no-op when the directory is
        # present, so no separate existence check is required.
        self.html_path.parent.mkdir(parents=True, exist_ok=True)
        logger.note(f"Saving to: [{self.html_path}]")
        with open(self.html_path, "wb") as wf:
            wf.write(self.request_response.content)

    def search(self, query, result_num=10, safe=False, overwrite=False):
        """Return the path of the HTML file holding the results for ``query``.

        If the file already exists and ``overwrite`` is false, no request is
        made and the existing file is reused.

        Args:
            query: Search string; also used to derive the HTML file path.
            result_num: Forwarded to ``send_request``.
            safe: Forwarded to ``send_request``.
            overwrite: When true, fetch and rewrite the file even if it exists.

        Returns:
            The path produced by the filepath converter for ``query``.
        """
        self.query = query
        self.html_path = self.filepath_converter.convert(self.query)
        logger.note(f"Searching: [{self.query}]")
        if self.html_path.exists() and not overwrite:
            logger.success(f"HTML existed: {self.html_path}")
        else:
            self.send_request(result_num=result_num, safe=safe)
            self.save_response()
        return self.html_path
if __name__ == "__main__":
    # Manual smoke run: look up one sample query.
    # Alternative sample query: "python教程"
    GoogleSearcher().search("python tutorials")