Hansimov committed on
Commit
e448a74
1 Parent(s): f150f6b

:recycle: [Refactor] Move header constructor, and prettier logging

Browse files
Files changed (1) hide show
  1. networks/google_searcher.py +15 -23
networks/google_searcher.py CHANGED
@@ -1,16 +1,7 @@
1
  import requests
2
  from pathlib import Path
3
  from utils.enver import enver
4
-
5
-
6
- class RequestHeaderConstructor:
7
- def __init__(self):
8
- self.construct()
9
-
10
- def construct(self):
11
- self.headers = {
12
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
13
- }
14
 
15
 
16
  class GoogleSearcher:
@@ -21,35 +12,36 @@ class GoogleSearcher:
21
  self.enver.set_envs(proxies=True)
22
  self.output_root = Path(__file__).parents[1] / "files"
23
 
24
- def send_request(self, query, result_num=10):
25
- res = requests.get(
 
26
  url=self.url,
27
- headers=RequestHeaderConstructor().headers,
 
 
28
  params={
29
  "q": self.query,
30
  "num": result_num,
31
- # "hl": "en",
32
- # "start": 0,
33
  },
34
  proxies=self.enver.requests_proxies,
35
  )
36
- return res
37
 
38
- def save_response(self, res, query):
39
- output_filename = query.replace(" ", "_") + ".html"
40
  if not self.output_root.exists():
41
  self.output_root.mkdir(parents=True, exist_ok=True)
42
  output_path = self.output_root / output_filename
 
43
  with open(output_path, "wb") as wf:
44
- wf.write(res.content)
45
 
46
  def search(self, query):
47
  self.query = query
48
- res = self.send_request(query)
49
- self.save_response(res, query)
50
 
51
 
52
  if __name__ == "__main__":
53
  searcher = GoogleSearcher()
54
- # searcher.search("python tutorials")
55
- searcher.search("python教程")
 
1
  import requests
2
  from pathlib import Path
3
  from utils.enver import enver
4
+ from utils.logger import logger
 
 
 
 
 
 
 
 
 
5
 
6
 
7
  class GoogleSearcher:
 
12
  self.enver.set_envs(proxies=True)
13
  self.output_root = Path(__file__).parents[1] / "files"
14
 
15
+ def send_request(self, result_num=10):
16
+ logger.note(f"Searching: [{self.query}]")
17
+ self.request_response = requests.get(
18
  url=self.url,
19
+ headers={
20
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62",
21
+ },
22
  params={
23
  "q": self.query,
24
  "num": result_num,
 
 
25
  },
26
  proxies=self.enver.requests_proxies,
27
  )
 
28
 
29
+ def save_response(self):
30
+ output_filename = self.query.replace(" ", "_") + ".html"
31
  if not self.output_root.exists():
32
  self.output_root.mkdir(parents=True, exist_ok=True)
33
  output_path = self.output_root / output_filename
34
+ logger.note(f"Saving to: [{output_path}]")
35
  with open(output_path, "wb") as wf:
36
+ wf.write(self.request_response.content)
37
 
38
  def search(self, query):
39
  self.query = query
40
+ self.send_request()
41
+ self.save_response()
42
 
43
 
44
  if __name__ == "__main__":
45
  searcher = GoogleSearcher()
46
+ # searcher.search("python教程")
47
+ searcher.search("python tutorials")