johann22 committed on
Commit
76489d2
1 Parent(s): 44e3c5c

Create i_search.py

Browse files
Files changed (1) hide show
  1. i_search.py +50 -0
i_search.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+
4
# Browser-like HTTP request headers (Firefox 49 User-Agent) sent with the
# search request in google(), so the request resembles one from a browser.
headers_Get = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1'
}
13
+
14
def i_search(url, timeout=10):
    """Download *url* and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a silent host.

    Returns:
        The text of every ``<p>`` element in document order, joined with
        single spaces; an empty string if the page has no paragraphs.

    Raises:
        requests.HTTPError: If the response status is 4xx or 5xx.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Parse the raw bytes so BeautifulSoup can detect the encoding itself.
    soup = BeautifulSoup(response.content, 'html.parser')
    return ' '.join(p.text for p in soup.find_all('p'))
19
+
20
+
21
+
22
def b_search(q, timeout=10):
    """Fetch the page at URL *q* and collect the first link of each ``<article>``.

    Args:
        q: Full URL of the page to fetch (it is requested as-is).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a silent host.

    Returns:
        A list of ``{'text': ..., 'url': ...}`` dicts, one per ``<article>``
        element that contains an ``<a>`` tag with an ``href`` attribute.
        ``text`` is the stripped anchor text and ``url`` the raw ``href``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    r = requests.get(q, timeout=timeout)
    soup = BeautifulSoup(r.text, "html.parser")
    output = []
    # The 'article' selector depends on the target page's markup and may need
    # updating if that markup changes.
    for article in soup.find_all('article'):
        anchor = article.find('a')
        href = anchor.get('href') if anchor is not None else None
        # Skip articles without a usable link instead of raising
        # TypeError (no <a>) or KeyError (no href).
        if href is None:
            continue
        output.append({'text': anchor.text.strip(), 'url': href})

    return output
36
def google(q, timeout=10):
    """Run a Google web search for *q* and scrape the result links.

    Args:
        q: Free-text search query. Runs of whitespace are collapsed to a
            single separator before the request is made.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a silent host.

    Returns:
        A list of ``{'text': ..., 'url': ...}`` dicts, one per
        ``<h3 class="r">`` element that contains an ``<a>`` tag with an
        ``href`` attribute. ``text`` is the stripped anchor text and ``url``
        the raw ``href``.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # Let requests URL-encode the query: characters such as '&', '#', '+' or
    # '%' would otherwise corrupt a hand-concatenated query string.
    params = {'q': ' '.join(q.split()), 'ie': 'utf-8', 'oe': 'utf-8'}
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as s:
        r = s.get(
            'https://www.google.com/search',
            params=params,
            headers=headers_Get,
            timeout=timeout,
        )

    soup = BeautifulSoup(r.text, "html.parser")
    output = []
    # The 'h3.r' selector depends on Google's page structure and may need
    # updating when that structure changes.
    for heading in soup.find_all('h3', {'class': 'r'}):
        anchor = heading.find('a')
        href = anchor.get('href') if anchor is not None else None
        # Skip headings without a usable link instead of raising
        # TypeError (no <a>) or KeyError (no href).
        if href is None:
            continue
        output.append({'text': anchor.text.strip(), 'url': href})

    return output