hussein2000 committed on
Commit
03e7882
1 Parent(s): d8e023f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -47
app.py CHANGED
@@ -1,65 +1,69 @@
1
- from flask import Flask, request, jsonify, render_template
2
  import requests
3
  from bs4 import BeautifulSoup
4
- from urllib.parse import urlparse, parse_qs, unquote
5
 
6
  app = Flask(__name__)
7
 
8
def duckduckgo_search(query):
    """Scrape DuckDuckGo's HTML search page and return a list of result dicts.

    Args:
        query: The search terms, sent as the ``q`` request parameter.

    Returns:
        A list of dicts with the keys ``title``, ``link``, ``description``
        and ``icon``. ``link`` is the target taken from the ``uddg``
        parameter of the result's redirect link (empty string when absent).
        ``icon`` is the site's ``/favicon.ico`` URL when that URL answers
        with HTTP 200, otherwise the string ``'No icon available'``.
        An empty list is returned when the search page cannot be fetched or
        does not answer with HTTP 200.
    """
    url = 'https://duckduckgo.com/html/'
    params = {'q': query}
    headers = {'User-Agent': 'Mozilla/5.0'}

    # A timeout keeps one stalled upstream request from hanging the caller.
    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
    except requests.RequestException:
        return []
    if response.status_code != 200:
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    results = []
    for result in soup.find_all('div', class_='result'):
        title_tag = result.find('a', class_='result__a')
        if not title_tag:
            continue
        title = title_tag.get_text()
        link = title_tag.get('href', '')

        # Decode the actual URL from the DuckDuckGo redirect link.
        # parse_qs already percent-decodes the value; decoding it a second
        # time would corrupt URLs that contain a literal "%25" sequence.
        query_params = parse_qs(urlparse(link).query)
        actual_url = query_params.get('uddg', [''])[0]

        description_tag = result.find('a', class_='result__snippet')
        description = description_tag.get_text() if description_tag else 'No description available'

        # Attempt to fetch the favicon. This is best-effort: a slow or
        # unreachable host must not abort the whole search.
        icon = 'No icon available'
        if actual_url:
            parsed_actual_url = urlparse(actual_url)
            favicon_url = f"{parsed_actual_url.scheme}://{parsed_actual_url.netloc}/favicon.ico"
            try:
                favicon_response = requests.get(favicon_url, headers=headers, timeout=5)
            except requests.RequestException:
                favicon_response = None
            if favicon_response is not None and favicon_response.status_code == 200:
                icon = favicon_url

        results.append({
            'title': title,
            'link': actual_url,
            'description': description,
            'icon': icon,
        })
    return results
50
 
51
@app.route('/')
def index():
    """Serve the site root by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
54
@app.route('/search', methods=['GET'])
def search():
    """Handle ``GET /search`` and return the scraped results as JSON.

    Reads the ``query`` URL parameter. When it is missing or empty the
    response is an error payload with HTTP status 400; otherwise the list
    produced by ``duckduckgo_search`` is returned as JSON.
    """
    term = request.args.get('query')
    if term:
        return jsonify(duckduckgo_search(term))
    return jsonify({'error': 'No query provided'}), 400
63
 
 
64
  if __name__ == '__main__':
65
  app.run(host="0.0.0.0", port=7860)
 
1
+ from flask import Flask, request, jsonify
2
  import requests
3
  from bs4 import BeautifulSoup
 
4
 
5
  app = Flask(__name__)
6
 
7
+ # Example URL, modify based on the search page you're targeting
8
+ url = "https://html.duckduckgo.com/html/"
 
 
9
 
10
# Fetch one page of search results (1st page: page_num=1, 2nd page: page_num=2, etc.)
def fetch_duckduckgo_results(search_query, page_num=1):
    """Request one page of results from the module-level ``url`` and parse it.

    Args:
        search_query: The search terms, sent as the ``q`` request parameter.
        page_num: 1-based page number. It is converted to a result offset in
            steps of 50 and sent as the ``s`` parameter. Values below 1 are
            treated as the first page.

    Returns:
        On success, a list of dicts with the keys ``title``, ``description``,
        ``icon_url`` (``None`` when the result has no usable icon) and
        ``url``. On failure, a dict of the form ``{"error": "<message>"}``,
        produced both for non-200 responses and for network errors.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36'
    }

    # A page number below 1 would yield a negative offset; clamp to page 1.
    offset = (max(page_num, 1) - 1) * 50

    # 'q' carries the query, 's' the pagination offset.
    params = {'q': search_query, 's': offset}

    # The timeout keeps a stalled upstream from hanging the request; network
    # failures are reported in the same error shape as a bad status code.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=10)
    except requests.RequestException as exc:
        return {"error": f"Failed to retrieve data: {exc}"}

    if response.status_code != 200:
        return {"error": f"Failed to retrieve data: {response.status_code}"}

    soup = BeautifulSoup(response.text, 'html.parser')

    results = []
    for result in soup.find_all('div', class_='result__body'):
        title_tag = result.find('h2', class_='result__title')
        snippet_tag = result.find('a', class_='result__snippet')
        url_tag = result.find('a', class_='result__a')

        # Title, snippet and link are required; the icon is optional.
        if not (title_tag and snippet_tag and url_tag):
            continue
        href = url_tag.get('href')
        if not href:
            continue

        icon_tag = result.find('img', class_='result__icon__img')
        icon_src = icon_tag.get('src') if icon_tag else None
        # Only protocol-relative sources ("//host/path") need a scheme;
        # prefixing an already absolute URL would corrupt it.
        if icon_src and icon_src.startswith('//'):
            icon_src = 'https:' + icon_src

        results.append({
            'title': title_tag.text.strip(),
            'description': snippet_tag.text.strip(),
            'icon_url': icon_src or None,
            'url': href,
        })

    return results
50
 
51
# API endpoint for fetching search results
@app.route('/search', methods=['GET'])
def search():
    """Handle ``GET /search?query=...&page=...`` and return results as JSON.

    Query parameters:
        query: Required search terms. Surrounding whitespace is ignored.
        page: Optional 1-based page number. Missing or non-integer values
            fall back to 1, and values below 1 are treated as 1.

    Returns:
        The JSON list from ``fetch_duckduckgo_results`` with HTTP 200,
        an error payload with HTTP 400 when ``query`` is empty, or the
        fetcher's error payload with HTTP 502 when the upstream request failed.
    """
    search_query = request.args.get('query', default='', type=str).strip()
    page_num = max(request.args.get('page', default=1, type=int), 1)

    if not search_query:
        return jsonify({"error": "Search query is required"}), 400

    results = fetch_duckduckgo_results(search_query, page_num)

    # The fetcher reports failure as an {"error": ...} dict rather than a
    # list; surface that as an upstream failure instead of a 200 response.
    if isinstance(results, dict) and "error" in results:
        return jsonify(results), 502

    return jsonify(results)
66
 
67
+ # Run the Flask application
68
  if __name__ == '__main__':
69
  app.run(host="0.0.0.0", port=7860)