tx3bas committed on
Commit
afb444f
1 Parent(s): c23813a

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +16 -1
extract.py CHANGED
@@ -1,8 +1,13 @@
1
  from selenium import webdriver
 
 
 
2
  from selenium.common.exceptions import WebDriverException
 
3
  import time
4
  import random
5
- from bs4 import BeautifulSoup
 
6
 
7
  # Lista de User Agents para rotar
8
  user_agents = [
@@ -28,6 +33,7 @@ user_agents = [
28
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:91.0) Gecko/20100101 Firefox/91.0",
29
  ]
30
 
 
31
  def get_random_user_agent():
32
  return random.choice(user_agents)
33
 
@@ -37,12 +43,20 @@ def get_random_window_size():
37
  ]
38
  return random.choice(window_sizes)
39
 
 
 
 
 
40
  def extract_data(user_input, mode):
 
 
 
41
  options = webdriver.ChromeOptions()
42
  options.add_argument('--headless')
43
  options.add_argument('--no-sandbox')
44
  options.add_argument('--disable-dev-shm-usage')
45
  options.add_argument(f"user-agent={get_random_user_agent()}")
 
46
 
47
  try:
48
  wd = webdriver.Chrome(options=options)
@@ -60,6 +74,7 @@ def extract_data(user_input, mode):
60
  page_content = wd.page_source
61
 
62
  except WebDriverException as e:
 
63
  return []
64
  finally:
65
  if wd:
 
1
  from selenium import webdriver
2
+ from selenium.webdriver.common.by import By
3
+ from selenium.webdriver.common.keys import Keys
4
+ from selenium.webdriver.chrome.service import Service
5
  from selenium.common.exceptions import WebDriverException
6
+ from bs4 import BeautifulSoup
7
  import time
8
  import random
9
+ import requests
10
+ from fp.fp import FreeProxy
11
 
12
  # Lista de User Agents para rotar
13
  user_agents = [
 
33
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:91.0) Gecko/20100101 Firefox/91.0",
34
  ]
35
 
36
+
37
  def get_random_user_agent():
38
  return random.choice(user_agents)
39
 
 
43
  ]
44
  return random.choice(window_sizes)
45
 
46
+ def get_proxy():
47
+ proxy = FreeProxy(rand=True, timeout=1).get()
48
+ return proxy
49
+
50
  def extract_data(user_input, mode):
51
+ proxy = get_proxy()
52
+ proxy_url = f"http://{proxy}"
53
+
54
  options = webdriver.ChromeOptions()
55
  options.add_argument('--headless')
56
  options.add_argument('--no-sandbox')
57
  options.add_argument('--disable-dev-shm-usage')
58
  options.add_argument(f"user-agent={get_random_user_agent()}")
59
+ options.add_argument('--proxy-server=%s' % proxy_url)
60
 
61
  try:
62
  wd = webdriver.Chrome(options=options)
 
74
  page_content = wd.page_source
75
 
76
  except WebDriverException as e:
77
+ print(f"Request failed with proxy {proxy_url}. Error: {e}")
78
  return []
79
  finally:
80
  if wd: