kristada673's picture
Upload _base.py
9ad659c
raw
history blame
No virus
3.16 kB
from finnlp.utils.get_proxy import get_china_free_proxy, get_us_free_proxy, Kuaidaili
import requests
class FinNLP_Downloader:
    """Base downloader with optional proxy support and retrying HTTP helpers.

    Configuration is read from the ``args`` dict given to the constructor:

    * ``use_proxy``: string selecting the proxy source. A value containing
      ``"kuaidaili"`` uses a ``Kuaidaili`` tunnel; a value containing
      ``"china"`` or ``"China"`` uses ``get_china_free_proxy``; any other
      non-empty value uses ``get_us_free_proxy``. A missing or falsy value
      disables proxying.
    * ``max_retry``: number of attempts made per request (default ``1``).
    * ``proxy_pages``: value forwarded to the free-proxy fetchers
      (default ``5``).
    * ``tunnel``, ``username``, ``password``: required when ``use_proxy``
      selects kuaidaili.
    """

    def __init__(self, args=None):
        """Read the configuration and prepare the proxy source.

        Args:
            args: Optional configuration dict (see the class docstring).

        Raises:
            AssertionError: If kuaidaili is selected and ``tunnel``,
                ``username`` or ``password`` is missing from ``args``.
        """
        # A fresh dict per call instead of a shared mutable default argument.
        if args is None:
            args = {}

        # A falsy "use_proxy" (None, "", False) means "no proxy"; previously
        # the mere presence of the key enabled proxying and the substring
        # checks below then failed with a TypeError.
        self.country = args.get("use_proxy") or None
        self.use_proxy = self.country is not None

        self.max_retry = args.get("max_retry", 1)
        self.proxy_pages = args.get("proxy_pages", 5)

        if not self.use_proxy:
            self.proxy_list = []
        elif "kuaidaili" in self.country:
            # The tunnel needs credentials: tunnel, username, password.
            # Kept as asserts so callers still see AssertionError.
            assert "tunnel" in args, "Please make sure 'tunnel' in your keys"
            assert "username" in args, "Please make sure 'username' in your keys"
            assert "password" in args, "Please make sure 'password' in your keys"
            self.proxy_list = Kuaidaili(args["tunnel"], args["username"], args["password"])
        else:
            # Free proxies are handed out round-robin, starting at index 0.
            self.proxy_id = 0
            self.proxy_list = self._update_proxy()

    def _get_proxy(self):
        """Return the proxy to use for the next request.

        Returns:
            ``None`` when proxying is disabled or the free-proxy list is
            empty; the value of ``get_kuaidaili_tunnel_proxy()`` for
            kuaidaili; otherwise the next entry of ``self.proxy_list``.
        """
        if not self.use_proxy:
            return None
        if "kuaidaili" in self.country:
            return self.proxy_list.get_kuaidaili_tunnel_proxy()
        if len(self.proxy_list) > 0:
            proxy = self.proxy_list[self.proxy_id]
            # Advance the cursor, wrapping back to the first proxy at the end.
            self.proxy_id = (self.proxy_id + 1) % len(self.proxy_list)
            return proxy
        return None

    def _update_proxy(self):
        """Fetch a free-proxy list for the configured country.

        Returns:
            The result of ``get_china_free_proxy(self.proxy_pages)`` when
            ``self.country`` contains ``"china"`` or ``"China"``; otherwise
            the result of ``get_us_free_proxy(self.proxy_pages)``.
        """
        if "china" in self.country or "China" in self.country:
            return get_china_free_proxy(self.proxy_pages)
        return get_us_free_proxy(self.proxy_pages)

    def _send_with_retry(self, send):
        """Call ``send`` up to ``self.max_retry`` times.

        Args:
            send: Zero-argument callable performing one HTTP request and
                returning the response object.

        Returns:
            The first response whose ``status_code`` is 200, or ``None`` if
            no attempt succeeded (including when ``max_retry`` is 0 or less).
        """
        for _ in range(self.max_retry):
            try:
                response = send()
            except Exception:
                # Best effort: a failed attempt just consumes one retry.
                # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
                # and SystemExit propagate so the loop can be interrupted.
                continue
            if response.status_code == 200:
                return response
        return None

    def _request_get(self, url, headers = None, verify = None, params = None):
        """Send a GET request with retries through the current proxy.

        Args:
            url: Target URL.
            headers: Request headers; a Firefox User-Agent is used if omitted.
            verify: Forwarded to ``requests.get`` as ``verify``.
            params: Forwarded to ``requests.get`` as ``params``.

        Returns:
            The response if an attempt returned HTTP 200, otherwise ``None``.
        """
        if headers is None:
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/112.0"
            }
        # The proxy is chosen once per call, so every retry reuses it.
        proxies = self._get_proxy()
        # NOTE(review): no timeout is passed, so a stalled connection can
        # block indefinitely.
        return self._send_with_retry(
            lambda: requests.get(url = url, proxies = proxies, headers = headers, verify = verify, params = params)
        )

    def _request_post(self, url, headers, json):
        """Send a POST request with a JSON body, with retries.

        Args:
            url: Target URL.
            headers: Request headers.
            json: Forwarded to ``requests.post`` as ``json``.

        Returns:
            The response if an attempt returned HTTP 200, otherwise ``None``.
        """
        # The proxy is chosen once per call, so every retry reuses it.
        proxies = self._get_proxy()
        # NOTE(review): no timeout is passed, so a stalled connection can
        # block indefinitely.
        return self._send_with_retry(
            lambda: requests.post(url = url, headers = headers, json = json, proxies = proxies)
        )