Spaces:
Runtime error
Runtime error
File size: 1,960 Bytes
e919aa3 1a24a58 e919aa3 1a24a58 e919aa3 1a24a58 e919aa3 c9f2bb9 1a24a58 e919aa3 1a24a58 e919aa3 1a24a58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
#!/usr/bin/env python
import csv
import requests
import shutil
import re
from bs4 import BeautifulSoup
from progress.bar import ChargingBar
import concurrent.futures
import web
from entity import Entity
from common import selectors, defaults, mkdir
# Listing page of financial entities. The same URL serves the initial GET
# below and the per-entity POST lookups issued later in this script.
URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'

# requests applies no timeout by default and does not raise on 4xx/5xx, so
# bound the wait and fail fast instead of hanging or parsing an error page.
page = requests.get(URL, timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# The entity selector is the first element with class "form-control". Each of
# its <option> tags is later read as (text -> entity name, value -> code).
entity_select = soup.find(class_='form-control')
if entity_select is None:
    raise RuntimeError(f'entity selector (.form-control) not found at {URL}')
options = entity_select.find_all('option')

# Ensure the output directories exist before anything is written.
mkdir.make_dirs([defaults.DATA_PATH, defaults.LOGOS_DATA_PATH])
def get_links(soup):
    """Return the first absolute link found inside the page's content blocks.

    Every element matching ``.post-pagina-interior`` is searched for ``<a>``
    tags, in the order ``select()`` returns them.

    Args:
        soup: Parsed page exposing ``select(css)``; the matched anchors must
            expose an ``attrs`` mapping.

    Returns:
        The first ``href`` value that starts with ``'http'``, or ``None``
        when no anchor qualifies.
    """
    for container in soup.select('.post-pagina-interior'):
        for anchor in container.select('a'):
            href = anchor.attrs.get('href')
            # Relative links, mailto: links and anchors without href are skipped.
            if href is not None and href.startswith('http'):
                return href
    return None
def get_bco(o, i):
    """Fetch one entity's detail page and build its CSV row.

    Runs on a worker thread. It performs no file I/O itself; the row is
    handed back so that a single thread does all the CSV writing.

    Args:
        o: ``<option>`` tag from the entity selector; its text is the entity
            name and its ``value`` attribute is the entity code.
        i: Numeric id assigned to the entity.

    Returns:
        The row produced by ``Entity.to_row()`` for this entity.
    """
    name, bco = o.text, o.attrs['value']
    # Bounded wait so one stalled response cannot block a worker forever.
    page = requests.post(URL, data={'bco': bco}, stream=False, timeout=30)
    soup = BeautifulSoup(page.content, 'html.parser')
    img = f'https://www.bcra.gob.ar/Imagenes/logosbancos/{bco}.jpg'
    # get_links() returns None when the page has no absolute link, in which
    # case the url column holds the literal string 'None'.
    e = Entity(name, id=i, bco=bco, logo=str(img), url=str(get_links(soup)))
    return e.to_row()


# The first <option> is not submitted for scraping.
entities = options[1:]

# Write to a temporary file first; it replaces the real CSV only after the
# whole scrape has run.
with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())
    # Size the bar by the number of submitted jobs so it can reach 100%.
    bar = ChargingBar('get entities', max=len(entities))

    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
        futures = {executor.submit(get_bco, o, i): o for (i, o) in enumerate(entities)}
        for f in concurrent.futures.as_completed(futures):
            o = futures[f]
            try:
                row = f.result()
            except Exception as err:
                # Best effort: report the failing entity and keep going.
                print(f'({o}) generated an exception: {err}')
            else:
                # Only this thread touches the writer, so rows cannot interleave.
                writer.writerow(row)
            bar.next()
    bar.finish()

shutil.move(f'{defaults.MAIN_CSV_PATH}.tmp', defaults.MAIN_CSV_PATH)
print(f'scrape finished, found {len(options[1:])} entities, dumped to {defaults.MAIN_CSV_PATH}')
|