Spaces:

mediaparty2023
/

spoof-detect

Runtime error

Niv Sardi

import python

1a24a58 about 1 year ago

1.96 kB

	#!/usr/bin/env python
	import csv
	import requests
	import shutil
	import re

	from bs4 import BeautifulSoup
	from progress.bar import ChargingBar
	import concurrent.futures

	import web
	from entity import Entity
	from common import selectors, defaults, mkdir

	URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'

	# Fetch the listing page. The timeout keeps the script from hanging
	# forever on a stalled connection, and raise_for_status() fails fast on an
	# HTTP error instead of parsing an error page as if it were the listing.
	page = requests.get(URL, timeout=30)
	page.raise_for_status()
	soup = BeautifulSoup(page.content, 'html.parser')

	# The first element carrying the "form-control" class is expected to hold
	# the <option> entries that the rest of the script turns into entities.
	entity_select = soup.find(class_='form-control')
	if entity_select is None:
	    raise RuntimeError(
	        f'no element with class "form-control" found at {URL}; '
	        'the page layout may have changed'
	    )
	options = entity_select.find_all('option')

	# Presumably creates the output directories if they are missing
	# (behaviour of common.mkdir is not visible here -- confirm).
	mkdir.make_dirs([defaults.DATA_PATH, defaults.LOGOS_DATA_PATH])

	def get_links(soup):
	    """Return the first link whose href starts with ``http``, or ``None``.

	    Only ``<a>`` elements nested inside elements matching the
	    ``.post-pagina-interior`` CSS selector are considered, in document
	    order as returned by ``select``.

	    Args:
	        soup: a parsed document (or any object) exposing ``select(css)``
	            that yields elements with ``select`` and an ``attrs`` mapping.

	    Returns:
	        The ``href`` string of the first matching anchor, or ``None`` when
	        no anchor has an href starting with ``http``.
	    """
	    for section in soup.select('.post-pagina-interior'):
	        for anchor in section.select('a'):
	            href = anchor.attrs.get('href')
	            # Relative links, mailto: links and anchors without an href
	            # are skipped; only http/https targets are returned.
	            if href is not None and href.startswith('http'):
	                return href
	    return None


	# Rows go to a ".tmp" sibling of MAIN_CSV_PATH; the file is only moved
	# to its final name by the statements that follow this block.
	with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
	    writer = csv.writer(csvfile)
	    writer.writerow(Entity.row_names())

	    # The first <option> is skipped (presumably a placeholder entry --
	    # confirm against the page). Size the bar on what is actually scraped
	    # so it can reach 100%.
	    entity_options = options[1:]
	    bar = ChargingBar('get entities', max=len(entity_options))

	    def get_bco(o, i, timeout=30):
	        """Fetch one entity's page and build its Entity.

	        Args:
	            o: an <option> tag; its text is used as the entity name and
	                its ``value`` attribute as the ``bco`` code.
	            i: numeric id passed to the Entity.
	            timeout: seconds to wait for the HTTP request before giving up.

	        Returns:
	            The Entity built from the option and the scraped page. The
	            caller writes it to the CSV, so worker threads never touch
	            the shared writer.
	        """
	        name, bco = o.text, o.attrs['value']

	        page = requests.post(URL, data={'bco': bco}, stream=False, timeout=timeout)
	        soup = BeautifulSoup(page.content, 'html.parser')
	        img = f'https://www.bcra.gob.ar/Imagenes/logosbancos/{bco}.jpg'
	        # str() turns a missing link (None) into the literal 'None', which
	        # is what the CSV has always contained for entities without a URL.
	        return Entity(name, id=i, bco=bco, logo=str(img), url=str(get_links(soup)))

	    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as executor:
	        futures = {executor.submit(get_bco, o, i): o for (i, o) in enumerate(entity_options)}
	        for f in concurrent.futures.as_completed(futures):
	            o = futures[f]
	            try:
	                # csv.writer is not documented as thread-safe, so rows are
	                # written here, in the main thread, in completion order.
	                writer.writerow(f.result().to_row())
	            except Exception as err:
	                # Best effort: report the failing option and keep going.
	                print(f'({o}) generated an exception: {err}')
	            bar.next()
	    bar.finish()

	# Promote the temporary CSV to its final name, then report the result.
	# The count printed is the number of options submitted for scraping.
	tmp_csv_path = f'{defaults.MAIN_CSV_PATH}.tmp'
	shutil.move(tmp_csv_path, defaults.MAIN_CSV_PATH)
	print(f'scrape finished, found {len(options[1:])} entities, dumped to {defaults.MAIN_CSV_PATH}')