File size: 3,334 Bytes
485f76b
 
 
f1ab0d5
485f76b
 
1a24a58
ae7097b
f1ab0d5
7c115c7
74a29fd
 
cf048df
1a24a58
cf048df
 
 
 
 
 
 
1a24a58
 
 
485f76b
1a24a58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf048df
1a24a58
 
 
cf048df
1a24a58
cf048df
 
 
485f76b
cf048df
485f76b
cf048df
 
 
485f76b
 
 
f1ab0d5
485f76b
 
 
dd9165a
1a24a58
dd9165a
 
 
 
 
 
 
1a24a58
 
 
 
 
 
 
 
 
 
 
 
dd9165a
 
1a24a58
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python3
import concurrent.futures
import requests

from progress.bar import ChargingBar

from entity import Entity, read_entities
from common import defaults,mkdir
import web

PARALLEL = 20

def do_screenshot(e: Entity, timeout: float = 300):
    """Ask the screenshot service to capture the website of entity *e*.

    Posts a JSON job to ``http://puppet:8000/screenshot`` carrying the entity's
    ``url`` and ``id`` plus two file paths derived from ``e.bco`` (one under
    ``defaults.SCREENSHOT_PATH``, one under ``defaults.LOGOS_DATA_PATH``).
    Presumably the service writes its output to those paths -- confirm against
    the service implementation.

    Args:
        e: entity to capture; its ``url`` must be non-empty.
        timeout: seconds to wait for the service before giving up. Without a
            timeout ``requests`` waits forever, which would pin a worker thread
            if the service stalls.

    Raises:
        ValueError: if ``e.url`` is empty.
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status returned by the service.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not e.url:
        raise ValueError(f'entity {e.bco} has no url to screenshot')

    response = requests.post(
        'http://puppet:8000/screenshot',
        json={
            'url': e.url,
            'id': e.id,
            'path': f'{defaults.SCREENSHOT_PATH}/{e.bco}.png',
            'logos': f'{defaults.LOGOS_DATA_PATH}/{e.bco}.png',
        },
        timeout=timeout,
    )
    # Surface service-side failures to the caller instead of dropping them.
    response.raise_for_status()

def get_entity_logo(e: Entity):
    """Pass the entity's ``logo`` reference to ``web.get_img_logo``.

    The target file name handed over is ``<LOGOS_DATA_PATH>/<bco>.0.png``.
    """
    destination = "{}/{}.0.png".format(defaults.LOGOS_DATA_PATH, e.bco)
    web.get_img_logo(e.logo, destination)

def from_csv(args):
    """Run every enabled action against every entity read from the CSV.

    Reads ``certs``, ``logos``, ``screenshots``, ``entity_logo``, ``parallel``
    and ``csv`` from *args*. For each enabled flag the matching action is
    queued and its output directory is created. Every (entity, action) pair is
    then submitted to a thread pool of ``args.parallel`` workers; failures are
    printed and do not stop the remaining jobs. A progress bar advances once
    per finished job.
    """
    # (enabled?, action, output directory) -- order defines scheduling order.
    candidates = (
        (args.certs, web.get_cert, defaults.CERTS_PATH),
        (args.logos, web.get_logos, defaults.LOGOS_DATA_PATH),
        (args.screenshots, do_screenshot, defaults.SCREENSHOT_PATH),
        (args.entity_logo, get_entity_logo, defaults.LOGOS_DATA_PATH),
    )
    actions = []
    for enabled, action, out_dir in candidates:
        if enabled:
            actions.append(action)
            mkdir.make_dirs([out_dir])

    print(actions)
    with concurrent.futures.ThreadPoolExecutor(max_workers=args.parallel) as pool:
        pending = {}
        entities = read_entities(args.csv)
        total = len(entities.keys()) * len(actions)
        bar = ChargingBar(f'vendor ({total} jobs)', max=total)

        # Map each future back to its (entity, action) for error reporting.
        for entity in entities.values():
            for action in actions:
                pending[pool.submit(action, entity)] = (entity, action)
        print('waiting for futures')

        for done in concurrent.futures.as_completed(pending):
            entity, action = pending[done]
            try:
                done.result()
            except Exception as err:
                # Best effort: report the failed job and keep going.
                print(f'{action}({entity.url}) generated an exception: {err}')
            bar.next()
        bar.finish()

#query_vendor_site(Entity.from_dict({'url':'http://www.bancoprovincia.com.ar', 'bco':'debug'}))
#exit()

if __name__ == '__main__':
    # Command-line entry point: parse the options and run the selected actions.
    import argparse
    print("🌏 getting vendor data")
    parser = argparse.ArgumentParser(description='extract certificates and screenshots websites')
    parser.add_argument('--csv', metavar='csv', type=str,
                        default=defaults.MAIN_CSV_PATH,
                        help='main database')
    parser.add_argument('--parallel', metavar='parallel', type=int,
                        default=PARALLEL,
                        help='number of concurrent jobs')
    # BooleanOptionalAction creates a --flag / --no-flag pair that consumes no
    # value, so `type` and `metavar` have no effect on it. Passing them is
    # deprecated since Python 3.12 and raises TypeError from 3.14.
    parser.add_argument('--logos',
                        action=argparse.BooleanOptionalAction,
                        default=True, help='try to get logos')
    parser.add_argument('--entity-logo',
                        action=argparse.BooleanOptionalAction,
                        default=True, help='try to get logos from ENTITY')
    parser.add_argument('--certs',
                        action=argparse.BooleanOptionalAction,
                        default=True, help='try to get certs')
    parser.add_argument('--screenshots',
                        action=argparse.BooleanOptionalAction,
                        default=True, help='try to get screenshots')

    args = parser.parse_args()
    from_csv(args)