#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Thu Sep 23 09:27:21 2021 @author: benjaminull """ import pandas as pd from datetime import datetime import numpy as np from datetime import timedelta import requests import io import openpyxl from pandas import ExcelWriter import requests from bs4 import BeautifulSoup from bs4 import BeautifulSoup import pandas as pd from selenium import webdriver import requests def GenerarExcel(ruta_guardado, Pestañas, Data): wb = openpyxl.Workbook() writer = ExcelWriter(ruta_guardado) for pestaña in Pestañas: wb.create_sheet(pestaña) std = wb.get_sheet_by_name('Sheet') wb.remove_sheet(std) wb.save(ruta_guardado) for i in range(len(Pestañas)): print(Data[i]) Data[i].to_excel(writer, sheet_name=Pestañas[i]) writer.save() def run_data_covid(): options = webdriver.ChromeOptions() options.binary_location = r'C:/Program Files/Google/Chrome/Application/chrome.exe' path_to_chromedriver = r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe' browser = webdriver.Chrome(executable_path=path_to_chromedriver, chrome_options=options) # Ir a página deseada url = 'https://covid19.apple.com/mobility' browser.get(url) page = requests.get(url) html=browser.page_source soup = BeautifulSoup(html, "html.parser") link = str(soup.find_all("a")[1]).split('"')[1] link_apple = "https://covid19.apple.com/mobility" r = requests.get(link_apple) soup = BeautifulSoup(r.text, 'lxml') data_agg = pd.DataFrame() i = 0 for chunk in pd.read_csv( 'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv', usecols=['country_region', 'date', 'retail_and_recreation_percent_change_from_baseline', 'grocery_and_pharmacy_percent_change_from_baseline', 'parks_percent_change_from_baseline', 'transit_stations_percent_change_from_baseline', 'workplaces_percent_change_from_baseline'], dtype = {"workplaces_percent_change_from_baseline": "float32", "parks_percent_change_from_baseline": "float32", "retail_and_recreation_percent_change_from_baseline": "float32", 'transit_stations_percent_change_from_baseline': "float32", },chunksize = 150000): if i == 65: break i=i+1 data_agg = pd.concat([data_agg, chunk], ignore_index=True) data_agg.info(memory_usage="deep") data_agg.set_index(['country_region', 'date'], inplace=True) data_agg = data_agg.groupby(level=[0, 1]).mean() data_agg.columns = data_agg.columns.str.replace('_percent_change_from_baseline', '_google') yesterday = (datetime.today() - timedelta(2)).strftime("%Y-%m-%d") url=f''+link CONFIRMED_CONTENT = requests.get(url).content data_app = pd.read_csv(io.StringIO(CONFIRMED_CONTENT.decode('utf-8')), error_bad_lines=False) # Dejamos solo la data a nivel pais data_app.info(memory_usage="deep") data_app = data_app.loc[data_app['geo_type'] == 'country/region'] data_app = data_app.drop(columns=['geo_type', 'country', 'alternative_name', 'sub-region']) data_app = data_app.set_index(['region', 'transportation_type']).stack() data_app = data_app.unstack(level='transportation_type') - 100 data_app.index.names = data_agg.index.names data_agg = data_agg.join(data_app) print(data_app.columns) mob_idx_cols = ['retail_and_recreation_google', 'grocery_and_pharmacy_google', 'parks_google', 'transit_stations_google', 'workplaces_google', 'driving', 'transit', 'walking'] data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(1) regiones = {} regiones['Latam'] = ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru'] regiones['Europa'] = ['Italy', 'Spain', 'Germany', 'United Kingdom', 'France'] regiones['Asia Emergente'] = ['South Korea', 'Taiwan', 'Hong Kong', 'India', 'Thailand', 'Indonesia'] regiones['USA'] = ['United States'] # regiones['Israel'] = ['Israel'] data_dict = {} for col in data_agg.columns: df = data_agg[col].unstack().T.rolling(7, 3).mean() for region, paises in regiones.items(): df[region] = df[paises].mean(1) data_dict[col] = df GenerarExcel("Scheduler/Movilidad_desagrada.xlsx", list(data_dict.keys()), list(data_dict.values())) np.save('Scheduler/dict_movilidad.npy', data_dict)