from io import BytesIO import streamlit as st import pandas as pd from scipy.cluster.hierarchy import linkage from datetime import date from dateutil.relativedelta import relativedelta import plotly.figure_factory as ff from fastdtw import fastdtw from scipy.spatial.distance import euclidean import numpy as np from modules import tables @st.cache(suppress_st_warning=True) def data_request(country_to_request, start, currency='USD', end=date.today()): data = tables.EquityMaster(country=country_to_request, field='IQ_CLOSEPRICE_ADJ', currency=currency).query(rename=['asset'], start=start, end=str(end)) adtv = tables.EquityMaster(country=country_to_request, field='IQ_VALUE_TRADED', currency=currency).query(rename=['asset'], start=start, end=str(end)).median() marketcap = tables.EquityMaster(country=country_to_request, field='IQ_MARKETCAP', currency=currency).query(rename=['asset'], start=start, end=str(end)).median() return data, adtv, marketcap @st.cache(suppress_st_warning=True) def data_filter(data, adtv, marketcap, adtv_threshold, mktcap_threshold, p): adtv_filter = (adtv >= adtv_threshold) adtv = adtv.loc[adtv_filter] marketcap_filter = (marketcap >= mktcap_threshold) marketcap = marketcap.loc[marketcap_filter] data = data.loc[:, data.columns.isin(adtv.index)] data = data.loc[:, data.columns.isin(marketcap.index)] file_to_read = 'Data/Company_Base_Definitivo.xlsx' company_base = pd.read_excel(file_to_read, sheet_name='Compilado') id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_base.iterrows()} data = data.loc[:, data.columns.isin(id_to_ticker.keys())] data.columns = [id_to_ticker[col] for col in data.columns] if isinstance(p, str): returns_final = data.resample(p).last().pct_change().fillna(0) else: returns_final = data.iloc[::p].pct_change().fillna(0) return returns_final def dist(correlation): return ((1-correlation)/2.)**.5 def to_excel(df_to_write): output = BytesIO() writer = pd.ExcelWriter(output, engine='xlsxwriter') df_to_write.to_excel(writer, index=False, sheet_name='Sheet1') workbook = writer.book worksheet = writer.sheets['Sheet1'] format1 = workbook.add_format({'num_format': '0.00'}) worksheet.set_column('A:A', None, format1) writer.save() processed_data = output.getvalue() return processed_data @st.cache(suppress_st_warning=True) def get_dtw_distance(x, y): distance_dtw = fastdtw(x, y, dist=euclidean)[0] return distance_dtw def clustering_basado_en_correlacion(): form = st.form("Correlation Clustering") posible_countries = ('Todos', 'Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru') countries = form.multiselect('¿Qué países desea visualizar?', posible_countries) if 'Todos' in countries: countries = ('Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru') adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1., format="%.2f") mktcap_thresh = form.number_input('Ingrese el mínimo Market Cap que desea considerar', value=200., format="%.2f") start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año')) period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1) accept = form.form_submit_button('Aceptar') if accept: start_date = str(date.today() - relativedelta(months=int(start_date[0]))) for country in countries: data_, adtv_, marketcap_ = data_request(country, start_date) # Filtramos para que se cumplan los filtros del usuario en los datos returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period) # Normalizamos base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1) base = base.sort_index(axis=1) # Procedemos a calcular correlación y covarianza corr, covs = base.corr(), base.cov() file = to_excel(corr) # Definimos la matriz de distancia dist_matrix = dist(corr) hierarchy = linkage(dist_matrix) ct = 0.54 * max(hierarchy[:, 2]) fig = ff.create_dendrogram(dist_matrix, orientation='left', labels=list(base.columns), color_threshold=ct, linkagefun=linkage) fig.update_layout(title='{} desde {} hasta {}'.format(country, returns.index[0].date(), returns.index[-1].date())) if country == 'Brazil': fig.update_layout(height=2000) else: fig.update_layout(height=900) st.plotly_chart(fig, use_container_width=True) st.download_button(label='Descargar Matriz de Correlación para {}'.format(country), data=file, file_name='{}_correlacion.xlsx'.format(country)) def clustering_con_dtw(): form = st.form("Dynamic Time Warping Clustering") posible_countries = ('Todos', 'Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru') countries = form.multiselect('¿Qué países desea visualizar?', posible_countries) if 'Todos' in countries: countries = ('Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru') adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1., format="%.2f") mktcap_thresh = form.number_input('Ingrese el Mínimo Market Cap que desea considerar', value=200., format="%.2f") start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año')) period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1) accept = form.form_submit_button('Aceptar') if accept: start_date = str(date.today() - relativedelta(months=int(start_date[0]))) for country in countries: data_, adtv_, marketcap_ = data_request(country, start_date) # Filtramos para que se cumplan los filtros del usuario en los datos returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period) # Normalizamos returns base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1) base = base.sort_index(axis=1) # Procedemos a calcular correlación y covarianza N = len(base[:base.index[0]].T) # Creamos la Matriz de Distancias para DTW Dist = np.zeros((N, N)) place = st.empty() for i in range(N): place.write("Cargando: " + str(round(i*100/N)) + " %") for j in range(i - 1, N): company_1 = base.columns[i] company_2 = base.columns[j] Dist[i, j] = get_dtw_distance(base[company_1], base[company_2]) # La matriz es simétrica Dist[j, i] = Dist[i, j] # Creamos un DataFrame con la matriz de distancias df = pd.DataFrame(Dist) df.index = base.columns df.columns = base.columns # Pasamos el df a excel para descarga del usuario file = to_excel(df) hierarchy = linkage(Dist) ct = 0.54 * max(hierarchy[:, 2]) fig = ff.create_dendrogram(Dist, orientation='left', labels=list(base.columns), color_threshold=ct, linkagefun=linkage) fig.update_layout( title='{} desde {} hasta {}'.format(country, returns.index[0].date(), returns.index[-1].date())) if country == 'Brazil': fig.update_layout(height=2000) else: fig.update_layout(height=900) st.plotly_chart(fig, use_container_width=True) st.download_button(label='Descargar Matriz de distancias con DTW para {}'.format(country), data=file, file_name='{}_correlacion.xlsx'.format(country))