File size: 8,727 Bytes
025632f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from io import BytesIO
import streamlit as st
import pandas as pd
from scipy.cluster.hierarchy import linkage
from datetime import date
from dateutil.relativedelta import relativedelta
import plotly.figure_factory as ff
from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
import numpy as np
from modules import tables


@st.cache(suppress_st_warning=True)
def data_request(country_to_request, start, currency='USD', end=date.today()):
    data = tables.EquityMaster(country=country_to_request, field='IQ_CLOSEPRICE_ADJ', currency=currency).query(rename=['asset'],
                                                                                                                start=start, end=str(end))
    adtv = tables.EquityMaster(country=country_to_request, field='IQ_VALUE_TRADED', currency=currency).query(rename=['asset'],
                                                                                                             start=start, end=str(end)).median()
    marketcap = tables.EquityMaster(country=country_to_request, field='IQ_MARKETCAP', currency=currency).query(rename=['asset'],
                                                                                                               start=start, end=str(end)).median()
    return data, adtv, marketcap


@st.cache(suppress_st_warning=True)
def data_filter(data, adtv, marketcap, adtv_threshold, mktcap_threshold, p):
    adtv_filter = (adtv >= adtv_threshold)
    adtv = adtv.loc[adtv_filter]
    marketcap_filter = (marketcap >= mktcap_threshold)
    marketcap = marketcap.loc[marketcap_filter]
    data = data.loc[:, data.columns.isin(adtv.index)]
    data = data.loc[:, data.columns.isin(marketcap.index)]

    file_to_read = 'Data/Company_Base_Definitivo.xlsx'
    company_base = pd.read_excel(file_to_read, sheet_name='Compilado')
    id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_base.iterrows()}
    data = data.loc[:, data.columns.isin(id_to_ticker.keys())]
    data.columns = [id_to_ticker[col] for col in data.columns]

    if isinstance(p, str):
        returns_final = data.resample(p).last().pct_change().fillna(0)
    else:
        returns_final = data.iloc[::p].pct_change().fillna(0)
    return returns_final


def dist(correlation):
    return ((1-correlation)/2.)**.5


def to_excel(df_to_write):
    output = BytesIO()
    writer = pd.ExcelWriter(output, engine='xlsxwriter')
    df_to_write.to_excel(writer, index=False, sheet_name='Sheet1')
    workbook = writer.book
    worksheet = writer.sheets['Sheet1']
    format1 = workbook.add_format({'num_format': '0.00'})
    worksheet.set_column('A:A', None, format1)
    writer.save()
    processed_data = output.getvalue()
    return processed_data


@st.cache(suppress_st_warning=True)
def get_dtw_distance(x, y):
    distance_dtw = fastdtw(x, y, dist=euclidean)[0]
    return distance_dtw


def clustering_basado_en_correlacion():

    form = st.form("Correlation Clustering")
    posible_countries = ('Todos', 'Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru')
    countries = form.multiselect('¿Qué países desea visualizar?', posible_countries)
    if 'Todos' in countries:
        countries = ('Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru')

    adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1., format="%.2f")
    mktcap_thresh = form.number_input('Ingrese el mínimo Market Cap que desea considerar', value=200., format="%.2f")
    start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año'))
    period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1)
    accept = form.form_submit_button('Aceptar')

    if accept:
        start_date = str(date.today() - relativedelta(months=int(start_date[0])))
        for country in countries:
            data_, adtv_, marketcap_ = data_request(country, start_date)
            # Filtramos para que se cumplan los filtros del usuario en los datos
            returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period)

            # Normalizamos
            base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1)
            base = base.sort_index(axis=1)

            # Procedemos a calcular correlación y covarianza
            corr, covs = base.corr(), base.cov()
            file = to_excel(corr)

            # Definimos la matriz de distancia
            dist_matrix = dist(corr)

            hierarchy = linkage(dist_matrix)
            ct = 0.54 * max(hierarchy[:, 2])
            fig = ff.create_dendrogram(dist_matrix, orientation='left', labels=list(base.columns),
                                       color_threshold=ct, linkagefun=linkage)
            
            fig.update_layout(title='{} desde {} hasta {}'.format(country,
                                                                  returns.index[0].date(),
                                                                  returns.index[-1].date()))
            if country == 'Brazil':
                fig.update_layout(height=2000)
            else:
                fig.update_layout(height=900)

            st.plotly_chart(fig, use_container_width=True)

            st.download_button(label='Descargar Matriz de Correlación para {}'.format(country), data=file,
                               file_name='{}_correlacion.xlsx'.format(country))


def clustering_con_dtw():
    form = st.form("Dynamic Time Warping Clustering")
    posible_countries = ('Todos', 'Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru')
    countries = form.multiselect('¿Qué países desea visualizar?', posible_countries)
    if 'Todos' in countries:
        countries = ('Brazil', 'Argentina', 'Chile', 'Colombia', 'Mexico', 'Peru')

    adtv_p = form.number_input('Ingrese el mínimo Average Daily Traded Value que desea considerar', value=1.,
                               format="%.2f")
    mktcap_thresh = form.number_input('Ingrese el Mínimo Market Cap que desea considerar', value=200., format="%.2f")
    start_date = form.selectbox('Ingrese la fecha de inicio que desea considerar', ('3 Meses', '6 Meses', '1 Año'))
    period = form.number_input('Defina la frecuencia en la que desea las observaciones (en días)', value=1)
    accept = form.form_submit_button('Aceptar')

    if accept:
        start_date = str(date.today() - relativedelta(months=int(start_date[0])))

        for country in countries:
            data_, adtv_, marketcap_ = data_request(country, start_date)
            # Filtramos para que se cumplan los filtros del usuario en los datos
            returns = data_filter(data_, adtv_, marketcap_, adtv_p, mktcap_thresh, period)
            # Normalizamos returns
            base = (returns.subtract(returns.mean(0), axis=1)).div(returns.std(axis=0), axis=1)
            base = base.sort_index(axis=1)
            # Procedemos a calcular correlación y covarianza
            N = len(base[:base.index[0]].T)
            # Creamos la Matriz de Distancias para DTW
            Dist = np.zeros((N, N))
            place = st.empty()
            for i in range(N):
                place.write("Cargando: " + str(round(i*100/N)) + " %")
                for j in range(i - 1, N):
                    company_1 = base.columns[i]
                    company_2 = base.columns[j]
                    Dist[i, j] = get_dtw_distance(base[company_1], base[company_2])
                    # La matriz es simétrica
                    Dist[j, i] = Dist[i, j]
            # Creamos un DataFrame con la matriz de distancias
            df = pd.DataFrame(Dist)
            df.index = base.columns
            df.columns = base.columns

            # Pasamos el df a excel para descarga del usuario
            file = to_excel(df)

            hierarchy = linkage(Dist)
            ct = 0.54 * max(hierarchy[:, 2])

            fig = ff.create_dendrogram(Dist, orientation='left', labels=list(base.columns),
                                       color_threshold=ct, linkagefun=linkage)
            fig.update_layout(
                title='{} desde {} hasta {}'.format(country, returns.index[0].date(), returns.index[-1].date()))

            if country == 'Brazil':
                fig.update_layout(height=2000)
            else:
                fig.update_layout(height=900)

            st.plotly_chart(fig, use_container_width=True)

            st.download_button(label='Descargar Matriz de distancias con DTW para {}'.format(country), data=file,
                               file_name='{}_correlacion.xlsx'.format(country))