oort committed on
Commit
bceb8c4
·
1 Parent(s): b0cfd02

Add application file

Browse files
Files changed (6) hide show
  1. README.md +6 -7
  2. app.py +216 -0
  3. blau.png +0 -0
  4. demo_data.pkl +3 -0
  5. mundus.png +0 -0
  6. requirements.txt +9 -0
README.md CHANGED
@@ -1,13 +1,12 @@
1
  ---
2
- title: Cc Clusters
3
- emoji: 🔥
4
- colorFrom: yellow
5
- colorTo: gray
6
  sdk: streamlit
7
- sdk_version: 1.10.0
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Cc_clusters
3
+ emoji: 💩
4
+ colorFrom: indigo
5
+ colorTo: green
6
  sdk: streamlit
 
7
  app_file: app.py
8
  pinned: false
9
+ license: unlicense
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # File: app.py
3
+ # Project: 'Homework #3 OTUS.ML.Advanced'
4
+ # Created by Gennady Matveev (gm@og.ly) on 02-01-2022.
5
+ # %%
6
+ # Import libraries
7
+ import re
8
+ import pandas as pd
9
+ import numpy as np
10
+ import streamlit as st
11
+ import requests
12
+ import pickle
13
+ from sklearn.preprocessing import StandardScaler
14
+ from sklearn.cluster import KMeans
15
+ import tsfel
16
+ from kneed import KneeLocator
17
+ import cryptocompare as cc
18
+ import matplotlib.pyplot as plt
19
+ import plotly.express as px
20
+ from umap import UMAP
21
+ import warnings
22
+
23
# Plot appearance shared by every matplotlib figure the app draws.
plt.style.use("ggplot")
plt.rcParams.update({"figure.figsize": (10, 5)})
# Suppress all Python warnings raised by the libraries used below.
warnings.simplefilter("ignore")
# pd.options.display.precision = 4

# Shared seed, feature scaler and parallelism setting.
random_state, n_jobs = 17, -1
scaler = StandardScaler()
31
+
32
+
33
+ # %%
34
# Browser tab title/icon and overall page layout.
st.set_page_config(
    page_title="Cryptocurrencies clustering",
    page_icon='./head.ico',
    layout='centered',
    initial_sidebar_state='expanded',
)  # wide

# Inject CSS that sets the padding of the main block container.
padding = 0
page_css = f""" <style>
.reportview-container .main .block-container{{
padding-top: {padding}rem;
padding-right: {padding}rem;
padding-left: {padding}rem;
padding-bottom: {padding}rem;
}} </style> """
st.markdown(page_css, unsafe_allow_html=True)

# Banner image, title and introductory text.
st.image('./mundus.png')
st.subheader('Clustering analysis of cryptocurrencies')
st.markdown(
    '*Explore similarities in statisticial, temporal and spectral domains*'
)
st.markdown('''Top 100 cryptocurrencies' daily closing prices are downloaded.
Their dynamics can be analized in search of similarities between coins.
Up to 8 currencies from each cluster are shown below.''')
st.markdown("""---""")
54
+
55
+ # %%
56
+ # Set cryptocompare API key:
57
# API key is read from the Streamlit secrets store, never hard-coded.
api_key = st.secrets["api_key"]
# %%
# Browser-like User-Agent sent with the market-cap listing request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36"
    )
}
# Endpoint listing the top 100 coins by market capitalisation, priced in USD.
req = (
    "https://min-api.cryptocompare.com/data/top/mktcapfull"
    f"?limit=100&tsym=USD&api_key={api_key}"
)
63
+
64
+ # Utility functions for data download
65
+
66
+
67
@st.cache(ttl=600)
def get_price(ticker: str, limit: int):
    """Fetch the daily USD price history of one coin from cryptocompare.

    The result is cached by Streamlit for 600 seconds.

    Args:
        ticker: Coin symbol passed to cryptocompare.
        limit: Value forwarded as cryptocompare's ``limit`` argument.

    Returns:
        Whatever ``cryptocompare.get_historical_price_day`` returns.
    """
    history = cc.get_historical_price_day(ticker, limit=limit, currency="USD")
    return history
72
+
73
+
74
@st.cache(ttl=600)
def get_all_cc(limit: int):
    """Download daily closing prices for every symbol in the global ``tickers``.

    The download is best-effort: a coin whose history cannot be fetched or
    parsed is skipped so that one bad symbol does not abort the whole run.
    The result is cached by Streamlit for 600 seconds.

    Args:
        limit: Value forwarded to ``get_price``; also the length of the
            initial integer index of the result.

    Returns:
        DataFrame with one column of closing prices per successfully
        downloaded ticker.
    """
    # Start from an empty frame so the result keeps a 0..limit-1 index even
    # when no coin could be downloaded.
    frames = [pd.DataFrame(index=range(limit))]
    for tick in tickers:
        try:
            history = get_price(tick, limit)
            closes = pd.DataFrame.from_dict(history)["close"].rename(tick)
        except Exception:
            # Deliberately best-effort, but only for ordinary errors: a bare
            # ``except`` would also swallow KeyboardInterrupt/SystemExit and
            # other BaseException-derived control-flow signals.
            continue
        frames.append(closes)

    # A single concat avoids re-copying the growing frame on every iteration.
    return pd.concat(frames, axis=1)
88
+
89
+ # Utility functions for clustering analysis
90
+
91
+
92
def elbow_study(data, k_max: int = 10, model=KMeans):
    """Pick a cluster count with the elbow method.

    ``data`` is standardised with the module-level ``scaler``; ``model`` is
    then fitted for every k in ``range(2, k_max)`` and the square root of each
    fit's inertia is recorded. The knee of that curve is the answer.

    Args:
        data: Samples to cluster, one row per sample.
        k_max: Exclusive upper bound of the cluster counts tried.
        model: Clusterer class accepting ``n_clusters`` and ``random_state``
            and exposing ``inertia_`` after ``fit``.

    Returns:
        The knee found by ``KneeLocator``, or 3 when no knee is found.
    """
    scaled = scaler.fit_transform(data)
    candidates = range(2, k_max)
    inertia = [
        np.sqrt(
            model(n_clusters=k, random_state=random_state).fit(scaled).inertia_
        )
        for k in candidates
    ]

    # Locate the knee of the decreasing, convex inertia curve.
    kneedle = KneeLocator(
        candidates, inertia, S=2, curve="convex", direction="decreasing"
    )
    knee = kneedle.knee
    # Fall back to 3 clusters in case kneed doesn't find a knee.
    if knee:
        return knee
    return 3
107
+
108
+
109
def plot_clusters_2(data, Xt, n_clusters, random_state):
    """Cluster the coins with KMeans and draw one figure per cluster.

    Each figure is a 2x4 grid showing the price series of up to 8 coins from
    that cluster and is rendered with ``st.pyplot``.

    Args:
        data: Price DataFrame with one column per coin.
        Xt: Feature DataFrame with one column per coin, in the same column
            order as ``data``.
        n_clusters: Number of KMeans clusters.
        random_state: Seed passed to KMeans.

    Returns:
        A copy of ``data`` with an extra ``"cluster"`` row holding each
        coin's cluster label.
    """
    palette = ["red", "green", "blue", "purple",
               "orange", "magenta", "goldenrod"]
    clusterer = KMeans(n_clusters=n_clusters, max_iter=100,
                       random_state=random_state)
    X = scaler.fit_transform(Xt)
    # Transpose so that each coin (a column of Xt) becomes one sample.
    labels = clusterer.fit_predict(X.T)

    dd = data.copy()
    dd.loc["cluster"] = labels
    # Price rows only; the last row is the label row appended just above.
    prices = dd.iloc[:-1]

    for c in range(n_clusters):
        # Wrap around the palette: the elbow study may propose more clusters
        # than there are colours, which previously raised IndexError.
        line_color = palette[c % len(palette)]
        cluster_ticks = dd.columns[labels == c]
        fig, ax = plt.subplots(2, 4, sharex='col', figsize=(15, 5))
        for i, tick in enumerate(cluster_ticks[:8]):
            panel = ax[i % 2, i // 2]
            panel.plot(prices[tick], color=line_color)
            panel.set_title(tick)
        fig.suptitle(f"Cluster {c}, {len(cluster_ticks)} items\n", y=1.02)
        st.pyplot(fig)
        # Release the figure once rendered; pyplot otherwise keeps every
        # figure alive across Streamlit reruns.
        plt.close(fig)
    return dd
130
+
131
def visualize(Xt, n_clusters):
    """Show a 3-D UMAP projection of the coins, coloured by KMeans cluster.

    Args:
        Xt: Feature DataFrame with one row per feature and one column per
            coin.
        n_clusters: Number of KMeans clusters used for colouring.
    """
    clusterer = KMeans(n_clusters=n_clusters, max_iter=100,
                       random_state=random_state)

    # One row per coin: both the clustering and the projection must operate
    # on the same samples so that points and colours line up one-to-one.
    samples = Xt.T
    X = scaler.fit_transform(samples)
    X_color = clusterer.fit_predict(X).astype(str)

    # UMAP
    umap_3d = UMAP(n_components=3, init='random',
                   random_state=random_state)
    # Project the coins, not the features: projecting Xt.values produced one
    # point per feature, which did not match the per-coin colour labels.
    proj_3d = umap_3d.fit_transform(samples.values)

    fig_3d = px.scatter_3d(
        proj_3d, x=0, y=1, z=2,
        color=X_color, labels={'color': 'clusters'},
        color_discrete_sequence=["red", "green", "blue",
                                 "purple", "orange", "magenta", "goldenrod"],
        title="UMAP projection from feature space",
        width=800, height=600,
    )
    fig_3d.update_traces(marker_size=5)
    # fig_3d.show()
    st.write(fig_3d)
158
+ # %%
159
+ # START Sidebar ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
160
+
161
+
162
# Sidebar controls: data source, analysis window, feature domain, start button.
st.sidebar.image('./blau.png')
demo = st.sidebar.checkbox(
    label="Use demo data?",
    value=True,
    help="Use demo data or fetch actual?",
)
days = st.sidebar.number_input(
    label='Number of days for analysis',
    min_value=30,
    max_value=100,
    value=60,
)
domain = st.sidebar.selectbox(
    label='Domain',
    options=('statistical', 'temporal', 'spectral', 'all'),
    index=1,
    help='Domain to use feature extraction',
)
st.sidebar.markdown("""---""")
analyze = st.sidebar.button(label='Start analysis')
170
+
171
+ # END Sidebar ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
172
+
173
+ # Analysis
174
# Runs only when the sidebar button was pressed: load prices, extract tsfel
# features per coin, choose a cluster count and plot the clusters.
if analyze:
    with st.spinner('Downloading data...'):
        if demo:
            # NOTE(security): pickle.load executes arbitrary code from the
            # file. Acceptable only because demo_data.pkl ships with the app;
            # never point this at user-supplied data.
            with open("./demo_data.pkl", "rb") as f:
                demo_data = pickle.load(f)
            # Keep the most recent `days` rows. A negative start clamps to
            # the whole frame when fewer rows exist, whereas the former
            # `iloc[dl - days:]` wrapped around and returned only a fragment.
            data_day = demo_data.iloc[-days:]
            tickers = demo_data.columns
        else:
            # A timeout keeps the app from hanging forever on a stalled API.
            top100 = requests.get(req, headers=headers, timeout=30)
            rs = re.compile(r"\"Name\":\"(?P<ticker>[A-Z0-9]+)\"")
            # `tickers` is read as a global by get_all_cc.
            tickers = rs.findall(top100.text)
            data_day = get_all_cc(limit=days).copy()

    if data_day.empty:
        # Nothing to analyse (e.g. the API returned no symbols or every
        # download failed); stop with a message instead of a KeyError below.
        st.error('No price data available for analysis.')
        st.stop()

    with st.spinner(f'Extracting {domain} features...'):
        # tsfel uses None to mean "all domains".
        dom = domain if domain != 'all' else None
        cfg_file = tsfel.get_features_by_domain(dom)
        # tsfel analysis: one reference extraction supplies the feature
        # names. Prefer BTC as before, but fall back to the first column so a
        # missing BTC series does not crash the app.
        ref_col = "BTC" if "BTC" in data_day.columns else data_day.columns[0]
        x_temp = tsfel.time_series_features_extractor(
            cfg_file, data_day[ref_col], window_size=days)
        tf_columns = x_temp.columns
        xtf = pd.DataFrame(columns=data_day.columns, index=tf_columns)
        # Fill df with features: one column per coin, one row per feature.
        for col in xtf.columns:
            xtf[col] = tsfel.time_series_features_extractor(
                cfg_file, data_day[col], window_size=days
            ).T
        # Drop features that could not be computed for every coin.
        xtf.dropna(inplace=True)

        # Features dataframe
        Xt = pd.DataFrame(scaler.fit_transform(xtf),
                          columns=data_day.columns, index=xtf.index)

    with st.spinner('Calculating optimal number of clusters...'):
        # Get optimal no of clusters
        n_clusters = elbow_study(Xt.T, model=KMeans)  # metric="euclidean",

    # Plot clusters
    plot_clusters_2(data_day, Xt=Xt, n_clusters=n_clusters,
                    random_state=random_state)
214
+
215
+ # Plot umap
216
+ # visualize(Xt, n_clusters)
blau.png ADDED
demo_data.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9909080bb27a99e54587dd6007450e7b9430a2a63a549d988bbeb26792d12e6d
3
+ size 80516
mundus.png ADDED
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ pandas==1.4.0
2
+ numpy==1.22.0
3
+ scikit-learn==1.0.2
4
+ tsfel==0.1.4
5
+ kneed==0.7.0
6
+ cryptocompare==0.7.5
7
+ plotly==5.5.0
8
+ umap-learn==0.5.2
9
+ streamlit==1.12.2