Krzysiek111 committed
Commit cc50161
1 Parent(s): ba8a301

hosting RNN Playground on Hugging Face

Files changed (7):
  1. .gitattributes +3 -0
  2. app.py +184 -0
  3. button_style.css +3 -0
  4. info.jpg +3 -0
  5. predict.py +152 -0
  6. requirements.txt +6 -0
  7. wait.gif +3 -0
.gitattributes CHANGED
@@ -32,3 +32,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.jpg filter=lfs diff=lfs merge=lfs -text
+ *.gif filter=lfs diff=lfs merge=lfs -text
+ *.css filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,184 @@
+ import streamlit as st
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ import requests
+ import json
+
+ from predict import predict_series
+
+ st.set_page_config(page_title='RNN Playground')
+ # st.set_option('deprecation.showPyplotGlobalUse', False)
+
+ pages = {'Intro': 0, 'Implementation details': 1, 'The model': 2}
+ choice = pages[st.sidebar.radio("Select the chapter: ", tuple(pages.keys()))]
+
+
+ if choice == 0:
+     st.title("Recurrent Neural Networks playground")
+     st.subheader("The purpose")
+     st.write("""\n
+     The goal of this app is to let the user experiment easily with Recurrent Neural Networks. The app helps to understand: \n
+     - When to use recurrent neural networks
+     - Which scenarios are more straightforward for the RNN to predict and which are more difficult
+     - How noise disrupts the predictions
+     - The difference between LSTM and GRU nodes
+     - That increasing the number of nodes doesn't always lead to better performance
+     """)
+
+     st.subheader("Typical use case")
+     st.write("""
+     1. Create a synthetic dataset with a wide range of parameters
+     2. Create a Recurrent Neural Network model by selecting the number of nodes in particular layers of the model
+     3. Automatically train the RNN model and make predictions
+     4. Compare the predicted values with the actual values
+     \n""")
+     st.subheader('The architecture of the model')
+     st.image('info.jpg', use_column_width=True, caption='Hover the cursor over the image to see the enlarge button')
+     st.write(""" \n Use the radio buttons on the left to navigate between chapters \n \n \n \n""")
+
+ elif choice == 1:
+
+     st.title(""" \n Implementation details""")
+     st.subheader("Front-end")
+     st.write("""\n
+     The front-end part was built with [the Streamlit library](https://www.streamlit.io/).
+     The parameters from the sidebar are used to create a dataset. The dataset is visualised with the Seaborn
+     library and finally sent (together with the parameters specifying the number of neurons in particular layers) to
+     the back-end part through a REST API. \n
+     The front-end is served on Azure as a Web App. """)
+
+     st.subheader("Back-end")
+     st.subheader("[Since the playground is hosted on Hugging Face now, the back-end is a module of the front-end]")
+     st.write("""\n
+
+     The back-end part is responsible for:
+     - Retrieving the dataset from the front-end through the REST API
+     - Creating an RNN model using the parameters passed by the user
+     - Training the model
+     - Predicting the values and returning them to the front-end \n
+
+     The most crucial requirements are:
+     - The neural network setup has to be able to accurately predict the further shape of a curve for
+     the widest possible range of parameters selected by the user.
+     - The execution time of the back-end part must be short.
+         - This means balancing the tradeoff between the time needed for the response and the accuracy of the results
+     - Cost efficiency
+         - Since the app should be online all the time, a serverless approach was taken.
+         That's why the back-end is served on Azure as a serverless Function App.
+
+     """)
+
+ else:
+
+     gran = 0.25
+     test_len = 8
+     st.sidebar.header('User Input Parameters')
+
+
+     def user_input_features():
+         predefined_sets = {'length': [30, ], 'period': [1.34, ], 'amplitude': [0.64, ], 'growth': [0.04, ],
+                            'amplitude_growth': [0.03, ], 'r1_nodes': [20, ], 'r2_nodes': [20, ], 'fc1_nodes': [34, ]}
+
+         data, nn = {}, {}
+         st.sidebar.header('Dataset:')
+         data['length'] = st.sidebar.slider('Training data length', 20, 50, 28)
+         data['period'] = st.sidebar.slider('Period of the wave', 0.75, 2.0, 1.0)
+         data['growth'] = st.sidebar.slider('Values growth', -0.25, 0.25, 0.0)
+         data['amplitude'] = st.sidebar.slider('Amplitude', 0.25, 1.75, 1.0)
+         data['amplitude_growth'] = st.sidebar.slider('Amplitude growth', -0.01, 0.1, 0.0)
+         data['noise'] = st.sidebar.slider('Noise', 0.0, 1.0, 0.0)
+         st.sidebar.header('Model setup')
+         nn['use_lstm'] = st.sidebar.radio('Select the type of Recurrent Neuron to use', ['LSTM', 'GRU']) == 'LSTM'
+         nn['r1_nodes'] = st.sidebar.slider('Number of nodes in the first RNN layer', 1, 30, 13)
+         nn['r2_nodes'] = st.sidebar.slider('Number of nodes in the second RNN layer', 0, 30, 0)
+         nn['fc1_nodes'] = st.sidebar.slider('Number of nodes in the fully connected layer', 0, 40, 10)
+         nn['steps'] = len(np.arange(0, test_len, gran))
+
+         # if st.sidebar.button('Load one of the pretested configurations'):
+         #     i = st.sidebar.selectbox('Select:', [-1, 0])
+         #     i = int(np.random.rand(len(predefined_sets['length'])))  # Selecting one pretested configuration
+         #     data.update({k: predefined_sets[k][i] for k in set(data) & set(predefined_sets)})
+         #     nn.update({k: predefined_sets[k][i] for k in set(nn) & set(predefined_sets)})
+
+         return data, nn
+
+
+     params, setup = user_input_features()
+
+     st.subheader("Instructions:")
+     st.write("""
+     1. Modify the dataset by using the sliders in the Dataset group on the left of the screen.
+     2. Select the number of nodes in the model by using the sliders in the Model setup group.
+     3. Press the "Train and Predict" button to train the model and make predictions - note: many operations run under the hood, so please be patient.
+     4. The predicted values will be shown at the bottom of the page.
+     5. If you are not satisfied with the results - modify the model and try again!
+     6. Have fun!
+     \n""")
+
+     st.subheader("Generated data:")
+     X = np.arange(0, params['length'], gran)
+     X_pred = np.arange(params['length'], params['length'] + test_len, gran)
+
+
+     def generate_wave(x_set):
+         # sine wave with a linearly growing amplitude, a linear trend and optional Gaussian noise
+         return np.sin(x_set / params['period']) * (1 + params['amplitude_growth'] * x_set) * params[
+             'amplitude'] + x_set * params['growth'] + params['noise'] * np.random.randn(len(x_set))
+
+
+     Y = generate_wave(X)
+     Y_pred = generate_wave(X_pred)
+
+     X_pred, Y_pred = np.append(X[-1], X_pred), np.append(Y[-1], Y_pred)
+
+     c1, c2, c3 = '#1e4a76', '#7dc0f7', '#ff7c0a'  # colors
+     # sns.scatterplot(x=X, y=Y, color=c1)
+     # st.pyplot()
+     sns.lineplot(x=X, y=Y, color=c1)
+     sns.lineplot(x=X_pred, y=Y_pred, color=c2, linestyle=':')
+     plt.ylim(min(-2, min(Y), min(Y_pred)), max(2, max(Y), max(Y_pred)))
+     plt.legend(['Train data', 'Test data'], loc=3)
+     plt.xlabel('Sample number')
+     plt.ylabel('Sample value')
+     st.pyplot()
+     st.write("The plot presents the generated train and test data. Use the sliders on the left to modify the curve.")
+
+
+     def local_css(file_name):
+         with open(file_name) as f:
+             st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
+
+
+     local_css("button_style.css")
+
+     st.subheader('Predicted data:')
+     reminder = st.text('Press the Train and Predict button on the sidebar once you are ready with your selections.')
+
+
+     if st.sidebar.button('Train and Predict'):
+         setup['values'] = list(Y)
+         reminder.empty()
+
+         waiters = list()
+         waiters.append(st.text('Please wait till the train and predict process is finished.'))
+         waiters.append(st.image('wait.gif'))
+         waiters.append(st.text("""The process should take around 20-60 seconds."""))
+
+         # Leftovers from the Azure deployment; the model now runs in-process via predict.py:
+         # myUrl = 'http://localhost:7071/api/predict'
+         # myUrl = 'https://rnn-background.azurewebsites.net/api/predict?code=a/X0yioXXY4CFVd9UFTw4MiyStNJ2qh3oae7FdFN7VBFMFhqe/qK7Q=='
+         # request = json.dumps(setup)
+         result = predict_series(**setup)
+         _ = [waiter.empty() for waiter in waiters]
+
+         sns.lineplot(x=X_pred, y=Y_pred, color=c2, linestyle=':')
+         sns.lineplot(x=X, y=Y, color=c1)
+         sns.lineplot(x=np.append(X[-1], np.arange(0, test_len, gran) + max(X) + gran), y=np.append(Y[-1], result['result']), color=c3)
+         plt.legend(['Train data', 'Test data', 'Predicted data'], loc=3)
+         plt.xlabel('Sample number')
+         plt.ylabel('Sample value')
+         st.pyplot()
+
+         st.write("The prediction isn't good enough? Try changing the settings in the model setup or increasing the dataset length.")
+         st.write('Training took {} epochs, Mean Squared Error: {:.2e}'.format(result['epochs'], result['loss']))
+         # st.write('Training took {} epochs, Mean Squared Error {}, last loss {}'.format(result['epochs'], result['loss'], result['loss_last']))
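
For reference, a minimal standalone sketch of the dataset generator defined in app.py above; the parameter defaults are assumptions taken from the app's slider defaults, and the formula mirrors generate_wave():

import numpy as np

def generate_wave(x, period=1.0, amplitude=1.0, growth=0.0,
                  amplitude_growth=0.0, noise=0.0):
    # sine wave with a linearly growing amplitude, a linear trend,
    # and optional Gaussian noise - same formula as app.py's generate_wave()
    return (np.sin(x / period) * (1 + amplitude_growth * x) * amplitude
            + x * growth + noise * np.random.randn(len(x)))

gran, length, test_len = 0.25, 28, 8  # app defaults: sample spacing, train length, test length
X = np.arange(0, length, gran)
X_pred = np.arange(length, length + test_len, gran)
Y, Y_pred = generate_wave(X), generate_wave(X_pred, noise=0.1)
print(Y.shape, Y_pred.shape)  # (112,), (32,)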
button_style.css ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:388e988962ab53220113c1c3c41f470d8a3bde4e0cd43e0e45b1a51751a7816c
+ size 163
info.jpg ADDED

Git LFS Details

  • SHA256: 3c5cbaf43f9bd1709af28aaf379f2afb76a86ea15605c63735dc7249f3b417ba
  • Pointer size: 131 Bytes
  • Size of remote file: 516 kB
predict.py ADDED
@@ -0,0 +1,152 @@
+ import tensorflow.keras as tf  # note: the Keras namespace is aliased as `tf` throughout this module
+ import numpy as np
+ from sklearn.preprocessing import StandardScaler
+
+ verbose = 0
+
+
+ def predict_series(values, r1_nodes=5, r2_nodes=0, fc1_nodes=0, steps=20, use_lstm=True, *args, **kwargs):
+
+     train = np.array(values)
+
+     train_last_value = train[-1]
+     train = train[1:] - train[:-1]  # first-order differencing to remove the trend
+     sc = StandardScaler()
+     train = sc.fit_transform(train.reshape(-1, 1))
+
+     T = 25  # window length: T past samples are used to predict the next one
+     X = []
+     Y = []
+     for t in range(len(train) - T):
+         x = train[t:t + T]
+         X.append(x)
+         Y.append(train[t + T])
+
+     X = np.array(X).reshape(-1, T, 1)
+     Y = np.array(Y)
+
+     nb_stats = 0
+     """
+     X_temp = np.zeros(X.size + nb_stats * len(X)).reshape(-1, T + nb_stats)
+
+     step_size = 1 / (len(X) + steps)
+
+
+     def update_stats(row):
+         new_stat = row[T:]
+         new_stat[0] += step_size  # number of sample
+
+         minimum = min(row[:T])  # minimum value, and when it occurred
+         if minimum < row[T + 1]:
+             new_stat[1], new_stat[2] = minimum, new_stat[0]
+
+         maximum = max(row[:T])  # maximum value, and when it occurred
+         if maximum > row[T + 3]:
+             new_stat[3], new_stat[4] = maximum, new_stat[0]
+
+         new_stat[5] = (row[T + 5] * row[T] + row[T - 1]) / (new_stat[0])  # rolling average
+
+         difference10 = row[T - 1] - row[T - 11]  # the biggest difference within 10 items
+         if difference10 > row[T + 6]:
+             new_stat[6], new_stat[7] = difference10, new_stat[0]
+         if difference10 < row[T + 8]:
+             new_stat[8], new_stat[9] = difference10, new_stat[0]
+
+         abs_difference10 = abs(difference10)  # the biggest absolute difference within 10 items
+         if abs_difference10 > row[T + 10]:
+             new_stat[10], new_stat[11] = abs_difference10, new_stat[0]
+         if abs_difference10 < row[T + 12]:
+             new_stat[12], new_stat[13] = abs_difference10, new_stat[0]
+
+         return new_stat
+
+     X_temp[0] = X[0]  # np.append(X[0])#, [0, np.inf, 0, -np.inf, 0]) #, 0, -np.inf, 0, +np.inf, 0, 0, 0, np.inf, 0])
+     for i in range(1, len(X)):
+         X_temp[i] = np.append(X[i][:T], X_temp[i - 1][T:])
+         X_temp[i][T:] = update_stats(X_temp[i])
+     """
+     # X = X_temp[1:].reshape(-1, T + nb_stats, 1)
+     # Y = Y[1:]
+
+     i = tf.layers.Input(shape=(T + nb_stats, 1))
+
+     if use_lstm:
+         rnn_layer = tf.layers.LSTM
+     else:
+         rnn_layer = tf.layers.GRU
+
+     if r2_nodes:
+         x = rnn_layer(r1_nodes, return_sequences=True)(i)
+         x = rnn_layer(r2_nodes)(x)
+     else:
+         x = rnn_layer(r1_nodes)(i)
+     if fc1_nodes:
+         x = tf.layers.Dense(fc1_nodes, activation='relu')(x)
+     x = tf.layers.Dense(1)(x)
+     model = tf.models.Model(i, x)
+
+     """lr_schedule = tf.optimizers.schedules.ExponentialDecay(
+         initial_learning_rate=0.2,
+         decay_steps=10,
+         decay_rate=0.8)
+     optimizer = tf.optimizers.Ftrl(learning_rate=0.001, learning_rate_power=-0.1)"""
+     # for i in range(0, 500, 10):
+     #     print('{}: {}'.format(i, lr_schedule(i)))
+
+     model.compile(
+         loss='mse',  # tf.losses.LogCosh()
+         optimizer=tf.optimizers.Adamax(learning_rate=0.1)  # 'sgd'
+     )
+
+     callbacks = [tf.callbacks.EarlyStopping(patience=150, monitor='loss', restore_best_weights=True)]
+
+     r = model.fit(
+         X, Y,
+         epochs=500,
+         callbacks=callbacks,
+         verbose=verbose,
+         validation_split=0.0
+     )
+     pred = np.array([])
+     last_x = X[-1]
+
+     for _ in range(steps):
+         p = model.predict(last_x.reshape(1, -1, 1))[0, 0]
+         pred = np.append(pred, p)
+         # last_x[:T] = np.roll(last_x[:T], -1)
+         # last_x[T - 1] = p
+         # last_x[T:] = update_stats(last_x)
+         last_x = np.roll(last_x, -1)  # slide the window one step forward
+         last_x[-1] = p  # feed the prediction back in (autoregressive forecasting)
+
+     pred = sc.inverse_transform(pred.reshape(-1, 1))
+     # pred = np.array(pred).astype('float64')
+     # pred = list(pred)
+     # logging.info(pred)
+
+     # undo the differencing: cumulative sum starting from the last training value
+     pred = pred.reshape(-1)
+     pred[0] = train_last_value + pred[0]
+
+     for i in range(1, len(pred)):
+         pred[i] += pred[i - 1]
+
+     result = {'result': list(pred), 'epochs': r.epoch[-1] + 1, 'loss': min(r.history['loss']), 'loss_last': r.history['loss'][-1]}
+     return result
+
+
+ if __name__ == "__main__":
+     from time import time
+     t1 = time()
+     verbose = 2
+     data = np.sin(np.arange(0.0, 28.0, 0.35) * 2)
+     result = predict_series(data, steps=66, r1_nodes=14, r2_nodes=14, fc1_nodes=20)
+     print('exec time: {:8.3f}'.format(time() - t1))
+     # print(result['result'][:2])
+     print(result['epochs'], result['loss'])
+     import seaborn as sns
+     import matplotlib.pyplot as plt
+     sns.lineplot(x=range(30), y=data[-30:], color='r')
+     sns.lineplot(x=range(30, 30 + len(result['result'])), y=result['result'], color='b')
+     plt.show()
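
To make predict_series() easier to follow, here is its preprocessing step in isolation - a sketch using the same toy series as the __main__ block and the same window length T=25. The series is differenced, standardised, and cut into windows of T past samples that each predict the next sample:

import numpy as np
from sklearn.preprocessing import StandardScaler

series = np.sin(np.arange(0.0, 28.0, 0.35) * 2)  # toy data from __main__ above
diffed = series[1:] - series[:-1]  # first-order differencing removes the trend
scaled = StandardScaler().fit_transform(diffed.reshape(-1, 1))

T = 25  # window length used by predict_series
X = np.array([scaled[t:t + T] for t in range(len(scaled) - T)]).reshape(-1, T, 1)
Y = np.array([scaled[t + T] for t in range(len(scaled) - T)])
print(X.shape, Y.shape)  # (54, 25, 1) (54, 1)

At forecast time the last window is rolled forward one step per prediction with the model's output fed back in, and the final cumulative sum undoes the differencing.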
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ streamlit==1.17.0
+ requests
+ numpy
+ seaborn
+ matplotlib
+ tensorflow
+ scikit-learn  # StandardScaler in predict.py
wait.gif ADDED

Git LFS Details

  • SHA256: 3dfd6975b74559880e73cf1cb07c5e0c2e4529b3bbe7dfdc8c2ffda29987fb7b
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB