BlendMMM committed on
Commit
c0ffc2f
1 Parent(s): 9070ab8

Upload 11 files

pages/10_Optimized_Result_Analysis.py ADDED
@@ -0,0 +1,399 @@
+ import streamlit as st
+ from numerize.numerize import numerize
+ import pandas as pd
+ from utilities import (format_numbers, decimal_formater,
+                        load_local_css, set_header,
+                        initialize_data,
+                        load_authenticator)
+ import pickle
+ import streamlit_authenticator as stauth
+ import yaml
+ from yaml import SafeLoader
+ from classes import class_from_dict
+ import plotly.express as px
+ import numpy as np
+ import plotly.graph_objects as go
+
+
+ def summary_plot(data, x, y, title, text_column, color, format_as_percent=False, format_as_decimal=False):
+     # Coerce the text column to numeric before it is used as bar text
+     data[text_column] = pd.to_numeric(data[text_column], errors='coerce')
+     fig = px.bar(data, x=x, y=y, orientation='h',
+                  title=title, text=text_column, color=color)
+
+     # Update the format of the displayed text based on the chosen format
+     if format_as_percent:
+         fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
+     elif format_as_decimal:
+         fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
+     else:
+         fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')
+
+     fig.update_layout(xaxis_title=x, yaxis_title='Channel Name', showlegend=False)
+     return fig
+
+
+ def stacked_summary_plot(data, x, y, title, text_column, color_column, stack_column=None, format_as_percent=False, format_as_decimal=False):
+     # Coerce the text column to numeric before it is used as bar text
+     data[text_column] = pd.to_numeric(data[text_column], errors='coerce')
+     fig = px.bar(data, x=x, y=y, orientation='h',
+                  title=title, text=text_column, color=color_column, facet_col=stack_column)
+
+     # Update the format of the displayed text based on the chosen format
+     if format_as_percent:
+         fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
+     elif format_as_decimal:
+         fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
+     else:
+         fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')
+
+     fig.update_layout(xaxis_title=x, yaxis_title='', showlegend=False)
+     return fig
+
+
+ def funnel_plot(data, x, y, title, text_column, color_column, format_as_percent=False, format_as_decimal=False):
+     data[text_column] = pd.to_numeric(data[text_column], errors='coerce')
+
+     # Round the numeric values in the text column to two decimal places
+     data[text_column] = data[text_column].round(2)
+
+     # Create a color map for categorical data
+     color_map = {category: f'rgb({i * 30 % 255},{i * 50 % 255},{i * 70 % 255})'
+                  for i, category in enumerate(data[color_column].unique())}
+
+     fig = go.Figure(go.Funnel(
+         y=data[y],
+         x=data[x],
+         text=data[text_column],
+         marker=dict(color=data[color_column].map(color_map)),
+         textinfo="value",
+         hoverinfo='y+x+text'
+     ))
+
+     # Choose the funnel mode based on the requested format
+     if format_as_percent:
+         fig.update_layout(title=title, funnelmode="percent")
+     elif format_as_decimal:
+         fig.update_layout(title=title, funnelmode="overlay")
+     else:
+         fig.update_layout(title=title, funnelmode="group")
+
+     return fig
+
+
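A minimal usage sketch for summary_plot, with an invented two-channel frame standing in for summary_df_sorted (not data shipped with this repo):

import pandas as pd

# Hypothetical demo frame; the page below passes summary_df_sorted instead.
demo = pd.DataFrame({'Channel_name': ['TV', 'Search'],
                     'Actual_spend': [120000, 45000]})
fig = summary_plot(demo, x='Actual_spend', y='Channel_name',
                   title='Actual Spend', text_column='Actual_spend',
                   color='Channel_name')
fig.show()  # inside Streamlit: st.plotly_chart(fig, use_container_width=True)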
+ st.set_page_config(layout='wide')
+ load_local_css('styles.css')
+ set_header()
+
+ # for k, v in st.session_state.items():
+ #     if k not in ['logout', 'login', 'config'] and not k.startswith('FormSubmitter'):
+ #         st.session_state[k] = v
+
+ st.empty()
+ st.header('Model Result Analysis')
+ spends_data = pd.read_excel('Overview_data_test.xlsx')
+
+ with open('summary_df.pkl', 'rb') as file:
+     summary_df_sorted = pickle.load(file)
+
+ selected_scenario = st.selectbox('Select Saved Scenarios', ['S1', 'S2'])
+
+ st.header('Optimized Spends Overview')
+ ___columns = st.columns(3)
+ with ___columns[0]:
+     fig = summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend', color='Channel_name')
+     st.plotly_chart(fig, use_container_width=True)
+ with ___columns[1]:
+     fig = summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend', color='Channel_name')
+     st.plotly_chart(fig, use_container_width=True)
+ with ___columns[2]:
+     fig = summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent', color='Channel_name')
+     st.plotly_chart(fig, use_container_width=True)
+
+ st.header('Budget Allocation')
+ summary_df_sorted['Perc_alloted'] = np.round(summary_df_sorted['Optimized_spend'] / summary_df_sorted['Optimized_spend'].sum(), 2)
+ columns2 = st.columns(2)
+ with columns2[0]:
+     fig = summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend', color='Channel_name')
+     st.plotly_chart(fig, use_container_width=True)
+ with columns2[1]:
+     fig = summary_plot(summary_df_sorted, x='Perc_alloted', y='Channel_name', title='% Split', text_column='Perc_alloted', color='Channel_name', format_as_percent=True)
+     st.plotly_chart(fig, use_container_width=True)
+
+
+ if 'raw_data' not in st.session_state:
+     st.session_state['raw_data'] = pd.read_excel('raw_data_nov7_combined1.xlsx')
+     st.session_state['raw_data'] = st.session_state['raw_data'][st.session_state['raw_data']['MediaChannelName'].isin(summary_df_sorted['Channel_name'].unique())]
+     st.session_state['raw_data'] = st.session_state['raw_data'][st.session_state['raw_data']['Date'].isin(spends_data["Date"].unique())]
+
+ # st.write(st.session_state['raw_data']['ResponseMetricName'])
+ # st.write(st.session_state['raw_data'])
+
+ st.header('Response Forecast Overview')
+ raw_data = st.session_state['raw_data']
+ effectiveness_overall = raw_data.groupby('ResponseMetricName').agg({'ResponseMetricValue': 'sum'}).reset_index()
+ effectiveness_overall['Efficiency'] = effectiveness_overall['ResponseMetricValue'].map(lambda x: x / raw_data['Media Spend'].sum())
+ # st.write(effectiveness_overall)
+
+ columns6 = st.columns(3)
+
+ effectiveness_overall.sort_values(by=['ResponseMetricValue'], ascending=False, inplace=True)
+ effectiveness_overall = np.round(effectiveness_overall, 2)
+ effectiveness_overall['ResponseMetric'] = effectiveness_overall['ResponseMetricName'].apply(lambda x: 'BAU' if 'BAU' in x else ('Gamified' if 'Gamified' in x else x))
+ # effectiveness_overall = np.where(effectiveness_overall[effectiveness_overall['ResponseMetricName'] == "Adjusted Account Approval BAU"], "Adjusted Account Approval BAU", effectiveness_overall['ResponseMetricName'])
+
+ effectiveness_overall.replace({'ResponseMetricName': {'BAU approved clients - Appsflyer': 'Approved clients - Appsflyer',
+                                                       'Gamified approved clients - Appsflyer': 'Approved clients - Appsflyer'}}, inplace=True)
+
+ # st.write(effectiveness_overall.sort_values(by=['ResponseMetricValue'], ascending=False))
+
+ condition = effectiveness_overall['ResponseMetricName'] == "Adjusted Account Approval BAU"
+ condition1 = effectiveness_overall['ResponseMetricName'] == "Approved clients - Appsflyer"
+ effectiveness_overall['ResponseMetric'] = np.where(condition, "Adjusted Account Approval BAU", effectiveness_overall['ResponseMetric'])
+ effectiveness_overall['ResponseMetricName'] = np.where(condition1, "Approved clients - Appsflyer (BAU, Gamified)", effectiveness_overall['ResponseMetricName'])
+
+ # effectiveness_overall = pd.DataFrame({'ResponseMetricName': ["App Installs - Appsflyer", 'Account Requests - Appsflyer',
+ #                                                              'Total Adjusted Account Approval', 'Adjusted Account Approval BAU',
+ #                                                              'Approved clients - Appsflyer', 'Approved clients - Appsflyer'],
+ #                                       'ResponseMetricValue': [683067, 367020, 112315, 79768, 36661, 16834],
+ #                                       'Efficiency': [1.24, 0.67, 0.2, 0.14, 0.07, 0.03],
+
+ custom_colors = {
+     'App Installs - Appsflyer': 'rgb(255, 135, 0)',        # Orange
+     'Account Requests - Appsflyer': 'rgb(125, 239, 161)',  # Light Green
+     'Adjusted Account Approval': 'rgb(129, 200, 255)',     # Light Blue
+     'Adjusted Account Approval BAU': 'rgb(255, 207, 98)',  # Amber
+     'Approved clients - Appsflyer': 'rgb(0, 97, 198)',     # Blue
+     "BAU": 'rgb(41, 176, 157)',                            # Teal
+     "Gamified": 'rgb(213, 218, 229)'                       # Silver (Gray)
+     # Add more categories and their respective colors as needed
+ }
+
+ with columns6[0]:
+     revenue = (effectiveness_overall[effectiveness_overall['ResponseMetricName'] == 'Total Approved Accounts - Revenue']['ResponseMetricValue']).iloc[0]
+     revenue = round(revenue / 1_000_000, 2)
+     # st.metric('Total Revenue', f"${revenue} M")
+ # with columns6[1]:
+ #     BAU = (effectiveness_overall[effectiveness_overall['ResponseMetricName'] == 'BAU approved clients - Revenue']['ResponseMetricValue']).iloc[0]
+ #     BAU = round(BAU / 1_000_000, 2)
+ #     st.metric('BAU approved clients - Revenue', f"${BAU} M")
+ # with columns6[2]:
+ #     Gam = (effectiveness_overall[effectiveness_overall['ResponseMetricName'] == 'Gamified approved clients - Revenue']['ResponseMetricValue']).iloc[0]
+ #     Gam = round(Gam / 1_000_000, 2)
+ #     st.metric('Gamified approved clients - Revenue', f"${Gam} M")
+
+ # st.write(effectiveness_overall)
+ data = {'Revenue': ['BAU approved clients - Revenue', 'Gamified approved clients- Revenue'],
+         'ResponseMetricValue': [70200000, 1770000],
+         'Efficiency': [127.54, 3.21]}
+ df = pd.DataFrame(data)
+
+ columns9 = st.columns([0.60, 0.40])
+ with columns9[0]:
+     figd = px.pie(df,
+                   names='Revenue',
+                   values='ResponseMetricValue',
+                   hole=0.3,  # size of the hole in the donut
+                   title='Effectiveness')
+     figd.update_layout(
+         margin=dict(l=0, r=0, b=0, t=0), width=100, height=180,
+         legend=dict(
+             orientation='v',  # vertical legend
+             x=0,              # anchor the legend at the left
+             y=0.8             # adjust y as needed
+         )
+     )
+     st.plotly_chart(figd, use_container_width=True)
+
+ with columns9[1]:
+     figd1 = px.pie(df,
+                    names='Revenue',
+                    values='Efficiency',
+                    hole=0.3,  # size of the hole in the donut
+                    title='Efficiency')
+     figd1.update_layout(
+         margin=dict(l=0, r=0, b=0, t=0), width=100, height=180, showlegend=False
+     )
+     st.plotly_chart(figd1, use_container_width=True)
+
+ effectiveness_overall['Response Metric Name'] = effectiveness_overall['ResponseMetricName']
+
+ columns4 = st.columns([0.55, 0.45])
+ with columns4[0]:
+     fig = px.funnel(effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
+                                                                                               'BAU approved clients - Revenue',
+                                                                                               'Gamified approved clients - Revenue',
+                                                                                               "Total Approved Accounts - Appsflyer"]))],
+                     x='ResponseMetricValue', y='Response Metric Name', color='ResponseMetric',
+                     color_discrete_map=custom_colors, title='Effectiveness',
+                     labels=None)
+     custom_y_labels = ['App Installs - Appsflyer', 'Account Requests - Appsflyer', 'Adjusted Account Approval', 'Adjusted Account Approval BAU',
+                        "Approved clients - Appsflyer (BAU, Gamified)"]
+     fig.update_layout(showlegend=False,
+                       yaxis=dict(
+                           tickmode='array',
+                           ticktext=custom_y_labels,
+                       ))
+     fig.update_traces(textinfo='value', textposition='inside', texttemplate='%{x:.2s} ', hoverinfo='y+x+percent initial')
+
+     last_trace_index = len(fig.data) - 1
+     fig.update_traces(marker=dict(line=dict(color='black', width=2)), selector=dict(marker=dict(color='blue')))
+
+     st.plotly_chart(fig, use_container_width=True)
+
+ with columns4[1]:
+     # Bar chart of efficiency by response metric
+     fig1 = px.bar((effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
+                                                                                              'BAU approved clients - Revenue',
+                                                                                              'Gamified approved clients - Revenue',
+                                                                                              "Total Approved Accounts - Appsflyer"]))]).sort_values(by='ResponseMetricValue'),
+                   x='Efficiency', y='Response Metric Name',
+                   color_discrete_map=custom_colors, color='ResponseMetric',
+                   labels=None, text_auto=True, title='Efficiency')
+
+     # Update layout and traces
+     fig1.update_traces(customdata=effectiveness_overall['Efficiency'],
+                        textposition='auto')
+     fig1.update_layout(showlegend=False)
+     fig1.update_yaxes(title='', showticklabels=False)
+     fig1.update_xaxes(title='', showticklabels=False)
+     fig1.update_xaxes(tickfont=dict(size=20))
+     fig1.update_yaxes(tickfont=dict(size=20))
+     st.plotly_chart(fig1, use_container_width=True)
+
+ effectiveness_overall_revenue = pd.DataFrame({'ResponseMetricName': ['Approved Clients', 'Approved Clients'],
+                                               'ResponseMetricValue': [70201070, 1768900],
+                                               'Efficiency': [127.54, 3.21],
+                                               'ResponseMetric': ['BAU', 'Gamified']})
+
+ # from plotly.subplots import make_subplots
+ # fig = make_subplots(rows=1, cols=2,
+ #                     subplot_titles=["Effectiveness", "Efficiency"])
+
+ # # Add first plot as subplot
+ # fig.add_trace(go.Funnel(
+ #     x=fig.data[0].x,
+ #     y=fig.data[0].y,
+ #     textinfo='value+percent initial',
+ #     hoverinfo='x+y+percent initial'
+ # ), row=1, col=1)
+
+ # # Update layout for first subplot
+ # fig.update_xaxes(title_text="Response Metric Value", row=1, col=1)
+ # fig.update_yaxes(ticktext=custom_y_labels, row=1, col=1)
+
+ # # Add second plot as subplot
+ # fig.add_trace(go.Bar(
+ #     x=fig1.data[0].x,
+ #     y=fig1.data[0].y,
+ #     customdata=fig1.data[0].customdata,
+ #     textposition='auto'
+ # ), row=1, col=2)
+
+ # # Update layout for second subplot
+ # fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
+ # fig.update_yaxes(title='', showticklabels=False, row=1, col=2)
+
+ # fig.update_layout(height=600, width=800, title_text="Key Metrics")
+ # st.plotly_chart(fig)
+
+ st.header('Return Forecast by Media Channel')
+ with st.expander("Return Forecast by Media Channel"):
+     # NaN never compares equal to itself, so filter with pd.notna instead of != np.nan
+     metric_data = [val for val in list(st.session_state['raw_data']['ResponseMetricName'].unique()) if pd.notna(val)]
+     # st.write(metric_data)
+     metric = st.selectbox('Select Metric', metric_data, index=1)
+
+     selected_metric = st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName'] == metric]
+     # st.dataframe(selected_metric.head(2))
+     effectiveness = selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()
+     effectiveness_df = pd.DataFrame({'Channel': effectiveness.index, "ResponseMetricValue": effectiveness.values})
+
+     summary_df_sorted = summary_df_sorted.merge(effectiveness_df, left_on="Channel_name", right_on='Channel')
+
+     # st.dataframe(summary_df_sorted.head(2))
+     summary_df_sorted['Efficiency'] = summary_df_sorted['ResponseMetricValue'] / summary_df_sorted['Optimized_spend']
+
+     columns = st.columns(3)
+     with columns[0]:
+         fig = summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='', text_column='Optimized_spend', color='Channel_name')
+         st.plotly_chart(fig, use_container_width=True)
+     with columns[1]:
+         # effectiveness = (selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()).values
+         # effectiveness_df = pd.DataFrame({'Channel': st.session_state['raw_data']['MediaChannelName'].unique(), "ResponseMetricValue": effectiveness})
+         # effectiveness.reset_index(inplace=True)
+         # st.dataframe(effectiveness.head())
+         fig = summary_plot(summary_df_sorted, x='ResponseMetricValue', y='Channel_name', title='Effectiveness', text_column='ResponseMetricValue', color='Channel_name')
+         st.plotly_chart(fig, use_container_width=True)
+
+     with columns[2]:
+         fig = summary_plot(summary_df_sorted, x='Efficiency', y='Channel_name', title='Efficiency', text_column='Efficiency', color='Channel_name', format_as_decimal=True)
+         st.plotly_chart(fig, use_container_width=True)
+
+ from plotly.subplots import make_subplots  # used by the commented-out subplot experiment below
+
+ # Create figure with subplots
+ # fig = make_subplots(rows=1, cols=2)
+
+ # # Add funnel plot to subplot 1
+ # fig.add_trace(
+ #     go.Funnel(
+ #         x=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricValue'],
+ #         y=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricName'],
+ #         textposition="inside",
+ #         texttemplate="%{x:.2s}",
+ #         customdata=effectiveness_overall['Efficiency'],
+ #         hovertemplate="%{customdata:.2f}<extra></extra>"
+ #     ),
+ #     row=1, col=1
+ # )
+
+ # # Add bar plot to subplot 2
+ # fig.add_trace(
+ #     go.Bar(
+ #         x=effectiveness_overall.sort_values(by='ResponseMetricValue')['Efficiency'],
+ #         y=effectiveness_overall.sort_values(by='ResponseMetricValue')['ResponseMetricName'],
+ #         marker_color=effectiveness_overall['ResponseMetric'],
+ #         customdata=effectiveness_overall['Efficiency'],
+ #         hovertemplate="%{customdata:.2f}<extra></extra>",
+ #         textposition="outside"
+ #     ),
+ #     row=1, col=2
+ # )
+
+ # # Update layout
+ # fig.update_layout(title_text="Effectiveness")
+ # fig.update_yaxes(title_text="", row=1, col=1)
+ # fig.update_yaxes(title_text="", showticklabels=False, row=1, col=2)
+ # fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
+
+ # # Show figure
+ # st.plotly_chart(fig)
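The channel-level efficiency computed in the expander above is simply forecast response per dollar of planned spend; a toy check with invented numbers:

import pandas as pd

t = pd.DataFrame({'Channel_name': ['TV', 'Search'],
                  'Optimized_spend': [50000.0, 20000.0],
                  'ResponseMetricValue': [1000.0, 800.0]})
t['Efficiency'] = t['ResponseMetricValue'] / t['Optimized_spend']
print(t['Efficiency'].tolist())  # [0.02, 0.04] responses per dollar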
pages/1_Data_Import 2.py ADDED
@@ -0,0 +1,891 @@
+ # Importing necessary libraries
+ import streamlit as st
+
+ st.set_page_config(
+     page_title="Model Build",
+     page_icon=":shark:",
+     layout="wide",
+     initial_sidebar_state="collapsed",
+ )
+
+ import numpy as np
+ import pandas as pd
+ from utilities import set_header, load_local_css, load_authenticator
+ import pickle
+
+
+ load_local_css("styles.css")
+ set_header()
+
+ authenticator = st.session_state.get("authenticator")
+ if authenticator is None:
+     authenticator = load_authenticator()
+
+ name, authentication_status, username = authenticator.login("Login", "main")
+ auth_status = st.session_state.get("authentication_status")
+
+ # Check for authentication status
+ if auth_status is not True:
+     st.stop()
+
+
+ # Function to validate the date column in a dataframe
+ def validate_date_column(df):
+     try:
+         # Attempt to convert the 'date' column to datetime
+         df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y")
+         return True
+     except Exception:
+         return False
+
+
+ # Function to determine the data interval
+ def determine_data_interval(common_freq):
+     if common_freq == 1:
+         return "daily"
+     elif common_freq == 7:
+         return "weekly"
+     elif 28 <= common_freq <= 31:
+         return "monthly"
+     else:
+         return "irregular"
+
+
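The interval detection keys off the modal gap, in days, between consecutive unique dates; a minimal standalone sketch with made-up dates:

import pandas as pd

dates = pd.to_datetime(['2023-01-02', '2023-01-09', '2023-01-16', '2023-01-23'])
common_freq = pd.Series(dates).diff().dt.days.dropna().mode()[0]
print(common_freq)                           # 7.0
print(determine_data_interval(common_freq))  # 'weekly'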
+ # Function to read each uploaded Excel file into a pandas DataFrame and store them in a dictionary
+ @st.cache_resource(show_spinner=False)
+ def files_to_dataframes(uploaded_files):
+     df_dict = {}
+     for uploaded_file in uploaded_files:
+         # Extract the file name without its extension
+         file_name = uploaded_file.name.rsplit(".", 1)[0]
+
+         # Check for duplicate file names
+         if file_name in df_dict:
+             st.warning(
+                 f"Duplicate File: {file_name}. This file will be skipped.",
+                 icon="⚠️",
+             )
+             continue
+
+         # Read the file into a DataFrame
+         df = pd.read_excel(uploaded_file)
+
+         # Convert all column names to lowercase
+         df.columns = df.columns.str.lower().str.strip()
+
+         # Separate numeric and non-numeric columns
+         numeric_cols = list(df.select_dtypes(include=["number"]).columns)
+         non_numeric_cols = [
+             col
+             for col in df.select_dtypes(exclude=["number"]).columns
+             if col.lower() != "date"
+         ]
+
+         # Check for a 'date' column and at least one numeric column
+         if not (validate_date_column(df) and len(numeric_cols) > 0):
+             st.warning(
+                 f"File Name: {file_name} ➜ Please upload data with a Date column in 'DD-MM-YYYY' format and at least one media/exogenous column. This file will be skipped.",
+                 icon="⚠️",
+             )
+             continue
+
+         # Determine the most common gap (in days) between consecutive dates
+         common_freq = (
+             pd.Series(df["date"].unique()).diff().dt.days.dropna().mode()[0]
+         )
+         # Calculate the data interval (daily, weekly, monthly or irregular)
+         interval = determine_data_interval(common_freq)
+         if interval == "irregular":
+             st.warning(
+                 f"File Name: {file_name} ➜ Please upload data in a daily, weekly or monthly interval. This file will be skipped.",
+                 icon="⚠️",
+             )
+             continue
+
+         # Store the DataFrame and its metadata in the dictionary under the file name
+         df_dict[file_name] = {
+             "numeric": numeric_cols,
+             "non_numeric": non_numeric_cols,
+             "interval": interval,
+             "df": df,
+         }
+
+     return df_dict
+
+
+ # Function to adjust dataframe granularity
+ # def adjust_dataframe_granularity(df, current_granularity, target_granularity):
+ #     # Set index
+ #     df.set_index("date", inplace=True)
+
+ #     # Define aggregation rules for resampling
+ #     aggregation_rules = {
+ #         col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
+ #         for col in df.columns
+ #     }
+
+ #     resampled_df = df
+ #     if current_granularity == "daily" and target_granularity == "weekly":
+ #         resampled_df = df.resample("W-MON").agg(aggregation_rules)
+
+ #     elif current_granularity == "daily" and target_granularity == "monthly":
+ #         resampled_df = df.resample("MS").agg(aggregation_rules)
+
+ #     elif current_granularity == "daily" and target_granularity == "daily":
+ #         resampled_df = df.resample("D").agg(aggregation_rules)
+
+ #     elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
+ #         # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
+ #         expanded_data = []
+ #         for _, row in df.iterrows():
+ #             if current_granularity == "weekly":
+ #                 period_range = pd.date_range(start=row.name, periods=7)
+ #             elif current_granularity == "monthly":
+ #                 period_range = pd.date_range(
+ #                     start=row.name, periods=row.name.days_in_month
+ #                 )
+
+ #             for date in period_range:
+ #                 new_row = {}
+ #                 for col in df.columns:
+ #                     if pd.api.types.is_numeric_dtype(df[col]):
+ #                         if current_granularity == "weekly":
+ #                             new_row[col] = row[col] / 7
+ #                         elif current_granularity == "monthly":
+ #                             new_row[col] = row[col] / row.name.days_in_month
+ #                     else:
+ #                         new_row[col] = row[col]
+ #                 expanded_data.append((date, new_row))
+
+ #         resampled_df = pd.DataFrame(
+ #             [data for _, data in expanded_data],
+ #             index=[date for date, _ in expanded_data],
+ #         )
+
+ #     # Reset index
+ #     resampled_df = resampled_df.reset_index().rename(columns={"index": "date"})
+
+ #     return resampled_df
+
+
+ def adjust_dataframe_granularity(df, current_granularity, target_granularity):
+     # Set index
+     df.set_index("date", inplace=True)
+
+     # Define aggregation rules for resampling
+     aggregation_rules = {
+         col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
+         for col in df.columns
+     }
+
+     # Initialize resampled_df
+     resampled_df = df
+     if current_granularity == "daily" and target_granularity == "weekly":
+         resampled_df = df.resample("W-MON", closed="left", label="left").agg(
+             aggregation_rules
+         )
+
+     elif current_granularity == "daily" and target_granularity == "monthly":
+         resampled_df = df.resample("MS", closed="left", label="left").agg(
+             aggregation_rules
+         )
+
+     elif current_granularity == "daily" and target_granularity == "daily":
+         resampled_df = df.resample("D").agg(aggregation_rules)
+
+     elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
+         # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
+         expanded_data = []
+         for _, row in df.iterrows():
+             if current_granularity == "weekly":
+                 period_range = pd.date_range(start=row.name, periods=7)
+             elif current_granularity == "monthly":
+                 period_range = pd.date_range(
+                     start=row.name, periods=row.name.days_in_month
+                 )
+
+             for date in period_range:
+                 new_row = {}
+                 for col in df.columns:
+                     if pd.api.types.is_numeric_dtype(df[col]):
+                         if current_granularity == "weekly":
+                             new_row[col] = row[col] / 7
+                         elif current_granularity == "monthly":
+                             new_row[col] = row[col] / row.name.days_in_month
+                     else:
+                         new_row[col] = row[col]
+                 expanded_data.append((date, new_row))
+
+         resampled_df = pd.DataFrame(
+             [data for _, data in expanded_data],
+             index=[date for date, _ in expanded_data],
+         )
+
+     # Reset index
+     resampled_df = resampled_df.reset_index().rename(columns={"index": "date"})
+
+     return resampled_df
+
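As a sanity check on the weekly-to-daily branch, a single weekly row should expand to seven daily rows with the numeric total preserved; a sketch with invented numbers:

import pandas as pd

weekly = pd.DataFrame({'date': pd.to_datetime(['2023-01-02']), 'spend': [700.0]})
daily = adjust_dataframe_granularity(weekly, 'weekly', 'daily')
print(len(daily))            # 7 rows, one per day
print(daily['spend'].sum())  # 700.0 -- the weekly total is preserved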
+
+ # Function to clean and extract unique values of DMA and Panel
+ @st.cache_resource(show_spinner=False)
+ def clean_and_extract_unique_values(files_dict, selections):
+     all_dma_values = set()
+     all_panel_values = set()
+
+     for file_name, file_data in files_dict.items():
+         df = file_data["df"]
+
+         # 'DMA' and 'Panel' selections
+         selected_dma = selections[file_name].get("DMA")
+         selected_panel = selections[file_name].get("Panel")
+
+         # Clean and standardize the DMA column if it exists and is selected
+         if selected_dma and selected_dma != "N/A" and selected_dma in df.columns:
+             df[selected_dma] = (
+                 df[selected_dma].str.lower().str.strip().str.replace("_", " ")
+             )
+             all_dma_values.update(df[selected_dma].dropna().unique())
+
+         # Clean and standardize the Panel column if it exists and is selected
+         if selected_panel and selected_panel != "N/A" and selected_panel in df.columns:
+             df[selected_panel] = (
+                 df[selected_panel].str.lower().str.strip().str.replace("_", " ")
+             )
+             all_panel_values.update(df[selected_panel].dropna().unique())
+
+         # Update the processed DataFrame back in the dictionary
+         files_dict[file_name]["df"] = df
+
+     return all_dma_values, all_panel_values
+
+
+ # Function to format values for display
+ @st.cache_resource(show_spinner=False)
+ def format_values_for_display(values_list):
+     # Capitalize the first letter of each word and replace underscores with spaces
+     formatted_list = [value.replace("_", " ").title() for value in values_list]
+     # Join values with commas and 'and' before the last value
+     if len(formatted_list) > 1:
+         return ", ".join(formatted_list[:-1]) + ", and " + formatted_list[-1]
+     elif formatted_list:
+         return formatted_list[0]
+     return "No values available"
+
+
+ # Function to normalize all data within files_dict to a daily granularity
+ @st.cache(show_spinner=False, allow_output_mutation=True)
+ def standardize_data_to_daily(files_dict, selections):
+     # Normalize all data to a daily granularity using a provided function
+     files_dict = apply_granularity_to_all(files_dict, "daily", selections)
+
+     # Update the "interval" attribute for each dataset to indicate the new granularity
+     for files_name, files_data in files_dict.items():
+         files_data["interval"] = "daily"
+
+     return files_dict
+
+
+ # Function to apply granularity transformation to all DataFrames in files_dict
+ @st.cache_resource(show_spinner=False)
+ def apply_granularity_to_all(files_dict, granularity_selection, selections):
+     for file_name, file_data in files_dict.items():
+         df = file_data["df"].copy()
+
+         # Handle cases where DMA or Panel might be 'N/A'
+         selected_dma = selections[file_name].get("DMA")
+         selected_panel = selections[file_name].get("Panel")
+
+         # Segment selection logic, handling 'N/A'
+         if selected_dma != "N/A" and selected_panel != "N/A":
+             unique_combinations = df[[selected_dma, selected_panel]].drop_duplicates()
+         elif selected_dma != "N/A":
+             unique_combinations = df[[selected_dma]].drop_duplicates()
+             selected_panel = None  # Ensure Panel is ignored if N/A
+         elif selected_panel != "N/A":
+             unique_combinations = df[[selected_panel]].drop_duplicates()
+             selected_dma = None  # Ensure DMA is ignored if N/A
+         else:
+             # If both are 'N/A', process the entire dataframe as is
+             df = adjust_dataframe_granularity(
+                 df, file_data["interval"], granularity_selection
+             )
+             files_dict[file_name]["df"] = df
+             continue  # Skip to the next file
+
+         transformed_segments = []
+         for _, combo in unique_combinations.iterrows():
+             if selected_dma and selected_panel:
+                 segment = df[
+                     (df[selected_dma] == combo[selected_dma])
+                     & (df[selected_panel] == combo[selected_panel])
+                 ]
+             elif selected_dma:
+                 segment = df[df[selected_dma] == combo[selected_dma]]
+             elif selected_panel:
+                 segment = df[df[selected_panel] == combo[selected_panel]]
+
+             # Adjust granularity of the segment
+             transformed_segment = adjust_dataframe_granularity(
+                 segment, file_data["interval"], granularity_selection
+             )
+             transformed_segments.append(transformed_segment)
+
+         # Combine all transformed segments into a single DataFrame for this file
+         transformed_df = pd.concat(transformed_segments, ignore_index=True)
+         files_dict[file_name]["df"] = transformed_df
+
+     return files_dict
+
+
+ # Function to create the main dataframe structure
+ @st.cache_resource(show_spinner=False)
+ def create_main_dataframe(
+     files_dict, all_dma_values, all_panel_values, granularity_selection
+ ):
+     # Determine the global start and end dates across all DataFrames
+     global_start = min(df["df"]["date"].min() for df in files_dict.values())
+     global_end = max(df["df"]["date"].max() for df in files_dict.values())
+
+     # Adjust the date_range generation based on the granularity_selection
+     if granularity_selection == "weekly":
+         # Generate a weekly range, with weeks starting on Monday
+         date_range = pd.date_range(start=global_start, end=global_end, freq="W-MON")
+     elif granularity_selection == "monthly":
+         # Generate a monthly range, starting from the first day of each month
+         date_range = pd.date_range(start=global_start, end=global_end, freq="MS")
+     else:  # Default to daily if not weekly or monthly
+         date_range = pd.date_range(start=global_start, end=global_end, freq="D")
+
+     # Collect all unique DMA and Panel values, excluding 'N/A'
+     all_dmas = all_dma_values
+     all_panels = all_panel_values
+
+     # Dynamically build the list of dimensions (Panel, DMA) to include in the main DataFrame based on availability
+     dimensions, merge_keys = [], []
+     if all_panels:
+         dimensions.append(all_panels)
+         merge_keys.append("Panel")
+     if all_dmas:
+         dimensions.append(all_dmas)
+         merge_keys.append("DMA")
+
+     dimensions.append(date_range)  # Date range is always included
+     merge_keys.append("date")
+
+     # Create a main DataFrame template with the dimensions
+     main_df = pd.MultiIndex.from_product(
+         dimensions,
+         names=[name for name, _ in zip(merge_keys, dimensions)],
+     ).to_frame(index=False)
+
+     return main_df.reset_index(drop=True)
+
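The scaffold returned by create_main_dataframe is just the cross-product of every Panel, DMA, and date; a toy illustration with invented values:

import pandas as pd

panels, dmas = {'p1'}, {'d1', 'd2'}
dates = pd.date_range('2023-01-02', periods=2, freq='D')
scaffold = pd.MultiIndex.from_product(
    [panels, dmas, dates], names=['Panel', 'DMA', 'date']
).to_frame(index=False)
print(len(scaffold))  # 1 panel x 2 DMAs x 2 dates = 4 rows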
+
+ # Function to prepare and merge DataFrames
+ @st.cache_resource(show_spinner=False)
+ def merge_into_main_df(main_df, files_dict, selections):
+     for file_name, file_data in files_dict.items():
+         df = file_data["df"].copy()
+
+         # Rename selected DMA and Panel columns if not 'N/A'
+         selected_dma = selections[file_name].get("DMA", "N/A")
+         selected_panel = selections[file_name].get("Panel", "N/A")
+         if selected_dma != "N/A":
+             df.rename(columns={selected_dma: "DMA"}, inplace=True)
+         if selected_panel != "N/A":
+             df.rename(columns={selected_panel: "Panel"}, inplace=True)
+
+         # Merge the current DataFrame into main_df based on 'date', and where applicable, 'Panel' and 'DMA'
+         merge_keys = ["date"]
+         if "Panel" in df.columns:
+             merge_keys.append("Panel")
+         if "DMA" in df.columns:
+             merge_keys.append("DMA")
+         main_df = pd.merge(main_df, df, on=merge_keys, how="left")
+
+     # After all merges, sort by 'date' and reset the index for cleanliness
+     sort_by = ["date"]
+     if "Panel" in main_df.columns:
+         sort_by.append("Panel")
+     if "DMA" in main_df.columns:
+         sort_by.append("DMA")
+     main_df.sort_values(by=sort_by, inplace=True)
+     main_df.reset_index(drop=True, inplace=True)
+
+     return main_df
+
+
+ # Function to categorize a column by keyword
+ def categorize_column(column_name):
+     # Define keywords for each category
+     internal_keywords = [
+         "Price",
+         "Discount",
+         "product_price",
+         "cost",
+         "margin",
+         "inventory",
+         "sales",
+         "revenue",
+         "turnover",
+         "expense",
+     ]
+     exogenous_keywords = [
+         "GDP",
+         "Tax",
+         "Inflation",
+         "interest_rate",
+         "employment_rate",
+         "exchange_rate",
+         "consumer_spending",
+         "retail_sales",
+         "oil_prices",
+         "weather",
+     ]
+
+     # Check whether the column name matches any keyword for the Internal or Exogenous categories
+     for keyword in internal_keywords:
+         if keyword.lower() in column_name.lower():
+             return "Internal"
+     for keyword in exogenous_keywords:
+         if keyword.lower() in column_name.lower():
+             return "Exogenous"
+
+     # Default to Media if no match is found
+     return "Media"
+
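The keyword match in categorize_column is a simple case-insensitive substring test; for example:

print(categorize_column('product_price_usd'))  # 'Internal'  (matches 'price')
print(categorize_column('gdp_index'))          # 'Exogenous' (matches 'GDP')
print(categorize_column('tv_impressions'))     # 'Media'     (no keyword match)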
+
+ # Function to calculate missing-value stats and prepare an editable DataFrame
+ @st.cache_resource(show_spinner=False)
+ def prepare_missing_stats_df(df):
+     missing_stats = []
+     for column in df.columns:
+         # Skip the date, DMA and Panel columns
+         if column == "date" or column == "DMA" or column == "Panel":
+             continue
+
+         missing = df[column].isnull().sum()
+         pct_missing = round((missing / len(df)) * 100, 2)
+
+         # Dynamically assign category based on column name
+         # category = categorize_column(column)
+         category = "Media"
+
+         missing_stats.append(
+             {
+                 "Column": column,
+                 "Missing Values": missing,
+                 "Missing Percentage": pct_missing,
+                 "Impute Method": "Fill with 0",  # Default value
+                 "Category": category,
+             }
+         )
+     stats_df = pd.DataFrame(missing_stats)
+
+     return stats_df
+
+
+ # Function to add API DataFrame details to the files dictionary
+ @st.cache_resource(show_spinner=False)
+ def add_api_dataframe_to_dict(main_df, files_dict):
+     files_dict["API"] = {
+         "numeric": list(main_df.select_dtypes(include=["number"]).columns),
+         "non_numeric": [
+             col
+             for col in main_df.select_dtypes(exclude=["number"]).columns
+             if col.lower() != "date"
+         ],
+         "interval": determine_data_interval(
+             pd.Series(main_df["date"].unique()).diff().dt.days.dropna().mode()[0]
+         ),
+         "df": main_df,
+     }
+
+     return files_dict
+
+
+ # Function to read API data into a DataFrame, parsing the specified columns as datetime
+ @st.cache_resource(show_spinner=False)
+ def read_API_data():
+     return pd.read_excel(r"upf_data_converted.xlsx", parse_dates=["Date"])
+
+
+ # Function to set the 'DMA_Panel_Selected' session state variable to False
+ def set_DMA_Panel_Selected_false():
+     st.session_state["DMA_Panel_Selected"] = False
+
+
+ # Initialize 'final_df' in session state
+ if "final_df" not in st.session_state:
+     st.session_state["final_df"] = pd.DataFrame()
+
+ # Initialize 'bin_dict' in session state
+ if "bin_dict" not in st.session_state:
+     st.session_state["bin_dict"] = {}
+
+ # Initialize 'DMA_Panel_Selected' in session state
+ if "DMA_Panel_Selected" not in st.session_state:
+     st.session_state["DMA_Panel_Selected"] = False
+
+ # Page Title
+ st.write("")  # Top padding
+ st.title("Data Import")
+
+
+ #########################################################################################################################################################
+ # Create a dictionary to hold all DataFrames and collect user input to specify "DMA" and "Panel" columns for each file
+ #########################################################################################################################################################
+
+
+ # Read the Excel file, parsing the 'Date' column as datetime
+ main_df = read_API_data()
+
+ # Convert all column names to lowercase
+ main_df.columns = main_df.columns.str.lower().str.strip()
+
+ # File uploader
+ uploaded_files = st.file_uploader(
+     "Upload additional data",
+     type=["xlsx"],
+     accept_multiple_files=True,
+     on_change=set_DMA_Panel_Selected_false,
+ )
+
+ # Custom HTML for upload instructions
+ recommendation_html = """
+ <div style="text-align: justify;">
+ <strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets, including DMA, Panel, media, internal, and exogenous data, adhere to the following guidelines: each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code> and be free of missing values.
+ </div>
+ """
+ st.markdown(recommendation_html, unsafe_allow_html=True)
+
+ # Choose Date Granularity
+ st.markdown("#### Choose Date Granularity")
+ granularity_selection = st.selectbox(
+     "Choose Date Granularity",
+     ["Daily", "Weekly", "Monthly"],
+     label_visibility="collapsed",
+     on_change=set_DMA_Panel_Selected_false,
+ )
+ granularity_selection = str(granularity_selection).lower()
+
+ # Convert files to dataframes
+ files_dict = files_to_dataframes(uploaded_files)
+
+ # Add the API DataFrame
+ if main_df is not None:
+     files_dict = add_api_dataframe_to_dict(main_df, files_dict)
+
+ # Display a warning message if no files have been uploaded and halt further execution
+ if not files_dict:
+     st.warning(
+         "Please upload at least one file to proceed.",
+         icon="⚠️",
+     )
+     st.stop()  # Halt further execution until a file is uploaded
+
+
+ # Select DMA and Panel columns
+ st.markdown("#### Select DMA and Panel columns")
+ selections = {}
+ with st.expander("Select DMA and Panel columns", expanded=False):
+     count = 0  # Counter to manage the visibility of labels and keys
+     for file_name, file_data in files_dict.items():
+         # Determine visibility of the label based on the count
+         if count == 0:
+             label_visibility = "visible"
+         else:
+             label_visibility = "collapsed"
+
+         # Extract non-numeric columns
+         non_numeric_cols = file_data["non_numeric"]
+
+         # Prepare DMA and Panel values for the dropdowns, adding "N/A" as an option
+         dma_values = non_numeric_cols + ["N/A"]
+         panel_values = non_numeric_cols + ["N/A"]
+
+         # Skip if only one option is available
+         if len(dma_values) == 1 and len(panel_values) == 1:
+             selected_dma, selected_panel = "N/A", "N/A"
+             # Update the selections for DMA and Panel for the current file
+             selections[file_name] = {
+                 "DMA": selected_dma,
+                 "Panel": selected_panel,
+             }
+             continue
+
+         # Create layout columns for the File Name, DMA, and Panel selections
+         file_name_col, DMA_col, Panel_col = st.columns([2, 4, 4])
+
+         with file_name_col:
+             # Display the "File Name" label only for the first file
+             if count == 0:
+                 st.write("File Name")
+             else:
+                 st.write("")
+             st.write(file_name)  # Display the file name
+
+         with DMA_col:
+             # Display a selectbox for DMA values
+             selected_dma = st.selectbox(
+                 "Select DMA",
+                 dma_values,
+                 on_change=set_DMA_Panel_Selected_false,
+                 label_visibility=label_visibility,  # Control visibility of the label
+                 key=f"DMA_selectbox{count}",  # Ensure a unique key for each selectbox
+             )
+
+         with Panel_col:
+             # Display a selectbox for Panel values
+             selected_panel = st.selectbox(
+                 "Select Panel",
+                 panel_values,
+                 on_change=set_DMA_Panel_Selected_false,
+                 label_visibility=label_visibility,  # Control visibility of the label
+                 key=f"Panel_selectbox{count}",  # Ensure a unique key for each selectbox
+             )
+
+         # Skip processing if the same column is selected for both Panel and DMA due to potential data integrity issues
+         if selected_panel == selected_dma and not (
+             selected_panel == "N/A" and selected_dma == "N/A"
+         ):
+             st.warning(
+                 f"File: {file_name} → The same column cannot serve as both Panel and DMA. Please adjust your selections.",
+             )
+             selected_dma, selected_panel = "N/A", "N/A"
+             st.stop()
+
+         # Update the selections for DMA and Panel for the current file
+         selections[file_name] = {
+             "DMA": selected_dma,
+             "Panel": selected_panel,
+         }
+
+         count += 1  # Increment the counter after processing each file
+
+ # Accept DMA and Panel selection
+ if st.button("Accept and Process", use_container_width=True):
+
+     # Normalize all data to a daily granularity. This initial standardization simplifies subsequent conversions to other levels of granularity
+     with st.spinner("Processing..."):
+         files_dict = standardize_data_to_daily(files_dict, selections)
+
+         # Convert all data to the selected level of granularity
+         files_dict = apply_granularity_to_all(
+             files_dict, granularity_selection, selections
+         )
+
+         st.session_state["files_dict"] = files_dict
+         st.session_state["DMA_Panel_Selected"] = True
+
+
+ #########################################################################################################################################################
+ # Display unique DMA and Panel values
+ #########################################################################################################################################################
+
+
+ # Halt further execution until the DMA and Panel columns are selected
+ if "files_dict" in st.session_state and st.session_state["DMA_Panel_Selected"]:
+     files_dict = st.session_state["files_dict"]
+ else:
+     st.stop()
+
+ # Sets to store the unique values of DMA and Panel
+ with st.spinner("Fetching DMA and Panel values..."):
+     all_dma_values, all_panel_values = clean_and_extract_unique_values(
+         files_dict, selections
+     )
+
+ # Lists of unique DMA and Panel column values
+ list_of_all_dma_values = list(all_dma_values)
+ list_of_all_panel_values = list(all_panel_values)
+
+ # Format DMA and Panel values for display
+ formatted_dma_values = format_values_for_display(list_of_all_dma_values)
+ formatted_panel_values = format_values_for_display(list_of_all_panel_values)
+
+ # Unique DMA and Panel values
+ st.markdown("#### Unique DMA and Panel values")
+ # Display DMA and Panel values
+ with st.expander("Unique DMA and Panel values"):
+     st.write("")
+     st.markdown(
+         f"""
+         <style>
+         .justify-text {{
+             text-align: justify;
+         }}
+         </style>
+         <div class="justify-text">
+             <strong>Panel Values:</strong> {formatted_panel_values}<br>
+             <strong>DMA Values:</strong> {formatted_dma_values}
+         </div>
+         """,
+         unsafe_allow_html=True,
+     )
+
+     # Display the total number of DMAs and Panels
+     st.write("")
+     st.markdown(
+         f"""
+         <div style="text-align: justify;">
+             <strong>Number of DMAs detected:</strong> {len(list_of_all_dma_values)}<br>
+             <strong>Number of Panels detected:</strong> {len(list_of_all_panel_values)}
+         </div>
+         """,
+         unsafe_allow_html=True,
+     )
+     st.write("")
+
+
+ #########################################################################################################################################################
+ # Merge all DataFrames
+ #########################################################################################################################################################
+
+
+ # Merge all selected DataFrames
+ main_df = create_main_dataframe(
+     files_dict, all_dma_values, all_panel_values, granularity_selection
+ )
+ merged_df = merge_into_main_df(main_df, files_dict, selections)
+
+ # # Display the merged DataFrame
+ # st.markdown("#### Merged DataFrame based on selected DMA and Panel")
+ # st.dataframe(merged_df)
+
+
+ #########################################################################################################################################################
+ # Categorize Variables and Impute Missing Values
+ #########################################################################################################################################################
+
+
+ # Create an editable DataFrame in Streamlit
+ st.markdown("#### Select Variables Category & Impute Missing Values")
+
+ # Prepare the missing-stats DataFrame for editing
+ missing_stats_df = prepare_missing_stats_df(merged_df)
+
+ edited_stats_df = st.data_editor(
+     missing_stats_df,
+     column_config={
+         "Impute Method": st.column_config.SelectboxColumn(
+             options=[
+                 "Drop Column",
+                 "Fill with Mean",
+                 "Fill with Median",
+                 "Fill with 0",
+             ],
+             required=True,
+             default="Fill with 0",
+         ),
+         "Category": st.column_config.SelectboxColumn(
+             options=[
+                 "Media",
+                 "Exogenous",
+                 "Internal",
+                 "Response_Metric",
+             ],
+             required=True,
+             default="Media",
+         ),
+     },
+     disabled=["Column", "Missing Values", "Missing Percentage"],
+     hide_index=True,
+     use_container_width=True,
+ )
+
+ # Apply changes based on the edited DataFrame
+ for i, row in edited_stats_df.iterrows():
+     column = row["Column"]
+     if row["Impute Method"] == "Drop Column":
+         merged_df.drop(columns=[column], inplace=True)
+
+     elif row["Impute Method"] == "Fill with Mean":
+         merged_df[column] = merged_df[column].fillna(merged_df[column].mean())
+
+     elif row["Impute Method"] == "Fill with Median":
+         merged_df[column] = merged_df[column].fillna(merged_df[column].median())
+
+     elif row["Impute Method"] == "Fill with 0":
+         merged_df[column] = merged_df[column].fillna(0)
+
+ # Display the final DataFrame and exogenous variables
+ st.markdown("#### Final DataFrame")
+ final_df = merged_df
+ st.dataframe(final_df, hide_index=True)
+
+ # Initialize an empty dictionary to hold categories and their variables
+ category_dict = {}
+
+ # Iterate over each row in the edited DataFrame to populate the dictionary
+ for i, row in edited_stats_df.iterrows():
+     column = row["Column"]
+     category = row["Category"]  # The category chosen by the user for this variable
+
+     # Check whether the category already exists in the dictionary
+     if category not in category_dict:
+         # If not, initialize it with the current column as its first element
+         category_dict[category] = [column]
+     else:
+         # If it exists, append the current column to the list of variables under this category
+         category_dict[category].append(column)
+
+ # Add Date, DMA and Panel to the category dictionary
+ category_dict.update({"Date": ["date"]})
+ if "DMA" in final_df.columns:
+     category_dict["DMA"] = ["DMA"]
+
+ if "Panel" in final_df.columns:
+     category_dict["Panel"] = ["Panel"]
+
+ # Display the dictionary
+ st.markdown("#### Variable Category")
+ for category, variables in category_dict.items():
+     # Check whether there are multiple variables to handle the "and" insertion correctly
+     if len(variables) > 1:
+         # Join all but the last variable with ", ", then add " and " before the last variable
+         variables_str = ", ".join(variables[:-1]) + " and " + variables[-1]
+     else:
+         # If there's only one variable, no need for "and"
+         variables_str = variables[0]
+
+     # Display the category and its variables in the desired format
+     st.markdown(
+         f"<div style='text-align: justify;'><strong>{category}:</strong> {variables_str}</div>",
+         unsafe_allow_html=True,
+     )
+
+ # Store the final dataframe and bin dictionary in session state
+ st.session_state["final_df"], st.session_state["bin_dict"] = final_df, category_dict
+
+ if st.button('Save Changes'):
+     with open("Pickle_files/main_df", 'wb') as f:
+         pickle.dump(st.session_state["final_df"], f)
+     with open("Pickle_files/category_dict", 'wb') as c:
+         pickle.dump(st.session_state["bin_dict"], c)
+     st.success('Changes Saved!')
+
pages/1_Data_Validation.py ADDED
@@ -0,0 +1,241 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from Eda_functions import *
6
+ import numpy as np
7
+ import re
8
+ import pickle
9
+ from ydata_profiling import ProfileReport
10
+ from streamlit_pandas_profiling import st_profile_report
11
+ import streamlit as st
12
+ import streamlit.components.v1 as components
13
+ import sweetviz as sv
14
+ from utilities import set_header,initialize_data,load_local_css
15
+ from st_aggrid import GridOptionsBuilder,GridUpdateMode
16
+ from st_aggrid import GridOptionsBuilder
17
+ from st_aggrid import AgGrid
18
+ import base64
19
+
20
+ st.set_page_config(
21
+ page_title="Data Validation",
22
+ page_icon=":shark:",
23
+ layout="wide",
24
+ initial_sidebar_state='collapsed'
25
+ )
26
+ load_local_css('styles.css')
27
+ set_header()
28
+
29
+
30
+
31
+ #preprocessing
32
+ # with open('Categorised_data.pkl', 'rb') as file:
33
+ # Categorised_data = pickle.load(file)
34
+ # with open("edited_dataframe.pkl", 'rb') as file:
35
+
36
+
37
+ # df = pickle.load(file)
38
+ # date=df.index
39
+ # df.reset_index(inplace=True)
40
+ # df['Date'] = pd.to_datetime(date)
41
+
42
+
43
+ #prospects=pd.read_excel('EDA_Data.xlsx',sheet_name='Prospects')
44
+ #spends=pd.read_excel('EDA_Data.xlsx',sheet_name='SPEND INPUT')
45
+ #spends.columns=['Week','Streaming (Spends)','TV (Spends)','Search (Spends)','Digital (Spends)']
46
+ #df=pd.concat([df,spends],axis=1)
47
+
48
+ #df['Date'] =pd.to_datetime(df['Date']).dt.strftime('%m/%d/%Y')
49
+ #df['Prospects']=prospects['Prospects']
50
+ #df.drop(['Week'],axis=1,inplace=True)
51
+
52
+
53
+ st.title('Data Validation and Insights')
54
+
55
+ with open("Pickle_files/main_df",'rb') as f:
56
+ st.session_state['cleaned_data']= pickle.load(f)
57
+ with open("Pickle_files/category_dict",'rb') as c:
58
+ st.session_state['category_dict']=pickle.load(c)
59
+
60
+ # st.write(st.session_state['cleaned_data'])
61
+
62
+ target_variables=[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Response_Metric']
63
+
64
+
65
+ target_column = st.selectbox('Select the Target Feature/Dependent Variable (will be used in all charts as reference)',list(*target_variables))
66
+ st.session_state['target_column']=target_column
67
+
68
+
69
+ fig=line_plot_target(st.session_state['cleaned_data'], target=target_column, title=f'{target_column} Over Time')
70
+ st.plotly_chart(fig, use_container_width=True)
71
+
72
+
73
+ media_channel=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Media'])
74
+ # st.write(media_channel)
75
+
76
+ Non_media_channel=[col for col in st.session_state['cleaned_data'].columns if col not in media_channel]
77
+
78
+
79
+ st.markdown('### Annual Data Summary')
80
+ st.dataframe(summary(st.session_state['cleaned_data'], media_channel+[target_column], spends=None,Target=True), use_container_width=True)
81
+
82
+ if st.checkbox('Show raw data'):
83
+ st.write(pd.concat([pd.to_datetime(st.session_state['cleaned_data']['Date']).dt.strftime('%m/%d/%Y'),st.session_state['cleaned_data'].select_dtypes(np.number).applymap(format_numbers)],axis=1))
84
+ col1 = st.columns(1)
85
+
86
+ if "selected_feature" not in st.session_state:
87
+ st.session_state['selected_feature']=None
88
+
89
+ st.header('1. Media Channels')
90
+
91
+ if 'Validation' not in st.session_state:
92
+ st.session_state['Validation']=[]
93
+
94
+ eda_columns=st.columns(2)
95
+ with eda_columns[0]:
96
+ if st.button('Generate Profile Report'):
97
+ pr = st.session_state['cleaned_data'].profile_report()
98
+
99
+ pr.to_file("Profile_Report.html")
100
+
101
+ with open("Profile_Report.html", "rb") as f:
102
+ profile_report_html = f.read()
103
+ b64 = base64.b64encode(profile_report_html).decode()
104
+ href = f'<a href="data:text/html;base64,{b64}" download="Profile_Report.html">Download Profile Report</a>'
105
+ st.markdown(href, unsafe_allow_html=True)
106
+
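The profile report is offered through a base64 data-URI anchor, so the browser can download the generated HTML without another server round trip. A reusable sketch of the same pattern (the helper name is illustrative, not part of this codebase):

import base64

def html_download_link(path: str, label: str) -> str:
    # Read the file, base64-encode it, and wrap it in an HTML anchor tag
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode()
    return f'<a href="data:text/html;base64,{b64}" download="{path}">{label}</a>'

# Usage: st.markdown(html_download_link("Profile_Report.html", "Download Profile Report"), unsafe_allow_html=True)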
107
+ with eda_columns[1]:
108
+ if st.button('Generate Sweetviz Report'):
109
+
110
+ def generate_report_with_target(df, target_feature):
111
+ report = sv.analyze([df, "Dataset"], target_feat=target_feature)
112
+ return report
113
+
114
+ report = generate_report_with_target(st.session_state['cleaned_data'], target_feature=target_column)
115
+ report.show_html()
116
+
117
+ # NOTE: Categorised_data (a per-column {'VB': bucket} mapping) must be loaded before this point; see the commented-out pickle load above
118
+ selected_media = st.selectbox('Select media', np.unique([Categorised_data[col]['VB'] for col in media_channel]))
119
+ # selected_feature=st.multiselect('Select Metric', df.columns[df.columns.str.contains(selected_media,case=False)])
120
+ st.session_state["selected_feature"]=st.selectbox('Select Metric',[col for col in media_channel if Categorised_data[col]['VB'] in selected_media ] )
121
+ spends_features=[col for col in df.columns if 'spends' in col.lower() or 'cost' in col.lower()]
122
+ spends_feature=[col for col in spends_features if col.split('_')[0] in st.session_state["selected_feature"].split('_')[0]]
123
+ #st.write(spends_features)
124
+ #st.write(spends_feature)
125
+ #st.write(selected_feature)
126
+
127
+
128
+ val_variables=[col for col in media_channel if col!='Date']
129
+ if len(spends_feature)==0:
130
+ st.warning('No spends variable available for the selected metric in the data')
131
+
132
+ else:
133
+ st.write(f'Selected spends variable: {spends_feature[0]}. If this is wrong, please rename the variables consistently.')
134
+ # Create the dual-axis line plot
135
+ fig_row1 = line_plot(df, x_col='Date', y1_cols=[st.session_state["selected_feature"]], y2_cols=[target_column], title=f'Analysis of {st.session_state["selected_feature"]} and {[target_column][0]} Over Time')
136
+ st.plotly_chart(fig_row1, use_container_width=True)
137
+ st.markdown('### Annual Data Summary')
138
+ st.dataframe(summary(df,[st.session_state["selected_feature"]],spends=spends_feature[0]),use_container_width=True)
139
+ if st.button('Validate'):
140
+ st.session_state['Validation'].append(st.session_state["selected_feature"])
141
+
142
+ if st.checkbox('Validate all'):
143
+ st.session_state['Validation'].extend(val_variables)
144
+ st.success('All media variables are validated ✅')
145
+ if len(set(st.session_state['Validation']).intersection(val_variables))!=len(val_variables):
146
+ #st.write(st.session_state['Validation'])
147
+ validation_data=pd.DataFrame({'Variables':val_variables,
148
+ 'Validated':[1 if col in st.session_state['Validation'] else 0 for col in val_variables],
149
+ 'Bucket':[Categorised_data[col]['VB'] for col in val_variables]})
150
+ gd=GridOptionsBuilder.from_dataframe(validation_data)
151
+ gd.configure_pagination(enabled=True)
152
+ gd.configure_selection(use_checkbox=True,selection_mode='multiple')
153
+ #gd.configure_selection_toggle_all(None, show_toggle_all=True)
154
+ #gd.configure_columns_auto_size_mode(GridOptionsBuilder.configure_columns)
155
+ gridoptions=gd.build()
156
+ #st.text(st.session_state['Validation'])
157
+ table = AgGrid(validation_data,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED,fit_columns_on_grid_load=True)
158
+ #st.table(table)
159
+ selected_rows = table["selected_rows"]
160
+ st.session_state['Validation'].extend([col['Variables'] for col in selected_rows])
161
+ not_validated_variables = [col for col in val_variables if col not in st.session_state["Validation"]]
162
+ if not_validated_variables:
163
+ not_validated_message = f'The following variables are not validated:\n{", ".join(not_validated_variables)}'
164
+ st.warning(not_validated_message)
165
+
166
+
167
+
168
+ st.header('2. Non Media Variables')
169
+ selected_columns_row = [col for col in df.columns if ("imp" not in col.lower()) and ('cli' not in col.lower() ) and ('spend' not in col.lower()) and col!='Date']
170
+ selected_columns_row4 = st.selectbox('Select Channel',selected_columns_row )
171
+ if not selected_columns_row4:
172
+ st.warning('Please select at least one.')
173
+ else:
174
+ # Create the dual-axis line plot
175
+ fig_row4 = line_plot(df, x_col='Date', y1_cols=[selected_columns_row4], y2_cols=[target_column], title=f'Analysis of {selected_columns_row4} and {target_column} Over Time')
176
+ st.plotly_chart(fig_row4, use_container_width=True)
177
+ selected_non_media=selected_columns_row4
178
+ sum_df = df[['Date', selected_non_media,target_column]]
179
+ sum_df['Year']=pd.to_datetime(df['Date']).dt.year
180
+ #st.dataframe(df)
181
+ #st.dataframe(sum_df.head(2))
182
+ sum_df=sum_df.groupby('Year').agg('sum')
183
+ sum_df.loc['Grand Total']=sum_df.sum()
184
+ sum_df=sum_df.applymap(format_numbers)
185
+ sum_df.fillna('-',inplace=True)
186
+ sum_df=sum_df.replace({"0.0":'-','nan':'-'})
187
+ st.markdown('### Annual Data Summary')
188
+ st.dataframe(sum_df,use_container_width=True)
189
+
190
+ # if st.checkbox('Validate',key='2'):
191
+ # st.session_state['Validation'].append(selected_columns_row4)
192
+ # val_variables=[col for col in media_channel if col!='Date']
193
+ # if st.checkbox('Validate all'):
194
+ # st.session_state['Validation'].extend(val_variables)
195
+ # validation_data=pd.DataFrame({'Variables':val_variables,
196
+ # 'Validated':[1 if col in st.session_state['Validation'] else 0 for col in val_variables],
197
+ # 'Bucket':[Categorised_data[col]['VB'] for col in val_variables]})
198
+ # gd=GridOptionsBuilder.from_dataframe(validation_data)
199
+ # gd.configure_pagination(enabled=True)
200
+ # gd.configure_selection(use_checkbox=True,selection_mode='multiple')
201
+ # #gd.configure_selection_toggle_all(None, show_toggle_all=True)
202
+ # #gd.configure_columns_auto_size_mode(GridOptionsBuilder.configure_columns)
203
+ # gridoptions=gd.build()
204
+ # #st.text(st.session_state['Validation'])
205
+ # table = AgGrid(validation_data,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED,fit_columns_on_grid_load=True)
206
+ # #st.table(table)
207
+ # selected_rows = table["selected_rows"]
208
+ # st.session_state['Validation'].extend([col['Variables'] for col in selected_rows])
209
+ # not_validated_variables = [col for col in val_variables if col not in st.session_state["Validation"]]
210
+ # if not_validated_variables:
211
+ # not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
212
+ # st.warning(not_validated_message)
213
+
214
+ options = list(df.select_dtypes(np.number).columns)
215
+ st.markdown(' ')
216
+ st.markdown(' ')
217
+ st.markdown('# Exploratory Data Analysis')
218
+ st.markdown(' ')
219
+
220
+ selected_options = []
221
+ num_columns = 4
222
+ num_rows = -(-len(options) // num_columns) # Ceiling division to calculate rows
223
+
224
+ # Create a grid of checkboxes
225
+ st.header('Select Features for Correlation Plot')
226
+ tick=False
227
+ if st.checkbox('Select all'):
228
+ tick=True
229
+ selected_options = []
230
+ for row in range(num_rows):
231
+ cols = st.columns(num_columns)
232
+ for col in cols:
233
+ if options:
234
+ option = options.pop(0)
235
+ selected = col.checkbox(option,value=tick)
236
+ if selected:
237
+ selected_options.append(option)
238
+ # Display selected options
239
+ #st.write('You selected:', selected_options)
240
+ st.pyplot(correlation_plot(df,selected_options,target_column))
241
+
pages/2_Transformations_with_panel.py ADDED
@@ -0,0 +1,612 @@
1
+ '''
2
+ MMO Build Sprint 3
3
+ date :
4
+ additions : adding more variables to session state for saved model : random effect, predicted train & test
5
+ '''
6
+
7
+ import streamlit as st
8
+ import pandas as pd
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ from Eda_functions import format_numbers
12
+ import numpy as np
13
+ import pickle
14
+ from st_aggrid import AgGrid
15
+ from st_aggrid import GridOptionsBuilder,GridUpdateMode
16
+ from utilities import set_header,load_local_css
17
+ from st_aggrid import GridOptionsBuilder
18
+ import time
19
+ import itertools
20
+ import statsmodels.api as sm
21
+ import numpy as np
22
+ import re
23
+ import itertools
24
+ from sklearn.metrics import mean_absolute_error, r2_score,mean_absolute_percentage_error
25
+ from sklearn.preprocessing import MinMaxScaler
26
+ import os
27
+ import matplotlib.pyplot as plt
28
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
29
+ st.set_option('deprecation.showPyplotGlobalUse', False)
30
+ import statsmodels.api as sm
31
+ import statsmodels.formula.api as smf
32
+
33
+ from datetime import datetime
34
+ import seaborn as sns
35
+ from Data_prep_functions import *
36
+
37
+
38
+ def get_random_effects(media_data, panel_col, mdf):
39
+ random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
40
+
41
+ for i, market in enumerate(media_data[panel_col].unique()):
42
+ print(i, end='\r')
43
+ intercept = mdf.random_effects[market].values[0]
44
+ random_eff_df.loc[i, 'random_effect'] = intercept
45
+ random_eff_df.loc[i, panel_col] = market
46
+
47
+ return random_eff_df
48
+
49
+
50
+ def mdf_predict(X_df, mdf, random_eff_df) :
51
+ X=X_df.copy()
52
+ X['fixed_effect'] = mdf.predict(X)
53
+ X=pd.merge(X, random_eff_df, on=panel_col, how='left')
54
+ X['pred'] = X['fixed_effect'] + X['random_effect']
55
+ # X.to_csv('Test/megred_df.csv',index=False)
56
+ X.drop(columns=['fixed_effect', 'random_effect'], inplace=True)
57
+ return X['pred']
58
+
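mdf_predict scores unseen rows by adding each panel's random intercept (looked up from random_eff_df) to the mixed model's fixed-effect prediction; note it relies on the global panel_col defined further down the page. A hedged usage sketch:

# Illustrative only: assumes a fitted MixedLM result named mdf and panel_col = 'dma'
random_eff_df = get_random_effects(media_data, panel_col, mdf)
test_pred = mdf_predict(X_test, mdf, random_eff_df)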
59
+ st.set_page_config(
60
+ page_title="Model Build",
61
+ page_icon=":shark:",
62
+ layout="wide",
63
+ initial_sidebar_state='collapsed'
64
+ )
65
+
66
+ load_local_css('styles.css')
67
+ set_header()
68
+
69
+
70
+ st.title('1. Build Your Model')
71
+
72
+ # set the panel column
73
+ date_col = 'date'
74
+
75
+
76
+ media_data=pd.read_csv(r'upf_data_converted.csv')
77
+ # with open("Pickle_files/main_df",'rb') as f:
78
+ # media_data= pickle.load(f)
79
+
80
+
81
+ media_data.columns=[i.lower().strip().replace(' ','_').replace('-','').replace(':','').replace("__", "_") for i in media_data.columns]
82
+ #st.write(media_data.columns)
83
+ #media_data.drop(['indicacao_impressions','infleux_impressions','influencer_impressions'],axis=1,inplace=True)
84
+ target_col = 'total_approved_accounts_revenue'
85
+ # st.write(media_data.columns)
86
+ media_data.sort_values(date_col, inplace=True)
87
+ media_data.reset_index(drop=True,inplace=True)
88
+
89
+ date=media_data[date_col]
90
+ st.session_state['date']=date
91
+ revenue=media_data[target_col]
92
+ media_data.drop([target_col],axis=1,inplace=True)
93
+ media_data.drop([date_col],axis=1,inplace=True)
94
+ media_data.reset_index(drop=True,inplace=True)
95
+
96
+
97
+ if st.toggle('Apply Transformations on DMA/Panel Level'):
98
+ dma=st.selectbox('Select the Level of data ',[ col for col in media_data.columns if col.lower() in ['dma','panel', 'markets']])
99
+ panel_col= dma
100
+
101
+ else:
102
+ #""" code to aggregate data on date """
103
+
104
+
105
+ dma=None
106
+
107
+ # dma_dict={ dm:media_data[media_data[dma]==dm] for dm in media_data[dma].unique()}
108
+ # st.write(dma_dict)
109
+
110
+ st.markdown('## Select the Range of Transformations')
111
+ columns = st.columns(2)
112
+ old_shape=media_data.shape
113
+
114
+
115
+ if "old_shape" not in st.session_state:
116
+ st.session_state['old_shape']=old_shape
117
+
118
+
119
+ with columns[0]:
120
+ slider_value_adstock = st.slider('Select Adstock Range (only applied to media)', 0.0, 1.0, (0.2, 0.4), step=0.1, format="%.2f")
121
+ with columns[1]:
122
+ slider_value_lag = st.slider('Select Lag Range (applied to media, seasonal, macroeconomic variables)', 1, 7, (1, 3), step=1)
123
+
124
+ # with columns[2]:
125
+ # slider_value_power=st.slider('Select Power range (only applied to media )',0,4,(1,2),step=1)
126
+
127
+ # with columns[1]:
128
+ # st.number_input('Select the range of half saturation point ',min_value=1,max_value=5)
129
+ # st.number_input('Select the range of ')
130
+
131
+ # Section 1 - Transformations Functions
132
+ def lag(data,features,lags,dma=None):
133
+ if dma:
134
+
135
+ transformed_data=pd.concat([data.groupby([dma])[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags],axis=1)
136
+ transformed_data=transformed_data.fillna(method='bfill')
137
+ return pd.concat([transformed_data,data],axis=1)
138
+
139
+ else:
140
+
141
+ #''' data should be aggregated on date'''
142
+
143
+ transformed_data=pd.concat([data[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags],axis=1)
144
+ transformed_data=transformed_data.fillna(method='bfill')
145
+
146
+ return pd.concat([transformed_data,data],axis=1)
147
+
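lag appends k-period shifted copies of each feature (shifting within each DMA group when one is given) and backfills the leading NaNs. A toy check of the non-panel branch, assuming the lag function above is in scope:

import pandas as pd

toy = pd.DataFrame({'spend': [1.0, 2.0, 3.0, 4.0]})
out = lag(toy, features=['spend'], lags=[1, 2])
print(out['spend_lag_1'].tolist())  # [1.0, 1.0, 2.0, 3.0] after backfill
print(out['spend_lag_2'].tolist())  # [1.0, 1.0, 1.0, 2.0] after backfill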
148
+ #adstock
149
+ def adstock(df, alphas, cutoff, features,dma=None):
150
+ # st.write(features)
151
+
152
+ if dma:
153
+ transformed_data=pd.DataFrame()
154
+ for d in df[dma].unique():
155
+ dma_sub_df = df[df[dma] == d]
156
+ n = len(dma_sub_df)
157
+
158
+
159
+ weights = np.array([[[alpha**(i-j) if i >= j and j >= i-cutoff else 0. for j in range(n)] for i in range(n)] for alpha in alphas])
160
+ X = dma_sub_df[features].to_numpy()
161
+
162
+ res = pd.DataFrame(np.hstack(weights @ X),
163
+ columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
164
+
165
+ transformed_data=pd.concat([transformed_data,res],axis=0)
166
+ transformed_data.reset_index(drop=True,inplace=True)
167
+ return pd.concat([transformed_data,df],axis=1)
168
+
169
+ else:
170
+
171
+ n = len(df)
172
+
173
+
174
+ weights = np.array([[[alpha**(i-j) if i >= j and j >= i-cutoff else 0. for j in range(n)] for i in range(n)] for alpha in alphas])
175
+
176
+ X = df[features].to_numpy()
177
+ res = pd.DataFrame(np.hstack(weights @ X),
178
+ columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
179
+ return pd.concat([res,df],axis=1)
180
+
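For each decay rate alpha, the weights array is a lower-triangular matrix whose entry is alpha**(i - j) for j in [i - cutoff, i], so row i is a geometrically decaying window over the current and preceding periods. A small self-contained check of that construction:

import numpy as np

alpha, cutoff, n = 0.5, 8, 4
W = np.array([[alpha ** (i - j) if i >= j and j >= i - cutoff else 0.0
               for j in range(n)] for i in range(n)])
x = np.array([10.0, 0.0, 0.0, 0.0])  # a single burst of spend in period 0
print(W @ x)  # [10.  5.  2.5  1.25] -- the burst decays by alpha each period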
181
+
182
+
183
+
184
+ # Section 2 - Begin Transformations
185
+
186
+ if 'media_data' not in st.session_state:
187
+
188
+ st.session_state['media_data']=pd.DataFrame()
189
+
190
+ # Sprint3 additions
191
+ if 'random_effects' not in st.session_state:
192
+ st.session_state['random_effects']=pd.DataFrame()
193
+ if 'pred_train' not in st.session_state:
194
+ st.session_state['pred_train'] = []
195
+ if 'pred_test' not in st.session_state:
196
+ st.session_state['pred_test'] = []
197
+ # end of Sprint3 additions
198
+
199
+ # variables_to_be_transformed=[col for col in media_data.columns if col.lower() not in ['dma','panel'] ] # change for buckets
200
+ variables_to_be_transformed=[col for col in media_data.columns if '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
201
+ # st.write(variables_to_be_transformed)
202
+ # st.write(media_data[variables_to_be_transformed].dtypes)
203
+
204
+ with columns[0]:
205
+ if st.button('Apply Transformations'):
206
+ with st.spinner('Applying Transformations'):
207
+ transformed_data_lag=lag(media_data,features=variables_to_be_transformed,lags=np.arange(slider_value_lag[0],slider_value_lag[1]+1,1),dma=dma)
208
+
209
+ # variables_to_be_transformed=[col for col in list(transformed_data_lag.columns) if col not in ['Date','DMA','Panel']] #change for buckets
210
+ variables_to_be_transformed = [col for col in media_data.columns if
211
+ '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
212
+
213
+ transformed_data_adstock=adstock(df=transformed_data_lag, alphas=np.arange(slider_value_adstock[0],slider_value_adstock[1],0.1), cutoff=8, features=variables_to_be_transformed,dma=dma)
214
+
215
+ # st.success('Done')
216
+ st.success("Transformations complete!")
217
+
218
+ st.write(f'Old shape: {old_shape}, new shape: {transformed_data_adstock.shape}')
219
+ # st.write(media_data.head(10))
220
+ # st.write(transformed_data_adstock.head(10))
221
+
222
+ transformed_data_adstock.columns = [c.replace(".","_") for c in transformed_data_adstock.columns] # srishti
223
+ # st.write(transformed_data_adstock.columns)
224
+ st.session_state['media_data']=transformed_data_adstock # srishti
225
+
226
+ # with st.spinner('Applying Transformations'):
227
+ # time.sleep(2)
228
+ # st.success("Transformations complete!")
229
+
230
+ # if st.session_state['media_data'].shape[1]>old_shape[1]:
231
+ # with columns[0]:
232
+ # st.write(f'Total no.of variables before transformation: {old_shape[1]}, Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
233
+ #st.write(f'Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
234
+
235
+ # Section 3 - Create combinations
236
+
237
+ # bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','FB: Level Achieved - Tier 1 Impressions',
238
+ # ' FB: Level Achieved - Tier 2 Impressions','paid_social_others',
239
+ # ' GA App: Will And Cid Pequena Baixo Risco Clicks',
240
+ # 'digital_tactic_others',"programmatic"
241
+ # ]
242
+
243
+ # srishti - bucket names changed
244
+ bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','fb_level_achieved_tier_2',
245
+ 'fb_level_achieved_tier_1','paid_social_others',
246
+ 'ga_app',
247
+ 'digital_tactic_others',"programmatic"
248
+ ]
249
+
250
+ with columns[1]:
251
+ if st.button('Create Combinations of Variables'):
252
+
253
+ top_3_correlated_features=[]
254
+ # for col in st.session_state['media_data'].columns[:19]:
255
+ original_cols = [c for c in st.session_state['media_data'].columns if "_clicks" in c.lower() or "_impressions" in c.lower()]
256
+ original_cols = [c for c in original_cols if "_lag" not in c.lower() and "_adstock" not in c.lower()]
257
+ # st.write(original_cols)
258
+
259
+ # for col in st.session_state['media_data'].columns[:19]:
260
+ for col in original_cols: # srishti - new
261
+ corr_df=pd.concat([st.session_state['media_data'].filter(regex=col),
262
+ revenue],axis=1).corr()[target_col].iloc[:-1]
263
+ top_3_correlated_features.append(list(corr_df.sort_values(ascending=False).head(2).index))
264
+ # st.write(col, top_3_correlated_features)
265
+ flattened_list = [item for sublist in top_3_correlated_features for item in sublist]
266
+ # all_features_set={var:[col for col in flattened_list if var in col] for var in bucket}
267
+ all_features_set={var:[col for col in flattened_list if var in col] for var in bucket if len([col for col in flattened_list if var in col])>0} # srishti
268
+
269
+ channels_all=[values for values in all_features_set.values()]
270
+ # st.write(channels_all)
271
+ st.session_state['combinations'] = list(itertools.product(*channels_all))
272
+ # if 'combinations' not in st.session_state:
273
+ # st.session_state['combinations']=combinations_all
274
+
275
+ st.session_state['final_selection']=st.session_state['combinations']
276
+ st.success('Done')
277
+ # st.write(f"{len(st.session_state['combinations'])} combinations created")
278
+
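itertools.product takes one list of top-correlated variants per bucket and yields every cross-bucket combination, which is why the number of candidate models grows multiplicatively with bucket sizes. A toy illustration (the channel names are made up):

import itertools

channels_all = [['search_clicks_lag_1', 'search_clicks_adstock_0.3'],
                ['social_impressions_lag_2']]
print(list(itertools.product(*channels_all)))
# [('search_clicks_lag_1', 'social_impressions_lag_2'),
#  ('search_clicks_adstock_0.3', 'social_impressions_lag_2')]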
279
+
280
+ revenue.reset_index(drop=True,inplace=True)
281
+ if 'Model_results' not in st.session_state:
282
+ st.session_state['Model_results']={'Model_object':[],
283
+ 'Model_iteration':[],
284
+ 'Feature_set':[],
285
+ 'MAPE':[],
286
+ 'R2':[],
287
+ 'ADJR2':[]
288
+ }
289
+
290
+ def reset_model_result_dct():
291
+ st.session_state['Model_results']={'Model_object':[],
292
+ 'Model_iteration':[],
293
+ 'Feature_set':[],
294
+ 'MAPE':[],
295
+ 'R2':[],
296
+ 'ADJR2':[]
297
+ }
298
+
299
+ # if st.button('Build Model'):
300
+ if 'iterations' not in st.session_state:
301
+ st.session_state['iterations']=0
302
+ # st.write("1",st.session_state["final_selection"])
303
+
304
+ if 'final_selection' not in st.session_state:
305
+ st.session_state['final_selection']=False
306
+
307
+ save_path = r"Model/"
308
+ with columns[1]:
309
+ if st.session_state['final_selection']:
310
+ st.write(f'Total combinations created: {format_numbers(len(st.session_state["final_selection"]))}')
311
+
312
+
313
+ if st.checkbox('Build all iterations'):
314
+ iterations=len(st.session_state['final_selection'])
315
+ else:
316
+ iterations = st.number_input('Select the number of iterations to perform', min_value=0, step=100, value=st.session_state['iterations'],on_change=reset_model_result_dct)
317
+ # st.write("iterations=", iterations)
318
+
319
+ if st.button('Build Model',on_click=reset_model_result_dct):
320
+ st.session_state['iterations']=iterations
321
+ # st.write("2",st.session_state["final_selection"])
322
+
323
+ # Section 4 - Model
324
+
325
+ st.session_state['media_data']=st.session_state['media_data'].fillna(method='ffill')
326
+ st.markdown(
327
+ 'Data Split -- Training Period: May 9th, 2023 - October 5th, 2023; Testing Period: October 6th, 2023 - November 7th, 2023')
328
+ progress_bar = st.progress(0) # Initialize the progress bar
329
+ # time_remaining_text = st.empty() # Create an empty space for time remaining text
330
+ start_time = time.time() # Record the start time
331
+ progress_text = st.empty()
332
+ # time_elapsed_text = st.empty()
333
+ # for i, selected_features in enumerate(st.session_state["final_selection"][40000:40000 + int(iterations)]):
334
+ # st.write(st.session_state["final_selection"])
335
+ # for i, selected_features in enumerate(st.session_state["final_selection"]):
336
+ for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]): # srishti
337
+ df = st.session_state['media_data']
338
+
339
+ fet = [var for var in selected_features if len(var) > 0]
340
+ inp_vars_str = " + ".join(fet) # new
341
+
342
+
343
+ X = df[fet]
344
+ y = revenue
345
+ ss = MinMaxScaler()
346
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
347
+ # X = sm.add_constant(X)
348
+
349
+ X['total_approved_accounts_revenue'] = revenue # Sprint2
350
+ X[panel_col] = df[panel_col] # Sprint2
351
+
352
+
353
+
354
+ X_train=X.iloc[:8000]
355
+ X_test=X.iloc[8000:]
356
+ y_train=y.iloc[:8000]
357
+ y_test=y.iloc[8000:]
358
+
359
+
360
+
361
+ md = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
362
+ data=X_train[['total_approved_accounts_revenue'] + fet],
363
+ groups=X_train[panel_col])
364
+ mdf = md.fit()
365
+ predicted_values = mdf.fittedvalues
366
+
367
+ # st.write(fet)
368
+ # positive_coeff=fet
369
+ # negative_coeff=[]
370
+
371
+ coefficients = mdf.fe_params.to_dict()
372
+ model_positive = [col for col in coefficients.keys() if coefficients[col] > 0]
373
+ # st.write(positive_coeff)
374
+ # st.write(model_positive)
375
+ pvalues = [var for var in list(mdf.pvalues) if var <= 0.06]
376
+
377
+ # if (len(model_positive) / len(selected_features)) > 0.9 and (len(pvalues) / len(selected_features)) >= 0.8:
378
+ if (len(model_positive) / len(selected_features)) > 0 and (len(pvalues) / len(selected_features)) >= 0: # srishti - changed just for testing, revert later
379
+ # predicted_values = model.predict(X_train)
380
+ mape = mean_absolute_percentage_error(y_train, predicted_values)
381
+ r2 = r2_score(y_train, predicted_values)
382
+ adjr2 = 1 - (1 - r2) * (len(y_train) - 1) / (len(y_train) - len(selected_features) - 1)
383
+
384
+ filename = os.path.join(save_path, f"model_{i}.pkl")
385
+ with open(filename, "wb") as f:
386
+ pickle.dump(mdf, f)
387
+ # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
388
+ # model = pickle.load(file)
389
+
390
+ st.session_state['Model_results']['Model_object'].append(filename)
391
+ st.session_state['Model_results']['Model_iteration'].append(i)
392
+ st.session_state['Model_results']['Feature_set'].append(fet)
393
+ st.session_state['Model_results']['MAPE'].append(mape)
394
+ st.session_state['Model_results']['R2'].append(r2)
395
+ st.session_state['Model_results']['ADJR2'].append(adjr2)
396
+
397
+ current_time = time.time()
398
+ time_taken = current_time - start_time
399
+ time_elapsed_minutes = time_taken / 60
400
+ completed_iterations_text = f"{i + 1}/{iterations}"
401
+ progress_bar.progress((i + 1) / int(iterations))
402
+ progress_text.text(f'Completed iterations: {completed_iterations_text}, Time Elapsed (min): {time_elapsed_minutes:.2f}')
403
+
404
+ st.write(f'Out of {st.session_state["iterations"]} iterations: {len(st.session_state["Model_results"]["Model_object"])} valid models')
405
+ pd.DataFrame(st.session_state['Model_results']).to_csv('model_output.csv')
406
+
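Each candidate above is a random-intercept mixed model: smf.mixedlm fits shared fixed effects for the selected features plus one intercept per panel. A minimal standalone sketch of the same call shape (toy data; MixedLM may emit convergence warnings on inputs this small):

import pandas as pd
import statsmodels.formula.api as smf

toy = pd.DataFrame({
    'y': [1.0, 2.1, 2.9, 2.6, 3.4, 4.6],
    'x': [0.1, 0.2, 0.3, 0.2, 0.3, 0.4],
    'panel': ['a', 'a', 'a', 'b', 'b', 'b'],
})
res = smf.mixedlm('y ~ x', data=toy, groups=toy['panel']).fit()
print(res.fe_params)        # fixed-effect coefficients
print(res.random_effects)   # per-panel random intercepts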
407
+ def to_percentage(value):
408
+ return f'{value * 100:.1f}%'
409
+
410
+ ## Section 5 - Select Model
411
+ st.title('2. Select Models')
412
+ if 'tick' not in st.session_state:
413
+ st.session_state['tick']=False
414
+ if st.checkbox('Show results of top 10 models (based on MAPE and Adj. R2)',value=st.session_state['tick']):
415
+ st.session_state['tick']=True
416
+ st.write('Select one model iteration to generate performance metrics for it:')
417
+ data=pd.DataFrame(st.session_state['Model_results'])
418
+ data.sort_values(by=['MAPE'], ascending=True, inplace=True) # lower MAPE is better, so sort ascending before taking the top 10
419
+ data.drop_duplicates(subset='Model_iteration',inplace=True)
420
+ top_10=data.head(10)
421
+ top_10['Rank']=np.arange(1,len(top_10)+1,1)
422
+ top_10[['MAPE','R2','ADJR2']]=np.round(top_10[['MAPE','R2','ADJR2']],4).applymap(to_percentage)
423
+ top_10_table = top_10[['Rank','Model_iteration','MAPE','ADJR2','R2']]
424
+ #top_10_table.columns=[['Rank','Model Iteration Index','MAPE','Adjusted R2','R2']]
425
+ gd=GridOptionsBuilder.from_dataframe(top_10_table)
426
+ gd.configure_pagination(enabled=True)
427
+ gd.configure_selection(use_checkbox=True)
428
+
429
+
430
+ gridoptions=gd.build()
431
+
432
+ table = AgGrid(top_10,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED)
433
+
434
+ selected_rows=table.selected_rows
435
+ # if st.session_state["selected_rows"] != selected_rows:
436
+ # st.session_state["build_rc_cb"] = False
437
+ st.session_state["selected_rows"] = selected_rows
438
+ if 'Model' not in st.session_state:
439
+ st.session_state['Model']={}
440
+
441
+ # Section 6 - Display Results
442
+
443
+ if len(selected_rows)>0:
444
+ st.header('2.1 Results Summary')
445
+
446
+ model_object=data[data['Model_iteration']==selected_rows[0]['Model_iteration']]['Model_object']
447
+ features_set=data[data['Model_iteration']==selected_rows[0]['Model_iteration']]['Feature_set']
448
+
449
+ with open(str(model_object.values[0]), 'rb') as file:
450
+ # print(file)
451
+ model = pickle.load(file)
452
+ st.write(model.summary())
453
+ st.header('2.2 Actual vs. Predicted Plot')
454
+
455
+ df=st.session_state['media_data']
456
+ X=df[features_set.values[0]]
457
+ # X = sm.add_constant(X)
458
+ y=revenue
459
+
460
+ ss = MinMaxScaler()
461
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
462
+
463
+ # Sprint2 changes
464
+ X['total_approved_accounts_revenue'] = revenue # new
465
+ X[panel_col] = df[panel_col]
466
+ X[date_col]=date
467
+
468
+
469
+
470
+ X_train=X.iloc[:8000]
471
+ X_test=X.iloc[8000:].reset_index(drop=True)
472
+ y_train=y.iloc[:8000]
473
+ y_test=y.iloc[8000:].reset_index(drop=True)
474
+
475
+
476
+ random_eff_df = get_random_effects(media_data, panel_col, model)
477
+ train_pred = model.fittedvalues
478
+ test_pred = mdf_predict(X_test, model, random_eff_df)
479
+ print("__"*20, test_pred.isna().sum())
480
+
481
+ # save x test to test - srishti
482
+ x_test_to_save = X_test.copy()
483
+ x_test_to_save['Actuals'] = y_test
484
+ x_test_to_save['Predictions'] = test_pred
485
+
486
+ x_train_to_save=X_train.copy()
487
+ x_train_to_save['Actuals'] = y_train
488
+ x_train_to_save['Predictions'] = train_pred
489
+
490
+ x_train_to_save.to_csv('Test/x_train_to_save.csv',index=False)
491
+ x_test_to_save.to_csv('Test/x_test_to_save.csv',index=False)
492
+
493
+ st.session_state['X']=X_train
494
+ st.session_state['features_set']=features_set.values[0]
495
+ print("**"*20,"selected model features : ",features_set.values[0])
496
+ metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(X_train[date_col], y_train, train_pred, model,target_column='Revenue',is_panel=True) # Sprint2
497
+
498
+ st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
499
+
500
+
501
+
502
+ st.markdown('## 2.3 Residual Analysis')
503
+ columns=st.columns(2)
504
+ with columns[0]:
505
+ fig=plot_residual_predicted(y_train,train_pred,X_train) # Sprint2
506
+ st.plotly_chart(fig)
507
+
508
+ with columns[1]:
509
+ st.empty()
510
+ fig = qqplot(y_train,train_pred) # Sprint2
511
+ st.plotly_chart(fig)
512
+
513
+ with columns[0]:
514
+ fig=residual_distribution(y_train,train_pred) # Sprint2
515
+ st.pyplot(fig)
516
+
517
+
518
+
519
+ vif_data = pd.DataFrame()
520
+ # X=X.drop('const',axis=1)
521
+ X_train_with_panels = X_train.copy() # Sprint2 -- creating a copy of xtrain. Later deleting panel, target & date from xtrain
522
+ X_train.drop(columns=[target_col, panel_col, date_col], inplace=True) # Sprint2
523
+ vif_data["Variable"] = X_train.columns
524
+ vif_data["VIF"] = [variance_inflation_factor(X_train.values, i) for i in range(X_train.shape[1])]
525
+ vif_data.sort_values(by=['VIF'],ascending=False,inplace=True)
526
+ vif_data=np.round(vif_data)
527
+ vif_data['VIF']=vif_data['VIF'].astype(float)
528
+ st.header('2.4 Variance Inflation Factor (VIF)')
529
+ #st.dataframe(vif_data)
530
+ color_mapping = {
531
+ 'darkgreen': (vif_data['VIF'] < 3),
532
+ 'orange': (vif_data['VIF'] >= 3) & (vif_data['VIF'] <= 10),
533
+ 'darkred': (vif_data['VIF'] > 10)
534
+ }
535
+
536
+ # Create a horizontal bar plot
537
+ fig, ax = plt.subplots()
538
+ fig.set_figwidth(10) # Adjust the width of the figure as needed
539
+
540
+ # Sort the bars by descending VIF values
541
+ vif_data = vif_data.sort_values(by='VIF', ascending=False)
542
+
543
+ # Iterate through the color mapping and plot bars with corresponding colors
544
+ for color, condition in color_mapping.items():
545
+ subset = vif_data[condition]
546
+ bars = ax.barh(subset["Variable"], subset["VIF"], color=color, label=color)
547
+
548
+ # Add text annotations on top of the bars
549
+ for bar in bars:
550
+ width = bar.get_width()
551
+ ax.annotate(f'{width:}', xy=(width, bar.get_y() + bar.get_height() / 2), xytext=(5, 0),
552
+ textcoords='offset points', va='center')
553
+
554
+ # Customize the plot
555
+ ax.set_xlabel('VIF Values')
556
+ #ax.set_title('2.4 Variance Inflation Factor (VIF)')
557
+ #ax.legend(loc='upper right')
558
+
559
+ # Display the plot in Streamlit
560
+ st.pyplot(fig)
561
+
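The VIF of feature j is 1 / (1 - R_j^2), where R_j^2 comes from regressing feature j on all remaining features; statsmodels' variance_inflation_factor computes exactly that, and the color bands above follow the common <3 / 3-10 / >10 rules of thumb. A quick sanity check on toy data:

import numpy as np
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor

rng = np.random.default_rng(0)
a = rng.normal(size=200)
toy = pd.DataFrame({'a': a, 'b': 0.9 * a + rng.normal(scale=0.1, size=200)})
print([variance_inflation_factor(toy.values, i) for i in range(toy.shape[1])])
# both values are large because a and b are nearly collinear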
562
+
563
+
564
+ with st.expander('Results Summary Test data'):
565
+ # ss = MinMaxScaler()
566
+ # X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.columns)
567
+ st.header('2.2 Actual vs. Predicted Plot')
568
+
569
+ metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(X_test[date_col], y_test, test_pred, model,target_column='Revenue',is_panel=True) # Sprint2
570
+
571
+ st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
572
+
573
+ st.markdown('## 2.3 Residual Analysis')
574
+ columns=st.columns(2)
575
+ with columns[0]:
576
+ fig=plot_residual_predicted(revenue,test_pred,X_test) # Sprint2
577
+ st.plotly_chart(fig)
578
+
579
+ with columns[1]:
580
+ st.empty()
581
+ fig = qqplot(revenue,test_pred) # Sprint2
582
+ st.plotly_chart(fig)
583
+
584
+ with columns[0]:
585
+ fig=residual_distribution(revenue,test_pred) # Sprint2
586
+ st.pyplot(fig)
587
+
588
+ value=False
589
+ if st.checkbox('Save this model to tune',key='build_rc_cb'):
590
+ mod_name=st.text_input('Enter model name')
591
+ if len(mod_name)>0:
592
+ st.session_state['Model'][mod_name]={"Model_object":model,'feature_set':st.session_state['features_set'],'X_train':X_train_with_panels}
593
+ st.session_state['X_train']=X_train_with_panels
594
+ st.session_state['X_test']=X_test
595
+ st.session_state['y_train']=y_train
596
+ st.session_state['y_test']=y_test
597
+
598
+ # Sprint3 additions
599
+ random_eff_df= get_random_effects(media_data, panel_col, model)
600
+ st.session_state['random_effects']=random_eff_df
601
+
602
+ st.session_state['pred_train']=model.fittedvalues
603
+ st.session_state['pred_test']=mdf_predict(X_test, model, random_eff_df)
604
+ # End of Sprint3 additions
605
+
606
+ with open("best_models.pkl", "wb") as f:
607
+ pickle.dump(st.session_state['Model'], f)
608
+ st.success('Model saved! Proceed to the next page to tune the model')
609
+ value=False
610
+
611
+ # st.write(st.session_state['Model'][mod_name]['X_train'].columns)
612
+ # st.write(st.session_state['X_test'].columns)
pages/3_Model_Tuning_with_panel.py ADDED
@@ -0,0 +1,437 @@
1
+ '''
2
+ MMO Build Sprint 3
3
+ date :
4
+ changes : capability to tune MixedLM as well as simple LR in the same page
5
+ '''
6
+
7
+ import streamlit as st
8
+ import pandas as pd
9
+ from Eda_functions import format_numbers
10
+ import pickle
11
+ from utilities import set_header,load_local_css
12
+ import statsmodels.api as sm
13
+ import re
14
+ from sklearn.preprocessing import MinMaxScaler
15
+ import matplotlib.pyplot as plt
16
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
17
+ st.set_option('deprecation.showPyplotGlobalUse', False)
18
+ import statsmodels.formula.api as smf
19
+ from Data_prep_functions import *
20
+
21
+ for i in ["tuned_model", "X_train_tuned", "X_test_tuned", "tuned_model_features"] : # keys must match those set at the bottom of this page
22
+ if i not in st.session_state :
23
+ st.session_state[i] = None
24
+
25
+ st.set_page_config(
26
+ page_title="Model Tuning",
27
+ page_icon=":shark:",
28
+ layout="wide",
29
+ initial_sidebar_state='collapsed'
30
+ )
31
+ load_local_css('styles.css')
32
+ set_header()
33
+
34
+ # Sprint3
35
+ is_panel= True
36
+ panel_col= 'dma' # set the panel column
37
+ date_col = 'date'
38
+ target_col = 'total_approved_accounts_revenue'
39
+
40
+ st.title('1. Model Tuning')
41
+
42
+
43
+ if "X_train" not in st.session_state:
44
+ st.error(
45
+ "Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed.")
46
+ st.stop()
47
+ X_train=st.session_state['X_train']
48
+ X_test=st.session_state['X_test']
49
+ y_train=st.session_state['y_train']
50
+ y_test=st.session_state['y_test']
51
+ df=st.session_state['media_data']
52
+
53
+ # st.write(X_train.columns)
54
+ # st.write(X_test.columns)
55
+
56
+ with open("best_models.pkl", 'rb') as file:
57
+ model_dict= pickle.load(file)
58
+
59
+ if 'selected_model' not in st.session_state:
60
+ st.session_state['selected_model']=0
61
+
62
+ # st.write(model_dict[st.session_state["selected_model"]]['X_train'].columns)
63
+
64
+ st.markdown('### 1.1 Event Flags')
65
+ st.markdown('Helps in quantifying the impact of specific occurrences of events')
66
+ with st.expander('Apply Event Flags'):
67
+ st.session_state["selected_model"]=st.selectbox('Select Model to apply flags',model_dict.keys())
68
+ model =model_dict[st.session_state["selected_model"]]['Model_object']
69
+ date=st.session_state['date']
70
+ date=pd.to_datetime(date)
71
+ X_train =model_dict[st.session_state["selected_model"]]['X_train']
72
+
73
+ features_set= model_dict[st.session_state["selected_model"]]['feature_set']
74
+
75
+ col=st.columns(3)
76
+ min_date=min(date)
77
+ max_date=max(date)
78
+ with col[0]:
79
+ start_date=st.date_input('Select Start Date',min_date,min_value=min_date,max_value=max_date)
80
+ with col[1]:
81
+ end_date=st.date_input('Select End Date',max_date,min_value=min_date,max_value=max_date)
82
+ with col[2]:
83
+ repeat=st.selectbox('Repeat Annually',['Yes','No'],index=1)
84
+ if repeat =='Yes':
85
+ repeat=True
86
+ else:
87
+ repeat=False
88
+ # X_train=sm.add_constant(X_train)
89
+
90
+ if 'Flags' not in st.session_state:
91
+ st.session_state['Flags']={}
92
+ # print("**"*50)
93
+ # print(y_train)
94
+ # print("**"*50)
95
+ # print(model.fittedvalues)
96
+ if is_panel : # Sprint3
97
+ met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train,
98
+ model.fittedvalues, model,
99
+ target_column='Revenue',
100
+ flag=(start_date, end_date),
101
+ repeat_all_years=repeat, is_panel=True)
102
+ st.plotly_chart(fig_flag, use_container_width=True)
103
+
104
+ # create flag on test
105
+ met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test,
106
+ st.session_state['pred_test'], model,
107
+ target_column='Revenue',
108
+ flag=(start_date, end_date),
109
+ repeat_all_years=repeat, is_panel=True)
110
+
111
+ else :
112
+ met,line_values,fig_flag=plot_actual_vs_predicted(date[:150], y_train, model.predict(X_train), model,flag=(start_date,end_date),repeat_all_years=repeat)
113
+ st.plotly_chart(fig_flag,use_container_width=True)
114
+
115
+ met,test_line_values,fig_flag=plot_actual_vs_predicted(date[150:], y_test, model.predict(X_test), model,flag=(start_date,end_date),repeat_all_years=repeat)
116
+
117
+
118
+ flag_name='f1'
119
+ flag_name=st.text_input('Enter Flag Name')
120
+ if st.button('Update flag'):
121
+ st.session_state['Flags'][flag_name]= {}
122
+ st.session_state['Flags'][flag_name]['train']=line_values
123
+ st.session_state['Flags'][flag_name]['test']=test_line_values
124
+ # st.write(st.session_state['Flags'][flag_name])
125
+ st.success(f'{flag_name} stored')
126
+
127
+ options=list(st.session_state['Flags'].keys())
128
+ selected_options = []
129
+ num_columns = 4
130
+ num_rows = -(-len(options) // num_columns)
131
+
132
+
133
+ tick=False
134
+ if st.checkbox('Select all'):
135
+ tick=True
136
+ selected_options = []
137
+ for row in range(num_rows):
138
+ cols = st.columns(num_columns)
139
+ for col in cols:
140
+ if options:
141
+ option = options.pop(0)
142
+ selected = col.checkbox(option,value=tick)
143
+ if selected:
144
+ selected_options.append(option)
145
+
146
+ st.markdown('### 1.2 Select Parameters to Apply')
147
+ parameters=st.columns(3)
148
+ with parameters[0]:
149
+ Trend=st.checkbox("**Trend**")
150
+ st.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')
151
+ with parameters[1]:
152
+ week_number=st.checkbox('**Week_number**')
153
+ st.markdown('Assists in detecting and incorporating weekly patterns or seasonality')
154
+ with parameters[2]:
155
+ sine_cosine=st.checkbox('**Sine and Cosine Waves**')
156
+ st.markdown('Helps in capturing cyclical patterns or seasonality in the data')
157
+
158
+ if st.button('Build model with Selected Parameters and Flags'):
159
+ st.header('2.1 Results Summary')
160
+ # date=list(df.index)
161
+ # df = df.reset_index(drop=True)
162
+ # st.write(df.head(2))
163
+ # X_train=df[features_set]
164
+ ss = MinMaxScaler()
165
+ if is_panel == True :
166
+ X = X_train[features_set]
167
+ X_train_tuned = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
168
+ X_train_tuned[target_col] = X_train[target_col]
169
+ X_train_tuned[date_col] = X_train[date_col]
170
+ X_train_tuned[panel_col] = X_train[panel_col]
171
+
172
+ X = X_test[features_set]
173
+ X_test_tuned = pd.DataFrame(ss.transform(X), columns=X.columns)
174
+ X_test_tuned[target_col] = X_test[target_col]
175
+ X_test_tuned[date_col] = X_test[date_col]
176
+ X_test_tuned[panel_col] = X_test[panel_col]
177
+
178
+ else :
179
+ X_train_tuned = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.columns)
180
+ X_train_tuned = sm.add_constant(X_train_tuned)
181
+
182
+ X_test_tuned = pd.DataFrame(ss.transform(X_test), columns=X_test.columns)
183
+ X_test_tuned = sm.add_constant(X_test_tuned)
184
+
185
+ for flag in selected_options:
186
+ X_train_tuned[flag]=st.session_state['Flags'][flag]['train']
187
+ X_test_tuned[flag]=st.session_state['Flags'][flag]['test']
188
+
189
+ #test
190
+ # X_train_tuned.to_csv("Test/X_train_tuned_flag.csv",index=False)
191
+ # X_test_tuned.to_csv("Test/X_test_tuned_flag.csv",index=False)
192
+
193
+ new_features = features_set
194
+ # print("()()"*20,flag, len(st.session_state['Flags'][flag]))
195
+ if Trend:
196
+ # Sprint3 - group by panel, calculate trend of each panel separately. Add trend to new feature set
197
+ if is_panel :
198
+ newdata = pd.DataFrame()
199
+ panel_wise_end_point_train = {}
200
+ for panel, groupdf in X_train_tuned.groupby(panel_col):
201
+ groupdf.sort_values(date_col, inplace=True)
202
+ groupdf['Trend'] = np.arange(1, len(groupdf) + 1, 1)
203
+ newdata = pd.concat([newdata, groupdf])
204
+ panel_wise_end_point_train[panel] = len(groupdf)
205
+ X_train_tuned = newdata.copy()
206
+
207
+ test_newdata=pd.DataFrame()
208
+ for panel, test_groupdf in X_test_tuned.groupby(panel_col):
209
+ test_groupdf.sort_values(date_col, inplace=True)
210
+ start = panel_wise_end_point_train[panel]+1
211
+ end = start + len(test_groupdf)
212
+ # print("??"*20, panel, len(test_groupdf), len(np.arange(start, end, 1)), start)
213
+ test_groupdf['Trend'] = np.arange(start, end, 1)
214
+ test_newdata = pd.concat([test_newdata, test_groupdf])
215
+ X_test_tuned = test_newdata.copy()
216
+
217
+ new_features = new_features + ['Trend']
218
+
219
+ # test
220
+ X_test_tuned.to_csv("Test/X_test_tuned_trend.csv", index=False)
221
+ X_train_tuned.to_csv("Test/X_train_tuned_trend.csv", index=False)
222
+ pd.concat([X_train_tuned,X_test_tuned]).sort_values([panel_col, date_col]).to_csv("Test/X_train_test_tuned_trend.csv", index=False)
223
+
224
+ else :
225
+ X_train_tuned['Trend']=np.arange(1,len(X_train_tuned)+1,1)
226
+ X_test_tuned['Trend'] = np.arange(len(X_train_tuned)+1, len(X_train_tuned)+len(X_test_tuned)+1, 1) # stop must extend one past the last test row so the array length matches
227
+
228
+ if week_number :
229
+ # Sprint3 - create weeknumber from date column in xtrain tuned. add week num to new feature set
230
+ if is_panel :
231
+ X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
232
+ X_train_tuned['Week_number'] = X_train_tuned[date_col].dt.day_of_week
233
+ if X_train_tuned['Week_number'].nunique() == 1 :
234
+ st.write("All dates in the data are of the same week day. Hence Week number can't be used.")
235
+ else :
236
+ X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
237
+ X_test_tuned['Week_number'] = X_test_tuned[date_col].dt.day_of_week
238
+ new_features = new_features + ['Week_number']
239
+
240
+ else :
241
+ date = pd.to_datetime(date.values)
242
+ X_train_tuned['Week_number'] = date.day_of_week[:150] # DatetimeIndex exposes day_of_week directly (no .dt accessor)
243
+ X_test_tuned['Week_number'] = date.day_of_week[150:]
244
+
245
+ if sine_cosine :
246
+ # Sprint3 - create panel wise sine cosine waves in xtrain tuned. add to new feature set
247
+ if is_panel :
248
+ new_features = new_features + ['sine_wave', 'cosine_wave']
249
+ newdata = pd.DataFrame()
250
+ groups = X_train_tuned.groupby(panel_col)
251
+ frequency = 2 * np.pi / 365 # Adjust the frequency as needed
252
+
253
+ train_panel_wise_end_point = {}
254
+ for panel, groupdf in groups:
255
+ num_samples = len(groupdf)
256
+ train_panel_wise_end_point[panel] = num_samples
257
+ days_since_start = np.arange(num_samples)
258
+ sine_wave = np.sin(frequency * days_since_start)
259
+ cosine_wave = np.cos(frequency * days_since_start)
260
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
261
+ assert len(sine_cosine_df) == len(groupdf)
262
+ # groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
263
+ groupdf['sine_wave'] = sine_wave
264
+ groupdf['cosine_wave'] = cosine_wave
265
+ newdata = pd.concat([newdata, groupdf])
266
+
267
+ test_groups = X_test_tuned.groupby(panel_col)
268
+ for panel, test_groupdf in test_groups:
269
+ num_samples = len(test_groupdf)
270
+ start = train_panel_wise_end_point[panel]
271
+ days_since_start = np.arange(start, start+num_samples, 1)
272
+ # print("##", panel, num_samples, start, len(np.arange(start, start+num_samples, 1)))
273
+ sine_wave = np.sin(frequency * days_since_start)
274
+ cosine_wave = np.cos(frequency * days_since_start)
275
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
276
+ assert len(sine_cosine_df) == len(test_groupdf)
277
+ # groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
278
+ test_groupdf['sine_wave'] = sine_wave
279
+ test_groupdf['cosine_wave'] = cosine_wave
280
+ newdata = pd.concat([newdata, test_groupdf])
281
+
282
+ X_train_tuned = newdata.copy()
283
+
284
+
285
+ else :
286
+ num_samples = len(X_train_tuned)
287
+ frequency = 2 * np.pi / 365 # Adjust the frequency as needed
288
+ days_since_start = np.arange(num_samples)
289
+ sine_wave = np.sin(frequency * days_since_start)
290
+ cosine_wave = np.cos(frequency * days_since_start)
291
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
292
+ # Concatenate the sine and cosine waves with the scaled X DataFrame
293
+ X_train_tuned = pd.concat([X_train_tuned, sine_cosine_df], axis=1)
294
+
295
+ test_num_samples = len(X_test_tuned)
296
+ start = num_samples
297
+ days_since_start = np.arange(start, start+test_num_samples, 1)
298
+ sine_wave = np.sin(frequency * days_since_start)
299
+ cosine_wave = np.cos(frequency * days_since_start)
300
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
301
+ # Concatenate the sine and cosine waves with the scaled X DataFrame
302
+ X_test_tuned = pd.concat([X_test_tuned, sine_cosine_df], axis=1)
303
+
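The two waves form a single annual Fourier pair: for day index t, sine_wave = sin(2*pi*t/365) and cosine_wave = cos(2*pi*t/365), and because b1*sin(wt) + b2*cos(wt) = A*sin(wt + phi), the two fitted coefficients let the regression absorb any seasonal phase. A quick numeric illustration:

import numpy as np

frequency = 2 * np.pi / 365
t = np.array([0, 91, 182, 273])            # roughly quarter-year marks
print(np.round(np.sin(frequency * t), 2))  # approximately [0, 1, 0, -1]
print(np.round(np.cos(frequency * t), 2))  # approximately [1, 0, -1, 0]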
304
+ # model
305
+
306
+ if is_panel :
307
+ if selected_options :
308
+ new_features = new_features + selected_options
309
+
310
+ inp_vars_str = " + ".join(new_features)
311
+
312
+ # X_train_tuned.to_csv("Test/X_train_tuned.csv",index=False)
313
+ # st.write(X_train_tuned[['total_approved_accounts_revenue'] + new_features].dtypes)
314
+ # st.write(X_train_tuned[['total_approved_accounts_revenue', panel_col] + new_features].isna().sum())
315
+
316
+ md_tuned = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
317
+ data=X_train_tuned[['total_approved_accounts_revenue'] + new_features],
318
+ groups=X_train_tuned[panel_col])
319
+ model_tuned = md_tuned.fit()
320
+
321
+
322
+
323
+ # plot act v pred for original model and tuned model
324
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train,
325
+ model.fittedvalues, model,
326
+ target_column='Revenue',
327
+ is_panel=True)
328
+ metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(X_train_tuned[date_col],
329
+ X_train_tuned[target_col],
330
+ model_tuned.fittedvalues,
331
+ model_tuned,
332
+ target_column='Revenue',
333
+ is_panel=True)
334
+
335
+ else :
336
+ model_tuned = sm.OLS(y_train, X_train_tuned).fit()
337
+
338
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date[:150], y_train,
339
+ model.predict(X_train), model,
340
+ target_column='Revenue')
341
+ metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(date[:150], y_train,
342
+ model_tuned.predict(
343
+ X_train_tuned),
344
+ model_tuned,
345
+ target_column='Revenue')
346
+
347
+ # st.write(metrics_table_tuned)
348
+ mape=np.round(metrics_table.iloc[0,1],2)
349
+ r2=np.round(metrics_table.iloc[1,1],2)
350
+ adjr2=np.round(metrics_table.iloc[2,1],2)
351
+
352
+ mape_tuned=np.round(metrics_table_tuned.iloc[0,1],2)
353
+ r2_tuned=np.round(metrics_table_tuned.iloc[1,1],2)
354
+ adjr2_tuned=np.round(metrics_table_tuned.iloc[2,1],2)
355
+
356
+ parameters_=st.columns(3)
357
+ with parameters_[0]:
358
+ st.metric('R2',r2_tuned,np.round(r2_tuned-r2,2))
359
+ with parameters_[1]:
360
+ st.metric('Adjusted R2',adjr2_tuned,np.round(adjr2_tuned-adjr2,2))
361
+ with parameters_[2]:
362
+ st.metric('MAPE',mape_tuned,np.round(mape_tuned-mape,2),'inverse')
363
+
364
+ st.header('2.2 Actual vs. Predicted Plot')
365
+ # if is_panel:
366
+ # metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date, y_train, model.predict(X_train),
367
+ # model, target_column='Revenue',is_panel=True)
368
+ # else:
369
+ # metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(date, y_train, model.predict(X_train), model,target_column='Revenue')
370
+
371
+ metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(X_train_tuned[date_col], X_train_tuned[target_col],
372
+ model_tuned.fittedvalues, model_tuned,
373
+ target_column='Revenue',
374
+ is_panel=True)
375
+ # plot_actual_vs_predicted(X_train[date_col], y_train,
376
+ # model.fittedvalues, model,
377
+ # target_column='Revenue',
378
+ # is_panel=is_panel)
379
+
380
+ st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
381
+
382
+
383
+
384
+ st.markdown('## 2.3 Residual Analysis')
385
+ columns=st.columns(2)
386
+ with columns[0]:
387
+ fig=plot_residual_predicted(y_train,model.predict(X_train),X_train)
388
+ st.plotly_chart(fig)
389
+
390
+ with columns[1]:
391
+ st.empty()
392
+ fig = qqplot(y_train,model.predict(X_train))
393
+ st.plotly_chart(fig)
394
+
395
+ with columns[0]:
396
+ fig=residual_distribution(y_train,model.predict(X_train))
397
+ st.pyplot(fig)
398
+
399
+ if st.checkbox('Use this model to build response curves',key='123'):
400
+ st.session_state["tuned_model"] = model_tuned
401
+ st.session_state["X_train_tuned"] = X_train_tuned
402
+ st.session_state["X_test_tuned"] = X_test_tuned
403
+ st.session_state["X_train_tuned"] = X_train_tuned
404
+ st.session_state["X_test_tuned"] = X_test_tuned
405
+ if is_panel :
406
+ st.session_state["tuned_model_features"] = new_features
407
+ with open("tuned_model.pkl", "wb") as f:
408
+ pickle.dump(st.session_state['tuned_model'], f)
409
+ st.success('Model saved!')
410
+
411
+ # raw_data=df[features_set]
412
+ # columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
413
+ # raw_data.columns=columns_raw
414
+ # columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
415
+ # raw_data=raw_data[columns_media]
416
+
417
+ # raw_data['Date']=list(df.index)
418
+
419
+ # spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
420
+ # spends_df=df[spends_var]
421
+ # spends_df['Week']=list(df.index)
422
+
423
+
424
+ # j=0
425
+ # X1=X.copy()
426
+ # col=X1.columns
427
+ # for i in model.params.values:
428
+ # X1[col[j]]=X1.iloc[:,j]*i
429
+ # j+=1
430
+ # contribution_df=X1
431
+ # contribution_df['Date']=list(df.index)
432
+ # excel_file='Overview_data.xlsx'
433
+
434
+ # with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
435
+ # raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
436
+ # spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
437
+ # contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')
pages/4_Saved_Model_Results.py ADDED
@@ -0,0 +1,413 @@
1
+ import plotly.express as px
2
+ import numpy as np
3
+ import plotly.graph_objects as go
4
+ import streamlit as st
5
+ import pandas as pd
6
+ import statsmodels.api as sm
7
+ from sklearn.metrics import mean_absolute_percentage_error
8
+ import sys
9
+ import os
10
+ from utilities import (set_header,
11
+ load_local_css,
12
+ load_authenticator)
13
+ import seaborn as sns
14
+ import matplotlib.pyplot as plt
15
+ import sweetviz as sv
16
+ import tempfile
17
+ from sklearn.preprocessing import MinMaxScaler
18
+ from st_aggrid import AgGrid
19
+ from st_aggrid import GridOptionsBuilder,GridUpdateMode
20
+ from st_aggrid import GridOptionsBuilder
21
+ import sys
22
+ import re
23
+
24
+ sys.setrecursionlimit(10**6)
25
+
26
+ original_stdout = sys.stdout
27
+ sys.stdout = open('temp_stdout.txt', 'w')
28
+ sys.stdout.close()
29
+ sys.stdout = original_stdout
30
+
31
+ st.set_page_config(layout='wide')
32
+ load_local_css('styles.css')
33
+ set_header()
34
+
35
+ for k, v in st.session_state.items():
36
+ if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
37
+ st.session_state[k] = v
38
+
39
+ authenticator = st.session_state.get('authenticator')
40
+ if authenticator is None:
41
+ authenticator = load_authenticator()
42
+
43
+ name, authentication_status, username = authenticator.login('Login', 'main')
44
+ auth_status = st.session_state.get('authentication_status')
45
+
46
+ if auth_status == True:
47
+ is_state_initialized = st.session_state.get('initialized',False)
48
+ if not is_state_initialized:
49
+ a=1
50
+
51
+
+     def plot_residual_predicted(actual, predicted, df_):
+         df_['Residuals'] = actual - pd.Series(predicted)
+         df_['StdResidual'] = (df_['Residuals'] - df_['Residuals'].mean()) / df_['Residuals'].std()
+
+         # Create a Plotly scatter plot
+         fig = px.scatter(df_, x=predicted, y='StdResidual', opacity=0.5, color_discrete_sequence=["#11B6BD"])
+
+         # Add horizontal lines
+         fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
+         fig.add_hline(y=2, line_color="red")
+         fig.add_hline(y=-2, line_color="red")
+
+         fig.update_xaxes(title='Predicted')
+         fig.update_yaxes(title='Standardized Residuals (Actual - Predicted)')
+
+         # Set the same width and height for both figures
+         fig.update_layout(title='Residuals over Predicted Values', autosize=False, width=600, height=400)
+
+         return fig
+
+     def residual_distribution(actual, predicted):
+         Residuals = actual - pd.Series(predicted)
+
+         # Create a Seaborn distribution plot
+         sns.set(style="whitegrid")
+         plt.figure(figsize=(6, 4))
+         sns.histplot(Residuals, kde=True, color="#11B6BD")
+
+         plt.title('Distribution of Residuals')
+         plt.xlabel('Residuals')
+         plt.ylabel('Probability Density')
+
+         return plt
+
+     def qqplot(actual, predicted):
+         Residuals = actual - pd.Series(predicted)
+         Residuals = pd.Series(Residuals)
+         Resud_std = (Residuals - Residuals.mean()) / Residuals.std()
+
+         # Create a QQ plot using Plotly with custom colors
+         fig = go.Figure()
+         fig.add_trace(go.Scatter(x=sm.ProbPlot(Resud_std).theoretical_quantiles,
+                                  y=sm.ProbPlot(Resud_std).sample_quantiles,
+                                  mode='markers',
+                                  marker=dict(size=5, color="#11B6BD"),
+                                  name='QQ Plot'))
+
+         # Add the 45-degree reference line
+         diagonal_line = go.Scatter(
+             x=[-2, 2],  # Adjust the x values as needed to fit the range of your data
+             y=[-2, 2],  # Adjust the y values accordingly
+             mode='lines',
+             line=dict(color='red'),  # Customize the line color and style
+             name=' '
+         )
+         fig.add_trace(diagonal_line)
+
+         # Customize the layout
+         fig.update_layout(title='QQ Plot of Residuals', title_x=0.5, autosize=False, width=600, height=400,
+                           xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles')
+
+         return fig
+
+     def plot_actual_vs_predicted(date, y, predicted_values, model):
+         fig = go.Figure()
+
+         fig.add_trace(go.Scatter(x=date, y=y, mode='lines', name='Actual', line=dict(color='blue')))
+         fig.add_trace(go.Scatter(x=date, y=predicted_values, mode='lines', name='Predicted', line=dict(color='orange')))
+
+         # Calculate MAPE
+         mape = mean_absolute_percentage_error(y, predicted_values) * 100
+
+         # Calculate R-squared
+         rss = np.sum((y - predicted_values) ** 2)
+         tss = np.sum((y - np.mean(y)) ** 2)
+         r_squared = 1 - (rss / tss)
+
+         # Get the number of predictors
+         num_predictors = model.df_model
+
+         # Get the number of samples
+         num_samples = len(y)
+
+         # Calculate Adjusted R-squared
+         adj_r_squared = 1 - ((1 - r_squared) * ((num_samples - 1) / (num_samples - num_predictors - 1)))
+         metrics_table = pd.DataFrame({
+             'Metric': ['MAPE', 'R-squared', 'AdjR-squared'],
+             'Value': [mape, r_squared, adj_r_squared]})
+         fig.update_layout(
+             xaxis=dict(title='Date'),
+             yaxis=dict(title='Value'),
+             title=f'MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}',
+             xaxis_tickangle=-30
+         )
+
+         return metrics_table, fig
+
+     def contributions(X, model):
+         X1 = X.copy()
+         for j, col in enumerate(X1.columns):
+             X1[col] = X1[col] * model.params.values[j]
+
+         return np.round((X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2)
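+     # NOTE: this contributions() helper is superseded by the re-definition below,
+     # which additionally returns a per-target DataFrame with cleaned channel names.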
+
+     transformed_data = pd.read_csv('transformed_data.csv')
+
+     # hard coded for now, need to get features set from model
+
+     feature_set_dct = {
+         'app_installs_-_appsflyer': ['paid_search_clicks',
+                                      'fb:_level_achieved_-_tier_1_impressions_lag2',
+                                      'fb:_level_achieved_-_tier_2_clicks_lag2',
+                                      'paid_social_others_impressions_adst.1',
+                                      'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag2',
+                                      'digital_tactic_others_clicks',
+                                      'kwai_clicks_adst.3',
+                                      'programmaticclicks',
+                                      'indicacao_clicks_adst.1',
+                                      'infleux_clicks_adst.4',
+                                      'influencer_clicks'],
+         'account_requests_-_appsflyer': ['paid_search_impressions',
+                                          'fb:_level_achieved_-_tier_1_clicks_adst.1',
+                                          'fb:_level_achieved_-_tier_2_clicks_adst.1',
+                                          'paid_social_others_clicks_lag2',
+                                          'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag5_adst.1',
+                                          'digital_tactic_others_clicks_adst.1',
+                                          'kwai_clicks_adst.2',
+                                          'programmaticimpressions_lag4_adst.1',
+                                          'indicacao_clicks',
+                                          'infleux_clicks_adst.2',
+                                          'influencer_clicks'],
+         'total_approved_accounts_-_appsflyer': ['paid_search_clicks',
+                                                 'fb:_level_achieved_-_tier_1_impressions_lag2_adst.1',
+                                                 'fb:_level_achieved_-_tier_2_impressions_lag2',
+                                                 'paid_social_others_clicks_lag2_adst.2',
+                                                 'ga_app:_will_and_cid_pequena_baixo_risco_impressions_lag4',
+                                                 'digital_tactic_others_clicks',
+                                                 'kwai_impressions_adst.2',
+                                                 'programmaticclicks_adst.5',
+                                                 'indicacao_clicks_adst.1',
+                                                 'infleux_clicks_adst.3',
+                                                 'influencer_clicks'],
+         'total_approved_accounts_-_revenue': ['paid_search_impressions_adst.5',
+                                               'kwai_impressions_lag2_adst.3',
+                                               'indicacao_clicks_adst.3',
+                                               'infleux_clicks_adst.3',
+                                               'programmaticclicks_adst.4',
+                                               'influencer_clicks_adst.3',
+                                               'fb:_level_achieved_-_tier_1_impressions_adst.2',
+                                               'fb:_level_achieved_-_tier_2_impressions_lag3_adst.5',
+                                               'paid_social_others_impressions_adst.3',
+                                               'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag3_adst.5',
+                                               'digital_tactic_others_clicks_adst.2']
+     }
+
+     # """ the above part should be modified so that we are fetching the feature set from the saved model """
+
+     def contributions(X, model, target):
+         X1 = X.copy()
+         for j, col in enumerate(X1.columns):
+             X1[col] = X1[col] * model.params.values[j]
+
+         contributions = np.round((X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2)
+         contributions = pd.DataFrame(contributions, columns=target).reset_index().rename(columns={'index': 'Channel'})
+         contributions['Channel'] = [re.split(r'_imp|_cli', col)[0] for col in contributions['Channel']]
+
+         return contributions
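+     # Assuming transformed_data is loaded as above, a hypothetical call such as
+     # contributions(X_train, model, ['app_installs_-_appsflyer']) returns a DataFrame
+     # with a 'Channel' column and one column of percentage contributions per target.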
+
+     def model_fit(features_set, target):
+         X = transformed_data[features_set]
+         y = transformed_data[target]
+         ss = MinMaxScaler()
+         X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
+         X = sm.add_constant(X)
+         X_train = X.iloc[:150]
+         X_test = X.iloc[150:]
+         y_train = y.iloc[:150]
+         y_test = y.iloc[150:]
+         model = sm.OLS(y_train, X_train).fit()
+         predicted_values_train = model.predict(X_train)
+         r2 = model.rsquared
+         adjr2 = model.rsquared_adj
+         train_mape = mean_absolute_percentage_error(y_train, predicted_values_train)
+         test_mape = mean_absolute_percentage_error(y_test, model.predict(X_test))
+         summary = model.summary()
+         train_contributions = contributions(X_train, model, [target])
+         return pd.DataFrame({'Model': target, 'R2': np.round(r2, 2), 'ADJr2': np.round(adjr2, 2),
+                              'Train Mape': np.round(train_mape, 2), 'Test Mape': np.round(test_mape, 2),
+                              'Summary': summary, 'Model_object': model}, index=[0]), train_contributions
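+     # NOTE: the iloc[:150] split above hardcodes the train window; it assumes the
+     # transformed dataset is date-ordered and has more than 150 rows.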
+
+     metrics_table = pd.DataFrame()
+
+     if 'contribution_df' not in st.session_state:
+         st.session_state["contribution_df"] = pd.DataFrame()
+
+     for target, feature_set in feature_set_dct.items():
+         model_metrics, train_contributions = model_fit(features_set=feature_set, target=target)
+         metrics_table = pd.concat([metrics_table, model_metrics])
+         if st.session_state["contribution_df"].empty:
+             st.session_state["contribution_df"] = train_contributions
+         else:
+             st.session_state["contribution_df"] = pd.merge(st.session_state["contribution_df"], train_contributions)
+
+     # st.write(st.session_state["contribution_df"])
+
+     metrics_table.reset_index(drop=True, inplace=True)
+
+     eda_columns = st.columns(2)
+     with eda_columns[1]:
+         eda = st.button('Generate EDA Report',
+                         help="Click to generate a bivariate report for the selected response metric from the table below.")
+
+     # st.markdown('Model Metrics')
+
+     st.title('Contribution Overview')
+
+     model_columns = [col for col in st.session_state['contribution_df'].columns if col.lower() != 'channel']
+     contribution_selections = st.multiselect('Select the models to compare contributions',
+                                              model_columns, default=model_columns[-1])
+     trace_data = []
+
+     for selection in contribution_selections:
+         trace = go.Bar(x=st.session_state['contribution_df']['Channel'],
+                        y=st.session_state['contribution_df'][selection],
+                        name=selection,
+                        text=np.round(st.session_state['contribution_df'][selection], 0).astype(int).astype(str) + '%',
+                        textposition='outside')
+         trace_data.append(trace)
+
+     layout = go.Layout(
+         title='Metrics Contribution by Channel',
+         xaxis=dict(title='Channel Name'),
+         yaxis=dict(title='Metrics Contribution'),
+         barmode='group'
+     )
+     fig = go.Figure(data=trace_data, layout=layout)
+     st.plotly_chart(fig, use_container_width=True)
+
+     st.title('Analysis of Models Result')
+     # st.markdown()
+     gd_table = metrics_table.iloc[:, :-2]
+     gd = GridOptionsBuilder.from_dataframe(gd_table)
+     # gd.configure_pagination(enabled=True)
+     gd.configure_selection(use_checkbox=True)
+
+     gridoptions = gd.build()
+     table = AgGrid(gd_table, gridOptions=gridoptions, fit_columns_on_grid_load=True, height=200)
+     # table = metrics_table.iloc[:, :-2]
+     # table.insert(0, "Select", False)
+     # selection_table = st.data_editor(table, column_config={"Select": st.column_config.CheckboxColumn(required=True)})
+
+     if len(table.selected_rows) == 0:
+         st.warning("Click on the checkbox to view comprehensive results of the selected model.")
+         st.stop()
+     else:
+         target_column = table.selected_rows[0]['Model']
+         feature_set = feature_set_dct[target_column]
+
+     with eda_columns[1]:
+         if eda:
+             def generate_report_with_target(channel_data, target_feature):
+                 report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature, verbose=False)
+                 temp_dir = tempfile.mkdtemp()
+                 report_path = os.path.join(temp_dir, "report.html")
+                 report.show_html(filepath=report_path, open_browser=False)  # Generate the report as an HTML file
+                 return report_path
+
+             report_data = transformed_data[feature_set].copy()
+             report_data[target_column] = transformed_data[target_column]
+             report_file = generate_report_with_target(report_data, target_column)
+
+             if os.path.exists(report_file):
+                 with open(report_file, 'rb') as f:
+                     st.download_button(
+                         label="Download EDA Report",
+                         data=f.read(),
+                         file_name="report.html",
+                         mime="text/html"
+                     )
+             else:
+                 st.warning("Report generation failed. Unable to find the report file.")
+
+     model = metrics_table[metrics_table['Model'] == target_column]['Model_object'].iloc[0]
+     st.header('Model Summary')
+     st.write(model.summary())
+     X = transformed_data[feature_set]
+     ss = MinMaxScaler()
+     X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
+     X = sm.add_constant(X)
+     y = transformed_data[target_column]
+     X_train = X.iloc[:150]
+     X_test = X.iloc[150:]
+     y_train = y.iloc[:150]
+     y_test = y.iloc[150:]
+     X.index = transformed_data['date']
+     y.index = transformed_data['date']
+
+     metrics_table_train, fig_train = plot_actual_vs_predicted(X_train.index, y_train, model.predict(X_train), model)
+     metrics_table_test, fig_test = plot_actual_vs_predicted(X_test.index, y_test, model.predict(X_test), model)
+
+     metrics_table_train = metrics_table_train.set_index('Metric').transpose()
+     metrics_table_train.index = ['Train']
+     metrics_table_test = metrics_table_test.set_index('Metric').transpose()
+     metrics_table_test.index = ['Test']
+     metrics_table = np.round(pd.concat([metrics_table_train, metrics_table_test]), 2)
+
+     st.markdown('Result Overview')
+     st.dataframe(metrics_table, use_container_width=True)
+
+     st.subheader('Actual vs Predicted Plot Train')
+     st.plotly_chart(fig_train, use_container_width=True)
+     st.subheader('Actual vs Predicted Plot Test')
+     st.plotly_chart(fig_test, use_container_width=True)
+
+     st.markdown('## Residual Analysis')
+     columns = st.columns(2)
+
+     Xtrain1 = X_train.copy()
+     with columns[0]:
+         fig = plot_residual_predicted(y_train, model.predict(Xtrain1), Xtrain1)
+         st.plotly_chart(fig)
+
+     with columns[1]:
+         st.empty()
+         fig = qqplot(y_train, model.predict(X_train))
+         st.plotly_chart(fig)
+
+     with columns[0]:
+         fig = residual_distribution(y_train, model.predict(X_train))
+         st.pyplot(fig)
+
+ elif auth_status == False:
+     st.error('Username/Password is incorrect')
+     try:
+         username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
+         if username_forgot_pw:
+             st.success('New password sent securely')
+             # Random password to be transferred to the user securely
+         elif username_forgot_pw == False:
+             st.error('Username not found')
+     except Exception as e:
+         st.error(e)
pages/5_Model_Result_Overview.py ADDED
@@ -0,0 +1,103 @@
+ import streamlit as st
+ from utilities import (set_header,
+                        initialize_data,
+                        load_local_css,
+                        create_channel_summary,
+                        create_contribution_pie,
+                        create_contribuion_stacked_plot,
+                        create_channel_spends_sales_plot,
+                        format_numbers,
+                        channel_name_formating,
+                        load_authenticator)
+ import plotly.graph_objects as go
+ import streamlit_authenticator as stauth
+ import yaml
+ from yaml import SafeLoader
+ import time
+
+ st.set_page_config(layout='wide')
+ load_local_css('styles.css')
+ set_header()
+
+ target = 'Revenue'
+ # for k, v in st.session_state.items():
+ #     if k not in ['logout', 'login', 'config'] and not k.startswith('FormSubmitter'):
+ #         st.session_state[k] = v
+
+ # authenticator = st.session_state.get('authenticator')
+ # if authenticator is None:
+ #     authenticator = load_authenticator()
+
+ # name, authentication_status, username = authenticator.login('Login', 'main')
+ # auth_status = st.session_state['authentication_status']
+
+ # if auth_status:
+ #     authenticator.logout('Logout', 'main')
+ #     is_state_initiaized = st.session_state.get('initialized', False)
+ #     if not is_state_initiaized:
+ initialize_data()
+ scenario = st.session_state['scenario']
+ raw_df = st.session_state['raw_df']
+ st.header('Overview of previous spends')
+
+ columns = st.columns((1, 1, 3))
+
+ with columns[0]:
+     st.metric(label='Spends', value=format_numbers(float(scenario.actual_total_spends)))
+     # print(f"##################### {scenario.actual_total_sales} ##################")
+ with columns[1]:
+     st.metric(label=target, value=format_numbers(float(scenario.actual_total_sales), include_indicator=False))
+
+ actual_summary_df = create_channel_summary(scenario)
+ actual_summary_df['Channel'] = actual_summary_df['Channel'].apply(channel_name_formating)
+
+ columns = st.columns((2, 1))
+ with columns[0]:
+     with st.expander('Channel wise overview'):
+         st.markdown(actual_summary_df.style.set_table_styles(
+             [{
+                 'selector': 'th',
+                 'props': [('background-color', '#11B6BD')]
+             },
+             {
+                 'selector': 'tr:nth-child(even)',
+                 'props': [('background-color', '#11B6BD')]
+             }]).to_html(), unsafe_allow_html=True)
+
+ st.markdown("<hr>", unsafe_allow_html=True)
+ ##############################
+
+ st.plotly_chart(create_contribution_pie(), use_container_width=True)
+ st.markdown("<hr>", unsafe_allow_html=True)
+
+ ################################
+ st.plotly_chart(create_contribuion_stacked_plot(scenario), use_container_width=True)
+ st.markdown("<hr>", unsafe_allow_html=True)
+ #######################################
+
+ selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['non media'], format_func=channel_name_formating)
+ selected_channel = scenario.channels.get(selected_channel_name, None)
+
+ st.plotly_chart(create_channel_spends_sales_plot(selected_channel), use_container_width=True)
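+ # NOTE: 'non media' is not a key in scenario.channels, so selected_channel is None
+ # for that option; create_channel_spends_sales_plot is assumed to handle None.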
+
+ st.markdown("<hr>", unsafe_allow_html=True)
+
+ # elif auth_status == False:
+ #     st.error('Username/Password is incorrect')
+
+ # if auth_status != True:
+ #     try:
+ #         username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
+ #         if username_forgot_pw:
+ #             st.success('New password sent securely')
+ #             # Random password to be transferred to user securely
+ #         elif username_forgot_pw == False:
+ #             st.error('Username not found')
+ #     except Exception as e:
+ #         st.error(e)
pages/6_Build_Response_Curves.py ADDED
@@ -0,0 +1,168 @@
+ import streamlit as st
+ import plotly.express as px
+ import numpy as np
+ import plotly.graph_objects as go
+ from utilities import channel_name_formating, load_authenticator, initialize_data
+ from sklearn.metrics import r2_score
+ from collections import OrderedDict
+ from classes import class_from_dict, class_to_dict
+ import pickle
+ import json
+
+ for k, v in st.session_state.items():
+     if k not in ['logout', 'login', 'config'] and not k.startswith('FormSubmitter'):
+         st.session_state[k] = v
+
+ def s_curve(x, K, b, a, x0):
+     return K / (1 + b * np.exp(-a * (x - x0)))
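+ # Generalized logistic curve: K is the saturation ceiling, x0 the inflection
+ # shift, a the steepness and b a scale on the initial offset.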
+
+ def save_scenario(scenario_name):
+     """
+     Save the current scenario with the mentioned name in the session state
+
+     Parameters
+     ----------
+     scenario_name
+         Name of the scenario to be saved
+     """
+     if 'saved_scenarios' not in st.session_state:
+         st.session_state['saved_scenarios'] = OrderedDict()
+
+     # st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
+     st.session_state['saved_scenarios'][scenario_name] = class_to_dict(st.session_state['scenario'])
+     st.session_state['scenario_input'] = ""
+     print(type(st.session_state['saved_scenarios']))
+     with open('../saved_scenarios.pkl', 'wb') as f:
+         pickle.dump(st.session_state['saved_scenarios'], f)
+
+ def reset_curve_parameters():
+     del st.session_state['K']
+     del st.session_state['b']
+     del st.session_state['a']
+     del st.session_state['x0']
+
+ def update_response_curve():
+     # st.session_state['rcs'][selected_channel_name]['K'] = st.session_state['K']
+     # st.session_state['rcs'][selected_channel_name]['b'] = st.session_state['b']
+     # st.session_state['rcs'][selected_channel_name]['a'] = st.session_state['a']
+     # st.session_state['rcs'][selected_channel_name]['x0'] = st.session_state['x0']
+     # rcs = st.session_state['rcs']
+     _channel_class = st.session_state['scenario'].channels[selected_channel_name]
+     _channel_class.update_response_curves({
+         'K': st.session_state['K'],
+         'b': st.session_state['b'],
+         'a': st.session_state['a'],
+         'x0': st.session_state['x0']})
+
+ # authenticator = st.session_state.get('authenticator')
+ # if authenticator is None:
+ #     authenticator = load_authenticator()
+
+ # name, authentication_status, username = authenticator.login('Login', 'main')
+ # auth_status = st.session_state.get('authentication_status')
+
+ # if auth_status == True:
+ #     is_state_initiaized = st.session_state.get('initialized', False)
+ #     if not is_state_initiaized:
+ #         print("Scenario page state reloaded")
+
+ initialize_data()
+
+ st.subheader("Build response curves")
+
+ channels_list = st.session_state['channels_list']
+ selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['Others'],
+                                      format_func=channel_name_formating, on_change=reset_curve_parameters)
+
+ rcs = {}
+ for channel_name in channels_list:
+     rcs[channel_name] = st.session_state['scenario'].channels[channel_name].response_curve_params
+ # rcs = st.session_state['rcs']
+
+ if 'K' not in st.session_state:
+     st.session_state['K'] = rcs[selected_channel_name]['K']
+ if 'b' not in st.session_state:
+     st.session_state['b'] = rcs[selected_channel_name]['b']
+ if 'a' not in st.session_state:
+     st.session_state['a'] = rcs[selected_channel_name]['a']
+ if 'x0' not in st.session_state:
+     st.session_state['x0'] = rcs[selected_channel_name]['x0']
+
+ x = st.session_state['actual_input_df'][selected_channel_name].values
+ y = st.session_state['actual_contribution_df'][selected_channel_name].values
+
+ power = np.ceil(np.log(x.max()) / np.log(10)) - 3
+
+ # fig = px.scatter(x, s_curve(x/10**power,
+ #                             st.session_state['K'],
+ #                             st.session_state['b'],
+ #                             st.session_state['a'],
+ #                             st.session_state['x0']))
+
+ fig = px.scatter(x=x, y=y)
+ fig.add_trace(go.Scatter(x=np.sort(x),  # np.sort keeps the ndarray type so the division below works
+                          y=s_curve(np.sort(x) / 10**power,
+                                    st.session_state['K'],
+                                    st.session_state['b'],
+                                    st.session_state['a'],
+                                    st.session_state['x0']),
+                          line=dict(color='red')))
+
+ fig.update_layout(title_text="Response Curve", showlegend=False)
+ fig.update_annotations(font_size=10)
+ fig.update_xaxes(title='Spends')
+ fig.update_yaxes(title='Revenue')
+
+ st.plotly_chart(fig, use_container_width=True)
+
+ r2 = r2_score(y, s_curve(x / 10**power,
+                          st.session_state['K'],
+                          st.session_state['b'],
+                          st.session_state['a'],
+                          st.session_state['x0']))
+
+ st.metric('R2', round(r2, 2))
+ columns = st.columns(4)
+
+ with columns[0]:
+     st.number_input('K', key='K', format="%0.5f")
+ with columns[1]:
+     st.number_input('b', key='b', format="%0.5f")
+ with columns[2]:
+     st.number_input('a', key='a', step=0.0001, format="%0.5f")
+ with columns[3]:
+     st.number_input('x0', key='x0', format="%0.5f")
+
+ st.button('Update parameters', on_click=update_response_curve)
+ st.button('Reset parameters', on_click=reset_curve_parameters)
+ scenario_name = st.text_input('Scenario name', key='scenario_input', placeholder='Scenario name', label_visibility='collapsed')
+ st.button('Save', on_click=lambda: save_scenario(scenario_name), disabled=len(st.session_state['scenario_input']) == 0)
+
+ file_name = st.text_input('rcs download file name', key='file_name_input', placeholder='file name', label_visibility='collapsed')
+ st.download_button(
+     label="Download response curves",
+     data=json.dumps(rcs),
+     file_name=f"{file_name}.json",
+     mime="application/json",
+     disabled=len(file_name) == 0,
+ )
+
+ def s_curve_derivative(x, K, b, a, x0):
+     # Derivative of the S-curve function
+     return a * b * K * np.exp(-a * (x - x0)) / ((1 + b * np.exp(-a * (x - x0))) ** 2)
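+ # Sanity check: d/dx [K / (1 + b*exp(-a*(x - x0)))] = a*b*K*exp(-a*(x - x0)) /
+ # (1 + b*exp(-a*(x - x0)))**2, which is exactly the expression returned above.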
+
+ # Parameters of the S-curve
+ K = st.session_state['K']
+ b = st.session_state['b']
+ a = st.session_state['a']
+ x0 = st.session_state['x0']
+
+ # Optimized spend value obtained from the tool
+ optimized_spend = st.number_input('value of x')  # Replace this with your optimized spend value
+
+ # Calculate the slope at the optimized spend value
+ slope_at_optimized_spend = s_curve_derivative(optimized_spend, K, b, a, x0)
+
+ st.write("Slope ", slope_at_optimized_spend)
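+ # NOTE: the fitted curve above evaluates spends scaled by 10**power, so the raw x
+ # entered here may need the same scaling for the slope to be on a comparable axis.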
pages/8_Scenario_Planner.py ADDED
@@ -0,0 +1,1133 @@
+ import streamlit as st
+ from numerize.numerize import numerize
+ import numpy as np
+ from functools import partial
+ from collections import OrderedDict
+ from plotly.subplots import make_subplots
+ import plotly.graph_objects as go
+ from utilities import (
+     format_numbers,
+     load_local_css,
+     set_header,
+     initialize_data,
+     load_authenticator,
+     send_email,
+     channel_name_formating,
+ )
+ from classes import class_from_dict, class_to_dict
+ import pickle
+ import streamlit_authenticator as stauth
+ import yaml
+ from yaml import SafeLoader
+ import re
+ import pandas as pd
+ import plotly.express as px
+
+ target = "Revenue"
+ st.set_page_config(layout="wide")
+ load_local_css("styles.css")
+ set_header()
+
+ for k, v in st.session_state.items():
+     if k not in ["logout", "login", "config"] and not k.startswith(
+         "FormSubmitter"
+     ):
+         st.session_state[k] = v
+ # ======================================================== #
+ # ======================= Functions ====================== #
+ # ======================================================== #
+
+
+ def optimize(key):
+     """
+     Optimize the spends for the sales
+     """
+     channel_list = [
+         key
+         for key, value in st.session_state["optimization_channels"].items()
+         if value
+     ]
+     # print('channel_list')
+     # print(channel_list)
+     # print('@@@@@@@@')
+     if len(channel_list) > 0:
+         scenario = st.session_state["scenario"]
+         if key.lower() == "spends":
+             with status_placeholder:
+                 with st.spinner("Optimizing Sales"):
+                     result = st.session_state["scenario"].optimize(
+                         st.session_state["total_spends_change"], channel_list
+                     )
+         elif key.lower() == "sales":
+             with status_placeholder:
+                 with st.spinner("Optimizing Spends"):
+                     result = st.session_state["scenario"].optimize_spends(
+                         st.session_state["total_sales_change"], channel_list
+                     )
+         for channel_name, modified_spends in result:
+             st.session_state[channel_name] = numerize(
+                 modified_spends
+                 * scenario.channels[channel_name].conversion_rate,
+                 1,
+             )
+             prev_spends = (
+                 st.session_state["scenario"]
+                 .channels[channel_name]
+                 .actual_total_spends
+             )
+             st.session_state[f"{channel_name}_change"] = round(
+                 100 * (modified_spends - prev_spends) / prev_spends, 2
+             )
+
+
+ def save_scenario(scenario_name):
+     """
+     Save the current scenario with the mentioned name in the session state
+
+     Parameters
+     ----------
+     scenario_name
+         Name of the scenario to be saved
+     """
+     if "saved_scenarios" not in st.session_state:
+         st.session_state["saved_scenarios"] = OrderedDict()
+
+     # st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
+     st.session_state["saved_scenarios"][scenario_name] = class_to_dict(
+         st.session_state["scenario"]
+     )
+     st.session_state["scenario_input"] = ""
+     # print(type(st.session_state['saved_scenarios']))
+     with open("../saved_scenarios.pkl", "wb") as f:
+         pickle.dump(st.session_state["saved_scenarios"], f)
+
+
+ def update_sales_abs():
+     actual_sales = _scenario.actual_total_sales
+     if validate_input(st.session_state["total_sales_change_abs"]):
+         modified_sales = extract_number_for_string(
+             st.session_state["total_sales_change_abs"]
+         )
+         st.session_state["total_sales_change"] = round(
+             ((modified_sales / actual_sales) - 1) * 100
+         )
+
+
+ def update_sales():
+     st.session_state["total_sales_change_abs"] = numerize(
+         (1 + st.session_state["total_sales_change"] / 100)
+         * _scenario.actual_total_sales,
+         1,
+     )
+
+
+ def update_all_spends_abs():
+     actual_spends = _scenario.actual_total_spends
+     if validate_input(st.session_state["total_spends_change_abs"]):
+         modified_spends = extract_number_for_string(
+             st.session_state["total_spends_change_abs"]
+         )
+         print(modified_spends)
+         print(actual_spends)
+
+         st.session_state["total_spends_change"] = (
+             (modified_spends / actual_spends) - 1
+         ) * 100
+
+         update_all_spends()
+
+
+ def update_all_spends():
+     """
+     Updates spends for all the channels with the given overall spends change
+     """
+     percent_change = st.session_state["total_spends_change"]
+     st.session_state["total_spends_change_abs"] = numerize(
+         (1 + percent_change / 100) * _scenario.actual_total_spends, 1
+     )
+     for channel_name in st.session_state["channels_list"]:
+         channel = st.session_state["scenario"].channels[channel_name]
+         current_spends = channel.actual_total_spends
+         modified_spends = (1 + percent_change / 100) * current_spends
+         st.session_state["scenario"].update(channel_name, modified_spends)
+         st.session_state[channel_name] = numerize(
+             modified_spends * channel.conversion_rate, 1
+         )
+         st.session_state[f"{channel_name}_change"] = percent_change
+
+
+ def extract_number_for_string(string_input):
+     string_input = string_input.upper()
+     if string_input.endswith("K"):
+         return float(string_input[:-1]) * 10**3
+     elif string_input.endswith("M"):
+         return float(string_input[:-1]) * 10**6
+     elif string_input.endswith("B"):
+         return float(string_input[:-1]) * 10**9
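+ # e.g. extract_number_for_string("2.5M") -> 2500000.0; inputs without a K/M/B
+ # suffix fall through and return None, which is why callers validate first.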
+
+
+ def validate_input(string_input):
+     pattern = r"\d+\.?\d*[KMB]$"  # was [K|M|B]; '|' is literal inside a character class
+     match = re.match(pattern, string_input)
+     if match is None:
+         return False
+     return True
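+ # The pattern accepts shorthand like "120K" or "2.5M" and rejects bare numbers,
+ # mirroring what extract_number_for_string() above can parse.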
+
+
+ def update_data_by_percent(channel_name):
+     prev_spends = (
+         st.session_state["scenario"].channels[channel_name].actual_total_spends
+         * st.session_state["scenario"].channels[channel_name].conversion_rate
+     )
+     modified_spends = prev_spends * (
+         1 + st.session_state[f"{channel_name}_change"] / 100
+     )
+     st.session_state[channel_name] = numerize(modified_spends, 1)
+     st.session_state["scenario"].update(
+         channel_name,
+         modified_spends
+         / st.session_state["scenario"].channels[channel_name].conversion_rate,
+     )
+
+
+ def update_data(channel_name):
+     """
+     Updates the spends for the given channel
+     """
+     if validate_input(st.session_state[channel_name]):
+         modified_spends = extract_number_for_string(
+             st.session_state[channel_name]
+         )
+         prev_spends = (
+             st.session_state["scenario"]
+             .channels[channel_name]
+             .actual_total_spends
+             * st.session_state["scenario"]
+             .channels[channel_name]
+             .conversion_rate
+         )
+         st.session_state[f"{channel_name}_change"] = round(
+             100 * (modified_spends - prev_spends) / prev_spends, 2
+         )
+         st.session_state["scenario"].update(
+             channel_name,
+             modified_spends
+             / st.session_state["scenario"]
+             .channels[channel_name]
+             .conversion_rate,
+         )
+     # st.session_state['scenario'].update(channel_name, modified_spends)
+     # else:
+     #     try:
+     #         modified_spends = float(st.session_state[channel_name])
+     #         prev_spends = st.session_state['scenario'].channels[channel_name].actual_total_spends * st.session_state['scenario'].channels[channel_name].conversion_rate
+     #         st.session_state[f'{channel_name}_change'] = round(100*(modified_spends - prev_spends) / prev_spends, 2)
+     #         st.session_state['scenario'].update(channel_name, modified_spends/st.session_state['scenario'].channels[channel_name].conversion_rate)
+     #         st.session_state[f'{channel_name}'] = numerize(modified_spends, 1)
+     #     except ValueError:
+     #         st.write('Invalid input')
+
+
+ def select_channel_for_optimization(channel_name):
+     """
+     Marks the given channel for optimization
+     """
+     st.session_state["optimization_channels"][channel_name] = st.session_state[
+         f"{channel_name}_selected"
+     ]
+
+
+ def select_all_channels_for_optimization():
+     """
+     Marks all the channels for optimization
+     """
+     for channel_name in st.session_state["optimization_channels"].keys():
+         st.session_state[f"{channel_name}_selected"] = st.session_state[
+             "optimze_all_channels"
+         ]
+         st.session_state["optimization_channels"][channel_name] = (
+             st.session_state["optimze_all_channels"]
+         )
+
+
+ def update_penalty():
+     """
+     Updates the penalty flag for sales calculation
+     """
+     st.session_state["scenario"].update_penalty(
+         st.session_state["apply_penalty"]
+     )
+
+
+ def reset_scenario():
+     # print(st.session_state['default_scenario_dict'])
+     # st.session_state['scenario'] = class_from_dict(st.session_state['default_scenario_dict'])
+     # for channel in st.session_state['scenario'].channels.values():
+     #     st.session_state[channel.name] = float(channel.actual_total_spends * channel.conversion_rate)
+     initialize_data()
+     for channel_name in st.session_state["channels_list"]:
+         st.session_state[f"{channel_name}_selected"] = False
+         st.session_state[f"{channel_name}_change"] = 0
+     st.session_state["optimze_all_channels"] = False
+
+
+ def format_number(num):
+     if num >= 1_000_000:
+         return f"{num / 1_000_000:.2f}M"
+     elif num >= 1_000:
+         return f"{num / 1_000:.0f}K"
+     else:
+         return f"{num:.2f}"
+
+
+ def summary_plot(data, x, y, title, text_column):
+     fig = px.bar(
+         data,
+         x=x,
+         y=y,
+         orientation="h",
+         title=title,
+         text=text_column,
+         color="Channel_name",
+     )
+
+     # Convert text_column to numeric values
+     data[text_column] = pd.to_numeric(data[text_column], errors="coerce")
+
+     # Update the format of the displayed text based on magnitude
+     fig.update_traces(
+         texttemplate="%{text:.2s}",
+         textposition="outside",
+         hovertemplate="%{x:.2s}",
+     )
+
+     fig.update_layout(
+         xaxis_title=x, yaxis_title="Channel Name", showlegend=False
+     )
+     return fig
+
+
+ def s_curve(x, K, b, a, x0):
+     return K / (1 + b * np.exp(-a * (x - x0)))
+
+
+ def find_segment_value(x, roi, mroi):
+     start_value = x[0]
+     end_value = x[len(x) - 1]
+
+     # Condition for green region: Both MROI and ROI > 1
+     green_condition = (roi > 1) & (mroi > 1)
+     left_indices = np.where(green_condition)[0]
+     left_value = x[left_indices[0]] if left_indices.size > 0 else x[0]
+
+     right_indices = np.where(green_condition)[0]
+     right_value = x[right_indices[-1]] if right_indices.size > 0 else x[0]
+
+     return start_value, end_value, left_value, right_value
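+ # The four values returned split the spend axis into the three bands shaded below:
+ # yellow (start to left: not yet efficient), green (left to right: ROI and MROI
+ # both above 1) and red (right to end: past the efficient region).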
+
+ def calculate_rgba(
+     start_value, end_value, left_value, right_value, current_channel_spends
+ ):
+     # Initialize alpha to None for clarity
+     alpha = None
+
+     # Determine the color and calculate relative_position and alpha based on the point's position
+     if start_value <= current_channel_spends <= left_value:
+         color = "yellow"
+         relative_position = (current_channel_spends - start_value) / (
+             left_value - start_value
+         )
+         alpha = 0.8 - (
+             0.6 * relative_position
+         )  # Alpha decreases from start to end
+
+     elif left_value < current_channel_spends <= right_value:
+         color = "green"
+         relative_position = (current_channel_spends - left_value) / (
+             right_value - left_value
+         )
+         alpha = 0.8 - (
+             0.6 * relative_position
+         )  # Alpha decreases from start to end
+
+     elif right_value < current_channel_spends <= end_value:
+         color = "red"
+         relative_position = (current_channel_spends - right_value) / (
+             end_value - right_value
+         )
+         alpha = 0.2 + (
+             0.6 * relative_position
+         )  # Alpha increases from start to end
+
+     else:
+         # Default case, if the spends are outside the defined ranges
+         return "rgba(136, 136, 136, 0.5)"  # Grey for values outside the range
+
+     # Ensure alpha is within the intended range in case of any calculation overshoot
+     alpha = max(0.2, min(alpha, 0.8))
+
+     # Define color codes for RGBA
+     color_codes = {
+         "yellow": "255, 255, 0",  # RGB for yellow
+         "green": "0, 128, 0",  # RGB for green
+         "red": "255, 0, 0",  # RGB for red
+     }
+
+     rgba = f"rgba({color_codes[color]}, {alpha})"
+     return rgba
+
+
+ def debug_temp(x_test, power, K, b, a, x0):
+     print("*" * 100)
+     # Calculate the count of bins
+     count_lower_bin = sum(1 for x in x_test if x <= 2524)
+     count_center_bin = sum(1 for x in x_test if x > 2524 and x <= 3377)
+     count_ = sum(1 for x in x_test if x > 3377)
+
+     print(
+         f"""
+         lower : {count_lower_bin}
+         center : {count_center_bin}
+         upper : {count_}
+         """
+     )
+
+
+ # @st.cache
+ def plot_response_curves():
+     cols = 4
+     rows = (
+         len(channels_list) // cols
+         if len(channels_list) % cols == 0
+         else len(channels_list) // cols + 1
+     )
+     rcs = st.session_state["rcs"]
+     shapes = []
+     fig = make_subplots(rows=rows, cols=cols, subplot_titles=channels_list)
+     for i in range(0, len(channels_list)):
+         col = channels_list[i]
+         x_actual = st.session_state["scenario"].channels[col].actual_spends
+         # x_modified = st.session_state["scenario"].channels[col].modified_spends
+
+         power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
+
+         K = rcs[col]["K"]
+         b = rcs[col]["b"]
+         a = rcs[col]["a"]
+         x0 = rcs[col]["x0"]
+
+         x_plot = np.linspace(0, 5 * x_actual.sum(), 50)
+
+         x, y, marginal_roi = [], [], []
+         for x_p in x_plot:
+             x.append(x_p * x_actual / x_actual.sum())
+
+         for index in range(len(x_plot)):
+             y.append(s_curve(x[index] / 10**power, K, b, a, x0))
+
+         for index in range(len(x_plot)):
+             marginal_roi.append(
+                 a
+                 * y[index]
+                 * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
+             )
+
+         x = (
+             np.sum(x, axis=1)
+             * st.session_state["scenario"].channels[col].conversion_rate
+         )
+         y = np.sum(y, axis=1)
+         marginal_roi = (
+             np.average(marginal_roi, axis=1)
+             / st.session_state["scenario"].channels[col].conversion_rate
+         )
+
+         roi = y / np.maximum(x, np.finfo(float).eps)
+
+         fig.add_trace(
+             go.Scatter(
+                 x=x,
+                 y=y,
+                 name=col,
+                 customdata=np.stack((roi, marginal_roi), axis=-1),
+                 hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
+                 line=dict(color="blue"),
+             ),
+             row=1 + (i) // cols,
+             col=i % cols + 1,
+         )
+
+         x_optimal = (
+             st.session_state["scenario"].channels[col].modified_total_spends
+             * st.session_state["scenario"].channels[col].conversion_rate
+         )
+         y_optimal = (
+             st.session_state["scenario"].channels[col].modified_total_sales
+         )
+
+         # if col == "Paid_social_others":
+         #     debug_temp(x_optimal * x_actual / x_actual.sum(), power, K, b, a, x0)
+
+         fig.add_trace(
+             go.Scatter(
+                 x=[x_optimal],
+                 y=[y_optimal],
+                 name=col,
+                 legendgroup=col,
+                 showlegend=False,
+                 marker=dict(color=["black"]),
+             ),
+             row=1 + (i) // cols,
+             col=i % cols + 1,
+         )
+
+         shapes.append(
+             go.layout.Shape(
+                 type="line",
+                 x0=0,
+                 y0=y_optimal,
+                 x1=x_optimal,
+                 y1=y_optimal,
+                 line_width=1,
+                 line_dash="dash",
+                 line_color="black",
+                 xref=f"x{i+1}",
+                 yref=f"y{i+1}",
+             )
+         )
+
+         shapes.append(
+             go.layout.Shape(
+                 type="line",
+                 x0=x_optimal,
+                 y0=0,
+                 x1=x_optimal,
+                 y1=y_optimal,
+                 line_width=1,
+                 line_dash="dash",
+                 line_color="black",
+                 xref=f"x{i+1}",
+                 yref=f"y{i+1}",
+             )
+         )
+
+         start_value, end_value, left_value, right_value = find_segment_value(
+             x,
+             roi,
+             marginal_roi,
+         )
+
+         # Adding background colors
+         y_max = y.max() * 1.3  # 30% extra space above the max
+
+         # Yellow region
+         shapes.append(
+             go.layout.Shape(
+                 type="rect",
+                 x0=start_value,
+                 y0=0,
+                 x1=left_value,
+                 y1=y_max,
+                 line=dict(width=0),
+                 fillcolor="rgba(255, 255, 0, 0.3)",
+                 layer="below",
+                 xref=f"x{i+1}",
+                 yref=f"y{i+1}",
+             )
+         )
+
+         # Green region
+         shapes.append(
+             go.layout.Shape(
+                 type="rect",
+                 x0=left_value,
+                 y0=0,
+                 x1=right_value,
+                 y1=y_max,
+                 line=dict(width=0),
+                 fillcolor="rgba(0, 255, 0, 0.3)",
+                 layer="below",
+                 xref=f"x{i+1}",
+                 yref=f"y{i+1}",
+             )
+         )
+
+         # Red region
+         shapes.append(
+             go.layout.Shape(
+                 type="rect",
+                 x0=right_value,
+                 y0=0,
+                 x1=end_value,
+                 y1=y_max,
+                 line=dict(width=0),
+                 fillcolor="rgba(255, 0, 0, 0.3)",
+                 layer="below",
+                 xref=f"x{i+1}",
+                 yref=f"y{i+1}",
+             )
+         )
+
+     fig.update_layout(
+         # height=1000,
+         # width=1000,
+         title_text="Response Curves (X: Spends Vs Y: Revenue)",
+         showlegend=False,
+         shapes=shapes,
+     )
+     fig.update_annotations(font_size=10)
+     # fig.update_xaxes(title="Spends")
+     # fig.update_yaxes(title=target)
+     fig.update_yaxes(
+         gridcolor="rgba(136, 136, 136, 0.5)", gridwidth=0.5, griddash="dash"
+     )
+
+     return fig
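+ # The marginal ROI above uses the logistic identity s'(x) = a * s(x) * (1 - s(x)/K),
+ # i.e. the derivative of s_curve expressed through the curve value itself.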
+
+
+ # @st.cache
+ # def plot_response_curves():
+ #     cols = 4
+ #     rcs = st.session_state["rcs"]
+ #     shapes = []
+ #     fig = make_subplots(rows=6, cols=cols, subplot_titles=channels_list)
+ #     for i in range(0, len(channels_list)):
+ #         col = channels_list[i]
+ #         x = st.session_state["actual_df"][col].values
+ #         spends = x.sum()
+ #         power = np.ceil(np.log(x.max()) / np.log(10)) - 3
+ #         x = np.linspace(0, 3 * x.max(), 200)
+
+ #         K = rcs[col]["K"]
+ #         b = rcs[col]["b"]
+ #         a = rcs[col]["a"]
+ #         x0 = rcs[col]["x0"]
+
+ #         y = s_curve(x / 10**power, K, b, a, x0)
+ #         roi = y / x
+ #         marginal_roi = a * (y) * (1 - y / K)
+ #         fig.add_trace(
+ #             go.Scatter(
+ #                 x=52
+ #                 * x
+ #                 * st.session_state["scenario"].channels[col].conversion_rate,
+ #                 y=52 * y,
+ #                 name=col,
+ #                 customdata=np.stack((roi, marginal_roi), axis=-1),
+ #                 hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
+ #             ),
+ #             row=1 + (i) // cols,
+ #             col=i % cols + 1,
+ #         )
+
+ #         fig.add_trace(
+ #             go.Scatter(
+ #                 x=[
+ #                     spends
+ #                     * st.session_state["scenario"]
+ #                     .channels[col]
+ #                     .conversion_rate
+ #                 ],
+ #                 y=[52 * s_curve(spends / (10**power * 52), K, b, a, x0)],
+ #                 name=col,
+ #                 legendgroup=col,
+ #                 showlegend=False,
+ #                 marker=dict(color=["black"]),
+ #             ),
+ #             row=1 + (i) // cols,
+ #             col=i % cols + 1,
+ #         )
+
+ #         shapes.append(
+ #             go.layout.Shape(
+ #                 type="line",
+ #                 x0=0,
+ #                 y0=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
+ #                 x1=spends
+ #                 * st.session_state["scenario"].channels[col].conversion_rate,
+ #                 y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
+ #                 line_width=1,
+ #                 line_dash="dash",
+ #                 line_color="black",
+ #                 xref=f"x{i+1}",
+ #                 yref=f"y{i+1}",
+ #             )
+ #         )
+
+ #         shapes.append(
+ #             go.layout.Shape(
+ #                 type="line",
+ #                 x0=spends
+ #                 * st.session_state["scenario"].channels[col].conversion_rate,
+ #                 y0=0,
+ #                 x1=spends
+ #                 * st.session_state["scenario"].channels[col].conversion_rate,
+ #                 y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
+ #                 line_width=1,
+ #                 line_dash="dash",
+ #                 line_color="black",
+ #                 xref=f"x{i+1}",
+ #                 yref=f"y{i+1}",
+ #             )
+ #         )
+
+ #     fig.update_layout(
+ #         height=1500,
+ #         width=1000,
+ #         title_text="Response Curves",
+ #         showlegend=False,
+ #         shapes=shapes,
+ #     )
+ #     fig.update_annotations(font_size=10)
+ #     fig.update_xaxes(title="Spends")
+ #     fig.update_yaxes(title=target)
+ #     return fig
+
+
+ # ======================================================== #
+ # ==================== HTML Components =================== #
+ # ======================================================== #
+
+
+ def generate_spending_header(heading):
+     return st.markdown(
+         f"""<h2 class="spends-header">{heading}</h2>""", unsafe_allow_html=True
+     )
+
+
+ # ======================================================== #
+ # =================== Session variables ================== #
+ # ======================================================== #
+
+ with open("config.yaml") as file:
+     config = yaml.load(file, Loader=SafeLoader)
+     st.session_state["config"] = config
+
+ authenticator = stauth.Authenticate(
+     config["credentials"],
+     config["cookie"]["name"],
+     config["cookie"]["key"],
+     config["cookie"]["expiry_days"],
+     config["preauthorized"],
+ )
+ st.session_state["authenticator"] = authenticator
+ name, authentication_status, username = authenticator.login("Login", "main")
+ auth_status = st.session_state.get("authentication_status")
+ if auth_status == True:
+     authenticator.logout("Logout", "main")
+     is_state_initialized = st.session_state.get("initialized", False)
+     if not is_state_initialized:
+         initialize_data()
+
+     channels_list = st.session_state["channels_list"]
+
+     # ======================================================== #
+     # ========================== UI ========================== #
+     # ======================================================== #
+
+     # print(list(st.session_state.keys()))
+
+     st.header("Simulation")
+     main_header = st.columns((2, 2))
+     sub_header = st.columns((1, 1, 1, 1))
+     _scenario = st.session_state["scenario"]
+
+     if "total_spends_change_abs" not in st.session_state:
+         st.session_state["total_spends_change_abs"] = numerize(
+             _scenario.actual_total_spends, 1
+         )
+
+     if "total_sales_change_abs" not in st.session_state:
+         st.session_state["total_sales_change_abs"] = numerize(
+             _scenario.actual_total_sales, 1
+         )
+
+     with main_header[0]:
+         st.subheader("Actual")
+
+     with main_header[-1]:
+         st.subheader("Simulated")
+
+     with sub_header[0]:
+         st.metric(
+             label="Spends", value=format_numbers(_scenario.actual_total_spends)
+         )
+
+     with sub_header[1]:
+         st.metric(
+             label=target,
+             value=format_numbers(
+                 float(_scenario.actual_total_sales), include_indicator=False
+             ),
+         )
+
+     with sub_header[2]:
+         st.metric(
+             label="Spends",
+             value=format_numbers(_scenario.modified_total_spends),
+             delta=numerize(_scenario.delta_spends, 1),
+         )
+
+     with sub_header[3]:
+         st.metric(
+             label=target,
+             value=format_numbers(
+                 float(_scenario.modified_total_sales), include_indicator=False
+             ),
+             delta=numerize(_scenario.delta_sales, 1),
+         )
+
+     with st.expander("Channel Spends Simulator"):
+         _columns1 = st.columns((2, 2, 1, 1))
+         with _columns1[0]:
+             optimization_selection = st.selectbox(
+                 "Optimize", options=["Spends", "Sales"], key="optimization_key"
+             )
+         with _columns1[1]:
+             st.markdown("#")
+             st.checkbox(
+                 label="Optimize all Channels",
+                 key=f"optimze_all_channels",
+                 value=False,
+                 on_change=select_all_channels_for_optimization,
+             )
+
+         with _columns1[2]:
+             st.markdown("#")
+             st.button(
+                 "Optimize",
+                 on_click=optimize,
+                 args=(st.session_state["optimization_key"],),
+             )
+
+         with _columns1[3]:
+             st.markdown("#")
+             st.button("Reset", on_click=reset_scenario)
+
+         _columns2 = st.columns((2, 2, 2))
+         if st.session_state["optimization_key"] == "Spends":
+             with _columns2[0]:
+                 spend_input = st.text_input(
+                     "Absolute",
+                     key="total_spends_change_abs",
+                     # label_visibility="collapsed",
+                     on_change=update_all_spends_abs,
+                 )
+             with _columns2[1]:
+                 st.number_input(
+                     "Percent",
+                     key=f"total_spends_change",
+                     step=1,
+                     on_change=update_all_spends,
+                 )
+         elif st.session_state["optimization_key"] == "Sales":
+             with _columns2[0]:
+                 sales_input = st.text_input(
+                     "Absolute",
+                     key="total_sales_change_abs",
+                     on_change=update_sales_abs,
+                 )
+             with _columns2[1]:
+                 st.number_input(
+                     "Percent change",
+                     key=f"total_sales_change",
+                     step=1,
+                     on_change=update_sales,
+                 )
+
+         with _columns2[2]:
+             st.markdown("#")
+             status_placeholder = st.empty()
+
+         st.markdown(
+             """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
+         )
+         _columns = st.columns((2.5, 2, 1.5, 1.5, 1))
+         with _columns[0]:
+             generate_spending_header("Channel")
+         with _columns[1]:
+             generate_spending_header("Spends Input")
+         with _columns[2]:
+             generate_spending_header("Spends")
+         with _columns[3]:
+             generate_spending_header(target)
+         with _columns[4]:
+             generate_spending_header("Optimize")
+
+         st.markdown(
+             """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
+         )
+
+         if "acutual_predicted" not in st.session_state:
+             st.session_state["acutual_predicted"] = {
+                 "Channel_name": [],
+                 "Actual_spend": [],
+                 "Optimized_spend": [],
+                 "Delta": [],
+             }
+         for i, channel_name in enumerate(channels_list):
+             _channel_class = st.session_state["scenario"].channels[
+                 channel_name
+             ]
+             _columns = st.columns((2.5, 1.5, 1.5, 1.5, 1))
+             with _columns[0]:
+                 st.write(channel_name_formating(channel_name))
+                 bin_placeholder = st.container()
+
+             with _columns[1]:
+                 channel_bounds = _channel_class.bounds
+                 channel_spends = float(_channel_class.actual_total_spends)
+                 min_value = float(
+                     (1 + channel_bounds[0] / 100) * channel_spends
+                 )
+                 max_value = float(
+                     (1 + channel_bounds[1] / 100) * channel_spends
+                 )
+                 # print(st.session_state[channel_name])
+                 spend_input = st.text_input(
+                     channel_name,
+                     key=channel_name,
+                     label_visibility="collapsed",
+                     on_change=partial(update_data, channel_name),
+                 )
+                 if not validate_input(spend_input):
+                     st.error("Invalid input")
+
+                 st.number_input(
+                     "Percent change",
+                     key=f"{channel_name}_change",
+                     step=1,
+                     on_change=partial(update_data_by_percent, channel_name),
+                 )
+
+             with _columns[2]:
+                 # spends
+                 current_channel_spends = float(
+                     _channel_class.modified_total_spends
+                     * _channel_class.conversion_rate
+                 )
+                 actual_channel_spends = float(
+                     _channel_class.actual_total_spends
+                     * _channel_class.conversion_rate
+                 )
+                 spends_delta = float(
+                     _channel_class.delta_spends
+                     * _channel_class.conversion_rate
+                 )
+                 st.session_state["acutual_predicted"]["Channel_name"].append(
+                     channel_name
+                 )
+                 st.session_state["acutual_predicted"]["Actual_spend"].append(
+                     actual_channel_spends
+                 )
+                 st.session_state["acutual_predicted"][
+                     "Optimized_spend"
+                 ].append(current_channel_spends)
+                 st.session_state["acutual_predicted"]["Delta"].append(
+                     spends_delta
+                 )
+                 ## REMOVE
+                 st.metric(
+                     "Spends",
+                     format_numbers(current_channel_spends),
+                     delta=numerize(spends_delta, 1),
+                     label_visibility="collapsed",
+                 )
+
+             with _columns[3]:
+                 # sales
+                 current_channel_sales = float(
+                     _channel_class.modified_total_sales
+                 )
+                 actual_channel_sales = float(_channel_class.actual_total_sales)
+                 sales_delta = float(_channel_class.delta_sales)
+                 st.metric(
+                     target,
+                     format_numbers(
+                         current_channel_sales, include_indicator=False
+                     ),
+                     delta=numerize(sales_delta, 1),
+                     label_visibility="collapsed",
+                 )
+
+             with _columns[4]:
+                 st.checkbox(
+                     label="select for optimization",
+                     key=f"{channel_name}_selected",
+                     value=False,
+                     on_change=partial(
+                         select_channel_for_optimization, channel_name
+                     ),
+                     label_visibility="collapsed",
+                 )
+
+             st.markdown(
+                 """<hr class="spends-child-seperator">""",
+                 unsafe_allow_html=True,
+             )
978
+ # Bins
979
+ col = channels_list[i]
980
+ x_actual = st.session_state["scenario"].channels[col].actual_spends
981
+ x_modified = (
982
+ st.session_state["scenario"].channels[col].modified_spends
983
+ )
984
+
985
+ x_total = x_modified.sum()
986
+ power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
987
+
988
+ K = st.session_state["rcs"][col]["K"]
989
+ b = st.session_state["rcs"][col]["b"]
990
+ a = st.session_state["rcs"][col]["a"]
991
+ x0 = st.session_state["rcs"][col]["x0"]
992
+
993
+ x_plot = np.linspace(0, 5 * x_actual.sum(), 200)
994
+
995
+ x, y, marginal_roi = [], [], []
996
+ for x_p in x_plot:
997
+ x.append(x_p * x_actual / x_actual.sum())
998
+
999
+ for index in range(len(x_plot)):
1000
+ y.append(s_curve(x[index] / 10**power, K, b, a, x0))
1001
+
1002
+ for index in range(len(x_plot)):
1003
+ marginal_roi.append(
1004
+ a
1005
+ * y[index]
1006
+ * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
1007
+ )
1008
+
1009
+ x = (
1010
+ np.sum(x, axis=1)
1011
+ * st.session_state["scenario"].channels[col].conversion_rate
1012
+ )
1013
+ y = np.sum(y, axis=1)
1014
+ marginal_roi = (
1015
+ np.average(marginal_roi, axis=1)
1016
+ / st.session_state["scenario"].channels[col].conversion_rate
1017
+ )
1018
+
1019
+ roi = y / np.maximum(x, np.finfo(float).eps)
1020
+
+         start_value, end_value, left_value, right_value = find_segment_value(
+             x,
+             roi,
+             marginal_roi,
+         )
+
+         rgba = calculate_rgba(
+             start_value,
+             end_value,
+             left_value,
+             right_value,
+             current_channel_spends,
+         )
+
+         # Protecting division by zero by adding a small epsilon to denominators
+         roi_current = current_channel_sales / np.maximum(
+             current_channel_spends, np.finfo(float).eps
+         )
+         marginal_roi_current = (
+             st.session_state["scenario"].channels[col].get_marginal_roi("modified")
+         )
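Note: `find_segment_value` and `calculate_rgba` are helpers defined earlier in the file. Judging by the call sites, the first returns spend-axis boundaries (an overall start/end plus an efficient left/right band) and the second colours the ROI card by where the current spend falls. A purely illustrative sketch, with colours and thresholds assumed:

def calculate_rgba(start_value, end_value, left_value, right_value, current_spend):
    # Hypothetical mapping: green inside the efficient band, yellow in the
    # remaining [start, end] range, red outside it; alpha chosen arbitrarily.
    if left_value <= current_spend <= right_value:
        return "rgba(0, 128, 0, 0.4)"
    if start_value <= current_spend <= end_value:
        return "rgba(255, 205, 0, 0.4)"
    return "rgba(255, 0, 0, 0.4)"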
+
+         with bin_placeholder:
+             st.markdown(
+                 f"""
+                 <div style="
+                     border-radius: 12px;
+                     background-color: {rgba};
+                     padding: 10px;
+                     text-align: center;
+                     color: #006EC0;
+                 ">
+                     <p style="margin: 0; font-size: 20px;">ROI: {round(roi_current, 1)}</p>
+                     <p style="margin: 0; font-size: 20px;">Marginal ROI: {round(marginal_roi_current, 1)}</p>
+                 </div>
+                 """,
+                 unsafe_allow_html=True,
+             )
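Note: the card above shows average versus marginal return. For example, with channel sales of 260 and spend of 100, ROI = 260 / 100 = 2.6; if the marginal ROI at that spend level is 0.8, each additional dollar is expected to return only $0.80, so the channel is saturating even though its average ROI still looks healthy.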
+
+     with st.expander("See Response Curves"):
+         fig = plot_response_curves()
+         st.plotly_chart(fig, use_container_width=True)
+
+     _columns = st.columns(2)
+     with _columns[0]:
+         st.subheader("Save Scenario")
+         scenario_name = st.text_input(
+             "Scenario name",
+             key="scenario_input",
+             placeholder="Scenario name",
+             label_visibility="collapsed",
+         )
+         st.button(
+             "Save",
+             on_click=lambda: save_scenario(scenario_name),
+             disabled=len(st.session_state["scenario_input"]) == 0,
+         )
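Note: `save_scenario` is defined earlier in this file. Given that pages/9_Saved_Scenarios.py (below) reads a `saved_scenarios` dict from session state and pickles it to `../saved_scenarios.pkl`, a minimal sketch — assuming a `class_to_dict` counterpart to the `class_from_dict` imported from `classes` — could be:

import pickle
import streamlit as st

def save_scenario(scenario_name):
    # Hypothetical sketch: serialize the live scenario object and persist the
    # whole dict so 9_Saved_Scenarios.py can list and reload it later.
    st.session_state["saved_scenarios"][scenario_name] = class_to_dict(
        st.session_state["scenario"]
    )
    with open("../saved_scenarios.pkl", "wb") as f:
        pickle.dump(st.session_state["saved_scenarios"], f)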
+
+     summary_df = pd.DataFrame(st.session_state["acutual_predicted"])
+     summary_df.drop_duplicates(subset="Channel_name", keep="last", inplace=True)
+
+     summary_df_sorted = summary_df.sort_values(by="Delta", ascending=False)
+     summary_df_sorted["Delta_percent"] = np.round(
+         (
+             (summary_df_sorted["Optimized_spend"] / summary_df_sorted["Actual_spend"])
+             - 1
+         )
+         * 100,
+         2,
+     )
+
+     with open("summary_df.pkl", "wb") as f:
+         pickle.dump(summary_df_sorted, f)
+     # st.dataframe(summary_df_sorted)
+     # ___columns = st.columns(3)
+     # with ___columns[2]:
+     #     fig = summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent')
+     #     st.plotly_chart(fig, use_container_width=True)
+     # with ___columns[0]:
+     #     fig = summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend')
+     #     st.plotly_chart(fig, use_container_width=True)
+     # with ___columns[1]:
+     #     fig = summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend')
+     #     st.plotly_chart(fig, use_container_width=True)
+
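Note: `Delta_percent` above is the relative change of optimized spend versus actual spend, not a share of total. A quick worked example with made-up numbers:

optimized_spend, actual_spend = 120_000.0, 100_000.0
delta_percent = round(((optimized_spend / actual_spend) - 1) * 100, 2)
print(delta_percent)  # 20.0 -> optimized spend is 20% above actual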
+ elif auth_status == False:
+     st.error("Username/Password is incorrect")
+
+ if auth_status != True:
+     try:
+         username_forgot_pw, email_forgot_password, random_password = (
+             authenticator.forgot_password("Forgot password")
+         )
+         if username_forgot_pw:
+             st.session_state["config"]["credentials"]["usernames"][
+                 username_forgot_pw
+             ]["password"] = stauth.Hasher([random_password]).generate()[0]
+             send_email(email_forgot_password, random_password)
+             st.success("New password sent securely")
+             # Random password to be transferred to user securely
+         elif username_forgot_pw == False:
+             st.error("Username not found")
+     except Exception as e:
+         st.error(e)
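Note: `send_email` is called above but not defined in this hunk. A minimal stdlib sketch — the host, port, sender and credentials below are placeholders, not values from this repo:

import smtplib
from email.message import EmailMessage

def send_email(recipient, temporary_password):
    # Hypothetical sketch: mail the regenerated password to the user.
    msg = EmailMessage()
    msg["Subject"] = "Your new password"
    msg["From"] = "noreply@example.com"  # placeholder sender
    msg["To"] = recipient
    msg.set_content(f"Your temporary password is: {temporary_password}")
    with smtplib.SMTP_SSL("smtp.example.com", 465) as server:  # placeholder host
        server.login("noreply@example.com", "app-password")    # placeholder creds
        server.send_message(msg)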
pages/9_Saved_Scenarios.py ADDED
@@ -0,0 +1,276 @@
+ import streamlit as st
+ from numerize.numerize import numerize
+ import io
+ import pandas as pd
+ from utilities import (format_numbers, decimal_formater,
+                        channel_name_formating,
+                        load_local_css, set_header,
+                        initialize_data,
+                        load_authenticator)
+ from openpyxl import Workbook
+ from openpyxl.styles import Alignment, Font, PatternFill
+ import pickle
+ import streamlit_authenticator as stauth
+ import yaml
+ from yaml import SafeLoader
+ from classes import class_from_dict
+
+ st.set_page_config(layout='wide')
+ load_local_css('styles.css')
+ set_header()
+
+ # for k, v in st.session_state.items():
+ #     if k not in ['logout', 'login', 'config'] and not k.startswith('FormSubmitter'):
+ #         st.session_state[k] = v
+
+ def create_scenario_summary(scenario_dict):
+     summary_rows = []
+     for channel_dict in scenario_dict['channels']:
+         name_mod = channel_name_formating(channel_dict['name'])
+         summary_rows.append([
+             name_mod,
+             channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate'),
+             channel_dict.get('modified_total_spends') * channel_dict.get('conversion_rate'),
+             channel_dict.get('actual_total_sales'),
+             channel_dict.get('modified_total_sales'),
+             channel_dict.get('actual_total_sales') / (channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate')),
+             channel_dict.get('modified_total_sales') / (channel_dict.get('modified_total_spends') * channel_dict.get('conversion_rate')),
+             channel_dict.get('actual_mroi'),
+             channel_dict.get('modified_mroi'),
+             channel_dict.get('actual_total_spends') * channel_dict.get('conversion_rate') / channel_dict.get('actual_total_sales'),
+             channel_dict.get('modified_total_spends') * channel_dict.get('conversion_rate') / channel_dict.get('modified_total_sales'),
+         ])
+
+     summary_rows.append([
+         'Total',
+         scenario_dict.get('actual_total_spends'),
+         scenario_dict.get('modified_total_spends'),
+         scenario_dict.get('actual_total_sales'),
+         scenario_dict.get('modified_total_sales'),
+         scenario_dict.get('actual_total_sales') / scenario_dict.get('actual_total_spends'),
+         scenario_dict.get('modified_total_sales') / scenario_dict.get('modified_total_spends'),
+         '-',
+         '-',
+         scenario_dict.get('actual_total_spends') / scenario_dict.get('actual_total_sales'),
+         scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales'),
+     ])
+
+     columns_index = pd.MultiIndex.from_product([[''], ['Channel']], names=["first", "second"])
+     columns_index = columns_index.append(pd.MultiIndex.from_product(
+         [['Spends', 'NRPU', 'ROI', 'MROI', 'Spend per NRPU'], ['Actual', 'Simulated']],
+         names=["first", "second"]))
+     return pd.DataFrame(summary_rows, columns=columns_index)
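Note: the two-level header built above pairs each metric with an Actual/Simulated sub-column. A quick self-contained check of that construction (shortened to two metrics):

import pandas as pd

cols = pd.MultiIndex.from_product([[''], ['Channel']], names=["first", "second"])
cols = cols.append(pd.MultiIndex.from_product(
    [['Spends', 'NRPU'], ['Actual', 'Simulated']], names=["first", "second"]))
print(list(cols))
# [('', 'Channel'), ('Spends', 'Actual'), ('Spends', 'Simulated'),
#  ('NRPU', 'Actual'), ('NRPU', 'Simulated')]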
+
+
+ def summary_df_to_worksheet(df, ws):
+     heading_fill = PatternFill(fill_type='solid', start_color='FF11B6BD', end_color='FF11B6BD')
+     # Write both levels of the MultiIndex header into rows 1 and 2.
+     for j, header in enumerate(df.columns.values):
+         col = j + 1
+         for i in range(1, 3):
+             ws.cell(row=i, column=j + 1, value=header[i - 1]).font = Font(bold=True, color='FF11B6BD')
+             ws.cell(row=i, column=j + 1).fill = heading_fill
+         if col > 1 and (col - 6) % 5 == 0:
+             ws.merge_cells(start_row=1, end_row=1, start_column=col - 3, end_column=col)
+             ws.cell(row=1, column=col).alignment = Alignment(horizontal='center')
+     for i, row in enumerate(df.itertuples()):
+         for j, value in enumerate(row):
+             if j == 0:
+                 # Skip the index emitted as the first element of each itertuples() row.
+                 continue
+             elif (j - 2) % 4 == 0 or (j - 3) % 4 == 0:
+                 ws.cell(row=i + 3, column=j, value=value).number_format = '$#,##0.0'
+             else:
+                 ws.cell(row=i + 3, column=j, value=value)
+
+ from openpyxl.utils import get_column_letter
+ from openpyxl.styles import Font, PatternFill
+ import logging
+
+ def scenario_df_to_worksheet(df, ws):
+     heading_fill = PatternFill(start_color='FF11B6BD', end_color='FF11B6BD', fill_type='solid')
+
+     for j, header in enumerate(df.columns.values):
+         cell = ws.cell(row=1, column=j + 1, value=header)
+         cell.font = Font(bold=True, color='FF11B6BD')
+         cell.fill = heading_fill
+
+     for i, row in enumerate(df.itertuples()):
+         for j, value in enumerate(row[1:], start=1):  # Start from index 1 to skip the index column
+             try:
+                 cell = ws.cell(row=i + 2, column=j, value=value)
+                 if isinstance(value, (int, float)):
+                     cell.number_format = '$#,##0.0'
+                 elif isinstance(value, str):
+                     # 32,767 characters is Excel's per-cell limit.
+                     cell.value = value[:32767]
+                 else:
+                     cell.value = str(value)
+             except ValueError as e:
+                 logging.error(f"Error assigning value '{value}' to cell {get_column_letter(j)}{i+2}: {e}")
+                 cell.value = None  # Assign None to the cell where the error occurred
+
+     return ws
+
+ def download_scenarios():
+     """
+     Builds an Excel workbook of all selected scenarios and stores it in the
+     in-memory download buffer.
+     """
+     ## create summary page
+     if len(scenarios_to_download) == 0:
+         return
+     wb = Workbook()
+     wb.iso_dates = True
+     wb.remove(wb.active)
+     st.session_state['xlsx_buffer'] = io.BytesIO()
+     summary_df = None
+     # print(scenarios_to_download)
+     for scenario_name in scenarios_to_download:
+         scenario_dict = st.session_state['saved_scenarios'][scenario_name]
+         _spends = []
+         column_names = ['Date']
+         _sales = None
+         dates = None
+         summary_rows = []
+         for channel in scenario_dict['channels']:
+             if dates is None:
+                 dates = channel.get('dates')
+                 _spends.append(dates)
+             if _sales is None:
+                 _sales = channel.get('modified_sales')
+             else:
+                 _sales += channel.get('modified_sales')
+             _spends.append(channel.get('modified_spends') * channel.get('conversion_rate'))
+             column_names.append(channel.get('name'))
+
+             name_mod = channel_name_formating(channel['name'])
+             summary_rows.append([
+                 name_mod,
+                 channel.get('modified_total_spends') * channel.get('conversion_rate'),
+                 channel.get('modified_total_sales'),
+                 channel.get('modified_total_sales') / (channel.get('modified_total_spends') * channel.get('conversion_rate')),
+                 channel.get('modified_mroi'),
+                 (channel.get('modified_total_spends') * channel.get('conversion_rate')) / channel.get('modified_total_sales'),
+             ])
+         _spends.append(_sales)
+         column_names.append('NRPU')
+         scenario_df = pd.DataFrame(_spends).T
+         scenario_df.columns = column_names
+         ## write to sheet
+         ws = wb.create_sheet(scenario_name)
+         scenario_df_to_worksheet(scenario_df, ws)
+         summary_rows.append([
+             'Total',
+             scenario_dict.get('modified_total_spends'),
+             scenario_dict.get('modified_total_sales'),
+             scenario_dict.get('modified_total_sales') / scenario_dict.get('modified_total_spends'),
+             '-',
+             scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales'),
+         ])
+         columns_index = pd.MultiIndex.from_product([[''], ['Channel']], names=["first", "second"])
+         columns_index = columns_index.append(pd.MultiIndex.from_product(
+             [[scenario_name], ['Spends', 'NRPU', 'ROI', 'MROI', 'Spends per NRPU']],
+             names=["first", "second"]))
+         if summary_df is None:
+             summary_df = pd.DataFrame(summary_rows, columns=columns_index)
+             summary_df = summary_df.set_index(('', 'Channel'))
+         else:
+             _df = pd.DataFrame(summary_rows, columns=columns_index)
+             _df = _df.set_index(('', 'Channel'))
+             summary_df = summary_df.merge(_df, left_index=True, right_index=True)
+     ws = wb.create_sheet('Summary', 0)
+     summary_df_to_worksheet(summary_df.reset_index(), ws)
+     wb.save(st.session_state['xlsx_buffer'])
+     st.session_state['disable_download_button'] = False
+
+ def disable_download_button():
+     st.session_state['disable_download_button'] = True
+
+ def transform(x):
+     if x.name == ('', 'Channel'):
+         return x
+     elif x.name[0] == 'ROI' or x.name[0] == 'MROI':
+         return x.apply(lambda y: y if isinstance(y, str) else decimal_formater(format_numbers(y, include_indicator=False, n_decimals=4), n_decimals=4))
+     else:
+         return x.apply(lambda y: y if isinstance(y, str) else format_numbers(y))
+
+ def delete_scenario():
+     if selected_scenario in st.session_state['saved_scenarios']:
+         del st.session_state['saved_scenarios'][selected_scenario]
+         with open('../saved_scenarios.pkl', 'wb') as f:
+             pickle.dump(st.session_state['saved_scenarios'], f)
+
+ def load_scenario():
+     if selected_scenario in st.session_state['saved_scenarios']:
+         st.session_state['scenario'] = class_from_dict(selected_scenario_details)
+
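Note: `transform` is applied column-wise via `DataFrame.transform` further down; each column arrives as a Series whose `.name` is its MultiIndex tuple, which is how the ROI/MROI columns get extra decimals. A small self-contained illustration of that dispatch mechanism:

import pandas as pd

df = pd.DataFrame({('', 'Channel'): ['TV'], ('ROI', 'Actual'): [2.4567]})

def per_column(col):
    # col is a Series; col.name is the MultiIndex tuple for that column.
    return col if col.name == ('', 'Channel') else col.round(2)

print(df.transform(per_column))  # ROI column rounded, Channel column untouched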
+
+ authenticator = st.session_state.get('authenticator')
+ if authenticator is None:
+     authenticator = load_authenticator()
+
+ name, authentication_status, username = authenticator.login('Login', 'main')
+ auth_status = st.session_state.get('authentication_status')
+
+ if auth_status == True:
+     is_state_initialized = st.session_state.get('initialized', False)
+     if not is_state_initialized:
+         # print("Scenario page state reloaded")
+         initialize_data()
+
+     saved_scenarios = st.session_state['saved_scenarios']
+
+     if len(saved_scenarios) == 0:
+         st.header('No saved scenarios')
+     else:
+         with st.sidebar:
+             selected_scenario = st.radio(
+                 'Pick a scenario to view details',
+                 list(saved_scenarios.keys())
+             )
+             st.markdown("""<hr>""", unsafe_allow_html=True)
+             scenarios_to_download = st.multiselect('Select scenarios to download',
+                                                    list(saved_scenarios.keys()))
+             st.button('Prepare download', on_click=download_scenarios)
+             st.download_button(
+                 label="Download Scenarios",
+                 data=st.session_state['xlsx_buffer'].getvalue(),
+                 file_name="scenarios.xlsx",
+                 mime="application/vnd.ms-excel",
+                 disabled=st.session_state['disable_download_button'],
+                 on_click=disable_download_button
+             )
+
+         column_1, column_2, column_3 = st.columns((6, 1, 1))
+         with column_1:
+             st.header(selected_scenario)
+         with column_2:
+             st.button('Delete scenarios', on_click=delete_scenario)
+         with column_3:
+             st.button('Load Scenario', on_click=load_scenario)
+
+         selected_scenario_details = saved_scenarios[selected_scenario]
+
+         pd.set_option('display.max_colwidth', 100)
+
+         st.markdown(create_scenario_summary(selected_scenario_details).transform(transform).style.set_table_styles(
+             [{
+                 'selector': 'th',
+                 'props': [('background-color', '#11B6BD')]
+             },
+             {
+                 'selector': 'tr:nth-child(even)',
+                 'props': [('background-color', '#11B6BD')]
+             }]).to_html(), unsafe_allow_html=True)
+
+ elif auth_status == False:
+     st.error('Username/Password is incorrect')
+
+ if auth_status != True:
+     try:
+         username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
+         if username_forgot_pw:
+             st.success('New password sent securely')
+             # Random password to be transferred to user securely
+         elif username_forgot_pw == False:
+             st.error('Username not found')
+     except Exception as e:
+         st.error(e)
pages/actual_data.csv ADDED
@@ -0,0 +1,158 @@
+ const,clicks_search_decay.2,impressions_tv_lag3,online_edu_trend_lag3,clicks_digital_lag2_decay.3,impressions_streaming_lag2_decay.4,covid_cases_lag3,unemployement_rate_lead4,season,flag_Aug_1,flag_Aug_2,flag_Aug_3,flag_dec_1,flag_dec_-1,flag_dec_-2,flag_dec_-3,flag_easter_-1,flag_easter_-2,flag_may_-1,flag_may_-2,flag_jun_-1,flag_jun_-2,covid_flag1,flag_june28,flag_aug13,flag_sep13,flag_mar_feb,date,total_prospect_id
+ 1.0,0.03264506089026503,0.0,0.0,0.0,0.11920857922376585,0.0,0.2448979591836735,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-10,3106
+ 1.0,0.1203178311529351,0.0,0.0,0.0,0.23575959332216032,0.0,0.2448979591836735,101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-17,7809
+ 1.0,0.037674240888288246,0.0,0.0,0.30427286753070926,0.14866425214344534,0.0,0.2448979591836735,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-24,5658
+ 1.0,0.114056065999327,0.25459834519940233,0.5700000000000001,0.3210660307498862,0.06375317695001911,0.0,0.2448979591836735,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-01,7528
+ 1.0,0.15091848146432302,0.04759636387261456,0.58,0.2652143429433443,0.02550166207848893,0.0,0.2380952380952381,104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-08,8913
+ 1.0,0.09691798534505919,0.0,0.41000000000000003,0.27398476053158455,0.22803554179688423,0.0,0.2380952380952381,105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-15,7974
+ 1.0,0.0,0.2185391903071715,0.53,0.3093665823461814,0.3016670242357716,0.0,0.2380952380952381,106,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-22,5034
+ 1.0,0.06818143419410627,0.0645557652165116,0.6,0.35005256364095544,0.3915886857834677,0.0,0.2380952380952381,107,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-29,8296
+ 1.0,0.19748095587743647,0.0,0.49,0.2866388037412839,0.4644891817948484,0.0,0.2380952380952381,108,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-05,10953
+ 1.0,0.2718903484441833,0.31632836028874944,0.42,0.38339772931601046,0.4758788391710054,0.0,0.2380952380952381,109,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-12,11583
+ 1.0,0.29329394272923165,0.710207473795361,0.56,0.4716341482535363,0.47415700741999534,0.0,0.2380952380952381,110,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2020-01-19,11650
+ 1.0,0.3150710926081645,0.6225458397661645,0.66,0.5560651882029227,0.2282082561307921,0.0,0.2380952380952381,111,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-26,10086
+ 1.0,0.23335326208386092,0.5093471390869946,0.65,0.5990392189890996,0.09128427138188955,0.0,0.2993197278911565,112,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-02,8454
+ 1.0,0.18339704064539092,0.46920681970876166,0.66,0.5097387360461574,0.03651393215188798,0.0,0.2993197278911565,113,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-09,7842
+ 1.0,0.1829206162885479,0.5702922924005152,0.64,0.3647117781342298,0.5333315970976881,0.0,0.2993197278911565,114,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-16,8528
+ 1.0,0.17708137647064887,0.4762803199026322,0.62,0.2994390381863003,0.9999999999999999,0.0,0.2993197278911565,115,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-23,9230
+ 1.0,0.2110785179466496,0.31643298954206356,0.65,0.318727924805625,0.5153399788387041,0.0,0.2993197278911565,116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-01,8210
+ 1.0,0.1922309642774856,0.35110354589746834,0.65,0.3435805763353255,0.20613623376787482,0.0,1.0,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-08,6573
+ 1.0,0.1174971533357681,0.4397302099507956,0.64,0.37079693119819457,0.08245451214041095,0.0,1.0,118,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2020-03-15,4464
+ 1.0,0.04487177585471158,0.5651604986093057,0.66,0.3797815418753292,0.032981804856164386,3.6661729553753427e-06,1.0,119,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2020-03-22,5498
+ 1.0,0.04417426781579725,0.5142518574426083,0.77,0.3239901926717436,0.013192796475509808,0.00016497778299189042,1.0,120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-29,7134
+ 1.0,0.09508966430933447,0.4246084040047787,1.0,0.22766051203571303,0.005277118590203924,0.01074555293220513,0.8979591836734694,121,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2020-04-05,6507
+ 1.0,0.1727148072921107,0.3306303340730278,0.92,0.2557126494916798,0.0021108474360815696,0.07506489126131015,0.8979591836734694,122,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2020-04-12,6752
+ 1.0,0.2757761792524949,0.9059477066272279,0.87,0.2910560761584964,0.0008443389744326279,0.11051311756683434,0.8979591836734694,123,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-04-19,7874
+ 1.0,0.46164669127102737,1.0,0.8200000000000001,0.29288325042575475,0.0003377355897730512,0.1323451775160945,0.8979591836734694,124,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-04-26,8706
+ 1.0,0.3631365926708698,0.8555262504044332,0.85,0.3143348639913703,0.00013509423590922048,0.12527679605813083,0.8979591836734694,125,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-03,9593
+ 1.0,0.3556269301486625,0.5998066602658987,0.8,0.3573452157072908,5.4838924587260594e-05,0.08418266340132861,0.7482993197278912,126,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-10,9554
+ 1.0,0.3898924329688705,0.31953123019194307,0.76,0.3492819601843694,0.08837696494340691,0.06699197841357364,0.7482993197278912,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-17,9461
+ 1.0,0.3270785638817633,0.5040802333471541,0.88,0.37224504100306005,0.12944061135952373,0.04806352744497074,0.7482993197278912,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-24,8347
+ 1.0,0.29596428185745655,0.6228739252579004,0.8300000000000001,0.3873711562094451,0.14079607140381442,0.028926104617911456,0.7482993197278912,129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-31,7926
+ 1.0,0.23446621861142697,0.644779308361226,0.8,0.3519020717491842,0.15750706055823313,0.024482702995996537,0.6938775510204082,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-07,8606
+ 1.0,0.2202508917985891,0.726916988225644,0.71,0.32726146750928653,0.0797309833640819,0.022000703905207433,0.6938775510204082,131,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-14,7573
+ 1.0,0.18610614076735926,0.5963517592669729,0.73,0.31618831243754153,0.03501476889363339,0.015086301711369536,0.6938775510204082,132,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2020-06-21,6983
+ 1.0,0.1568177529621934,0.6764095796293655,0.75,0.2836099513597926,0.014005944823975384,0.011489786042146325,0.6938775510204082,133,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-28,6277
+ 1.0,0.22774801916471138,0.6466210070345804,0.72,0.25409997289933184,0.006272411362367827,0.00871449311492719,0.5714285714285715,134,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-05,7421
+ 1.0,0.24542124594101095,0.6580063264819511,0.73,0.2516667689694555,0.05947462601462651,0.008318546435746652,0.5714285714285715,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-12,7852
+ 1.0,0.24895270375190542,0.32749815383926373,0.68,0.2671053898526598,0.0888609058832765,0.008014254080450499,0.5714285714285715,136,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-19,7396
+ 1.0,0.16285259960994197,0.3666961464656464,0.78,0.26077100654286645,0.12420199588573878,0.008058248155915004,0.5714285714285715,137,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-26,7041
+ 1.0,0.16864346155569104,0.39341698388602436,0.84,0.25893225300958655,0.10423952696584138,0.00920209411799211,0.5714285714285715,138,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-02,7470
+ 1.0,0.22582910125625383,0.41507293852636135,0.8300000000000001,0.2528768986269057,0.08197739941078482,0.009315745479608745,0.5374149659863946,139,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-09,8725
+ 1.0,0.2778946696783185,0.7857143231388266,0.8,0.2772125371796957,0.07178679747906064,0.007237025413910927,0.5374149659863946,140,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-16,9657
+ 1.0,0.3062154076077969,0.434016630925742,0.87,0.33174759696083367,0.12078972986041582,0.006500124649880482,0.5374149659863946,141,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2020-08-23,10000
+ 1.0,0.2851073700683267,0.4051792323256236,0.8200000000000001,0.3621387745268235,0.1539969659046611,0.006118842662521447,0.5374149659863946,142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-30,8941
+ 1.0,0.25999778433367665,0.4113785668398346,0.77,0.3604714968693371,0.1462622685965232,0.006375474769397721,0.4693877551020409,143,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-06,8507
+ 1.0,0.2947500457787596,0.43576671635701947,0.74,0.3084711376902622,0.1030893445960345,0.0060051913009048115,0.4693877551020409,144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2020-09-13,9887
+ 1.0,0.3239559328273078,0.40721834097732834,0.72,0.24061271129609485,0.08422768334333634,0.006456130574415978,0.4693877551020409,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-20,9627
+ 1.0,0.3189849597494306,0.4831656702512836,0.68,0.28577062852640756,0.054400116894051116,0.006401137980085348,0.4693877551020409,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-27,8735
+ 1.0,0.2930673557404469,0.5423730023996388,0.62,0.32330756771945346,0.02176006539088146,0.007566980979894707,0.45578231292517013,147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-04,8138
+ 1.0,0.27381401410957934,0.48862464971809444,0.59,0.33668984325037016,0.008704026156352586,0.009172764734349107,0.45578231292517013,148,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-11,7966
+ 1.0,0.21658154029531146,0.5162854532967293,0.55,0.44481231480084876,0.003481610462541034,0.012223020633221393,0.45578231292517013,149,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-18,8109
+ 1.0,0.21772903332032795,0.47368257634991157,0.6,0.46141705479304307,0.0013926441850164136,0.013601501664442522,0.45578231292517013,150,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-25,7848
+ 1.0,0.16712357438522701,0.5132571164009214,0.5,0.38402389059771924,0.0005570576740065655,0.012915927321787332,0.45578231292517013,151,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-01,6516
+ 1.0,0.1814031347156822,0.5409537987241609,0.5,0.2968208337801042,0.00022282306960262618,0.013091903623645349,0.45578231292517013,152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-08,7233
+ 1.0,0.16852532779394064,0.49490997931858044,0.5,0.22663075929954526,8.912922784105048e-05,0.014624363918992243,0.45578231292517013,153,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-15,7409
+ 1.0,0.10492104198879731,0.4086344123814518,0.41000000000000003,0.21669561761817938,3.565169113642019e-05,0.016127494830696133,0.45578231292517013,154,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-22,6232
+ 1.0,0.16920169406380464,0.45151008168804235,0.49,0.21833619946593313,1.4260676454568076e-05,0.024849320291534072,0.45578231292517013,155,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-29,8170
+ 1.0,0.1305885456099783,0.4543635808918873,0.47000000000000003,0.1596898931167178,5.704270581827231e-06,0.03519159419864792,0.435374149659864,156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-06,7075
+ 1.0,0.1214984593864375,0.35070760971315756,0.4,0.15417676852356046,2.2817082327308923e-06,0.041732046751037526,0.435374149659864,157,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-13,7379
+ 1.0,0.057042007816384965,0.32470890321593604,0.47000000000000003,0.15442387578570832,9.126832930923571e-07,0.049892947749703036,0.435374149659864,158,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-20,5442
+ 1.0,0.12406882983279183,0.3135816516054531,0.45,0.1671308209739812,3.650733172369429e-07,0.0686930826648678,0.435374149659864,159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-27,7735
+ 1.0,0.24786523070013738,0.3102913429236421,0.42,0.16347790840061424,1.4602932689477716e-07,0.0732574679943101,0.435374149659864,160,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-03,9754
+ 1.0,0.26083059672146286,0.2649240941306087,0.34,0.25327016920452516,5.841173075791087e-08,0.07444897420480709,0.4217687074829932,161,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-10,10641
+ 1.0,0.24028847292133387,0.6513962629200784,0.38,0.3773812732234543,2.3364692303164347e-08,0.08318546435746653,0.4217687074829932,162,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2021-01-17,10230
+ 1.0,0.31526302386797916,0.531674302460824,0.47000000000000003,0.3527386460097067,9.345876921265738e-09,0.10258685163731283,0.4217687074829932,163,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-24,10352
+ 1.0,0.2966293410018717,0.44836670500794606,0.47000000000000003,0.3711695518795665,3.738350768506295e-09,0.13234151134313912,0.4217687074829932,164,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-31,9216
+ 1.0,0.20088776123137192,0.3815806999416851,0.45,0.33580461662371014,1.4953403074025183e-09,0.12043744775703538,0.40816326530612246,165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-07,8421
+ 1.0,0.173394454128539,0.343687050600215,0.48,0.3277941002786073,5.981361229610074e-10,0.11271648751301491,0.40816326530612246,166,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-14,9281
+ 1.0,0.1777198044422716,0.33051072402008147,0.5,0.31487397296804576,2.3925444918440296e-10,0.109699227170741,0.40816326530612246,167,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-21,8891
+ 1.0,0.1850269016675808,0.30627520154343757,0.46,0.3133091660972597,9.570177967376119e-11,0.08255854878209734,0.40816326530612246,168,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-28,8169
+ 1.0,0.2529549962208855,0.298123038215738,0.42,0.3358964981168952,3.828071186950448e-11,0.08351908609640568,0.40816326530612246,169,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-07,8724
+ 1.0,0.213028120324469,0.3267901551549544,0.44,0.3038053348505854,1.531228474780179e-11,0.07285052279626343,0.40816326530612246,170,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-14,8194
+ 1.0,0.16441430466323353,0.25967469209260036,0.5,0.32087357753439977,6.124913899120717e-12,0.07822879852179906,0.40816326530612246,171,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-21,8254
+ 1.0,0.11053130189212229,0.260168451958828,0.42,0.3279459500984871,2.449965559648287e-12,0.07333812379932836,0.40816326530612246,172,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-28,7026
+ 1.0,0.06917021315146277,0.0,0.38,0.37411287881420296,9.799862238593149e-13,0.07465061371735272,0.39455782312925175,173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-04-04,6412
+ 1.0,0.06728264676731566,0.0,0.44,0.4347510050616973,3.9199448954372595e-13,0.0732721326861316,0.39455782312925175,174,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2021-04-11,6297
+ 1.0,0.10167805497311716,0.0,0.43,0.4574504815633023,1.5679779581749037e-13,0.07982724993034271,0.39455782312925175,175,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2021-04-18,6687
+ 1.0,0.1734619149834527,0.0,0.48,0.48912312446006045,6.271911832699615e-14,0.06941165256412136,0.39455782312925175,176,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-04-25,8430
+ 1.0,0.2040432878056308,0.0,0.46,0.44466429049983563,2.5087647330798465e-14,0.06276854716898124,0.39455782312925175,177,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-02,8025
+ 1.0,0.20788046814877387,0.0,0.48,0.5722675873212515,1.0035058932319387e-14,0.04882242524673344,0.40136054421768713,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-09,8242
+ 1.0,0.14929264058846564,0.0,0.5,0.45913415146070335,4.014023572927755e-15,0.033618806000791895,0.40136054421768713,179,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-16,8280
+ 1.0,0.11694210039888364,0.0,0.51,0.39528662679579885,1.6056094291711022e-15,0.025182942030473228,0.40136054421768713,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-23,7909
+ 1.0,0.055184035342337234,0.0,0.51,0.3880077087936407,6.422437716684409e-16,0.017652622780132275,0.40136054421768713,181,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-30,7574
+ 1.0,0.04358787034563821,0.0,0.5,0.3863265622647678,2.568975086673764e-16,0.012651962869000308,0.3673469387755103,182,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-06,7270
+ 1.0,0.03833609653008979,0.0,0.46,0.3784495643657444,1.0275900346695056e-16,0.008835476822454577,0.3673469387755103,183,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-13,6716
+ 1.0,0.06111263589867566,0.0,0.48,0.38862024435317233,4.1103601386780226e-17,0.005939200187708055,0.3673469387755103,184,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-20,6944
+ 1.0,0.07119833324643848,0.0,0.44,0.4039000969934476,1.644144055471209e-17,0.004967664354533589,0.3673469387755103,185,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2021-06-27,6803
+ 1.0,0.0659956847282599,0.0,0.45,0.4420872417106599,6.576576221884836e-18,0.004359079643941282,0.3537414965986395,186,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-04,7019
+ 1.0,0.12577031397293442,0.0,0.45,0.4950177419852857,2.630630488753935e-18,0.003977797656582247,0.3537414965986395,187,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-11,8254
+ 1.0,0.1502746019886232,0.0,0.45,0.5650602702260171,1.052252195501574e-18,0.0040621196345558795,0.3537414965986395,188,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-18,7804
+ 1.0,0.21001397285486328,0.0,0.42,0.594015126140436,4.209008782006296e-19,0.004952999662712088,0.3537414965986395,189,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-25,8212
+ 1.0,0.23464189851384848,0.0,0.46,0.5484130743981998,1.6836035128025183e-19,0.008076579020691881,0.3537414965986395,190,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-01,8378
+ 1.0,0.23496148203757855,0.0,0.47000000000000003,0.5324473242588711,6.734414051210074e-20,0.01220102359548914,0.3197278911564626,191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-08,9496
+ 1.0,0.23319893582092505,0.0,0.53,0.5532778727756644,2.6937656204840295e-20,0.020152952735698258,0.3197278911564626,192,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-15,9511
+ 1.0,0.23262329847201318,0.0,0.49,0.7309984534528141,1.0775062481936118e-20,0.029028757460661962,0.3197278911564626,193,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-22,9569
+ 1.0,0.18495638415853394,0.0,0.46,0.8724050615489382,4.310024992774448e-21,0.03698435277382646,0.3197278911564626,194,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-29,7928
+ 1.0,0.2921700012245981,0.0,0.49,1.0,1.7240099971097793e-21,0.03982197064128697,0.3129251700680272,195,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-05,7840
+ 1.0,0.4172971677569805,0.0,0.48,0.8193686075762131,6.896039988439117e-22,0.03868179085216524,0.3129251700680272,196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-12,9521
+ 1.0,0.5004920981884484,0.0,0.53,0.4496097944711011,2.758415995375647e-22,0.03902274493701515,0.3129251700680272,197,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-19,9451
+ 1.0,0.6383788968475093,0.0,0.47000000000000003,0.3701822126418114,1.1033663981502588e-22,0.03567186285580209,0.3129251700680272,198,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-26,8898
+ 1.0,0.6501651617929107,0.0,0.51,0.34258196039636274,4.413465592601035e-23,0.0352539191388893,0.3129251700680272,199,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-03,8441
+ 1.0,0.6649283374522998,0.0,0.51,0.31355701111053985,1.7653862370404143e-23,0.03635010485254652,0.28571428571428575,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-10,8788
+ 1.0,0.6097114754591861,0.0,0.51,0.32306971094469733,7.061544948161657e-24,0.031323781730726925,0.28571428571428575,201,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-17,9569
+ 1.0,0.3964279757062242,0.0,0.51,0.33051520280988034,2.8246179792646632e-24,0.02719933715592967,0.28571428571428575,202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-24,9008
+ 1.0,0.33105364706311086,0.0,0.47000000000000003,0.3259978333423606,1.1298471917058652e-24,0.025967503042923553,0.28571428571428575,203,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-31,8495
+ 1.0,0.31714045716637634,0.0,0.55,0.3045528431182349,4.519388766823461e-25,0.02263128565353199,0.2653061224489796,204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-07,8807
+ 1.0,0.28268319082761023,0.0,0.49,0.31370309424641213,1.8077555067293845e-25,0.01786159463858867,0.2653061224489796,205,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-14,8385
+ 1.0,0.15774740707436136,0.0,0.51,0.37945364695975814,7.231022026917538e-26,0.016409790148260033,0.2653061224489796,206,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-21,6964
+ 1.0,0.2836203500514554,0.0,0.55,0.36793503370466,2.892408810767015e-26,0.01882946429880776,0.2653061224489796,207,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-28,9340
+ 1.0,0.33646919882766096,0.0,0.49,0.3299836196379579,1.1569635243068062e-26,0.023555161238286576,0.272108843537415,208,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-05,8632
+ 1.0,0.361268166630245,0.0,0.38,0.3243428164088717,4.6278540972272255e-27,0.029421037966887126,0.272108843537415,209,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-12,9271
+ 1.0,0.21850759166298056,0.0,0.51,0.34100191273497404,1.8511416388908902e-27,0.029549354020325262,0.272108843537415,210,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-19,7663
+ 1.0,0.2156152088113536,0.0,0.43,0.3876459690915292,7.404566555563562e-28,0.04853646375621416,0.272108843537415,211,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-26,7888
+ 1.0,0.4122692273972545,0.0,0.42,0.44121852053456856,2.961826622225425e-28,0.07303383144403221,0.272108843537415,212,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-02,11088
+ 1.0,0.5580863257308297,0.0,0.42,0.33648328199770844,1.18473064889017e-28,0.2914790808171166,0.2585034013605442,213,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-09,12850
+ 1.0,0.5441541455767391,0.0,0.45,0.5258301345263098,4.7389225955606806e-29,0.6228644542534939,0.2585034013605442,214,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-16,12768
+ 1.0,0.37953926965668333,0.0,0.51,0.6191133700101356,1.8955690382242722e-29,1.0,0.2585034013605442,215,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2022-01-23,11023
+ 1.0,0.3422525462363791,0.0,0.5,0.6600516747429145,7.582276152897087e-30,0.8603298089190655,0.2585034013605442,216,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-30,10317
+ 1.0,0.3679329127754763,0.0,0.49,0.6150147631969254,3.0329104611588346e-30,0.3851571321728674,0.2448979591836735,217,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-02-06,10109
+ 1.0,0.3530129569359208,0.0,0.49,0.5435710104633258,1.2131641844635335e-30,0.18207314748280565,0.2448979591836735,218,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-13,10233
+ 1.0,0.3628237688509028,0.0,0.48,0.5395383650448762,4.852656737854129e-31,0.08532284319045035,0.2448979591836735,219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-20,10660
+ 1.0,0.3535562124344392,0.0,0.49,0.3713089856353334,1.941062695141646e-31,0.04778123212740684,0.2448979591836735,220,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-27,9862
+ 1.0,0.35851767100446613,0.0,0.49,0.33021424233802193,7.764250780566529e-32,0.028365180155739026,0.2448979591836735,221,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-06,10393
+ 1.0,0.3648140365425708,0.0,0.53,0.29899648842829235,3.105700312226557e-32,0.019053100849085656,0.2448979591836735,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-13,9914
+ 1.0,0.417768904168966,0.0,0.46,0.30801461857263196,1.242280124890568e-32,0.014096435013418193,0.2448979591836735,223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-20,11027
+ 1.0,0.45364666714531404,0.0,0.5,0.29874033139572204,4.9691204995617213e-33,0.013440190054406007,0.2448979591836735,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-03-27,10066
+ 1.0,0.45997433293937545,0.0,0.45,0.3080341285301519,1.9876481998241388e-33,0.014672024167412121,0.2448979591836735,225,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-04-03,8722
+ 1.0,0.4245480429075594,0.0,0.46,0.304189689538618,7.950592799291056e-34,0.01936472555029256,0.2448979591836735,226,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2022-04-10,7805
+ 1.0,0.4463068738641009,0.0,0.54,0.307818077305473,3.1802371197109226e-34,0.027822586558343475,0.2448979591836735,227,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2022-04-17,8519
+ 1.0,0.6012222981571669,0.0,0.53,0.29394180576819906,1.272094847878869e-34,0.033340176856183366,0.2448979591836735,228,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-04-24,10084
+ 1.0,0.6804106164543928,0.0,0.5,0.28219281269675367,5.088379391460478e-35,0.04576117082899503,0.2448979591836735,229,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-01,10291
+ 1.0,0.62805714350389,0.0,0.54,0.30839694661979145,2.035351756529193e-35,0.05172603422739071,0.2448979591836735,230,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-08,9743
+ 1.0,0.7470007501508245,0.0,0.54,0.3120111152265925,8.141407025566787e-36,0.04952999662712088,0.2448979591836735,231,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-15,10759
+ 1.0,0.6460736106378411,0.0,0.55,0.2905779236460707,3.25656280967673e-36,0.06457597043598129,0.2448979591836735,232,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-22,9845
+ 1.0,0.5732108245519132,0.0,0.52,0.38068837954927237,1.3026251233207076e-36,0.080201199571791,0.2448979591836735,233,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-29,9499
+ 1.0,0.5996683384067256,0.0,0.5,0.3940488499594224,5.210500487782985e-37,0.09049581323048496,0.40680272108843546,234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-05,10021
+ 1.0,0.5630659455826548,0.0,0.54,0.4539755399873685,2.0842001896133483e-37,0.09128037424293528,0.40680272108843546,235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-12,10112
+ 1.0,0.5482324249484887,0.0,0.45,0.48814019600803654,8.336800703454939e-38,0.08289217052103649,0.40680272108843546,236,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-19,10034
+ 1.0,0.5485743918729864,0.0,0.47000000000000003,0.475428506654356,3.3347202263835196e-38,0.06987359035649866,0.40680272108843546,237,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2022-06-26,9209
+ 1.0,0.5559932625646005,0.0,0.43,0.510072176038165,1.333888035554951e-38,0.06264756346145385,0.40680272108843546,238,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-03,10265
+ 1.0,0.6089718159266746,0.0,0.45,0.44215508529036335,5.33555159223524e-39,0.0627612148230705,0.40680272108843546,239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-10,10033
+ 1.0,0.6101706458097598,0.0,0.48,0.41550269661979555,2.1342200869095313e-39,0.07072780865510112,0.40680272108843546,240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-17,9790
+ 1.0,0.6111403594460636,0.0,0.44,0.437146146258812,8.536874847792479e-40,0.07964760745552932,0.40680272108843546,241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-24,9629
+ 1.0,0.6451477728019566,0.0,0.44,0.4975101423754845,3.4147444392713438e-40,0.0893739643061401,0.40680272108843546,242,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-31,10134
+ 1.0,0.7267513590970145,0.0,0.44,0.5042632593424633,1.3658922758628901e-40,0.09389435556011791,0.40680272108843546,243,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-07,12029
+ 1.0,0.832744074444703,0.0,0.46,0.5840915039533217,5.463514104995084e-41,0.08482790984147467,0.40680272108843546,244,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-14,12886
+ 1.0,0.8546151893753493,0.0,0.49,0.6374603327364593,2.1853506435415578e-41,0.07962194424484169,0.40680272108843546,245,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-21,12027
+ 1.0,0.9999999999999998,0.0,0.55,0.6022458246191313,8.740852589601472e-42,0.07178366646624922,0.40680272108843546,246,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-28,11375
+ 1.0,0.860672618209781,0.0,0.48,0.5735957859704555,3.495791051275827e-42,0.05725095687114135,0.40680272108843546,247,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-04,10824
+ 1.0,0.8622728019659036,0.0,0.54,0.5790428094946118,1.39776643594557e-42,0.050739833702394745,0.40680272108843546,248,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-11,12285
+ 1.0,0.7774120906393625,0.0,0.55,0.7618650061054455,5.585565898134668e-43,0.0440857297883885,0.40680272108843546,249,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-18,12146
+ 1.0,0.6580209603679659,0.0,0.52,0.8137272725878776,2.2287265136062566e-43,0.039975949905412735,0.40680272108843546,250,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-25,10881
+ 1.0,0.9480011027127861,0.0,0.52,0.7867690657367606,8.859907597948911e-44,0.03648941942485079,0.40680272108843546,251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-02,11373
+ 1.0,0.709096498806814,0.0,0.46,0.7292818780372798,3.4889645827034517e-44,0.04076784326377381,0.40680272108843546,252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-09,10230
+ 1.0,0.5414415970743589,0.0,0.45,0.6974583695681711,1.340587376605267e-44,0.04368978310920796,0.0,253,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-16,11557
+ 1.0,0.6081525119323576,0.0,0.54,0.6240593695822464,4.812364941659934e-45,0.041156457597043596,0.0,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-23,10805
+ 1.0,0.5960421531458853,0.0,0.45,0.5899287906913332,1.3749614119028383e-45,0.03843982343711047,0.0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-30,9709
+ 1.0,0.848521629204434,0.0,0.47000000000000003,0.6201930426013046,0.0,0.040723849188309305,0.0,256,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-11-06,10098