BlendMMM committed on
Commit 00b2c95
1 Parent(s): bb080e9

Upload 11 files
pages/1_Data_Validation.py CHANGED
@@ -4,20 +4,18 @@ import plotly.express as px
  import plotly.graph_objects as go
  from Eda_functions import *
  import numpy as np
  import pickle
  from streamlit_pandas_profiling import st_profile_report
  import streamlit as st
  import streamlit.components.v1 as components
  import sweetviz as sv
- from utilities import set_header,load_local_css
  from st_aggrid import GridOptionsBuilder,GridUpdateMode
  from st_aggrid import GridOptionsBuilder
  from st_aggrid import AgGrid
  import base64
- import os
- import tempfile
- #from ydata_profiling import ProfileReport
- import re

  st.set_page_config(
      page_title="Data Validation",
@@ -30,187 +28,155 @@ set_header()

- with open('data_import.pkl', 'rb') as f:
-     data = pickle.load(f)
-
- st.session_state['cleaned_data'] = data['final_df']
- st.session_state['category_dict'] = data['bin_dict']

  st.title('Data Validation and Insights')

- target_variables=[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Response Metrics']

  target_column = st.selectbox('Select the Target Feature/Dependent Variable (will be used in all charts as reference)',list(*target_variables))
  st.session_state['target_column']=target_column
- panels=st.session_state['category_dict']['Panel Level 1'][0]
- selected_panels=st.multiselect('Please choose the panels you wish to analyze. If no panels are selected, insights will be derived from the overall data.',st.session_state['cleaned_data'][panels].unique())
- aggregation_dict = {item: 'sum' if key == 'Media' else 'mean' for key, value in st.session_state['category_dict'].items() for item in value if item not in ['date','Panel_1']}

- with st.expander('**Response Metric Analysis**'):
-
-     if len(selected_panels)>0:
-         st.session_state['Cleaned_data_panel']=st.session_state['cleaned_data'][st.session_state['cleaned_data']['Panel_1'].isin(selected_panels)]
-
-         st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].groupby(by='date').agg(aggregation_dict)
-         st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].reset_index()
-     else:
-         st.session_state['Cleaned_data_panel']=st.session_state['cleaned_data'].groupby(by='date').agg(aggregation_dict)
-         st.session_state['Cleaned_data_panel']=st.session_state['Cleaned_data_panel'].reset_index()
-
-     fig=line_plot_target(st.session_state['Cleaned_data_panel'], target=target_column, title=f'{target_column} Over Time')
-     st.plotly_chart(fig, use_container_width=True)
-
-     media_channel=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Media'])
-     # st.write(media_channel)
-
-     Non_media_variables=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Exogenous' or key=='Internal'])
-
-     st.markdown('### Annual Data Summary')
-     st.dataframe(summary(st.session_state['Cleaned_data_panel'], media_channel+[target_column], spends=None,Target=True), use_container_width=True)
-
-     if st.checkbox('Show raw data'):
-         st.write(pd.concat([pd.to_datetime(st.session_state['Cleaned_data_panel']['date']).dt.strftime('%m/%d/%Y'),st.session_state['Cleaned_data_panel'].select_dtypes(np.number).applymap(format_numbers)],axis=1))
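The `aggregation_dict` built above flattens `category_dict` into a per-column aggregation map: columns under the 'Media' bucket are summed when the data is rolled up to one row per date, every other column is averaged, and `date`/`Panel_1` are excluded so they can serve as keys. A minimal sketch of what it produces (the two-bucket `category_dict` below is hypothetical; the real one comes from data_import.pkl):

    category_dict = {'Media': ['tv_spend', 'search_clicks'],
                     'Exogenous': ['cpi', 'date']}
    aggregation_dict = {item: 'sum' if key == 'Media' else 'mean'
                        for key, value in category_dict.items()
                        for item in value if item not in ['date', 'Panel_1']}
    # {'tv_spend': 'sum', 'search_clicks': 'sum', 'cpi': 'mean'}
    # cleaned_data.groupby(by='date').agg(aggregation_dict) then sums media and averages the rest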
  col1 = st.columns(1)

  if "selected_feature" not in st.session_state:
      st.session_state['selected_feature']=None

- def generate_report_with_target(channel_data, target_feature):
-     report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature)
-     temp_dir = tempfile.mkdtemp()
-     report_path = os.path.join(temp_dir, "report.html")
-     report.show_html(filepath=report_path, open_browser=False)  # Generate the report as an HTML file
-     return report_path
-
- def generate_profile_report(df):
-     pr = df.profile_report()
-     temp_dir = tempfile.mkdtemp()
-     report_path = os.path.join(temp_dir, "report.html")
-     pr.to_file(report_path)
-     return report_path
-
-
- #st.header()
- with st.expander('Univariate and Bivariate Report'):
-     eda_columns=st.columns(2)
-     with eda_columns[0]:
-         if st.button('Generate Profile Report',help='Univariate report which includes all statistical analysis'):
-             with st.spinner('Generating Report'):
-                 report_file = generate_profile_report(st.session_state['Cleaned_data_panel'])
-
-             if os.path.exists(report_file):
-                 with open(report_file, 'rb') as f:
-                     st.success('Report Generated')
-                     st.download_button(
-                         label="Download EDA Report",
-                         data=f.read(),
-                         file_name="pandas_profiling_report.html",
-                         mime="text/html"
-                     )
-             else:
-                 st.warning("Report generation failed. Unable to find the report file.")

      with eda_columns[1]:
-         if st.button('Generate Sweetviz Report',help='Bivariate report for selected response metric'):
-             with st.spinner('Generating Report'):
-                 report_file = generate_report_with_target(st.session_state['Cleaned_data_panel'], target_column)
-
-             if os.path.exists(report_file):
-                 with open(report_file, 'rb') as f:
-                     st.success('Report Generated')
-                     st.download_button(
-                         label="Download EDA Report",
-                         data=f.read(),
-                         file_name="report.html",
-                         mime="text/html"
-                     )
-             else:
-                 st.warning("Report generation failed. Unable to find the report file.")
-
-
- #st.warning('Work in Progress')
- with st.expander('Media Variables Analysis'):
-     # Get the selected feature
-     st.session_state["selected_feature"]= st.selectbox('Select media', [col for col in media_channel if 'cost' not in col.lower() and 'spend' not in col.lower()])
-
-     # Filter spends features based on the selected feature
-     spends_features = [col for col in st.session_state['Cleaned_data_panel'].columns if any(keyword in col.lower() for keyword in ['cost', 'spend'])]
-     spends_feature = [col for col in spends_features if re.split(r'_cost|_spend', col.lower())[0] in st.session_state["selected_feature"]]
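The list comprehension above pairs the selected media metric with its spends column by comparing the prefix that precedes `_cost`/`_spend`. A small illustration of the matching rule (the column names here are hypothetical):

    import re

    spends_features = ['tv_spend', 'search_cost_usd']
    selected_feature = 'tv_impressions'
    matched = [col for col in spends_features
               if re.split(r'_cost|_spend', col.lower())[0] in selected_feature]
    # re.split(r'_cost|_spend', 'tv_spend')[0] -> 'tv', and 'tv' in 'tv_impressions' -> True
    # matched == ['tv_spend']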
-
-     if 'validation' not in st.session_state:
-         st.session_state['validation']=[]
-
-     val_variables=[col for col in media_channel if col!='date']
-     if len(spends_feature)==0:
-         st.warning('No spends variable available for the selected metric in data')
-
-     else:
-         fig_row1 = line_plot(st.session_state['Cleaned_data_panel'], x_col='date', y1_cols=[st.session_state["selected_feature"]], y2_cols=[target_column], title=f'Analysis of {st.session_state["selected_feature"]} and {[target_column][0]} Over Time')
-         st.plotly_chart(fig_row1, use_container_width=True)
-         st.markdown('### Summary')
-         st.dataframe(summary(st.session_state['cleaned_data'],[st.session_state["selected_feature"]],spends=spends_feature[0]),use_container_width=True)
-
-     cols2=st.columns(2)
-     with cols2[0]:
-         if st.button('Validate'):
-             st.session_state['validation'].append(st.session_state["selected_feature"])
-     with cols2[1]:
-         if st.checkbox('Validate all'):
-             st.session_state['validation'].extend(val_variables)
-             st.success('All media variables are validated ✅')
-
-     if len(set(st.session_state['validation']).intersection(val_variables))!=len(val_variables):
-         validation_data=pd.DataFrame({'Validate':[True if col in st.session_state['validation'] else False for col in val_variables],
-                                       'Variables':val_variables
-                                       })
-         cols3=st.columns([1,30])
-         with cols3[1]:
-             validation_df=st.data_editor(validation_data,
-                 # column_config={
-                 #     'Validate':st.column_config.CheckboxColumn(wi)
-                 # },
-                 column_config={
-                     "Validate": st.column_config.CheckboxColumn(
-                         default=False,
-                         width=100,
-                     ),
-                     'Variables':st.column_config.TextColumn(
-                         width=1000
-                     )
-                 },hide_index=True)
-
-             selected_rows = validation_df[validation_df['Validate']==True]['Variables']
-
-             #st.write(selected_rows)
-
-             st.session_state['validation'].extend(selected_rows)
-
-     not_validated_variables = [col for col in val_variables if col not in st.session_state["validation"]]
-     if not_validated_variables:
-         not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
-         st.warning(not_validated_message)
-
-
- with st.expander('Non Media Variables Analysis'):
-     selected_columns_row4 = st.selectbox('Select Channel',Non_media_variables,index=1)
-     # Create the dual-axis line plot
-     fig_row4 = line_plot(st.session_state['Cleaned_data_panel'], x_col='date', y1_cols=[selected_columns_row4], y2_cols=[target_column], title=f'Analysis of {selected_columns_row4} and {target_column} Over Time')
      st.plotly_chart(fig_row4, use_container_width=True)
      selected_non_media=selected_columns_row4
-     sum_df = st.session_state['Cleaned_data_panel'][['date', selected_non_media,target_column]]
-     sum_df['Year']=pd.to_datetime(st.session_state['Cleaned_data_panel']['date']).dt.year
      #st.dataframe(df)
      #st.dataframe(sum_df.head(2))
      sum_df=sum_df.groupby('Year').agg('sum')
@@ -218,34 +184,58 @@ with st.expander('Non Media Variables Analysis'):
      sum_df=sum_df.applymap(format_numbers)
      sum_df.fillna('-',inplace=True)
      sum_df=sum_df.replace({"0.0":'-','nan':'-'})
-     st.markdown('### Summary')
      st.dataframe(sum_df,use_container_width=True)

-
- with st.expander('Correlation Analysis'):
-     options = list(st.session_state['Cleaned_data_panel'].select_dtypes(np.number).columns)
-
-     # selected_options = []
-     # num_columns = 4
-     # num_rows = -(-len(options) // num_columns)  # Ceiling division to calculate rows
-
-     # # Create a grid of checkboxes
-     # st.header('Select Features for Correlation Plot')
-     # tick=False
-     # if st.checkbox('Select all'):
-     #     tick=True
-     # selected_options = []
-     # for row in range(num_rows):
-     #     cols = st.columns(num_columns)
-     #     for col in cols:
-     #         if options:
-     #             option = options.pop(0)
-     #             selected = col.checkbox(option,value=tick)
-     #             if selected:
-     #                 selected_options.append(option)
-     # # Display selected options
-
-     selected_options=st.multiselect('Select Variables For correlation plot',[var for var in options if var!= target_column],default=options[3])
-
-     st.pyplot(correlation_plot(st.session_state['Cleaned_data_panel'],selected_options,target_column))
  import plotly.graph_objects as go
  from Eda_functions import *
  import numpy as np
+ import re
  import pickle
+ from ydata_profiling import ProfileReport
  from streamlit_pandas_profiling import st_profile_report
  import streamlit as st
  import streamlit.components.v1 as components
  import sweetviz as sv
+ from utilities import set_header,initialize_data,load_local_css
  from st_aggrid import GridOptionsBuilder,GridUpdateMode
  from st_aggrid import GridOptionsBuilder
  from st_aggrid import AgGrid
  import base64

  st.set_page_config(
      page_title="Data Validation",

+ #preprocessing
+ # with open('Categorised_data.pkl', 'rb') as file:
+ #     Categorised_data = pickle.load(file)
+ # with open("edited_dataframe.pkl", 'rb') as file:
+ #     df = pickle.load(file)
+ # date=df.index
+ # df.reset_index(inplace=True)
+ # df['Date'] = pd.to_datetime(date)

+ #prospects=pd.read_excel('EDA_Data.xlsx',sheet_name='Prospects')
+ #spends=pd.read_excel('EDA_Data.xlsx',sheet_name='SPEND INPUT')
+ #spends.columns=['Week','Streaming (Spends)','TV (Spends)','Search (Spends)','Digital (Spends)']
+ #df=pd.concat([df,spends],axis=1)
+
+ #df['Date'] =pd.to_datetime(df['Date']).dt.strftime('%m/%d/%Y')
+ #df['Prospects']=prospects['Prospects']
+ #df.drop(['Week'],axis=1,inplace=True)

  st.title('Data Validation and Insights')

+ with open("Pickle_files/main_df",'rb') as f:
+     st.session_state['cleaned_data']= pickle.load(f)
+ with open("Pickle_files/category_dict",'rb') as c:
+     st.session_state['category_dict']=pickle.load(c)
+
+ # st.write(st.session_state['cleaned_data'])
+
+ target_variables=[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Response_Metric']

  target_column = st.selectbox('Select the Target Feature/Dependent Variable (will be used in all charts as reference)',list(*target_variables))
  st.session_state['target_column']=target_column

+ fig=line_plot_target(st.session_state['cleaned_data'], target=target_column, title=f'{target_column} Over Time')
+ st.plotly_chart(fig, use_container_width=True)

+ media_channel=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Media'])
+ # st.write(media_channel)

+ Non_media_channel=[col for col in st.session_state['cleaned_data'].columns if col not in media_channel]

+ st.markdown('### Annual Data Summary')
+ st.dataframe(summary(st.session_state['cleaned_data'], media_channel+[target_column], spends=None,Target=True), use_container_width=True)

+ if st.checkbox('Show raw data'):
+     st.write(pd.concat([pd.to_datetime(st.session_state['cleaned_data']['Date']).dt.strftime('%m/%d/%Y'),st.session_state['cleaned_data'].select_dtypes(np.number).applymap(format_numbers)],axis=1))

  col1 = st.columns(1)

  if "selected_feature" not in st.session_state:
      st.session_state['selected_feature']=None

+ st.header('1. Media Channels')
+
+ if 'Validation' not in st.session_state:
+     st.session_state['Validation']=[]
+
+ eda_columns=st.columns(2)
+ with eda_columns[0]:
+     if st.button('Generate Profile Report'):
+         pr = st.session_state['cleaned_data'].profile_report()
+         pr.to_file("Profile_Report.html")
+
+         with open("Profile_Report.html", "rb") as f:
+             profile_report_html = f.read()
+         b64 = base64.b64encode(profile_report_html).decode()
+         href = f'<a href="data:text/html;base64,{b64}" download="Profile_Report.html">Download Profile Report</a>'
+         st.markdown(href, unsafe_allow_html=True)
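The block above inlines the full report into a base64 `data:` URI rendered through `unsafe_allow_html`, which can make the page heavy for large reports. A roughly equivalent sketch using `st.download_button`, the approach the deleted version of this page used, streams the same bytes without embedding them in the page:

    with open("Profile_Report.html", "rb") as f:
        st.download_button(label="Download Profile Report",
                           data=f.read(),
                           file_name="Profile_Report.html",
                           mime="text/html")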
  with eda_columns[1]:
+     if st.button('Generate Sweetviz Report'):
+
+         def generate_report_with_target(df, target_feature):
+             report = sv.analyze([df, "Dataset"], target_feat=target_feature)
+             return report
+
+         report = generate_report_with_target(st.session_state['cleaned_data'], target_feature=target_column)
+         report.show_html()
+
+
+ selected_media = st.selectbox('Select media', np.unique([Categorised_data[col]['VB'] for col in media_channel]))
+ # selected_feature=st.multiselect('Select Metric', df.columns[df.columns.str.contains(selected_media,case=False)])
+ st.session_state["selected_feature"]=st.selectbox('Select Metric',[col for col in media_channel if Categorised_data[col]['VB'] in selected_media ] )
+ spends_features=[col for col in df.columns if 'spends' in col.lower() or 'cost' in col.lower()]
+ spends_feature=[col for col in spends_features if col.split('_')[0] in st.session_state["selected_feature"].split('_')[0]]
+ #st.write(spends_features)
+ #st.write(spends_feature)
+ #st.write(selected_feature)
+
+
+ val_variables=[col for col in media_channel if col!='Date']
+ if len(spends_feature)==0:
+     st.warning('No spends variable available for the selected metric in data')
+
+ else:
+     st.write(f'Selected spends variable: {spends_feature[0]}. If wrong, please name the variables properly')
+     # Create the dual-axis line plot
+     fig_row1 = line_plot(df, x_col='Date', y1_cols=[st.session_state["selected_feature"]], y2_cols=[target_column], title=f'Analysis of {st.session_state["selected_feature"]} and {[target_column][0]} Over Time')
+     st.plotly_chart(fig_row1, use_container_width=True)
+     st.markdown('### Annual Data Summary')
+     st.dataframe(summary(df,[st.session_state["selected_feature"]],spends=spends_feature[0]),use_container_width=True)
+     if st.button('Validate'):
+         st.session_state['Validation'].append(st.session_state["selected_feature"])
+
+ if st.checkbox('Validate all'):
+     st.session_state['Validation'].extend(val_variables)
+     st.success('All media variables are validated')
+ if len(set(st.session_state['Validation']).intersection(val_variables))!=len(val_variables):
+     #st.write(st.session_state['Validation'])
+     validation_data=pd.DataFrame({'Variables':val_variables,
+                                   'Validated':[1 if col in st.session_state['Validation'] else 0 for col in val_variables],
+                                   'Bucket':[Categorised_data[col]['VB'] for col in val_variables]})
+     gd=GridOptionsBuilder.from_dataframe(validation_data)
+     gd.configure_pagination(enabled=True)
+     gd.configure_selection(use_checkbox=True,selection_mode='multiple')
+     #gd.configure_selection_toggle_all(None, show_toggle_all=True)
+     #gd.configure_columns_auto_size_mode(GridOptionsBuilder.configure_columns)
+     gridoptions=gd.build()
+     #st.text(st.session_state['Validation'])
+     table = AgGrid(validation_data,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED,fit_columns_on_grid_load=True)
+     #st.table(table)
+     selected_rows = table["selected_rows"]
+     st.session_state['Validation'].extend([col['Variables'] for col in selected_rows])
+     not_validated_variables = [col for col in val_variables if col not in st.session_state["Validation"]]
+     if not_validated_variables:
+         not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
+         st.warning(not_validated_message)
+
+
+
+ st.header('2. Non Media Variables')
+ selected_columns_row = [col for col in df.columns if ("imp" not in col.lower()) and ('cli' not in col.lower()) and ('spend' not in col.lower()) and col!='Date']
+ selected_columns_row4 = st.selectbox('Select Channel',selected_columns_row)
+ if not selected_columns_row4:
+     st.warning('Please select at least one.')
+ else:
+     # Create the dual-axis line plot
+     fig_row4 = line_plot(df, x_col='Date', y1_cols=[selected_columns_row4], y2_cols=[target_column], title=f'Analysis of {selected_columns_row4} and {target_column} Over Time')
      st.plotly_chart(fig_row4, use_container_width=True)
      selected_non_media=selected_columns_row4
+     sum_df = df[['Date', selected_non_media,target_column]]
+     sum_df['Year']=pd.to_datetime(df['Date']).dt.year
      #st.dataframe(df)
      #st.dataframe(sum_df.head(2))
      sum_df=sum_df.groupby('Year').agg('sum')
      sum_df=sum_df.applymap(format_numbers)
      sum_df.fillna('-',inplace=True)
      sum_df=sum_df.replace({"0.0":'-','nan':'-'})
+     st.markdown('### Annual Data Summary')
      st.dataframe(sum_df,use_container_width=True)

+ # if st.checkbox('Validate',key='2'):
+ #     st.session_state['Validation'].append(selected_columns_row4)
+ # val_variables=[col for col in media_channel if col!='Date']
+ # if st.checkbox('Validate all'):
+ #     st.session_state['Validation'].extend(val_variables)
+ #     validation_data=pd.DataFrame({'Variables':val_variables,
+ #                                   'Validated':[1 if col in st.session_state['Validation'] else 0 for col in val_variables],
+ #                                   'Bucket':[Categorised_data[col]['VB'] for col in val_variables]})
+ #     gd=GridOptionsBuilder.from_dataframe(validation_data)
+ #     gd.configure_pagination(enabled=True)
+ #     gd.configure_selection(use_checkbox=True,selection_mode='multiple')
+ #     #gd.configure_selection_toggle_all(None, show_toggle_all=True)
+ #     #gd.configure_columns_auto_size_mode(GridOptionsBuilder.configure_columns)
+ #     gridoptions=gd.build()
+ #     #st.text(st.session_state['Validation'])
+ #     table = AgGrid(validation_data,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED,fit_columns_on_grid_load=True)
+ #     #st.table(table)
+ #     selected_rows = table["selected_rows"]
+ #     st.session_state['Validation'].extend([col['Variables'] for col in selected_rows])
+ #     not_validated_variables = [col for col in val_variables if col not in st.session_state["Validation"]]
+ #     if not_validated_variables:
+ #         not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
+ #         st.warning(not_validated_message)
+
+ options = list(df.select_dtypes(np.number).columns)
+ st.markdown(' ')
+ st.markdown(' ')
+ st.markdown('# Exploratory Data Analysis')
+ st.markdown(' ')
+
+ selected_options = []
+ num_columns = 4
+ num_rows = -(-len(options) // num_columns)  # Ceiling division to calculate rows
+
+ # Create a grid of checkboxes
+ st.header('Select Features for Correlation Plot')
+ tick=False
+ if st.checkbox('Select all'):
+     tick=True
+ selected_options = []
+ for row in range(num_rows):
+     cols = st.columns(num_columns)
+     for col in cols:
+         if options:
+             option = options.pop(0)
+             selected = col.checkbox(option,value=tick)
+             if selected:
+                 selected_options.append(option)
+ # Display selected options
+ #st.write('You selected:', selected_options)
+ st.pyplot(correlation_plot(df,selected_options,target_column))
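One small idiom worth calling out in the grid above: `num_rows = -(-len(options) // num_columns)` is ceiling division for positive integers, so the checkbox grid always gets enough rows for every option. For example:

    options_count, num_columns = 10, 4
    num_rows = -(-options_count // num_columns)  # equals ceil(10 / 4) == 3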
pages/2_Transformations_with_panel.py ADDED
@@ -0,0 +1,612 @@
+ '''
+ MMO Build Sprint 3
+ date :
+ additions : adding more variables to session state for saved model : random effect, predicted train & test
+ '''
+
+ import streamlit as st
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ from Eda_functions import format_numbers
+ import numpy as np
+ import pickle
+ from st_aggrid import AgGrid
+ from st_aggrid import GridOptionsBuilder,GridUpdateMode
+ from utilities import set_header,load_local_css
+ from st_aggrid import GridOptionsBuilder
+ import time
+ import itertools
+ import statsmodels.api as sm
+ import re
+ from sklearn.metrics import mean_absolute_error, r2_score,mean_absolute_percentage_error
+ from sklearn.preprocessing import MinMaxScaler
+ import os
+ import matplotlib.pyplot as plt
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
+ st.set_option('deprecation.showPyplotGlobalUse', False)
+ import statsmodels.formula.api as smf
+
+ from datetime import datetime
+ import seaborn as sns
+ from Data_prep_functions import *
+
+
+ def get_random_effects(media_data, panel_col, mdf):
+     random_eff_df = pd.DataFrame(columns=[panel_col, "random_effect"])
+
+     for i, market in enumerate(media_data[panel_col].unique()):
+         print(i, end='\r')
+         intercept = mdf.random_effects[market].values[0]
+         random_eff_df.loc[i, 'random_effect'] = intercept
+         random_eff_df.loc[i, panel_col] = market
+
+     return random_eff_df
+
+
+ def mdf_predict(X_df, mdf, random_eff_df):
+     X=X_df.copy()
+     X['fixed_effect'] = mdf.predict(X)
+     X=pd.merge(X, random_eff_df, on=panel_col, how='left')
+     X['pred'] = X['fixed_effect'] + X['random_effect']
+     # X.to_csv('Test/megred_df.csv',index=False)
+     X.drop(columns=['fixed_effect', 'random_effect'], inplace=True)
+     return X['pred']
+
59
+ st.set_page_config(
60
+ page_title="Model Build",
61
+ page_icon=":shark:",
62
+ layout="wide",
63
+ initial_sidebar_state='collapsed'
64
+ )
65
+
66
+ load_local_css('styles.css')
67
+ set_header()
68
+
69
+
70
+ st.title('1. Build Your Model')
71
+
72
+ # set the panel column
73
+ date_col = 'date'
74
+
75
+
76
+ media_data=pd.read_csv(r'upf_data_converted.csv')
77
+ # with open("Pickle_files/main_df",'rb') as f:
78
+ # media_data= pickle.load(f)
79
+
80
+
81
+ media_data.columns=[i.lower().strip().replace(' ','_').replace('-','').replace(':','').replace("__", "_") for i in media_data.columns]
82
+ #st.write(media_data.columns)
83
+ #media_data.drop(['indicacao_impressions','infleux_impressions','influencer_impressions'],axis=1,inplace=True)
84
+ target_col = 'total_approved_accounts_revenue'
85
+ # st.write(media_data.columns)
86
+ media_data.sort_values(date_col, inplace=True)
87
+ media_data.reset_index(drop=True,inplace=True)
88
+
89
+ date=media_data[date_col]
90
+ st.session_state['date']=date
91
+ revenue=media_data[target_col]
92
+ media_data.drop([target_col],axis=1,inplace=True)
93
+ media_data.drop([date_col],axis=1,inplace=True)
94
+ media_data.reset_index(drop=True,inplace=True)
95
+
96
+
97
+ if st.toggle('Apply Transformations on DMA/Panel Level'):
98
+ dma=st.selectbox('Select the Level of data ',[ col for col in media_data.columns if col.lower() in ['dma','panel', 'markets']])
99
+ panel_col= dma
100
+
101
+ else:
102
+ #""" code to aggregate data on date """
103
+
104
+
105
+ dma=None
106
+
107
+ # dma_dict={ dm:media_data[media_data[dma]==dm] for dm in media_data[dma].unique()}
108
+ # st.write(dma_dict)
109
+
110
+ st.markdown('## Select the Range of Transformations')
111
+ columns = st.columns(2)
112
+ old_shape=media_data.shape
113
+
114
+
115
+ if "old_shape" not in st.session_state:
116
+ st.session_state['old_shape']=old_shape
117
+
118
+
119
+ with columns[0]:
120
+ slider_value_adstock = st.slider('Select Adstock Range (only applied to media)', 0.0, 1.0, (0.2, 0.4), step=0.1, format="%.2f")
121
+ with columns[1]:
122
+ slider_value_lag = st.slider('Select Lag Range (applied to media, seasonal, macroeconomic variables)', 1, 7, (1, 3), step=1)
123
+
124
+ # with columns[2]:
125
+ # slider_value_power=st.slider('Select Power range (only applied to media )',0,4,(1,2),step=1)
126
+
127
+ # with columns[1]:
128
+ # st.number_input('Select the range of half saturation point ',min_value=1,max_value=5)
129
+ # st.number_input('Select the range of ')
130
+
131
+ # Section 1 - Transformations Functions
132
+ def lag(data,features,lags,dma=None):
133
+ if dma:
134
+
135
+ transformed_data=pd.concat([data.groupby([dma])[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags],axis=1)
136
+ transformed_data=transformed_data.fillna(method='bfill')
137
+ return pd.concat([transformed_data,data],axis=1)
138
+
139
+ else:
140
+
141
+ #''' data should be aggregated on date'''
142
+
143
+ transformed_data=pd.concat([data[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags],axis=1)
144
+ transformed_data=transformed_data.fillna(method='bfill')
145
+
146
+ return pd.concat([transformed_data,data],axis=1)
147
+
148
+ #adstock
149
+ def adstock(df, alphas, cutoff, features,dma=None):
150
+ # st.write(features)
151
+
152
+ if dma:
153
+ transformed_data=pd.DataFrame()
154
+ for d in df[dma].unique():
155
+ dma_sub_df = df[df[dma] == d]
156
+ n = len(dma_sub_df)
157
+
158
+
159
+ weights = np.array([[[alpha**(i-j) if i >= j and j >= i-cutoff else 0. for j in range(n)] for i in range(n)] for alpha in alphas])
160
+ X = dma_sub_df[features].to_numpy()
161
+
162
+ res = pd.DataFrame(np.hstack(weights @ X),
163
+ columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
164
+
165
+ transformed_data=pd.concat([transformed_data,res],axis=0)
166
+ transformed_data.reset_index(drop=True,inplace=True)
167
+ return pd.concat([transformed_data,df],axis=1)
168
+
169
+ else:
170
+
171
+ n = len(df)
172
+
173
+
174
+ weights = np.array([[[alpha**(i-j) if i >= j and j >= i-cutoff else 0. for j in range(n)] for i in range(n)] for alpha in alphas])
175
+
176
+ X = df[features].to_numpy()
177
+ res = pd.DataFrame(np.hstack(weights @ X),
178
+ columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
179
+ return pd.concat([res,df],axis=1)
180
+
181
+
182
+
183
+
184
+ # Section 2 - Begin Transformations
185
+
186
+ if 'media_data' not in st.session_state:
187
+
188
+ st.session_state['media_data']=pd.DataFrame()
189
+
190
+ # Sprint3 additions
191
+ if 'random_effects' not in st.session_state:
192
+ st.session_state['random_effects']=pd.DataFrame()
193
+ if 'pred_train' not in st.session_state:
194
+ st.session_state['pred_train'] = []
195
+ if 'pred_test' not in st.session_state:
196
+ st.session_state['pred_test'] = []
197
+ # end of Sprint3 additions
198
+
199
+ # variables_to_be_transformed=[col for col in media_data.columns if col.lower() not in ['dma','panel'] ] # change for buckets
200
+ variables_to_be_transformed=[col for col in media_data.columns if '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
201
+ # st.write(variables_to_be_transformed)
202
+ # st.write(media_data[variables_to_be_transformed].dtypes)
203
+
204
+ with columns[0]:
205
+ if st.button('Apply Transformations'):
206
+ with st.spinner('Applying Transformations'):
207
+ transformed_data_lag=lag(media_data,features=variables_to_be_transformed,lags=np.arange(slider_value_lag[0],slider_value_lag[1]+1,1),dma=dma)
208
+
209
+ # variables_to_be_transformed=[col for col in list(transformed_data_lag.columns) if col not in ['Date','DMA','Panel']] #change for buckets
210
+ variables_to_be_transformed = [col for col in media_data.columns if
211
+ '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
212
+
213
+ transformed_data_adstock=adstock(df=transformed_data_lag, alphas=np.arange(slider_value_adstock[0],slider_value_adstock[1],0.1), cutoff=8, features=variables_to_be_transformed,dma=dma)
214
+
215
+ # st.success('Done')
216
+ st.success("Transformations complete!")
217
+
218
+ st.write(f'old shape {old_shape}, new shape {transformed_data_adstock.shape}')
219
+ # st.write(media_data.head(10))
220
+ # st.write(transformed_data_adstock.head(10))
221
+
222
+ transformed_data_adstock.columns = [c.replace(".","_") for c in transformed_data_adstock.columns] # srishti
223
+ # st.write(transformed_data_adstock.columns)
224
+ st.session_state['media_data']=transformed_data_adstock # srishti
225
+
226
+ # with st.spinner('Applying Transformations'):
227
+ # time.sleep(2)
228
+ # st.success("Transformations complete!")
229
+
230
+ # if st.session_state['media_data'].shape[1]>old_shape[1]:
231
+ # with columns[0]:
232
+ # st.write(f'Total no.of variables before transformation: {old_shape[1]}, Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
233
+ #st.write(f'Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
234
+
235
+ # Section 3 - Create combinations
236
+
237
+ # bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','FB: Level Achieved - Tier 1 Impressions',
238
+ # ' FB: Level Achieved - Tier 2 Impressions','paid_social_others',
239
+ # ' GA App: Will And Cid Pequena Baixo Risco Clicks',
240
+ # 'digital_tactic_others',"programmatic"
241
+ # ]
242
+
243
+ # srishti - bucket names changed
244
+ bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','fb_level_achieved_tier_2',
245
+ 'fb_level_achieved_tier_1','paid_social_others',
246
+ 'ga_app',
247
+ 'digital_tactic_others',"programmatic"
248
+ ]
249
+
250
+ with columns[1]:
251
+ if st.button('Create Combinations of Variables'):
252
+
253
+ top_3_correlated_features=[]
254
+ # for col in st.session_state['media_data'].columns[:19]:
255
+ original_cols = [c for c in st.session_state['media_data'].columns if "_clicks" in c.lower() or "_impressions" in c.lower()]
256
+ original_cols = [c for c in original_cols if "_lag" not in c.lower() and "_adstock" not in c.lower()]
257
+ # st.write(original_cols)
258
+
259
+ # for col in st.session_state['media_data'].columns[:19]:
260
+ for col in original_cols: # srishti - new
261
+ corr_df=pd.concat([st.session_state['media_data'].filter(regex=col),
262
+ revenue],axis=1).corr()[target_col].iloc[:-1]
263
+ top_3_correlated_features.append(list(corr_df.sort_values(ascending=False).head(2).index))
264
+ # st.write(col, top_3_correlated_features)
265
+ flattened_list = [item for sublist in top_3_correlated_features for item in sublist]
266
+ # all_features_set={var:[col for col in flattened_list if var in col] for var in bucket}
267
+ all_features_set={var:[col for col in flattened_list if var in col] for var in bucket if len([col for col in flattened_list if var in col])>0} # srishti
268
+
269
+ channels_all=[values for values in all_features_set.values()]
270
+ # st.write(channels_all)
271
+ st.session_state['combinations'] = list(itertools.product(*channels_all))
272
+ # if 'combinations' not in st.session_state:
273
+ # st.session_state['combinations']=combinations_all
274
+
275
+ st.session_state['final_selection']=st.session_state['combinations']
276
+ st.success('Done')
277
+ # st.write(f"{len(st.session_state['combinations'])} combinations created")
278
+
279
+
280
+ revenue.reset_index(drop=True,inplace=True)
281
+ if 'Model_results' not in st.session_state:
282
+ st.session_state['Model_results']={'Model_object':[],
283
+ 'Model_iteration':[],
284
+ 'Feature_set':[],
285
+ 'MAPE':[],
286
+ 'R2':[],
287
+ 'ADJR2':[]
288
+ }
289
+
290
+ def reset_model_result_dct():
291
+ st.session_state['Model_results']={'Model_object':[],
292
+ 'Model_iteration':[],
293
+ 'Feature_set':[],
294
+ 'MAPE':[],
295
+ 'R2':[],
296
+ 'ADJR2':[]
297
+ }
298
+
299
+ # if st.button('Build Model'):
300
+ if 'iterations' not in st.session_state:
301
+ st.session_state['iterations']=0
302
+ # st.write("1",st.session_state["final_selection"])
303
+
304
+ if 'final_selection' not in st.session_state:
305
+ st.session_state['final_selection']=False
306
+
307
+ save_path = r"Model/"
308
+ with columns[1]:
309
+ if st.session_state['final_selection']:
310
+ st.write(f'Total combinations created {format_numbers(len(st.session_state["final_selection"]))}')
311
+
312
+
313
+ if st.checkbox('Build all iterations'):
314
+ iterations=len(st.session_state['final_selection'])
315
+ else:
316
+ iterations = st.number_input('Select the number of iterations to perform', min_value=0, step=100, value=st.session_state['iterations'],on_change=reset_model_result_dct)
317
+ # st.write("iterations=", iterations)
318
+
319
+ if st.button('Build Model',on_click=reset_model_result_dct):
320
+ st.session_state['iterations']=iterations
321
+ # st.write("2",st.session_state["final_selection"])
322
+
323
+ # Section 4 - Model
324
+
325
+ st.session_state['media_data']=st.session_state['media_data'].fillna(method='ffill')
326
+ st.markdown(
327
+ 'Data Split -- Training Period: May 9th, 2023 - October 5th,2023 , Testing Period: October 6th, 2023 - November 7th, 2023 ')
328
+ progress_bar = st.progress(0) # Initialize the progress bar
329
+ # time_remaining_text = st.empty() # Create an empty space for time remaining text
330
+ start_time = time.time() # Record the start time
331
+ progress_text = st.empty()
332
+ # time_elapsed_text = st.empty()
333
+ # for i, selected_features in enumerate(st.session_state["final_selection"][40000:40000 + int(iterations)]):
334
+ # st.write(st.session_state["final_selection"])
335
+ # for i, selected_features in enumerate(st.session_state["final_selection"]):
336
+ for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]): # srishti
337
+ df = st.session_state['media_data']
338
+
339
+ fet = [var for var in selected_features if len(var) > 0]
340
+ inp_vars_str = " + ".join(fet) # new
341
+
342
+
343
+ X = df[fet]
344
+ y = revenue
345
+ ss = MinMaxScaler()
346
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
347
+ # X = sm.add_constant(X)
348
+
349
+ X['total_approved_accounts_revenue'] = revenue # Sprint2
350
+ X[panel_col] = df[panel_col] # Sprint2
351
+
352
+
353
+
354
+ X_train=X.iloc[:8000]
355
+ X_test=X.iloc[8000:]
356
+ y_train=y.iloc[:8000]
357
+ y_test=y.iloc[8000:]
358
+
359
+
360
+
361
+ md = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
362
+ data=X_train[['total_approved_accounts_revenue'] + fet],
363
+ groups=X_train[panel_col])
364
+ mdf = md.fit()
365
+ predicted_values = mdf.fittedvalues
366
+
367
+ # st.write(fet)
368
+ # positive_coeff=fet
369
+ # negetive_coeff=[]
370
+
371
+ coefficients = mdf.fe_params.to_dict()
372
+ model_possitive = [col for col in coefficients.keys() if coefficients[col] > 0]
373
+ # st.write(positive_coeff)
374
+ # st.write(model_possitive)
375
+ pvalues = [var for var in list(mdf.pvalues) if var <= 0.06]
376
+
377
+ # if (len(model_possitive) / len(selected_features)) > 0.9 and (len(pvalues) / len(selected_features)) >= 0.8:
378
+ if (len(model_possitive) / len(selected_features)) > 0 and (len(pvalues) / len(selected_features)) >= 0: # srishti - changed just for testing, revert later
379
+ # predicted_values = model.predict(X_train)
380
+ mape = mean_absolute_percentage_error(y_train, predicted_values)
381
+ r2 = r2_score(y_train, predicted_values)
382
+ adjr2 = 1 - (1 - r2) * (len(y_train) - 1) / (len(y_train) - len(selected_features) - 1)
383
+
384
+ filename = os.path.join(save_path, f"model_{i}.pkl")
385
+ with open(filename, "wb") as f:
386
+ pickle.dump(mdf, f)
387
+ # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
388
+ # model = pickle.load(file)
389
+
390
+ st.session_state['Model_results']['Model_object'].append(filename)
391
+ st.session_state['Model_results']['Model_iteration'].append(i)
392
+ st.session_state['Model_results']['Feature_set'].append(fet)
393
+ st.session_state['Model_results']['MAPE'].append(mape)
394
+ st.session_state['Model_results']['R2'].append(r2)
395
+ st.session_state['Model_results']['ADJR2'].append(adjr2)
396
+
397
+ current_time = time.time()
398
+ time_taken = current_time - start_time
399
+ time_elapsed_minutes = time_taken / 60
400
+ completed_iterations_text = f"{i + 1}/{iterations}"
401
+ progress_bar.progress((i + 1) / int(iterations))
402
+ progress_text.text(f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
403
+
404
+ st.write(f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
405
+ pd.DataFrame(st.session_state['Model_results']).to_csv('model_output.csv')
406
+
407
+ def to_percentage(value):
408
+ return f'{value * 100:.1f}%'
409
+
410
+ ## Section 5 - Select Model
411
+ st.title('2. Select Models')
412
+ if 'tick' not in st.session_state:
413
+ st.session_state['tick']=False
414
+ if st.checkbox('Show results of top 10 models (based on MAPE and Adj. R2)',value=st.session_state['tick']):
415
+ st.session_state['tick']=True
416
+ st.write('Select one model iteration to generate performance metrics for it:')
417
+ data=pd.DataFrame(st.session_state['Model_results'])
418
+ data.sort_values(by=['MAPE'],ascending=False,inplace=True)
419
+ data.drop_duplicates(subset='Model_iteration',inplace=True)
420
+ top_10=data.head(10)
421
+ top_10['Rank']=np.arange(1,len(top_10)+1,1)
422
+ top_10[['MAPE','R2','ADJR2']]=np.round(top_10[['MAPE','R2','ADJR2']],4).applymap(to_percentage)
423
+ top_10_table = top_10[['Rank','Model_iteration','MAPE','ADJR2','R2']]
424
+ #top_10_table.columns=[['Rank','Model Iteration Index','MAPE','Adjusted R2','R2']]
425
+ gd=GridOptionsBuilder.from_dataframe(top_10_table)
426
+ gd.configure_pagination(enabled=True)
427
+ gd.configure_selection(use_checkbox=True)
428
+
429
+
430
+ gridoptions=gd.build()
431
+
432
+ table = AgGrid(top_10,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED)
433
+
434
+ selected_rows=table.selected_rows
435
+ # if st.session_state["selected_rows"] != selected_rows:
436
+ # st.session_state["build_rc_cb"] = False
437
+ st.session_state["selected_rows"] = selected_rows
438
+ if 'Model' not in st.session_state:
439
+ st.session_state['Model']={}
440
+
441
+ # Section 6 - Display Results
442
+
443
+ if len(selected_rows)>0:
444
+ st.header('2.1 Results Summary')
445
+
446
+ model_object=data[data['Model_iteration']==selected_rows[0]['Model_iteration']]['Model_object']
447
+ features_set=data[data['Model_iteration']==selected_rows[0]['Model_iteration']]['Feature_set']
448
+
449
+ with open(str(model_object.values[0]), 'rb') as file:
450
+ # print(file)
451
+ model = pickle.load(file)
452
+ st.write(model.summary())
453
+ st.header('2.2 Actual vs. Predicted Plot')
454
+
455
+ df=st.session_state['media_data']
456
+ X=df[features_set.values[0]]
457
+ # X = sm.add_constant(X)
458
+ y=revenue
459
+
460
+ ss = MinMaxScaler()
461
+ X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
462
+
463
+ # Sprint2 changes
464
+ X['total_approved_accounts_revenue'] = revenue # new
465
+ X[panel_col] = df[panel_col]
466
+ X[date_col]=date
467
+
468
+
469
+
470
+ X_train=X.iloc[:8000]
471
+ X_test=X.iloc[8000:].reset_index(drop=True)
472
+ y_train=y.iloc[:8000]
473
+ y_test=y.iloc[8000:].reset_index(drop=True)
474
+
475
+
476
+ random_eff_df = get_random_effects(media_data, panel_col, model)
477
+ train_pred = model.fittedvalues
478
+ test_pred = mdf_predict(X_test, model, random_eff_df)
479
+ print("__"*20, test_pred.isna().sum())
480
+
481
+ # save x test to test - srishti
482
+ x_test_to_save = X_test.copy()
483
+ x_test_to_save['Actuals'] = y_test
484
+ x_test_to_save['Predictions'] = test_pred
485
+
486
+ x_train_to_save=X_train.copy()
487
+ x_train_to_save['Actuals'] = y_train
488
+ x_train_to_save['Predictions'] = train_pred
489
+
490
+ x_train_to_save.to_csv('Test/x_train_to_save.csv',index=False)
491
+ x_test_to_save.to_csv('Test/x_test_to_save.csv',index=False)
492
+
493
+ st.session_state['X']=X_train
494
+ st.session_state['features_set']=features_set.values[0]
495
+ print("**"*20,"selected model features : ",features_set.values[0])
496
+ metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(X_train[date_col], y_train, train_pred, model,target_column='Revenue',is_panel=True) # Sprint2
497
+
498
+ st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
499
+
500
+
501
+
502
+ st.markdown('## 2.3 Residual Analysis')
503
+ columns=st.columns(2)
504
+ with columns[0]:
505
+ fig=plot_residual_predicted(y_train,train_pred,X_train) # Sprint2
506
+ st.plotly_chart(fig)
507
+
508
+ with columns[1]:
509
+ st.empty()
510
+ fig = qqplot(y_train,train_pred) # Sprint2
511
+ st.plotly_chart(fig)
512
+
513
+ with columns[0]:
514
+ fig=residual_distribution(y_train,train_pred) # Sprint2
515
+ st.pyplot(fig)
516
+
517
+
518
+
519
+ vif_data = pd.DataFrame()
520
+ # X=X.drop('const',axis=1)
521
+ X_train_with_panels = X_train.copy() # Sprint2 -- creating a copy of xtrain. Later deleting panel, target & date from xtrain
522
+ X_train.drop(columns=[target_col, panel_col, date_col], inplace=True) # Sprint2
523
+ vif_data["Variable"] = X_train.columns
524
+ vif_data["VIF"] = [variance_inflation_factor(X_train.values, i) for i in range(X_train.shape[1])]
525
+ vif_data.sort_values(by=['VIF'],ascending=False,inplace=True)
526
+ vif_data=np.round(vif_data)
527
+ vif_data['VIF']=vif_data['VIF'].astype(float)
528
+ st.header('2.4 Variance Inflation Factor (VIF)')
529
+ #st.dataframe(vif_data)
530
+ color_mapping = {
531
+ 'darkgreen': (vif_data['VIF'] < 3),
532
+ 'orange': (vif_data['VIF'] >= 3) & (vif_data['VIF'] <= 10),
533
+ 'darkred': (vif_data['VIF'] > 10)
534
+ }
535
+
536
+ # Create a horizontal bar plot
537
+ fig, ax = plt.subplots()
538
+ fig.set_figwidth(10) # Adjust the width of the figure as needed
539
+
540
+ # Sort the bars by descending VIF values
541
+ vif_data = vif_data.sort_values(by='VIF', ascending=False)
542
+
543
+ # Iterate through the color mapping and plot bars with corresponding colors
544
+ for color, condition in color_mapping.items():
545
+ subset = vif_data[condition]
546
+ bars = ax.barh(subset["Variable"], subset["VIF"], color=color, label=color)
547
+
548
+ # Add text annotations on top of the bars
549
+ for bar in bars:
550
+ width = bar.get_width()
551
+ ax.annotate(f'{width:}', xy=(width, bar.get_y() + bar.get_height() / 2), xytext=(5, 0),
552
+ textcoords='offset points', va='center')
553
+
554
+ # Customize the plot
555
+ ax.set_xlabel('VIF Values')
556
+ #ax.set_title('2.4 Variance Inflation Factor (VIF)')
557
+ #ax.legend(loc='upper right')
558
+
559
+ # Display the plot in Streamlit
560
+ st.pyplot(fig)
561
+
562
+
563
+
564
+ with st.expander('Results Summary Test data'):
565
+ # ss = MinMaxScaler()
566
+ # X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.columns)
567
+ st.header('2.2 Actual vs. Predicted Plot')
568
+
569
+ metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(X_test[date_col], y_test, test_pred, model,target_column='Revenue',is_panel=True) # Sprint2
570
+
571
+ st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
572
+
573
+ st.markdown('## 2.3 Residual Analysis')
574
+ columns=st.columns(2)
575
+ with columns[0]:
576
+ fig=plot_residual_predicted(revenue,test_pred,X_test) # Sprint2
577
+ st.plotly_chart(fig)
578
+
579
+ with columns[1]:
580
+ st.empty()
581
+ fig = qqplot(revenue,test_pred) # Sprint2
582
+ st.plotly_chart(fig)
583
+
584
+ with columns[0]:
585
+ fig=residual_distribution(revenue,test_pred) # Sprint2
586
+ st.pyplot(fig)
587
+
588
+ value=False
589
+ if st.checkbox('Save this model to tune',key='build_rc_cb'):
590
+ mod_name=st.text_input('Enter model name')
591
+ if len(mod_name)>0:
592
+ st.session_state['Model'][mod_name]={"Model_object":model,'feature_set':st.session_state['features_set'],'X_train':X_train_with_panels}
593
+ st.session_state['X_train']=X_train_with_panels
594
+ st.session_state['X_test']=X_test
595
+ st.session_state['y_train']=y_train
596
+ st.session_state['y_test']=y_test
597
+
598
+ # Sprint3 additions
599
+ random_eff_df= get_random_effects(media_data, panel_col, model)
600
+ st.session_state['random_effects']=random_eff_df
601
+
602
+ st.session_state['pred_train']=model.fittedvalues
603
+ st.session_state['pred_test']=mdf_predict(X_test, model, random_eff_df)
604
+ # End of Sprint3 additions
605
+
606
+ with open("best_models.pkl", "wb") as f:
607
+ pickle.dump(st.session_state['Model'], f)
608
+ st.success('Model saved! Proceed to the next page to tune the model')
609
+ value=False
610
+
611
+ # st.write(st.session_state['Model'][mod_name]['X_train'].columns)
612
+ # st.write(st.session_state['X_test'].columns)
pages/3_Model_Tuning_with_panel.py ADDED
@@ -0,0 +1,437 @@
1
+ '''
2
+ MMO Build Sprint 3
3
+ date :
4
+ changes : capability to tune MixedLM as well as simple LR in the same page
5
+ '''
6
+
7
+ import streamlit as st
8
+ import pandas as pd
9
+ from Eda_functions import format_numbers
10
+ import pickle
11
+ from utilities import set_header,load_local_css
12
+ import statsmodels.api as sm
13
+ import re
14
+ from sklearn.preprocessing import MinMaxScaler
15
+ import matplotlib.pyplot as plt
16
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
17
+ st.set_option('deprecation.showPyplotGlobalUse', False)
18
+ import statsmodels.formula.api as smf
19
+ from Data_prep_functions import *
20
+
21
+ for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features"] :
22
+ if i not in st.session_state :
23
+ st.session_state[i] = None
24
+
25
+ st.set_page_config(
26
+ page_title="Model Tuning",
27
+ page_icon=":shark:",
28
+ layout="wide",
29
+ initial_sidebar_state='collapsed'
30
+ )
31
+ load_local_css('styles.css')
32
+ set_header()
33
+
34
+ # Sprint3
35
+ is_panel= True
36
+ panel_col= 'dma' # set the panel column
37
+ date_col = 'date'
38
+ target_col = 'total_approved_accounts_revenue'
39
+
40
+ st.title('1. Model Tuning')
41
+
42
+
43
+ if "X_train" not in st.session_state:
44
+ st.error(
45
+ "Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed.")
46
+ st.stop()
47
+ X_train=st.session_state['X_train']
48
+ X_test=st.session_state['X_test']
49
+ y_train=st.session_state['y_train']
50
+ y_test=st.session_state['y_test']
51
+ df=st.session_state['media_data']
52
+
53
+ # st.write(X_train.columns)
54
+ # st.write(X_test.columns)
55
+
56
+ with open("best_models.pkl", 'rb') as file:
57
+ model_dict= pickle.load(file)
58
+
59
+ if 'selected_model' not in st.session_state:
60
+ st.session_state['selected_model']=0
61
+
62
+ # st.write(model_dict[st.session_state["selected_model"]]['X_train'].columns)
63
+
64
+ st.markdown('### 1.1 Event Flags')
65
+ st.markdown('Helps in quantifying the impact of specific occurrences of events')
66
+ with st.expander('Apply Event Flags'):
67
+ st.session_state["selected_model"]=st.selectbox('Select Model to apply flags',model_dict.keys())
68
+ model =model_dict[st.session_state["selected_model"]]['Model_object']
69
+ date=st.session_state['date']
70
+ date=pd.to_datetime(date)
71
+ X_train =model_dict[st.session_state["selected_model"]]['X_train']
72
+
73
+ features_set= model_dict[st.session_state["selected_model"]]['feature_set']
74
+
75
+ col=st.columns(3)
76
+ min_date=min(date)
77
+ max_date=max(date)
78
+ with col[0]:
79
+ start_date=st.date_input('Select Start Date',min_date,min_value=min_date,max_value=max_date)
80
+ with col[1]:
81
+ end_date=st.date_input('Select End Date',max_date,min_value=min_date,max_value=max_date)
82
+ with col[2]:
83
+ repeat=st.selectbox('Repeat Annually',['Yes','No'],index=1)
84
+ if repeat =='Yes':
85
+ repeat=True
86
+ else:
87
+ repeat=False
88
+ # X_train=sm.add_constant(X_train)
89
+
90
+ if 'Flags' not in st.session_state:
91
+ st.session_state['Flags']={}
92
+ # print("**"*50)
93
+ # print(y_train)
94
+ # print("**"*50)
95
+ # print(model.fittedvalues)
96
+ if is_panel : # Sprint3
97
+ met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train,
98
+ model.fittedvalues, model,
99
+ target_column='Revenue',
100
+ flag=(start_date, end_date),
101
+ repeat_all_years=repeat, is_panel=True)
102
+ st.plotly_chart(fig_flag, use_container_width=True)
103
+
104
+ # create flag on test
105
+ met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test,
106
+ st.session_state['pred_test'], model,
107
+ target_column='Revenue',
108
+ flag=(start_date, end_date),
109
+ repeat_all_years=repeat, is_panel=True)
110
+
111
+ else :
112
+ met,line_values,fig_flag=plot_actual_vs_predicted(date[:150], y_train, model.predict(X_train), model,flag=(start_date,end_date),repeat_all_years=repeat)
113
+ st.plotly_chart(fig_flag,use_container_width=True)
114
+
115
+ met,test_line_values,fig_flag=plot_actual_vs_predicted(date[150:], y_test, model.predict(X_test), model,flag=(start_date,end_date),repeat_all_years=repeat)
116
+
117
+
118
+ flag_name='f1'
119
+ flag_name=st.text_input('Enter Flag Name')
120
+ if st.button('Update flag'):
121
+ st.session_state['Flags'][flag_name]= {}
122
+ st.session_state['Flags'][flag_name]['train']=line_values
123
+ st.session_state['Flags'][flag_name]['test']=test_line_values
124
+ # st.write(st.session_state['Flags'][flag_name])
125
+ st.success(f'{flag_name} stored')
126
+
127
+ options=list(st.session_state['Flags'].keys())
128
+ selected_options = []
129
+ num_columns = 4
130
+ num_rows = -(-len(options) // num_columns)
131
+
132
+
133
+ tick=False
134
+ if st.checkbox('Select all'):
135
+ tick=True
136
+ selected_options = []
137
+ for row in range(num_rows):
138
+ cols = st.columns(num_columns)
139
+ for col in cols:
140
+ if options:
141
+ option = options.pop(0)
142
+ selected = col.checkbox(option,value=tick)
143
+ if selected:
144
+ selected_options.append(option)
145
+
146
+ st.markdown('### 1.2 Select Parameters to Apply')
147
+ parameters=st.columns(3)
148
+ with parameters[0]:
149
+ Trend=st.checkbox("**Trend**")
150
+ st.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')
151
+ with parameters[1]:
152
+ week_number=st.checkbox('**Week_number**')
153
+ st.markdown('Assists in detecting and incorporating weekly patterns or seasonality')
154
+ with parameters[2]:
155
+ sine_cosine=st.checkbox('**Sine and Cosine Waves**')
156
+ st.markdown('Helps in capturing cyclical patterns or seasonality in the data')
157
+
158
+ if st.button('Build model with Selected Parameters and Flags'):
159
+ st.header('2.1 Results Summary')
160
+ # date=list(df.index)
161
+ # df = df.reset_index(drop=True)
162
+ # st.write(df.head(2))
163
+ # X_train=df[features_set]
164
+ ss = MinMaxScaler()
165
+ if is_panel == True :
166
+ X = X_train[features_set]
167
+ X_train_tuned = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
168
+ X_train_tuned[target_col] = X_train[target_col]
169
+ X_train_tuned[date_col] = X_train[date_col]
170
+ X_train_tuned[panel_col] = X_train[panel_col]
171
+
172
+ X = X_test[features_set]
173
+ X_test_tuned = pd.DataFrame(ss.transform(X), columns=X.columns)
174
+ X_test_tuned[target_col] = X_test[target_col]
175
+ X_test_tuned[date_col] = X_test[date_col]
176
+ X_test_tuned[panel_col] = X_test[panel_col]
177
+
178
+ else :
179
+ X_train_tuned = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.columns)
180
+ X_train_tuned = sm.add_constant(X_train_tuned)
181
+
182
+ X_test_tuned = pd.DataFrame(ss.transform(X_test), columns=X_test.columns)
183
+ X_test_tuned = sm.add_constant(X_test_tuned)
184
+
185
+ for flag in selected_options:
186
+ X_train_tuned[flag]=st.session_state['Flags'][flag]['train']
187
+ X_test_tuned[flag]=st.session_state['Flags'][flag]['test']
188
+
189
+ #test
190
+ # X_train_tuned.to_csv("Test/X_train_tuned_flag.csv",index=False)
191
+ # X_test_tuned.to_csv("Test/X_test_tuned_flag.csv",index=False)
192
+
193
+ new_features = features_set
194
+ # print("()()"*20,flag, len(st.session_state['Flags'][flag]))
195
+ if Trend:
196
+ # Sprint3 - group by panel, calculate trend of each panel spearately. Add trend to new feature set
197
+ if is_panel :
198
+ newdata = pd.DataFrame()
199
+ panel_wise_end_point_train = {}
200
+ for panel, groupdf in X_train_tuned.groupby(panel_col):
201
+ groupdf.sort_values(date_col, inplace=True)
202
+ groupdf['Trend'] = np.arange(1, len(groupdf) + 1, 1)
203
+ newdata = pd.concat([newdata, groupdf])
204
+ panel_wise_end_point_train[panel] = len(groupdf)
205
+ X_train_tuned = newdata.copy()
206
+
207
+ test_newdata=pd.DataFrame()
208
+ for panel, test_groupdf in X_test_tuned.groupby(panel_col):
209
+ test_groupdf.sort_values(date_col, inplace=True)
210
+ start = panel_wise_end_point_train[panel]+1
211
+ end = start + len(test_groupdf)
212
+ # print("??"*20, panel, len(test_groupdf), len(np.arange(start, end, 1)), start)
213
+ test_groupdf['Trend'] = np.arange(start, end, 1)
214
+ test_newdata = pd.concat([test_newdata, test_groupdf])
215
+ X_test_tuned = test_newdata.copy()
216
+
217
+ new_features = new_features + ['Trend']
218
+
219
+ # test
220
+ X_test_tuned.to_csv("Test/X_test_tuned_trend.csv", index=False)
221
+ X_train_tuned.to_csv("Test/X_train_tuned_trend.csv", index=False)
222
+ pd.concat([X_train_tuned,X_test_tuned]).sort_values([panel_col, date_col]).to_csv("Test/X_train_test_tuned_trend.csv", index=False)
223
+
224
+ else :
225
+ X_train_tuned['Trend']=np.arange(1,len(X_train_tuned)+1,1)
226
+ X_test_tuned['Trend'] = np.arange(len(X_train_tuned)+1, len(X_train_tuned)+len(X_test_tuned)+1, 1)  # end bound is exclusive; the +1 keeps the test trend contiguous with train and of the right length
227
+
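The subtle point in the trend feature is continuity at the split: each panel's test counter must resume where its training counter stopped, otherwise the model sees an artificial jump at the boundary. The loop above does this bookkeeping with panel_wise_end_point_train; a compact groupby equivalent, on illustrative toy frames:

import pandas as pd

train = pd.DataFrame({'panel': ['a', 'a', 'a', 'b', 'b']})
test = pd.DataFrame({'panel': ['a', 'a', 'b']})

train['Trend'] = train.groupby('panel').cumcount() + 1               # 1..n per panel
end = train.groupby('panel')['Trend'].max()                          # last train value per panel
test['Trend'] = test.groupby('panel').cumcount() + 1 + test['panel'].map(end)
print(test['Trend'].tolist())  # [4, 5, 3]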
228
+ if week_number :
229
+ # Sprint3 - create weeknumber from date column in xtrain tuned. add week num to new feature set
230
+ if is_panel :
231
+ X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
232
+ X_train_tuned['Week_number'] = X_train_tuned[date_col].dt.day_of_week  # note: this is day-of-week (0=Mon..6=Sun), used here as the weekly-seasonality feature
233
+ if X_train_tuned['Week_number'].nunique() == 1 :
234
+ st.write("All dates in the data are of the same week day. Hence Week number can't be used.")
235
+ else :
236
+ X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
237
+ X_test_tuned['Week_number'] = X_test_tuned[date_col].dt.day_of_week
238
+ new_features = new_features + ['Week_number']
239
+
240
+ else :
241
+ date = pd.to_datetime(date.values)
+ X_train_tuned['Week_number'] = date.day_of_week[:150]  # DatetimeIndex exposes day_of_week directly; the .dt accessor only exists on Series
+ X_test_tuned['Week_number'] = date.day_of_week[150:]
244
+
245
+ if sine_cosine :
246
+ # Sprint3 - create panel wise sine cosine waves in xtrain tuned. add to new feature set
247
+ if is_panel :
248
+ new_features = new_features + ['sine_wave', 'cosine_wave']
249
+ newdata = pd.DataFrame()
250
+ groups = X_train_tuned.groupby(panel_col)
251
+ frequency = 2 * np.pi / 365 # Adjust the frequency as needed
252
+
253
+ train_panel_wise_end_point = {}
254
+ for panel, groupdf in groups:
255
+ num_samples = len(groupdf)
256
+ train_panel_wise_end_point[panel] = num_samples
257
+ days_since_start = np.arange(num_samples)
258
+ sine_wave = np.sin(frequency * days_since_start)
259
+ cosine_wave = np.cos(frequency * days_since_start)
260
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
261
+ assert len(sine_cosine_df) == len(groupdf)
262
+ # groupdf = pd.concat([groupdf, sine_cosine_df], axis=1)
263
+ groupdf['sine_wave'] = sine_wave
264
+ groupdf['cosine_wave'] = cosine_wave
265
+ newdata = pd.concat([newdata, groupdf])
266
+
267
+ test_newdata = pd.DataFrame()
+ test_groups = X_test_tuned.groupby(panel_col)
+ for panel, test_groupdf in test_groups:
+     num_samples = len(test_groupdf)
+     start = train_panel_wise_end_point[panel]
+     days_since_start = np.arange(start, start + num_samples, 1)
+     sine_wave = np.sin(frequency * days_since_start)
+     cosine_wave = np.cos(frequency * days_since_start)
+     assert len(days_since_start) == len(test_groupdf)
+     test_groupdf['sine_wave'] = sine_wave
+     test_groupdf['cosine_wave'] = cosine_wave
+     # collect test panels in their own frame so they are not appended to the train set
+     test_newdata = pd.concat([test_newdata, test_groupdf])
+
+ X_train_tuned = newdata.copy()
+ X_test_tuned = test_newdata.copy()  # previously the test rows were concatenated into X_train_tuned by mistake
283
+
284
+
285
+ else :
286
+ num_samples = len(X_train_tuned)
287
+ frequency = 2 * np.pi / 365 # Adjust the frequency as needed
288
+ days_since_start = np.arange(num_samples)
289
+ sine_wave = np.sin(frequency * days_since_start)
290
+ cosine_wave = np.cos(frequency * days_since_start)
291
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
292
+ # Concatenate the sine and cosine waves with the scaled X DataFrame
293
+ X_train_tuned = pd.concat([X_train_tuned, sine_cosine_df], axis=1)
294
+
295
+ test_num_samples = len(X_test_tuned)
296
+ start = num_samples
297
+ days_since_start = np.arange(start, start+test_num_samples, 1)
298
+ sine_wave = np.sin(frequency * days_since_start)
299
+ cosine_wave = np.cos(frequency * days_since_start)
300
+ sine_cosine_df = pd.DataFrame({'sine_wave': sine_wave, 'cosine_wave': cosine_wave})
301
+ # Concatenate the sine and cosine waves with the scaled X DataFrame
302
+ X_test_tuned = pd.concat([X_test_tuned, sine_cosine_df], axis=1)
303
+
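These sine/cosine columns form a single Fourier pair: one full cycle every 365 steps, with the test split phase-shifted so it continues where training ended. A minimal standalone version of the same idea (the 365-step period mirrors the frequency constant above; for weekly rows a period of 52 would arguably fit better):

import numpy as np
import pandas as pd

def fourier_pair(n, period=365.0, start=0):
    # one sine/cosine pair; `start` offsets the phase so test continues after train
    t = np.arange(start, start + n)
    w = 2 * np.pi / period
    return pd.DataFrame({'sine_wave': np.sin(w * t), 'cosine_wave': np.cos(w * t)})

train_waves = fourier_pair(150)           # rows 0..149
test_waves = fourier_pair(30, start=150)  # picks up at row 150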
304
+ # model
305
+
306
+ if is_panel :
307
+ if selected_options :
308
+ new_features = new_features + selected_options
309
+
310
+ inp_vars_str = " + ".join(new_features)
311
+
312
+ # X_train_tuned.to_csv("Test/X_train_tuned.csv",index=False)
313
+ # st.write(X_train_tuned[['total_approved_accounts_revenue'] + new_features].dtypes)
314
+ # st.write(X_train_tuned[['total_approved_accounts_revenue', panel_col] + new_features].isna().sum())
315
+
316
+ md_tuned = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
317
+ data=X_train_tuned[['total_approved_accounts_revenue'] + new_features],
318
+ groups=X_train_tuned[panel_col])
319
+ model_tuned = md_tuned.fit()
320
+
321
+
322
+
323
+ # plot act v pred for original model and tuned model
324
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train,
325
+ model.fittedvalues, model,
326
+ target_column='Revenue',
327
+ is_panel=True)
328
+ metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(X_train_tuned[date_col],
329
+ X_train_tuned[target_col],
330
+ model_tuned.fittedvalues,
331
+ model_tuned,
332
+ target_column='Revenue',
333
+ is_panel=True)
334
+
335
+ else :
336
+ model_tuned = sm.OLS(y_train, X_train_tuned).fit()
337
+
338
+ metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date[:150], y_train,
339
+ model.predict(X_train), model,
340
+ target_column='Revenue')
341
+ metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(date[:150], y_train,
342
+ model_tuned.predict(
343
+ X_train_tuned),
344
+ model_tuned,
345
+ target_column='Revenue')
346
+
347
+ # st.write(metrics_table_tuned)
348
+ mape=np.round(metrics_table.iloc[0,1],2)
349
+ r2=np.round(metrics_table.iloc[1,1],2)
350
+ adjr2=np.round(metrics_table.iloc[2,1],2)
351
+
352
+ mape_tuned=np.round(metrics_table_tuned.iloc[0,1],2)
353
+ r2_tuned=np.round(metrics_table_tuned.iloc[1,1],2)
354
+ adjr2_tuned=np.round(metrics_table_tuned.iloc[2,1],2)
355
+
356
+ parameters_=st.columns(3)
357
+ with parameters_[0]:
358
+ st.metric('R2',r2_tuned,np.round(r2_tuned-r2,2))
359
+ with parameters_[1]:
360
+ st.metric('Adjusted R2',adjr2_tuned,np.round(adjr2_tuned-adjr2,2))
361
+ with parameters_[2]:
362
+ st.metric('MAPE',mape_tuned,np.round(mape_tuned-mape,2),'inverse')
363
+
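For reference, the third argument of st.metric is the delta (here tuned minus baseline), and passing 'inverse' flips the colour convention for metrics where lower is better, such as MAPE. A tiny illustration with made-up numbers:

import streamlit as st

r2, r2_tuned = 0.81, 0.84
mape, mape_tuned = 12.4, 11.1
st.metric('R2', r2_tuned, round(r2_tuned - r2, 2))                     # +0.03 shown green: higher is better
st.metric('MAPE', mape_tuned, round(mape_tuned - mape, 2), 'inverse')  # -1.3 shown green: lower is better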
364
+ st.header('2.2 Actual vs. Predicted Plot')
365
+ # if is_panel:
366
+ # metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date, y_train, model.predict(X_train),
367
+ # model, target_column='Revenue',is_panel=True)
368
+ # else:
369
+ # metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(date, y_train, model.predict(X_train), model,target_column='Revenue')
370
+
371
+ if is_panel:
+     metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train_tuned[date_col], X_train_tuned[target_col],
+                                                                              model_tuned.fittedvalues, model_tuned,
+                                                                              target_column='Revenue',
+                                                                              is_panel=True)
+ else:
+     # non-panel X_train_tuned carries no date/target columns, so reuse the 150-row split from above
+     metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(date[:150], y_train,
+                                                                              model_tuned.predict(X_train_tuned), model_tuned,
+                                                                              target_column='Revenue')
375
+ # plot_actual_vs_predicted(X_train[date_col], y_train,
376
+ # model.fittedvalues, model,
377
+ # target_column='Revenue',
378
+ # is_panel=is_panel)
379
+
380
+ st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
381
+
382
+
383
+
384
+ st.markdown('## 2.3 Residual Analysis')
385
+ columns=st.columns(2)
386
+ with columns[0]:
387
+ fig=plot_residual_predicted(y_train,model.predict(X_train),X_train)
388
+ st.plotly_chart(fig)
389
+
390
+ with columns[1]:
391
+ st.empty()
392
+ fig = qqplot(y_train,model.predict(X_train))
393
+ st.plotly_chart(fig)
394
+
395
+ with columns[0]:
396
+ fig=residual_distribution(y_train,model.predict(X_train))
397
+ st.pyplot(fig)
398
+
399
+ if st.checkbox('Use this model to build response curves',key='123'):
400
+ st.session_state["tuned_model"] = model_tuned
401
+ st.session_state["X_train_tuned"] = X_train_tuned
402
+ st.session_state["X_test_tuned"] = X_test_tuned
403
+ st.session_state["X_train_tuned"] = X_train_tuned
404
+ st.session_state["X_test_tuned"] = X_test_tuned
405
+ if is_panel :
406
+ st.session_state["tuned_model_features"] = new_features
407
+ with open("tuned_model.pkl", "wb") as f:
408
+ pickle.dump(st.session_state['tuned_model'], f)
409
+ st.success('Model saved!')
410
+
411
+ # raw_data=df[features_set]
412
+ # columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
413
+ # raw_data.columns=columns_raw
414
+ # columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
415
+ # raw_data=raw_data[columns_media]
416
+
417
+ # raw_data['Date']=list(df.index)
418
+
419
+ # spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
420
+ # spends_df=df[spends_var]
421
+ # spends_df['Week']=list(df.index)
422
+
423
+
424
+ # j=0
425
+ # X1=X.copy()
426
+ # col=X1.columns
427
+ # for i in model.params.values:
428
+ # X1[col[j]]=X1.iloc[:,j]*i
429
+ # j+=1
430
+ # contribution_df=X1
431
+ # contribution_df['Date']=list(df.index)
432
+ # excel_file='Overview_data.xlsx'
433
+
434
+ # with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
435
+ # raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
436
+ # spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
437
+ # contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')
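The panel branch above fits a random-intercept model with statsmodels' mixedlm, assembling the formula from the feature list (note the response is currently hard-coded as total_approved_accounts_revenue rather than taken from target_col). A self-contained sketch of the same pattern on synthetic data, with the target name kept generic:

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

rng = np.random.default_rng(0)
df = pd.DataFrame({'panel': np.repeat(['a', 'b', 'c'], 50),
                   'x1': rng.normal(size=150),
                   'x2': rng.normal(size=150)})
df['y'] = 1.0 + 0.5 * df['x1'] - 0.2 * df['x2'] + rng.normal(scale=0.1, size=150)

features = ['x1', 'x2']
formula = 'y ~ ' + ' + '.join(features)                        # same string assembly as inp_vars_str above
res = smf.mixedlm(formula, data=df, groups=df['panel']).fit()  # random intercept per panel
print(res.params)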
pages/4_Saved_Model_Results.py CHANGED
@@ -7,14 +7,16 @@ import statsmodels.api as sm
7
  from sklearn.metrics import mean_absolute_percentage_error
8
  import sys
9
  import os
10
- from utilities import set_header, load_local_css, load_authenticator
 
 
11
  import seaborn as sns
12
  import matplotlib.pyplot as plt
13
  import sweetviz as sv
14
  import tempfile
15
  from sklearn.preprocessing import MinMaxScaler
16
  from st_aggrid import AgGrid
17
- from st_aggrid import GridOptionsBuilder, GridUpdateMode
18
  from st_aggrid import GridOptionsBuilder
19
  import sys
20
  import re
@@ -22,586 +24,390 @@ import re
22
  sys.setrecursionlimit(10**6)
23
 
24
  original_stdout = sys.stdout
25
- sys.stdout = open("temp_stdout.txt", "w")
26
  sys.stdout.close()
27
  sys.stdout = original_stdout
28
 
29
- st.set_page_config(layout="wide")
30
- load_local_css("styles.css")
31
  set_header()
32
 
33
  for k, v in st.session_state.items():
34
- if k not in ["logout", "login", "config"] and not k.startswith("FormSubmitter"):
35
  st.session_state[k] = v
36
 
37
- authenticator = st.session_state.get("authenticator")
38
  if authenticator is None:
39
  authenticator = load_authenticator()
40
 
41
- name, authentication_status, username = authenticator.login("Login", "main")
42
- auth_status = st.session_state.get("authentication_status")
43
 
44
  if auth_status == True:
45
- is_state_initiaized = st.session_state.get("initialized", False)
46
  if not is_state_initiaized:
47
- a = 1
 
48
 
49
  def plot_residual_predicted(actual, predicted, df_):
50
- df_["Residuals"] = actual - pd.Series(predicted)
51
- df_["StdResidual"] = (df_["Residuals"] - df_["Residuals"].mean()) / df_[
52
- "Residuals"
53
- ].std()
54
-
55
- # Create a Plotly scatter plot
56
- fig = px.scatter(
57
- df_,
58
- x=predicted,
59
- y="StdResidual",
60
- opacity=0.5,
61
- color_discrete_sequence=["#11B6BD"],
62
- )
63
-
64
- # Add horizontal lines
65
- fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
66
- fig.add_hline(y=2, line_color="red")
67
- fig.add_hline(y=-2, line_color="red")
68
-
69
- fig.update_xaxes(title="Predicted")
70
- fig.update_yaxes(title="Standardized Residuals (Actual - Predicted)")
71
-
72
- # Set the same width and height for both figures
73
- fig.update_layout(
74
- title="Residuals over Predicted Values",
75
- autosize=False,
76
- width=600,
77
- height=400,
78
- )
79
-
80
- return fig
81
 
82
  def residual_distribution(actual, predicted):
83
- Residuals = actual - pd.Series(predicted)
84
-
85
- # Create a Seaborn distribution plot
86
- sns.set(style="whitegrid")
87
- plt.figure(figsize=(6, 4))
88
- sns.histplot(Residuals, kde=True, color="#11B6BD")
89
-
90
- plt.title(" Distribution of Residuals")
91
- plt.xlabel("Residuals")
92
- plt.ylabel("Probability Density")
93
-
94
- return plt
95
-
 
96
  def qqplot(actual, predicted):
97
- Residuals = actual - pd.Series(predicted)
98
- Residuals = pd.Series(Residuals)
99
- Resud_std = (Residuals - Residuals.mean()) / Residuals.std()
100
-
101
- # Create a QQ plot using Plotly with custom colors
102
- fig = go.Figure()
103
- fig.add_trace(
104
- go.Scatter(
105
- x=sm.ProbPlot(Resud_std).theoretical_quantiles,
106
- y=sm.ProbPlot(Resud_std).sample_quantiles,
107
- mode="markers",
108
- marker=dict(size=5, color="#11B6BD"),
109
- name="QQ Plot",
 
 
 
 
 
 
110
  )
111
- )
112
-
113
- # Add the 45-degree reference line
114
- diagonal_line = go.Scatter(
115
- x=[-2, 2], # Adjust the x values as needed to fit the range of your data
116
- y=[-2, 2], # Adjust the y values accordingly
117
- mode="lines",
118
- line=dict(color="red"), # Customize the line color and style
119
- name=" ",
120
- )
121
- fig.add_trace(diagonal_line)
122
-
123
- # Customize the layout
124
- fig.update_layout(
125
- title="QQ Plot of Residuals",
126
- title_x=0.5,
127
- autosize=False,
128
- width=600,
129
- height=400,
130
- xaxis_title="Theoretical Quantiles",
131
- yaxis_title="Sample Quantiles",
132
- )
133
 
134
- return fig
135
 
136
  def plot_actual_vs_predicted(date, y, predicted_values, model):
137
 
138
  fig = go.Figure()
139
 
140
- fig.add_trace(
141
- go.Scatter(
142
- x=date, y=y, mode="lines", name="Actual", line=dict(color="blue")
143
- )
144
- )
145
- fig.add_trace(
146
- go.Scatter(
147
- x=date,
148
- y=predicted_values,
149
- mode="lines",
150
- name="Predicted",
151
- line=dict(color="orange"),
152
- )
153
- )
154
-
155
  # Calculate MAPE
156
- mape = mean_absolute_percentage_error(y, predicted_values) * 100
157
-
158
  # Calculate R-squared
159
  rss = np.sum((y - predicted_values) ** 2)
160
  tss = np.sum((y - np.mean(y)) ** 2)
161
  r_squared = 1 - (rss / tss)
162
-
163
  # Get the number of predictors
164
  num_predictors = model.df_model
165
-
166
  # Get the number of samples
167
  num_samples = len(y)
168
-
169
  # Calculate Adjusted R-squared
170
- adj_r_squared = 1 - (
171
- (1 - r_squared) * ((num_samples - 1) / (num_samples - num_predictors - 1))
172
- )
173
- metrics_table = pd.DataFrame(
174
- {
175
- "Metric": ["MAPE", "R-squared", "AdjR-squared"],
176
- "Value": [mape, r_squared, adj_r_squared],
177
- }
178
- )
179
  fig.update_layout(
180
- xaxis=dict(title="Date"),
181
- yaxis=dict(title="Value"),
182
- title=f"MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}",
183
- xaxis_tickangle=-30,
184
  )
185
 
186
- return metrics_table, fig
187
-
188
  def contributions(X, model):
189
  X1 = X.copy()
190
  for j, col in enumerate(X1.columns):
191
  X1[col] = X1[col] * model.params.values[j]
192
 
193
- return np.round(
194
- (X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2
195
- )
196
 
197
- transformed_data = pd.read_csv("transformed_data.csv")
198
 
199
  # hard coded for now, need to get features set from model
200
 
201
- feature_set_dct = {
202
- "app_installs_-_appsflyer": [
203
- "paid_search_clicks",
204
- "fb:_level_achieved_-_tier_1_impressions_lag2",
205
- "fb:_level_achieved_-_tier_2_clicks_lag2",
206
- "paid_social_others_impressions_adst.1",
207
- "ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag2",
208
- "digital_tactic_others_clicks",
209
- "kwai_clicks_adst.3",
210
- "programmaticclicks",
211
- "indicacao_clicks_adst.1",
212
- "infleux_clicks_adst.4",
213
- "influencer_clicks",
214
- ],
215
- "account_requests_-_appsflyer": [
216
- "paid_search_impressions",
217
- "fb:_level_achieved_-_tier_1_clicks_adst.1",
218
- "fb:_level_achieved_-_tier_2_clicks_adst.1",
219
- "paid_social_others_clicks_lag2",
220
- "ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag5_adst.1",
221
- "digital_tactic_others_clicks_adst.1",
222
- "kwai_clicks_adst.2",
223
- "programmaticimpressions_lag4_adst.1",
224
- "indicacao_clicks",
225
- "infleux_clicks_adst.2",
226
- "influencer_clicks",
227
- ],
228
- "total_approved_accounts_-_appsflyer": [
229
- "paid_search_clicks",
230
- "fb:_level_achieved_-_tier_1_impressions_lag2_adst.1",
231
- "fb:_level_achieved_-_tier_2_impressions_lag2",
232
- "paid_social_others_clicks_lag2_adst.2",
233
- "ga_app:_will_and_cid_pequena_baixo_risco_impressions_lag4",
234
- "digital_tactic_others_clicks",
235
- "kwai_impressions_adst.2",
236
- "programmaticclicks_adst.5",
237
- "indicacao_clicks_adst.1",
238
- "infleux_clicks_adst.3",
239
- "influencer_clicks",
240
- ],
241
- "total_approved_accounts_-_revenue": [
242
- "paid_search_impressions_adst.5",
243
- "kwai_impressions_lag2_adst.3",
244
- "indicacao_clicks_adst.3",
245
- "infleux_clicks_adst.3",
246
- "programmaticclicks_adst.4",
247
- "influencer_clicks_adst.3",
248
- "fb:_level_achieved_-_tier_1_impressions_adst.2",
249
- "fb:_level_achieved_-_tier_2_impressions_lag3_adst.5",
250
- "paid_social_others_impressions_adst.3",
251
- "ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag3_adst.5",
252
- "digital_tactic_others_clicks_adst.2",
253
- ],
254
- }
255
-
256
- # """ the above part should be modified so that we are fetching features set from the saved model"""
257
-
258
- def contributions(X, model, target):
259
  X1 = X.copy()
260
  for j, col in enumerate(X1.columns):
261
  X1[col] = X1[col] * model.params.values[j]
262
-
263
- contributions = np.round(
264
- (X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2
265
- )
266
- contributions = (
267
- pd.DataFrame(contributions, columns=target)
268
- .reset_index()
269
- .rename(columns={"index": "Channel"})
270
- )
271
- contributions["Channel"] = [
272
- re.split(r"_imp|_cli", col)[0] for col in contributions["Channel"]
273
- ]
274
-
275
  return contributions
 
276
 
277
- def model_fit(features_set, target):
278
  X = transformed_data[features_set]
279
- y = transformed_data[target]
280
  ss = MinMaxScaler()
281
  X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
282
  X = sm.add_constant(X)
283
- X_train = X.iloc[:150]
284
- X_test = X.iloc[150:]
285
- y_train = y.iloc[:150]
286
- y_test = y.iloc[150:]
287
  model = sm.OLS(y_train, X_train).fit()
288
  predicted_values_train = model.predict(X_train)
289
  r2 = model.rsquared
290
  adjr2 = model.rsquared_adj
291
  train_mape = mean_absolute_percentage_error(y_train, predicted_values_train)
292
- test_mape = mean_absolute_percentage_error(y_test, model.predict(X_test))
293
- summary = model.summary()
294
- train_contributions = contributions(X_train, model, [target])
295
- return (
296
- pd.DataFrame(
297
- {
298
- "Model": target,
299
- "R2": np.round(r2, 2),
300
- "ADJr2": np.round(adjr2, 2),
301
- "Train Mape": np.round(train_mape, 2),
302
- "Test Mape": np.round(test_mape, 2),
303
- "Summary": summary,
304
- "Model_object": model,
305
- },
306
- index=[0],
307
- ),
308
- train_contributions,
309
- )
310
 
311
- metrics_table = pd.DataFrame()
312
 
313
- if "contribution_df" not in st.session_state:
314
- st.session_state["contribution_df"] = pd.DataFrame()
315
 
316
- for target, feature_set in feature_set_dct.items():
317
- metrics_table = pd.concat(
318
- [metrics_table, model_fit(features_set=feature_set, target=target)[0]]
319
- )
320
- if st.session_state["contribution_df"].empty:
321
- st.session_state["contribution_df"] = model_fit(
322
- features_set=feature_set, target=target
323
- )[1]
324
- else:
325
- st.session_state["contribution_df"] = pd.merge(
326
- st.session_state["contribution_df"],
327
- model_fit(features_set=feature_set, target=target)[1],
328
- )
329
 
330
  # st.write(st.session_state["contribution_df"])
 
 
 
 
331
 
332
- metrics_table.reset_index(drop=True, inplace=True)
333
 
334
- eda_columns = st.columns(2)
 
 
 
 
 
335
  with eda_columns[1]:
336
- eda = st.button(
337
- "Generate EDA Report",
338
- help="Click to generate a bivariate report for the selected response metric from the table below.",
339
- )
340
 
341
  # st.markdown('Model Metrics')
 
 
342
 
343
- st.title("Contribution Overview")
344
-
345
- contribution_selections = st.multiselect(
346
- "Select the models to compare contributions",
347
- [
348
- col
349
- for col in st.session_state["contribution_df"].columns
350
- if col.lower() != "channel"
351
- ],
352
- default=[
353
- col
354
- for col in st.session_state["contribution_df"].columns
355
- if col.lower() != "channel"
356
- ][-1],
357
- )
358
- trace_data = []
359
 
360
  for selection in contribution_selections:
361
 
362
- trace = go.Bar(
363
- x=st.session_state["contribution_df"]["Channel"],
364
- y=st.session_state["contribution_df"][selection],
365
- name=selection,
366
- text=np.round(st.session_state["contribution_df"][selection], 0)
367
- .astype(int)
368
- .astype(str)
369
- + "%",
370
- textposition="outside",
371
- )
372
  trace_data.append(trace)
373
 
374
  layout = go.Layout(
375
- title="Metrics Contribution by Channel",
376
- xaxis=dict(title="Channel Name"),
377
- yaxis=dict(title="Metrics Contribution"),
378
- barmode="group",
379
- )
380
  fig = go.Figure(data=trace_data, layout=layout)
381
- st.plotly_chart(fig, use_container_width=True)
382
-
383
- ############################################ Waterfall Chart ############################################
384
- # import plotly.graph_objects as go
385
-
386
- # # Initialize a Plotly figure
387
- # fig = go.Figure()
388
-
389
- # for selection in contribution_selections:
390
- # # Ensure y_values are numeric
391
- # y_values = st.session_state["contribution_df"][selection].values.astype(float)
392
-
393
- # # Generating text labels for each bar, ensuring operations are compatible with string formats
394
- # text_values = [f"{val}%" for val in np.round(y_values, 0).astype(int)]
395
-
396
- # fig.add_trace(
397
- # go.Waterfall(
398
- # name=selection,
399
- # orientation="v",
400
- # measure=["relative"]
401
- # * len(y_values), # Adjust if you have absolute values at certain points
402
- # x=st.session_state["contribution_df"]["Channel"].tolist(),
403
- # text=text_values,
404
- # textposition="outside",
405
- # y=y_values,
406
- # increasing={"marker": {"color": "green"}},
407
- # decreasing={"marker": {"color": "red"}},
408
- # totals={"marker": {"color": "blue"}},
409
- # )
410
- # )
411
-
412
- # fig.update_layout(
413
- # title="Metrics Contribution by Channel",
414
- # xaxis={"title": "Channel Name"},
415
- # yaxis={"title": "Metrics Contribution"},
416
- # height=600,
417
- # )
418
-
419
- # # Displaying the waterfall chart in Streamlit
420
- # st.plotly_chart(fig, use_container_width=True)
421
-
422
- import plotly.graph_objects as go
423
-
424
- # Initialize a Plotly figure
425
- fig = go.Figure()
426
 
427
- for selection in contribution_selections:
428
- # Ensure contributions are numeric
429
- contributions = (
430
- st.session_state["contribution_df"][selection].values.astype(float).tolist()
431
- )
432
- channel_names = st.session_state["contribution_df"]["Channel"].tolist()
433
 
434
- display_name, display_contribution, base_contribution = [], [], 0
435
- for channel_name, contribution in zip(channel_names, contributions):
436
- if channel_name != "const":
437
- display_name.append(channel_name)
438
- display_contribution.append(contribution)
439
- else:
440
- base_contribution = contribution
441
-
442
- display_name = ["Base Sales"] + display_name
443
- display_contribution = [base_contribution] + display_contribution
444
-
445
- # Generating text labels for each bar, ensuring operations are compatible with string formats
446
- text_values = [
447
- f"{val}%" for val in np.round(display_contribution, 0).astype(int)
448
- ]
449
-
450
- fig.add_trace(
451
- go.Waterfall(
452
- orientation="v",
453
- measure=["relative"]
454
- * len(
455
- display_contribution
456
- ), # Adjust if you have absolute values at certain points
457
- x=display_name,
458
- text=text_values,
459
- textposition="outside",
460
- y=display_contribution,
461
- increasing={"marker": {"color": "green"}},
462
- decreasing={"marker": {"color": "red"}},
463
- totals={"marker": {"color": "blue"}},
464
- )
465
- )
466
 
467
- fig.update_layout(
468
- title="Metrics Contribution by Channel",
469
- xaxis={"title": "Channel Name"},
470
- yaxis={"title": "Metrics Contribution"},
471
- height=600,
472
- )
473
-
474
- # Displaying the waterfall chart in Streamlit
475
- st.plotly_chart(fig, use_container_width=True)
476
-
477
- ############################################ Waterfall Chart ############################################
478
-
479
- st.title("Analysis of Models Result")
480
- # st.markdown()
481
- gd_table = metrics_table.iloc[:, :-2]
482
-
483
- gd = GridOptionsBuilder.from_dataframe(gd_table)
484
- # gd.configure_pagination(enabled=True)
485
- gd.configure_selection(
486
- use_checkbox=True,
487
- selection_mode="single",
488
- pre_select_all_rows=False,
489
- pre_selected_rows=[1],
490
- )
491
-
492
- gridoptions = gd.build()
493
- table = AgGrid(
494
- gd_table, gridOptions=gridoptions, fit_columns_on_grid_load=True, height=200
495
- )
496
  # table=metrics_table.iloc[:,:-2]
497
  # table.insert(0, "Select", False)
498
  # selection_table=st.data_editor(table,column_config={"Select": st.column_config.CheckboxColumn(required=True)})
 
499
 
500
- if len(table.selected_rows) == 0:
501
- st.warning(
502
- "Click on the checkbox to view comprehensive results of the selected model."
503
- )
504
  st.stop()
505
- else:
506
- target_column = table.selected_rows[0]["Model"]
507
- feature_set = feature_set_dct[target_column]
508
 
509
  with eda_columns[1]:
510
  if eda:
511
-
512
  def generate_report_with_target(channel_data, target_feature):
513
- report = sv.analyze(
514
- [channel_data, "Dataset"], target_feat=target_feature, verbose=False
515
- )
516
  temp_dir = tempfile.mkdtemp()
517
  report_path = os.path.join(temp_dir, "report.html")
518
- report.show_html(
519
- filepath=report_path, open_browser=False
520
- ) # Generate the report as an HTML file
521
  return report_path
522
-
523
- report_data = transformed_data[feature_set]
524
- report_data[target_column] = transformed_data[target_column]
525
  report_file = generate_report_with_target(report_data, target_column)
526
-
527
  if os.path.exists(report_file):
528
- with open(report_file, "rb") as f:
529
  st.download_button(
530
  label="Download EDA Report",
531
  data=f.read(),
532
  file_name="report.html",
533
- mime="text/html",
534
  )
535
  else:
536
  st.warning("Report generation failed. Unable to find the report file.")
537
 
538
- model = metrics_table[metrics_table["Model"] == target_column]["Model_object"].iloc[
539
- 0
540
- ]
541
- st.header("Model Summary")
542
  st.write(model.summary())
543
- X = transformed_data[feature_set]
544
- ss = MinMaxScaler()
545
- X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
546
- X = sm.add_constant(X)
547
- y = transformed_data[target_column]
548
- X_train = X.iloc[:150]
549
- X_test = X.iloc[150:]
550
- y_train = y.iloc[:150]
551
- y_test = y.iloc[150:]
552
- X.index = transformed_data["date"]
553
- y.index = transformed_data["date"]
554
-
555
- metrics_table_train, fig_train = plot_actual_vs_predicted(
556
- X_train.index, y_train, model.predict(X_train), model
557
- )
558
- metrics_table_test, fig_test = plot_actual_vs_predicted(
559
- X_test.index, y_test, model.predict(X_test), model
560
- )
561
-
562
- metrics_table_train = metrics_table_train.set_index("Metric").transpose()
563
- metrics_table_train.index = ["Train"]
564
- metrics_table_test = metrics_table_test.set_index("Metric").transpose()
565
- metrics_table_test.index = ["test"]
566
- metrics_table = np.round(pd.concat([metrics_table_train, metrics_table_test]), 2)
567
-
568
- st.markdown("Result Overview")
569
- st.dataframe(np.round(metrics_table, 2), use_container_width=True)
570
-
571
- st.subheader("Actual vs Predicted Plot Train")
572
-
573
- st.plotly_chart(fig_train, use_container_width=True)
574
- st.subheader("Actual vs Predicted Plot Test")
575
- st.plotly_chart(fig_test, use_container_width=True)
576
-
577
- st.markdown("## Residual Analysis")
578
- columns = st.columns(2)
579
-
580
- Xtrain1 = X_train.copy()
581
  with columns[0]:
582
- fig = plot_residual_predicted(y_train, model.predict(Xtrain1), Xtrain1)
583
  st.plotly_chart(fig)
584
 
585
  with columns[1]:
586
  st.empty()
587
- fig = qqplot(y_train, model.predict(X_train))
588
  st.plotly_chart(fig)
589
 
590
  with columns[0]:
591
- fig = residual_distribution(y_train, model.predict(X_train))
592
  st.pyplot(fig)
593
 
594
 
 
595
  elif auth_status == False:
596
- st.error("Username/Password is incorrect")
597
  try:
598
- username_forgot_pw, email_forgot_password, random_password = (
599
- authenticator.forgot_password("Forgot password")
600
- )
601
  if username_forgot_pw:
602
- st.success("New password sent securely")
603
  # Random password to be transferred to the user securely
604
  elif username_forgot_pw == False:
605
- st.error("Username not found")
606
  except Exception as e:
607
  st.error(e)
 
7
  from sklearn.metrics import mean_absolute_percentage_error
8
  import sys
9
  import os
10
+ from utilities import (set_header,
11
+ load_local_css,
12
+ load_authenticator)
13
  import seaborn as sns
14
  import matplotlib.pyplot as plt
15
  import sweetviz as sv
16
  import tempfile
17
  from sklearn.preprocessing import MinMaxScaler
18
  from st_aggrid import AgGrid
19
+ from st_aggrid import GridOptionsBuilder,GridUpdateMode
20
  from st_aggrid import GridOptionsBuilder
21
  import sys
22
  import re
 
24
  sys.setrecursionlimit(10**6)
25
 
26
  original_stdout = sys.stdout
27
+ sys.stdout = open('temp_stdout.txt', 'w')
28
  sys.stdout.close()
29
  sys.stdout = original_stdout
30
 
31
+ st.set_page_config(layout='wide')
32
+ load_local_css('styles.css')
33
  set_header()
34
 
35
  for k, v in st.session_state.items():
36
+ if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
37
  st.session_state[k] = v
38
 
39
+ authenticator = st.session_state.get('authenticator')
40
  if authenticator is None:
41
  authenticator = load_authenticator()
42
 
43
+ name, authentication_status, username = authenticator.login('Login', 'main')
44
+ auth_status = st.session_state.get('authentication_status')
45
 
46
  if auth_status == True:
47
+ is_state_initialized = st.session_state.get('initialized', False)
+ if not is_state_initialized:
+     pass  # nothing to re-initialize here; session state is carried over by the loop above
50
+
51
 
52
  def plot_residual_predicted(actual, predicted, df_):
53
+ df_['Residuals'] = actual - pd.Series(predicted)
54
+ df_['StdResidual'] = (df_['Residuals'] - df_['Residuals'].mean()) / df_['Residuals'].std()
55
+
56
+ # Create a Plotly scatter plot
57
+ fig = px.scatter(df_, x=predicted, y='StdResidual', opacity=0.5,color_discrete_sequence=["#11B6BD"])
58
+
59
+ # Add horizontal lines
60
+ fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
61
+ fig.add_hline(y=2, line_color="red")
62
+ fig.add_hline(y=-2, line_color="red")
63
+
64
+ fig.update_xaxes(title='Predicted')
65
+ fig.update_yaxes(title='Standardized Residuals (Actual - Predicted)')
66
+
67
+ # Set the same width and height for both figures
68
+ fig.update_layout(title='Residuals over Predicted Values', autosize=False, width=600, height=400)
69
+
70
+ return fig
 
71
 
72
  def residual_distribution(actual, predicted):
73
+ Residuals = actual - pd.Series(predicted)
74
+
75
+ # Create a Seaborn distribution plot
76
+ sns.set(style="whitegrid")
77
+ plt.figure(figsize=(6, 4))
78
+ sns.histplot(Residuals, kde=True, color="#11B6BD")
79
+
80
+ plt.title('Distribution of Residuals')
81
+ plt.xlabel('Residuals')
82
+ plt.ylabel('Probability Density')
83
+
84
+ return plt
85
+
86
+
87
  def qqplot(actual, predicted):
88
+ Residuals = actual - pd.Series(predicted)
89
+ Residuals = pd.Series(Residuals)
90
+ Resud_std = (Residuals - Residuals.mean()) / Residuals.std()
91
+
92
+ # Create a QQ plot using Plotly with custom colors
93
+ fig = go.Figure()
94
+ fig.add_trace(go.Scatter(x=sm.ProbPlot(Resud_std).theoretical_quantiles,
95
+ y=sm.ProbPlot(Resud_std).sample_quantiles,
96
+ mode='markers',
97
+ marker=dict(size=5, color="#11B6BD"),
98
+ name='QQ Plot'))
99
+
100
+ # Add the 45-degree reference line
101
+ diagonal_line = go.Scatter(
102
+ x=[-2, 2], # Adjust the x values as needed to fit the range of your data
103
+ y=[-2, 2], # Adjust the y values accordingly
104
+ mode='lines',
105
+ line=dict(color='red'), # Customize the line color and style
106
+ name=' '
107
  )
108
+ fig.add_trace(diagonal_line)
109
+
110
+ # Customize the layout
111
+ fig.update_layout(title='QQ Plot of Residuals',title_x=0.5, autosize=False, width=600, height=400,
112
+ xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles')
113
+
114
+ return fig
 
 
115
 
 
116
 
117
  def plot_actual_vs_predicted(date, y, predicted_values, model):
118
 
119
  fig = go.Figure()
120
 
121
+ fig.add_trace(go.Scatter(x=date, y=y, mode='lines', name='Actual', line=dict(color='blue')))
122
+ fig.add_trace(go.Scatter(x=date, y=predicted_values, mode='lines', name='Predicted', line=dict(color='orange')))
123
+
 
124
  # Calculate MAPE
125
+ mape = mean_absolute_percentage_error(y, predicted_values)*100
126
+
127
  # Calculate R-squared
128
  rss = np.sum((y - predicted_values) ** 2)
129
  tss = np.sum((y - np.mean(y)) ** 2)
130
  r_squared = 1 - (rss / tss)
131
+
132
  # Get the number of predictors
133
  num_predictors = model.df_model
134
+
135
  # Get the number of samples
136
  num_samples = len(y)
137
+
138
  # Calculate Adjusted R-squared
139
+ adj_r_squared = 1 - ((1 - r_squared) * ((num_samples - 1) / (num_samples - num_predictors - 1)))
140
+ metrics_table = pd.DataFrame({
141
+ 'Metric': ['MAPE', 'R-squared', 'AdjR-squared'],
142
+ 'Value': [mape, r_squared, adj_r_squared]})
 
 
143
  fig.update_layout(
144
+ xaxis=dict(title='Date'),
145
+ yaxis=dict(title='Value'),
146
+ title=f'MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}',
147
+ xaxis_tickangle=-30
148
  )
149
 
150
+ return metrics_table,fig
 
151
  def contributions(X, model):
152
  X1 = X.copy()
153
  for j, col in enumerate(X1.columns):
154
  X1[col] = X1[col] * model.params.values[j]
155
 
156
+ return np.round((X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2)
 
 
157
 
158
+ transformed_data=pd.read_csv('transformed_data.csv')
159
 
160
  # hard coded for now, need to get features set from model
161
 
162
+ feature_set_dct={'app_installs_-_appsflyer':['paid_search_clicks',
163
+ 'fb:_level_achieved_-_tier_1_impressions_lag2',
164
+ 'fb:_level_achieved_-_tier_2_clicks_lag2',
165
+ 'paid_social_others_impressions_adst.1',
166
+ 'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag2',
167
+ 'digital_tactic_others_clicks',
168
+ 'kwai_clicks_adst.3',
169
+ 'programmaticclicks',
170
+ 'indicacao_clicks_adst.1',
171
+ 'infleux_clicks_adst.4',
172
+ 'influencer_clicks'],
173
+
174
+ 'account_requests_-_appsflyer':['paid_search_impressions',
175
+ 'fb:_level_achieved_-_tier_1_clicks_adst.1',
176
+ 'fb:_level_achieved_-_tier_2_clicks_adst.1',
177
+ 'paid_social_others_clicks_lag2',
178
+ 'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag5_adst.1',
179
+ 'digital_tactic_others_clicks_adst.1',
180
+ 'kwai_clicks_adst.2',
181
+ 'programmaticimpressions_lag4_adst.1',
182
+ 'indicacao_clicks',
183
+ 'infleux_clicks_adst.2',
184
+ 'influencer_clicks'],
185
+
186
+ 'total_approved_accounts_-_appsflyer':['paid_search_clicks',
187
+ 'fb:_level_achieved_-_tier_1_impressions_lag2_adst.1',
188
+ 'fb:_level_achieved_-_tier_2_impressions_lag2',
189
+ 'paid_social_others_clicks_lag2_adst.2',
190
+ 'ga_app:_will_and_cid_pequena_baixo_risco_impressions_lag4',
191
+ 'digital_tactic_others_clicks',
192
+ 'kwai_impressions_adst.2',
193
+ 'programmaticclicks_adst.5',
194
+ 'indicacao_clicks_adst.1',
195
+ 'infleux_clicks_adst.3',
196
+ 'influencer_clicks'],
197
+
198
+ 'total_approved_accounts_-_revenue':['paid_search_impressions_adst.5',
199
+ 'kwai_impressions_lag2_adst.3',
200
+ 'indicacao_clicks_adst.3',
201
+ 'infleux_clicks_adst.3',
202
+ 'programmaticclicks_adst.4',
203
+ 'influencer_clicks_adst.3',
204
+ 'fb:_level_achieved_-_tier_1_impressions_adst.2',
205
+ 'fb:_level_achieved_-_tier_2_impressions_lag3_adst.5',
206
+ 'paid_social_others_impressions_adst.3',
207
+ 'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag3_adst.5',
208
+ 'digital_tactic_others_clicks_adst.2']
209
+
210
+ }
211
+
212
+ #""" the above part should be modified so that we are fetching features set from the saved model"""
213
+
214
+
215
+
216
+ def contributions(X, model,target):
 
 
 
217
  X1 = X.copy()
218
  for j, col in enumerate(X1.columns):
219
  X1[col] = X1[col] * model.params.values[j]
220
+
221
+ contributions= np.round((X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2)
222
+ contributions=pd.DataFrame(contributions,columns=target).reset_index().rename(columns={'index':'Channel'})
223
+ contributions['Channel']=[ re.split(r'_imp|_cli', col)[0] for col in contributions['Channel']]
224
+
 
 
225
  return contributions
226
+
227
 
228
+ def model_fit(features_set,target):
229
  X = transformed_data[features_set]
230
+ y= transformed_data[target]
231
  ss = MinMaxScaler()
232
  X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
233
  X = sm.add_constant(X)
234
+ X_train=X.iloc[:150]
235
+ X_test=X.iloc[150:]
236
+ y_train=y.iloc[:150]
237
+ y_test=y.iloc[150:]
238
  model = sm.OLS(y_train, X_train).fit()
239
  predicted_values_train = model.predict(X_train)
240
  r2 = model.rsquared
241
  adjr2 = model.rsquared_adj
242
  train_mape = mean_absolute_percentage_error(y_train, predicted_values_train)
243
+ test_mape=mean_absolute_percentage_error(y_test, model.predict(X_test))
244
+ summary=model.summary()
245
+ train_contributions=contributions(X_train,model,[target])
246
+ return pd.DataFrame({'Model':target,'R2':np.round(r2,2),'ADJr2':np.round(adjr2,2),'Train Mape':np.round(train_mape,2),
247
+ 'Test Mape':np.round(test_mape,2),'Summary':summary,'Model_object':model
248
+ },index=[0]), train_contributions
 
 
249
 
250
+ metrics_table=pd.DataFrame()
251
 
252
+ if 'contribution_df' not in st.session_state:
253
+ st.session_state["contribution_df"]=pd.DataFrame()
254
 
255
+ for target,feature_set in feature_set_dct.items():
256
+ metrics_table= pd.concat([metrics_table,model_fit(features_set=feature_set,target=target)[0]])
257
+ if st.session_state["contribution_df"].empty:
258
+ st.session_state["contribution_df"]= model_fit(features_set=feature_set,target=target)[1]
259
+ else:
260
+ st.session_state["contribution_df"]=pd.merge(st.session_state["contribution_df"],model_fit(features_set=feature_set,target=target)[1])
 
 
261
 
262
  # st.write(st.session_state["contribution_df"])
263
+
264
+
265
+ metrics_table.reset_index(drop=True,inplace=True)
266
+
267
 
 
268
 
269
+
270
+
271
+
272
+
273
+
274
+ eda_columns=st.columns(2)
275
  with eda_columns[1]:
276
+ eda=st.button('Generate EDA Report',help="Click to generate a bivariate report for the selected response metric from the table below.")
277
+
278
+
 
279
 
280
  # st.markdown('Model Metrics')
281
+
282
+ st.title('Contribution Overview')
283
 
284
+ contribution_selections=st.multiselect('Select the models to compare contributions',[col for col in st.session_state['contribution_df'].columns if col.lower() != 'channel' ],default=[col for col in st.session_state['contribution_df'].columns if col.lower() != 'channel' ][-1])
285
+ trace_data=[]
 
 
286
 
287
  for selection in contribution_selections:
288
 
289
+ trace=go.Bar(x=st.session_state['contribution_df']['Channel'], y=st.session_state['contribution_df'][selection],name=selection,text=np.round(st.session_state['contribution_df'][selection],0).astype(int).astype(str)+'%',textposition='outside')
 
 
290
  trace_data.append(trace)
291
 
292
  layout = go.Layout(
293
+ title='Metrics Contribution by Channel',
294
+ xaxis=dict(title='Channel Name'),
295
+ yaxis=dict(title='Metrics Contribution'),
296
+ barmode='group'
297
+ )
298
  fig = go.Figure(data=trace_data, layout=layout)
299
+ st.plotly_chart(fig,use_container_width=True)
 
 
300
 
301
+ st.title('Analysis of Models Result')
302
+ #st.markdown()
303
+ gd_table=metrics_table.iloc[:,:-2]
304
+ gd=GridOptionsBuilder.from_dataframe(gd_table)
305
+ #gd.configure_pagination(enabled=True)
306
+ gd.configure_selection(use_checkbox=True)
307
 
 
308
 
309
+ gridoptions=gd.build()
310
+ table = AgGrid(gd_table,gridOptions=gridoptions,fit_columns_on_grid_load=True,height=200)
 
 
311
  # table=metrics_table.iloc[:,:-2]
312
  # table.insert(0, "Select", False)
313
  # selection_table=st.data_editor(table,column_config={"Select": st.column_config.CheckboxColumn(required=True)})
314
+
315
 
316
+
317
+ if len(table.selected_rows)==0:
318
+ st.warning("Click on the checkbox to view comprehensive results of the selected model.")
 
319
  st.stop()
320
+ else:
321
+ target_column=table.selected_rows[0]['Model']
322
+ feature_set=feature_set_dct[target_column]
323
 
324
  with eda_columns[1]:
325
  if eda:
 
326
  def generate_report_with_target(channel_data, target_feature):
327
+ report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature,verbose=False)
 
 
328
  temp_dir = tempfile.mkdtemp()
329
  report_path = os.path.join(temp_dir, "report.html")
330
+ report.show_html(filepath=report_path, open_browser=False) # Generate the report as an HTML file
 
 
331
  return report_path
332
+
333
+ report_data=transformed_data[feature_set]
334
+ report_data[target_column]=transformed_data[target_column]
335
  report_file = generate_report_with_target(report_data, target_column)
336
+
337
  if os.path.exists(report_file):
338
+ with open(report_file, 'rb') as f:
339
  st.download_button(
340
  label="Download EDA Report",
341
  data=f.read(),
342
  file_name="report.html",
343
+ mime="text/html"
344
  )
345
  else:
346
  st.warning("Report generation failed. Unable to find the report file.")
347
 
348
+
349
+
350
+ model=metrics_table[metrics_table['Model']==target_column]['Model_object'].iloc[0]
351
+ st.header('Model Summary')
352
  st.write(model.summary())
353
+ X=transformed_data[feature_set]
354
+ ss=MinMaxScaler()
355
+ X=pd.DataFrame(ss.fit_transform(X),columns=X.columns)
356
+ X=sm.add_constant(X)
357
+ y=transformed_data[target_column]
358
+ X_train=X.iloc[:150]
359
+ X_test=X.iloc[150:]
360
+ y_train=y.iloc[:150]
361
+ y_test=y.iloc[150:]
362
+ X.index=transformed_data['date']
363
+ y.index=transformed_data['date']
364
+
365
+ metrics_table_train,fig_train= plot_actual_vs_predicted(X_train.index, y_train, model.predict(X_train), model)
366
+ metrics_table_test,fig_test= plot_actual_vs_predicted(X_test.index, y_test, model.predict(X_test), model)
367
+
368
+ metrics_table_train=metrics_table_train.set_index('Metric').transpose()
369
+ metrics_table_train.index=['Train']
370
+ metrics_table_test=metrics_table_test.set_index('Metric').transpose()
371
+ metrics_table_test.index=['Test']
372
+ metrics_table=np.round(pd.concat([metrics_table_train,metrics_table_test]),2)
373
+
374
+ st.markdown('Result Overview')
375
+ st.dataframe(np.round(metrics_table,2),use_container_width=True)
376
+
377
+ st.subheader('Actual vs Predicted Plot Train')
378
+
379
+ st.plotly_chart(fig_train,use_container_width=True)
380
+ st.subheader('Actual vs Predicted Plot Test')
381
+ st.plotly_chart(fig_test,use_container_width=True)
382
+
383
+ st.markdown('## Residual Analysis')
384
+ columns=st.columns(2)
385
+
386
+
387
+ Xtrain1=X_train.copy()
 
 
 
388
  with columns[0]:
389
+ fig=plot_residual_predicted(y_train,model.predict(Xtrain1),Xtrain1)
390
  st.plotly_chart(fig)
391
 
392
  with columns[1]:
393
  st.empty()
394
+ fig = qqplot(y_train,model.predict(X_train))
395
  st.plotly_chart(fig)
396
 
397
  with columns[0]:
398
+ fig=residual_distribution(y_train,model.predict(X_train))
399
  st.pyplot(fig)
400
 
401
 
402
+
403
  elif auth_status == False:
404
+ st.error('Username/Password is incorrect')
405
  try:
406
+ username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
 
 
407
  if username_forgot_pw:
408
+ st.success('New password sent securely')
409
  # Random password to be transferred to the user securely
410
  elif username_forgot_pw == False:
411
+ st.error('Username not found')
412
  except Exception as e:
413
  st.error(e)
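The contributions() helper used throughout this page decomposes a linear model's prediction channel by channel: each regressor is weighted by its fitted coefficient, summed over time, and the totals are normalised to percentages. A minimal sketch of that decomposition (note that negative coefficients can push individual shares below 0% or above 100%):

import pandas as pd

def channel_contributions(X, params):
    # weight each column by its fitted coefficient, sum over time, normalise to %
    weighted = X.mul(params.reindex(X.columns), axis=1)
    totals = weighted.sum()
    return (totals / totals.sum() * 100).round(2).sort_values(ascending=False)

# usage sketch, assuming a fitted statsmodels OLS result `model`:
# shares = channel_contributions(X_train, model.params)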
pages/5_Model_Result_Overview.py ADDED
@@ -0,0 +1,103 @@
 
 
1
+ import streamlit as st
2
+ from utilities import (set_header,
3
+ initialize_data,
4
+ load_local_css,
5
+ create_channel_summary,
6
+ create_contribution_pie,
7
+ create_contribuion_stacked_plot,
8
+ create_channel_spends_sales_plot,
9
+ format_numbers,
10
+ channel_name_formating,
11
+ load_authenticator)
12
+ import plotly.graph_objects as go
13
+ import streamlit_authenticator as stauth
14
+ import yaml
15
+ from yaml import SafeLoader
16
+ import time
17
+
18
+ st.set_page_config(layout='wide')
19
+ load_local_css('styles.css')
20
+ set_header()
21
+
22
+ target='Revenue'
23
+ # for k, v in st.session_state.items():
24
+
25
+ # if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
26
+ # st.session_state[k] = v
27
+
28
+ # authenticator = st.session_state.get('authenticator')
29
+
30
+ # if authenticator is None:
31
+ # authenticator = load_authenticator()
32
+
33
+ # name, authentication_status, username = authenticator.login('Login', 'main')
34
+ # auth_status = st.session_state['authentication_status']
35
+
36
+ # if auth_status:
37
+ # authenticator.logout('Logout', 'main')
38
+
39
+ # is_state_initiaized = st.session_state.get('initialized',False)
40
+ # if not is_state_initiaized:
41
+ initialize_data()
42
+ scenario = st.session_state['scenario']
43
+ raw_df = st.session_state['raw_df']
44
+ st.header('Overview of previous spends')
45
+
46
+
47
+ columns = st.columns((1,1,3))
48
+
49
+ with columns[0]:
50
+ st.metric(label = 'Spends', value=format_numbers(float(scenario.actual_total_spends)))
51
+ ###print(f"##################### {scenario.actual_total_sales} ##################")
52
+ with columns[1]:
53
+ st.metric(label = target, value=format_numbers(float(scenario.actual_total_sales),include_indicator=False))
54
+
55
+
56
+ actual_summary_df = create_channel_summary(scenario)
57
+ actual_summary_df['Channel'] = actual_summary_df['Channel'].apply(channel_name_formating)
58
+
59
+ columns = st.columns((2,1))
60
+ with columns[0]:
61
+ with st.expander('Channel wise overview'):
62
+ st.markdown(actual_summary_df.style.set_table_styles(
63
+ [{
64
+ 'selector': 'th',
65
+ 'props': [('background-color', '#11B6BD')]
66
+ },
67
+ {
68
+ 'selector' : 'tr:nth-child(even)',
69
+ 'props' : [('background-color', '#11B6BD')]
70
+ }]).to_html(), unsafe_allow_html=True)
71
+
72
+ st.markdown("<hr>",unsafe_allow_html=True)
73
+ ##############################
74
+
75
+ st.plotly_chart(create_contribution_pie(),use_container_width=True)
76
+ st.markdown("<hr>",unsafe_allow_html=True)
77
+
78
+
79
+ ################################3
80
+ st.plotly_chart(create_contribuion_stacked_plot(scenario),use_container_width=True)
81
+ st.markdown("<hr>",unsafe_allow_html=True)
82
+ #######################################
83
+
84
+ selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['non media'], format_func=channel_name_formating)
85
+ selected_channel = scenario.channels.get(selected_channel_name,None)
86
+
87
+ st.plotly_chart(create_channel_spends_sales_plot(selected_channel), use_container_width=True)
88
+
89
+ st.markdown("<hr>",unsafe_allow_html=True)
90
+
91
+ # elif auth_status == False:
92
+ # st.error('Username/Password is incorrect')
93
+
94
+ # if auth_status != True:
95
+ # try:
96
+ # username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
97
+ # if username_forgot_pw:
98
+ # st.success('New password sent securely')
99
+ # # Random password to be transferred to user securely
100
+ # elif username_forgot_pw == False:
101
+ # st.error('Username not found')
102
+ # except Exception as e:
103
+ # st.error(e)
pages/6_Build_Response_Curves.py ADDED
@@ -0,0 +1,168 @@
 
 
1
+ import streamlit as st
2
+ import plotly.express as px
3
+ import numpy as np
4
+ import plotly.graph_objects as go
5
+ from utilities import channel_name_formating, load_authenticator, initialize_data
6
+ from sklearn.metrics import r2_score
7
+ from collections import OrderedDict
8
+ from classes import class_from_dict,class_to_dict
9
+ import pickle
10
+ import json
11
+
12
+ for k, v in st.session_state.items():
13
+ if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
14
+ st.session_state[k] = v
15
+
16
+ def s_curve(x,K,b,a,x0):
17
+ return K / (1 + b*np.exp(-a*(x-x0)))
18
+
19
+ def save_scenario(scenario_name):
20
+ """
21
+ Save the current scenario with the mentioned name in the session state
22
+
23
+ Parameters
24
+ ----------
25
+ scenario_name
26
+ Name of the scenario to be saved
27
+ """
28
+ if 'saved_scenarios' not in st.session_state:
29
+ st.session_state['saved_scenarios'] = OrderedDict()  # assigning to st.session_state itself would wipe the whole session
30
+
31
+ #st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
32
+ st.session_state['saved_scenarios'][scenario_name] = class_to_dict(st.session_state['scenario'])
33
+ st.session_state['scenario_input'] = ""
34
+ print(type(st.session_state['saved_scenarios']))
35
+ with open('../saved_scenarios.pkl', 'wb') as f:
36
+ pickle.dump(st.session_state['saved_scenarios'],f)
37
+
38
+
39
+ def reset_curve_parameters():
40
+ del st.session_state['K']
41
+ del st.session_state['b']
42
+ del st.session_state['a']
43
+ del st.session_state['x0']
44
+
45
+ def update_response_curve():
46
+ # st.session_state['rcs'][selected_channel_name]['K'] = st.session_state['K']
47
+ # st.session_state['rcs'][selected_channel_name]['b'] = st.session_state['b']
48
+ # st.session_state['rcs'][selected_channel_name]['a'] = st.session_state['a']
49
+ # st.session_state['rcs'][selected_channel_name]['x0'] = st.session_state['x0']
50
+ # rcs = st.session_state['rcs']
51
+ _channel_class = st.session_state['scenario'].channels[selected_channel_name]
52
+ _channel_class.update_response_curves({
53
+ 'K' : st.session_state['K'],
54
+ 'b' : st.session_state['b'],
55
+ 'a' : st.session_state['a'],
56
+ 'x0' : st.session_state['x0']})
57
+
58
+
59
+ # authenticator = st.session_state.get('authenticator')
60
+ # if authenticator is None:
61
+ # authenticator = load_authenticator()
62
+
63
+ # name, authentication_status, username = authenticator.login('Login', 'main')
64
+ # auth_status = st.session_state.get('authentication_status')
65
+
66
+ # if auth_status == True:
67
+ # is_state_initiaized = st.session_state.get('initialized',False)
68
+ # if not is_state_initiaized:
69
+ # print("Scenario page state reloaded")
70
+
71
+ initialize_data()
72
+
73
+ st.subheader("Build response curves")
74
+
75
+ channels_list = st.session_state['channels_list']
76
+ selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['Others'], format_func=channel_name_formating,on_change=reset_curve_parameters)
77
+
78
+ rcs = {}
79
+ for channel_name in channels_list:
80
+ rcs[channel_name] = st.session_state['scenario'].channels[channel_name].response_curve_params
81
+ # rcs = st.session_state['rcs']
82
+
83
+
84
+ if 'K' not in st.session_state:
85
+ st.session_state['K'] = rcs[selected_channel_name]['K']
86
+ if 'b' not in st.session_state:
87
+ st.session_state['b'] = rcs[selected_channel_name]['b']
88
+ if 'a' not in st.session_state:
89
+ st.session_state['a'] = rcs[selected_channel_name]['a']
90
+ if 'x0' not in st.session_state:
91
+ st.session_state['x0'] = rcs[selected_channel_name]['x0']
92
+
93
+ x = st.session_state['actual_input_df'][selected_channel_name].values
94
+ y = st.session_state['actual_contribution_df'][selected_channel_name].values
95
+
96
+ power = (np.ceil(np.log(x.max()) / np.log(10) )- 3)
97
+
98
+ # fig = px.scatter(x, s_curve(x/10**power,
99
+ # st.session_state['K'],
100
+ # st.session_state['b'],
101
+ # st.session_state['a'],
102
+ # st.session_state['x0']))
103
+
104
+ fig = px.scatter(x=x, y=y)
105
+ fig.add_trace(go.Scatter(x=np.sort(x), y=s_curve(np.sort(x)/10**power, st.session_state['K'],  # np.sort keeps an ndarray; sorted() returns a list that cannot be divided by a scalar
106
+ st.session_state['b'],
107
+ st.session_state['a'],
108
+ st.session_state['x0']),
109
+ line=dict(color='red')))
110
+
111
+ fig.update_layout(title_text="Response Curve",showlegend=False)
112
+ fig.update_annotations(font_size=10)
113
+ fig.update_xaxes(title='Spends')
114
+ fig.update_yaxes(title='Revenue')
115
+
116
+ st.plotly_chart(fig,use_container_width=True)
117
+
118
+ r2 = r2_score(y, s_curve(x / 10**power,
119
+ st.session_state['K'],
120
+ st.session_state['b'],
121
+ st.session_state['a'],
122
+ st.session_state['x0']))
123
+
124
+ st.metric('R2',round(r2,2))
125
+ columns = st.columns(4)
126
+
127
+ with columns[0]:
128
+ st.number_input('K',key='K',format="%0.5f")
129
+ with columns[1]:
130
+ st.number_input('b',key='b',format="%0.5f")
131
+ with columns[2]:
132
+ st.number_input('a',key='a',step=0.0001,format="%0.5f")
133
+ with columns[3]:
134
+ st.number_input('x0',key='x0',format="%0.5f")
135
+
136
+
137
+ st.button('Update parameters',on_click=update_response_curve)
138
+ st.button('Reset parameters',on_click=reset_curve_parameters)
139
+ scenario_name = st.text_input('Scenario name', key='scenario_input',placeholder='Scenario name',label_visibility='collapsed')
140
+ st.button('Save', on_click=lambda : save_scenario(scenario_name),disabled=len(st.session_state['scenario_input']) == 0)
141
+
142
+ file_name = st.text_input('rcs download file name', key='file_name_input',placeholder='file name',label_visibility='collapsed')
143
+ st.download_button(
144
+ label="Download response curves",
145
+ data=json.dumps(rcs),
146
+ file_name=f"{file_name}.json",
147
+ mime="application/json",
148
+ disabled= len(file_name) == 0,
149
+ )
150
+
151
+
152
+ def s_curve_derivative(x, K, b, a, x0):
153
+ # Derivative of the S-curve function
154
+ return a * b * K * np.exp(-a * (x - x0)) / ((1 + b * np.exp(-a * (x - x0))) ** 2)
155
+
156
+ # Parameters of the S-curve
157
+ K = st.session_state['K']
158
+ b = st.session_state['b']
159
+ a = st.session_state['a']
160
+ x0 = st.session_state['x0']
161
+
162
+ # Optimized spend value obtained from the tool
163
+ optimized_spend = st.number_input('value of x') # Replace this with your optimized spend value
164
+
165
+ # Calculate the slope at the optimized spend value
166
+ slope_at_optimized_spend = s_curve_derivative(optimized_spend, K, b, a, x0)
167
+
168
+ st.write("Slope ", slope_at_optimized_spend)
pages/8_Scenario_Planner.py CHANGED
@@ -23,38 +23,43 @@ import re
23
  import pandas as pd
24
  import plotly.express as px
25
 
26
-
27
  st.set_page_config(layout="wide")
28
  load_local_css("styles.css")
29
  set_header()
30
 
31
  for k, v in st.session_state.items():
32
- if k not in ["logout", "login", "config"] and not k.startswith("FormSubmitter"):
 
 
33
  st.session_state[k] = v
34
  # ======================================================== #
35
  # ======================= Functions ====================== #
36
  # ======================================================== #
37
 
38
 
39
- def optimize(key, status_placeholder):
40
  """
41
  Optimize the channel spends for the selected target
42
  """
43
 
44
  channel_list = [
45
- key for key, value in st.session_state["optimization_channels"].items() if value
 
 
46
  ]
47
-
 
 
48
  if len(channel_list) > 0:
49
  scenario = st.session_state["scenario"]
50
- if key.lower() == "media spends":
51
  with status_placeholder:
52
  with st.spinner("Optimizing"):
53
  result = st.session_state["scenario"].optimize(
54
  st.session_state["total_spends_change"], channel_list
55
  )
56
- # elif key.lower() == "revenue":
57
- else:
58
  with status_placeholder:
59
  with st.spinner("Optimizing"):
60
 
@@ -64,11 +69,14 @@ def optimize(key, status_placeholder):
64
  for channel_name, modified_spends in result:
65
 
66
  st.session_state[channel_name] = numerize(
67
- modified_spends * scenario.channels[channel_name].conversion_rate,
 
68
  1,
69
  )
70
  prev_spends = (
71
- st.session_state["scenario"].channels[channel_name].actual_total_spends
 
 
72
  )
73
  st.session_state[f"{channel_name}_change"] = round(
74
  100 * (modified_spends - prev_spends) / prev_spends, 2
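The post-optimization bookkeeping in this hunk reduces to a numerized display string plus a rounded percent delta; with made-up spends (and assuming the `numerize` helper this page already imports):

from numerize.numerize import numerize  # import path assumed

prev_spends, modified_spends = 10_000.0, 12_500.0  # illustrative values
display_value = numerize(modified_spends, 1)       # '12.5K'
percent_change = round(100 * (modified_spends - prev_spends) / prev_spends, 2)  # 25.0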
@@ -97,46 +105,15 @@ def save_scenario(scenario_name):
97
  pickle.dump(st.session_state["saved_scenarios"], f)
98
 
99
 
100
- if "allow_spends_update" not in st.session_state:
101
- st.session_state["allow_spends_update"] = True
102
-
103
- if "allow_sales_update" not in st.session_state:
104
- st.session_state["allow_sales_update"] = True
105
-
106
-
107
- def update_sales_abs_slider():
108
- actual_sales = _scenario.actual_total_sales
109
- if validate_input(st.session_state["total_sales_change_abs_slider"]):
110
- modified_sales = extract_number_for_string(
111
- st.session_state["total_sales_change_abs_slider"]
112
- )
113
- st.session_state["total_sales_change"] = round(
114
- ((modified_sales / actual_sales) - 1) * 100
115
- )
116
- st.session_state["total_sales_change_abs"] = numerize(modified_sales, 1)
117
-
118
-
119
  def update_sales_abs():
120
- if (
121
- st.session_state["total_sales_change_abs"]
122
- in st.session_state["total_sales_change_abs_slider_options"]
123
- ):
124
- st.session_state["allow_sales_update"] = True
125
- else:
126
- st.session_state["allow_sales_update"] = False
127
-
128
  actual_sales = _scenario.actual_total_sales
129
- if (
130
- validate_input(st.session_state["total_sales_change_abs"])
131
- and st.session_state["allow_sales_update"]
132
- ):
133
  modified_sales = extract_number_for_string(
134
  st.session_state["total_sales_change_abs"]
135
  )
136
  st.session_state["total_sales_change"] = round(
137
  ((modified_sales / actual_sales) - 1) * 100
138
  )
139
- st.session_state["total_sales_change_abs_slider"] = numerize(modified_sales, 1)
140
 
141
 
142
  def update_sales():
@@ -145,95 +122,32 @@ def update_sales():
145
  * _scenario.actual_total_sales,
146
  1,
147
  )
148
- st.session_state["total_sales_change_abs_slider"] = numerize(
149
- (1 + st.session_state["total_sales_change"] / 100)
150
- * _scenario.actual_total_sales,
151
- 1,
152
- )
153
-
154
-
155
- def update_all_spends_abs_slider():
156
- actual_spends = _scenario.actual_total_spends
157
- if validate_input(st.session_state["total_spends_change_abs_slider"]):
158
- modified_spends = extract_number_for_string(
159
- st.session_state["total_spends_change_abs_slider"]
160
- )
161
- st.session_state["total_spends_change"] = round(
162
- ((modified_spends / actual_spends) - 1) * 100
163
- )
164
- st.session_state["total_spends_change_abs"] = numerize(modified_spends, 1)
165
-
166
- update_all_spends()
167
-
168
-
169
- # def update_all_spends_abs_slider():
170
- # actual_spends = _scenario.actual_total_spends
171
- # if validate_input(st.session_state["total_spends_change_abs_slider"]):
172
- # print("#" * 100)
173
- # print(st.session_state["total_spends_change_abs_slider"])
174
- # print("#" * 100)
175
-
176
- # modified_spends = extract_number_for_string(
177
- # st.session_state["total_spends_change_abs_slider"]
178
- # )
179
- # st.session_state["total_spends_change"] = (
180
- # (modified_spends / actual_spends) - 1
181
- # ) * 100
182
- # st.session_state["total_spends_change_abs"] = st.session_state[
183
- # "total_spends_change_abs_slider"
184
- # ]
185
-
186
- # update_all_spends()
187
 
188
 
189
  def update_all_spends_abs():
190
- if (
191
- st.session_state["total_spends_change_abs"]
192
- in st.session_state["total_spends_change_abs_slider_options"]
193
- ):
194
- st.session_state["allow_spends_update"] = True
195
- else:
196
- st.session_state["allow_spends_update"] = False
197
-
198
  actual_spends = _scenario.actual_total_spends
199
- if (
200
- validate_input(st.session_state["total_spends_change_abs"])
201
- and st.session_state["allow_spends_update"]
202
- ):
203
  modified_spends = extract_number_for_string(
204
  st.session_state["total_spends_change_abs"]
205
  )
 
 
 
206
  st.session_state["total_spends_change"] = (
207
  (modified_spends / actual_spends) - 1
208
  ) * 100
209
- st.session_state["total_spends_change_abs_slider"] = st.session_state[
210
- "total_spends_change_abs"
211
- ]
212
 
213
  update_all_spends()
214
 
215
 
216
- def update_spends():
217
- st.session_state["total_spends_change_abs"] = numerize(
218
- (1 + st.session_state["total_spends_change"] / 100)
219
- * _scenario.actual_total_spends,
220
- 1,
221
- )
222
- st.session_state["total_spends_change_abs_slider"] = numerize(
223
- (1 + st.session_state["total_spends_change"] / 100)
224
- * _scenario.actual_total_spends,
225
- 1,
226
- )
227
-
228
- update_all_spends()
229
-
230
-
231
  def update_all_spends():
232
  """
233
  Updates spends for all the channels with the given overall spends change
234
  """
235
  percent_change = st.session_state["total_spends_change"]
236
-
 
 
237
  for channel_name in st.session_state["channels_list"]:
238
  channel = st.session_state["scenario"].channels[channel_name]
239
  current_spends = channel.actual_total_spends
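Every channel is rescaled by the same overall percent change entered in the UI; numerically:

percent_change = 10.0        # overall change from the UI (illustrative)
current_spends = 1_000.0     # one channel's actual total spends
modified_spends = current_spends * (1 + percent_change / 100)  # 1100.0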
@@ -285,10 +199,16 @@ def update_data(channel_name):
285
  """
286
 
287
  if validate_input(st.session_state[channel_name]):
288
- modified_spends = extract_number_for_string(st.session_state[channel_name])
 
 
289
  prev_spends = (
290
- st.session_state["scenario"].channels[channel_name].actual_total_spends
291
- * st.session_state["scenario"].channels[channel_name].conversion_rate
 
 
 
 
292
  )
293
  st.session_state[f"{channel_name}_change"] = round(
294
  100 * (modified_spends - prev_spends) / prev_spends, 2
@@ -296,7 +216,9 @@ def update_data(channel_name):
296
  st.session_state["scenario"].update(
297
  channel_name,
298
  modified_spends
299
- / st.session_state["scenario"].channels[channel_name].conversion_rate,
 
 
300
  )
301
  # st.session_state['scenario'].update(channel_name, modified_spends)
302
  # else:
@@ -327,55 +249,31 @@ def select_all_channels_for_optimization():
327
  st.session_state[f"{channel_name}_selected"] = st.session_state[
328
  "optimze_all_channels"
329
  ]
330
- st.session_state["optimization_channels"][channel_name] = st.session_state[
331
- "optimze_all_channels"
332
- ]
333
 
334
 
335
  def update_penalty():
336
  """
337
  Updates the penalty flag for sales calculation
338
  """
339
- st.session_state["scenario"].update_penalty(st.session_state["apply_penalty"])
 
 
340
 
341
 
342
- def reset_scenario(panel_selected, file_selected, updated_rcs):
343
  # #print(st.session_state['default_scenario_dict'])
344
  # st.session_state['scenario'] = class_from_dict(st.session_state['default_scenario_dict'])
345
  # for channel in st.session_state['scenario'].channels.values():
346
  # st.session_state[channel.name] = float(channel.actual_total_spends * channel.conversion_rate)
347
- # initialize_data()
348
-
349
- if panel_selected == "Aggregated":
350
- initialize_data(
351
- panel=panel_selected,
352
- target_file=file_selected,
353
- updated_rcs=updated_rcs,
354
- metrics=metrics_selected,
355
- )
356
- panel = None
357
- else:
358
- initialize_data(
359
- panel=panel_selected,
360
- target_file=file_selected,
361
- updated_rcs=updated_rcs,
362
- metrics=metrics_selected,
363
- )
364
-
365
  for channel_name in st.session_state["channels_list"]:
366
  st.session_state[f"{channel_name}_selected"] = False
367
  st.session_state[f"{channel_name}_change"] = 0
368
  st.session_state["optimze_all_channels"] = False
369
 
370
- st.session_state["total_sales_change"] = 0
371
-
372
- update_spends()
373
- update_sales()
374
-
375
- reset_inputs()
376
-
377
- # st.rerun()
378
-
379
 
380
  def format_number(num):
381
  if num >= 1_000_000:
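The diff context cuts `format_number` off after its first branch; a plausible completion (the suffix choices are an assumption, not taken from the source):

def format_number(num):
    # Hypothetical completion of the truncated helper above
    if num >= 1_000_000:
        return f"{num / 1_000_000:.2f}M"
    if num >= 1_000:
        return f"{num / 1_000:.1f}K"
    return f"{num:.1f}"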
@@ -407,7 +305,9 @@ def summary_plot(data, x, y, title, text_column):
407
  hovertemplate="%{x:.2s}",
408
  )
409
 
410
- fig.update_layout(xaxis_title=x, yaxis_title="Channel Name", showlegend=False)
 
 
411
  return fig
412
 
413
 
@@ -442,21 +342,27 @@ def calculate_rgba(
442
  relative_position = (current_channel_spends - start_value) / (
443
  left_value - start_value
444
  )
445
- alpha = 0.8 - (0.6 * relative_position) # Alpha decreases from start to end
 
 
446
 
447
  elif left_value < current_channel_spends <= right_value:
448
  color = "green"
449
  relative_position = (current_channel_spends - left_value) / (
450
  right_value - left_value
451
  )
452
- alpha = 0.8 - (0.6 * relative_position) # Alpha decreases from start to end
 
 
453
 
454
  elif right_value < current_channel_spends <= end_value:
455
  color = "red"
456
  relative_position = (current_channel_spends - right_value) / (
457
  end_value - right_value
458
  )
459
- alpha = 0.2 + (0.6 * relative_position) # Alpha increases from start to end
 
 
460
 
461
  else:
462
  # Default case, if the spends are outside the defined ranges
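A worked example of the interpolation above: in the first (blue) segment, alpha fades linearly from 0.8 toward 0.2 as spends move from `start_value` to `left_value` (bounds are illustrative):

start_value, left_value = 0.0, 100.0
current_channel_spends = 25.0
relative_position = (current_channel_spends - start_value) / (left_value - start_value)
alpha = 0.8 - 0.6 * relative_position  # 0.65 a quarter of the way in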
@@ -526,7 +432,9 @@ def plot_response_curves():
526
 
527
  for index in range(len(x_plot)):
528
  marginal_roi.append(
529
- a * y[index] * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
 
 
530
  )
531
 
532
  x = (
@@ -558,7 +466,9 @@ def plot_response_curves():
558
  st.session_state["scenario"].channels[col].modified_total_spends
559
  * st.session_state["scenario"].channels[col].conversion_rate
560
  )
561
- y_optimal = st.session_state["scenario"].channels[col].modified_total_sales
 
 
562
 
563
  # if col == "Paid_social_others":
564
  # debug_temp(x_optimal * x_actual / x_actual.sum(), power, K, b, a, x0)
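The marginal-ROI term `a * y[index] * (1 - y[index] / K)` is the logistic derivative expressed through the curve value itself: for y = K / (1 + b·exp(-a·(x - x0))), dy/dx = a·y·(1 - y/K). A quick numeric check with illustrative parameters:

import numpy as np

K, b, a, x0, x, h = 5.0, 2.0, 0.7, 1.0, 2.5, 1e-6
y = K / (1 + b * np.exp(-a * (x - x0)))
y_fd = (K / (1 + b * np.exp(-a * (x + h - x0)))
        - K / (1 + b * np.exp(-a * (x - h - x0)))) / (2 * h)
assert abs(a * y * (1 - y / K) - y_fd) < 1e-6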
@@ -666,7 +576,7 @@ def plot_response_curves():
666
  fig.update_layout(
667
  # height=1000,
668
  # width=1000,
669
- title_text=f"Response Curves (X: Spends Vs Y: {target})",
670
  showlegend=False,
671
  shapes=shapes,
672
  )
@@ -808,144 +718,12 @@ authenticator = stauth.Authenticate(
808
  st.session_state["authenticator"] = authenticator
809
  name, authentication_status, username = authenticator.login("Login", "main")
810
  auth_status = st.session_state.get("authentication_status")
811
-
812
- import os
813
- import glob
814
-
815
-
816
- def get_excel_names(directory):
817
- # Create a list to hold the final parts of the filenames
818
- last_portions = []
819
-
820
- # Patterns to match Excel files (.xlsx and .xls) that contain @#
821
- patterns = [
822
- os.path.join(directory, "*@#*.xlsx"),
823
- os.path.join(directory, "*@#*.xls"),
824
- ]
825
-
826
- # Process each pattern
827
- for pattern in patterns:
828
- files = glob.glob(pattern)
829
-
830
- # Extracting the last portion after @# for each file
831
- for file in files:
832
- base_name = os.path.basename(file)
833
- last_portion = base_name.split("@#")[-1]
834
- last_portion = last_portion.replace(".xlsx", "").replace(
835
- ".xls", ""
836
- ) # Removing extensions
837
- last_portions.append(last_portion)
838
-
839
- return last_portions
840
-
841
-
842
- def name_formating(channel_name):
843
- # Replace underscores with spaces
844
- name_mod = channel_name.replace("_", " ")
845
-
846
- # Capitalize the first letter of each word
847
- name_mod = name_mod.title()
848
-
849
- return name_mod
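For reference, the two removed helpers behave as follows (the file name is hypothetical):

# A file "Overview_data_test_panel@#revenue.xlsx" in the directory yields ["revenue"]
print(get_excel_names("metrics_level_data"))
# Underscores become spaces, then each word is title-cased
print(name_formating("total_approved_accounts"))  # 'Total Approved Accounts'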
850
-
851
-
852
- @st.cache_resource(show_spinner=False)
853
- def panel_fetch(file_selected):
854
- raw_data_mmm_df = pd.read_excel(file_selected, sheet_name="RAW DATA MMM")
855
-
856
- if "Panel" in raw_data_mmm_df.columns:
857
- panel = list(set(raw_data_mmm_df["Panel"]))
858
- else:
859
- raw_data_mmm_df = None
860
- panel = None
861
-
862
- return panel
863
-
864
-
865
- def reset_inputs():
866
- if "total_spends_change_abs" in st.session_state:
867
- del st.session_state.total_spends_change_abs
868
- if "total_spends_change" in st.session_state:
869
- del st.session_state.total_spends_change
870
- if "total_spends_change_abs_slider" in st.session_state:
871
- del st.session_state.total_spends_change_abs_slider
872
-
873
- if "total_sales_change_abs" in st.session_state:
874
- del st.session_state.total_sales_change_abs
875
- if "total_sales_change" in st.session_state:
876
- del st.session_state.total_sales_change
877
- if "total_sales_change_abs_slider" in st.session_state:
878
- del st.session_state.total_sales_change_abs_slider
879
-
880
- st.session_state["initialized"] = False
881
-
882
-
883
  if auth_status == True:
884
  authenticator.logout("Logout", "main")
885
-
886
- st.header("Simulation")
887
- col1, col2 = st.columns([1, 1])
888
-
889
- # Response Metrics
890
- directory = "metrics_level_data"
891
- metrics_list = get_excel_names(directory)
892
- metrics_selected = col1.selectbox(
893
- "Response Metrics",
894
- metrics_list,
895
- format_func=name_formating,
896
- index=0,
897
- on_change=reset_inputs,
898
- )
899
-
900
- # Target
901
- target = name_formating(metrics_selected)
902
-
903
- file_selected = (
904
- f".\metrics_level_data\Overview_data_test_panel@#{metrics_selected}.xlsx"
905
- )
906
-
907
- # Panel List
908
- panel_list = panel_fetch(file_selected)
909
-
910
- # Panel Selected
911
- panel_selected = col2.selectbox(
912
- "Panel",
913
- ["Aggregated"] + panel_list,
914
- index=0,
915
- on_change=reset_inputs,
916
- )
917
-
918
- if "update_rcs" in st.session_state:
919
- updated_rcs = st.session_state["update_rcs"]
920
- else:
921
- updated_rcs = None
922
-
923
- if "first_time" not in st.session_state:
924
- st.session_state["first_time"] = True
925
-
926
- # Check if state is initialized
927
  is_state_initialized = st.session_state.get("initialized", False)
928
- if not is_state_initialized or st.session_state["first_time"]:
929
- # initialize_data()
930
- if panel_selected == "Aggregated":
931
- initialize_data(
932
- panel=panel_selected,
933
- target_file=file_selected,
934
- updated_rcs=updated_rcs,
935
- metrics=metrics_selected,
936
- )
937
- panel = None
938
- else:
939
- initialize_data(
940
- panel=panel_selected,
941
- target_file=file_selected,
942
- updated_rcs=updated_rcs,
943
- metrics=metrics_selected,
944
- )
945
- st.session_state["initialized"] = True
946
- st.session_state["first_time"] = False
947
 
948
- # Channels List
949
  channels_list = st.session_state["channels_list"]
950
 
951
  # ======================================================== #
@@ -953,16 +731,12 @@ if auth_status == True:
953
  # ======================================================== #
954
 
955
  # print(list(st.session_state.keys()))
 
 
956
  main_header = st.columns((2, 2))
957
  sub_header = st.columns((1, 1, 1, 1))
958
  _scenario = st.session_state["scenario"]
959
 
960
- if "total_spends_change" not in st.session_state:
961
- st.session_state.total_spends_change = 0
962
-
963
- if "total_sales_change" not in st.session_state:
964
- st.session_state.total_sales_change = 0
965
-
966
  if "total_spends_change_abs" not in st.session_state:
967
  st.session_state["total_spends_change_abs"] = numerize(
968
  _scenario.actual_total_spends, 1
@@ -973,16 +747,6 @@ if auth_status == True:
973
  _scenario.actual_total_sales, 1
974
  )
975
 
976
- if "total_spends_change_abs_slider" not in st.session_state:
977
- st.session_state.total_spends_change_abs_slider = numerize(
978
- _scenario.actual_total_spends, 1
979
- )
980
-
981
- if "total_sales_change_abs_slider" not in st.session_state:
982
- st.session_state.total_sales_change_abs_slider = numerize(
983
- _scenario.actual_total_sales, 1
984
- )
985
-
986
  with main_header[0]:
987
  st.subheader("Actual")
988
 
@@ -990,7 +754,9 @@ if auth_status == True:
990
  st.subheader("Simulated")
991
 
992
  with sub_header[0]:
993
- st.metric(label="Spends", value=format_numbers(_scenario.actual_total_spends))
 
 
994
 
995
  with sub_header[1]:
996
  st.metric(
@@ -1016,52 +782,36 @@ if auth_status == True:
1016
  delta=numerize(_scenario.delta_sales, 1),
1017
  )
1018
 
1019
- with st.expander("Channel Spends Simulator", expanded=True):
1020
  _columns1 = st.columns((2, 2, 1, 1))
1021
  with _columns1[0]:
 
1022
  optimization_selection = st.selectbox(
1023
- "Optimize", options=["Media Spends", target], key="optimization_key"
1024
  )
1025
-
1026
  with _columns1[1]:
1027
  st.markdown("#")
1028
- # if st.checkbox(
1029
- # label="Optimize all Channels",
1030
- # key="optimze_all_channels",
1031
- # value=False,
1032
- # # on_change=select_all_channels_for_optimization,
1033
- # ):
1034
- # select_all_channels_for_optimization()
1035
-
1036
  st.checkbox(
1037
  label="Optimize all Channels",
1038
- key="optimze_all_channels",
1039
  value=False,
1040
  on_change=select_all_channels_for_optimization,
1041
  )
1042
 
1043
  with _columns1[2]:
1044
  st.markdown("#")
1045
- # st.button(
1046
- # "Optimize",
1047
- # on_click=optimize,
1048
- # args=(st.session_state["optimization_key"]),
1049
- # use_container_width=True,
1050
- # )
1051
-
1052
- optimize_placeholder = st.empty()
1053
 
1054
  with _columns1[3]:
1055
  st.markdown("#")
1056
- st.button(
1057
- "Reset",
1058
- on_click=reset_scenario,
1059
- args=(panel_selected, file_selected, updated_rcs),
1060
- use_container_width=True,
1061
- )
1062
 
1063
  _columns2 = st.columns((2, 2, 2))
1064
- if st.session_state["optimization_key"] == "Media Spends":
1065
  with _columns2[0]:
1066
  spend_input = st.text_input(
1067
  "Absolute",
@@ -1069,90 +819,37 @@ if auth_status == True:
1069
  # label_visibility="collapsed",
1070
  on_change=update_all_spends_abs,
1071
  )
1072
-
1073
  with _columns2[1]:
 
1074
  st.number_input(
1075
- "Percent Change",
1076
- key="total_spends_change",
1077
- min_value=-50,
1078
- max_value=50,
1079
  step=1,
1080
- on_change=update_spends,
1081
  )
1082
-
1083
- with _columns2[2]:
1084
- min_value = round(_scenario.actual_total_spends * 0.5)
1085
- max_value = round(_scenario.actual_total_spends * 1.5)
1086
- st.session_state["total_spends_change_abs_slider_options"] = [
1087
- numerize(value, 1)
1088
- for value in range(min_value, max_value + 1, int(1e4))
1089
- ]
1090
-
1091
- st.select_slider(
1092
- "Absolute Slider",
1093
- options=st.session_state["total_spends_change_abs_slider_options"],
1094
- key="total_spends_change_abs_slider",
1095
- on_change=update_all_spends_abs_slider,
1096
- )
1097
-
1098
- elif st.session_state["optimization_key"] == target:
1099
  with _columns2[0]:
 
1100
  sales_input = st.text_input(
1101
  "Absolute",
1102
  key="total_sales_change_abs",
1103
  on_change=update_sales_abs,
1104
  )
1105
-
1106
  with _columns2[1]:
1107
  st.number_input(
1108
- "Percent Change",
1109
- key="total_sales_change",
1110
- min_value=-50,
1111
- max_value=50,
1112
  step=1,
1113
  on_change=update_sales,
1114
  )
1115
- with _columns2[2]:
1116
- min_value = round(_scenario.actual_total_sales * 0.5)
1117
- max_value = round(_scenario.actual_total_sales * 1.5)
1118
- st.session_state["total_sales_change_abs_slider_options"] = [
1119
- numerize(value, 1)
1120
- for value in range(min_value, max_value + 1, int(1e5))
1121
- ]
1122
-
1123
- st.select_slider(
1124
- "Absolute Slider",
1125
- options=st.session_state["total_sales_change_abs_slider_options"],
1126
- key="total_sales_change_abs_slider",
1127
- on_change=update_sales_abs_slider,
1128
- )
1129
 
1130
- if (
1131
- not st.session_state["allow_sales_update"]
1132
- and optimization_selection == target
1133
- ):
1134
- st.warning("Invalid Input")
1135
-
1136
- if (
1137
- not st.session_state["allow_spends_update"]
1138
- and optimization_selection == "Media Spends"
1139
- ):
1140
- st.warning("Invalid Input")
1141
-
1142
- status_placeholder = st.empty()
1143
-
1144
- # if optimize_placeholder.button("Optimize", use_container_width=True):
1145
- # optimize(st.session_state["optimization_key"], status_placeholder)
1146
- # st.rerun()
1147
-
1148
- optimize_placeholder.button(
1149
- "Optimize",
1150
- on_click=optimize,
1151
- args=(st.session_state["optimization_key"], status_placeholder),
1152
- use_container_width=True,
1153
- )
1154
 
1155
- st.markdown("""<hr class="spends-heading-seperator">""", unsafe_allow_html=True)
 
 
1156
  _columns = st.columns((2.5, 2, 1.5, 1.5, 1))
1157
  with _columns[0]:
1158
  generate_spending_header("Channel")
@@ -1165,7 +862,9 @@ if auth_status == True:
1165
  with _columns[4]:
1166
  generate_spending_header("Optimize")
1167
 
1168
- st.markdown("""<hr class="spends-heading-seperator">""", unsafe_allow_html=True)
 
 
1169
 
1170
  if "acutual_predicted" not in st.session_state:
1171
  st.session_state["acutual_predicted"] = {
@@ -1175,7 +874,9 @@ if auth_status == True:
1175
  "Delta": [],
1176
  }
1177
  for i, channel_name in enumerate(channels_list):
1178
- _channel_class = st.session_state["scenario"].channels[channel_name]
 
 
1179
  _columns = st.columns((2.5, 1.5, 1.5, 1.5, 1))
1180
  with _columns[0]:
1181
  st.write(channel_name_formating(channel_name))
@@ -1184,8 +885,12 @@ if auth_status == True:
1184
  with _columns[1]:
1185
  channel_bounds = _channel_class.bounds
1186
  channel_spends = float(_channel_class.actual_total_spends)
1187
- min_value = float((1 + channel_bounds[0] / 100) * channel_spends)
1188
- max_value = float((1 + channel_bounds[1] / 100) * channel_spends)
 
 
 
 
1189
  ##print(st.session_state[channel_name])
1190
  spend_input = st.text_input(
1191
  channel_name,
@@ -1196,11 +901,9 @@ if auth_status == True:
1196
  if not validate_input(spend_input):
1197
  st.error("Invalid input")
1198
 
1199
- channel_name_current = f"{channel_name}_change"
1200
-
1201
  st.number_input(
1202
- "Percent Change",
1203
- key=channel_name_current,
1204
  step=1,
1205
  on_change=partial(update_data_by_percent, channel_name),
1206
  )
@@ -1212,10 +915,12 @@ if auth_status == True:
1212
  * _channel_class.conversion_rate
1213
  )
1214
  actual_channel_spends = float(
1215
- _channel_class.actual_total_spends * _channel_class.conversion_rate
 
1216
  )
1217
  spends_delta = float(
1218
- _channel_class.delta_spends * _channel_class.conversion_rate
 
1219
  )
1220
  st.session_state["acutual_predicted"]["Channel_name"].append(
1221
  channel_name
@@ -1223,10 +928,12 @@ if auth_status == True:
1223
  st.session_state["acutual_predicted"]["Actual_spend"].append(
1224
  actual_channel_spends
1225
  )
1226
- st.session_state["acutual_predicted"]["Optimized_spend"].append(
1227
- current_channel_spends
 
 
 
1228
  )
1229
- st.session_state["acutual_predicted"]["Delta"].append(spends_delta)
1230
  ## REMOVE
1231
  st.metric(
1232
  "Spends",
@@ -1237,32 +944,29 @@ if auth_status == True:
1237
 
1238
  with _columns[3]:
1239
  # sales
1240
- current_channel_sales = float(_channel_class.modified_total_sales)
 
 
1241
  actual_channel_sales = float(_channel_class.actual_total_sales)
1242
  sales_delta = float(_channel_class.delta_sales)
1243
  st.metric(
1244
  target,
1245
- format_numbers(current_channel_sales, include_indicator=False),
 
 
1246
  delta=numerize(sales_delta, 1),
1247
  label_visibility="collapsed",
1248
  )
1249
 
1250
  with _columns[4]:
1251
 
1252
- # if st.checkbox(
1253
- # label="select for optimization",
1254
- # key=f"{channel_name}_selected",
1255
- # value=False,
1256
- # # on_change=partial(select_channel_for_optimization, channel_name),
1257
- # label_visibility="collapsed",
1258
- # ):
1259
- # select_channel_for_optimization(channel_name)
1260
-
1261
  st.checkbox(
1262
  label="select for optimization",
1263
  key=f"{channel_name}_selected",
1264
  value=False,
1265
- on_change=partial(select_channel_for_optimization, channel_name),
 
 
1266
  label_visibility="collapsed",
1267
  )
1268
 
@@ -1274,29 +978,20 @@ if auth_status == True:
1274
  # Bins
1275
  col = channels_list[i]
1276
  x_actual = st.session_state["scenario"].channels[col].actual_spends
1277
- x_modified = st.session_state["scenario"].channels[col].modified_spends
 
 
1278
 
1279
  x_total = x_modified.sum()
1280
  power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
1281
 
1282
- updated_rcs_key = f"{metrics_selected}#@{panel_selected}#@{channel_name}"
1283
-
1284
- if updated_rcs and updated_rcs_key in list(updated_rcs.keys()):
1285
- K = updated_rcs[updated_rcs_key]["K"]
1286
- b = updated_rcs[updated_rcs_key]["b"]
1287
- a = updated_rcs[updated_rcs_key]["a"]
1288
- x0 = updated_rcs[updated_rcs_key]["x0"]
1289
- else:
1290
- K = st.session_state["rcs"][col]["K"]
1291
- b = st.session_state["rcs"][col]["b"]
1292
- a = st.session_state["rcs"][col]["a"]
1293
- x0 = st.session_state["rcs"][col]["x0"]
1294
 
1295
  x_plot = np.linspace(0, 5 * x_actual.sum(), 200)
1296
 
1297
- # Append current_channel_spends to the end of x_plot
1298
- x_plot = np.append(x_plot, current_channel_spends)
1299
-
1300
  x, y, marginal_roi = [], [], []
1301
  for x_p in x_plot:
1302
  x.append(x_p * x_actual / x_actual.sum())
@@ -1306,7 +1001,9 @@ if auth_status == True:
1306
 
1307
  for index in range(len(x_plot)):
1308
  marginal_roi.append(
1309
- a * y[index] * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
 
 
1310
  )
1311
 
1312
  x = (
@@ -1321,18 +1018,12 @@ if auth_status == True:
1321
 
1322
  roi = y / np.maximum(x, np.finfo(float).eps)
1323
 
1324
- roi_current, marginal_roi_current = roi[-1], marginal_roi[-1]
1325
- x, y, roi, marginal_roi = (
1326
- x[:-1],
1327
- y[:-1],
1328
- roi[:-1],
1329
- marginal_roi[:-1],
1330
- ) # Drop data for current spends
1331
-
1332
- start_value, end_value, left_value, right_value = find_segment_value(
1333
- x,
1334
- roi,
1335
- marginal_roi,
1336
  )
1337
 
1338
  rgba = calculate_rgba(
@@ -1343,6 +1034,16 @@ if auth_status == True:
1343
  current_channel_spends,
1344
  )
1345
 
 
 
 
 
 
 
 
 
 
 
1346
  with bin_placeholder:
1347
  st.markdown(
1348
  f"""
@@ -1360,7 +1061,7 @@ if auth_status == True:
1360
  unsafe_allow_html=True,
1361
  )
1362
 
1363
- with st.expander("See Response Curves", expanded=True):
1364
  fig = plot_response_curves()
1365
  st.plotly_chart(fig, use_container_width=True)
1366
 
@@ -1380,11 +1081,19 @@ if auth_status == True:
1380
  )
1381
 
1382
  summary_df = pd.DataFrame(st.session_state["acutual_predicted"])
1383
- summary_df.drop_duplicates(subset="Channel_name", keep="last", inplace=True)
 
 
1384
 
1385
  summary_df_sorted = summary_df.sort_values(by="Delta", ascending=False)
1386
  summary_df_sorted["Delta_percent"] = np.round(
1387
- ((summary_df_sorted["Optimized_spend"] / summary_df_sorted["Actual_spend"]) - 1)
 
 
 
 
 
 
1388
  * 100,
1389
  2,
1390
  )
@@ -1412,9 +1121,9 @@ if auth_status != True:
1412
  authenticator.forgot_password("Forgot password")
1413
  )
1414
  if username_forgot_pw:
1415
- st.session_state["config"]["credentials"]["usernames"][username_forgot_pw][
1416
- "password"
1417
- ] = stauth.Hasher([random_password]).generate()[0]
1418
  send_email(email_forgot_password, random_password)
1419
  st.success("New password sent securely")
1420
  # Random password to be transferred to user securely
 
23
  import pandas as pd
24
  import plotly.express as px
25
 
26
+ target = "Revenue"
27
  st.set_page_config(layout="wide")
28
  load_local_css("styles.css")
29
  set_header()
30
 
31
  for k, v in st.session_state.items():
32
+ if k not in ["logout", "login", "config"] and not k.startswith(
33
+ "FormSubmitter"
34
+ ):
35
  st.session_state[k] = v
36
  # ======================================================== #
37
  # ======================= Functions ====================== #
38
  # ======================================================== #
39
 
40
 
41
+ def optimize(key):
42
  """
43
  Optimize the channel spends for the selected target
44
  """
45
 
46
  channel_list = [
47
+ key
48
+ for key, value in st.session_state["optimization_channels"].items()
49
+ if value
50
  ]
51
+ # print('channel_list')
52
+ # print(channel_list)
53
+ # print('@@@@@@@@')
54
  if len(channel_list) > 0:
55
  scenario = st.session_state["scenario"]
56
+ if key.lower() == "spends":
57
  with status_placeholder:
58
  with st.spinner("Optimizing"):
59
  result = st.session_state["scenario"].optimize(
60
  st.session_state["total_spends_change"], channel_list
61
  )
62
+ elif key.lower() == "sales":
 
63
  with status_placeholder:
64
  with st.spinner("Optimizing"):
65
 
 
69
  for channel_name, modified_spends in result:
70
 
71
  st.session_state[channel_name] = numerize(
72
+ modified_spends
73
+ * scenario.channels[channel_name].conversion_rate,
74
  1,
75
  )
76
  prev_spends = (
77
+ st.session_state["scenario"]
78
+ .channels[channel_name]
79
+ .actual_total_spends
80
  )
81
  st.session_state[f"{channel_name}_change"] = round(
82
  100 * (modified_spends - prev_spends) / prev_spends, 2
 
105
  pickle.dump(st.session_state["saved_scenarios"], f)
106
 
107
 
108
  def update_sales_abs():
109
  actual_sales = _scenario.actual_total_sales
110
+ if validate_input(st.session_state["total_sales_change_abs"]):
 
 
 
111
  modified_sales = extract_number_for_string(
112
  st.session_state["total_sales_change_abs"]
113
  )
114
  st.session_state["total_sales_change"] = round(
115
  ((modified_sales / actual_sales) - 1) * 100
116
  )
 
117
 
118
 
119
  def update_sales():
 
122
  * _scenario.actual_total_sales,
123
  1,
124
  )
 
126
 
127
  def update_all_spends_abs():
 
 
 
 
 
 
 
 
128
  actual_spends = _scenario.actual_total_spends
129
+ if validate_input(st.session_state["total_spends_change_abs"]):
 
 
 
130
  modified_spends = extract_number_for_string(
131
  st.session_state["total_spends_change_abs"]
132
  )
133
+ print(modified_spends)
134
+ print(actual_spends)
135
+
136
  st.session_state["total_spends_change"] = (
137
  (modified_spends / actual_spends) - 1
138
  ) * 100
 
 
 
139
 
140
  update_all_spends()
141
 
142
 
143
  def update_all_spends():
144
  """
145
  Updates spends for all the channels with the given overall spends change
146
  """
147
  percent_change = st.session_state["total_spends_change"]
148
+ st.session_state["total_spends_change_abs"] = numerize(
149
+ (1 + percent_change / 100) * _scenario.actual_total_spends, 1
150
+ )
151
  for channel_name in st.session_state["channels_list"]:
152
  channel = st.session_state["scenario"].channels[channel_name]
153
  current_spends = channel.actual_total_spends
 
199
  """
200
 
201
  if validate_input(st.session_state[channel_name]):
202
+ modified_spends = extract_number_for_string(
203
+ st.session_state[channel_name]
204
+ )
205
  prev_spends = (
206
+ st.session_state["scenario"]
207
+ .channels[channel_name]
208
+ .actual_total_spends
209
+ * st.session_state["scenario"]
210
+ .channels[channel_name]
211
+ .conversion_rate
212
  )
213
  st.session_state[f"{channel_name}_change"] = round(
214
  100 * (modified_spends - prev_spends) / prev_spends, 2
 
216
  st.session_state["scenario"].update(
217
  channel_name,
218
  modified_spends
219
+ / st.session_state["scenario"]
220
+ .channels[channel_name]
221
+ .conversion_rate,
222
  )
223
  # st.session_state['scenario'].update(channel_name, modified_spends)
224
  # else:
 
249
  st.session_state[f"{channel_name}_selected"] = st.session_state[
250
  "optimze_all_channels"
251
  ]
252
+ st.session_state["optimization_channels"][channel_name] = (
253
+ st.session_state["optimze_all_channels"]
254
+ )
255
 
256
 
257
  def update_penalty():
258
  """
259
  Updates the penalty flag for sales calculation
260
  """
261
+ st.session_state["scenario"].update_penalty(
262
+ st.session_state["apply_penalty"]
263
+ )
264
 
265
 
266
+ def reset_scenario():
267
  # #print(st.session_state['default_scenario_dict'])
268
  # st.session_state['scenario'] = class_from_dict(st.session_state['default_scenario_dict'])
269
  # for channel in st.session_state['scenario'].channels.values():
270
  # st.session_state[channel.name] = float(channel.actual_total_spends * channel.conversion_rate)
271
+ initialize_data()
272
  for channel_name in st.session_state["channels_list"]:
273
  st.session_state[f"{channel_name}_selected"] = False
274
  st.session_state[f"{channel_name}_change"] = 0
275
  st.session_state["optimze_all_channels"] = False
276
 
277
 
278
  def format_number(num):
279
  if num >= 1_000_000:
 
305
  hovertemplate="%{x:.2s}",
306
  )
307
 
308
+ fig.update_layout(
309
+ xaxis_title=x, yaxis_title="Channel Name", showlegend=False
310
+ )
311
  return fig
312
 
313
 
 
342
  relative_position = (current_channel_spends - start_value) / (
343
  left_value - start_value
344
  )
345
+ alpha = 0.8 - (
346
+ 0.6 * relative_position
347
+ ) # Alpha decreases from start to end
348
 
349
  elif left_value < current_channel_spends <= right_value:
350
  color = "green"
351
  relative_position = (current_channel_spends - left_value) / (
352
  right_value - left_value
353
  )
354
+ alpha = 0.8 - (
355
+ 0.6 * relative_position
356
+ ) # Alpha decreases from start to end
357
 
358
  elif right_value < current_channel_spends <= end_value:
359
  color = "red"
360
  relative_position = (current_channel_spends - right_value) / (
361
  end_value - right_value
362
  )
363
+ alpha = 0.2 + (
364
+ 0.6 * relative_position
365
+ ) # Alpha increases from start to end
366
 
367
  else:
368
  # Default case, if the spends are outside the defined ranges
 
432
 
433
  for index in range(len(x_plot)):
434
  marginal_roi.append(
435
+ a
436
+ * y[index]
437
+ * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
438
  )
439
 
440
  x = (
 
466
  st.session_state["scenario"].channels[col].modified_total_spends
467
  * st.session_state["scenario"].channels[col].conversion_rate
468
  )
469
+ y_optimal = (
470
+ st.session_state["scenario"].channels[col].modified_total_sales
471
+ )
472
 
473
  # if col == "Paid_social_others":
474
  # debug_temp(x_optimal * x_actual / x_actual.sum(), power, K, b, a, x0)
 
576
  fig.update_layout(
577
  # height=1000,
578
  # width=1000,
579
+ title_text="Response Curves (X: Spends Vs Y: Revenue)",
580
  showlegend=False,
581
  shapes=shapes,
582
  )
 
718
  st.session_state["authenticator"] = authenticator
719
  name, authentication_status, username = authenticator.login("Login", "main")
720
  auth_status = st.session_state.get("authentication_status")
721
  if auth_status == True:
722
  authenticator.logout("Logout", "main")
723
  is_state_initialized = st.session_state.get("initialized", False)
724
+ if not is_state_initialized:
725
+ initialize_data()
726
 
 
727
  channels_list = st.session_state["channels_list"]
728
 
729
  # ======================================================== #
 
731
  # ======================================================== #
732
 
733
  # print(list(st.session_state.keys()))
734
+
735
+ st.header("Simulation")
736
  main_header = st.columns((2, 2))
737
  sub_header = st.columns((1, 1, 1, 1))
738
  _scenario = st.session_state["scenario"]
739
 
 
 
 
 
 
 
740
  if "total_spends_change_abs" not in st.session_state:
741
  st.session_state["total_spends_change_abs"] = numerize(
742
  _scenario.actual_total_spends, 1
 
747
  _scenario.actual_total_sales, 1
748
  )
749
 
750
  with main_header[0]:
751
  st.subheader("Actual")
752
 
 
754
  st.subheader("Simulated")
755
 
756
  with sub_header[0]:
757
+ st.metric(
758
+ label="Spends", value=format_numbers(_scenario.actual_total_spends)
759
+ )
760
 
761
  with sub_header[1]:
762
  st.metric(
 
782
  delta=numerize(_scenario.delta_sales, 1),
783
  )
784
 
785
+ with st.expander("Channel Spends Simulator"):
786
  _columns1 = st.columns((2, 2, 1, 1))
787
  with _columns1[0]:
788
+
789
  optimization_selection = st.selectbox(
790
+ "Optimize", options=["Spends", "Sales"], key="optimization_key"
791
  )
 
792
  with _columns1[1]:
793
  st.markdown("#")
794
  st.checkbox(
795
  label="Optimize all Channels",
796
+ key=f"optimze_all_channels",
797
  value=False,
798
  on_change=select_all_channels_for_optimization,
799
  )
800
 
801
  with _columns1[2]:
802
  st.markdown("#")
803
+ st.button(
804
+ "Optimize",
805
+ on_click=optimize,
806
+ args=(st.session_state["optimization_key"],),
807
+ )
 
 
 
808
 
809
  with _columns1[3]:
810
  st.markdown("#")
811
+ st.button("Reset", on_click=reset_scenario)
 
 
 
 
 
812
 
813
  _columns2 = st.columns((2, 2, 2))
814
+ if st.session_state["optimization_key"] == "Spends":
815
  with _columns2[0]:
816
  spend_input = st.text_input(
817
  "Absolute",
 
819
  # label_visibility="collapsed",
820
  on_change=update_all_spends_abs,
821
  )
 
822
  with _columns2[1]:
823
+
824
  st.number_input(
825
+ "Percent",
826
+ key=f"total_spends_change",
 
 
827
  step=1,
828
+ on_change=update_all_spends,
829
  )
830
+ elif st.session_state["optimization_key"] == "Sales":
831
  with _columns2[0]:
832
+
833
  sales_input = st.text_input(
834
  "Absolute",
835
  key="total_sales_change_abs",
836
  on_change=update_sales_abs,
837
  )
 
838
  with _columns2[1]:
839
  st.number_input(
840
+ "Percent change",
841
+ key=f"total_sales_change",
 
 
842
  step=1,
843
  on_change=update_sales,
844
  )
845
 
846
+ with _columns2[2]:
847
+ st.markdown("#")
848
+ status_placeholder = st.empty()
849
 
850
+ st.markdown(
851
+ """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
852
+ )
853
  _columns = st.columns((2.5, 2, 1.5, 1.5, 1))
854
  with _columns[0]:
855
  generate_spending_header("Channel")
 
862
  with _columns[4]:
863
  generate_spending_header("Optimize")
864
 
865
+ st.markdown(
866
+ """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
867
+ )
868
 
869
  if "acutual_predicted" not in st.session_state:
870
  st.session_state["acutual_predicted"] = {
 
874
  "Delta": [],
875
  }
876
  for i, channel_name in enumerate(channels_list):
877
+ _channel_class = st.session_state["scenario"].channels[
878
+ channel_name
879
+ ]
880
  _columns = st.columns((2.5, 1.5, 1.5, 1.5, 1))
881
  with _columns[0]:
882
  st.write(channel_name_formating(channel_name))
 
885
  with _columns[1]:
886
  channel_bounds = _channel_class.bounds
887
  channel_spends = float(_channel_class.actual_total_spends)
888
+ min_value = float(
889
+ (1 + channel_bounds[0] / 100) * channel_spends
890
+ )
891
+ max_value = float(
892
+ (1 + channel_bounds[1] / 100) * channel_spends
893
+ )
894
  ##print(st.session_state[channel_name])
895
  spend_input = st.text_input(
896
  channel_name,
 
901
  if not validate_input(spend_input):
902
  st.error("Invalid input")
903
 
 
 
904
  st.number_input(
905
+ "Percent change",
906
+ key=f"{channel_name}_change",
907
  step=1,
908
  on_change=partial(update_data_by_percent, channel_name),
909
  )
 
915
  * _channel_class.conversion_rate
916
  )
917
  actual_channel_spends = float(
918
+ _channel_class.actual_total_spends
919
+ * _channel_class.conversion_rate
920
  )
921
  spends_delta = float(
922
+ _channel_class.delta_spends
923
+ * _channel_class.conversion_rate
924
  )
925
  st.session_state["acutual_predicted"]["Channel_name"].append(
926
  channel_name
 
928
  st.session_state["acutual_predicted"]["Actual_spend"].append(
929
  actual_channel_spends
930
  )
931
+ st.session_state["acutual_predicted"][
932
+ "Optimized_spend"
933
+ ].append(current_channel_spends)
934
+ st.session_state["acutual_predicted"]["Delta"].append(
935
+ spends_delta
936
  )
 
937
  ## REMOVE
938
  st.metric(
939
  "Spends",
 
944
 
945
  with _columns[3]:
946
  # sales
947
+ current_channel_sales = float(
948
+ _channel_class.modified_total_sales
949
+ )
950
  actual_channel_sales = float(_channel_class.actual_total_sales)
951
  sales_delta = float(_channel_class.delta_sales)
952
  st.metric(
953
  target,
954
+ format_numbers(
955
+ current_channel_sales, include_indicator=False
956
+ ),
957
  delta=numerize(sales_delta, 1),
958
  label_visibility="collapsed",
959
  )
960
 
961
  with _columns[4]:
962
 
963
  st.checkbox(
964
  label="select for optimization",
965
  key=f"{channel_name}_selected",
966
  value=False,
967
+ on_change=partial(
968
+ select_channel_for_optimization, channel_name
969
+ ),
970
  label_visibility="collapsed",
971
  )
972
 
 
978
  # Bins
979
  col = channels_list[i]
980
  x_actual = st.session_state["scenario"].channels[col].actual_spends
981
+ x_modified = (
982
+ st.session_state["scenario"].channels[col].modified_spends
983
+ )
984
 
985
  x_total = x_modified.sum()
986
  power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
987
 
988
+ K = st.session_state["rcs"][col]["K"]
989
+ b = st.session_state["rcs"][col]["b"]
990
+ a = st.session_state["rcs"][col]["a"]
991
+ x0 = st.session_state["rcs"][col]["x0"]
992
 
993
  x_plot = np.linspace(0, 5 * x_actual.sum(), 200)
994
 
 
 
 
995
  x, y, marginal_roi = [], [], []
996
  for x_p in x_plot:
997
  x.append(x_p * x_actual / x_actual.sum())
 
1001
 
1002
  for index in range(len(x_plot)):
1003
  marginal_roi.append(
1004
+ a
1005
+ * y[index]
1006
+ * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
1007
  )
1008
 
1009
  x = (
 
1018
 
1019
  roi = y / np.maximum(x, np.finfo(float).eps)
1020
 
1021
+ start_value, end_value, left_value, right_value = (
1022
+ find_segment_value(
1023
+ x,
1024
+ roi,
1025
+ marginal_roi,
1026
+ )
 
 
 
 
 
 
1027
  )
1028
 
1029
  rgba = calculate_rgba(
 
1034
  current_channel_spends,
1035
  )
1036
 
1037
+ # Protecting division by zero by adding a small epsilon to denominators
1038
+ roi_current = current_channel_sales / np.maximum(
1039
+ current_channel_spends, np.finfo(float).eps
1040
+ )
1041
+ marginal_roi_current = (
1042
+ st.session_state["scenario"]
1043
+ .channels[col]
1044
+ .get_marginal_roi("modified")
1045
+ )
1046
+
1047
  with bin_placeholder:
1048
  st.markdown(
1049
  f"""
 
1061
  unsafe_allow_html=True,
1062
  )
1063
 
1064
+ with st.expander("See Response Curves"):
1065
  fig = plot_response_curves()
1066
  st.plotly_chart(fig, use_container_width=True)
1067
 
 
1081
  )
1082
 
1083
  summary_df = pd.DataFrame(st.session_state["acutual_predicted"])
1084
+ summary_df.drop_duplicates(
1085
+ subset="Channel_name", keep="last", inplace=True
1086
+ )
1087
 
1088
  summary_df_sorted = summary_df.sort_values(by="Delta", ascending=False)
1089
  summary_df_sorted["Delta_percent"] = np.round(
1090
+ (
1091
+ (
1092
+ summary_df_sorted["Optimized_spend"]
1093
+ / summary_df_sorted["Actual_spend"]
1094
+ )
1095
+ - 1
1096
+ )
1097
  * 100,
1098
  2,
1099
  )
 
1121
  authenticator.forgot_password("Forgot password")
1122
  )
1123
  if username_forgot_pw:
1124
+ st.session_state["config"]["credentials"]["usernames"][
1125
+ username_forgot_pw
1126
+ ]["password"] = stauth.Hasher([random_password]).generate()[0]
1127
  send_email(email_forgot_password, random_password)
1128
  st.success("New password sent securely")
1129
  # Random password to be transferred to user securely
pages/Data_Import.py ADDED
@@ -0,0 +1,891 @@
1
+ # Importing necessary libraries
2
+ import streamlit as st
3
+
4
+ st.set_page_config(
5
+ page_title="Model Build",
6
+ page_icon=":shark:",
7
+ layout="wide",
8
+ initial_sidebar_state="collapsed",
9
+ )
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ from utilities import set_header, load_local_css, load_authenticator
14
+ import pickle
15
+
16
+
17
+ load_local_css("styles.css")
18
+ set_header()
19
+
20
+ authenticator = st.session_state.get("authenticator")
21
+ if authenticator is None:
22
+ authenticator = load_authenticator()
23
+
24
+ name, authentication_status, username = authenticator.login("Login", "main")
25
+ auth_status = st.session_state.get("authentication_status")
26
+
27
+ # Check for authentication status
28
+ if auth_status != True:
29
+ st.stop()
30
+
31
+
32
+ # Function to validate date column in dataframe
33
+ def validate_date_column(df):
34
+ try:
35
+ # Attempt to convert the 'Date' column to datetime
36
+ df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y")
37
+ return True
38
+ except Exception:
39
+ return False
40
+
41
+
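A quick illustration of the validator above:

import pandas as pd

df_ok = pd.DataFrame({"date": ["31-01-2024", "01-02-2024"]})  # DD-MM-YYYY
df_bad = pd.DataFrame({"date": ["2024-01-31"]})               # wrong format
assert validate_date_column(df_ok) is True
assert validate_date_column(df_bad) is False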
42
+ # Function to determine data interval
43
+ def determine_data_interval(common_freq):
44
+ if common_freq == 1:
45
+ return "daily"
46
+ elif common_freq == 7:
47
+ return "weekly"
48
+ elif 28 <= common_freq <= 31:
49
+ return "monthly"
50
+ else:
51
+ return "irregular"
52
+
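The modal day-gap between consecutive dates (computed later in `files_to_dataframes`) maps onto an interval label:

assert determine_data_interval(1) == "daily"
assert determine_data_interval(7) == "weekly"
assert determine_data_interval(30) == "monthly"
assert determine_data_interval(10) == "irregular"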
53
+
54
+ # Function to read each uploaded Excel file into a pandas DataFrame and store them in a dictionary
55
+ @st.cache_resource(show_spinner=False)
56
+
57
+
58
+ def files_to_dataframes(uploaded_files):
59
+ df_dict = {}
60
+ for uploaded_file in uploaded_files:
61
+ # Extract file name without extension
62
+ file_name = uploaded_file.name.rsplit(".", 1)[0]
63
+
64
+ # Check for duplicate file names
65
+ if file_name in df_dict:
66
+ st.warning(
67
+ f"Duplicate File: {file_name}. This file will be skipped.",
68
+ icon="⚠️",
69
+ )
70
+ continue
71
+
72
+ # Read the file into a DataFrame
73
+ df = pd.read_excel(uploaded_file)
74
+
75
+ # Convert all column names to lowercase
76
+ df.columns = df.columns.str.lower().str.strip()
77
+
78
+ # Separate numeric and non-numeric columns
79
+ numeric_cols = list(df.select_dtypes(include=["number"]).columns)
80
+ non_numeric_cols = [
81
+ col
82
+ for col in df.select_dtypes(exclude=["number"]).columns
83
+ if col.lower() != "date"
84
+ ]
85
+
86
+ # Check for 'Date' column
87
+ if not (validate_date_column(df) and len(numeric_cols) > 0):
88
+ st.warning(
89
+ f"File Name: {file_name} ➜ Please upload data with Date column in 'DD-MM-YYYY' format and at least one media/exogenous column. This file will be skipped.",
90
+ icon="⚠️",
91
+ )
92
+ continue
93
+
94
+ # Check for interval
95
+ common_freq = (
96
+ pd.Series(df["date"].unique()).diff().dt.days.dropna().mode()[0]
97
+ )
98
+ # Calculate the data interval (daily, weekly, monthly or irregular)
99
+ interval = determine_data_interval(common_freq)
100
+ if interval == "irregular":
101
+ st.warning(
102
+ f"File Name: {file_name} ➜ Please upload data in daily, weekly or monthly interval. This file will be skipped.",
103
+ icon="⚠️",
104
+ )
105
+ continue
106
+
107
+ # Store both DataFrames in the dictionary under their respective keys
108
+ df_dict[file_name] = {
109
+ "numeric": numeric_cols,
110
+ "non_numeric": non_numeric_cols,
111
+ "interval": interval,
112
+ "df": df,
113
+ }
114
+
115
+ return df_dict
116
+
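The returned dictionary keys each file name to its column split, detected interval, and the parsed DataFrame; an illustrative shape with one hypothetical file:

import pandas as pd

df_dict = {
    "media_spends": {
        "numeric": ["spend", "impressions"],
        "non_numeric": ["panel"],
        "interval": "weekly",
        "df": pd.DataFrame(),  # the parsed sheet
    },
}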
117
+
118
+ # Function to adjust dataframe granularity
119
+ # def adjust_dataframe_granularity(df, current_granularity, target_granularity):
120
+ # # Set index
121
+ # df.set_index("date", inplace=True)
122
+
123
+ # # Define aggregation rules for resampling
124
+ # aggregation_rules = {
125
+ # col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
126
+ # for col in df.columns
127
+ # }
128
+
129
+ # resampled_df = df
130
+ # if current_granularity == "daily" and target_granularity == "weekly":
131
+ # resampled_df = df.resample("W-MON").agg(aggregation_rules)
132
+
133
+ # elif current_granularity == "daily" and target_granularity == "monthly":
134
+ # resampled_df = df.resample("MS").agg(aggregation_rules)
135
+
136
+ # elif current_granularity == "daily" and target_granularity == "daily":
137
+ # resampled_df = df.resample("D").agg(aggregation_rules)
138
+
139
+ # elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
140
+ # # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
141
+ # expanded_data = []
142
+ # for _, row in df.iterrows():
143
+ # if current_granularity == "weekly":
144
+ # period_range = pd.date_range(start=row.name, periods=7)
145
+ # elif current_granularity == "monthly":
146
+ # period_range = pd.date_range(
147
+ # start=row.name, periods=row.name.days_in_month
148
+ # )
149
+
150
+ # for date in period_range:
151
+ # new_row = {}
152
+ # for col in df.columns:
153
+ # if pd.api.types.is_numeric_dtype(df[col]):
154
+ # if current_granularity == "weekly":
155
+ # new_row[col] = row[col] / 7
156
+ # elif current_granularity == "monthly":
157
+ # new_row[col] = row[col] / row.name.days_in_month
158
+ # else:
159
+ # new_row[col] = row[col]
160
+ # expanded_data.append((date, new_row))
161
+
162
+ # resampled_df = pd.DataFrame(
163
+ # [data for _, data in expanded_data],
164
+ # index=[date for date, _ in expanded_data],
165
+ # )
166
+
167
+ # # Reset index
168
+ # resampled_df = resampled_df.reset_index().rename(columns={"index": "date"})
169
+
170
+ # return resampled_df
171
+
172
+
173
+ def adjust_dataframe_granularity(df, current_granularity, target_granularity):
174
+ # Set index
175
+ df.set_index("date", inplace=True)
176
+
177
+ # Define aggregation rules for resampling
178
+ aggregation_rules = {
179
+ col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
180
+ for col in df.columns
181
+ }
182
+
183
+ # Initialize resampled_df
184
+ resampled_df = df
185
+ if current_granularity == "daily" and target_granularity == "weekly":
186
+ resampled_df = df.resample("W-MON", closed="left", label="left").agg(
187
+ aggregation_rules
188
+ )
189
+
190
+ elif current_granularity == "daily" and target_granularity == "monthly":
191
+ resampled_df = df.resample("MS", closed="left", label="left").agg(
192
+ aggregation_rules
193
+ )
194
+
195
+ elif current_granularity == "daily" and target_granularity == "daily":
196
+ resampled_df = df.resample("D").agg(aggregation_rules)
197
+
198
+ elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
199
+ # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
200
+ expanded_data = []
201
+ for _, row in df.iterrows():
202
+ if current_granularity == "weekly":
203
+ period_range = pd.date_range(start=row.name, periods=7)
204
+ elif current_granularity == "monthly":
205
+ period_range = pd.date_range(
206
+ start=row.name, periods=row.name.days_in_month
207
+ )
208
+
209
+ for date in period_range:
210
+ new_row = {}
211
+ for col in df.columns:
212
+ if pd.api.types.is_numeric_dtype(df[col]):
213
+ if current_granularity == "weekly":
214
+ new_row[col] = row[col] / 7
215
+ elif current_granularity == "monthly":
216
+ new_row[col] = row[col] / row.name.days_in_month
217
+ else:
218
+ new_row[col] = row[col]
219
+ expanded_data.append((date, new_row))
220
+
221
+ resampled_df = pd.DataFrame(
222
+ [data for _, data in expanded_data],
223
+ index=[date for date, _ in expanded_data],
224
+ )
225
+
226
+ # Reset index
227
+ resampled_df = resampled_df.reset_index().rename(columns={"index": "date"})
228
+
229
+ return resampled_df
230
+
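An illustrative roll-up using the function above: fourteen daily rows become two weekly rows anchored to Monday:

import pandas as pd

daily = pd.DataFrame({
    "date": pd.date_range("2024-01-01", periods=14, freq="D"),  # 2024-01-01 is a Monday
    "spend": 1.0,
    "panel": "aggregated",
})
weekly = adjust_dataframe_granularity(daily, "daily", "weekly")
# Numeric columns are summed (7.0 per week); non-numeric columns keep their first value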
231
+
232
+ # Function to clean and extract unique values of DMA and Panel
233
+ @st.cache_resource(show_spinner=False)
234
+
235
+
236
+ def clean_and_extract_unique_values(files_dict, selections):
237
+ all_dma_values = set()
238
+ all_panel_values = set()
239
+
240
+ for file_name, file_data in files_dict.items():
241
+ df = file_data["df"]
242
+
243
+ # 'DMA' and 'Panel' selections
244
+ selected_dma = selections[file_name].get("DMA")
245
+ selected_panel = selections[file_name].get("Panel")
246
+
247
+ # Clean and standardize DMA column if it exists and is selected
248
+ if selected_dma and selected_dma != "N/A" and selected_dma in df.columns:
249
+ df[selected_dma] = (
250
+ df[selected_dma].str.lower().str.strip().str.replace("_", " ")
251
+ )
252
+ all_dma_values.update(df[selected_dma].dropna().unique())
253
+
254
+ # Clean and standardize Panel column if it exists and is selected
255
+ if selected_panel and selected_panel != "N/A" and selected_panel in df.columns:
256
+ df[selected_panel] = (
257
+ df[selected_panel].str.lower().str.strip().str.replace("_", " ")
258
+ )
259
+ all_panel_values.update(df[selected_panel].dropna().unique())
260
+
261
+ # Update the processed DataFrame back in the dictionary
262
+ files_dict[file_name]["df"] = df
263
+
264
+ return all_dma_values, all_panel_values
265
+
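The `selections` argument maps each file to its chosen DMA/Panel columns, with "N/A" meaning the dimension is absent; the structure below is inferred from the lookups above (names are illustrative):

selections = {
    "media_spends": {"DMA": "dma", "Panel": "panel"},
    "macro_data": {"DMA": "N/A", "Panel": "N/A"},
}
all_dma_values, all_panel_values = clean_and_extract_unique_values(files_dict, selections)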
266
+
267
+ # Function to format values for display
268
+ @st.cache_resource(show_spinner=False)
269
+
270
+
271
+ def format_values_for_display(values_list):
272
+ # Capitalize the first letter of each word and replace underscores with spaces
273
+ formatted_list = [value.replace("_", " ").title() for value in values_list]
274
+ # Join values with commas and 'and' before the last value
275
+ if len(formatted_list) > 1:
276
+ return ", ".join(formatted_list[:-1]) + ", and " + formatted_list[-1]
277
+ elif formatted_list:
278
+ return formatted_list[0]
279
+ return "No values available"
280
+
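Expected behavior of the formatter above:

assert format_values_for_display(["new_york", "los_angeles", "chicago"]) == "New York, Los Angeles, and Chicago"
assert format_values_for_display(["boston"]) == "Boston"
assert format_values_for_display([]) == "No values available"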
281
+
282
+ # Function to normalize all data within files_dict to a daily granularity
283
+ @st.cache(show_spinner=False, allow_output_mutation=True)
284
+
285
+
286
+ def standardize_data_to_daily(files_dict, selections):
287
+ # Normalize all data to a daily granularity using a provided function
288
+ files_dict = apply_granularity_to_all(files_dict, "daily", selections)
289
+
290
+ # Update the "interval" attribute for each dataset to indicate the new granularity
291
+ for files_name, files_data in files_dict.items():
292
+ files_data["interval"] = "daily"
293
+
294
+ return files_dict
295
+
296
+
297
+ # Function to apply granularity transformation to all DataFrames in files_dict
298
+ @st.cache_resource(show_spinner=False)
299
+
300
+
301
+ def apply_granularity_to_all(files_dict, granularity_selection, selections):
302
+ for file_name, file_data in files_dict.items():
303
+ df = file_data["df"].copy()
304
+
305
+ # Handling when DMA or Panel might be 'N/A'
306
+ selected_dma = selections[file_name].get("DMA")
307
+ selected_panel = selections[file_name].get("Panel")
308
+
309
+ # Correcting the segment selection logic & handling 'N/A'
310
+ if selected_dma != "N/A" and selected_panel != "N/A":
311
+ unique_combinations = df[[selected_dma, selected_panel]].drop_duplicates()
312
+ elif selected_dma != "N/A":
313
+ unique_combinations = df[[selected_dma]].drop_duplicates()
314
+ selected_panel = None # Ensure Panel is ignored if N/A
315
+ elif selected_panel != "N/A":
316
+ unique_combinations = df[[selected_panel]].drop_duplicates()
317
+ selected_dma = None # Ensure DMA is ignored if N/A
318
+ else:
319
+ # If both are 'N/A', process the entire dataframe as is
320
+ df = adjust_dataframe_granularity(
321
+ df, file_data["interval"], granularity_selection
322
+ )
323
+ files_dict[file_name]["df"] = df
324
+ continue # Skip to the next file
325
+
326
+ transformed_segments = []
327
+ for _, combo in unique_combinations.iterrows():
328
+ if selected_dma and selected_panel:
329
+ segment = df[
330
+ (df[selected_dma] == combo[selected_dma])
331
+ & (df[selected_panel] == combo[selected_panel])
332
+ ]
333
+ elif selected_dma:
334
+ segment = df[df[selected_dma] == combo[selected_dma]]
335
+ elif selected_panel:
336
+ segment = df[df[selected_panel] == combo[selected_panel]]
337
+
338
+ # Adjust granularity of the segment
339
+ transformed_segment = adjust_dataframe_granularity(
340
+ segment, file_data["interval"], granularity_selection
341
+ )
342
+ transformed_segments.append(transformed_segment)
343
+
344
+ # Combine all transformed segments into a single DataFrame for this file
345
+ transformed_df = pd.concat(transformed_segments, ignore_index=True)
346
+ files_dict[file_name]["df"] = transformed_df
347
+
348
+ return files_dict
349
+
350
+
351
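+ # Input shapes assumed by the function above (illustrative): files_dict maps
+ # file names to {"df": DataFrame, "interval": "daily"|"weekly"|"monthly", ...}
+ # and selections maps the same names to {"DMA": <column or "N/A">,
+ # "Panel": <column or "N/A">}. Each unique (DMA, Panel) combination is
+ # resampled independently, so one segment's dates never bleed into another's.
+ 
+ 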
+ # Function to create the main dataframe structure
+ @st.cache_resource(show_spinner=False)
+ def create_main_dataframe(
+     files_dict, all_dma_values, all_panel_values, granularity_selection
+ ):
+     # Determine the global start and end dates across all DataFrames
+     global_start = min(file_data["df"]["date"].min() for file_data in files_dict.values())
+     global_end = max(file_data["df"]["date"].max() for file_data in files_dict.values())
+ 
+     # Adjust the date_range generation based on the granularity_selection
+     if granularity_selection == "weekly":
+         # Generate a weekly range, with weeks starting on Monday
+         date_range = pd.date_range(start=global_start, end=global_end, freq="W-MON")
+     elif granularity_selection == "monthly":
+         # Generate a monthly range, starting from the first day of each month
+         date_range = pd.date_range(start=global_start, end=global_end, freq="MS")
+     else:  # Default to daily if not weekly or monthly
+         date_range = pd.date_range(start=global_start, end=global_end, freq="D")
+ 
+     # Collect all unique DMA and Panel values, excluding 'N/A'
+     all_dmas = all_dma_values
+     all_panels = all_panel_values
+ 
+     # Dynamically build the list of dimensions (Panel, DMA) to include in the main DataFrame based on availability
+     dimensions, merge_keys = [], []
+     if all_panels:
+         dimensions.append(all_panels)
+         merge_keys.append("Panel")
+     if all_dmas:
+         dimensions.append(all_dmas)
+         merge_keys.append("DMA")
+ 
+     dimensions.append(date_range)  # Date range is always included
+     merge_keys.append("date")
+ 
+     # Create a main DataFrame template with the dimensions
+     main_df = pd.MultiIndex.from_product(
+         dimensions,
+         names=[name for name, _ in zip(merge_keys, dimensions)],
+     ).to_frame(index=False)
+ 
+     return main_df.reset_index(drop=True)
+ 
+ 
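+ # Illustrative scaffold (hypothetical values): with panels {"panel a",
+ # "panel b"}, no DMAs, and a two-day daily range, MultiIndex.from_product
+ # yields 2 x 2 = 4 rows:
+ #
+ #   Panel      date
+ #   panel a    2024-01-01
+ #   panel a    2024-01-02
+ #   panel b    2024-01-01
+ #   panel b    2024-01-02
+ 
+ 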
+ # Function to prepare and merge DataFrames
+ @st.cache_resource(show_spinner=False)
+ def merge_into_main_df(main_df, files_dict, selections):
+     for file_name, file_data in files_dict.items():
+         df = file_data["df"].copy()
+ 
+         # Rename selected DMA and Panel columns if not 'N/A'
+         selected_dma = selections[file_name].get("DMA", "N/A")
+         selected_panel = selections[file_name].get("Panel", "N/A")
+         if selected_dma != "N/A":
+             df.rename(columns={selected_dma: "DMA"}, inplace=True)
+         if selected_panel != "N/A":
+             df.rename(columns={selected_panel: "Panel"}, inplace=True)
+ 
+         # Merge current DataFrame into main_df based on 'date', and where applicable, 'Panel' and 'DMA'
+         merge_keys = ["date"]
+         if "Panel" in df.columns:
+             merge_keys.append("Panel")
+         if "DMA" in df.columns:
+             merge_keys.append("DMA")
+         main_df = pd.merge(main_df, df, on=merge_keys, how="left")
+ 
+     # After all merges, sort by 'date' and reset index for cleanliness
+     sort_by = ["date"]
+     if "Panel" in main_df.columns:
+         sort_by.append("Panel")
+     if "DMA" in main_df.columns:
+         sort_by.append("DMA")
+     main_df.sort_values(by=sort_by, inplace=True)
+     main_df.reset_index(drop=True, inplace=True)
+ 
+     return main_df
+ 
+ 
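+ # Because every merge above is a left join onto the full scaffold, any
+ # date/segment combination a file does not cover surfaces as NaN and is
+ # handled by the imputation step further down this page.
+ 
+ 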
+ # Function to categorize column
+ def categorize_column(column_name):
+     # Define keywords for each category
+     internal_keywords = [
+         "Price",
+         "Discount",
+         "product_price",
+         "cost",
+         "margin",
+         "inventory",
+         "sales",
+         "revenue",
+         "turnover",
+         "expense",
+     ]
+     exogenous_keywords = [
+         "GDP",
+         "Tax",
+         "Inflation",
+         "interest_rate",
+         "employment_rate",
+         "exchange_rate",
+         "consumer_spending",
+         "retail_sales",
+         "oil_prices",
+         "weather",
+     ]
+ 
+     # Check if the column name matches any of the keywords for Internal or Exogenous categories
+     for keyword in internal_keywords:
+         if keyword.lower() in column_name.lower():
+             return "Internal"
+     for keyword in exogenous_keywords:
+         if keyword.lower() in column_name.lower():
+             return "Exogenous"
+ 
+     # Default to Media if no match found
+     return "Media"
+ 
+ 
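+ # For example (hypothetical column names): categorize_column("gdp_index")
+ # returns "Exogenous", categorize_column("product_price") returns "Internal",
+ # and categorize_column("impressions_tv") returns "Media" (no keyword match).
+ 
+ 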
+ # Function to calculate missing stats and prepare the editable DataFrame
+ @st.cache_resource(show_spinner=False)
+ def prepare_missing_stats_df(df):
+     missing_stats = []
+     for column in df.columns:
+         if (
+             column == "date" or column == "DMA" or column == "Panel"
+         ):  # Skip the date, DMA and Panel columns
+             continue
+ 
+         missing = df[column].isnull().sum()
+         pct_missing = round((missing / len(df)) * 100, 2)
+ 
+         # Dynamically assign category based on column name
+         # category = categorize_column(column)
+         category = "Media"
+ 
+         missing_stats.append(
+             {
+                 "Column": column,
+                 "Missing Values": missing,
+                 "Missing Percentage": pct_missing,
+                 "Impute Method": "Fill with 0",  # Default value
+                 "Category": category,
+             }
+         )
+     stats_df = pd.DataFrame(missing_stats)
+ 
+     return stats_df
+ 
+ 
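+ # Example row of the resulting stats_df (values illustrative):
+ #   {"Column": "impressions_tv", "Missing Values": 12,
+ #    "Missing Percentage": 4.69, "Impute Method": "Fill with 0",
+ #    "Category": "Media"}
+ 
+ 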
+ # Function to add API DataFrame details to the files dictionary
+ @st.cache_resource(show_spinner=False)
+ def add_api_dataframe_to_dict(main_df, files_dict):
+     files_dict["API"] = {
+         "numeric": list(main_df.select_dtypes(include=["number"]).columns),
+         "non_numeric": [
+             col
+             for col in main_df.select_dtypes(exclude=["number"]).columns
+             if col.lower() != "date"
+         ],
+         "interval": determine_data_interval(
+             pd.Series(main_df["date"].unique()).diff().dt.days.dropna().mode()[0]
+         ),
+         "df": main_df,
+     }
+ 
+     return files_dict
+ 
+ 
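+ # The "interval" entry above is inferred from the modal gap (in days) between
+ # consecutive unique dates, e.g. a modal gap of 7 implies weekly data; the
+ # mapping itself lives in determine_data_interval, assumed to be defined
+ # earlier in this file.
+ 
+ 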
+ # Function to read the API data into a DataFrame, parsing the 'Date' column as datetime
+ @st.cache_resource(show_spinner=False)
+ def read_API_data():
+     return pd.read_excel(r"upf_data_converted.xlsx", parse_dates=["Date"])
+ 
+ 
+ # Function to set the 'DMA_Panel_Selected' session state variable to False
+ def set_DMA_Panel_Selected_false():
+     st.session_state["DMA_Panel_Selected"] = False
+ 
+ 
+ # Initialize 'final_df' in session state
+ if "final_df" not in st.session_state:
+     st.session_state["final_df"] = pd.DataFrame()
+ 
+ # Initialize 'bin_dict' in session state
+ if "bin_dict" not in st.session_state:
+     st.session_state["bin_dict"] = {}
+ 
+ # Initialize 'DMA_Panel_Selected' in session state
+ if "DMA_Panel_Selected" not in st.session_state:
+     st.session_state["DMA_Panel_Selected"] = False
+ 
+ # Page Title
+ st.write("")  # Top padding
+ st.title("Data Import")
+ 
+ 
+ #########################################################################################################################################################
+ # Create a dictionary to hold all DataFrames and collect user input to specify "DMA" and "Panel" columns for each file
+ #########################################################################################################################################################
+ 
+ 
+ # Read the Excel file, parsing the 'Date' column as datetime
+ main_df = read_API_data()
+ 
+ # Convert all column names to lowercase and strip whitespace
+ main_df.columns = main_df.columns.str.lower().str.strip()
+ 
+ # File uploader
+ uploaded_files = st.file_uploader(
+     "Upload additional data",
+     type=["xlsx"],
+     accept_multiple_files=True,
+     on_change=set_DMA_Panel_Selected_false,
+ )
+ 
+ # Custom HTML for upload instructions
+ recommendation_html = f"""
+ <div style="text-align: justify;">
+ <strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets, including DMA, Panel, media, internal, and exogenous data, adhere to the following guidelines: each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code> and be free of missing values.
+ </div>
+ """
+ st.markdown(recommendation_html, unsafe_allow_html=True)
+ 
+ # Choose Date Granularity
+ st.markdown("#### Choose Date Granularity")
+ # Granularity Selection
+ granularity_selection = st.selectbox(
+     "Choose Date Granularity",
+     ["Daily", "Weekly", "Monthly"],
+     label_visibility="collapsed",
+     on_change=set_DMA_Panel_Selected_false,
+ )
+ granularity_selection = str(granularity_selection).lower()
+ 
+ # Convert uploaded files to DataFrames
+ files_dict = files_to_dataframes(uploaded_files)
+ 
+ # Add the API DataFrame
+ if main_df is not None:
+     files_dict = add_api_dataframe_to_dict(main_df, files_dict)
+ 
+ # Display a warning message if no files have been uploaded and halt further execution
+ if not files_dict:
+     st.warning(
+         "Please upload at least one file to proceed.",
+         icon="⚠️",
+     )
+     st.stop()  # Halts further execution until a file is uploaded
+ 
+ 
+ # Select DMA and Panel columns
+ st.markdown("#### Select DMA and Panel columns")
+ selections = {}
+ with st.expander("Select DMA and Panel columns", expanded=False):
+     count = 0  # Initialize counter to manage the visibility of labels and keys
+     for file_name, file_data in files_dict.items():
+         # Determine visibility of the label based on the count
+         if count == 0:
+             label_visibility = "visible"
+         else:
+             label_visibility = "collapsed"
+ 
+         # Extract non-numeric columns
+         non_numeric_cols = file_data["non_numeric"]
+ 
+         # Prepare DMA and Panel values for dropdown, adding "N/A" as an option
+         dma_values = non_numeric_cols + ["N/A"]
+         panel_values = non_numeric_cols + ["N/A"]
+ 
+         # Skip if only one option is available
+         if len(dma_values) == 1 and len(panel_values) == 1:
+             selected_dma, selected_panel = "N/A", "N/A"
+             # Update the selections for DMA and Panel for the current file
+             selections[file_name] = {
+                 "DMA": selected_dma,
+                 "Panel": selected_panel,
+             }
+             continue
+ 
+         # Create layout columns for File Name, DMA, and Panel selections
+         file_name_col, DMA_col, Panel_col = st.columns([2, 4, 4])
+ 
+         with file_name_col:
+             # Display the "File Name" label only for the first file
+             if count == 0:
+                 st.write("File Name")
+             else:
+                 st.write("")
+             st.write(file_name)  # Display the file name
+ 
+         with DMA_col:
+             # Display a selectbox for DMA values
+             selected_dma = st.selectbox(
+                 "Select DMA",
+                 dma_values,
+                 on_change=set_DMA_Panel_Selected_false,
+                 label_visibility=label_visibility,  # Control visibility of the label
+                 key=f"DMA_selectbox{count}",  # Ensure a unique key for each selectbox
+             )
+ 
+         with Panel_col:
+             # Display a selectbox for Panel values
+             selected_panel = st.selectbox(
+                 "Select Panel",
+                 panel_values,
+                 on_change=set_DMA_Panel_Selected_false,
+                 label_visibility=label_visibility,  # Control visibility of the label
+                 key=f"Panel_selectbox{count}",  # Ensure a unique key for each selectbox
+             )
+ 
+         # Stop processing if the same column is selected for both Panel and DMA due to potential data integrity issues
+         if selected_panel == selected_dma and not (
+             selected_panel == "N/A" and selected_dma == "N/A"
+         ):
+             st.warning(
+                 f"File: {file_name} → The same column cannot serve as both Panel and DMA. Please adjust your selections.",
+             )
+             selected_dma, selected_panel = "N/A", "N/A"
+             st.stop()
+ 
+         # Update the selections for DMA and Panel for the current file
+         selections[file_name] = {
+             "DMA": selected_dma,
+             "Panel": selected_panel,
+         }
+ 
+         count += 1  # Increment the counter after processing each file
+ 
+ # Accept DMA and Panel selection
+ if st.button("Accept and Process", use_container_width=True):
+ 
+     # Normalize all data to a daily granularity. This initial standardization simplifies subsequent conversions to other levels of granularity
+     with st.spinner("Processing..."):
+         files_dict = standardize_data_to_daily(files_dict, selections)
+ 
+         # Convert all data to the chosen level of granularity
+         files_dict = apply_granularity_to_all(
+             files_dict, granularity_selection, selections
+         )
+ 
+         st.session_state["files_dict"] = files_dict
+         st.session_state["DMA_Panel_Selected"] = True
+ 
+ 
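+ # Note: sources are first flattened to daily granularity and then rebuilt at
+ # the chosen granularity, so files arriving at mixed weekly/monthly intervals
+ # all land on one common calendar before merging.
+ 
+ 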
+ #########################################################################################################################################################
+ # Display unique DMA and Panel values
+ #########################################################################################################################################################
+ 
+ 
+ # Halt further execution until DMA and Panel columns are selected
+ if "files_dict" in st.session_state and st.session_state["DMA_Panel_Selected"]:
+     files_dict = st.session_state["files_dict"]
+ else:
+     st.stop()
+ 
+ # Sets to store unique values of DMA and Panel
+ with st.spinner("Fetching DMA and Panel values..."):
+     all_dma_values, all_panel_values = clean_and_extract_unique_values(
+         files_dict, selections
+     )
+ 
+ # Lists of unique DMA and Panel column values
+ list_of_all_dma_values = list(all_dma_values)
+ list_of_all_panel_values = list(all_panel_values)
+ 
+ # Format DMA and Panel values for display
+ formatted_dma_values = format_values_for_display(list_of_all_dma_values)
+ formatted_panel_values = format_values_for_display(list_of_all_panel_values)
+ 
+ # Unique DMA and Panel values
+ st.markdown("#### Unique DMA and Panel values")
+ # Display DMA and Panel values
+ with st.expander("Unique DMA and Panel values"):
+     st.write("")
+     st.markdown(
+         f"""
+         <style>
+         .justify-text {{
+             text-align: justify;
+         }}
+         </style>
+         <div class="justify-text">
+             <strong>Panel Values:</strong> {formatted_panel_values}<br>
+             <strong>DMA Values:</strong> {formatted_dma_values}
+         </div>
+         """,
+         unsafe_allow_html=True,
+     )
+ 
+     # Display totals for DMA and Panel
+     st.write("")
+     st.markdown(
+         f"""
+         <div style="text-align: justify;">
+             <strong>Number of DMAs detected:</strong> {len(list_of_all_dma_values)}<br>
+             <strong>Number of Panels detected:</strong> {len(list_of_all_panel_values)}
+         </div>
+         """,
+         unsafe_allow_html=True,
+     )
+     st.write("")
+ 
+ 
+ #########################################################################################################################################################
+ # Merge all DataFrames
+ #########################################################################################################################################################
+ 
+ 
+ # Merge all DataFrames selected
+ main_df = create_main_dataframe(
+     files_dict, all_dma_values, all_panel_values, granularity_selection
+ )
+ merged_df = merge_into_main_df(main_df, files_dict, selections)
+ 
+ # # Display the merged DataFrame
+ # st.markdown("#### Merged DataFrame based on selected DMA and Panel")
+ # st.dataframe(merged_df)
+ 
+ 
+ #########################################################################################################################################################
+ # Categorize Variables and Impute Missing Values
+ #########################################################################################################################################################
+ 
+ 
+ # Create an editable DataFrame in Streamlit
+ st.markdown("#### Select Variables Category & Impute Missing Values")
+ 
+ # Prepare missing stats DataFrame for editing
+ missing_stats_df = prepare_missing_stats_df(merged_df)
+ 
+ edited_stats_df = st.data_editor(
+     missing_stats_df,
+     column_config={
+         "Impute Method": st.column_config.SelectboxColumn(
+             options=[
+                 "Drop Column",
+                 "Fill with Mean",
+                 "Fill with Median",
+                 "Fill with 0",
+             ],
+             required=True,
+             default="Fill with 0",
+         ),
+         "Category": st.column_config.SelectboxColumn(
+             options=[
+                 "Media",
+                 "Exogenous",
+                 "Internal",
+                 "Response_Metric",
+             ],
+             required=True,
+             default="Media",
+         ),
+     },
+     disabled=["Column", "Missing Values", "Missing Percentage"],
+     hide_index=True,
+     use_container_width=True,
+ )
+ 
+ # Apply changes based on the edited DataFrame
+ for i, row in edited_stats_df.iterrows():
+     column = row["Column"]
+     if row["Impute Method"] == "Drop Column":
+         merged_df.drop(columns=[column], inplace=True)
+ 
+     elif row["Impute Method"] == "Fill with Mean":
+         merged_df[column].fillna(merged_df[column].mean(), inplace=True)
+ 
+     elif row["Impute Method"] == "Fill with Median":
+         merged_df[column].fillna(merged_df[column].median(), inplace=True)
+ 
+     elif row["Impute Method"] == "Fill with 0":
+         merged_df[column].fillna(0, inplace=True)
+ 
+ # Display the final DataFrame
+ st.markdown("#### Final DataFrame")
+ final_df = merged_df
+ st.dataframe(final_df, hide_index=True)
+ 
+ # Initialize an empty dictionary to hold categories and their variables
+ category_dict = {}
+ 
+ # Iterate over each row in the edited DataFrame to populate the dictionary
+ for i, row in edited_stats_df.iterrows():
+     column = row["Column"]
+     category = row["Category"]  # The category chosen by the user for this variable
+ 
+     # Check if the category already exists in the dictionary
+     if category not in category_dict:
+         # If not, initialize it with the current column as its first element
+         category_dict[category] = [column]
+     else:
+         # If it exists, append the current column to the list of variables under this category
+         category_dict[category].append(column)
+ 
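+ # Resulting shape (illustrative): {"Media": ["impressions_tv", ...],
+ # "Exogenous": ["gdp", ...], "Response_Metric": ["total_prospect_id"]}
+ 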
+ # Add Date, DMA and Panel to the category dictionary
+ category_dict.update({"Date": ["date"]})
+ if "DMA" in final_df.columns:
+     category_dict["DMA"] = ["DMA"]
+ 
+ if "Panel" in final_df.columns:
+     category_dict["Panel"] = ["Panel"]
+ 
+ # Display the dictionary
+ st.markdown("#### Variable Category")
+ for category, variables in category_dict.items():
+     # Check if there are multiple variables to handle "and" insertion correctly
+     if len(variables) > 1:
+         # Join all but the last variable with ", ", then add " and " before the last variable
+         variables_str = ", ".join(variables[:-1]) + " and " + variables[-1]
+     else:
+         # If there's only one variable, no need for "and"
+         variables_str = variables[0]
+ 
+     # Display the category and its variables in the desired format
+     st.markdown(
+         f"<div style='text-align: justify;'><strong>{category}:</strong> {variables_str}</div>",
+         unsafe_allow_html=True,
+     )
+ 
+ # Store the final DataFrame and bin dictionary in session state
+ st.session_state["final_df"], st.session_state["bin_dict"] = final_df, category_dict
+ 
+ if st.button('Save Changes'):
+     with open("Pickle_files/main_df", 'wb') as f:
+         pickle.dump(st.session_state["final_df"], f)
+     with open("Pickle_files/category_dict", 'wb') as c:
+         pickle.dump(st.session_state["bin_dict"], c)
+     st.success('Changes Saved!')
+ 
pages/actual_data.csv ADDED
@@ -0,0 +1,158 @@
+ const,clicks_search_decay.2,impressions_tv_lag3,online_edu_trend_lag3,clicks_digital_lag2_decay.3,impressions_streaming_lag2_decay.4,covid_cases_lag3,unemployement_rate_lead4,season,flag_Aug_1,flag_Aug_2,flag_Aug_3,flag_dec_1,flag_dec_-1,flag_dec_-2,flag_dec_-3,flag_easter_-1,flag_easter_-2,flag_may_-1,flag_may_-2,flag_jun_-1,flag_jun_-2,covid_flag1,flag_june28,flag_aug13,flag_sep13,flag_mar_feb,date,total_prospect_id
+ 1.0,0.03264506089026503,0.0,0.0,0.0,0.11920857922376585,0.0,0.2448979591836735,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-10,3106
+ 1.0,0.1203178311529351,0.0,0.0,0.0,0.23575959332216032,0.0,0.2448979591836735,101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-17,7809
+ 1.0,0.037674240888288246,0.0,0.0,0.30427286753070926,0.14866425214344534,0.0,0.2448979591836735,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-24,5658
+ 1.0,0.114056065999327,0.25459834519940233,0.5700000000000001,0.3210660307498862,0.06375317695001911,0.0,0.2448979591836735,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-01,7528
+ 1.0,0.15091848146432302,0.04759636387261456,0.58,0.2652143429433443,0.02550166207848893,0.0,0.2380952380952381,104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-08,8913
+ 1.0,0.09691798534505919,0.0,0.41000000000000003,0.27398476053158455,0.22803554179688423,0.0,0.2380952380952381,105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-15,7974
+ 1.0,0.0,0.2185391903071715,0.53,0.3093665823461814,0.3016670242357716,0.0,0.2380952380952381,106,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-22,5034
+ 1.0,0.06818143419410627,0.0645557652165116,0.6,0.35005256364095544,0.3915886857834677,0.0,0.2380952380952381,107,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-29,8296
+ 1.0,0.19748095587743647,0.0,0.49,0.2866388037412839,0.4644891817948484,0.0,0.2380952380952381,108,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-05,10953
+ 1.0,0.2718903484441833,0.31632836028874944,0.42,0.38339772931601046,0.4758788391710054,0.0,0.2380952380952381,109,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-12,11583
+ 1.0,0.29329394272923165,0.710207473795361,0.56,0.4716341482535363,0.47415700741999534,0.0,0.2380952380952381,110,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2020-01-19,11650
+ 1.0,0.3150710926081645,0.6225458397661645,0.66,0.5560651882029227,0.2282082561307921,0.0,0.2380952380952381,111,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-26,10086
+ 1.0,0.23335326208386092,0.5093471390869946,0.65,0.5990392189890996,0.09128427138188955,0.0,0.2993197278911565,112,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-02,8454
+ 1.0,0.18339704064539092,0.46920681970876166,0.66,0.5097387360461574,0.03651393215188798,0.0,0.2993197278911565,113,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-09,7842
+ 1.0,0.1829206162885479,0.5702922924005152,0.64,0.3647117781342298,0.5333315970976881,0.0,0.2993197278911565,114,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-16,8528
+ 1.0,0.17708137647064887,0.4762803199026322,0.62,0.2994390381863003,0.9999999999999999,0.0,0.2993197278911565,115,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-23,9230
+ 1.0,0.2110785179466496,0.31643298954206356,0.65,0.318727924805625,0.5153399788387041,0.0,0.2993197278911565,116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-01,8210
+ 1.0,0.1922309642774856,0.35110354589746834,0.65,0.3435805763353255,0.20613623376787482,0.0,1.0,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-08,6573
+ 1.0,0.1174971533357681,0.4397302099507956,0.64,0.37079693119819457,0.08245451214041095,0.0,1.0,118,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2020-03-15,4464
+ 1.0,0.04487177585471158,0.5651604986093057,0.66,0.3797815418753292,0.032981804856164386,3.6661729553753427e-06,1.0,119,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2020-03-22,5498
+ 1.0,0.04417426781579725,0.5142518574426083,0.77,0.3239901926717436,0.013192796475509808,0.00016497778299189042,1.0,120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-29,7134
+ 1.0,0.09508966430933447,0.4246084040047787,1.0,0.22766051203571303,0.005277118590203924,0.01074555293220513,0.8979591836734694,121,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2020-04-05,6507
+ 1.0,0.1727148072921107,0.3306303340730278,0.92,0.2557126494916798,0.0021108474360815696,0.07506489126131015,0.8979591836734694,122,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2020-04-12,6752
+ 1.0,0.2757761792524949,0.9059477066272279,0.87,0.2910560761584964,0.0008443389744326279,0.11051311756683434,0.8979591836734694,123,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-04-19,7874
+ 1.0,0.46164669127102737,1.0,0.8200000000000001,0.29288325042575475,0.0003377355897730512,0.1323451775160945,0.8979591836734694,124,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-04-26,8706
+ 1.0,0.3631365926708698,0.8555262504044332,0.85,0.3143348639913703,0.00013509423590922048,0.12527679605813083,0.8979591836734694,125,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-03,9593
+ 1.0,0.3556269301486625,0.5998066602658987,0.8,0.3573452157072908,5.4838924587260594e-05,0.08418266340132861,0.7482993197278912,126,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-10,9554
+ 1.0,0.3898924329688705,0.31953123019194307,0.76,0.3492819601843694,0.08837696494340691,0.06699197841357364,0.7482993197278912,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-17,9461
+ 1.0,0.3270785638817633,0.5040802333471541,0.88,0.37224504100306005,0.12944061135952373,0.04806352744497074,0.7482993197278912,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-24,8347
+ 1.0,0.29596428185745655,0.6228739252579004,0.8300000000000001,0.3873711562094451,0.14079607140381442,0.028926104617911456,0.7482993197278912,129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-31,7926
+ 1.0,0.23446621861142697,0.644779308361226,0.8,0.3519020717491842,0.15750706055823313,0.024482702995996537,0.6938775510204082,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-07,8606
+ 1.0,0.2202508917985891,0.726916988225644,0.71,0.32726146750928653,0.0797309833640819,0.022000703905207433,0.6938775510204082,131,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-14,7573
+ 1.0,0.18610614076735926,0.5963517592669729,0.73,0.31618831243754153,0.03501476889363339,0.015086301711369536,0.6938775510204082,132,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2020-06-21,6983
+ 1.0,0.1568177529621934,0.6764095796293655,0.75,0.2836099513597926,0.014005944823975384,0.011489786042146325,0.6938775510204082,133,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-28,6277
+ 1.0,0.22774801916471138,0.6466210070345804,0.72,0.25409997289933184,0.006272411362367827,0.00871449311492719,0.5714285714285715,134,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-05,7421
+ 1.0,0.24542124594101095,0.6580063264819511,0.73,0.2516667689694555,0.05947462601462651,0.008318546435746652,0.5714285714285715,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-12,7852
+ 1.0,0.24895270375190542,0.32749815383926373,0.68,0.2671053898526598,0.0888609058832765,0.008014254080450499,0.5714285714285715,136,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-19,7396
+ 1.0,0.16285259960994197,0.3666961464656464,0.78,0.26077100654286645,0.12420199588573878,0.008058248155915004,0.5714285714285715,137,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-26,7041
+ 1.0,0.16864346155569104,0.39341698388602436,0.84,0.25893225300958655,0.10423952696584138,0.00920209411799211,0.5714285714285715,138,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-02,7470
+ 1.0,0.22582910125625383,0.41507293852636135,0.8300000000000001,0.2528768986269057,0.08197739941078482,0.009315745479608745,0.5374149659863946,139,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-09,8725
+ 1.0,0.2778946696783185,0.7857143231388266,0.8,0.2772125371796957,0.07178679747906064,0.007237025413910927,0.5374149659863946,140,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-16,9657
+ 1.0,0.3062154076077969,0.434016630925742,0.87,0.33174759696083367,0.12078972986041582,0.006500124649880482,0.5374149659863946,141,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2020-08-23,10000
+ 1.0,0.2851073700683267,0.4051792323256236,0.8200000000000001,0.3621387745268235,0.1539969659046611,0.006118842662521447,0.5374149659863946,142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-30,8941
+ 1.0,0.25999778433367665,0.4113785668398346,0.77,0.3604714968693371,0.1462622685965232,0.006375474769397721,0.4693877551020409,143,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-06,8507
+ 1.0,0.2947500457787596,0.43576671635701947,0.74,0.3084711376902622,0.1030893445960345,0.0060051913009048115,0.4693877551020409,144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2020-09-13,9887
+ 1.0,0.3239559328273078,0.40721834097732834,0.72,0.24061271129609485,0.08422768334333634,0.006456130574415978,0.4693877551020409,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-20,9627
+ 1.0,0.3189849597494306,0.4831656702512836,0.68,0.28577062852640756,0.054400116894051116,0.006401137980085348,0.4693877551020409,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-27,8735
+ 1.0,0.2930673557404469,0.5423730023996388,0.62,0.32330756771945346,0.02176006539088146,0.007566980979894707,0.45578231292517013,147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-04,8138
+ 1.0,0.27381401410957934,0.48862464971809444,0.59,0.33668984325037016,0.008704026156352586,0.009172764734349107,0.45578231292517013,148,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-11,7966
+ 1.0,0.21658154029531146,0.5162854532967293,0.55,0.44481231480084876,0.003481610462541034,0.012223020633221393,0.45578231292517013,149,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-18,8109
+ 1.0,0.21772903332032795,0.47368257634991157,0.6,0.46141705479304307,0.0013926441850164136,0.013601501664442522,0.45578231292517013,150,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-25,7848
+ 1.0,0.16712357438522701,0.5132571164009214,0.5,0.38402389059771924,0.0005570576740065655,0.012915927321787332,0.45578231292517013,151,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-01,6516
+ 1.0,0.1814031347156822,0.5409537987241609,0.5,0.2968208337801042,0.00022282306960262618,0.013091903623645349,0.45578231292517013,152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-08,7233
+ 1.0,0.16852532779394064,0.49490997931858044,0.5,0.22663075929954526,8.912922784105048e-05,0.014624363918992243,0.45578231292517013,153,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-15,7409
+ 1.0,0.10492104198879731,0.4086344123814518,0.41000000000000003,0.21669561761817938,3.565169113642019e-05,0.016127494830696133,0.45578231292517013,154,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-22,6232
+ 1.0,0.16920169406380464,0.45151008168804235,0.49,0.21833619946593313,1.4260676454568076e-05,0.024849320291534072,0.45578231292517013,155,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-29,8170
+ 1.0,0.1305885456099783,0.4543635808918873,0.47000000000000003,0.1596898931167178,5.704270581827231e-06,0.03519159419864792,0.435374149659864,156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-06,7075
+ 1.0,0.1214984593864375,0.35070760971315756,0.4,0.15417676852356046,2.2817082327308923e-06,0.041732046751037526,0.435374149659864,157,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-13,7379
+ 1.0,0.057042007816384965,0.32470890321593604,0.47000000000000003,0.15442387578570832,9.126832930923571e-07,0.049892947749703036,0.435374149659864,158,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-20,5442
+ 1.0,0.12406882983279183,0.3135816516054531,0.45,0.1671308209739812,3.650733172369429e-07,0.0686930826648678,0.435374149659864,159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-27,7735
+ 1.0,0.24786523070013738,0.3102913429236421,0.42,0.16347790840061424,1.4602932689477716e-07,0.0732574679943101,0.435374149659864,160,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-03,9754
+ 1.0,0.26083059672146286,0.2649240941306087,0.34,0.25327016920452516,5.841173075791087e-08,0.07444897420480709,0.4217687074829932,161,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-10,10641
+ 1.0,0.24028847292133387,0.6513962629200784,0.38,0.3773812732234543,2.3364692303164347e-08,0.08318546435746653,0.4217687074829932,162,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2021-01-17,10230
+ 1.0,0.31526302386797916,0.531674302460824,0.47000000000000003,0.3527386460097067,9.345876921265738e-09,0.10258685163731283,0.4217687074829932,163,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-24,10352
+ 1.0,0.2966293410018717,0.44836670500794606,0.47000000000000003,0.3711695518795665,3.738350768506295e-09,0.13234151134313912,0.4217687074829932,164,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-31,9216
+ 1.0,0.20088776123137192,0.3815806999416851,0.45,0.33580461662371014,1.4953403074025183e-09,0.12043744775703538,0.40816326530612246,165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-07,8421
+ 1.0,0.173394454128539,0.343687050600215,0.48,0.3277941002786073,5.981361229610074e-10,0.11271648751301491,0.40816326530612246,166,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-14,9281
+ 1.0,0.1777198044422716,0.33051072402008147,0.5,0.31487397296804576,2.3925444918440296e-10,0.109699227170741,0.40816326530612246,167,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-21,8891
+ 1.0,0.1850269016675808,0.30627520154343757,0.46,0.3133091660972597,9.570177967376119e-11,0.08255854878209734,0.40816326530612246,168,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-28,8169
+ 1.0,0.2529549962208855,0.298123038215738,0.42,0.3358964981168952,3.828071186950448e-11,0.08351908609640568,0.40816326530612246,169,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-07,8724
+ 1.0,0.213028120324469,0.3267901551549544,0.44,0.3038053348505854,1.531228474780179e-11,0.07285052279626343,0.40816326530612246,170,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-14,8194
+ 1.0,0.16441430466323353,0.25967469209260036,0.5,0.32087357753439977,6.124913899120717e-12,0.07822879852179906,0.40816326530612246,171,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-21,8254
+ 1.0,0.11053130189212229,0.260168451958828,0.42,0.3279459500984871,2.449965559648287e-12,0.07333812379932836,0.40816326530612246,172,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-28,7026
+ 1.0,0.06917021315146277,0.0,0.38,0.37411287881420296,9.799862238593149e-13,0.07465061371735272,0.39455782312925175,173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-04-04,6412
+ 1.0,0.06728264676731566,0.0,0.44,0.4347510050616973,3.9199448954372595e-13,0.0732721326861316,0.39455782312925175,174,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2021-04-11,6297
+ 1.0,0.10167805497311716,0.0,0.43,0.4574504815633023,1.5679779581749037e-13,0.07982724993034271,0.39455782312925175,175,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2021-04-18,6687
+ 1.0,0.1734619149834527,0.0,0.48,0.48912312446006045,6.271911832699615e-14,0.06941165256412136,0.39455782312925175,176,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-04-25,8430
+ 1.0,0.2040432878056308,0.0,0.46,0.44466429049983563,2.5087647330798465e-14,0.06276854716898124,0.39455782312925175,177,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-02,8025
+ 1.0,0.20788046814877387,0.0,0.48,0.5722675873212515,1.0035058932319387e-14,0.04882242524673344,0.40136054421768713,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-09,8242
+ 1.0,0.14929264058846564,0.0,0.5,0.45913415146070335,4.014023572927755e-15,0.033618806000791895,0.40136054421768713,179,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-16,8280
+ 1.0,0.11694210039888364,0.0,0.51,0.39528662679579885,1.6056094291711022e-15,0.025182942030473228,0.40136054421768713,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-23,7909
+ 1.0,0.055184035342337234,0.0,0.51,0.3880077087936407,6.422437716684409e-16,0.017652622780132275,0.40136054421768713,181,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-30,7574
+ 1.0,0.04358787034563821,0.0,0.5,0.3863265622647678,2.568975086673764e-16,0.012651962869000308,0.3673469387755103,182,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-06,7270
+ 1.0,0.03833609653008979,0.0,0.46,0.3784495643657444,1.0275900346695056e-16,0.008835476822454577,0.3673469387755103,183,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-13,6716
+ 1.0,0.06111263589867566,0.0,0.48,0.38862024435317233,4.1103601386780226e-17,0.005939200187708055,0.3673469387755103,184,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-20,6944
+ 1.0,0.07119833324643848,0.0,0.44,0.4039000969934476,1.644144055471209e-17,0.004967664354533589,0.3673469387755103,185,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2021-06-27,6803
+ 1.0,0.0659956847282599,0.0,0.45,0.4420872417106599,6.576576221884836e-18,0.004359079643941282,0.3537414965986395,186,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-04,7019
+ 1.0,0.12577031397293442,0.0,0.45,0.4950177419852857,2.630630488753935e-18,0.003977797656582247,0.3537414965986395,187,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-11,8254
+ 1.0,0.1502746019886232,0.0,0.45,0.5650602702260171,1.052252195501574e-18,0.0040621196345558795,0.3537414965986395,188,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-18,7804
+ 1.0,0.21001397285486328,0.0,0.42,0.594015126140436,4.209008782006296e-19,0.004952999662712088,0.3537414965986395,189,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-25,8212
+ 1.0,0.23464189851384848,0.0,0.46,0.5484130743981998,1.6836035128025183e-19,0.008076579020691881,0.3537414965986395,190,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-01,8378
+ 1.0,0.23496148203757855,0.0,0.47000000000000003,0.5324473242588711,6.734414051210074e-20,0.01220102359548914,0.3197278911564626,191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-08,9496
+ 1.0,0.23319893582092505,0.0,0.53,0.5532778727756644,2.6937656204840295e-20,0.020152952735698258,0.3197278911564626,192,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-15,9511
+ 1.0,0.23262329847201318,0.0,0.49,0.7309984534528141,1.0775062481936118e-20,0.029028757460661962,0.3197278911564626,193,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-22,9569
+ 1.0,0.18495638415853394,0.0,0.46,0.8724050615489382,4.310024992774448e-21,0.03698435277382646,0.3197278911564626,194,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-29,7928
+ 1.0,0.2921700012245981,0.0,0.49,1.0,1.7240099971097793e-21,0.03982197064128697,0.3129251700680272,195,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-05,7840
+ 1.0,0.4172971677569805,0.0,0.48,0.8193686075762131,6.896039988439117e-22,0.03868179085216524,0.3129251700680272,196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-12,9521
+ 1.0,0.5004920981884484,0.0,0.53,0.4496097944711011,2.758415995375647e-22,0.03902274493701515,0.3129251700680272,197,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-19,9451
+ 1.0,0.6383788968475093,0.0,0.47000000000000003,0.3701822126418114,1.1033663981502588e-22,0.03567186285580209,0.3129251700680272,198,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-26,8898
+ 1.0,0.6501651617929107,0.0,0.51,0.34258196039636274,4.413465592601035e-23,0.0352539191388893,0.3129251700680272,199,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-03,8441
+ 1.0,0.6649283374522998,0.0,0.51,0.31355701111053985,1.7653862370404143e-23,0.03635010485254652,0.28571428571428575,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-10,8788
+ 1.0,0.6097114754591861,0.0,0.51,0.32306971094469733,7.061544948161657e-24,0.031323781730726925,0.28571428571428575,201,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-17,9569
+ 1.0,0.3964279757062242,0.0,0.51,0.33051520280988034,2.8246179792646632e-24,0.02719933715592967,0.28571428571428575,202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-24,9008
+ 1.0,0.33105364706311086,0.0,0.47000000000000003,0.3259978333423606,1.1298471917058652e-24,0.025967503042923553,0.28571428571428575,203,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-31,8495
+ 1.0,0.31714045716637634,0.0,0.55,0.3045528431182349,4.519388766823461e-25,0.02263128565353199,0.2653061224489796,204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-07,8807
+ 1.0,0.28268319082761023,0.0,0.49,0.31370309424641213,1.8077555067293845e-25,0.01786159463858867,0.2653061224489796,205,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-14,8385
+ 1.0,0.15774740707436136,0.0,0.51,0.37945364695975814,7.231022026917538e-26,0.016409790148260033,0.2653061224489796,206,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-21,6964
+ 1.0,0.2836203500514554,0.0,0.55,0.36793503370466,2.892408810767015e-26,0.01882946429880776,0.2653061224489796,207,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-28,9340
+ 1.0,0.33646919882766096,0.0,0.49,0.3299836196379579,1.1569635243068062e-26,0.023555161238286576,0.272108843537415,208,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-05,8632
+ 1.0,0.361268166630245,0.0,0.38,0.3243428164088717,4.6278540972272255e-27,0.029421037966887126,0.272108843537415,209,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-12,9271
+ 1.0,0.21850759166298056,0.0,0.51,0.34100191273497404,1.8511416388908902e-27,0.029549354020325262,0.272108843537415,210,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-19,7663
+ 1.0,0.2156152088113536,0.0,0.43,0.3876459690915292,7.404566555563562e-28,0.04853646375621416,0.272108843537415,211,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-26,7888
+ 1.0,0.4122692273972545,0.0,0.42,0.44121852053456856,2.961826622225425e-28,0.07303383144403221,0.272108843537415,212,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-02,11088
+ 1.0,0.5580863257308297,0.0,0.42,0.33648328199770844,1.18473064889017e-28,0.2914790808171166,0.2585034013605442,213,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-09,12850
+ 1.0,0.5441541455767391,0.0,0.45,0.5258301345263098,4.7389225955606806e-29,0.6228644542534939,0.2585034013605442,214,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-16,12768
+ 1.0,0.37953926965668333,0.0,0.51,0.6191133700101356,1.8955690382242722e-29,1.0,0.2585034013605442,215,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2022-01-23,11023
+ 1.0,0.3422525462363791,0.0,0.5,0.6600516747429145,7.582276152897087e-30,0.8603298089190655,0.2585034013605442,216,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-30,10317
+ 1.0,0.3679329127754763,0.0,0.49,0.6150147631969254,3.0329104611588346e-30,0.3851571321728674,0.2448979591836735,217,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-02-06,10109
+ 1.0,0.3530129569359208,0.0,0.49,0.5435710104633258,1.2131641844635335e-30,0.18207314748280565,0.2448979591836735,218,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-13,10233
+ 1.0,0.3628237688509028,0.0,0.48,0.5395383650448762,4.852656737854129e-31,0.08532284319045035,0.2448979591836735,219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-20,10660
+ 1.0,0.3535562124344392,0.0,0.49,0.3713089856353334,1.941062695141646e-31,0.04778123212740684,0.2448979591836735,220,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-27,9862
+ 1.0,0.35851767100446613,0.0,0.49,0.33021424233802193,7.764250780566529e-32,0.028365180155739026,0.2448979591836735,221,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-06,10393
+ 1.0,0.3648140365425708,0.0,0.53,0.29899648842829235,3.105700312226557e-32,0.019053100849085656,0.2448979591836735,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-13,9914
+ 1.0,0.417768904168966,0.0,0.46,0.30801461857263196,1.242280124890568e-32,0.014096435013418193,0.2448979591836735,223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-20,11027
+ 1.0,0.45364666714531404,0.0,0.5,0.29874033139572204,4.9691204995617213e-33,0.013440190054406007,0.2448979591836735,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-03-27,10066
+ 1.0,0.45997433293937545,0.0,0.45,0.3080341285301519,1.9876481998241388e-33,0.014672024167412121,0.2448979591836735,225,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-04-03,8722
+ 1.0,0.4245480429075594,0.0,0.46,0.304189689538618,7.950592799291056e-34,0.01936472555029256,0.2448979591836735,226,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2022-04-10,7805
+ 1.0,0.4463068738641009,0.0,0.54,0.307818077305473,3.1802371197109226e-34,0.027822586558343475,0.2448979591836735,227,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2022-04-17,8519
+ 1.0,0.6012222981571669,0.0,0.53,0.29394180576819906,1.272094847878869e-34,0.033340176856183366,0.2448979591836735,228,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-04-24,10084
+ 1.0,0.6804106164543928,0.0,0.5,0.28219281269675367,5.088379391460478e-35,0.04576117082899503,0.2448979591836735,229,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-01,10291
+ 1.0,0.62805714350389,0.0,0.54,0.30839694661979145,2.035351756529193e-35,0.05172603422739071,0.2448979591836735,230,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-08,9743
+ 1.0,0.7470007501508245,0.0,0.54,0.3120111152265925,8.141407025566787e-36,0.04952999662712088,0.2448979591836735,231,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-15,10759
+ 1.0,0.6460736106378411,0.0,0.55,0.2905779236460707,3.25656280967673e-36,0.06457597043598129,0.2448979591836735,232,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-22,9845
+ 1.0,0.5732108245519132,0.0,0.52,0.38068837954927237,1.3026251233207076e-36,0.080201199571791,0.2448979591836735,233,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-29,9499
+ 1.0,0.5996683384067256,0.0,0.5,0.3940488499594224,5.210500487782985e-37,0.09049581323048496,0.40680272108843546,234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-05,10021
+ 1.0,0.5630659455826548,0.0,0.54,0.4539755399873685,2.0842001896133483e-37,0.09128037424293528,0.40680272108843546,235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-12,10112
+ 1.0,0.5482324249484887,0.0,0.45,0.48814019600803654,8.336800703454939e-38,0.08289217052103649,0.40680272108843546,236,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-19,10034
+ 1.0,0.5485743918729864,0.0,0.47000000000000003,0.475428506654356,3.3347202263835196e-38,0.06987359035649866,0.40680272108843546,237,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2022-06-26,9209
+ 1.0,0.5559932625646005,0.0,0.43,0.510072176038165,1.333888035554951e-38,0.06264756346145385,0.40680272108843546,238,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-03,10265
+ 1.0,0.6089718159266746,0.0,0.45,0.44215508529036335,5.33555159223524e-39,0.0627612148230705,0.40680272108843546,239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-10,10033
+ 1.0,0.6101706458097598,0.0,0.48,0.41550269661979555,2.1342200869095313e-39,0.07072780865510112,0.40680272108843546,240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-17,9790
+ 1.0,0.6111403594460636,0.0,0.44,0.437146146258812,8.536874847792479e-40,0.07964760745552932,0.40680272108843546,241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-24,9629
+ 1.0,0.6451477728019566,0.0,0.44,0.4975101423754845,3.4147444392713438e-40,0.0893739643061401,0.40680272108843546,242,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-31,10134
+ 1.0,0.7267513590970145,0.0,0.44,0.5042632593424633,1.3658922758628901e-40,0.09389435556011791,0.40680272108843546,243,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-07,12029
+ 1.0,0.832744074444703,0.0,0.46,0.5840915039533217,5.463514104995084e-41,0.08482790984147467,0.40680272108843546,244,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-14,12886
+ 1.0,0.8546151893753493,0.0,0.49,0.6374603327364593,2.1853506435415578e-41,0.07962194424484169,0.40680272108843546,245,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-21,12027
+ 1.0,0.9999999999999998,0.0,0.55,0.6022458246191313,8.740852589601472e-42,0.07178366646624922,0.40680272108843546,246,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-28,11375
+ 1.0,0.860672618209781,0.0,0.48,0.5735957859704555,3.495791051275827e-42,0.05725095687114135,0.40680272108843546,247,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-04,10824
+ 1.0,0.8622728019659036,0.0,0.54,0.5790428094946118,1.39776643594557e-42,0.050739833702394745,0.40680272108843546,248,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-11,12285
+ 1.0,0.7774120906393625,0.0,0.55,0.7618650061054455,5.585565898134668e-43,0.0440857297883885,0.40680272108843546,249,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-18,12146
+ 1.0,0.6580209603679659,0.0,0.52,0.8137272725878776,2.2287265136062566e-43,0.039975949905412735,0.40680272108843546,250,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-25,10881
+ 1.0,0.9480011027127861,0.0,0.52,0.7867690657367606,8.859907597948911e-44,0.03648941942485079,0.40680272108843546,251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-02,11373
+ 1.0,0.709096498806814,0.0,0.46,0.7292818780372798,3.4889645827034517e-44,0.04076784326377381,0.40680272108843546,252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-09,10230
+ 1.0,0.5414415970743589,0.0,0.45,0.6974583695681711,1.340587376605267e-44,0.04368978310920796,0.0,253,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-16,11557
+ 1.0,0.6081525119323576,0.0,0.54,0.6240593695822464,4.812364941659934e-45,0.041156457597043596,0.0,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-23,10805
+ 1.0,0.5960421531458853,0.0,0.45,0.5899287906913332,1.3749614119028383e-45,0.03843982343711047,0.0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-30,9709
+ 1.0,0.848521629204434,0.0,0.47000000000000003,0.6201930426013046,0.0,0.040723849188309305,0.0,256,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-11-06,10098