Spaces:

BlendMMM
/

Mastercard

Sleeping

App Files Files Community

BlendMMM committed on Apr 16, 2024

Commit

5cc8082

verified ·

1 Parent(s): 7275079

Delete pages

Browse files

Files changed (15) hide show

pages/10_Optimized_Result_Analysis.py +0 -399
pages/1_Data_Validation.py +0 -241
pages/2_Transformations_with_panel.py +0 -612
pages/3_Model_Tuning_with_panel.py +0 -437
pages/4_Model_Build.py +0 -826
pages/4_Saved_Model_Results.py +0 -413
pages/5_Model_Result_Overview.py +0 -103
pages/5_Model_Tuning_with_panel.py +0 -527
pages/6_Build_Response_Curves.py +0 -168
pages/6_Model_Result_Overview.py +0 -348
pages/7_Build_Response_Curves.py +0 -185
pages/8_Scenario_Planner.py +0 -1133
pages/9_Saved_Scenarios.py +0 -276
pages/Data_Import.py +0 -891
pages/actual_data.csv +0 -158

pages/10_Optimized_Result_Analysis.py DELETED Viewed

@@ -1,399 +0,0 @@
-import streamlit as st
-from numerize.numerize import numerize
-import pandas as pd
-from utilities import (format_numbers,decimal_formater,
-                       load_local_css,set_header,
-                       initialize_data,
-                       load_authenticator)
-import pickle
-import streamlit_authenticator as stauth
-import yaml
-from yaml import SafeLoader
-from classes import class_from_dict
-import plotly.express as px
-import numpy as np
-import plotly.graph_objects as go
-import pandas as pd
-def summary_plot(data, x, y, title, text_column, color, format_as_percent=False, format_as_decimal=False):  # Return a horizontal px.bar figure of column `x` per `y`, labelled with `text_column` and colored by `color`.
-    fig = px.bar(data, x=x, y=y, orientation='h',
-                 title=title, text=text_column, color=color)
-    fig.update_layout(showlegend=False)
-    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')  # NOTE(review): overwrites the column in the caller's DataFrame, and only after px.bar() has already been built from `data` — confirm this ordering is intended.
-    # Choose the number format for bar labels and hover text: whole percent, two decimals, or (default) the d3 '.2s' SI-prefix format.
-    if format_as_percent:
-        fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
-    elif format_as_decimal:
-        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
-    else:
-        fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')
-    fig.update_layout(xaxis_title=x, yaxis_title='Channel Name', showlegend=False)  # x-axis title is the `x` column name; y-axis title is always 'Channel Name'.
-    return fig
-def stacked_summary_plot(data, x, y, title, text_column, color_column, stack_column=None, format_as_percent=False, format_as_decimal=False):  # Return a horizontal px.bar figure of `x` per `y`; `stack_column` is passed as `facet_col`, so it produces side-by-side facets rather than stacked bars.
-    fig = px.bar(data, x=x, y=y, orientation='h',
-                 title=title, text=text_column, color=color_column, facet_col=stack_column)
-    fig.update_layout(showlegend=False)
-    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')  # NOTE(review): overwrites the column in the caller's DataFrame after the figure was already created from `data` — confirm intended.
-    # Choose the number format for bar labels and hover text: whole percent, two decimals, or (default) the d3 '.2s' SI-prefix format.
-    if format_as_percent:
-        fig.update_traces(texttemplate='%{text:.0%}', textposition='outside', hovertemplate='%{x:.0%}')
-    elif format_as_decimal:
-        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside', hovertemplate='%{x:.2f}')
-    else:
-        fig.update_traces(texttemplate='%{text:.2s}', textposition='outside', hovertemplate='%{x:.2s}')
-    fig.update_layout(xaxis_title=x, yaxis_title='', showlegend=False)  # x-axis title is the `x` column name; the y-axis title is left blank.
-    return fig
-def funnel_plot(data, x, y, title, text_column, color_column, format_as_percent=False, format_as_decimal=False):  # Return a go.Funnel figure of `x` per stage `y`, with `text_column` as text and one generated color per `color_column` category.
-    data[text_column] = pd.to_numeric(data[text_column], errors='coerce')  # NOTE(review): modifies the caller's DataFrame in place; values that cannot be parsed become NaN.
-    # Round the numeric values in the text column to two decimal places
-    data[text_column] = data[text_column].round(2)
-    # Derive an rgb color for each distinct category from its position in unique() order
-    color_map = {category: f'rgb({i * 30 % 255},{i * 50 % 255},{i * 70 % 255})' for i, category in enumerate(data[color_column].unique())}
-    fig = go.Figure(go.Funnel(
-        y=data[y],
-        x=data[x],
-        text=data[text_column],
-        marker=dict(color=data[color_column].map(color_map)),
-        textinfo="value",
-        hoverinfo='y+x+text'
-    ))
-    # The format flags only select the layout `funnelmode` (and set the title); they do not change how the text is formatted.
-    if format_as_percent:
-        fig.update_layout(title=title, funnelmode="percent")  # NOTE(review): Plotly's documented funnelmode values are 'stack', 'group' and 'overlay' — confirm 'percent' is accepted.
-    elif format_as_decimal:
-        fig.update_layout(title=title, funnelmode="overlay")
-    else:
-        fig.update_layout(title=title, funnelmode="group")
-    return fig
-st.set_page_config(layout='wide')
-load_local_css('styles.css')
-set_header()
-# for k, v in st.session_state.items():
-#     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
-#         st.session_state[k] = v
-st.empty()
-st.header('Model Result Analysis')
-spends_data=pd.read_excel('Overview_data_test.xlsx')  # in the visible code only its "Date" column is used, to filter raw_data
-with open('summary_df.pkl', 'rb') as file:
-  summary_df_sorted = pickle.load(file)  # NOTE(review): pickle.load can execute arbitrary code from the file; only safe if summary_df.pkl is a trusted artifact.
-selected_scenario= st.selectbox('Select Saved Scenarios',['S1','S2'])  # NOTE(review): hard-coded options; the selection is not read anywhere in the visible code.
-st.header('Optimized Spends Overview')
-___columns=st.columns(3)
-with ___columns[2]:
-    fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent',color='Channel_name')
-    st.plotly_chart(fig,use_container_width=True)
-with ___columns[0]:
-    fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend',color='Channel_name')
-    st.plotly_chart(fig,use_container_width=True)
-with ___columns[1]:
-    fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
-    st.plotly_chart(fig,use_container_width=False)
-st.header(' Budget Allocation')
-summary_df_sorted['Perc_alloted']=np.round(summary_df_sorted['Optimized_spend']/summary_df_sorted['Optimized_spend'].sum(),2)
-columns2=st.columns(2)
-with columns2[0]:
-    fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend',color='Channel_name')
-    st.plotly_chart(fig,use_container_width=True)
-with columns2[1]:
-    fig=summary_plot(summary_df_sorted, x='Perc_alloted', y='Channel_name', title='% Split', text_column='Perc_alloted',color='Channel_name',format_as_percent=True)
-    st.plotly_chart(fig,use_container_width=True)
-if 'raw_data' not in st.session_state:
-    st.session_state['raw_data']=pd.read_excel('raw_data_nov7_combined1.xlsx')
-    st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['MediaChannelName'].isin(summary_df_sorted['Channel_name'].unique())]
-    st.session_state['raw_data']=st.session_state['raw_data'][st.session_state['raw_data']['Date'].isin(spends_data["Date"].unique())]
-#st.write(st.session_state['raw_data']['ResponseMetricName'])
-# st.write(st.session_state['raw_data'])
-st.header('Response Forecast Overview')
-raw_data=st.session_state['raw_data']
-effectiveness_overall=raw_data.groupby('ResponseMetricName').agg({'ResponseMetricValue': 'sum'}).reset_index()
-effectiveness_overall['Efficiency']=effectiveness_overall['ResponseMetricValue'].map(lambda x: x/raw_data['Media Spend'].sum() )
-# st.write(effectiveness_overall)
-columns6=st.columns(3)
-effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False,inplace=True)
-effectiveness_overall=np.round(effectiveness_overall,2)
-effectiveness_overall['ResponseMetric'] = effectiveness_overall['ResponseMetricName'].apply(lambda x: 'BAU' if 'BAU' in x else ('Gamified' if 'Gamified' in x else x))
-# effectiveness_overall=np.where(effectiveness_overall[effectiveness_overall['ResponseMetricName']=="Adjusted Account Approval BAU"],"Adjusted Account Approval BAU",effectiveness_overall['ResponseMetricName'])
-effectiveness_overall.replace({'ResponseMetricName':{'BAU approved clients - Appsflyer':'Approved clients - Appsflyer',
-                                                     'Gamified approved clients - Appsflyer':'Approved clients - Appsflyer'}},inplace=True)
-# st.write(effectiveness_overall.sort_values(by=['ResponseMetricValue'],ascending=False))
-condition = effectiveness_overall['ResponseMetricName'] == "Adjusted Account Approval BAU"
-condition1= effectiveness_overall['ResponseMetricName'] == "Approved clients - Appsflyer"
-effectiveness_overall['ResponseMetric'] = np.where(condition, "Adjusted Account Approval BAU", effectiveness_overall['ResponseMetric'])
-effectiveness_overall['ResponseMetricName'] = np.where(condition1, "Approved clients - Appsflyer (BAU, Gamified)", effectiveness_overall['ResponseMetricName'])
-# effectiveness_overall=pd.DataFrame({'ResponseMetricName':["App Installs - Appsflyer",'Account Requests - Appsflyer',
-#                                                           'Total Adjusted Account Approval','Adjusted Account Approval BAU',
-#                                                           'Approved clients - Appsflyer','Approved clients - Appsflyer'],
-#                                     'ResponseMetricValue':[683067,367020,112315,79768,36661,16834],
-#                                     'Efficiency':[1.24,0.67,0.2,0.14,0.07,0.03],
-custom_colors = {
-    'App Installs - Appsflyer': 'rgb(255, 135, 0)',       # orange
-    'Account Requests - Appsflyer': 'rgb(125, 239, 161)',  # light green
-    'Adjusted Account Approval': 'rgb(129, 200, 255)',      # light blue
-    'Adjusted Account Approval BAU': 'rgb(255, 207, 98)',  # amber
-    'Approved clients - Appsflyer': 'rgb(0, 97, 198)',  # blue
-    "BAU": 'rgb(41, 176, 157)',                              # teal
-     "Gamified": 'rgb(213, 218, 229)'                      # light gray
-    # Add more categories and their respective colors as needed
-}
-with columns6[0]:
-    revenue=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Total Approved Accounts - Revenue']['ResponseMetricValue']).iloc[0]
-    revenue=round(revenue / 1_000_000, 2)
-#     st.metric('Total Revenue', f"${revenue} M")
-# with columns6[1]:
-#     BAU=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='BAU approved clients - Revenue']['ResponseMetricValue']).iloc[0]
-#     BAU=round(BAU / 1_000_000, 2)
-#     st.metric('BAU approved clients - Revenue', f"${BAU} M")
-# with columns6[2]:
-#     Gam=(effectiveness_overall[effectiveness_overall['ResponseMetricName']=='Gamified approved clients - Revenue']['ResponseMetricValue']).iloc[0]
-#     Gam=round(Gam / 1_000_000, 2)
-#     st.metric('Gamified approved clients - Revenue', f"${Gam} M")
-# st.write(effectiveness_overall)
-data = {'Revenue': ['BAU approved clients - Revenue', 'Gamified approved clients- Revenue'],
-        'ResponseMetricValue': [70200000, 1770000],
-        'Efficiency':[127.54,3.21]}
-df = pd.DataFrame(data)
-columns9=st.columns([0.60,0.40])
-with columns9[0]:  # left column: donut chart of ResponseMetricValue split by the 'Revenue' label
-    figd = px.pie(df,
-              names='Revenue',
-              values='ResponseMetricValue',
-              hole=0.3,  # set the size of the hole in the donut
-              title='Effectiveness')
-    figd.update_layout(
-        margin=dict(l=0, r=0, b=0, t=0),width=100, height=180,legend=dict(
-        orientation='v',  # 'v' stacks the legend entries vertically
-        x=0,  # set x to 0 to move to the left
-        y=0.8  # adjust y as needed
-    )
-    )
-    st.plotly_chart(figd, use_container_width=True)  # NOTE(review): use_container_width=True presumably overrides the width=100 set above — confirm.
-with columns9[1]:
-    figd1 = px.pie(df,
-              names='Revenue',
-              values='Efficiency',
-              hole=0.3,  # set the size of the hole in the donut
-              title='Efficiency')
-    figd1.update_layout(
-    margin=dict(l=0, r=0, b=0, t=0),width=100,height=180,showlegend=False
-)
-    st.plotly_chart(figd1, use_container_width=True)
-effectiveness_overall['Response Metric Name']=effectiveness_overall['ResponseMetricName']
-columns4= st.columns([0.55,0.45])
-with columns4[0]:
-    fig=px.funnel(effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
-                                                                                          'BAU approved clients - Revenue',
-                                                                                          'Gamified approved clients - Revenue',
-                                                                                          "Total Approved Accounts - Appsflyer"]))],
-                                                                                            x='ResponseMetricValue', y='Response Metric Name',color='ResponseMetric',
-                                                                                            color_discrete_map=custom_colors,title='Effectiveness',
-                                                                                            labels=None)
-    custom_y_labels=['App Installs - Appsflyer','Account Requests - Appsflyer','Adjusted Account Approval','Adjusted Account Approval BAU',
-                     "Approved clients - Appsflyer (BAU, Gamified)"
-                     ]
-    fig.update_layout(showlegend=False,
-    yaxis=dict(
-        tickmode='array',
-        ticktext=custom_y_labels,
-        )
-        )
-    fig.update_traces(textinfo='value', textposition='inside', texttemplate='%{x:.2s} ', hoverinfo='y+x+percent initial')
-    last_trace_index = len(fig.data) - 1
-    fig.update_traces(marker=dict(line=dict(color='black', width=2)), selector=dict(marker=dict(color='blue')))
-    st.plotly_chart(fig,use_container_width=True)
-with columns4[1]:
-# Bar chart of Efficiency per response metric, excluding the three '- Revenue' metrics and 'Total Approved Accounts - Appsflyer', sorted by ResponseMetricValue
-    fig1 = px.bar((effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue',
-                                                                                            'BAU approved clients - Revenue',
-                                                                                            'Gamified approved clients - Revenue',
-                                                                                            "Total Approved Accounts - Appsflyer"]))]).sort_values(by='ResponseMetricValue'),
-                x='Efficiency', y='Response Metric Name',
-                color_discrete_map=custom_colors, color='ResponseMetric',
-                labels=None,text_auto=True,title='Efficiency'
-                )
-    # Update layout and traces
-    fig1.update_traces(customdata=effectiveness_overall['Efficiency'],  # NOTE(review): taken from the unfiltered frame in a different order than the plotted rows, and applied to every trace — it may not line up with the bars.
-                   textposition='auto')
-    fig1.update_layout(showlegend=False)
-    fig1.update_yaxes(title='',showticklabels=False)
-    fig1.update_xaxes(title='',showticklabels=False)
-    fig1.update_xaxes(tickfont=dict(size=20))  # tick labels are hidden by the calls above, so this font size presumably has no visible effect
-    fig1.update_yaxes(tickfont=dict(size=20))
-    st.plotly_chart(fig1, use_container_width=True)
-effectiveness_overall_revenue=pd.DataFrame({'ResponseMetricName':['Approved Clients','Approved Clients'],
-                                            'ResponseMetricValue':[70201070,1768900],
-                                            'Efficiency':[127.54,3.21],
-                                            'ResponseMetric':['BAU','Gamified']
-                                            })
-# from plotly.subplots import make_subplots
-# fig = make_subplots(rows=1, cols=2,
-#                     subplot_titles=["Effectiveness", "Efficiency"])
-# # Add first plot as subplot
-# fig.add_trace(go.Funnel(
-#     x = fig.data[0].x,
-#     y = fig.data[0].y,
-#     textinfo = 'value+percent initial',
-#     hoverinfo = 'x+y+percent initial'
-# ), row=1, col=1)
-# # Update layout for first subplot
-# fig.update_xaxes(title_text="Response Metric Value", row=1, col=1)
-# fig.update_yaxes(ticktext = custom_y_labels, row=1, col=1)
-# # Add second plot as subplot
-# fig.add_trace(go.Bar(
-#     x = fig1.data[0].x,
-#     y = fig1.data[0].y,
-#     customdata = fig1.data[0].customdata,
-#     textposition = 'auto'
-# ), row=1, col=2)
-# # Update layout for second subplot
-# fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
-# fig.update_yaxes(title='', showticklabels=False, row=1, col=2)
-# fig.update_layout(height=600, width=800, title_text="Key Metrics")
-# st.plotly_chart(fig)
-st.header('Return Forecast by Media Channel')
-with st.expander("Return Forecast by Media Channel"):
-    metric_data=[val for val in list(st.session_state['raw_data']['ResponseMetricName'].unique()) if val!=np.NaN]  # NOTE(review): `val!=np.NaN` is always True (NaN compares unequal to everything), so NaN entries are not filtered out; pd.notna(val) would be needed.
-    # st.write(metric_data)
-    metric=st.selectbox('Select Metric',metric_data,index=1)  # default is the second entry, so metric_data must hold at least two values
-    selected_metric=st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName']==metric]
-    # st.dataframe(selected_metric.head(2))
-    selected_metric=st.session_state['raw_data'][st.session_state['raw_data']['ResponseMetricName']==metric]  # same expression as two lines above; the repeat is redundant
-    effectiveness=selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()  # total response value per media channel for the chosen metric
-    effectiveness_df=pd.DataFrame({'Channel':effectiveness.index,"ResponseMetricValue":effectiveness.values})
-    summary_df_sorted=summary_df_sorted.merge(effectiveness_df,left_on="Channel_name",right_on='Channel')  # default inner merge: channels missing from either side are dropped
-    # st.dataframe(summary_df_sorted.head(2))
-    summary_df_sorted['Efficiency']=summary_df_sorted['ResponseMetricValue']/summary_df_sorted['Optimized_spend']  # response value per unit of optimized spend
-# # # st.dataframe(summary_df_sorted.head(2))
-# st.dataframe(summary_df_sorted.head(2))
-    columns= st.columns(3)
-    with columns[0]:
-        fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='', text_column='Optimized_spend',color='Channel_name')
-        st.plotly_chart(fig,use_container_width=True)
-    with columns[1]:
-        # effectiveness=(selected_metric.groupby(by=['MediaChannelName'])['ResponseMetricValue'].sum()).values
-        # effectiveness_df=pd.DataFrame({'Channel':st.session_state['raw_data']['MediaChannelName'].unique(),"ResponseMetricValue":effectiveness})
-        # # effectiveness.reset_index(inplace=True)
-        # # st.dataframe(effectiveness.head())
-        fig=summary_plot(summary_df_sorted, x='ResponseMetricValue', y='Channel_name', title='Effectiveness', text_column='ResponseMetricValue',color='Channel_name')
-        st.plotly_chart(fig,use_container_width=True)
-    with columns[2]:
-        fig=summary_plot(summary_df_sorted, x='Efficiency', y='Channel_name', title='Efficiency', text_column='Efficiency',color='Channel_name',format_as_decimal=True)
-        st.plotly_chart(fig,use_container_width=True)
-import plotly.express as px
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-# Create figure with subplots
-# fig = make_subplots(rows=1, cols=2)
-# # Add funnel plot to subplot 1
-# fig.add_trace(
-#     go.Funnel(
-#         x=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricValue'],
-#         y=effectiveness_overall[~(effectiveness_overall['ResponseMetricName'].isin(['Total Approved Accounts - Revenue', 'BAU approved clients - Revenue', 'Gamified approved clients - Revenue', "Total Approved Accounts - Appsflyer"]))]['ResponseMetricName'],
-#         textposition="inside",
-#         texttemplate="%{x:.2s}",
-#         customdata=effectiveness_overall['Efficiency'],
-#         hovertemplate="%{customdata:.2f}<extra></extra>"
-#     ),
-#     row=1, col=1
-# )
-# # Add bar plot to subplot 2
-# fig.add_trace(
-#     go.Bar(
-#         x=effectiveness_overall.sort_values(by='ResponseMetricValue')['Efficiency'],
-#         y=effectiveness_overall.sort_values(by='ResponseMetricValue')['ResponseMetricName'],
-#         marker_color=effectiveness_overall['ResponseMetric'],
-#         customdata=effectiveness_overall['Efficiency'],
-#         hovertemplate="%{customdata:.2f}<extra></extra>",
-#         textposition="outside"
-#     ),
-#     row=1, col=2
-# )
-# # Update layout
-# fig.update_layout(title_text="Effectiveness")
-# fig.update_yaxes(title_text="", row=1, col=1)
-# fig.update_yaxes(title_text="", showticklabels=False, row=1, col=2)
-# fig.update_xaxes(title_text="Efficiency", showticklabels=False, row=1, col=2)
-# # Show figure
-# st.plotly_chart(fig)

pages/1_Data_Validation.py DELETED Viewed

@@ -1,241 +0,0 @@
-import streamlit as st
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from Eda_functions import *
-import numpy as np
-import re
-import pickle
-from ydata_profiling import ProfileReport
-from streamlit_pandas_profiling import st_profile_report
-import streamlit as st
-import streamlit.components.v1 as components
-import sweetviz as sv
-from utilities import set_header,initialize_data,load_local_css
-from st_aggrid import GridOptionsBuilder,GridUpdateMode
-from st_aggrid import GridOptionsBuilder
-from st_aggrid import AgGrid
-import base64
-st.set_page_config(
-  page_title="Data Validation",
-  page_icon=":shark:",
-  layout="wide",
-  initial_sidebar_state='collapsed'
-)
-load_local_css('styles.css')
-set_header()
-#preprocessing
-# with open('Categorised_data.pkl', 'rb') as file:
-#   Categorised_data = pickle.load(file)
-# with open("edited_dataframe.pkl", 'rb') as file:
-# df = pickle.load(file)
-# date=df.index
-# df.reset_index(inplace=True)
-# df['Date'] = pd.to_datetime(date)
-#prospects=pd.read_excel('EDA_Data.xlsx',sheet_name='Prospects')
-#spends=pd.read_excel('EDA_Data.xlsx',sheet_name='SPEND INPUT')
-#spends.columns=['Week','Streaming (Spends)','TV (Spends)','Search (Spends)','Digital (Spends)']
-#df=pd.concat([df,spends],axis=1)
-#df['Date'] =pd.to_datetime(df['Date']).dt.strftime('%m/%d/%Y')
-#df['Prospects']=prospects['Prospects']
-#df.drop(['Week'],axis=1,inplace=True)
-st.title('Data Validation and Insights')
-with open("Pickle_files/main_df",'rb') as f:
-   st.session_state['cleaned_data']= pickle.load(f)
-with open("Pickle_files/category_dict",'rb') as c:
-   st.session_state['category_dict']=pickle.load(c)
-# st.write(st.session_state['cleaned_data'])
-target_variables=[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Response_Metric']
-target_column = st.selectbox('Select the Target Feature/Dependent Variable (will be used in all charts as reference)',list(*target_variables))
-st.session_state['target_column']=target_column
-fig=line_plot_target(st.session_state['cleaned_data'], target=target_column, title=f'{target_column} Over Time')
-st.plotly_chart(fig, use_container_width=True)
-media_channel=list(*[st.session_state['category_dict'][key] for key in st.session_state['category_dict'].keys() if key =='Media'])
-# st.write(media_channel)
-Non_media_channel=[col for col in st.session_state['cleaned_data'].columns if col not in media_channel]
-st.markdown('### Annual Data Summary')
-st.dataframe(summary(st.session_state['cleaned_data'], media_channel+[target_column], spends=None,Target=True), use_container_width=True)
-if st.checkbox('Show raw data'):
-    st.write(pd.concat([pd.to_datetime(st.session_state['cleaned_data']['Date']).dt.strftime('%m/%d/%Y'),st.session_state['cleaned_data'].select_dtypes(np.number).applymap(format_numbers)],axis=1))
-col1 = st.columns(1)
-if "selected_feature" not in st.session_state:
-    st.session_state['selected_feature']=None
-st.header('1. Media Channels')
-if 'Validation' not in st.session_state:
-    st.session_state['Validation']=[]
-eda_columns=st.columns(2)
-with eda_columns[0]:
-    if st.button('Generate Profile Report'):
-        pr = st.session_state['cleaned_data'].profile_report()
-        pr.to_file("Profile_Report.html")
-        with open("Profile_Report.html", "rb") as f:
-            profile_report_html = f.read()
-        b64 = base64.b64encode(profile_report_html).decode()
-        href = f'<a href="data:text/html;base64,{b64}" download="Profile_Report.html">Download Profile Report</a>'
-        st.markdown(href, unsafe_allow_html=True)
-with eda_columns[1]:
-    if st.button('Generate Sweetviz Report'):
-        def generate_report_with_target(df, target_feature):
-            report = sv.analyze([df, "Dataset"], target_feat=target_feature)
-            return report
-        report = generate_report_with_target(st.session_state['cleaned_data'], target_feature=target_column)
-        report.show_html()
-selected_media = st.selectbox('Select media', np.unique([Categorised_data[col]['VB'] for col in media_channel]))  # NOTE(review): `Categorised_data` is not assigned in the visible code (its pickle load is commented out) — confirm `from Eda_functions import *` provides it, otherwise this raises NameError.
-# selected_feature=st.multiselect('Select Metric', df.columns[df.columns.str.contains(selected_media,case=False)])
-st.session_state["selected_feature"]=st.selectbox('Select Metric',[col for col in  media_channel  if    Categorised_data[col]['VB'] in selected_media ] )  # `in` is a substring test if selected_media is a string — presumably equality was meant; confirm
-spends_features=[col for col in df.columns if 'spends' in col.lower() or 'cost' in col.lower()]  # NOTE(review): `df` is likewise never assigned in the visible code; the loaded frame lives in st.session_state['cleaned_data'].
-spends_feature=[col for col in spends_features if col.split('_')[0] in st.session_state["selected_feature"].split('_')[0]]  # keep spend columns whose first '_'-separated token occurs inside the selected feature's first token
-#st.write(spends_features)
-#st.write(spends_feature)
-#st.write(selected_feature)
-val_variables=[col for col in media_channel if col!='Date']
-if len(spends_feature)==0:
-    st.warning('No spends varaible available for the selected metric in data')
-else:
-    st.write(f'Selected spends variable {spends_feature[0]} if wrong please name the varaibles properly')
-    # Create the dual-axis line plot
-    fig_row1 = line_plot(df, x_col='Date', y1_cols=[st.session_state["selected_feature"]], y2_cols=[target_column], title=f'Analysis of {st.session_state["selected_feature"]} and {[target_column][0]} Over Time')
-    st.plotly_chart(fig_row1, use_container_width=True)
-    st.markdown('### Annual Data Summary')
-    st.dataframe(summary(df,[st.session_state["selected_feature"]],spends=spends_feature[0]),use_container_width=True)
-    if st.button('Validate'):
-        st.session_state['Validation'].append(st.session_state["selected_feature"])
-    if st.checkbox('Validate all'):
-        st.session_state['Validation'].extend(val_variables)
-        st.success('All media variables are validated ✅')
-    if len(set(st.session_state['Validation']).intersection(val_variables))!=len(val_variables):
-        #st.write(st.session_state['Validation'])
-        validation_data=pd.DataFrame({'Variables':val_variables,
-                                    'Validated':[1 if col in st.session_state['Validation'] else 0 for col in val_variables],
-                                    'Bucket':[Categorised_data[col]['VB'] for col in val_variables]})
-        gd=GridOptionsBuilder.from_dataframe(validation_data)
-        gd.configure_pagination(enabled=True)
-        gd.configure_selection(use_checkbox=True,selection_mode='multiple')
-        #gd.configure_selection_toggle_all(None, show_toggle_all=True)
-        #gd.configure_columns_auto_size_mode(GridOptionsBuilder.configure_columns)
-        gridoptions=gd.build()
-        #st.text(st.session_state['Validation'])
-        table = AgGrid(validation_data,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED,fit_columns_on_grid_load=True)
-        #st.table(table)
-        selected_rows = table["selected_rows"]
-        st.session_state['Validation'].extend([col['Variables'] for col in selected_rows])
-        not_validated_variables = [col for col in val_variables if col not in st.session_state["Validation"]]
-        if not_validated_variables:
-            not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
-            st.warning(not_validated_message)
-st.header('2. Non Media Variables')
-selected_columns_row = [col for col in df.columns if ("imp" not in col.lower()) and ('cli' not in col.lower() ) and ('spend' not in col.lower()) and col!='Date']
-selected_columns_row4 = st.selectbox('Select Channel',selected_columns_row )
-if not selected_columns_row4:
-    st.warning('Please select at least one.')
-else:
-    # Create the dual-axis line plot
-    fig_row4 = line_plot(df, x_col='Date', y1_cols=[selected_columns_row4], y2_cols=[target_column], title=f'Analysis of {selected_columns_row4} and {target_column} Over Time')
-    st.plotly_chart(fig_row4, use_container_width=True)
-    selected_non_media=selected_columns_row4
-    sum_df = df[['Date', selected_non_media,target_column]]
-    sum_df['Year']=pd.to_datetime(df['Date']).dt.year
-    #st.dataframe(df)
-    #st.dataframe(sum_df.head(2))
-    sum_df=sum_df.groupby('Year').agg('sum')
-    sum_df.loc['Grand Total']=sum_df.sum()
-    sum_df=sum_df.applymap(format_numbers)
-    sum_df.fillna('-',inplace=True)
-    sum_df=sum_df.replace({"0.0":'-','nan':'-'})
-    st.markdown('### Annual Data Summary')
-    st.dataframe(sum_df,use_container_width=True)
-    # if st.checkbox('Validate',key='2'):
-    #     st.session_state['Validation'].append(selected_columns_row4)
-# val_variables=[col for col in media_channel if col!='Date']
-# if st.checkbox('Validate all'):
-#     st.session_state['Validation'].extend(val_variables)
-# validation_data=pd.DataFrame({'Variables':val_variables,
-#                             'Validated':[1 if col in st.session_state['Validation'] else 0 for col in val_variables],
-#                             'Bucket':[Categorised_data[col]['VB'] for col in val_variables]})
-# gd=GridOptionsBuilder.from_dataframe(validation_data)
-# gd.configure_pagination(enabled=True)
-# gd.configure_selection(use_checkbox=True,selection_mode='multiple')
-# #gd.configure_selection_toggle_all(None, show_toggle_all=True)
-# #gd.configure_columns_auto_size_mode(GridOptionsBuilder.configure_columns)
-# gridoptions=gd.build()
-# #st.text(st.session_state['Validation'])
-# table = AgGrid(validation_data,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED,fit_columns_on_grid_load=True)
-# #st.table(table)
-# selected_rows = table["selected_rows"]
-# st.session_state['Validation'].extend([col['Variables'] for col in selected_rows])
-# not_validated_variables = [col for col in val_variables if col not in st.session_state["Validation"]]
-# if not_validated_variables:
-#     not_validated_message = f'The following variables are not validated:\n{" , ".join(not_validated_variables)}'
-#     st.warning(not_validated_message)
-options = list(df.select_dtypes(np.number).columns)
-st.markdown(' ')
-st.markdown(' ')
-st.markdown('# Exploratory Data Analysis')
-st.markdown(' ')
-selected_options = []
-num_columns = 4
-num_rows = -(-len(options) // num_columns)  # Ceiling division to calculate rows
-# Create a grid of checkboxes
-st.header('Select Features for Correlation Plot')
-tick=False
-if st.checkbox('Select all'):
-    tick=True
-selected_options = []
-for row in range(num_rows):
-    cols = st.columns(num_columns)
-    for col in cols:
-        if options:
-            option = options.pop(0)
-            selected = col.checkbox(option,value=tick)
-            if selected:
-                selected_options.append(option)
-# Display selected options
-#st.write('You selected:', selected_options)
-st.pyplot(correlation_plot(df,selected_options,target_column))

pages/2_Transformations_with_panel.py DELETED Viewed

@@ -1,612 +0,0 @@
-'''
-MMO Build Sprint 3
-date :
-additions : adding more variables to session state for saved model : random effect, predicted train & test
-'''
-import streamlit as st
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from Eda_functions import format_numbers
-import numpy as np
-import pickle
-from st_aggrid import AgGrid
-from st_aggrid import GridOptionsBuilder,GridUpdateMode
-from utilities import set_header,load_local_css
-from st_aggrid import GridOptionsBuilder
-import time
-import itertools
-import statsmodels.api as sm
-import numpy as npc
-import re
-import itertools
-from sklearn.metrics import mean_absolute_error, r2_score,mean_absolute_percentage_error
-from sklearn.preprocessing import MinMaxScaler
-import os
-import matplotlib.pyplot as plt
-from statsmodels.stats.outliers_influence import variance_inflation_factor
-st.set_option('deprecation.showPyplotGlobalUse', False)
-import statsmodels.api as sm
-import statsmodels.formula.api as smf
-from datetime import datetime
-import seaborn as sns
-from Data_prep_functions import *
-def get_random_effects(media_data, panel_col, mdf):
-    """Collect the fitted random effect of every panel into a dataframe.
-
-    Args:
-        media_data: Dataframe holding the ``panel_col`` column; its unique
-            values (in order of first appearance) are the panels looked up.
-        panel_col: Name of the panel column.
-        mdf: Fitted model result whose ``random_effects`` is indexable by
-            panel value; the first entry of each item is used.
-
-    Returns:
-        Dataframe with one row per panel and the columns ``panel_col`` and
-        ``random_effect``, indexed 0..n-1.
-
-    Raises:
-        KeyError: If a panel has no entry in ``mdf.random_effects`` (when that
-            attribute is a dict).
-    """
-    # Build every row first and create the frame once: filling an empty frame
-    # cell by cell leaves both columns with ``object`` dtype, which then leaks
-    # into the predictions that add ``random_effect`` to the fixed effect.
-    records = [
-        {panel_col: market, "random_effect": mdf.random_effects[market].values[0]}
-        for market in media_data[panel_col].unique()
-    ]
-    return pd.DataFrame(records, columns=[panel_col, "random_effect"])
-def mdf_predict(X_df, mdf, random_eff_df) :
-    """Return fixed-effect prediction plus the matching panel random effect.
-
-    The panel column name is read from the module-level ``panel_col``.
-
-    Args:
-        X_df: Feature dataframe passed to ``mdf.predict``; it is not modified.
-        mdf: Fitted model result exposing ``predict``.
-        random_eff_df: Dataframe with the panel column and ``random_effect``.
-
-    Returns:
-        Series named ``pred``, indexed by the result of the left merge.
-    """
-    scored = X_df.copy()
-    scored['fixed_effect'] = mdf.predict(scored)
-    # Left join keeps every input row; panels without a random effect yield NaN.
-    merged = scored.merge(random_eff_df, on=panel_col, how='left')
-    combined = merged['fixed_effect'] + merged['random_effect']
-    return combined.rename('pred')
-# Configure the Streamlit page: title, icon, wide layout, collapsed sidebar.
-st.set_page_config(
-  page_title="Model Build",
-  page_icon=":shark:",
-  layout="wide",
-  initial_sidebar_state='collapsed'
-)
-# Apply the stylesheet and header through the helpers imported from utilities.
-load_local_css('styles.css')
-set_header()
-st.title('1. Build Your Model')
-# Name of the date column (used for sorting here and for plots further down).
-date_col = 'date'
-# The modelling data is read from a CSV in the working directory.
-media_data=pd.read_csv(r'upf_data_converted.csv')
-# with open("Pickle_files/main_df",'rb') as f:
-#    media_data= pickle.load(f)
-# Normalise column names: lower-case, trimmed, spaces -> "_", "-" and ":" removed,
-# then a single pass replacing "__" with "_".
-media_data.columns=[i.lower().strip().replace(' ','_').replace('-','').replace(':','').replace("__", "_") for i in media_data.columns]
-#st.write(media_data.columns)
-#media_data.drop(['indicacao_impressions','infleux_impressions','influencer_impressions'],axis=1,inplace=True)
-# Name of the target column.
-target_col = 'total_approved_accounts_revenue'
-# st.write(media_data.columns)
-# Sort by date and renumber rows so positional splits later follow date order.
-media_data.sort_values(date_col, inplace=True)
-media_data.reset_index(drop=True,inplace=True)
-# Keep the date and target as separate series, then remove both from the feature frame.
-date=media_data[date_col]
-st.session_state['date']=date
-revenue=media_data[target_col]
-media_data.drop([target_col],axis=1,inplace=True)
-media_data.drop([date_col],axis=1,inplace=True)
-media_data.reset_index(drop=True,inplace=True)
-# Optional panel mode: when the toggle is on, the user picks the column that
-# identifies the panel from the columns named dma / panel / markets.
-# NOTE(review): panel_col is assigned only in the toggle branch, but mdf_predict
-# and the model-building code read it unconditionally -- confirm the toggle is
-# always expected to be on before models are built.
-if st.toggle('Apply Transformations on DMA/Panel Level'):
-  dma=st.selectbox('Select the Level of data ',[ col for col in media_data.columns if col.lower() in ['dma','panel', 'markets']])
-  panel_col= dma
-else:
-  #""" code to aggregate data on date """
-  dma=None
-# dma_dict={ dm:media_data[media_data[dma]==dm] for dm in media_data[dma].unique()}
-# st.write(dma_dict)
-st.markdown('## Select the Range of Transformations')
-# Two-column layout reused below for the sliders and the action buttons.
-columns = st.columns(2)
-# Shape before any transformation; stored once per session and shown after transforming.
-old_shape=media_data.shape
-if "old_shape" not in st.session_state:
-   st.session_state['old_shape']=old_shape
-# Each slider returns a (low, high) tuple because its default value is a tuple.
-with columns[0]:
-  slider_value_adstock  = st.slider('Select Adstock Range (only applied to media)', 0.0, 1.0, (0.2, 0.4), step=0.1, format="%.2f")
-with columns[1]:
-  slider_value_lag = st.slider('Select Lag Range (applied to media, seasonal, macroeconomic variables)', 1, 7, (1, 3), step=1)
-# with columns[2]:
-#    slider_value_power=st.slider('Select Power range (only applied to media )',0,4,(1,2),step=1)
-# with columns[1]:
-#    st.number_input('Select the range of half saturation point ',min_value=1,max_value=5)
-#    st.number_input('Select the range of  ')
-# Section 1 - Transformations Functions
-def lag(data,features,lags,dma=None):
-    """Add lagged copies of ``features`` in front of ``data``.
-
-    Args:
-        data: Input dataframe; rows are expected to be in time order.
-        features: Column names to lag.
-        lags: Iterable of shift sizes; each produces columns ``<col>_lag_<n>``.
-        dma: Optional panel column name. When given, shifting and back-filling
-            are done separately inside each panel.
-
-    Returns:
-        Dataframe with the lagged columns first, followed by the original
-        columns of ``data``.
-    """
-    source = data.groupby([dma])[features] if dma else data[features]
-    shifted = pd.concat(
-        [source.shift(step).add_suffix(f'_lag_{step}') for step in lags],
-        axis=1)
-    # Shifting leaves NaN in the first rows; fill them with the next known value.
-    if dma:
-        # Back-fill per panel: a frame-wide back-fill would copy another
-        # panel's value into the leading rows whenever panels are interleaved.
-        shifted = shifted.groupby(data[dma]).bfill()
-    else:
-        #''' data should be aggregated on date'''
-        shifted = shifted.bfill()
-    return pd.concat([shifted, data], axis=1)
-#adstock
-def _adstock_weights(n, alphas, cutoff):
-    """Return a (len(alphas), n, n) stack of banded geometric-decay matrices."""
-    steps = np.arange(n)
-    age = steps[:, None] - steps[None, :]  # age[i, j] = i - j
-    # Only the current row and the previous `cutoff` rows contribute.
-    in_window = (age >= 0) & (age <= cutoff)
-    # Outside the window the exponent is replaced by 0 so no negative powers are evaluated.
-    exponent = np.where(in_window, age, 0)
-    return np.stack([np.where(in_window, np.power(float(alpha), exponent), 0.)
-                     for alpha in alphas])
-def adstock(df, alphas, cutoff, features,dma=None):
-    """Add adstocked copies of ``features`` in front of ``df``.
-
-    For every alpha, row ``i`` of a feature becomes
-    ``sum(alpha ** (i - j) * x[j])`` over ``i - cutoff <= j <= i``.
-
-    Args:
-        df: Input dataframe; rows are expected to be in time order.
-        alphas: Re-iterable collection of decay rates; each produces columns
-            ``<col>_adstock_<alpha>``.
-        cutoff: Number of previous rows that still contribute.
-        features: Column names to transform.
-        dma: Optional panel column name. When given, the carry-over is
-            computed separately inside each panel.
-
-    Returns:
-        Dataframe with the adstock columns (alpha-major, then feature order)
-        first, followed by the original columns of ``df``, on ``df``'s index.
-    """
-    names = [f'{col}_adstock_{alpha}' for alpha in alphas for col in features]
-    values = df[features].to_numpy()
-    if dma:
-        # Write each panel's result back into that panel's own row positions,
-        # so the output stays aligned with `df` even when panels are interleaved.
-        result = np.full((len(df), len(names)), np.nan)
-        panel_values = df[dma].to_numpy()
-        for d in df[dma].unique():
-            rows = panel_values == d
-            block = values[rows]
-            result[rows] = np.hstack(_adstock_weights(len(block), alphas, cutoff) @ block)
-    else:
-        result = np.hstack(_adstock_weights(len(df), alphas, cutoff) @ values)
-    res = pd.DataFrame(result, columns=names, index=df.index)
-    return pd.concat([res, df], axis=1)
-# Section 2 - Begin Transformations
-# Session-state defaults so later reads do not fail on the first run.
-if 'media_data' not in st.session_state:
-  st.session_state['media_data']=pd.DataFrame()
-# Sprint3 additions
-if 'random_effects' not in st.session_state:
-  st.session_state['random_effects']=pd.DataFrame()
-if 'pred_train' not in st.session_state:
-  st.session_state['pred_train'] = []
-if 'pred_test' not in st.session_state:
-  st.session_state['pred_test'] = []
-# end of Sprint3 additions
-# variables_to_be_transformed=[col for col in media_data.columns if col.lower() not in ['dma','panel'] ] # change for buckets
-# Only columns whose name contains "_clicks" or "_impress" are transformed.
-variables_to_be_transformed=[col for col in media_data.columns if '_clicks' in col.lower() or '_impress' in col.lower()] # srishti - change
-# st.write(variables_to_be_transformed)
-# st.write(media_data[variables_to_be_transformed].dtypes)
-with columns[0]:
-  if st.button('Apply Transformations'):
-    with st.spinner('Applying Transformations'):
-      # Lags cover the slider range inclusively (upper bound + 1 passed to arange).
-      transformed_data_lag=lag(media_data,features=variables_to_be_transformed,lags=np.arange(slider_value_lag[0],slider_value_lag[1]+1,1),dma=dma)
-      # variables_to_be_transformed=[col for col in list(transformed_data_lag.columns) if col not in ['Date','DMA','Panel']] #change for buckets
-      # Adstock is applied to the original media columns, not to the lagged ones.
-      variables_to_be_transformed = [col for col in media_data.columns if
-                                    '_clicks' in col.lower() or '_impress' in col.lower()]  # srishti - change
-      # NOTE(review): unlike the lag range, this arange excludes the slider's upper
-      # bound, and it is empty when both slider ends are equal -- confirm intent.
-      transformed_data_adstock=adstock(df=transformed_data_lag, alphas=np.arange(slider_value_adstock[0],slider_value_adstock[1],0.1), cutoff=8, features=variables_to_be_transformed,dma=dma)
-      # st.success('Done')
-      st.success("Transformations complete!")
-      st.write(f'old shape {old_shape}, new shape {transformed_data_adstock.shape}')
-      # st.write(media_data.head(10))
-      # st.write(transformed_data_adstock.head(10))
-      # Replace "." in column names (the adstock suffix contains the alpha value).
-      transformed_data_adstock.columns = [c.replace(".","_") for c in transformed_data_adstock.columns] # srishti
-      # st.write(transformed_data_adstock.columns)
-      # Persist the transformed frame for the steps below and for later reruns.
-      st.session_state['media_data']=transformed_data_adstock # srishti
-    # with st.spinner('Applying Transformations'):
-    #   time.sleep(2)
-    #   st.success("Transformations complete!")
-# if st.session_state['media_data'].shape[1]>old_shape[1]:
-  # with columns[0]:
-    # st.write(f'Total no.of variables before transformation: {old_shape[1]}, Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
-  #st.write(f'Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
-# Section 3 - Create combinations
-# bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','FB: Level Achieved - Tier 1 Impressions',
-#       ' FB: Level Achieved - Tier 2 Impressions','paid_social_others',
-#         ' GA App: Will And Cid Pequena Baixo Risco Clicks',
-#       'digital_tactic_others',"programmatic"
-#       ]
-# srishti - bucket names changed
-# Channel buckets: a feature belongs to a bucket when the bucket name is a
-# substring of the feature's column name.
-bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','fb_level_achieved_tier_2',
-      'fb_level_achieved_tier_1','paid_social_others',
-        'ga_app',
-      'digital_tactic_others',"programmatic"
-      ]
-with columns[1]:
-  if st.button('Create Combinations of Variables'):
-    # NOTE(review): despite the name, only the top 2 variants per column are kept (head(2) below).
-    top_3_correlated_features=[]
-    # for col in st.session_state['media_data'].columns[:19]:
-    # Untransformed media columns: clicks/impressions without a lag or adstock suffix.
-    original_cols = [c for c in st.session_state['media_data'].columns if "_clicks" in c.lower() or "_impressions" in c.lower()]
-    original_cols = [c for c in original_cols if "_lag" not in c.lower() and "_adstock" not in c.lower()]
-    # st.write(original_cols)
-    # for col in st.session_state['media_data'].columns[:19]:
-    for col in original_cols: # srishti - new
-        # Correlate every column matching `col` (used as a regex) with the target
-        # and keep the two with the highest correlation.
-        corr_df=pd.concat([st.session_state['media_data'].filter(regex=col),
-                  revenue],axis=1).corr()[target_col].iloc[:-1]
-        top_3_correlated_features.append(list(corr_df.sort_values(ascending=False).head(2).index))
-        # st.write(col, top_3_correlated_features)
-    flattened_list = [item for sublist in top_3_correlated_features for item in sublist]
-    # all_features_set={var:[col for col in flattened_list if var in col] for var in bucket}
-    # Group the kept features by bucket, dropping buckets without any feature.
-    all_features_set={var:[col for col in flattened_list if var in col] for var in bucket if len([col for col in flattened_list if var in col])>0} # srishti
-    channels_all=[values for values in all_features_set.values()]
-    # st.write(channels_all)
-    # One candidate feature set = one feature picked from every non-empty bucket.
-    st.session_state['combinations'] = list(itertools.product(*channels_all))
-  # if 'combinations' not in st.session_state:
-  #   st.session_state['combinations']=combinations_all
-    st.session_state['final_selection']=st.session_state['combinations']
-    st.success('Done')
-    # st.write(f"{len(st.session_state['combinations'])} combinations created")
-    revenue.reset_index(drop=True,inplace=True)
-  # Container for the metrics of every model kept by the build step.
-  if 'Model_results' not in st.session_state:
-        st.session_state['Model_results']={'Model_object':[],
-      'Model_iteration':[],
-      'Feature_set':[],
-      'MAPE':[],
-      'R2':[],
-      'ADJR2':[]
-      }
-  # Callback used by the iteration input and the build button to clear earlier results.
-  def reset_model_result_dct():
-      st.session_state['Model_results']={'Model_object':[],
-      'Model_iteration':[],
-      'Feature_set':[],
-      'MAPE':[],
-      'R2':[],
-      'ADJR2':[]
-      }
-      # if st.button('Build Model'):
-  if 'iterations' not in st.session_state:
-    st.session_state['iterations']=0
-      # st.write("1",st.session_state["final_selection"])
-  # False means "no combinations created yet"; it is replaced by the list of combinations above.
-  if 'final_selection' not in st.session_state:
-      st.session_state['final_selection']=False
-# Directory where the pickled model of every kept iteration is written.
-save_path = r"Model/"
-with columns[1]:
-  if  st.session_state['final_selection']:
-    st.write(f'Total combinations created {format_numbers(len(st.session_state["final_selection"]))}')
-if st.checkbox('Build all iterations'):
-   # 'final_selection' is False until combinations have been created; count that
-   # as zero instead of failing on len(False).
-   iterations=len(st.session_state['final_selection'] or [])
-else:
-   iterations = st.number_input('Select the number of iterations to perform', min_value=0, step=100, value=st.session_state['iterations'],on_change=reset_model_result_dct)
-  #  st.write("iterations=", iterations)
-# Fit one mixed linear model per candidate feature set and record the ones that pass the filter.
-if st.button('Build Model',on_click=reset_model_result_dct):
-  st.session_state['iterations']=iterations
-  # The mixed model groups rows by panel, so a panel column has to be selected first
-  # (panel_col is only defined when the panel toggle is on).
-  if dma is None:
-      st.error('Please enable "Apply Transformations on DMA/Panel Level" and select a panel column before building models.')
-      st.stop()
-  # Make sure the target directory for the pickled models exists.
-  os.makedirs(save_path, exist_ok=True)
-  # Section 4 - Model
-  st.session_state['media_data']=st.session_state['media_data'].ffill()
-  st.markdown(
-      'Data Split -- Training Period: May 9th, 2023 - October 5th,2023 , Testing Period: October 6th, 2023 - November 7th, 2023 ')
-  progress_bar = st.progress(0)  # Initialize the progress bar
-  start_time = time.time()  # Record the start time
-  progress_text = st.empty()
-  # 'final_selection' is False until combinations exist; treat that as nothing to build.
-  candidate_sets = (st.session_state["final_selection"] or [])[0:int(iterations)]
-  for i, selected_features in enumerate(candidate_sets): # srishti
-      df = st.session_state['media_data']
-      fet = [var for var in selected_features if len(var) > 0]
-      inp_vars_str = " + ".join(fet)  # right-hand side of the model formula
-      X = df[fet]
-      y = revenue
-      # NOTE(review): the scaler is fitted on all rows, i.e. before the train/test split.
-      ss = MinMaxScaler()
-      X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
-      X['total_approved_accounts_revenue'] = revenue  # Sprint2
-      X[panel_col] = df[panel_col] # Sprint2
-      # NOTE(review): the split is a hard-coded row position (first 8000 rows = train).
-      X_train=X.iloc[:8000]
-      X_test=X.iloc[8000:]
-      y_train=y.iloc[:8000]
-      y_test=y.iloc[8000:]
-      md = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
-                      data=X_train[['total_approved_accounts_revenue'] + fet],
-                      groups=X_train[panel_col])
-      mdf = md.fit()
-      predicted_values = mdf.fittedvalues
-      coefficients = mdf.fe_params.to_dict()
-      model_possitive = [col for col in coefficients.keys() if coefficients[col] > 0]
-      pvalues = [var for var in list(mdf.pvalues) if var <= 0.06]
-      # if (len(model_possitive) / len(selected_features)) > 0.9 and (len(pvalues) / len(selected_features)) >= 0.8:
-      # NOTE(review): thresholds were relaxed "just for testing, revert later" -- as written,
-      # any model with at least one positive coefficient is kept.
-      if (len(model_possitive) / len(selected_features)) > 0 and (len(pvalues) / len(selected_features)) >= 0: # srishti - changed just for testing, revert later
-          mape = mean_absolute_percentage_error(y_train, predicted_values)
-          r2 = r2_score(y_train, predicted_values)
-          adjr2 = 1 - (1 - r2) * (len(y_train) - 1) / (len(y_train) - len(selected_features) - 1)
-          # Persist the fitted model; only its path is kept in session state.
-          filename = os.path.join(save_path, f"model_{i}.pkl")
-          with open(filename, "wb") as f:
-              pickle.dump(mdf, f)
-          st.session_state['Model_results']['Model_object'].append(filename)
-          st.session_state['Model_results']['Model_iteration'].append(i)
-          st.session_state['Model_results']['Feature_set'].append(fet)
-          st.session_state['Model_results']['MAPE'].append(mape)
-          st.session_state['Model_results']['R2'].append(r2)
-          st.session_state['Model_results']['ADJR2'].append(adjr2)
-      # Progress feedback after every iteration, kept or not.
-      current_time = time.time()
-      time_taken = current_time - start_time
-      time_elapsed_minutes = time_taken / 60
-      completed_iterations_text = f"{i + 1}/{iterations}"
-      progress_bar.progress((i + 1) / int(iterations))
-      progress_text.text(f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
-  st.write(f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
-  pd.DataFrame(st.session_state['Model_results']).to_csv('model_output.csv')
-# Defined at module level: the results table below calls it on every rerun, not
-# only on the run in which the 'Build Model' button was pressed.
-def to_percentage(value):
-    """Format a fraction as a percentage string with one decimal, e.g. 0.1234 -> '12.3%'."""
-    return f'{value * 100:.1f}%'
-## Section 5 - Select Model
-st.title('2. Select Models')
-# Remember across reruns that the results table has been opened.
-if 'tick' not in st.session_state:
-   st.session_state['tick']=False
-if st.checkbox('Show results of top 10 models (based on MAPE and Adj. R2)',value=st.session_state['tick']):
-  st.session_state['tick']=True
-  st.write('Select one model iteration to generate performance metrics for it:')
-  data=pd.DataFrame(st.session_state['Model_results'])
-  # MAPE is an error metric, so the best models are those with the smallest value.
-  data.sort_values(by=['MAPE'],ascending=True,inplace=True)
-  data.drop_duplicates(subset='Model_iteration',inplace=True)
-  # Format a copy so the raw metrics in `data` stay numeric and no slice is written to.
-  top_10=data.head(10).copy()
-  top_10['Rank']=np.arange(1,len(top_10)+1,1)
-  top_10[['MAPE','R2','ADJR2']]=np.round(top_10[['MAPE','R2','ADJR2']],4).applymap(to_percentage)
-  top_10_table = top_10[['Rank','Model_iteration','MAPE','ADJR2','R2']]
-  #top_10_table.columns=[['Rank','Model Iteration Index','MAPE','Adjusted R2','R2']]
-  # Grid options are built from the display columns only; rows are selectable by checkbox.
-  gd=GridOptionsBuilder.from_dataframe(top_10_table)
-  gd.configure_pagination(enabled=True)
-  gd.configure_selection(use_checkbox=True)
-  gridoptions=gd.build()
-  table = AgGrid(top_10,gridOptions=gridoptions,update_mode=GridUpdateMode.SELECTION_CHANGED)
-  selected_rows=table.selected_rows
-  # if st.session_state["selected_rows"] != selected_rows:
-  #   st.session_state["build_rc_cb"] = False
-  st.session_state["selected_rows"] = selected_rows
-  # Registry of models the user saves for tuning, keyed by the name they enter.
-  if 'Model' not in st.session_state:
-    st.session_state['Model']={}
-# Section 6 - Display Results
-  # Everything below is shown only after a row has been selected in the table.
-  if len(selected_rows)>0:
-    st.header('2.1 Results Summary')
-    # Look up the pickle path and the feature list of the selected iteration.
-    model_object=data[data['Model_iteration']==selected_rows[0]['Model_iteration']]['Model_object']
-    features_set=data[data['Model_iteration']==selected_rows[0]['Model_iteration']]['Feature_set']
-    # The pickle is one written by the build step of this page.
-    with open(str(model_object.values[0]), 'rb') as file:
-        # print(file)
-        model = pickle.load(file)
-    st.write(model.summary())
-    st.header('2.2 Actual vs. Predicted Plot')
-    # Rebuild the scaled design matrix the same way the build step did.
-    df=st.session_state['media_data']
-    X=df[features_set.values[0]]
-    # X = sm.add_constant(X)
-    y=revenue
-    ss = MinMaxScaler()
-    X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
-    # Sprint2 changes
-    X['total_approved_accounts_revenue'] = revenue  # new
-    # NOTE(review): panel_col exists only when the panel toggle is on -- see the toggle above.
-    X[panel_col] = df[panel_col]
-    X[date_col]=date
-    # Same hard-coded positional split as the build step; the test part is re-indexed from 0.
-    X_train=X.iloc[:8000]
-    X_test=X.iloc[8000:].reset_index(drop=True)
-    y_train=y.iloc[:8000]
-    y_test=y.iloc[8000:].reset_index(drop=True)
-    # Train predictions come from the fit itself; test predictions add the panel random effect.
-    random_eff_df = get_random_effects(media_data, panel_col, model)
-    train_pred = model.fittedvalues
-    test_pred = mdf_predict(X_test, model, random_eff_df)
-    print("__"*20, test_pred.isna().sum())
-    # Dump train/test frames with actuals and predictions for offline inspection.
-    x_test_to_save = X_test.copy()
-    x_test_to_save['Actuals'] = y_test
-    x_test_to_save['Predictions'] = test_pred
-    x_train_to_save=X_train.copy()
-    x_train_to_save['Actuals'] = y_train
-    x_train_to_save['Predictions'] = train_pred
-    x_train_to_save.to_csv('Test/x_train_to_save.csv',index=False)
-    x_test_to_save.to_csv('Test/x_test_to_save.csv',index=False)
-    st.session_state['X']=X_train
-    st.session_state['features_set']=features_set.values[0]
-    print("**"*20,"selected model features : ",features_set.values[0])
-    metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(X_train[date_col], y_train, train_pred, model,target_column='Revenue',is_panel=True) # Sprint2
-    st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
-    st.markdown('## 2.3 Residual Analysis')
-    # Note: this rebinds the module-level `columns` used by the widgets further up.
-    columns=st.columns(2)
-    with columns[0]:
-      fig=plot_residual_predicted(y_train,train_pred,X_train) # Sprint2
-      st.plotly_chart(fig)
-    with columns[1]:
-      st.empty()
-      fig = qqplot(y_train,train_pred) # Sprint2
-      st.plotly_chart(fig)
-    with columns[0]:
-      fig=residual_distribution(y_train,train_pred) # Sprint2
-      st.pyplot(fig)
-    # Variance inflation factor of every model feature, drawn as a colour-coded bar chart.
-    vif_data = pd.DataFrame()
-    # X=X.drop('const',axis=1)
-    # Keep a full copy (with target, panel and date) before those columns are dropped
-    # in place; the copy is what gets saved with the model further down.
-    X_train_with_panels = X_train.copy() # Sprint2 -- creating a copy of xtrain. Later deleting panel, target & date from xtrain
-    X_train.drop(columns=[target_col, panel_col, date_col], inplace=True) # Sprint2
-    vif_data["Variable"] = X_train.columns
-    vif_data["VIF"] = [variance_inflation_factor(X_train.values, i) for i in range(X_train.shape[1])]
-    vif_data.sort_values(by=['VIF'],ascending=False,inplace=True)
-    vif_data=np.round(vif_data)
-    vif_data['VIF']=vif_data['VIF'].astype(float)
-    st.header('2.4 Variance Inflation Factor (VIF)')
-    #st.dataframe(vif_data)
-    # Boolean masks per colour: below 3, between 3 and 10 (inclusive), above 10.
-    color_mapping = {
-    'darkgreen': (vif_data['VIF'] < 3),
-    'orange': (vif_data['VIF'] >= 3) & (vif_data['VIF'] <= 10),
-    'darkred': (vif_data['VIF'] > 10)
-    }
-# Create a horizontal bar plot
-    fig, ax = plt.subplots()
-    fig.set_figwidth(10)  # Adjust the width of the figure as needed
-    # Sort the bars by descending VIF values
-    vif_data = vif_data.sort_values(by='VIF', ascending=False)
-    # Iterate through the color mapping and plot bars with corresponding colors
-    for color, condition in color_mapping.items():
-        subset = vif_data[condition]
-        bars = ax.barh(subset["Variable"], subset["VIF"], color=color, label=color)
-        # Write each bar's value just to the right of the bar end
-        for bar in bars:
-            width = bar.get_width()
-            ax.annotate(f'{width:}', xy=(width, bar.get_y() + bar.get_height() / 2), xytext=(5, 0),
-                        textcoords='offset points', va='center')
-    # Customize the plot
-    ax.set_xlabel('VIF Values')
-    #ax.set_title('2.4 Variance Inflation Factor (VIF)')
-    #ax.legend(loc='upper right')
-    # Display the plot in Streamlit
-    st.pyplot(fig)
-    # Same diagnostics as above, computed on the held-out test rows.
-    with st.expander('Results Summary Test data'):
-      # ss = MinMaxScaler()
-      # X_test = pd.DataFrame(ss.fit_transform(X_test), columns=X_test.columns)
-      st.header('2.2 Actual vs. Predicted Plot')
-      metrics_table,line,actual_vs_predicted_plot=plot_actual_vs_predicted(X_test[date_col], y_test, test_pred, model,target_column='Revenue',is_panel=True) # Sprint2
-      st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
-      st.markdown('## 2.3 Residual Analysis')
-      columns=st.columns(2)
-      # Residuals compare test predictions with the test actuals (y_test), mirroring
-      # the train section; the full revenue series covers different rows.
-      with columns[0]:
-        fig=plot_residual_predicted(y_test,test_pred,X_test) # Sprint2
-        st.plotly_chart(fig)
-      with columns[1]:
-        st.empty()
-        fig = qqplot(y_test,test_pred) # Sprint2
-        st.plotly_chart(fig)
-      with columns[0]:
-        fig=residual_distribution(y_test,test_pred) # Sprint2
-        st.pyplot(fig)
-    value=False
-    # Saving requires both the checkbox and a non-empty model name.
-    if st.checkbox('Save this model to tune',key='build_rc_cb'):
-      mod_name=st.text_input('Enter model name')
-      if len(mod_name)>0:
-        # Register the model under the entered name, with its features and the
-        # training frame that still contains target, panel and date columns.
-        st.session_state['Model'][mod_name]={"Model_object":model,'feature_set':st.session_state['features_set'],'X_train':X_train_with_panels}
-        # Share the train/test split with the following pages through session state.
-        st.session_state['X_train']=X_train_with_panels
-        st.session_state['X_test']=X_test
-        st.session_state['y_train']=y_train
-        st.session_state['y_test']=y_test
-        # Sprint3 additions
-        random_eff_df= get_random_effects(media_data, panel_col, model)
-        st.session_state['random_effects']=random_eff_df
-        st.session_state['pred_train']=model.fittedvalues
-        st.session_state['pred_test']=mdf_predict(X_test, model, random_eff_df)
-        # End of Sprint3 additions
-        # Persist every saved model of this session in one pickle file.
-        with open("best_models.pkl", "wb") as f:
-          pickle.dump(st.session_state['Model'], f)
-          st.success('Model saved! Proceed to the next page to tune the model')
-        value=False
-        # st.write(st.session_state['Model'][mod_name]['X_train'].columns)
-        # st.write(st.session_state['X_test'].columns)

pages/3_Model_Tuning_with_panel.py DELETED Viewed

@@ -1,437 +0,0 @@
-'''
-MMO Build Sprint 3
-date :
-changes : capability to tune MixedLM as well as simple LR in the same page
-'''
-import streamlit as st
-import pandas as pd
-from Eda_functions import format_numbers
-import pickle
-from utilities import set_header,load_local_css
-import statsmodels.api as sm
-import re
-from sklearn.preprocessing import MinMaxScaler
-import matplotlib.pyplot as plt
-from statsmodels.stats.outliers_influence import variance_inflation_factor
-st.set_option('deprecation.showPyplotGlobalUse', False)
-import statsmodels.formula.api as smf
-from Data_prep_functions import *
-# Make sure the keys written by this page exist in session state (default None).
-for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features"] :
-    if i not in st.session_state :
-        st.session_state[i] = None
-# Configure the Streamlit page: title, icon, wide layout, collapsed sidebar.
-st.set_page_config(
-  page_title="Model Tuning",
-  page_icon=":shark:",
-  layout="wide",
-  initial_sidebar_state='collapsed'
-)
-load_local_css('styles.css')
-set_header()
-# Sprint3
-# Page-level constants: panel mode is switched on and the column names are fixed here.
-is_panel= True
-panel_col= 'dma' # set the panel column
-date_col = 'date'
-target_col = 'total_approved_accounts_revenue'
-st.title('1. Model Tuning')
-# Stop early when no train/test split has been stored in session state yet.
-if "X_train" not in st.session_state:
-   st.error(
-"Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed.")
-   st.stop()
-X_train=st.session_state['X_train']
-X_test=st.session_state['X_test']
-y_train=st.session_state['y_train']
-y_test=st.session_state['y_test']
-df=st.session_state['media_data']
-# st.write(X_train.columns)
-# st.write(X_test.columns)
-# Load the saved-model registry (a dict keyed by model name) from disk.
-# NOTE(review): pickle.load executes arbitrary code from the file -- only safe
-# while best_models.pkl is written by this application itself.
-with open("best_models.pkl", 'rb') as file:
-  model_dict= pickle.load(file)
-if 'selected_model' not in st.session_state:
-   st.session_state['selected_model']=0
-# st.write(model_dict[st.session_state["selected_model"]]['X_train'].columns)
-st.markdown('### 1.1 Event Flags')
-st.markdown('Helps in quantifying the impact of specific occurrences of events')
-with st.expander('Apply Event Flags'):
-  # Pick one of the saved models and pull its fitted object, training frame and features.
-  st.session_state["selected_model"]=st.selectbox('Select Model to apply flags',model_dict.keys())
-  model =model_dict[st.session_state["selected_model"]]['Model_object']
-  date=st.session_state['date']
-  date=pd.to_datetime(date)
-  X_train =model_dict[st.session_state["selected_model"]]['X_train']
-  features_set= model_dict[st.session_state["selected_model"]]['feature_set']
-  # Date range of the flag, limited to the dates present in the data.
-  col=st.columns(3)
-  min_date=min(date)
-  max_date=max(date)
-  with col[0]:
-    start_date=st.date_input('Select Start Date',min_date,min_value=min_date,max_value=max_date)
-  with col[1]:
-    end_date=st.date_input('Select End Date',max_date,min_value=min_date,max_value=max_date)
-  with col[2]:
-    repeat=st.selectbox('Repeat Annually',['Yes','No'],index=1)
-  # Convert the Yes/No choice into a boolean.
-  if repeat =='Yes':
-      repeat=True
-  else:
-      repeat=False
-  # X_train=sm.add_constant(X_train)
-  # Flags created so far, keyed by flag name.
-  if 'Flags' not in st.session_state:
-    st.session_state['Flags']={}
-  # print("**"*50)
-  # print(y_train)
-  # print("**"*50)
-  # print(model.fittedvalues)
-  # The second value returned by plot_actual_vs_predicted is what gets stored as the
-  # flag; only the train figure is displayed, the test call is used for its values.
-  if is_panel : # Sprint3
-      met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train,
-                                                            model.fittedvalues, model,
-                                                            target_column='Revenue',
-                                                            flag=(start_date, end_date),
-                                                            repeat_all_years=repeat, is_panel=True)
-      st.plotly_chart(fig_flag, use_container_width=True)
-      # create flag on test
-      met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test,
-                                                            st.session_state['pred_test'], model,
-                                                            target_column='Revenue',
-                                                            flag=(start_date, end_date),
-                                                            repeat_all_years=repeat, is_panel=True)
-  else :
-      # Non-panel path: the first 150 dates are treated as train, the rest as test.
-      met,line_values,fig_flag=plot_actual_vs_predicted(date[:150], y_train, model.predict(X_train), model,flag=(start_date,end_date),repeat_all_years=repeat)
-      st.plotly_chart(fig_flag,use_container_width=True)
-      met,test_line_values,fig_flag=plot_actual_vs_predicted(date[150:], y_test, model.predict(X_test), model,flag=(start_date,end_date),repeat_all_years=repeat)
-  # The default name is overwritten immediately by the text input.
-  flag_name='f1'
-  flag_name=st.text_input('Enter Flag Name')
-  # Store the train and test flag values under the entered name.
-  if st.button('Update flag'):
-    st.session_state['Flags'][flag_name]= {}
-    st.session_state['Flags'][flag_name]['train']=line_values
-    st.session_state['Flags'][flag_name]['test']=test_line_values
-    # st.write(st.session_state['Flags'][flag_name])
-    st.success(f'{flag_name} stored')
-  # Inputs for the checkbox grid rendered after the expander: 4 per row, rows by ceiling division.
-  options=list(st.session_state['Flags'].keys())
-  selected_options = []
-  num_columns = 4
-  num_rows = -(-len(options) // num_columns)
-# "Select all" only sets the default state of the per-flag checkboxes below.
-tick = True if st.checkbox('Select all') else False
-selected_options = []
-# One checkbox per stored flag, laid out row by row.
-for row in range(num_rows):
-    cols = st.columns(num_columns)
-    for col in cols:
-        if not options:
-            # The last row may be only partially filled.
-            continue
-        option = options.pop(0)
-        selected = col.checkbox(option, value=tick)
-        if selected:
-            selected_options.append(option)
-st.markdown('### 1.2 Select Parameters to Apply')
-# Three optional extra regressors, one checkbox per column.
-parameters=st.columns(3)
-trend_slot, week_slot, wave_slot = parameters
-with trend_slot:
-   Trend=st.checkbox("**Trend**")
-   st.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')
-with week_slot:
-   week_number=st.checkbox('**Week_number**')
-   st.markdown('Assists in detecting and incorporating weekly patterns or seasonality')
-with wave_slot:
-   sine_cosine=st.checkbox('**Sine and Cosine Waves**')
-   st.markdown('Helps in capturing cyclical patterns or seasonality in the data')
-if st.button('Build model with Selected Parameters and Flags'):
-  st.header('2.1 Results Summary')
-  # date=list(df.index)
-  # df = df.reset_index(drop=True)
-  # st.write(df.head(2))
-  # X_train=df[features_set]
-  ss = MinMaxScaler()
-  if is_panel == True :
-    X = X_train[features_set]
-    X_train_tuned = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
-    X_train_tuned[target_col] = X_train[target_col]
-    X_train_tuned[date_col] = X_train[date_col]
-    X_train_tuned[panel_col] = X_train[panel_col]
-    X = X_test[features_set]
-    X_test_tuned = pd.DataFrame(ss.transform(X), columns=X.columns)
-    X_test_tuned[target_col] = X_test[target_col]
-    X_test_tuned[date_col] = X_test[date_col]
-    X_test_tuned[panel_col] = X_test[panel_col]
-  else :
-    X_train_tuned = pd.DataFrame(ss.fit_transform(X_train), columns=X_train.columns)
-    X_train_tuned = sm.add_constant(X_train_tuned)
-    X_test_tuned = pd.DataFrame(ss.transform(X_test), columns=X_test.columns)
-    X_test_tuned = sm.add_constant(X_test_tuned)
-  for flag in selected_options:
-    X_train_tuned[flag]=st.session_state['Flags'][flag]['train']
-    X_test_tuned[flag]=st.session_state['Flags'][flag]['test']
-    #test
-    # X_train_tuned.to_csv("Test/X_train_tuned_flag.csv",index=False)
-    # X_test_tuned.to_csv("Test/X_test_tuned_flag.csv",index=False)
-  new_features = features_set
-    # print("()()"*20,flag, len(st.session_state['Flags'][flag]))
-  if Trend:
-     # Add a linear 'Trend' regressor that starts at 1 in the training rows and keeps counting
-     # through the test rows.
-     if is_panel :
-         # Sprint3 - panel data: each panel gets its own trend, numbered in date order.
-         newdata = pd.DataFrame()
-         panel_wise_end_point_train = {}
-         for panel, groupdf in X_train_tuned.groupby(panel_col):
-             groupdf = groupdf.sort_values(date_col)
-             groupdf['Trend'] = np.arange(1, len(groupdf) + 1, 1)
-             newdata = pd.concat([newdata, groupdf])
-             panel_wise_end_point_train[panel] = len(groupdf)
-         X_train_tuned = newdata.copy()
-         test_newdata=pd.DataFrame()
-         for panel, test_groupdf in X_test_tuned.groupby(panel_col):
-             test_groupdf = test_groupdf.sort_values(date_col)
-             # Continue where this panel's training trend stopped; a panel that never
-             # appeared in training starts at 1 instead of raising KeyError.
-             start = panel_wise_end_point_train.get(panel, 0)+1
-             end = start + len(test_groupdf)
-             test_groupdf['Trend'] = np.arange(start, end, 1)
-             test_newdata = pd.concat([test_newdata, test_groupdf])
-         X_test_tuned = test_newdata.copy()
-         new_features = new_features + ['Trend']
-         # test
-         # NOTE(review): debug dumps; they require a "Test/" directory next to the app.
-         X_test_tuned.to_csv("Test/X_test_tuned_trend.csv", index=False)
-         X_train_tuned.to_csv("Test/X_train_tuned_trend.csv", index=False)
-         pd.concat([X_train_tuned,X_test_tuned]).sort_values([panel_col, date_col]).to_csv("Test/X_train_test_tuned_trend.csv", index=False)
-     else :
-         n_train = len(X_train_tuned)
-         X_train_tuned['Trend']=np.arange(1,n_train+1,1)
-         # np.arange excludes its stop value, so the stop must be one past the last test
-         # position; the previous stop produced one value too few for the test frame.
-         X_test_tuned['Trend'] = np.arange(n_train+1, n_train+len(X_test_tuned)+1, 1)
-  if week_number :
-     # Add the weekday of each row's date as a regressor.
-     # NOTE: the column is called 'Week_number' but holds pandas' day_of_week (0 = Monday ... 6 = Sunday).
-     if is_panel :
-        # Sprint3 - derive the weekday from the date column of the tuned frames.
-        X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
-        X_train_tuned['Week_number'] = X_train_tuned[date_col].dt.day_of_week
-        if X_train_tuned['Week_number'].nunique() == 1 :
-            # A constant column cannot be used as a regressor, so it is not added to the feature list.
-            st.write("All dates in the data are of the same week day. Hence Week number can't be used.")
-        else :
-            X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
-            X_test_tuned['Week_number'] = X_test_tuned[date_col].dt.day_of_week
-            new_features = new_features + ['Week_number']
-     else :
-        # pd.to_datetime on an array returns a DatetimeIndex; it exposes day_of_week directly
-        # and has no `.dt` accessor, so the accessor must not be used here.
-        date = pd.to_datetime(date.values)
-        day_of_week = np.asarray(date.day_of_week)
-        # Split at the real training length rather than a hard-coded 150 rows.
-        n_train = len(X_train_tuned)
-        X_train_tuned['Week_number'] = day_of_week[:n_train]
-        X_test_tuned['Week_number'] = day_of_week[n_train:]
-  if sine_cosine :
-      # Add 'sine_wave' / 'cosine_wave' seasonality regressors whose phase keeps running from the
-      # training rows into the test rows.
-      frequency = 2 * np.pi / 365  # Adjust the frequency as needed
-      if is_panel :
-        # Sprint3 - panel data: the waves are generated separately for every panel.
-        # NOTE(review): the phase is the row position inside each panel, so rows are presumably
-        # expected to be in date order already -- confirm.
-        new_features = new_features + ['sine_wave', 'cosine_wave']
-        newdata = pd.DataFrame()
-        train_panel_wise_end_point = {}
-        for panel, groupdf in X_train_tuned.groupby(panel_col):
-            groupdf = groupdf.copy()
-            num_samples = len(groupdf)
-            train_panel_wise_end_point[panel] = num_samples
-            days_since_start = np.arange(num_samples)
-            groupdf['sine_wave'] = np.sin(frequency * days_since_start)
-            groupdf['cosine_wave'] = np.cos(frequency * days_since_start)
-            newdata = pd.concat([newdata, groupdf])
-        test_newdata = pd.DataFrame()
-        for panel, test_groupdf in X_test_tuned.groupby(panel_col):
-            test_groupdf = test_groupdf.copy()
-            # Continue the phase where this panel's training rows ended (0 for an unseen panel).
-            start = train_panel_wise_end_point.get(panel, 0)
-            days_since_start = np.arange(start, start+len(test_groupdf), 1)
-            test_groupdf['sine_wave'] = np.sin(frequency * days_since_start)
-            test_groupdf['cosine_wave'] = np.cos(frequency * days_since_start)
-            test_newdata = pd.concat([test_newdata, test_groupdf])
-        # Train and test rows are collected separately: previously the test rows were appended to
-        # the training frame and X_test_tuned never received the two wave columns.
-        X_train_tuned = newdata.copy()
-        X_test_tuned = test_newdata.copy()
-      else :
-        num_samples = len(X_train_tuned)
-        days_since_start = np.arange(num_samples)
-        # Assign the columns positionally so the result does not depend on the frames' index labels.
-        X_train_tuned['sine_wave'] = np.sin(frequency * days_since_start)
-        X_train_tuned['cosine_wave'] = np.cos(frequency * days_since_start)
-        test_num_samples = len(X_test_tuned)
-        start = num_samples
-        days_since_start = np.arange(start, start+test_num_samples, 1)
-        X_test_tuned['sine_wave'] = np.sin(frequency * days_since_start)
-        X_test_tuned['cosine_wave'] = np.cos(frequency * days_since_start)
-  # model
-  # Fit the tuned model and build the actual-vs-predicted summaries for the original and the tuned model.
-  if is_panel :
-      if selected_options :
-        new_features =  new_features + selected_options
-      inp_vars_str = " + ".join(new_features)
-      # The response is the column named by target_col (copied into X_train_tuned above), not a
-      # hard-coded metric name, so the fit works for whichever response metric is being tuned.
-      md_tuned = smf.mixedlm("{} ~ {}".format(target_col, inp_vars_str),
-                             data=X_train_tuned[[target_col] + new_features],
-                             groups=X_train_tuned[panel_col])
-      model_tuned = md_tuned.fit()
-      # plot act v pred for original model and tuned model
-      metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train,
-                                                                               model.fittedvalues, model,
-                                                                               target_column='Revenue',
-                                                                               is_panel=True)
-      metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(X_train_tuned[date_col],
-                                                                                           X_train_tuned[target_col],
-                                                                                           model_tuned.fittedvalues,
-                                                                                           model_tuned,
-                                                                                           target_column='Revenue',
-                                                                                           is_panel=True)
-  else :
-      model_tuned = sm.OLS(y_train, X_train_tuned).fit()
-      # Pair y_train with exactly as many dates as it has rows (was a hard-coded 150).
-      train_dates = date[:len(y_train)]
-      metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(train_dates, y_train,
-                                                                               model.predict(X_train), model,
-                                                                               target_column='Revenue')
-      metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(train_dates, y_train,
-                                                                                           model_tuned.predict(
-                                                                                               X_train_tuned),
-                                                                                           model_tuned,
-                                                                                           target_column='Revenue')
-  # st.write(metrics_table_tuned)
-  # Rows 0, 1, 2 of column 1 of each metrics table are read as MAPE, R2 and adjusted R2.
-  mape, r2, adjr2 = (np.round(metrics_table.iloc[row, 1], 2) for row in range(3))
-  mape_tuned, r2_tuned, adjr2_tuned = (np.round(metrics_table_tuned.iloc[row, 1], 2) for row in range(3))
-  # One tile per metric: tuned value plus its change against the original model.
-  parameters_=st.columns(3)
-  parameters_[0].metric('R2', r2_tuned, np.round(r2_tuned - r2, 2))
-  parameters_[1].metric('Adjusted R2', adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
-  # 'inverse' is passed as the delta colour for MAPE.
-  parameters_[2].metric('MAPE', mape_tuned, np.round(mape_tuned - mape, 2), 'inverse')
-  st.header('2.2 Actual vs. Predicted Plot')
-  # Show the tuned-model chart that was built right after fitting. Rebuilding it here with
-  # is_panel=True and X_train_tuned[date_col] only works for panel data and repeats the same
-  # call for it, so the existing result is re-used for both panel and non-panel runs.
-  metrics_table, actual_vs_predicted_plot = metrics_table_tuned, actual_vs_predicted_plot_tuned
-  st.plotly_chart(actual_vs_predicted_plot,use_container_width=True)
-  st.markdown('## 2.3 Residual Analysis')
-  # Two-column layout: residual chart and residual distribution on the left, Q-Q plot on the right.
-  # NOTE(review): every chart here uses `model` (not `model_tuned`) predictions on X_train --
-  # confirm that the untuned model is the intended subject of this section.
-  columns=st.columns(2)
-  with columns[0]:
-    fig=plot_residual_predicted(y_train,model.predict(X_train),X_train)
-    st.plotly_chart(fig)
-  with columns[1]:
-    st.empty()
-    fig = qqplot(y_train,model.predict(X_train))
-    st.plotly_chart(fig)
-  # Re-enter the left column so the distribution is rendered below the residual chart.
-  with columns[0]:
-    fig=residual_distribution(y_train,model.predict(X_train))
-    st.pyplot(fig)
-  if st.checkbox('Use this model to build response curves',key='123'):
-    # Publish the tuned model and its train/test frames to the session state.
-    for state_key, state_value in (("tuned_model", model_tuned),
-                                   ("X_train_tuned", X_train_tuned),
-                                   ("X_test_tuned", X_test_tuned)):
-        st.session_state[state_key] = state_value
-    # The tuned feature list is only stored for panel models.
-    if is_panel :
-        st.session_state["tuned_model_features"] = new_features
-    # Persist the tuned model to tuned_model.pkl and confirm to the user.
-    with open("tuned_model.pkl", "wb") as f:
-        pickle.dump(model_tuned, f)
-        st.success('Model saved!')
-#   raw_data=df[features_set]
-#   columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
-#   raw_data.columns=columns_raw
-#   columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
-#   raw_data=raw_data[columns_media]
-#   raw_data['Date']=list(df.index)
-#   spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
-#   spends_df=df[spends_var]
-#   spends_df['Week']=list(df.index)
-#   j=0
-#   X1=X.copy()
-#   col=X1.columns
-#   for i in model.params.values:
-#       X1[col[j]]=X1.iloc[:,j]*i
-#       j+=1
-#   contribution_df=X1
-#   contribution_df['Date']=list(df.index)
-#   excel_file='Overview_data.xlsx'
-#   with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
-#      raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
-#      spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
-#      contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')

pages/4_Model_Build.py DELETED Viewed

@@ -1,826 +0,0 @@
-'''
-MMO Build Sprint 3
-additions : adding more variables to session state for saved model : random effect, predicted train & test
-MMO Build Sprint 4
-additions : ability to run models for different response metrics
-'''
-import streamlit as st
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from Eda_functions import format_numbers
-import numpy as np
-import pickle
-from st_aggrid import AgGrid
-from st_aggrid import GridOptionsBuilder, GridUpdateMode
-from utilities import set_header, load_local_css
-from st_aggrid import GridOptionsBuilder
-import time
-import itertools
-import statsmodels.api as sm
-import numpy as npc
-import re
-import itertools
-from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error
-from sklearn.preprocessing import MinMaxScaler
-import os
-import matplotlib.pyplot as plt
-from statsmodels.stats.outliers_influence import variance_inflation_factor
-st.set_option('deprecation.showPyplotGlobalUse', False)
-import statsmodels.api as sm
-import statsmodels.formula.api as smf
-from datetime import datetime
-import seaborn as sns
-from Data_prep_functions import *
-def get_random_effects(media_data, panel_col, mdf):
-    """Collect the random intercept of every panel into a two-column frame.
-
-    Args:
-        media_data: frame whose ``panel_col`` column lists the panels; each distinct value is
-            looked up once, in order of first appearance.
-        panel_col: name of the panel column.
-        mdf: fitted model result; ``mdf.random_effects[panel]`` must expose ``.values``, whose
-            first element is taken as the panel's intercept.
-
-    Returns:
-        DataFrame with the columns ``[panel_col, "random_effect"]`` and one row per panel
-        (empty, with the same columns, when ``media_data`` has no rows).
-
-    Raises:
-        KeyError: if a panel present in ``media_data`` has no entry in ``mdf.random_effects``.
-    """
-    # Build the frame in one shot instead of growing it cell by cell with .loc, so pandas can
-    # infer proper column dtypes and no per-row debug output is printed.
-    records = [
-        (market, mdf.random_effects[market].values[0])
-        for market in media_data[panel_col].unique()
-    ]
-    return pd.DataFrame(records, columns=[panel_col, "random_effect"])
-def mdf_predict(X_df, mdf, random_eff_df):
-    """Predict with the fitted model and add each row's panel random effect.
-
-    ``mdf.predict`` supplies the 'fixed_effect' part; the 'random_effect' column of
-    ``random_eff_df`` is joined on the module-level ``panel_col`` with a left merge and added
-    to it. ``X_df`` itself is not modified.
-
-    Returns:
-        The 'pred' Series of the merged frame (it carries the merged frame's index, not
-        ``X_df``'s).
-    """
-    scored = X_df.copy()
-    scored['fixed_effect'] = mdf.predict(scored)
-    scored = scored.merge(random_eff_df, on=panel_col, how='left')
-    scored['pred'] = scored['fixed_effect'] + scored['random_effect']
-    return scored['pred']
-# Page chrome: wide layout, collapsed sidebar, shared stylesheet and header.
-st.set_page_config(page_title="Model Build", page_icon=":shark:", layout="wide",
-                   initial_sidebar_state='collapsed')
-load_local_css('styles.css')
-set_header()
-st.title('1. Build Your Model')
-# Load the variable buckets ('bin_dict') from data_import.pkl.
-with open("data_import.pkl", "rb") as f:
-    data = pickle.load(f)
-st.session_state['bin_dict'] = data["bin_dict"]
-# Load the transformed modelling frame from final_df_transformed.pkl and share it via the session.
-with open("final_df_transformed.pkl", "rb") as f:
-    data = pickle.load(f)
-media_data = data["final_df_transformed"]
-st.session_state['media_data'] = media_data
-# Sprint4 - the response metrics offered in the drop-down come from the 'Response Metrics' bucket.
-if 'available_response_metrics' not in st.session_state:
-    st.session_state['available_response_metrics'] = st.session_state['bin_dict']["Response Metrics"]
-# Sprint4 - one "tuned model" flag per normalised metric name, set to False on every script run.
-if "is_tuned_model" not in st.session_state:
-    st.session_state["is_tuned_model"] = {}
-for resp_metric in st.session_state['available_response_metrics']:
-    resp_metric = resp_metric.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
-    st.session_state["is_tuned_model"][resp_metric] = False
-# Sprint4 - response metrics that already have a saved model, and the saved model names;
-# both lists start empty.
-for state_key in ('used_response_metrics', 'saved_model_names'):
-    if state_key not in st.session_state:
-        st.session_state[state_key] = []
-# if "model_save_flag" not in st.session_state:
-#     st.session_state["model_save_flag"]=False
-# def reset_save():
-#     st.session_state["model_save_flag"]=False
-# def set_save():
-#     st.session_state["model_save_flag"]=True
-# Sprint4 - select a response metric
-def _normalize_name(name):
-    """Return ``name`` in the lower-case, underscore-separated form used for the data-frame columns."""
-    return (name.lower().replace('.', '_').replace('@', '_').replace(" ", "_")
-            .replace('-', '').replace(':', '').replace("__", "_"))
-sel_target_col = st.selectbox("Select the response metric",
-                              st.session_state['available_response_metrics'])
-# The metric name is normalised with the same rules as the column names; otherwise a metric
-# containing '.' or '@' would not match any column of media_data.
-target_col = _normalize_name(sel_target_col)
-# Original -> normalised column names, captured before the columns are renamed.
-new_name_dct = {col: _normalize_name(col) for col in media_data.columns}
-media_data.columns = [_normalize_name(col) for col in media_data.columns]
-# The first 'Panel Level 1' entry is the panel column. An empty bucket now means "no panel"
-# (panel_col == '' and is_panel False) instead of raising IndexError.
-panel_cols = [_normalize_name(col) for col in st.session_state['bin_dict']['Panel Level 1']]
-panel_col = panel_cols[0] if panel_cols else ''
-date_col = 'date'
-is_panel = len(panel_col) > 0
-if 'is_panel' not in st.session_state:
-    st.session_state['is_panel'] = False
-# if st.toggle('Apply Transformations on DMA/Panel Level'):
-#     media_data = pd.read_csv(r'C:\Users\SrishtiVerma\Mastercard\Sprint2\upf_data_converted_randomized_resp_metrics.csv')
-#     media_data.columns = [i.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for i in
-#                           media_data.columns]
-#     dma = st.selectbox('Select the Level of data ',
-#                        [col for col in media_data.columns if col.lower() in ['dma', 'panel', 'markets']])
-#     # is_panel = True
-#     # st.session_state['is_panel']=True
-#
-# else:
-#     # """ code to aggregate data on date """
-#     media_data = pd.read_excel(r'C:\Users\SrishtiVerma\Mastercard\Sprint1\Tactic Level Models\Tactic_level_data_imp_clicks_spends.xlsx')
-#     media_data.columns = [i.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for i in
-#                           media_data.columns]
-#     dma = None
-#     # is_panel = False
-#     # st.session_state['is_panel']=False
-#media_data = st.session_state["final_df"]
-# st.write(media_data.columns)
-# Order the rows by date and keep the date column aside before it is removed from the frame.
-media_data.sort_values(date_col, inplace=True)
-media_data.reset_index(drop=True, inplace=True)
-date = media_data[date_col]
-st.session_state['date'] = date
-# Spend view: every column whose name contains "_cost" or "_spend", plus the date
-# (and the panel column for panel data).
-spend_cols = [c for c in media_data.columns if "_cost" in c.lower() or "_spend" in c.lower()]
-id_cols = [date_col, panel_col] if is_panel else [date_col]
-spends_data = media_data[spend_cols + id_cols]
-y = media_data[target_col]
-# The target column stays in media_data; only the date column is dropped (in place).
-media_data.drop(columns=[date_col], inplace=True)
-media_data.reset_index(drop=True, inplace=True)
-columns = st.columns(2)
-# Remember the frame's shape from the first run of the session.
-old_shape = media_data.shape
-if "old_shape" not in st.session_state:
-    st.session_state['old_shape'] = old_shape
-# with columns[0]:
-#     slider_value_adstock = st.slider('Select Adstock Range (only applied to media)', 0.0, 1.0, (0.2, 0.4), step=0.1,
-#                                      format="%.2f")
-# with columns[1]:
-#     slider_value_lag = st.slider('Select Lag Range (applied to media, seasonal, macroeconomic variables)', 1, 7, (1, 3),
-#                                  step=1)
-# with columns[2]:
-#    slider_value_power=st.slider('Select Power range (only applied to media )',0,4,(1,2),step=1)
-# with columns[1]:
-#    st.number_input('Select the range of half saturation point ',min_value=1,max_value=5)
-#    st.number_input('Select the range of  ')
-# Section 1 - Transformations Functions
-# def lag(data, features, lags, dma=None):
-#     if dma:
-#
-#         transformed_data = pd.concat(
-#             [data.groupby([dma])[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags], axis=1)
-#         # transformed_data = transformed_data.fillna(method='bfill')
-#         transformed_data = transformed_data.bfill() # Sprint4 - fillna getting deprecated
-#         return pd.concat([transformed_data, data], axis=1)
-#
-#     else:
-#
-#         # ''' data should be aggregated on date'''
-#
-#         transformed_data = pd.concat([data[features].shift(lag).add_suffix(f'_lag_{lag}') for lag in lags], axis=1)
-#         # transformed_data = transformed_data.fillna(method='bfill')
-#         transformed_data = transformed_data.bfill()
-#
-#         return pd.concat([transformed_data, data], axis=1)
-#
-#
-# # adstock
-# def adstock(df, alphas, cutoff, features, dma=None):
-#     if dma:
-#         transformed_data = pd.DataFrame()
-#         for d in df[dma].unique():
-#             dma_sub_df = df[df[dma] == d]
-#             n = len(dma_sub_df)
-#
-#             weights = np.array(
-#                 [[[alpha ** (i - j) if i >= j and j >= i - cutoff else 0. for j in range(n)] for i in range(n)] for
-#                  alpha in alphas])
-#             X = dma_sub_df[features].to_numpy()
-#
-#             res = pd.DataFrame(np.hstack(weights @ X),
-#                                columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
-#
-#             transformed_data = pd.concat([transformed_data, res], axis=0)
-#             transformed_data.reset_index(drop=True, inplace=True)
-#         return pd.concat([transformed_data, df], axis=1)
-#
-#     else:
-#
-#         n = len(df)
-#
-#         weights = np.array(
-#             [[[alpha ** (i - j) if i >= j and j >= i - cutoff else 0. for j in range(n)] for i in range(n)] for alpha in
-#              alphas])
-#
-#         X = df[features].to_numpy()
-#         res = pd.DataFrame(np.hstack(weights @ X),
-#                            columns=[f'{col}_adstock_{alpha}' for alpha in alphas for col in features])
-#         return pd.concat([res, df], axis=1)
-# Section 2 - Begin Transformations
-# Session defaults (Sprint3 added orig_media_data, random_effects, pred_train, pred_test):
-# each key is created with an empty value only when it is not in the session yet.
-for state_key, make_default in (('media_data', pd.DataFrame),
-                                ('orig_media_data', pd.DataFrame),
-                                ('random_effects', pd.DataFrame),
-                                ('pred_train', list),
-                                ('pred_test', list)):
-    if state_key not in st.session_state:
-        st.session_state[state_key] = make_default()
-# end of Sprint3 additions
-# variables_to_be_transformed=[col for col in media_data.columns if col.lower() not in ['dma','panel'] ] # change for buckets
-# variables_to_be_transformed = [col for col in media_data.columns if
-#                                '_clicks' in col.lower() or '_impress' in col.lower()]  # srishti - change
-#
-# with columns[0]:
-#     if st.button('Apply Transformations'):
-#         with st.spinner('Applying Transformations'):
-#             transformed_data_lag = lag(media_data, features=variables_to_be_transformed,
-#                                        lags=np.arange(slider_value_lag[0], slider_value_lag[1] + 1, 1), dma=dma)
-#
-#             # variables_to_be_transformed=[col for col in list(transformed_data_lag.columns) if col not in ['Date','DMA','Panel']] #change for buckets
-#             variables_to_be_transformed = [col for col in media_data.columns if
-#                                            '_clicks' in col.lower() or '_impress' in col.lower()]  # srishti - change
-#
-#             transformed_data_adstock = adstock(df=transformed_data_lag,
-#                                                alphas=np.arange(slider_value_adstock[0], slider_value_adstock[1], 0.1),
-#                                                cutoff=8, features=variables_to_be_transformed, dma=dma)
-#
-#             # st.success('Done')
-#             st.success("Transformations complete!")
-#
-#             st.write(f'old shape {old_shape}, new shape {transformed_data_adstock.shape}')
-#
-#             transformed_data_adstock.columns = [c.replace(".", "_") for c in
-#                                                 transformed_data_adstock.columns]  # srishti
-#             st.session_state['media_data'] = transformed_data_adstock  # srishti
-#             # Sprint3
-#             orig_media_data = media_data.copy()
-#             orig_media_data[date_col] = date
-#             orig_media_data[target_col] = y
-#             st.session_state['orig_media_data'] = orig_media_data  # srishti
-#
-#         # with st.spinner('Applying Transformations'):
-#         #   time.sleep(2)
-#         #   st.success("Transformations complete!")
-#
-# # if st.session_state['media_data'].shape[1]>old_shape[1]:
-# # with columns[0]:
-# # st.write(f'Total no.of variables before transformation: {old_shape[1]}, Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
-# # st.write(f'Total no.of variables after transformation: {st.session_state["media_data"].shape[1]}')
-# Section 3 - Create combinations
-# bucket=['paid_search', 'kwai','indicacao','infleux', 'influencer','FB: Level Achieved - Tier 1 Impressions',
-#       ' FB: Level Achieved - Tier 2 Impressions','paid_social_others',
-#         ' GA App: Will And Cid Pequena Baixo Risco Clicks',
-#       'digital_tactic_others',"programmatic"
-#       ]
-# srishti - bucket names changed
-# Channel buckets: a candidate feature belongs to a bucket when the bucket name is a substring
-# of the feature's column name (see all_features_set below).
-bucket = ['paid_search', 'kwai', 'indicacao', 'infleux', 'influencer', 'fb_level_achieved_tier_2',
-          'fb_level_achieved_tier_1', 'paid_social_others',
-          'ga_app',
-          'digital_tactic_others', "programmatic"
-          ]
-with columns[0]:
-    if st.button('Create Combinations of Variables'):
-        # Despite its name, this list collects the top TWO correlated columns per variable (head(2)).
-        top_3_correlated_features = []
-        # Candidate variables are the 'Media' and 'Internal' buckets, normalised like the column names.
-        original_cols=st.session_state['bin_dict']['Media'] + st.session_state['bin_dict']['Internal']
-        original_cols=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in original_cols]
-        #st.write(original_cols)
-        for col in original_cols:  # srishti - new
-            # Correlate every column matching `col` (used as a regex) with the target; y is
-            # concatenated last, so .iloc[:-1] drops the target's correlation with itself.
-            corr_df = pd.concat([st.session_state['media_data'].filter(regex=col),
-                                 y], axis=1).corr()[target_col].iloc[:-1]
-            top_3_correlated_features.append(list(corr_df.sort_values(ascending=False).head(2).index))
-        flattened_list = [item for sublist in top_3_correlated_features for item in sublist]
-        # Group the selected columns by bucket, skipping buckets without any match.
-        all_features_set = {var: [col for col in flattened_list if var in col] for var in bucket if
-                            len([col for col in flattened_list if var in col]) > 0}  # srishti
-        channels_all = [values for values in all_features_set.values()]
-        # Every combination takes exactly one column from each non-empty bucket.
-        st.session_state['combinations'] = list(itertools.product(*channels_all))
-        st.session_state['final_selection'] = st.session_state['combinations']
-        st.success('Done')
-    # Runs on every script run, not only after the button press.
-    y.reset_index(drop=True, inplace=True)
-    # Result store for the model-building loop: parallel lists, one entry per accepted model.
-    if 'Model_results' not in st.session_state:
-        st.session_state['Model_results'] = {'Model_object': [],
-                                             'Model_iteration': [],
-                                             'Feature_set': [],
-                                             'MAPE': [],
-                                             'R2': [],
-                                             'ADJR2': [],
-                                             'pos_count': []
-                                             }
-    def reset_model_result_dct():
-        # Replace the result store with empty lists; passed below as the on_change / on_click
-        # callback of the iteration input and the 'Build Model' button.
-        st.session_state['Model_results'] = {'Model_object': [],
-                                             'Model_iteration': [],
-                                             'Feature_set': [],
-                                             'MAPE': [],
-                                             'R2': [],
-                                             'ADJR2': [],
-                                             'pos_count': []
-                                             }
-        # if st.button('Build Model'):
-    if 'iterations' not in st.session_state:
-        st.session_state['iterations'] = 0
-    # 'final_selection' is False until combinations have been created, then a list of tuples.
-    if 'final_selection' not in st.session_state:
-        st.session_state['final_selection'] = False
-# Folder that receives the pickled model of every accepted iteration.
-save_path = r"Model/"
-# 'final_selection' is False until combinations exist, otherwise the list of feature tuples.
-final_selection = st.session_state['final_selection']
-with columns[1]:
-    if final_selection:
-        st.write(f'Total combinations created {format_numbers(len(final_selection))}')
-if st.checkbox('Build all iterations'):
-    # Before any combinations were created final_selection is still False and has no length;
-    # treat that as zero iterations instead of raising TypeError.
-    iterations = len(final_selection) if final_selection else 0
-else:
-    iterations = st.number_input('Select the number of iterations to perform', min_value=0, step=100,
-                                 value=st.session_state['iterations'], on_change=reset_model_result_dct)
-#  st.write("iterations=", iterations)
-if st.button('Build Model', on_click=reset_model_result_dct):
-    st.session_state['iterations'] = iterations
-    # Section 4 - Model
-    # st.session_state['media_data'] = st.session_state['media_data'].fillna(method='ffill')
-    st.session_state['media_data'] = st.session_state['media_data'].ffill()
-    st.markdown(
-        'Data Split -- Training Period: May 9th, 2023 - October 5th,2023 , Testing Period: October 6th, 2023 - November 7th, 2023 ')
-    progress_bar = st.progress(0)  # Initialize the progress bar
-    # time_remaining_text = st.empty()  # Create an empty space for time remaining text
-    start_time = time.time()  # Record the start time
-    progress_text = st.empty()
-    # time_elapsed_text = st.empty()
-    # for i, selected_features in enumerate(st.session_state["final_selection"][40000:40000 + int(iterations)]):
-    # st.write(st.session_state["final_selection"])
-    # for i, selected_features in enumerate(st.session_state["final_selection"]):
-    if is_panel == True:
-        for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]):  # srishti
-            df = st.session_state['media_data']
-            fet = [var for var in selected_features if len(var) > 0]
-            inp_vars_str = " + ".join(fet)  # new
-            X = df[fet]
-            y = df[target_col]
-            ss = MinMaxScaler()
-            X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
-            X[target_col] = y  # Sprint2
-            X[panel_col] = df[panel_col]  # Sprint2
-            X_train = X.iloc[:8000]
-            X_test = X.iloc[8000:]
-            y_train = y.iloc[:8000]
-            y_test = y.iloc[8000:]
-            print(X_train.shape)
-            # model = sm.OLS(y_train, X_train).fit()
-            md_str = target_col + " ~ " + inp_vars_str
-            # md = smf.mixedlm("total_approved_accounts_revenue ~ {}".format(inp_vars_str),
-            #                 data=X_train[[target_col] + fet],
-            #                 groups=X_train[panel_col])
-            md = smf.mixedlm(md_str,
-                             data=X_train[[target_col] + fet],
-                             groups=X_train[panel_col])
-            mdf = md.fit()
-            predicted_values = mdf.fittedvalues
-            coefficients = mdf.fe_params.to_dict()
-            model_positive = [col for col in coefficients.keys() if coefficients[col] > 0]
-            pvalues = [var for var in list(mdf.pvalues) if var <= 0.06]
-            if (len(model_positive) / len(selected_features)) > 0 and (
-                    len(pvalues) / len(selected_features)) >= 0:  # srishti - changed just for testing, revert later
-                # predicted_values = model.predict(X_train)
-                mape = mean_absolute_percentage_error(y_train, predicted_values)
-                r2 = r2_score(y_train, predicted_values)
-                adjr2 = 1 - (1 - r2) * (len(y_train) - 1) / (len(y_train) - len(selected_features) - 1)
-                filename = os.path.join(save_path, f"model_{i}.pkl")
-                with open(filename, "wb") as f:
-                    pickle.dump(mdf, f)
-                # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
-                #   model = pickle.load(file)
-                st.session_state['Model_results']['Model_object'].append(filename)
-                st.session_state['Model_results']['Model_iteration'].append(i)
-                st.session_state['Model_results']['Feature_set'].append(fet)
-                st.session_state['Model_results']['MAPE'].append(mape)
-                st.session_state['Model_results']['R2'].append(r2)
-                st.session_state['Model_results']['pos_count'].append(len(model_positive))
-                st.session_state['Model_results']['ADJR2'].append(adjr2)
-            current_time = time.time()
-            time_taken = current_time - start_time
-            time_elapsed_minutes = time_taken / 60
-            completed_iterations_text = f"{i + 1}/{iterations}"
-            progress_bar.progress((i + 1) / int(iterations))
-            progress_text.text(
-                f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
-        st.write(
-            f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
-    else:
-        for i, selected_features in enumerate(st.session_state["final_selection"][0:int(iterations)]):  # srishti
-            df = st.session_state['media_data']
-            fet = [var for var in selected_features if len(var) > 0]
-            inp_vars_str = " + ".join(fet)
-            X = df[fet]
-            y = df[target_col]
-            ss = MinMaxScaler()
-            X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
-            X = sm.add_constant(X)
-            X_train = X.iloc[:130]
-            X_test = X.iloc[130:]
-            y_train = y.iloc[:130]
-            y_test = y.iloc[130:]
-            model = sm.OLS(y_train, X_train).fit()
-            coefficients = model.params.to_list()
-            model_positive = [coef for coef in coefficients if coef > 0]
-            predicted_values = model.predict(X_train)
-            pvalues = [var for var in list(model.pvalues) if var <= 0.06]
-            # if (len(model_possitive) / len(selected_features)) > 0.9 and (len(pvalues) / len(selected_features)) >= 0.8:
-            if (len(model_positive) / len(selected_features)) > 0 and (len(pvalues) / len(
-                    selected_features)) >= 0.5:  # srishti - changed just for testing, revert later VALID MODEL CRITERIA
-                # predicted_values = model.predict(X_train)
-                mape = mean_absolute_percentage_error(y_train, predicted_values)
-                adjr2 = model.rsquared_adj
-                r2 = model.rsquared
-                filename = os.path.join(save_path, f"model_{i}.pkl")
-                with open(filename, "wb") as f:
-                    pickle.dump(model, f)
-                # with open(r"C:\Users\ManojP\Documents\MMM\simopt\Model\model.pkl", 'rb') as file:
-                #   model = pickle.load(file)
-                st.session_state['Model_results']['Model_object'].append(filename)
-                st.session_state['Model_results']['Model_iteration'].append(i)
-                st.session_state['Model_results']['Feature_set'].append(fet)
-                st.session_state['Model_results']['MAPE'].append(mape)
-                st.session_state['Model_results']['R2'].append(r2)
-                st.session_state['Model_results']['ADJR2'].append(adjr2)
-                st.session_state['Model_results']['pos_count'].append(len(model_positive))
-            current_time = time.time()
-            time_taken = current_time - start_time
-            time_elapsed_minutes = time_taken / 60
-            completed_iterations_text = f"{i + 1}/{iterations}"
-            progress_bar.progress((i + 1) / int(iterations))
-            progress_text.text(
-                f'Completed iterations: {completed_iterations_text},Time Elapsed (min): {time_elapsed_minutes:.2f}')
-        st.write(
-            f'Out of {st.session_state["iterations"]} iterations : {len(st.session_state["Model_results"]["Model_object"])} valid models')
-    pd.DataFrame(st.session_state['Model_results']).to_csv('model_output.csv')
-    def to_percentage(value):
-        return f'{value * 100:.1f}%'
## Section 5 - Select Model
# Lets the user pick one of the top-ranked model iterations, shows its
# diagnostics on train and test data, and optionally saves it for tuning.
st.title('2. Select Models')
if 'tick' not in st.session_state:
    st.session_state['tick'] = False
if st.checkbox('Show results of top 10 models (based on MAPE and Adj. R2)', value=st.session_state['tick']):
    st.session_state['tick'] = True
    st.write('Select one model iteration to generate performance metrics for it:')
    data = pd.DataFrame(st.session_state['Model_results'])
    # Sprint4 -- Srishti -- only show models with the lowest num of neg coeffs
    data = data[data['pos_count'] == data['pos_count'].max()].reset_index(drop=True)
    data.sort_values(by=['ADJR2'], ascending=False, inplace=True)
    data.drop_duplicates(subset='Model_iteration', inplace=True)
    # Explicit copy: the columns added/overwritten below must not be written
    # through a slice of `data` (avoids pandas chained-assignment problems).
    top_10 = data.head(10).copy()
    top_10['Rank'] = np.arange(1, len(top_10) + 1, 1)
    # Percent formatting is done inline so this section does not depend on a
    # helper that is only defined inside the conditional model-building block.
    top_10[['MAPE', 'R2', 'ADJR2']] = np.round(top_10[['MAPE', 'R2', 'ADJR2']], 4).applymap(
        lambda metric: f'{metric * 100:.1f}%')
    top_10_table = top_10[['Rank', 'Model_iteration', 'MAPE', 'ADJR2', 'R2']]
    gd = GridOptionsBuilder.from_dataframe(top_10_table)
    gd.configure_pagination(enabled=True)
    gd.configure_selection(
        use_checkbox=True,
        selection_mode="single",
        pre_select_all_rows=False,
        pre_selected_rows=[1],
    )
    gridoptions = gd.build()
    table = AgGrid(top_10, gridOptions=gridoptions, update_mode=GridUpdateMode.SELECTION_CHANGED)
    selected_rows = table.selected_rows
    st.session_state["selected_rows"] = selected_rows
    if 'Model' not in st.session_state:
        st.session_state['Model'] = {}
    # Section 6 - Display Results
    if len(selected_rows) > 0:
        st.header('2.1 Results Summary')
        model_object = data[data['Model_iteration'] == selected_rows[0]['Model_iteration']]['Model_object']
        features_set = data[data['Model_iteration'] == selected_rows[0]['Model_iteration']]['Feature_set']
        # NOTE(review): pickle.load executes arbitrary code; the path comes from
        # session state and is presumably written by this app's own training
        # step -- never point it at an untrusted file.
        with open(str(model_object.values[0]), 'rb') as file:
            model = pickle.load(file)
        st.write(model.summary())
        st.header('2.2 Actual vs. Predicted Plot')
        if is_panel:
            df = st.session_state['media_data']
            X = df[features_set.values[0]]
            y = df[target_col]
            ss = MinMaxScaler()
            X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
            # Sprint2 changes
            X[target_col] = y  # new
            X[panel_col] = df[panel_col]
            X[date_col] = date
            # Same hard-coded 8000-row train/test split as the panel training loop.
            X_train = X.iloc[:8000]
            X_test = X.iloc[8000:].reset_index(drop=True)
            y_train = y.iloc[:8000]
            y_test = y.iloc[8000:].reset_index(drop=True)
            test_spends = spends_data[8000:]  # Sprint3 - test spends for resp curves
            random_eff_df = get_random_effects(media_data, panel_col, model)
            train_pred = model.fittedvalues
            test_pred = mdf_predict(X_test, model, random_eff_df)
            print("__" * 20, test_pred.isna().sum())
        else:
            df = st.session_state['media_data']
            X = df[features_set.values[0]]
            y = df[target_col]
            ss = MinMaxScaler()
            X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
            X = sm.add_constant(X)
            X[date_col] = date
            # Same hard-coded 130-row train/test split as the non-panel training loop.
            X_train = X.iloc[:130]
            X_test = X.iloc[130:].reset_index(drop=True)
            y_train = y.iloc[:130]
            y_test = y.iloc[130:].reset_index(drop=True)
            test_spends = spends_data[130:]  # Sprint3 - test spends for resp curves
            train_pred = model.predict(X_train[features_set.values[0] + ['const']])
            test_pred = model.predict(X_test[features_set.values[0] + ['const']])
        # save x test to test - srishti
        x_test_to_save = X_test.copy()
        x_test_to_save['Actuals'] = y_test
        x_test_to_save['Predictions'] = test_pred
        x_train_to_save = X_train.copy()
        x_train_to_save['Actuals'] = y_train
        x_train_to_save['Predictions'] = train_pred
        x_train_to_save.to_csv('Test/x_train_to_save.csv', index=False)
        x_test_to_save.to_csv('Test/x_test_to_save.csv', index=False)
        st.session_state['X'] = X_train
        st.session_state['features_set'] = features_set.values[0]
        print("**" * 20, "selected model features : ", features_set.values[0])
        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train, train_pred,
                                                                                 model, target_column=sel_target_col,
                                                                                 is_panel=is_panel)  # Sprint2
        st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
        st.markdown('## 2.3 Residual Analysis')
        columns = st.columns(2)
        with columns[0]:
            fig = plot_residual_predicted(y_train, train_pred, X_train)  # Sprint2
            st.plotly_chart(fig)
        with columns[1]:
            st.empty()
            fig = qqplot(y_train, train_pred)  # Sprint2
            st.plotly_chart(fig)
        with columns[0]:
            fig = residual_distribution(y_train, train_pred)  # Sprint2
            st.pyplot(fig)
        vif_data = pd.DataFrame()
        # Sprint2 -- keep a full copy of X_train; panel, target & date columns
        # are dropped from X_train itself before computing VIF.
        X_train_orig = X_train.copy()
        del_col_list = list(set([target_col, panel_col, date_col]).intersection(list(X_train.columns)))
        X_train.drop(columns=del_col_list, inplace=True)  # Sprint2
        vif_data["Variable"] = X_train.columns
        vif_data["VIF"] = [variance_inflation_factor(X_train.values, i) for i in range(X_train.shape[1])]
        vif_data.sort_values(by=['VIF'], ascending=False, inplace=True)
        vif_data = np.round(vif_data)
        vif_data['VIF'] = vif_data['VIF'].astype(float)
        st.header('2.4 Variance Inflation Factor (VIF)')
        # Colour bands: < 3 green, 3..10 orange, > 10 red.
        color_mapping = {
            'darkgreen': (vif_data['VIF'] < 3),
            'orange': (vif_data['VIF'] >= 3) & (vif_data['VIF'] <= 10),
            'darkred': (vif_data['VIF'] > 10)
        }
        # Create a horizontal bar plot
        fig, ax = plt.subplots()
        fig.set_figwidth(10)  # Adjust the width of the figure as needed
        # Sort the bars by descending VIF values
        vif_data = vif_data.sort_values(by='VIF', ascending=False)
        # Iterate through the color mapping and plot bars with corresponding colors
        for color, condition in color_mapping.items():
            subset = vif_data[condition]
            bars = ax.barh(subset["Variable"], subset["VIF"], color=color, label=color)
            # Add text annotations next to the bars
            for bar in bars:
                width = bar.get_width()
                ax.annotate(f'{width:}', xy=(width, bar.get_y() + bar.get_height() / 2), xytext=(5, 0),
                            textcoords='offset points', va='center')
        # Customize the plot
        ax.set_xlabel('VIF Values')
        # Display the plot in Streamlit
        st.pyplot(fig)
        with st.expander('Results Summary Test data'):
            st.header('2.2 Actual vs. Predicted Plot')
            metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_test[date_col], y_test,
                                                                                     test_pred, model,
                                                                                     target_column=sel_target_col,
                                                                                     is_panel=is_panel)  # Sprint2
            st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
            st.markdown('## 2.3 Residual Analysis')
            columns = st.columns(2)
            # Residuals of the test predictions must be taken against the test
            # actuals (y_test), not the full target series y.
            with columns[0]:
                fig = plot_residual_predicted(y_test, test_pred, X_test)  # Sprint2
                st.plotly_chart(fig)
            with columns[1]:
                st.empty()
                fig = qqplot(y_test, test_pred)  # Sprint2
                st.plotly_chart(fig)
            with columns[0]:
                fig = residual_distribution(y_test, test_pred)  # Sprint2
                st.pyplot(fig)
        value = False
        save_button_model = st.checkbox('Save this model to tune', key='build_rc_cb')
        if save_button_model:
            mod_name = st.text_input('Enter model name')
            if len(mod_name) > 0:
                mod_name = mod_name + "__" + target_col  # Sprint4 - adding target col to model name
                if is_panel:
                    pred_train = model.fittedvalues
                    pred_test = mdf_predict(X_test, model, random_eff_df)
                else:
                    # The OLS model was fitted with an intercept column, so it is
                    # part of the feature set needed for prediction.
                    st.session_state['features_set'] = st.session_state['features_set'] + ['const']
                    pred_train = model.predict(X_train_orig[st.session_state['features_set']])
                    pred_test = model.predict(X_test[st.session_state['features_set']])
                st.session_state['Model'][mod_name] = {"Model_object": model,
                                                       'feature_set': st.session_state['features_set'],
                                                       'X_train': X_train_orig,
                                                       'X_test': X_test,
                                                       'y_train': y_train,
                                                       'y_test': y_test,
                                                       'pred_train': pred_train,
                                                       'pred_test': pred_test
                                                       }
                st.session_state['X_train'] = X_train_orig
                st.session_state['X_test_spends'] = test_spends
                st.session_state['saved_model_names'].append(mod_name)
                # Sprint3 additions
                if is_panel:
                    random_eff_df = get_random_effects(media_data, panel_col, model)
                    st.session_state['random_effects'] = random_eff_df
                # End of Sprint3 additions
                with open("best_models.pkl", "wb") as f:
                    pickle.dump(st.session_state['Model'], f)
                # Only reached when the dump succeeded (an exception above propagates).
                st.success(mod_name + ' model saved! Proceed to the next page to tune the model')
                # Sprint4 - add the formatted name of the target col to used resp metrics
                urm = st.session_state['used_response_metrics']
                urm.append(sel_target_col)
                st.session_state['used_response_metrics'] = list(set(urm))
                mod_name = ""
                value = False

pages/4_Saved_Model_Results.py DELETED Viewed

@@ -1,413 +0,0 @@
-import plotly.express as px
-import numpy as np
-import plotly.graph_objects as go
-import streamlit as st
-import pandas as pd
-import statsmodels.api as sm
-from sklearn.metrics import mean_absolute_percentage_error
-import sys
-import os
-from utilities import (set_header,
-                       load_local_css,
-                       load_authenticator)
-import seaborn as sns
-import matplotlib.pyplot as plt
-import sweetviz as sv
-import tempfile
-from sklearn.preprocessing import MinMaxScaler
-from st_aggrid import AgGrid
-from st_aggrid import GridOptionsBuilder,GridUpdateMode
-from st_aggrid import GridOptionsBuilder
-import sys
-import re
# Page bootstrap: interpreter tweaks, page chrome, session-state refresh and login.
sys.setrecursionlimit(10 ** 6)

# stdout is pointed at a scratch file which is closed right away, then the
# original stream is restored; the net effect is that 'temp_stdout.txt' is
# created (or truncated).
original_stdout = sys.stdout
sys.stdout = open('temp_stdout.txt', 'w')
sys.stdout.close()
sys.stdout = original_stdout

st.set_page_config(layout='wide')
load_local_css('styles.css')
set_header()

# Write every eligible session-state entry back onto itself (presumably so the
# values persist across page switches -- confirm against Streamlit behaviour).
for k, v in st.session_state.items():
    if k in ['logout', 'login', 'config']:
        continue
    if k.startswith('FormSubmitter'):
        continue
    st.session_state[k] = v

# Reuse the authenticator cached in session state when there is one.
authenticator = st.session_state.get('authenticator')
if authenticator is None:
    authenticator = load_authenticator()
name, authentication_status, username = authenticator.login('Login', 'main')
auth_status = st.session_state.get('authentication_status')
-if auth_status == True:
-    is_state_initiaized = st.session_state.get('initialized',False)
-    if not is_state_initiaized:
-        a=1
-    def plot_residual_predicted(actual, predicted, df_):
-            df_['Residuals'] = actual - pd.Series(predicted)
-            df_['StdResidual'] = (df_['Residuals'] - df_['Residuals'].mean()) / df_['Residuals'].std()
-            # Create a Plotly scatter plot
-            fig = px.scatter(df_, x=predicted, y='StdResidual', opacity=0.5,color_discrete_sequence=["#11B6BD"])
-            # Add horizontal lines
-            fig.add_hline(y=0, line_dash="dash", line_color="darkorange")
-            fig.add_hline(y=2, line_color="red")
-            fig.add_hline(y=-2, line_color="red")
-            fig.update_xaxes(title='Predicted')
-            fig.update_yaxes(title='Standardized Residuals (Actual - Predicted)')
-            # Set the same width and height for both figures
-            fig.update_layout(title='Residuals over Predicted Values', autosize=False, width=600, height=400)
-            return fig
-    def residual_distribution(actual, predicted):
-            Residuals = actual - pd.Series(predicted)
-            # Create a Seaborn distribution plot
-            sns.set(style="whitegrid")
-            plt.figure(figsize=(6, 4))
-            sns.histplot(Residuals, kde=True, color="#11B6BD")
-            plt.title(' Distribution of Residuals')
-            plt.xlabel('Residuals')
-            plt.ylabel('Probability Density')
-            return plt
-    def qqplot(actual, predicted):
-            Residuals = actual - pd.Series(predicted)
-            Residuals = pd.Series(Residuals)
-            Resud_std = (Residuals - Residuals.mean()) / Residuals.std()
-            # Create a QQ plot using Plotly with custom colors
-            fig = go.Figure()
-            fig.add_trace(go.Scatter(x=sm.ProbPlot(Resud_std).theoretical_quantiles,
-                                    y=sm.ProbPlot(Resud_std).sample_quantiles,
-                                    mode='markers',
-                                    marker=dict(size=5, color="#11B6BD"),
-                                    name='QQ Plot'))
-            # Add the 45-degree reference line
-            diagonal_line = go.Scatter(
-                x=[-2, 2],  # Adjust the x values as needed to fit the range of your data
-                y=[-2, 2],  # Adjust the y values accordingly
-                mode='lines',
-                line=dict(color='red'),  # Customize the line color and style
-                name=' '
-            )
-            fig.add_trace(diagonal_line)
-            # Customize the layout
-            fig.update_layout(title='QQ Plot of Residuals',title_x=0.5, autosize=False, width=600, height=400,
-                            xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles')
-            return fig
-    def plot_actual_vs_predicted(date, y, predicted_values, model):
-        fig = go.Figure()
-        fig.add_trace(go.Scatter(x=date, y=y, mode='lines', name='Actual', line=dict(color='blue')))
-        fig.add_trace(go.Scatter(x=date, y=predicted_values, mode='lines', name='Predicted', line=dict(color='orange')))
-        # Calculate MAPE
-        mape = mean_absolute_percentage_error(y, predicted_values)*100
-        # Calculate R-squared
-        rss = np.sum((y - predicted_values) ** 2)
-        tss = np.sum((y - np.mean(y)) ** 2)
-        r_squared = 1 - (rss / tss)
-        # Get the number of predictors
-        num_predictors = model.df_model
-        # Get the number of samples
-        num_samples = len(y)
-        # Calculate Adjusted R-squared
-        adj_r_squared = 1 - ((1 - r_squared) * ((num_samples - 1) / (num_samples - num_predictors - 1)))
-        metrics_table = pd.DataFrame({
-        'Metric': ['MAPE', 'R-squared', 'AdjR-squared'],
-        'Value': [mape, r_squared, adj_r_squared]})
-        fig.update_layout(
-            xaxis=dict(title='Date'),
-            yaxis=dict(title='Value'),
-            title=f'MAPE : {mape:.2f}%, AdjR2: {adj_r_squared:.2f}',
-            xaxis_tickangle=-30
-        )
-        return metrics_table,fig
-    def contributions(X, model):
-        X1 = X.copy()
-        for j, col in enumerate(X1.columns):
-            X1[col] = X1[col] * model.params.values[j]
-        return np.round((X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2)
-    transformed_data=pd.read_csv('transformed_data.csv')
-    # hard coded for now, need to get features set from model
-    feature_set_dct={'app_installs_-_appsflyer':['paid_search_clicks',
-                                            'fb:_level_achieved_-_tier_1_impressions_lag2',
-                                            'fb:_level_achieved_-_tier_2_clicks_lag2',
-                                            'paid_social_others_impressions_adst.1',
-                                            'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag2',
-                                            'digital_tactic_others_clicks',
-                                            'kwai_clicks_adst.3',
-                                            'programmaticclicks',
-                                            'indicacao_clicks_adst.1',
-                                            'infleux_clicks_adst.4',
-                                            'influencer_clicks'],
-                'account_requests_-_appsflyer':['paid_search_impressions',
-                                                'fb:_level_achieved_-_tier_1_clicks_adst.1',
-                                                'fb:_level_achieved_-_tier_2_clicks_adst.1',
-                                                'paid_social_others_clicks_lag2',
-                                                'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag5_adst.1',
-                                                'digital_tactic_others_clicks_adst.1',
-                                                'kwai_clicks_adst.2',
-                                                'programmaticimpressions_lag4_adst.1',
-                                                'indicacao_clicks',
-                                                'infleux_clicks_adst.2',
-                                                'influencer_clicks'],
-                'total_approved_accounts_-_appsflyer':['paid_search_clicks',
-                                                        'fb:_level_achieved_-_tier_1_impressions_lag2_adst.1',
-                                                        'fb:_level_achieved_-_tier_2_impressions_lag2',
-                                                        'paid_social_others_clicks_lag2_adst.2',
-                                                        'ga_app:_will_and_cid_pequena_baixo_risco_impressions_lag4',
-                                                        'digital_tactic_others_clicks',
-                                                        'kwai_impressions_adst.2',
-                                                        'programmaticclicks_adst.5',
-                                                        'indicacao_clicks_adst.1',
-                                                        'infleux_clicks_adst.3',
-                                                        'influencer_clicks'],
-                'total_approved_accounts_-_revenue':['paid_search_impressions_adst.5',
-                                                    'kwai_impressions_lag2_adst.3',
-                                                    'indicacao_clicks_adst.3',
-                                                    'infleux_clicks_adst.3',
-                                                    'programmaticclicks_adst.4',
-                                                    'influencer_clicks_adst.3',
-                                                    'fb:_level_achieved_-_tier_1_impressions_adst.2',
-                                                    'fb:_level_achieved_-_tier_2_impressions_lag3_adst.5',
-                                                    'paid_social_others_impressions_adst.3',
-                                                    'ga_app:_will_and_cid_pequena_baixo_risco_clicks_lag3_adst.5',
-                                                    'digital_tactic_others_clicks_adst.2']
-                }
-    #""" the above part should be modified so that we are fetching features set from the saved model"""
-    def contributions(X, model,target):
-        X1 = X.copy()
-        for j, col in enumerate(X1.columns):
-            X1[col] = X1[col] * model.params.values[j]
-        contributions= np.round((X1.sum() / sum(X1.sum()) * 100).sort_values(ascending=False), 2)
-        contributions=pd.DataFrame(contributions,columns=target).reset_index().rename(columns={'index':'Channel'})
-        contributions['Channel']=[ re.split(r'_imp|_cli', col)[0] for col in contributions['Channel']]
-        return contributions
-    def model_fit(features_set,target):
-        X = transformed_data[features_set]
-        y=  transformed_data[target]
-        ss = MinMaxScaler()
-        X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)
-        X = sm.add_constant(X)
-        X_train=X.iloc[:150]
-        X_test=X.iloc[150:]
-        y_train=y.iloc[:150]
-        y_test=y.iloc[150:]
-        model = sm.OLS(y_train, X_train).fit()
-        predicted_values_train = model.predict(X_train)
-        r2 = model.rsquared
-        adjr2 = model.rsquared_adj
-        train_mape = mean_absolute_percentage_error(y_train, predicted_values_train)
-        test_mape=mean_absolute_percentage_error(y_test, model.predict(X_test))
-        summary=model.summary()
-        train_contributions=contributions(X_train,model,[target])
-        return pd.DataFrame({'Model':target,'R2':np.round(r2,2),'ADJr2':np.round(adjr2,2),'Train Mape':np.round(train_mape,2),
-                             'Test Mape':np.round(test_mape,2),'Summary':summary,'Model_object':model
-                             },index=[0]), train_contributions
-    metrics_table=pd.DataFrame()
-    if 'contribution_df' not in st.session_state:
-        st.session_state["contribution_df"]=pd.DataFrame()
-    for target,feature_set in feature_set_dct.items():
-       metrics_table= pd.concat([metrics_table,model_fit(features_set=feature_set,target=target)[0]])
-       if st.session_state["contribution_df"].empty:
-           st.session_state["contribution_df"]= model_fit(features_set=feature_set,target=target)[1]
-       else:
-        st.session_state["contribution_df"]=pd.merge(st.session_state["contribution_df"],model_fit(features_set=feature_set,target=target)[1])
-    # st.write(st.session_state["contribution_df"])
-    metrics_table.reset_index(drop=True,inplace=True)
-    eda_columns=st.columns(2)
-    with eda_columns[1]:
-        eda=st.button('Generate EDA Report',help="Click to generate a bivariate report for the selected response metric from the table below.")
-    # st.markdown('Model Metrics')
-    st.title('Contribution Overview')
-    contribution_selections=st.multiselect('Select the models to compare contributions',[col for col in st.session_state['contribution_df'].columns if col.lower() != 'channel'   ],default=[col for col in st.session_state['contribution_df'].columns if col.lower() != 'channel'   ][-1])
-    trace_data=[]
-    for selection in contribution_selections:
-        trace=go.Bar(x=st.session_state['contribution_df']['Channel'], y=st.session_state['contribution_df'][selection],name=selection,text=np.round(st.session_state['contribution_df'][selection],0).astype(int).astype(str)+'%',textposition='outside')
-        trace_data.append(trace)
-    layout = go.Layout(
-    title='Metrics Contribution by Channel',
-    xaxis=dict(title='Channel Name'),
-    yaxis=dict(title='Metrics Contribution'),
-    barmode='group'
-                )
-    fig = go.Figure(data=trace_data, layout=layout)
-    st.plotly_chart(fig,use_container_width=True)
-    st.title('Analysis of Models Result')
-    #st.markdown()
-    gd_table=metrics_table.iloc[:,:-2]
-    gd=GridOptionsBuilder.from_dataframe(gd_table)
-    #gd.configure_pagination(enabled=True)
-    gd.configure_selection(use_checkbox=True)
-    gridoptions=gd.build()
-    table = AgGrid(gd_table,gridOptions=gridoptions,fit_columns_on_grid_load=True,height=200)
-    # table=metrics_table.iloc[:,:-2]
-    # table.insert(0, "Select", False)
-    # selection_table=st.data_editor(table,column_config={"Select": st.column_config.CheckboxColumn(required=True)})
-    if len(table.selected_rows)==0:
-        st.warning("Click on the checkbox to view comprehensive results of the selected model.")
-        st.stop()
-    else:
-        target_column=table.selected_rows[0]['Model']
-        feature_set=feature_set_dct[target_column]
-    with eda_columns[1]:
-        if eda:
-            def generate_report_with_target(channel_data, target_feature):
-                report = sv.analyze([channel_data, "Dataset"], target_feat=target_feature,verbose=False)
-                temp_dir = tempfile.mkdtemp()
-                report_path = os.path.join(temp_dir, "report.html")
-                report.show_html(filepath=report_path, open_browser=False)  # Generate the report as an HTML file
-                return report_path
-            report_data=transformed_data[feature_set]
-            report_data[target_column]=transformed_data[target_column]
-            report_file = generate_report_with_target(report_data, target_column)
-            if os.path.exists(report_file):
-                with open(report_file, 'rb') as f:
-                    st.download_button(
-                        label="Download EDA Report",
-                        data=f.read(),
-                        file_name="report.html",
-                        mime="text/html"
-                    )
-            else:
-                st.warning("Report generation failed. Unable to find the report file.")
-    model=metrics_table[metrics_table['Model']==target_column]['Model_object'].iloc[0]
-    st.header('Model Summary')
-    st.write(model.summary())
-    X=transformed_data[feature_set]
-    ss=MinMaxScaler()
-    X=pd.DataFrame(ss.fit_transform(X),columns=X.columns)
-    X=sm.add_constant(X)
-    y=transformed_data[target_column]
-    X_train=X.iloc[:150]
-    X_test=X.iloc[150:]
-    y_train=y.iloc[:150]
-    y_test=y.iloc[150:]
-    X.index=transformed_data['date']
-    y.index=transformed_data['date']
-    metrics_table_train,fig_train= plot_actual_vs_predicted(X_train.index, y_train, model.predict(X_train), model)
-    metrics_table_test,fig_test= plot_actual_vs_predicted(X_test.index, y_test, model.predict(X_test), model)
-    metrics_table_train=metrics_table_train.set_index('Metric').transpose()
-    metrics_table_train.index=['Train']
-    metrics_table_test=metrics_table_test.set_index('Metric').transpose()
-    metrics_table_test.index=['test']
-    metrics_table=np.round(pd.concat([metrics_table_train,metrics_table_test]),2)
-    st.markdown('Result Overview')
-    st.dataframe(np.round(metrics_table,2),use_container_width=True)
-    st.subheader('Actual vs Predicted Plot Train')
-    st.plotly_chart(fig_train,use_container_width=True)
-    st.subheader('Actual vs Predicted Plot Test')
-    st.plotly_chart(fig_test,use_container_width=True)
-    st.markdown('## Residual Analysis')
-    columns=st.columns(2)
-    Xtrain1=X_train.copy()
-    with columns[0]:
-        fig=plot_residual_predicted(y_train,model.predict(Xtrain1),Xtrain1)
-        st.plotly_chart(fig)
-    with columns[1]:
-        st.empty()
-        fig = qqplot(y_train,model.predict(X_train))
-        st.plotly_chart(fig)
-    with columns[0]:
-        fig=residual_distribution(y_train,model.predict(X_train))
-        st.pyplot(fig)
-elif auth_status == False:
-    st.error('Username/Password is incorrect')
-    try:
-        username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
-        if username_forgot_pw:
-            st.success('New password sent securely')
-            # Random password to be transferred to the user securely
-        elif username_forgot_pw == False:
-            st.error('Username not found')
-    except Exception as e:
-        st.error(e)

pages/5_Model_Result_Overview.py DELETED Viewed

@@ -1,103 +0,0 @@
-import streamlit as st
-from utilities import (set_header,
-                       initialize_data,
-                       load_local_css,
-                       create_channel_summary,
-                       create_contribution_pie,
-                       create_contribuion_stacked_plot,
-                       create_channel_spends_sales_plot,
-                       format_numbers,
-                       channel_name_formating,
-                       load_authenticator)
-import plotly.graph_objects as go
-import streamlit_authenticator as stauth
-import yaml
-from yaml import SafeLoader
-import time
-# Page script: shows the historical spend / response overview for the current scenario.
-# Streamlit renders widgets in statement order, so the order below is the page layout.
-st.set_page_config(layout='wide')
-load_local_css('styles.css')
-set_header()
-# Label used for the response metric tile below.
-target='Revenue'
-# NOTE(review): the authentication flow below is commented out, so this page currently
-# renders without any login check - confirm that this is intended.
-# for k, v in st.session_state.items():
-#     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
-#         st.session_state[k] = v
-# authenticator = st.session_state.get('authenticator')
-# if authenticator is None:
-#     authenticator = load_authenticator()
-# name, authentication_status, username = authenticator.login('Login', 'main')
-# auth_status = st.session_state['authentication_status']
-# if auth_status:
-#     authenticator.logout('Logout', 'main')
-#     is_state_initiaized = st.session_state.get('initialized',False)
-#     if not is_state_initiaized:
-# Called unconditionally on every run; presumably this populates the 'scenario',
-# 'raw_df' and 'channels_list' session entries read below - TODO confirm in utilities.
-initialize_data()
-scenario = st.session_state['scenario']
-raw_df = st.session_state['raw_df']
-st.header('Overview of previous spends')
-# Two narrow metric tiles followed by a wide spacer column.
-columns = st.columns((1,1,3))
-with columns[0]:
-    st.metric(label = 'Spends', value=format_numbers(float(scenario.actual_total_spends)))
-###print(f"##################### {scenario.actual_total_sales} ##################")
-with columns[1]:
-    st.metric(label = target, value=format_numbers(float(scenario.actual_total_sales),include_indicator=False))
-# Per-channel summary table, with channel names converted for display.
-actual_summary_df = create_channel_summary(scenario)
-actual_summary_df['Channel'] = actual_summary_df['Channel'].apply(channel_name_formating)
-columns = st.columns((2,1))
-with columns[0]:
-    with st.expander('Channel wise overview'):
-        # The styled table is rendered as raw HTML (hence unsafe_allow_html);
-        # header cells and even rows get the same background colour.
-        st.markdown(actual_summary_df.style.set_table_styles(
-        [{
-            'selector': 'th',
-            'props': [('background-color', '#11B6BD')]
-        },
-            {
-            'selector' : 'tr:nth-child(even)',
-            'props' : [('background-color', '#11B6BD')]
-            }]).to_html(), unsafe_allow_html=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-##############################
-# Contribution pie chart.
-st.plotly_chart(create_contribution_pie(),use_container_width=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-################################3
-# Stacked contribution chart for the scenario.
-st.plotly_chart(create_contribuion_stacked_plot(scenario),use_container_width=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-#######################################
-# Channel drill-down: the selectable list is the session's channels plus 'non media'.
-selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['non media'], format_func=channel_name_formating)
-# NOTE(review): .get() yields None when the selected name is not a key of
-# scenario.channels; confirm create_channel_spends_sales_plot accepts None.
-selected_channel = scenario.channels.get(selected_channel_name,None)
-st.plotly_chart(create_channel_spends_sales_plot(selected_channel), use_container_width=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-# elif auth_status == False:
-#     st.error('Username/Password is incorrect')
-# if auth_status != True:
-#     try:
-#         username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
-#         if username_forgot_pw:
-#             st.success('New password sent securely')
-#             # Random password to be transferred to user securely
-#         elif username_forgot_pw == False:
-#             st.error('Username not found')
-#     except Exception as e:
-#         st.error(e)

pages/5_Model_Tuning_with_panel.py DELETED Viewed

@@ -1,527 +0,0 @@
-'''
-MMO Build Sprint 3
-date :
-changes : capability to tune MixedLM as well as simple LR in the same page
-'''
-import streamlit as st
-import pandas as pd
-from Eda_functions import format_numbers
-import pickle
-from utilities import set_header, load_local_css
-import statsmodels.api as sm
-import re
-from sklearn.preprocessing import MinMaxScaler
-import matplotlib.pyplot as plt
-from statsmodels.stats.outliers_influence import variance_inflation_factor
-st.set_option('deprecation.showPyplotGlobalUse', False)
-import statsmodels.formula.api as smf
-from Data_prep_functions import *
-# for i in ["model_tuned", "X_train_tuned", "X_test_tuned", "tuned_model_features", "tuned_model", "tuned_model_dict"] :
-st.set_page_config(
-    page_title="Model Tuning",
-    page_icon=":shark:",
-    layout="wide",
-    initial_sidebar_state='collapsed'
-)
-load_local_css('styles.css')
-set_header()
-# Sprint3
-date_col = 'date'
-# Normalise the configured "Panel Level 1" names (lower-case, separators replaced or
-# removed) and use the first one as the panel column.
-panel_level_cols = [
-    col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
-    for col in st.session_state['bin_dict']['Panel Level 1']
-]
-# An empty "Panel Level 1" list means the data has no panel. Indexing [0] on it used
-# to raise IndexError, which made every non-panel (OLS) branch below unreachable.
-panel_col = panel_level_cols[0] if panel_level_cols else ''
-# Panel mode selects MixedLM further down; otherwise plain OLS is used.
-is_panel = bool(panel_col)
-# Sprint4 - tuned models are kept in a dict keyed by "<model>__<target_col>"
-if 'Model_Tuned' not in st.session_state:
-    st.session_state['Model_Tuned'] = {}
-st.title('1. Model Tuning')
-if "X_train" not in st.session_state:
-    st.error(
-        "Oops! It seems there are no saved models available. Please build and save a model from the previous page to proceed.")
-    st.stop()
-# Per-target flag recording whether a tuned model has been built.
-if "is_tuned_model" not in st.session_state:
-    st.session_state["is_tuned_model"] = {}
-# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
-if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics'] != []:
-    sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
-    target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
-else:
-    sel_target_col = 'Total Approved Accounts - Revenue'
-    target_col = 'total_approved_accounts_revenue'
-# Sprint4 - Look through all saved models, only show saved models of the sel resp metric (target_col)
-saved_models = st.session_state['saved_model_names']
-# Saved names look like "<model>__<target_col>". partition() never raises on a name
-# without the separator (split("__")[1] did); such names are simply left out.
-required_saved_models = [
-    model_name
-    for model_name, _, model_target in (m.partition("__") for m in saved_models)
-    if model_target == target_col
-]
-sel_model = st.selectbox("Select the model to tune", required_saved_models)
-if sel_model is None:
-    # The select box returns None when it has no options; stop here instead of
-    # failing on `None + "__"` below.
-    st.error("No saved model is available for the selected response metric. Please build and save one first.")
-    st.stop()
-# NOTE(review): pickle.load can execute arbitrary code; best_models.pkl must only ever
-# be a file written by this application.
-with open("best_models.pkl", 'rb') as file:
-    model_dict = pickle.load(file)
-sel_model_dict = model_dict[sel_model + "__" + target_col]  # Sprint4 - get the model obj of the selected model
-X_train = sel_model_dict['X_train']
-X_test = sel_model_dict['X_test']
-y_train = sel_model_dict['y_train']
-y_test = sel_model_dict['y_test']
-df = st.session_state['media_data']
-if 'selected_model' not in st.session_state:
-    st.session_state['selected_model'] = 0
-st.markdown('### 1.1 Event Flags')
-st.markdown('Helps in quantifying the impact of specific occurrences of events')
-with st.expander('Apply Event Flags'):
-    # Lets the user mark a date window on the actual-vs-predicted chart and store the
-    # resulting train/test flag values under a name, per response metric.
-    model = sel_model_dict['Model_object']
-    date = st.session_state['date']
-    date = pd.to_datetime(date)
-    X_train = sel_model_dict['X_train']
-    features_set = sel_model_dict["feature_set"]
-    col = st.columns(3)
-    min_date = min(date)
-    max_date = max(date)
-    with col[0]:
-        start_date = st.date_input('Select Start Date', min_date, min_value=min_date, max_value=max_date)
-    with col[1]:
-        end_date = st.date_input('Select End Date', max_date, min_value=min_date, max_value=max_date)
-    with col[2]:
-        repeat = st.selectbox('Repeat Annually', ['Yes', 'No'], index=1)
-    # Turn the Yes/No choice into the boolean passed as repeat_all_years.
-    repeat = repeat == 'Yes'
-    if 'Flags' not in st.session_state:
-        st.session_state['Flags'] = {}
-    if is_panel:  # Sprint3
-        met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train,
-                                                              model.fittedvalues, model,
-                                                              target_column=sel_target_col,
-                                                              flag=(start_date, end_date),
-                                                              repeat_all_years=repeat, is_panel=True)
-        st.plotly_chart(fig_flag, use_container_width=True)
-        # create flag on test
-        met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test,
-                                                                   sel_model_dict['pred_test'], model,
-                                                                   target_column=sel_target_col,
-                                                                   flag=(start_date, end_date),
-                                                                   repeat_all_years=repeat, is_panel=True)
-    else:
-        pred_train=model.predict(X_train[features_set])
-        met, line_values, fig_flag = plot_actual_vs_predicted(X_train[date_col], y_train, pred_train, model,
-                                                              flag=(start_date, end_date), repeat_all_years=repeat,is_panel=False)
-        st.plotly_chart(fig_flag, use_container_width=True)
-        # create flag on test
-        pred_test=model.predict(X_test[features_set])
-        met, test_line_values, fig_flag = plot_actual_vs_predicted(X_test[date_col], y_test, pred_test, model,
-                                                                   flag=(start_date, end_date), repeat_all_years=repeat,is_panel=False)
-    flag_name = st.text_input('Enter Flag Name')
-    # Sprint4 - add selected target col to flag name
-    if st.button('Update flag'):
-        if not flag_name.strip() or '__' in flag_name:
-            # Flags are stored as "<name>__<target_col>" and parsed back on the "__"
-            # separator, so a blank name or one containing "__" could never be
-            # selected again.
-            st.error('Please enter a flag name that is not empty and does not contain "__".')
-        else:
-            flag_key = flag_name + '__' + target_col
-            st.session_state['Flags'][flag_key] = {
-                'train': line_values,
-                'test': test_line_values,
-            }
-            st.success(f'{flag_key} stored')
-    # Sprint4 - only show flag created for the particular target col
-    st.write(st.session_state['Flags'].keys() )
-    # partition() tolerates keys without the separator (split("__")[1] raised IndexError).
-    target_model_flags = [
-        stored_name
-        for stored_name, _, stored_target in (f.partition("__") for f in st.session_state['Flags'].keys())
-        if stored_target == target_col
-    ]
-    options = list(target_model_flags)
-    selected_options = []
-    num_columns = 4
-    # Ceiling division: number of checkbox rows needed for `options`.
-    num_rows = -(-len(options) // num_columns)
-# "Select all" decides the default state of every flag checkbox below.
-tick = st.checkbox('Select all')
-selected_options = []
-# Lay the flag names out as a grid of `num_columns` checkboxes per row; grid cells
-# beyond the last flag stay empty.
-for _ in range(num_rows):
-    for cell in st.columns(num_columns):
-        if not options:
-            continue
-        flag_label = options.pop(0)
-        if cell.checkbox(flag_label, value=tick):
-            selected_options.append(flag_label)
-st.markdown('### 1.2 Select Parameters to Apply')
-# One checkbox plus a short description per optional time-based feature.
-trend_cell, week_cell, wave_cell = st.columns(3)
-with trend_cell:
-    Trend = st.checkbox("**Trend**")
-    st.markdown('Helps account for long-term trends or seasonality that could influence advertising effectiveness')
-with week_cell:
-    week_number = st.checkbox('**Week_number**')
-    st.markdown('Assists in detecting and incorporating weekly patterns or seasonality')
-with wave_cell:
-    sine_cosine = st.checkbox('**Sine and Cosine Waves**')
-    st.markdown('Helps in capturing cyclical patterns or seasonality in the data')
-#
-# def get_tuned_model():
-#     st.session_state['build_tuned_model']=True
-if st.button('Build model with Selected Parameters and Flags', key='build_tuned_model'):
-    # Refit the selected model with the chosen event flags and time-based features,
-    # compare it with the original model, and keep the result in session state.
-    new_features = features_set
-    st.header('2.1 Results Summary')
-    # Explicit copies: the columns added below must not be chained assignments on
-    # slices of the saved model's X_train / X_test.
-    X_train_tuned = X_train[features_set].copy()
-    X_test_tuned = X_test[features_set].copy()
-    if is_panel:
-        # The mixed-model formula and the per-panel features need these columns too.
-        for extra_col in (target_col, date_col, panel_col):
-            X_train_tuned[extra_col] = X_train[extra_col]
-            X_test_tuned[extra_col] = X_test[extra_col]
-    for flag in selected_options:
-        # Sprint4 - flags are stored under "<flag>__<target_col>"
-        X_train_tuned[flag] = st.session_state['Flags'][flag + "__" + target_col]['train']
-        X_test_tuned[flag] = st.session_state['Flags'][flag + "__" + target_col]['test']
-    if Trend:
-        # Sprint3 - group by panel, calculate trend of each panel separately. Add trend to new feature set
-        if is_panel:
-            newdata = pd.DataFrame()
-            panel_wise_end_point_train = {}
-            for panel, groupdf in X_train_tuned.groupby(panel_col):
-                # sort_values returns a new frame, so the column added next is not a
-                # chained assignment on the groupby chunk.
-                groupdf = groupdf.sort_values(date_col)
-                groupdf['Trend'] = np.arange(1, len(groupdf) + 1, 1)
-                newdata = pd.concat([newdata, groupdf])
-                panel_wise_end_point_train[panel] = len(groupdf)
-            X_train_tuned = newdata.copy()
-            test_newdata = pd.DataFrame()
-            for panel, test_groupdf in X_test_tuned.groupby(panel_col):
-                test_groupdf = test_groupdf.sort_values(date_col)
-                # Continue the counter where this panel's training trend stopped.
-                # np.arange excludes `end`, so this yields exactly len(test_groupdf) values.
-                start = panel_wise_end_point_train[panel] + 1
-                end = start + len(test_groupdf)
-                test_groupdf['Trend'] = np.arange(start, end, 1)
-                test_newdata = pd.concat([test_newdata, test_groupdf])
-            X_test_tuned = test_newdata.copy()
-            new_features = new_features + ['Trend']
-        else:
-            X_train_tuned['Trend'] = np.arange(1, len(X_train_tuned) + 1, 1)
-            X_test_tuned['Trend'] = np.arange(len(X_train_tuned) + 1, len(X_train_tuned) + len(X_test_tuned) + 1, 1)
-            new_features = new_features + ['Trend']
-    if week_number:
-        # Sprint3 - create weeknumber from date column in xtrain tuned. add week num to new feature set
-        # NOTE(review): despite its name the column holds .dt.day_of_week (the weekday),
-        # not a calendar week number - confirm which one is intended.
-        if is_panel:
-            X_train_tuned[date_col] = pd.to_datetime(X_train_tuned[date_col])
-            X_train_tuned['Week_number'] = X_train_tuned[date_col].dt.day_of_week
-            if X_train_tuned['Week_number'].nunique() == 1:
-                st.write("All dates in the data are of the same week day. Hence Week number can't be used.")
-            else:
-                X_test_tuned[date_col] = pd.to_datetime(X_test_tuned[date_col])
-                X_test_tuned['Week_number'] = X_test_tuned[date_col].dt.day_of_week
-                new_features = new_features + ['Week_number']
-        else:
-            date = pd.to_datetime(date.values)
-            X_train_tuned['Week_number'] = pd.to_datetime(X_train[date_col]).dt.day_of_week
-            X_test_tuned['Week_number'] = pd.to_datetime(X_test[date_col]).dt.day_of_week
-            new_features = new_features + ['Week_number']
-    if sine_cosine:
-        # Sprint3 - create panel wise sine cosine waves in xtrain tuned. add to new feature set
-        new_features = new_features + ['sine_wave', 'cosine_wave']
-        frequency = 2 * np.pi / 365  # Adjust the frequency as needed
-        if is_panel:
-            newdata = pd.DataFrame()
-            newdata_test = pd.DataFrame()
-            train_panel_wise_end_point = {}
-            for panel, groupdf in X_train_tuned.groupby(panel_col):
-                groupdf = groupdf.copy()
-                num_samples = len(groupdf)
-                train_panel_wise_end_point[panel] = num_samples
-                days_since_start = np.arange(num_samples)
-                groupdf['sine_wave'] = np.sin(frequency * days_since_start)
-                groupdf['cosine_wave'] = np.cos(frequency * days_since_start)
-                newdata = pd.concat([newdata, groupdf])
-            X_train_tuned = newdata.copy()
-            for panel, test_groupdf in X_test_tuned.groupby(panel_col):
-                test_groupdf = test_groupdf.copy()
-                num_samples = len(test_groupdf)
-                # The test wave continues from the end of this panel's training wave.
-                start = train_panel_wise_end_point[panel]
-                days_since_start = np.arange(start, start + num_samples, 1)
-                test_groupdf['sine_wave'] = np.sin(frequency * days_since_start)
-                test_groupdf['cosine_wave'] = np.cos(frequency * days_since_start)
-                newdata_test = pd.concat([newdata_test, test_groupdf])
-            X_test_tuned = newdata_test.copy()
-        else:
-            # Assign the waves as columns, as the panel branch does. The previous
-            # pd.concat(axis=1) aligned on index labels, which misaligns rows (NaNs and
-            # extra rows) whenever the frame's index is not 0..n-1.
-            num_samples = len(X_train_tuned)
-            days_since_start = np.arange(num_samples)
-            X_train_tuned['sine_wave'] = np.sin(frequency * days_since_start)
-            X_train_tuned['cosine_wave'] = np.cos(frequency * days_since_start)
-            test_num_samples = len(X_test_tuned)
-            start = num_samples
-            days_since_start = np.arange(start, start + test_num_samples, 1)
-            X_test_tuned['sine_wave'] = np.sin(frequency * days_since_start)
-            X_test_tuned['cosine_wave'] = np.cos(frequency * days_since_start)
-    # model
-    if selected_options:
-        new_features = new_features + selected_options
-    # De-duplicate while keeping insertion order: list(set(...)) produced an arbitrary
-    # column order for the stored feature set.
-    new_features = list(dict.fromkeys(new_features))
-    if is_panel:
-        inp_vars_str = " + ".join(new_features)
-        md_str = target_col + " ~ " + inp_vars_str
-        md_tuned = smf.mixedlm(md_str,
-                               data=X_train_tuned[[target_col] + new_features],
-                               groups=X_train_tuned[panel_col])
-        model_tuned = md_tuned.fit()
-        # plot act v pred for original model and tuned model
-        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train,
-                                                                                 model.fittedvalues, model,
-                                                                                 target_column=sel_target_col,
-                                                                                 is_panel=True)
-        metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(X_train_tuned[date_col],
-                                                                                             X_train_tuned[target_col],
-                                                                                             model_tuned.fittedvalues,
-                                                                                             model_tuned,
-                                                                                             target_column=sel_target_col,
-                                                                                             is_panel=True)
-    else:
-        model_tuned = sm.OLS(y_train, X_train_tuned[new_features]).fit()
-        # Use the training dates themselves rather than a hard-coded date[:130] slice,
-        # and predict on the columns in the order the model was fitted with.
-        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train[date_col], y_train,
-                                                                                 model.predict(X_train[features_set]), model,
-                                                                                 target_column=sel_target_col)
-        metrics_table_tuned, line, actual_vs_predicted_plot_tuned = plot_actual_vs_predicted(X_train[date_col], y_train,
-                                                                                             model_tuned.predict(
-                                                                                                 X_train_tuned[new_features]),
-                                                                                             model_tuned,
-                                                                                             target_column=sel_target_col)
-    # Rows 0, 1, 2 of the metrics tables are read as MAPE, R2 and adjusted R2.
-    mape = np.round(metrics_table.iloc[0, 1], 2)
-    r2 = np.round(metrics_table.iloc[1, 1], 2)
-    adjr2 = np.round(metrics_table.iloc[2, 1], 2)
-    mape_tuned = np.round(metrics_table_tuned.iloc[0, 1], 2)
-    r2_tuned = np.round(metrics_table_tuned.iloc[1, 1], 2)
-    adjr2_tuned = np.round(metrics_table_tuned.iloc[2, 1], 2)
-    # Each tile shows the tuned value with its change against the original model.
-    parameters_ = st.columns(3)
-    with parameters_[0]:
-        st.metric('R2', r2_tuned, np.round(r2_tuned - r2, 2))
-    with parameters_[1]:
-        st.metric('Adjusted R2', adjr2_tuned, np.round(adjr2_tuned - adjr2, 2))
-    with parameters_[2]:
-        st.metric('MAPE', mape_tuned, np.round(mape_tuned - mape, 2), 'inverse')
-    st.write(model_tuned.summary())
-    # Attach date and target to the tuned frames for the plots below and for saving.
-    X_train_tuned[date_col] = X_train[date_col]
-    X_test_tuned[date_col] = X_test[date_col]
-    X_train_tuned[target_col] = y_train
-    X_test_tuned[target_col] = y_test
-    st.header('2.2 Actual vs. Predicted Plot')
-    if is_panel :
-        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train_tuned[date_col],
-                                                                                 X_train_tuned[target_col],
-                                                                                 model_tuned.fittedvalues, model_tuned,
-                                                                                 target_column=sel_target_col,
-                                                                                 is_panel=True)
-    else :
-        metrics_table, line, actual_vs_predicted_plot = plot_actual_vs_predicted(X_train_tuned[date_col],
-                                                                                 X_train_tuned[target_col],
-                                                                                 model_tuned.predict(X_train_tuned[new_features]),
-                                                                                 model_tuned,
-                                                                                 target_column=sel_target_col,
-                                                                                 is_panel=False)
-    st.plotly_chart(actual_vs_predicted_plot, use_container_width=True)
-    st.markdown('## 2.3 Residual Analysis')
-    if is_panel :
-        columns = st.columns(2)
-        with columns[0]:
-            fig = plot_residual_predicted(y_train, model_tuned.fittedvalues, X_train_tuned)
-            st.plotly_chart(fig)
-        with columns[1]:
-            st.empty()
-            fig = qqplot(y_train, model_tuned.fittedvalues)
-            st.plotly_chart(fig)
-        with columns[0]:
-            fig = residual_distribution(y_train, model_tuned.fittedvalues)
-            st.pyplot(fig)
-    else:
-        columns = st.columns(2)
-        with columns[0]:
-            fig = plot_residual_predicted(y_train, model_tuned.predict(X_train_tuned[new_features]), X_train)
-            st.plotly_chart(fig)
-        with columns[1]:
-            st.empty()
-            fig = qqplot(y_train, model_tuned.predict(X_train_tuned[new_features]))
-            st.plotly_chart(fig)
-        with columns[0]:
-            fig = residual_distribution(y_train, model_tuned.predict(X_train_tuned[new_features]))
-            st.pyplot(fig)
-    st.session_state['is_tuned_model'][target_col] = True
-    # Sprint4 - saved tuned model in a dict
-    st.session_state['Model_Tuned'][sel_model + "__" + target_col] = {
-                                                                      "Model_object": model_tuned,
-                                                                      'feature_set': new_features,
-                                                                      'X_train_tuned': X_train_tuned,
-                                                                      'X_test_tuned': X_test_tuned
-                                                                      }
-# Pending
-# Offer the save option only once a tuned model exists for the selected model and
-# target. The previous `is not None` test was always true (the dict is initialised to
-# {}), so the page could report "Tuned saved!" without any tuned model.
-tuned_model_key = sel_model + "__" + target_col
-if tuned_model_key in st.session_state['Model_Tuned']:
-    if st.checkbox('Use this model to build response curves', key='save_model'):
-        st.session_state["is_tuned_model"][target_col]=True
-        # Persist every tuned model so other pages can load them from disk.
-        with open("tuned_model.pkl", "wb") as f:
-            pickle.dump(st.session_state['Model_Tuned'], f)  # Sprint4
-        st.success(tuned_model_key + ' Tuned saved!')
-    # if is_panel:
-    #     # st.session_state["tuned_model_features"] = new_features
-    #     with open("tuned_model.pkl", "wb") as f:
-    #         # pickle.dump(st.session_state['tuned_model'], f)
-    #         pickle.dump(st.session_state['Model_Tuned'], f)  # Sprint4
-    #     st.success(sel_model + "__" + target_col + ' Tuned saved!')
-#   raw_data=df[features_set]
-#   columns_raw=[re.split(r"(_lag|_adst)",col)[0] for col in raw_data.columns]
-#   raw_data.columns=columns_raw
-#   columns_media=[col for col in columns_raw if Categorised_data[col]['BB']=='Media']
-#   raw_data=raw_data[columns_media]
-#   raw_data['Date']=list(df.index)
-#   spends_var=[col for col in df.columns if "spends" in col.lower() and 'adst' not in col.lower() and 'lag' not in col.lower()]
-#   spends_df=df[spends_var]
-#   spends_df['Week']=list(df.index)
-#   j=0
-#   X1=X.copy()
-#   col=X1.columns
-#   for i in model.params.values:
-#       X1[col[j]]=X1.iloc[:,j]*i
-#       j+=1
-#   contribution_df=X1
-#   contribution_df['Date']=list(df.index)
-#   excel_file='Overview_data.xlsx'
-#   with pd.ExcelWriter(excel_file,engine='xlsxwriter') as writer:
-#      raw_data.to_excel(writer,sheet_name='RAW DATA MMM',index=False)
-#      spends_df.to_excel(writer,sheet_name='SPEND INPUT',index=False)
-#      contribution_df.to_excel(writer,sheet_name='CONTRIBUTION MMM')

pages/6_Build_Response_Curves.py DELETED Viewed

@@ -1,168 +0,0 @@
-import streamlit as st
-import plotly.express as px
-import numpy as np
-import plotly.graph_objects as go
-from utilities import channel_name_formating, load_authenticator, initialize_data
-from sklearn.metrics import r2_score
-from collections import OrderedDict
-from classes import class_from_dict,class_to_dict
-import pickle
-import json
-# Re-assign every session-state entry to itself, skipping the 'logout', 'login'
-# and 'config' keys and any key that starts with 'FormSubmitter'.
-# NOTE(review): presumably this keeps widget-backed values alive when the user
-# switches pages — confirm against Streamlit's session-state behaviour.
-for k, v in st.session_state.items():
-    if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
-        st.session_state[k] = v
-def s_curve(x,K,b,a,x0):
-    """
-    Evaluate the S-curve K / (1 + b * exp(-a * (x - x0))) at x
-    Parameters
-    ----------
-    x
-        Point(s) at which the curve is evaluated
-    K, b, a, x0
-        Curve parameters as they appear in the formula above
-    """
-    decay = np.exp(-a * (x - x0))
-    denominator = 1 + b * decay
-    return K / denominator
-def save_scenario(scenario_name):
-    """
-    Save the current scenario with the mentioned name in the session state
-    and write all saved scenarios to '../saved_scenarios.pkl'
-    Parameters
-    ----------
-    scenario_name
-        Name of the scenario to be saved
-    """
-    if 'saved_scenarios' not in st.session_state:
-        # Create only the missing collection. Assigning to st.session_state itself
-        # would replace the whole session-state object and make the lookup below fail.
-        st.session_state['saved_scenarios'] = OrderedDict()
-    st.session_state['saved_scenarios'][scenario_name] = class_to_dict(st.session_state['scenario'])
-    # Clear the scenario-name text box once the scenario has been stored
-    st.session_state['scenario_input'] = ""
-    with open('../saved_scenarios.pkl', 'wb') as f:
-        pickle.dump(st.session_state['saved_scenarios'],f)
-def reset_curve_parameters():
-    """
-    Remove the curve parameters 'K', 'b', 'a' and 'x0' from the session state
-    Keys that are already absent are skipped, so the callback does not raise
-    KeyError when it fires before the parameters have been re-seeded.
-    """
-    for param in ('K', 'b', 'a', 'x0'):
-        if param in st.session_state:
-            del st.session_state[param]
-def update_response_curve():
-    """
-    Push the 'K', 'b', 'a' and 'x0' values held in the session state into the
-    currently selected channel of the scenario
-    Reads the module-level selected_channel_name.
-    """
-    new_params = {name: st.session_state[name] for name in ('K', 'b', 'a', 'x0')}
-    scenario_channels = st.session_state['scenario'].channels
-    scenario_channels[selected_channel_name].update_response_curves(new_params)
-# authenticator = st.session_state.get('authenticator')
-# if authenticator is None:
-#     authenticator = load_authenticator()
-# name, authentication_status, username = authenticator.login('Login', 'main')
-# auth_status = st.session_state.get('authentication_status')
-# if auth_status == True:
-#     is_state_initiaized = st.session_state.get('initialized',False)
-#     if not is_state_initiaized:
-#         print("Scenario page state reloaded")
-initialize_data()
-st.subheader("Build response curves")
-channels_list = st.session_state['channels_list']
-selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['Others'], format_func=channel_name_formating,on_change=reset_curve_parameters)
-rcs = {}
-for channel_name in channels_list:
-    rcs[channel_name] = st.session_state['scenario'].channels[channel_name].response_curve_params
-# rcs = st.session_state['rcs']
-if 'K' not in st.session_state:
-    st.session_state['K'] = rcs[selected_channel_name]['K']
-if 'b' not in st.session_state:
-    st.session_state['b'] = rcs[selected_channel_name]['b']
-if 'a' not in st.session_state:
-    st.session_state['a'] = rcs[selected_channel_name]['a']
-if 'x0' not in st.session_state:
-    st.session_state['x0'] = rcs[selected_channel_name]['x0']
-x = st.session_state['actual_input_df'][selected_channel_name].values
-y = st.session_state['actual_contribution_df'][selected_channel_name].values
-power = (np.ceil(np.log(x.max()) / np.log(10) )- 3)
-# fig = px.scatter(x, s_curve(x/10**power,
-#                             st.session_state['K'],
-#                             st.session_state['b'],
-#                             st.session_state['a'],
-#                             st.session_state['x0']))
-fig = px.scatter(x=x, y=y)
-fig.add_trace(go.Scatter(x=sorted(x), y=s_curve(sorted(x)/10**power,st.session_state['K'],
-                                    st.session_state['b'],
-                                    st.session_state['a'],
-                                    st.session_state['x0']),
-                        line=dict(color='red')))
-fig.update_layout(title_text="Response Curve",showlegend=False)
-fig.update_annotations(font_size=10)
-fig.update_xaxes(title='Spends')
-fig.update_yaxes(title='Revenue')
-st.plotly_chart(fig,use_container_width=True)
-r2 = r2_score(y, s_curve(x / 10**power,
-                        st.session_state['K'],
-                        st.session_state['b'],
-                        st.session_state['a'],
-                        st.session_state['x0']))
-st.metric('R2',round(r2,2))
-columns = st.columns(4)
-with columns[0]:
-    st.number_input('K',key='K',format="%0.5f")
-with columns[1]:
-    st.number_input('b',key='b',format="%0.5f")
-with columns[2]:
-    st.number_input('a',key='a',step=0.0001,format="%0.5f")
-with columns[3]:
-    st.number_input('x0',key='x0',format="%0.5f")
-st.button('Update parameters',on_click=update_response_curve)
-st.button('Reset parameters',on_click=reset_curve_parameters)
-scenario_name = st.text_input('Scenario name', key='scenario_input',placeholder='Scenario name',label_visibility='collapsed')
-st.button('Save', on_click=lambda  : save_scenario(scenario_name),disabled=len(st.session_state['scenario_input']) == 0)
-file_name = st.text_input('rcs download file name', key='file_name_input',placeholder='file name',label_visibility='collapsed')
-st.download_button(
-                    label="Download response curves",
-                    data=json.dumps(rcs),
-                    file_name=f"{file_name}.json",
-                    mime="application/json",
-                    disabled= len(file_name) == 0,
-                )
-def s_curve_derivative(x, K, b, a, x0):
-    """
-    First derivative with respect to x of K / (1 + b * exp(-a * (x - x0)))
-    """
-    decay = np.exp(-a * (x - x0))
-    squared_denominator = (1 + b * decay) ** 2
-    return a * b * K * decay / squared_denominator
-# Current S-curve parameters, read from the session state (the same keys back the
-# K / b / a / x0 number inputs above)
-K = st.session_state['K']
-b = st.session_state['b']
-a = st.session_state['a']
-x0 = st.session_state['x0']
-# Point at which the slope is evaluated, typed in by the user.
-# NOTE(review): the plotted curve is evaluated at x / 10**power, while this value
-# is passed to the derivative unscaled — confirm which scale the user should enter.
-optimized_spend = st.number_input('value of x')  # e.g. an optimized spend value
-# Slope of the S-curve at the entered value
-slope_at_optimized_spend = s_curve_derivative(optimized_spend, K, b, a, x0)
-st.write("Slope ", slope_at_optimized_spend)

pages/6_Model_Result_Overview.py DELETED Viewed

@@ -1,348 +0,0 @@
-'''
-MMO Build Sprint 3
-additions : contributions calculated using tuned Mixed LM model
-pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
-MMO Build Sprint 4
-additions : response metrics selection
-pending : contributions calculations using - 1. not tuned Mixed LM model, 2. tuned OLS model, 3. not tuned OLS model
-'''
-import streamlit as st
-import pandas as pd
-from sklearn.preprocessing import MinMaxScaler
-import pickle
-from utilities_with_panel import (set_header,
-                                  overview_test_data_prep_panel,
-                                  overview_test_data_prep_nonpanel,
-                                  initialize_data,
-                                  load_local_css,
-                                  create_channel_summary,
-                                  create_contribution_pie,
-                                  create_contribuion_stacked_plot,
-                                  create_channel_spends_sales_plot,
-                                  format_numbers,
-                                  channel_name_formating)
-import plotly.graph_objects as go
-import streamlit_authenticator as stauth
-import yaml
-from yaml import SafeLoader
-import time
-st.set_page_config(layout='wide')
-load_local_css('styles.css')
-set_header()
-def get_random_effects(media_data, panel_col, mdf):
-    """
-    Collect the first random-effect value of every panel into a DataFrame
-    Parameters
-    ----------
-    media_data
-        DataFrame whose panel_col column lists the panels
-    panel_col
-        Name of the panel column
-    mdf
-        Fitted model result exposing random_effects, indexable by panel value
-    Returns
-    -------
-    DataFrame with one row per unique panel (in order of first appearance) and
-    the columns [panel_col, 'random_effect']
-    """
-    # Build all rows first and create the frame once: this avoids growing the frame
-    # cell by cell and lets pandas infer real dtypes instead of object columns.
-    rows = [
-        (market, mdf.random_effects[market].values[0])
-        for market in media_data[panel_col].unique()
-    ]
-    return pd.DataFrame(rows, columns=[panel_col, "random_effect"])
-def process_train_and_test(train, test, features, panel_col, target_col):
-    """
-    Min-max scale the feature columns and re-attach the panel and target columns
-    The scaler is fitted on train only and then applied to test.
-    Parameters
-    ----------
-    train
-        Training DataFrame containing features, panel_col and target_col
-    test
-        Test DataFrame with the same columns, or None
-    features
-        List of feature column names to scale
-    panel_col
-        Name of the panel column, copied over unscaled
-    target_col
-        Name of the target column, copied over unscaled
-    Returns
-    -------
-    (X1, X2) when test is given, otherwise X1 only
-    """
-    ss = MinMaxScaler()
-    X1 = train[features]
-    # Keep the source index: the column assignments below align on index labels, so a
-    # fresh 0..n-1 index would misalign or NaN-fill rows of a non-default-indexed frame.
-    X1 = pd.DataFrame(ss.fit_transform(X1), columns=X1.columns, index=train.index)
-    X1[panel_col] = train[panel_col]
-    X1[target_col] = train[target_col]
-    if test is None:
-        return X1
-    X2 = test[features]
-    X2 = pd.DataFrame(ss.transform(X2), columns=X2.columns, index=test.index)
-    X2[panel_col] = test[panel_col]
-    X2[target_col] = test[target_col]
-    return X1, X2
-def mdf_predict(X_df, mdf, random_eff_df) :
-    """
-    Add a 'pred' column: the model prediction plus the panel's random effect
-    The random effects are left-joined onto a copy of X_df using the module-level
-    panel_col. The intermediate frame is written to 'Test/merged_df_contri.csv'
-    before the helper columns are dropped.
-    """
-    effects = random_eff_df[[panel_col,'random_effect']]
-    merged = pd.merge(X_df.copy(), effects, on=panel_col, how='left')
-    merged['pred_fixed_effect'] = mdf.predict(merged)
-    merged['pred'] = merged['pred_fixed_effect'] + merged['random_effect']
-    merged.to_csv('Test/merged_df_contri.csv',index=False)
-    return merged.drop(columns=['pred_fixed_effect', 'random_effect'])
-target='Revenue'
-# is_panel=False
-# is_panel = st.session_state['is_panel']
-# Normalise the configured 'Panel Level 1' names with the same replacements that are
-# applied to the data columns further down
-_panel_cols = [col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in  st.session_state['bin_dict']['Panel Level 1']  ]
-# An empty 'Panel Level 1' list means there is no panel column: fall back to '' so the
-# non-panel branch is taken instead of raising IndexError here
-panel_col = _panel_cols[0] if _panel_cols else ''
-date_col = 'date'
-is_panel = bool(panel_col)
-# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
-if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics']!=[]:
-    sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
-    target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
-else :
-    sel_target_col = 'Total Approved Accounts - Revenue'
-    target_col = 'total_approved_accounts_revenue'
-# Sprint4 - Look through all saved tuned models, only show saved models of the sel resp metric (target_col)
-# saved_models = st.session_state['saved_model_names']
- # Sprint4 - get the model obj of the selected model
-# st.write(sel_model_dict)
-# Sprint3 - Contribution
-if is_panel:
-    # read tuned mixedLM model
-    # if st.session_state["tuned_model"] is not None :
-    if st.session_state["is_tuned_model"][target_col]==True: #Sprint4
-        with open("tuned_model.pkl", 'rb') as file:
-            model_dict = pickle.load(file)
-        saved_models = list(model_dict.keys())
-        required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
-        sel_model = st.selectbox("Select the model to review", required_saved_models)
-        sel_model_dict = model_dict[sel_model + "__" + target_col]
-        # model=st.session_state["tuned_model"]
-        # X_train=st.session_state["X_train_tuned"]
-        # X_test=st.session_state["X_test_tuned"]
-        # best_feature_set=st.session_state["tuned_model_features"]
-        model=sel_model_dict["Model_object"]
-        X_train=sel_model_dict["X_train_tuned"]
-        X_test=sel_model_dict["X_test_tuned"]
-        best_feature_set=sel_model_dict["feature_set"]
-        # st.write("features", best_feature_set)
-        # st.write(X_test.columns)
-    else : # if non tuned model to be used # Pending
-        with open("best_models.pkl", 'rb') as file:
-            model_dict = pickle.load(file)
-        saved_models = list(model_dict.keys())
-        required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
-        sel_model = st.selectbox("Select the model to review", required_saved_models)
-        sel_model_dict = model_dict[sel_model + "__" + target_col]
-        model=st.session_state["base_model"]
-        X_train = st.session_state['X_train']
-        X_test = st.session_state['X_test']
-        # y_train = st.session_state['y_train']
-        # y_test = st.session_state['y_test']
-        best_feature_set = st.session_state['base_model_feature_set']
-        # st.write(best_feature_set)
-        # st.write(X_test.columns)
-    # Calculate contributions
-    with open("data_import.pkl", "rb") as f:
-        data = pickle.load(f)
-    # Accessing the loaded objects
-    st.session_state['orig_media_data'] = data["final_df"]
-    st.session_state['orig_media_data'].columns=[col.lower().replace('.','_').replace('@','_').replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_") for col in st.session_state['orig_media_data'].columns]
-    media_data = st.session_state["media_data"]
-    # st.session_state['orig_media_data']=st.session_state["media_data"]
-    #st.write(media_data)
-    contri_df = pd.DataFrame()
-    y = []
-    y_pred = []
-    random_eff_df = get_random_effects(media_data, panel_col, model)
-    random_eff_df['fixed_effect'] = model.fe_params['Intercept']
-    random_eff_df['panel_effect'] = random_eff_df['random_effect'] + random_eff_df['fixed_effect']
-    # random_eff_df.to_csv("Test/random_eff_df_contri.csv", index=False)
-    coef_df = pd.DataFrame(model.fe_params)
-    coef_df.columns = ['coef']
-    # coef_df.reset_index().to_csv("Test/coef_df_contri1.csv",index=False)
-    # print(model.fe_params)
-    x_train_contribution = X_train.copy()
-    x_test_contribution = X_test.copy()
-    # preprocessing not needed since X_train is already preprocessed
-    # X1, X2 = process_train_and_test(x_train_contribution, x_test_contribution, best_feature_set, panel_col, target_col)
-    # x_train_contribution[best_feature_set] = X1[best_feature_set]
-    # x_test_contribution[best_feature_set] = X2[best_feature_set]
-    x_train_contribution = mdf_predict(x_train_contribution, model, random_eff_df)
-    x_test_contribution = mdf_predict(x_test_contribution, model, random_eff_df)
-    x_train_contribution = pd.merge(x_train_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col,
-                                    how='left')
-    x_test_contribution = pd.merge(x_test_contribution, random_eff_df[[panel_col, 'panel_effect']], on=panel_col,
-                                   how='left')
-    inp_coef = coef_df['coef'][1:].tolist() # 0th index is intercept
-    for i in range(len(inp_coef)):
-        x_train_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_train_contribution[best_feature_set[i]]
-        x_test_contribution[str(best_feature_set[i]) + "_contr"] = inp_coef[i] * x_test_contribution[best_feature_set[i]]
-    x_train_contribution['sum_contributions'] = x_train_contribution.filter(regex="contr").sum(axis=1)
-    x_train_contribution['sum_contributions'] = x_train_contribution['sum_contributions'] + x_train_contribution['panel_effect']
-    x_test_contribution['sum_contributions'] = x_test_contribution.filter(regex="contr").sum(axis=1)
-    x_test_contribution['sum_contributions'] = x_test_contribution['sum_contributions'] + x_test_contribution['panel_effect']
-    # # test
-    x_train_contribution.to_csv("Test/x_train_contribution.csv",index=False)
-    x_test_contribution.to_csv("Test/x_test_contribution.csv",index=False)
-    #
-    # st.session_state['orig_media_data'].to_csv("Test/transformed_data.csv",index=False)
-    # st.session_state['X_test_spends'].to_csv("Test/test_spends.csv",index=False)
-    # # st.write(st.session_state['orig_media_data'].columns)
-    st.write(date_col,panel_col)
-    # st.write(x_test_contribution)
-    overview_test_data_prep_panel(x_test_contribution, st.session_state['orig_media_data'], st.session_state['X_test_spends'],
-                        date_col, panel_col, target_col)
-else : # NON PANEL
-    if st.session_state["is_tuned_model"][target_col]==True: #Sprint4
-        with open("tuned_model.pkl", 'rb') as file:
-            model_dict = pickle.load(file)
-        saved_models = list(model_dict.keys())
-        required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
-        sel_model = st.selectbox("Select the model to review", required_saved_models)
-        sel_model_dict = model_dict[sel_model + "__" + target_col]
-        model=sel_model_dict["Model_object"]
-        X_train=sel_model_dict["X_train_tuned"]
-        X_test=sel_model_dict["X_test_tuned"]
-        best_feature_set=sel_model_dict["feature_set"]
-    else : #Sprint4
-        with open("best_models.pkl", 'rb') as file:
-            model_dict = pickle.load(file)
-        saved_models = list(model_dict.keys())
-        required_saved_models = [m.split("__")[0] for m in saved_models if m.split("__")[1] == target_col]
-        sel_model = st.selectbox("Select the model to review", required_saved_models)
-        sel_model_dict = model_dict[sel_model + "__" + target_col]
-        model=sel_model_dict["Model_object"]
-        X_train=sel_model_dict["X_train"]
-        X_test=sel_model_dict["X_test"]
-        best_feature_set=sel_model_dict["feature_set"]
-    x_train_contribution = X_train.copy()
-    x_test_contribution = X_test.copy()
-    x_train_contribution['pred'] = model.predict(x_train_contribution[best_feature_set])
-    x_test_contribution['pred'] = model.predict(x_test_contribution[best_feature_set])
-    for num,i in enumerate(model.params.values):
-        col=best_feature_set[num]
-        x_train_contribution[col + "_contr"] = X_train[col] * i
-        x_test_contribution[col + "_contr"] = X_test[col] * i
-    x_test_contribution.to_csv("Test/x_test_contribution_non_panel.csv",index=False)
-    overview_test_data_prep_nonpanel(x_test_contribution, st.session_state['orig_media_data'].copy(), st.session_state['X_test_spends'].copy(), date_col, target_col)
-# for k, v in st.session_state.items():
-#     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
-#         st.session_state[k] = v
-# authenticator = st.session_state.get('authenticator')
-# if authenticator is None:
-#     authenticator = load_authenticator()
-# name, authentication_status, username = authenticator.login('Login', 'main')
-# auth_status = st.session_state['authentication_status']
-# if auth_status:
-#     authenticator.logout('Logout', 'main')
-#     is_state_initiaized = st.session_state.get('initialized',False)
-#     if not is_state_initiaized:
-initialize_data(target_col)
-scenario = st.session_state['scenario']
-raw_df = st.session_state['raw_df']
-st.header('Overview of previous spends')
-# st.write(scenario.actual_total_spends)
-# st.write(scenario.actual_total_sales)
-columns = st.columns((1,1,3))
-with columns[0]:
-    st.metric(label='Spends', value=format_numbers(float(scenario.actual_total_spends)))
-###print(f"##################### {scenario.actual_total_sales} ##################")
-with columns[1]:
-    st.metric(label=target, value=format_numbers(float(scenario.actual_total_sales),include_indicator=False))
-actual_summary_df = create_channel_summary(scenario)
-actual_summary_df['Channel'] = actual_summary_df['Channel'].apply(channel_name_formating)
-columns = st.columns((2,1))
-with columns[0]:
-    with st.expander('Channel wise overview'):
-        st.markdown(actual_summary_df.style.set_table_styles(
-        [{
-            'selector': 'th',
-            'props': [('background-color', '#11B6BD')]
-        },
-            {
-            'selector' : 'tr:nth-child(even)',
-            'props' : [('background-color', '#11B6BD')]
-            }]).to_html(), unsafe_allow_html=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-##############################
-st.plotly_chart(create_contribution_pie(scenario),use_container_width=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-################################3
-st.plotly_chart(create_contribuion_stacked_plot(scenario),use_container_width=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-#######################################
-selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['non media'], format_func=channel_name_formating)
-selected_channel = scenario.channels.get(selected_channel_name,None)
-st.plotly_chart(create_channel_spends_sales_plot(selected_channel), use_container_width=True)
-st.markdown("<hr>",unsafe_allow_html=True)
-# elif auth_status == False:
-#     st.error('Username/Password is incorrect')
-# if auth_status != True:
-#     try:
-#         username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
-#         if username_forgot_pw:
-#             st.success('New password sent securely')
-#             # Random password to be transferred to user securely
-#         elif username_forgot_pw == False:
-#             st.error('Username not found')
-#     except Exception as e:
-#         st.error(e)

pages/7_Build_Response_Curves.py DELETED Viewed

@@ -1,185 +0,0 @@
-import streamlit as st
-import plotly.express as px
-import numpy as np
-import plotly.graph_objects as go
-from utilities_with_panel import channel_name_formating, load_authenticator, initialize_data
-from sklearn.metrics import r2_score
-from collections import OrderedDict
-from classes import class_from_dict,class_to_dict
-import pickle
-import json
-from utilities import (
-    load_local_css,
-    set_header,
-    channel_name_formating,
-)
-# Re-assign every session-state entry to itself, skipping the 'logout', 'login'
-# and 'config' keys and any key that starts with 'FormSubmitter'.
-# NOTE(review): presumably this keeps widget-backed values alive when the user
-# switches pages — confirm against Streamlit's session-state behaviour.
-for k, v in st.session_state.items():
-    if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
-        st.session_state[k] = v
-def s_curve(x,K,b,a,x0):
-    """
-    Evaluate the S-curve K / (1 + b * exp(-a * (x - x0))) at x
-    Parameters
-    ----------
-    x
-        Point(s) at which the curve is evaluated
-    K, b, a, x0
-        Curve parameters as they appear in the formula above
-    """
-    decay = np.exp(-a * (x - x0))
-    denominator = 1 + b * decay
-    return K / denominator
-def save_scenario(scenario_name):
-    """
-    Save the current scenario with the mentioned name in the session state
-    and write all saved scenarios to '../saved_scenarios.pkl'
-    Parameters
-    ----------
-    scenario_name
-        Name of the scenario to be saved
-    """
-    if 'saved_scenarios' not in st.session_state:
-        # Create only the missing collection. Assigning to st.session_state itself
-        # would replace the whole session-state object and make the lookup below fail.
-        st.session_state['saved_scenarios'] = OrderedDict()
-    st.session_state['saved_scenarios'][scenario_name] = class_to_dict(st.session_state['scenario'])
-    # Clear the scenario-name text box once the scenario has been stored
-    st.session_state['scenario_input'] = ""
-    with open('../saved_scenarios.pkl', 'wb') as f:
-        pickle.dump(st.session_state['saved_scenarios'],f)
-def reset_curve_parameters():
-    """
-    Remove the curve parameters 'K', 'b', 'a' and 'x0' from the session state
-    Keys that are already absent are skipped, so the callback does not raise
-    KeyError when it fires before the parameters have been re-seeded.
-    """
-    for param in ('K', 'b', 'a', 'x0'):
-        if param in st.session_state:
-            del st.session_state[param]
-def update_response_curve():
-    """
-    Push the 'K', 'b', 'a' and 'x0' values held in the session state into the
-    currently selected channel of the scenario
-    Reads the module-level selected_channel_name.
-    """
-    new_params = {name: st.session_state[name] for name in ('K', 'b', 'a', 'x0')}
-    scenario_channels = st.session_state['scenario'].channels
-    scenario_channels[selected_channel_name].update_response_curves(new_params)
-# authenticator = st.session_state.get('authenticator')
-# if authenticator is None:
-#     authenticator = load_authenticator()
-# name, authentication_status, username = authenticator.login('Login', 'main')
-# auth_status = st.session_state.get('authentication_status')
-# if auth_status == True:
-#     is_state_initiaized = st.session_state.get('initialized',False)
-#     if not is_state_initiaized:
-#         print("Scenario page state reloaded")
-# Sprint4 - if used_response_metrics is not blank, then select one of the used_response_metrics, else target is revenue by default
-st.set_page_config(layout='wide')
-load_local_css('styles.css')
-set_header()
-if "used_response_metrics" in st.session_state and st.session_state['used_response_metrics']!=[]:
-    sel_target_col = st.selectbox("Select the response metric", st.session_state['used_response_metrics'])
-    target_col = sel_target_col.lower().replace(" ", "_").replace('-', '').replace(':', '').replace("__", "_")
-else :
-    sel_target_col = 'Total Approved Accounts - Revenue'
-    target_col = 'total_approved_accounts_revenue'
-initialize_data(target_col)
-st.subheader("Build response curves")
-channels_list = st.session_state['channels_list']
-selected_channel_name = st.selectbox('Channel', st.session_state['channels_list'] + ['Others'], format_func=channel_name_formating,on_change=reset_curve_parameters)
-rcs = {}
-for channel_name in channels_list:
-    rcs[channel_name] = st.session_state['scenario'].channels[channel_name].response_curve_params
-# rcs = st.session_state['rcs']
-if 'K' not in st.session_state:
-    st.session_state['K'] = rcs[selected_channel_name]['K']
-if 'b' not in st.session_state:
-    st.session_state['b'] = rcs[selected_channel_name]['b']
-if 'a' not in st.session_state:
-    st.session_state['a'] = rcs[selected_channel_name]['a']
-if 'x0' not in st.session_state:
-    st.session_state['x0'] = rcs[selected_channel_name]['x0']
-x = st.session_state['actual_input_df'][selected_channel_name].values
-y = st.session_state['actual_contribution_df'][selected_channel_name].values
-power = (np.ceil(np.log(x.max()) / np.log(10) )- 3)
-# fig = px.scatter(x, s_curve(x/10**power,
-#                             st.session_state['K'],
-#                             st.session_state['b'],
-#                             st.session_state['a'],
-#                             st.session_state['x0']))
-fig = px.scatter(x=x, y=y)
-fig.add_trace(go.Scatter(x=sorted(x), y=s_curve(sorted(x)/10**power,st.session_state['K'],
-                                    st.session_state['b'],
-                                    st.session_state['a'],
-                                    st.session_state['x0']),
-                        line=dict(color='red')))
-fig.update_layout(title_text="Response Curve",showlegend=False)
-fig.update_annotations(font_size=10)
-fig.update_xaxes(title='Spends')
-fig.update_yaxes(title=sel_target_col)
-st.plotly_chart(fig,use_container_width=True)
-r2 = r2_score(y, s_curve(x / 10**power,
-                        st.session_state['K'],
-                        st.session_state['b'],
-                        st.session_state['a'],
-                        st.session_state['x0']))
-st.metric('R2',round(r2,2))
-columns = st.columns(4)
-with columns[0]:
-    st.number_input('K',key='K',format="%0.5f")
-with columns[1]:
-    st.number_input('b',key='b',format="%0.5f")
-with columns[2]:
-    st.number_input('a',key='a',step=0.0001,format="%0.5f")
-with columns[3]:
-    st.number_input('x0',key='x0',format="%0.5f")
-st.button('Update parameters',on_click=update_response_curve)
-st.button('Reset parameters',on_click=reset_curve_parameters)
-scenario_name = st.text_input('Scenario name', key='scenario_input',placeholder='Scenario name',label_visibility='collapsed')
-st.button('Save', on_click=lambda  : save_scenario(scenario_name),disabled=len(st.session_state['scenario_input']) == 0)
-file_name = st.text_input('rcs download file name', key='file_name_input',placeholder='file name',label_visibility='collapsed')
-st.download_button(
-                    label="Download response curves",
-                    data=json.dumps(rcs),
-                    file_name=f"{file_name}.json",
-                    mime="application/json",
-                    disabled= len(file_name) == 0,
-                )
-def s_curve_derivative(x, K, b, a, x0):
-    """
-    First derivative with respect to x of K / (1 + b * exp(-a * (x - x0)))
-    """
-    decay = np.exp(-a * (x - x0))
-    squared_denominator = (1 + b * decay) ** 2
-    return a * b * K * decay / squared_denominator
-# Current S-curve parameters, read from the session state (the same keys back the
-# K / b / a / x0 number inputs above)
-K = st.session_state['K']
-b = st.session_state['b']
-a = st.session_state['a']
-x0 = st.session_state['x0']
-# Point at which the slope is evaluated, typed in by the user.
-# NOTE(review): the plotted curve is evaluated at x / 10**power, while this value
-# is passed to the derivative unscaled — confirm which scale the user should enter.
-optimized_spend = st.number_input('value of x')  # e.g. an optimized spend value
-# Slope of the S-curve at the entered value
-slope_at_optimized_spend = s_curve_derivative(optimized_spend, K, b, a, x0)
-st.write("Slope ", slope_at_optimized_spend)

pages/8_Scenario_Planner.py DELETED Viewed

@@ -1,1133 +0,0 @@
-import streamlit as st
-from numerize.numerize import numerize
-import numpy as np
-from functools import partial
-from collections import OrderedDict
-from plotly.subplots import make_subplots
-import plotly.graph_objects as go
-from utilities import (
-    format_numbers,
-    load_local_css,
-    set_header,
-    initialize_data,
-    load_authenticator,
-    send_email,
-    channel_name_formating,
-)
-from classes import class_from_dict, class_to_dict
-import pickle
-import streamlit_authenticator as stauth
-import yaml
-from yaml import SafeLoader
-import re
-import pandas as pd
-import plotly.express as px
-# Display label for the outcome metric; used as the label of the sales
-# metrics and as a column header further down the page.
-target = "Revenue"
-# Page-level Streamlit setup: wide layout, project stylesheet and header.
-st.set_page_config(layout="wide")
-load_local_css("styles.css")
-set_header()
-# Write every session-state entry back to itself, skipping the auth-related
-# keys and any key that starts with "FormSubmitter".
-# NOTE(review): presumably this keeps widget-bound values alive across page
-# switches - confirm against the Streamlit session-state documentation.
-for k, v in st.session_state.items():
-    if k not in ["logout", "login", "config"] and not k.startswith(
-        "FormSubmitter"
-    ):
-        st.session_state[k] = v
-# ======================================================== #
-# ======================= Functions ====================== #
-# ======================================================== #
-def optimize(key):
-    """
-    Optimize the spends for the sales
-    """
-    channel_list = [
-        key
-        for key, value in st.session_state["optimization_channels"].items()
-        if value
-    ]
-    # print('channel_list')
-    # print(channel_list)
-    # print('@@@@@@@@')
-    if len(channel_list) > 0:
-        scenario = st.session_state["scenario"]
-        if key.lower() == "spends":
-            with status_placeholder:
-                with st.spinner("Optimizing"):
-                    result = st.session_state["scenario"].optimize(
-                        st.session_state["total_spends_change"], channel_list
-                    )
-        elif key.lower() == "sales":
-            with status_placeholder:
-                with st.spinner("Optimizing"):
-                    result = st.session_state["scenario"].optimize_spends(
-                        st.session_state["total_sales_change"], channel_list
-                    )
-        for channel_name, modified_spends in result:
-            st.session_state[channel_name] = numerize(
-                modified_spends
-                * scenario.channels[channel_name].conversion_rate,
-                1,
-            )
-            prev_spends = (
-                st.session_state["scenario"]
-                .channels[channel_name]
-                .actual_total_spends
-            )
-            st.session_state[f"{channel_name}_change"] = round(
-                100 * (modified_spends - prev_spends) / prev_spends, 2
-            )
-def save_scenario(scenario_name):
-    """
-    Save the current scenario with the mentioned name in the session state
-    Parameters
-    ----------
-    scenario_name
-        Name of the scenario to be saved
-    """
-    if "saved_scenarios" not in st.session_state:
-        st.session_state = OrderedDict()
-    # st.session_state['saved_scenarios'][scenario_name] = st.session_state['scenario'].save()
-    st.session_state["saved_scenarios"][scenario_name] = class_to_dict(
-        st.session_state["scenario"]
-    )
-    st.session_state["scenario_input"] = ""
-    # print(type(st.session_state['saved_scenarios']))
-    with open("../saved_scenarios.pkl", "wb") as f:
-        pickle.dump(st.session_state["saved_scenarios"], f)
-def update_sales_abs():
-    actual_sales = _scenario.actual_total_sales
-    if validate_input(st.session_state["total_sales_change_abs"]):
-        modified_sales = extract_number_for_string(
-            st.session_state["total_sales_change_abs"]
-        )
-        st.session_state["total_sales_change"] = round(
-            ((modified_sales / actual_sales) - 1) * 100
-        )
-def update_sales():
-    st.session_state["total_sales_change_abs"] = numerize(
-        (1 + st.session_state["total_sales_change"] / 100)
-        * _scenario.actual_total_sales,
-        1,
-    )
-def update_all_spends_abs():
-    actual_spends = _scenario.actual_total_spends
-    if validate_input(st.session_state["total_spends_change_abs"]):
-        modified_spends = extract_number_for_string(
-            st.session_state["total_spends_change_abs"]
-        )
-        print(modified_spends)
-        print(actual_spends)
-        st.session_state["total_spends_change"] = (
-            (modified_spends / actual_spends) - 1
-        ) * 100
-        update_all_spends()
-def update_all_spends():
-    """
-    Updates spends for all the channels with the given overall spends change
-    """
-    percent_change = st.session_state["total_spends_change"]
-    st.session_state["total_spends_change_abs"] = numerize(
-        (1 + percent_change / 100) * _scenario.actual_total_spends, 1
-    )
-    for channel_name in st.session_state["channels_list"]:
-        channel = st.session_state["scenario"].channels[channel_name]
-        current_spends = channel.actual_total_spends
-        modified_spends = (1 + percent_change / 100) * current_spends
-        st.session_state["scenario"].update(channel_name, modified_spends)
-        st.session_state[channel_name] = numerize(
-            modified_spends * channel.conversion_rate, 1
-        )
-        st.session_state[f"{channel_name}_change"] = percent_change
-def extract_number_for_string(string_input):
-    string_input = string_input.upper()
-    if string_input.endswith("K"):
-        return float(string_input[:-1]) * 10**3
-    elif string_input.endswith("M"):
-        return float(string_input[:-1]) * 10**6
-    elif string_input.endswith("B"):
-        return float(string_input[:-1]) * 10**9
-def validate_input(string_input):
-    pattern = r"\d+\.?\d*[K|M|B]$"
-    match = re.match(pattern, string_input)
-    if match is None:
-        return False
-    return True
-def update_data_by_percent(channel_name):
-    prev_spends = (
-        st.session_state["scenario"].channels[channel_name].actual_total_spends
-        * st.session_state["scenario"].channels[channel_name].conversion_rate
-    )
-    modified_spends = prev_spends * (
-        1 + st.session_state[f"{channel_name}_change"] / 100
-    )
-    st.session_state[channel_name] = numerize(modified_spends, 1)
-    st.session_state["scenario"].update(
-        channel_name,
-        modified_spends
-        / st.session_state["scenario"].channels[channel_name].conversion_rate,
-    )
-def update_data(channel_name):
-    """
-    Updates the spends for the given channel
-    """
-    if validate_input(st.session_state[channel_name]):
-        modified_spends = extract_number_for_string(
-            st.session_state[channel_name]
-        )
-        prev_spends = (
-            st.session_state["scenario"]
-            .channels[channel_name]
-            .actual_total_spends
-            * st.session_state["scenario"]
-            .channels[channel_name]
-            .conversion_rate
-        )
-        st.session_state[f"{channel_name}_change"] = round(
-            100 * (modified_spends - prev_spends) / prev_spends, 2
-        )
-        st.session_state["scenario"].update(
-            channel_name,
-            modified_spends
-            / st.session_state["scenario"]
-            .channels[channel_name]
-            .conversion_rate,
-        )
-    # st.session_state['scenario'].update(channel_name, modified_spends)
-    # else:
-    #     try:
-    #         modified_spends = float(st.session_state[channel_name])
-    #         prev_spends = st.session_state['scenario'].channels[channel_name].actual_total_spends * st.session_state['scenario'].channels[channel_name].conversion_rate
-    #         st.session_state[f'{channel_name}_change'] = round(100*(modified_spends - prev_spends) / prev_spends,2)
-    #         st.session_state['scenario'].update(channel_name, modified_spends/st.session_state['scenario'].channels[channel_name].conversion_rate)
-    #         st.session_state[f'{channel_name}'] = numerize(modified_spends,1)
-    #     except ValueError:
-    #         st.write('Invalid input')
-def select_channel_for_optimization(channel_name):
-    """
-    Marks the given channel for optimization
-    """
-    st.session_state["optimization_channels"][channel_name] = st.session_state[
-        f"{channel_name}_selected"
-    ]
-def select_all_channels_for_optimization():
-    """
-    Marks all the channel for optimization
-    """
-    for channel_name in st.session_state["optimization_channels"].keys():
-        st.session_state[f"{channel_name}_selected"] = st.session_state[
-            "optimze_all_channels"
-        ]
-        st.session_state["optimization_channels"][channel_name] = (
-            st.session_state["optimze_all_channels"]
-        )
-def update_penalty():
-    """
-    Updates the penalty flag for sales calculation
-    """
-    st.session_state["scenario"].update_penalty(
-        st.session_state["apply_penalty"]
-    )
-def reset_scenario():
-    # #print(st.session_state['default_scenario_dict'])
-    # st.session_state['scenario']  = class_from_dict(st.session_state['default_scenario_dict'])
-    # for channel in st.session_state['scenario'].channels.values():
-    #     st.session_state[channel.name] = float(channel.actual_total_spends * channel.conversion_rate)
-    initialize_data()
-    for channel_name in st.session_state["channels_list"]:
-        st.session_state[f"{channel_name}_selected"] = False
-        st.session_state[f"{channel_name}_change"] = 0
-    st.session_state["optimze_all_channels"] = False
-def format_number(num):
-    if num >= 1_000_000:
-        return f"{num / 1_000_000:.2f}M"
-    elif num >= 1_000:
-        return f"{num / 1_000:.0f}K"
-    else:
-        return f"{num:.2f}"
-def summary_plot(data, x, y, title, text_column):
-    fig = px.bar(
-        data,
-        x=x,
-        y=y,
-        orientation="h",
-        title=title,
-        text=text_column,
-        color="Channel_name",
-    )
-    # Convert text_column to numeric values
-    data[text_column] = pd.to_numeric(data[text_column], errors="coerce")
-    # Update the format of the displayed text based on magnitude
-    fig.update_traces(
-        texttemplate="%{text:.2s}",
-        textposition="outside",
-        hovertemplate="%{x:.2s}",
-    )
-    fig.update_layout(
-        xaxis_title=x, yaxis_title="Channel Name", showlegend=False
-    )
-    return fig
-def s_curve(x, K, b, a, x0):
-    return K / (1 + b * np.exp(-a * (x - x0)))
-def find_segment_value(x, roi, mroi):
-    start_value = x[0]
-    end_value = x[len(x) - 1]
-    # Condition for green region: Both MROI and ROI > 1
-    green_condition = (roi > 1) & (mroi > 1)
-    left_indices = np.where(green_condition)[0]
-    left_value = x[left_indices[0]] if left_indices.size > 0 else x[0]
-    right_indices = np.where(green_condition)[0]
-    right_value = x[right_indices[-1]] if right_indices.size > 0 else x[0]
-    return start_value, end_value, left_value, right_value
-def calculate_rgba(
-    start_value, end_value, left_value, right_value, current_channel_spends
-):
-    # Initialize alpha to None for clarity
-    alpha = None
-    # Determine the color and calculate relative_position and alpha based on the point's position
-    if start_value <= current_channel_spends <= left_value:
-        color = "yellow"
-        relative_position = (current_channel_spends - start_value) / (
-            left_value - start_value
-        )
-        alpha = 0.8 - (
-            0.6 * relative_position
-        )  # Alpha decreases from start to end
-    elif left_value < current_channel_spends <= right_value:
-        color = "green"
-        relative_position = (current_channel_spends - left_value) / (
-            right_value - left_value
-        )
-        alpha = 0.8 - (
-            0.6 * relative_position
-        )  # Alpha decreases from start to end
-    elif right_value < current_channel_spends <= end_value:
-        color = "red"
-        relative_position = (current_channel_spends - right_value) / (
-            end_value - right_value
-        )
-        alpha = 0.2 + (
-            0.6 * relative_position
-        )  # Alpha increases from start to end
-    else:
-        # Default case, if the spends are outside the defined ranges
-        return "rgba(136, 136, 136, 0.5)"  # Grey for values outside the range
-    # Ensure alpha is within the intended range in case of any calculation overshoot
-    alpha = max(0.2, min(alpha, 0.8))
-    # Define color codes for RGBA
-    color_codes = {
-        "yellow": "255, 255, 0",  # RGB for yellow
-        "green": "0, 128, 0",  # RGB for green
-        "red": "255, 0, 0",  # RGB for red
-    }
-    rgba = f"rgba({color_codes[color]}, {alpha})"
-    return rgba
-def debug_temp(x_test, power, K, b, a, x0):
-    print("*" * 100)
-    # Calculate the count of bins
-    count_lower_bin = sum(1 for x in x_test if x <= 2524)
-    count_center_bin = sum(1 for x in x_test if x > 2524 and x <= 3377)
-    count_ = sum(1 for x in x_test if x > 3377)
-    print(
-        f"""
-            lower : {count_lower_bin}
-            center : {count_center_bin}
-            upper : {count_}
-          """
-    )
-# @st.cache
-def plot_response_curves():
-    """
-    Build the grid of per-channel response curves, four subplots per row
-    For every channel in the module-level channels_list this draws the S-curve
-    of spends against sales from the parameters in st.session_state["rcs"],
-    marks the scenario's modified spends / sales point in black with dashed
-    guide lines, and shades yellow, green and red background bands whose
-    boundaries come from find_segment_value.
-    Returns
-    -------
-    The plotly figure holding all subplots
-    """
-    cols = 4
-    # Number of subplot rows: len(channels_list) / cols, rounded up
-    rows = (
-        len(channels_list) // cols
-        if len(channels_list) % cols == 0
-        else len(channels_list) // cols + 1
-    )
-    rcs = st.session_state["rcs"]
-    # Layout shapes (guide lines and background bands) for all subplots
-    shapes = []
-    fig = make_subplots(rows=rows, cols=cols, subplot_titles=channels_list)
-    for i in range(0, len(channels_list)):
-        col = channels_list[i]
-        x_actual = st.session_state["scenario"].channels[col].actual_spends
-        # x_modified = st.session_state["scenario"].channels[col].modified_spends
-        # Scale exponent: ceil(log10(max spend)) - 3. Spends are divided by
-        # 10**power before being passed to the S-curve.
-        # NOTE(review): a maximum spend of 0 makes the log -inf - confirm
-        # that cannot happen.
-        power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
-        K = rcs[col]["K"]
-        b = rcs[col]["b"]
-        a = rcs[col]["a"]
-        x0 = rcs[col]["x0"]
-        # 50 total-spend levels from 0 up to 5x the actual total
-        x_plot = np.linspace(0, 5 * x_actual.sum(), 50)
-        x, y, marginal_roi = [], [], []
-        # Split each total across the entries of x_actual in proportion to
-        # the actual spends (assumes x_actual is a 1-D array - TODO confirm)
-        for x_p in x_plot:
-            x.append(x_p * x_actual / x_actual.sum())
-        for index in range(len(x_plot)):
-            y.append(s_curve(x[index] / 10**power, K, b, a, x0))
-        # a * y * (1 - y / K) is the derivative of the S-curve with respect
-        # to its scaled input; K is floored at machine epsilon to avoid a
-        # division by zero
-        for index in range(len(x_plot)):
-            marginal_roi.append(
-                a
-                * y[index]
-                * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
-            )
-        # Collapse the per-entry values to one number per spend level:
-        # spends and sales are summed, marginal ROI is averaged
-        x = (
-            np.sum(x, axis=1)
-            * st.session_state["scenario"].channels[col].conversion_rate
-        )
-        y = np.sum(y, axis=1)
-        marginal_roi = (
-            np.average(marginal_roi, axis=1)
-            / st.session_state["scenario"].channels[col].conversion_rate
-        )
-        # ROI = sales / spends, with spends floored at machine epsilon
-        roi = y / np.maximum(x, np.finfo(float).eps)
-        # The response curve itself; ROI and MROI are exposed in the hover text
-        fig.add_trace(
-            go.Scatter(
-                x=x,
-                y=y,
-                name=col,
-                customdata=np.stack((roi, marginal_roi), axis=-1),
-                hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
-                line=dict(color="blue"),
-            ),
-            row=1 + (i) // cols,
-            col=i % cols + 1,
-        )
-        # Point for the scenario's current (modified) spends and sales
-        x_optimal = (
-            st.session_state["scenario"].channels[col].modified_total_spends
-            * st.session_state["scenario"].channels[col].conversion_rate
-        )
-        y_optimal = (
-            st.session_state["scenario"].channels[col].modified_total_sales
-        )
-        # if col == "Paid_social_others":
-        #     debug_temp(x_optimal * x_actual / x_actual.sum(), power, K, b, a, x0)
-        fig.add_trace(
-            go.Scatter(
-                x=[x_optimal],
-                y=[y_optimal],
-                name=col,
-                legendgroup=col,
-                showlegend=False,
-                marker=dict(color=["black"]),
-            ),
-            row=1 + (i) // cols,
-            col=i % cols + 1,
-        )
-        # Dashed horizontal guide from the y axis to the point; shapes are
-        # attached to this subplot through the x{i+1} / y{i+1} axis references
-        shapes.append(
-            go.layout.Shape(
-                type="line",
-                x0=0,
-                y0=y_optimal,
-                x1=x_optimal,
-                y1=y_optimal,
-                line_width=1,
-                line_dash="dash",
-                line_color="black",
-                xref=f"x{i+1}",
-                yref=f"y{i+1}",
-            )
-        )
-        # Dashed vertical guide from the x axis to the point
-        shapes.append(
-            go.layout.Shape(
-                type="line",
-                x0=x_optimal,
-                y0=0,
-                x1=x_optimal,
-                y1=y_optimal,
-                line_width=1,
-                line_dash="dash",
-                line_color="black",
-                xref=f"x{i+1}",
-                yref=f"y{i+1}",
-            )
-        )
-        # Band boundaries: ends of the curve and ends of the range where
-        # both ROI and MROI exceed 1
-        start_value, end_value, left_value, right_value = find_segment_value(
-            x,
-            roi,
-            marginal_roi,
-        )
-        # Adding background colors
-        y_max = y.max() * 1.3  # 30% extra space above the max
-        # Yellow region
-        shapes.append(
-            go.layout.Shape(
-                type="rect",
-                x0=start_value,
-                y0=0,
-                x1=left_value,
-                y1=y_max,
-                line=dict(width=0),
-                fillcolor="rgba(255, 255, 0, 0.3)",
-                layer="below",
-                xref=f"x{i+1}",
-                yref=f"y{i+1}",
-            )
-        )
-        # Green region
-        shapes.append(
-            go.layout.Shape(
-                type="rect",
-                x0=left_value,
-                y0=0,
-                x1=right_value,
-                y1=y_max,
-                line=dict(width=0),
-                fillcolor="rgba(0, 255, 0, 0.3)",
-                layer="below",
-                xref=f"x{i+1}",
-                yref=f"y{i+1}",
-            )
-        )
-        # Red region
-        shapes.append(
-            go.layout.Shape(
-                type="rect",
-                x0=right_value,
-                y0=0,
-                x1=end_value,
-                y1=y_max,
-                line=dict(width=0),
-                fillcolor="rgba(255, 0, 0, 0.3)",
-                layer="below",
-                xref=f"x{i+1}",
-                yref=f"y{i+1}",
-            )
-        )
-    # Apply the title and attach every collected shape in one layout update
-    fig.update_layout(
-        # height=1000,
-        # width=1000,
-        title_text="Response Curves (X: Spends Vs Y: Revenue)",
-        showlegend=False,
-        shapes=shapes,
-    )
-    # Shrink the annotation font (the subplot titles are annotations)
-    fig.update_annotations(font_size=10)
-    # fig.update_xaxes(title="Spends")
-    # fig.update_yaxes(title=target)
-    fig.update_yaxes(
-        gridcolor="rgba(136, 136, 136, 0.5)", gridwidth=0.5, griddash="dash"
-    )
-    return fig
-# @st.cache
-# def plot_response_curves():
-#     cols = 4
-#     rcs = st.session_state["rcs"]
-#     shapes = []
-#     fig = make_subplots(rows=6, cols=cols, subplot_titles=channels_list)
-#     for i in range(0, len(channels_list)):
-#         col = channels_list[i]
-#         x = st.session_state["actual_df"][col].values
-#         spends = x.sum()
-#         power = np.ceil(np.log(x.max()) / np.log(10)) - 3
-#         x = np.linspace(0, 3 * x.max(), 200)
-#         K = rcs[col]["K"]
-#         b = rcs[col]["b"]
-#         a = rcs[col]["a"]
-#         x0 = rcs[col]["x0"]
-#         y = s_curve(x / 10**power, K, b, a, x0)
-#         roi = y / x
-#         marginal_roi = a * (y) * (1 - y / K)
-#         fig.add_trace(
-#             go.Scatter(
-#                 x=52
-#                 * x
-#                 * st.session_state["scenario"].channels[col].conversion_rate,
-#                 y=52 * y,
-#                 name=col,
-#                 customdata=np.stack((roi, marginal_roi), axis=-1),
-#                 hovertemplate="Spend:%{x:$.2s}<br>Sale:%{y:$.2s}<br>ROI:%{customdata[0]:.3f}<br>MROI:%{customdata[1]:.3f}",
-#             ),
-#             row=1 + (i) // cols,
-#             col=i % cols + 1,
-#         )
-#         fig.add_trace(
-#             go.Scatter(
-#                 x=[
-#                     spends
-#                     * st.session_state["scenario"]
-#                     .channels[col]
-#                     .conversion_rate
-#                 ],
-#                 y=[52 * s_curve(spends / (10**power * 52), K, b, a, x0)],
-#                 name=col,
-#                 legendgroup=col,
-#                 showlegend=False,
-#                 marker=dict(color=["black"]),
-#             ),
-#             row=1 + (i) // cols,
-#             col=i % cols + 1,
-#         )
-#         shapes.append(
-#             go.layout.Shape(
-#                 type="line",
-#                 x0=0,
-#                 y0=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
-#                 x1=spends
-#                 * st.session_state["scenario"].channels[col].conversion_rate,
-#                 y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
-#                 line_width=1,
-#                 line_dash="dash",
-#                 line_color="black",
-#                 xref=f"x{i+1}",
-#                 yref=f"y{i+1}",
-#             )
-#         )
-#         shapes.append(
-#             go.layout.Shape(
-#                 type="line",
-#                 x0=spends
-#                 * st.session_state["scenario"].channels[col].conversion_rate,
-#                 y0=0,
-#                 x1=spends
-#                 * st.session_state["scenario"].channels[col].conversion_rate,
-#                 y1=52 * s_curve(spends / (10**power * 52), K, b, a, x0),
-#                 line_width=1,
-#                 line_dash="dash",
-#                 line_color="black",
-#                 xref=f"x{i+1}",
-#                 yref=f"y{i+1}",
-#             )
-#         )
-#     fig.update_layout(
-#         height=1500,
-#         width=1000,
-#         title_text="Response Curves",
-#         showlegend=False,
-#         shapes=shapes,
-#     )
-#     fig.update_annotations(font_size=10)
-#     fig.update_xaxes(title="Spends")
-#     fig.update_yaxes(title=target)
-#     return fig
-# ======================================================== #
-# ==================== HTML Components =================== #
-# ======================================================== #
-def generate_spending_header(heading):
-    return st.markdown(
-        f"""<h2 class="spends-header">{heading}</h2>""", unsafe_allow_html=True
-    )
-# ======================================================== #
-# =================== Session variables ================== #
-# ======================================================== #
-# Load the YAML configuration with the safe loader and keep it in the
-# session state.
-# NOTE(review): "config.yaml" is resolved against the working directory - confirm
-with open("config.yaml") as file:
-    config = yaml.load(file, Loader=SafeLoader)
-    st.session_state["config"] = config
-# Build the authenticator from the credentials, cookie and preauthorized
-# sections of the configuration
-authenticator = stauth.Authenticate(
-    config["credentials"],
-    config["cookie"]["name"],
-    config["cookie"]["key"],
-    config["cookie"]["expiry_days"],
-    config["preauthorized"],
-)
-st.session_state["authenticator"] = authenticator
-# Render the login form in the main area
-name, authentication_status, username = authenticator.login("Login", "main")
-# The status used below is read back from the session state rather than
-# taken from the value returned by login()
-auth_status = st.session_state.get("authentication_status")
-if auth_status == True:
-    authenticator.logout("Logout", "main")
-    is_state_initiaized = st.session_state.get("initialized", False)
-    if not is_state_initiaized:
-        initialize_data()
-    channels_list = st.session_state["channels_list"]
-    # ======================================================== #
-    # ========================== UI ========================== #
-    # ======================================================== #
-    # print(list(st.session_state.keys()))
-    st.header("Simulation")
-    main_header = st.columns((2, 2))
-    sub_header = st.columns((1, 1, 1, 1))
-    _scenario = st.session_state["scenario"]
-    if "total_spends_change_abs" not in st.session_state:
-        st.session_state["total_spends_change_abs"] = numerize(
-            _scenario.actual_total_spends, 1
-        )
-    if "total_sales_change_abs" not in st.session_state:
-        st.session_state["total_sales_change_abs"] = numerize(
-            _scenario.actual_total_sales, 1
-        )
-    with main_header[0]:
-        st.subheader("Actual")
-    with main_header[-1]:
-        st.subheader("Simulated")
-    with sub_header[0]:
-        st.metric(
-            label="Spends", value=format_numbers(_scenario.actual_total_spends)
-        )
-    with sub_header[1]:
-        st.metric(
-            label=target,
-            value=format_numbers(
-                float(_scenario.actual_total_sales), include_indicator=False
-            ),
-        )
-    with sub_header[2]:
-        st.metric(
-            label="Spends",
-            value=format_numbers(_scenario.modified_total_spends),
-            delta=numerize(_scenario.delta_spends, 1),
-        )
-    with sub_header[3]:
-        st.metric(
-            label=target,
-            value=format_numbers(
-                float(_scenario.modified_total_sales), include_indicator=False
-            ),
-            delta=numerize(_scenario.delta_sales, 1),
-        )
-    with st.expander("Channel Spends Simulator"):
-        _columns1 = st.columns((2, 2, 1, 1))
-        with _columns1[0]:
-            optimization_selection = st.selectbox(
-                "Optimize", options=["Spends", "Sales"], key="optimization_key"
-            )
-        with _columns1[1]:
-            st.markdown("#")
-            st.checkbox(
-                label="Optimize all Channels",
-                key=f"optimze_all_channels",
-                value=False,
-                on_change=select_all_channels_for_optimization,
-            )
-        with _columns1[2]:
-            st.markdown("#")
-            st.button(
-                "Optimize",
-                on_click=optimize,
-                args=(st.session_state["optimization_key"],),
-            )
-        with _columns1[3]:
-            st.markdown("#")
-            st.button("Reset", on_click=reset_scenario)
-        _columns2 = st.columns((2, 2, 2))
-        if st.session_state["optimization_key"] == "Spends":
-            with _columns2[0]:
-                spend_input = st.text_input(
-                    "Absolute",
-                    key="total_spends_change_abs",
-                    # label_visibility="collapsed",
-                    on_change=update_all_spends_abs,
-                )
-            with _columns2[1]:
-                st.number_input(
-                    "Percent",
-                    key=f"total_spends_change",
-                    step=1,
-                    on_change=update_all_spends,
-                )
-        elif st.session_state["optimization_key"] == "Sales":
-            with _columns2[0]:
-                sales_input = st.text_input(
-                    "Absolute",
-                    key="total_sales_change_abs",
-                    on_change=update_sales_abs,
-                )
-            with _columns2[1]:
-                st.number_input(
-                    "Percent change",
-                    key=f"total_sales_change",
-                    step=1,
-                    on_change=update_sales,
-                )
-        with _columns2[2]:
-            st.markdown("#")
-            status_placeholder = st.empty()
-        st.markdown(
-            """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
-        )
-        _columns = st.columns((2.5, 2, 1.5, 1.5, 1))
-        with _columns[0]:
-            generate_spending_header("Channel")
-        with _columns[1]:
-            generate_spending_header("Spends Input")
-        with _columns[2]:
-            generate_spending_header("Spends")
-        with _columns[3]:
-            generate_spending_header(target)
-        with _columns[4]:
-            generate_spending_header("Optimize")
-        st.markdown(
-            """<hr class="spends-heading-seperator">""", unsafe_allow_html=True
-        )
-        if "acutual_predicted" not in st.session_state:
-            st.session_state["acutual_predicted"] = {
-                "Channel_name": [],
-                "Actual_spend": [],
-                "Optimized_spend": [],
-                "Delta": [],
-            }
-        for i, channel_name in enumerate(channels_list):
-            _channel_class = st.session_state["scenario"].channels[
-                channel_name
-            ]
-            _columns = st.columns((2.5, 1.5, 1.5, 1.5, 1))
-            with _columns[0]:
-                st.write(channel_name_formating(channel_name))
-                bin_placeholder = st.container()
-            with _columns[1]:
-                channel_bounds = _channel_class.bounds
-                channel_spends = float(_channel_class.actual_total_spends)
-                min_value = float(
-                    (1 + channel_bounds[0] / 100) * channel_spends
-                )
-                max_value = float(
-                    (1 + channel_bounds[1] / 100) * channel_spends
-                )
-                ##print(st.session_state[channel_name])
-                spend_input = st.text_input(
-                    channel_name,
-                    key=channel_name,
-                    label_visibility="collapsed",
-                    on_change=partial(update_data, channel_name),
-                )
-                if not validate_input(spend_input):
-                    st.error("Invalid input")
-                st.number_input(
-                    "Percent change",
-                    key=f"{channel_name}_change",
-                    step=1,
-                    on_change=partial(update_data_by_percent, channel_name),
-                )
-            with _columns[2]:
-                # spends
-                current_channel_spends = float(
-                    _channel_class.modified_total_spends
-                    * _channel_class.conversion_rate
-                )
-                actual_channel_spends = float(
-                    _channel_class.actual_total_spends
-                    * _channel_class.conversion_rate
-                )
-                spends_delta = float(
-                    _channel_class.delta_spends
-                    * _channel_class.conversion_rate
-                )
-                st.session_state["acutual_predicted"]["Channel_name"].append(
-                    channel_name
-                )
-                st.session_state["acutual_predicted"]["Actual_spend"].append(
-                    actual_channel_spends
-                )
-                st.session_state["acutual_predicted"][
-                    "Optimized_spend"
-                ].append(current_channel_spends)
-                st.session_state["acutual_predicted"]["Delta"].append(
-                    spends_delta
-                )
-                ## REMOVE
-                st.metric(
-                    "Spends",
-                    format_numbers(current_channel_spends),
-                    delta=numerize(spends_delta, 1),
-                    label_visibility="collapsed",
-                )
-            with _columns[3]:
-                # sales
-                current_channel_sales = float(
-                    _channel_class.modified_total_sales
-                )
-                actual_channel_sales = float(_channel_class.actual_total_sales)
-                sales_delta = float(_channel_class.delta_sales)
-                st.metric(
-                    target,
-                    format_numbers(
-                        current_channel_sales, include_indicator=False
-                    ),
-                    delta=numerize(sales_delta, 1),
-                    label_visibility="collapsed",
-                )
-            with _columns[4]:
-                st.checkbox(
-                    label="select for optimization",
-                    key=f"{channel_name}_selected",
-                    value=False,
-                    on_change=partial(
-                        select_channel_for_optimization, channel_name
-                    ),
-                    label_visibility="collapsed",
-                )
-            st.markdown(
-                """<hr class="spends-child-seperator">""",
-                unsafe_allow_html=True,
-            )
-            # Bins
-            col = channels_list[i]
-            x_actual = st.session_state["scenario"].channels[col].actual_spends
-            x_modified = (
-                st.session_state["scenario"].channels[col].modified_spends
-            )
-            x_total = x_modified.sum()
-            power = np.ceil(np.log(x_actual.max()) / np.log(10)) - 3
-            K = st.session_state["rcs"][col]["K"]
-            b = st.session_state["rcs"][col]["b"]
-            a = st.session_state["rcs"][col]["a"]
-            x0 = st.session_state["rcs"][col]["x0"]
-            x_plot = np.linspace(0, 5 * x_actual.sum(), 200)
-            x, y, marginal_roi = [], [], []
-            for x_p in x_plot:
-                x.append(x_p * x_actual / x_actual.sum())
-            for index in range(len(x_plot)):
-                y.append(s_curve(x[index] / 10**power, K, b, a, x0))
-            for index in range(len(x_plot)):
-                marginal_roi.append(
-                    a
-                    * y[index]
-                    * (1 - y[index] / np.maximum(K, np.finfo(float).eps))
-                )
-            x = (
-                np.sum(x, axis=1)
-                * st.session_state["scenario"].channels[col].conversion_rate
-            )
-            y = np.sum(y, axis=1)
-            marginal_roi = (
-                np.average(marginal_roi, axis=1)
-                / st.session_state["scenario"].channels[col].conversion_rate
-            )
-            roi = y / np.maximum(x, np.finfo(float).eps)
-            start_value, end_value, left_value, right_value = (
-                find_segment_value(
-                    x,
-                    roi,
-                    marginal_roi,
-                )
-            )
-            rgba = calculate_rgba(
-                start_value,
-                end_value,
-                left_value,
-                right_value,
-                current_channel_spends,
-            )
-            # Protecting division by zero by adding a small epsilon to denominators
-            roi_current = current_channel_sales / np.maximum(
-                current_channel_spends, np.finfo(float).eps
-            )
-            marginal_roi_current = (
-                st.session_state["scenario"]
-                .channels[col]
-                .get_marginal_roi("modified")
-            )
-            with bin_placeholder:
-                st.markdown(
-                    f"""
-                    <div style="
-                        border-radius: 12px;
-                        background-color: {rgba};
-                        padding: 10px;
-                        text-align: center;
-                        color: #006EC0;
-                        ">
-                        <p style="margin: 0; font-size: 20px;">ROI: {round(roi_current,1)}</p>
-                        <p style="margin: 0; font-size: 20px;">Marginal ROI: {round(marginal_roi_current,1)}</p>
-                    </div>
-                    """,
-                    unsafe_allow_html=True,
-                )
-    with st.expander("See Response Curves"):
-        fig = plot_response_curves()
-        st.plotly_chart(fig, use_container_width=True)
-    _columns = st.columns(2)
-    with _columns[0]:
-        st.subheader("Save Scenario")
-        scenario_name = st.text_input(
-            "Scenario name",
-            key="scenario_input",
-            placeholder="Scenario name",
-            label_visibility="collapsed",
-        )
-        st.button(
-            "Save",
-            on_click=lambda: save_scenario(scenario_name),
-            disabled=len(st.session_state["scenario_input"]) == 0,
-        )
-    summary_df = pd.DataFrame(st.session_state["acutual_predicted"])
-    summary_df.drop_duplicates(
-        subset="Channel_name", keep="last", inplace=True
-    )
-    summary_df_sorted = summary_df.sort_values(by="Delta", ascending=False)
-    summary_df_sorted["Delta_percent"] = np.round(
-        (
-            (
-                summary_df_sorted["Optimized_spend"]
-                / summary_df_sorted["Actual_spend"]
-            )
-            - 1
-        )
-        * 100,
-        2,
-    )
-    with open("summary_df.pkl", "wb") as f:
-        pickle.dump(summary_df_sorted, f)
-        # st.dataframe(summary_df_sorted)
-        # ___columns=st.columns(3)
-        # with ___columns[2]:
-        #     fig=summary_plot(summary_df_sorted, x='Delta_percent', y='Channel_name', title='Delta', text_column='Delta_percent')
-        #     st.plotly_chart(fig,use_container_width=True)
-        # with ___columns[0]:
-        #     fig=summary_plot(summary_df_sorted, x='Actual_spend', y='Channel_name', title='Actual Spend', text_column='Actual_spend')
-        #     st.plotly_chart(fig,use_container_width=True)
-        # with ___columns[1]:
-        #     fig=summary_plot(summary_df_sorted, x='Optimized_spend', y='Channel_name', title='Planned Spend', text_column='Optimized_spend')
-        #     st.plotly_chart(fig,use_container_width=True)
-elif auth_status == False:
-    st.error("Username/Password is incorrect")
-if auth_status != True:
-    try:
-        username_forgot_pw, email_forgot_password, random_password = (
-            authenticator.forgot_password("Forgot password")
-        )
-        if username_forgot_pw:
-            st.session_state["config"]["credentials"]["usernames"][
-                username_forgot_pw
-            ]["password"] = stauth.Hasher([random_password]).generate()[0]
-            send_email(email_forgot_password, random_password)
-            st.success("New password sent securely")
-            # Random password to be transferred to user securely
-        elif username_forgot_pw == False:
-            st.error("Username not found")
-    except Exception as e:
-        st.error(e)

pages/9_Saved_Scenarios.py DELETED Viewed

@@ -1,276 +0,0 @@
-import streamlit as st
-from numerize.numerize import numerize
-import io
-import pandas as pd
-from utilities import (format_numbers,decimal_formater,
-                       channel_name_formating,
-                       load_local_css,set_header,
-                       initialize_data,
-                       load_authenticator)
-from openpyxl import Workbook
-from openpyxl.styles import Alignment,Font,PatternFill
-import pickle
-import streamlit_authenticator as stauth
-import yaml
-from yaml import SafeLoader
-from classes import class_from_dict
-# Page bootstrap: switch Streamlit to the wide layout, then apply the
-# stylesheet and header through the helpers imported from `utilities`.
-st.set_page_config(layout='wide')
-load_local_css('styles.css')
-set_header()
-# NOTE(review): disabled snippet that re-assigned most session-state entries to
-# themselves on every run; it is inactive and kept only for reference.
-# for k, v in st.session_state.items():
-#     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
-#         st.session_state[k] = v
-def create_scenario_summary(scenario_dict):
-    """Build the Actual-vs-Simulated summary table for one saved scenario.
-
-    One row is produced for every entry of ``scenario_dict['channels']``,
-    followed by a ``'Total'`` row taken from the scenario-level totals.  The
-    columns carry a two-level header: a ``('', 'Channel')`` label column and
-    then an ``Actual``/``Simulated`` pair for each of Spends, NRPU, ROI, MROI
-    and Spend per NRPU.  Channel spends are multiplied by the channel's
-    ``conversion_rate``; scenario-level totals are used as stored.
-    """
-    summary_rows = []
-    for channel_dict in scenario_dict['channels']:
-        label = channel_name_formating(channel_dict['name'])
-        rate = channel_dict.get('conversion_rate')
-        spend_actual = channel_dict.get('actual_total_spends') * rate
-        spend_simulated = channel_dict.get('modified_total_spends') * rate
-        sales_actual = channel_dict.get('actual_total_sales')
-        sales_simulated = channel_dict.get('modified_total_sales')
-        summary_rows.append([
-            label,
-            spend_actual,
-            spend_simulated,
-            sales_actual,
-            sales_simulated,
-            sales_actual / spend_actual,          # ROI
-            sales_simulated / spend_simulated,
-            channel_dict.get('actual_mroi'),
-            channel_dict.get('modified_mroi'),
-            spend_actual / sales_actual,          # spend per NRPU
-            spend_simulated / sales_simulated,
-        ])
-
-    total_spend_actual = scenario_dict.get('actual_total_spends')
-    total_spend_simulated = scenario_dict.get('modified_total_spends')
-    total_sales_actual = scenario_dict.get('actual_total_sales')
-    total_sales_simulated = scenario_dict.get('modified_total_sales')
-    summary_rows.append([
-        'Total',
-        total_spend_actual,
-        total_spend_simulated,
-        total_sales_actual,
-        total_sales_simulated,
-        total_sales_actual / total_spend_actual,
-        total_sales_simulated / total_spend_simulated,
-        '-',  # no marginal ROI at scenario level
-        '-',
-        total_spend_actual / total_sales_actual,
-        total_spend_simulated / total_sales_simulated,
-    ])
-
-    level_names = ["first", "second"]
-    label_header = pd.MultiIndex.from_product([[''], ['Channel']], names=level_names)
-    metric_header = pd.MultiIndex.from_product(
-        [['Spends', 'NRPU', 'ROI', 'MROI', 'Spend per NRPU'], ['Actual', 'Simulated']],
-        names=level_names,
-    )
-    return pd.DataFrame(summary_rows, columns=label_header.append(metric_header))
-def summary_df_to_worksheet(df, ws):
-    """Write a summary DataFrame with two-level column headers to a worksheet.
-
-    Rows 1 and 2 of ``ws`` receive the two header levels, data rows start at
-    row 3.  Every run of adjacent columns that share the same first-level
-    label is merged in row 1 and centred, and the first two columns of each
-    such run get the currency number format.
-
-    The previous implementation hard-coded a 4-column group for both the
-    merge span (``col-3 .. col``) and the currency test (``% 4``), which no
-    longer lines up with 5-column groups; the group boundaries are now taken
-    from the header itself.
-
-    NOTE(review): the header font colour and the header fill are the same
-    value ('FF11B6BD') - confirm that is intended.
-
-    Args:
-        df: DataFrame whose ``columns.values`` are 2-tuples (first, second).
-        ws: openpyxl worksheet to fill.
-    """
-    from itertools import groupby
-
-    heading_fill = PatternFill(fill_type='solid',start_color='FF11B6BD',end_color='FF11B6BD')
-    headers = list(df.columns.values)
-
-    # Write both header levels for every column before merging anything.
-    for col, header in enumerate(headers, start=1):
-        for level in (0, 1):
-            cell = ws.cell(row=level + 1, column=col, value=header[level])
-            cell.font = Font(bold=True, color='FF11B6BD')
-            cell.fill = heading_fill
-
-    # Merge each multi-column group in row 1 and remember its currency columns.
-    currency_columns = set()
-    start = 1
-    for _, run in groupby(headers, key=lambda header: header[0]):
-        width = len(list(run))
-        if width > 1:
-            ws.merge_cells(start_row=1, end_row=1,
-                           start_column=start, end_column=start + width - 1)
-            # Alignment must sit on the top-left cell of the merged range.
-            ws.cell(row=1, column=start).alignment = Alignment(horizontal='center')
-            currency_columns.update((start, start + 1))
-        start += width
-
-    for i, row in enumerate(df.itertuples(index=False)):
-        for col, value in enumerate(row, start=1):
-            cell = ws.cell(row=i + 3, column=col, value=value)
-            if col in currency_columns:
-                cell.number_format = '$#,##0.0'
-from openpyxl.utils import get_column_letter
-from openpyxl.styles import Font, PatternFill
-import logging
-def scenario_df_to_worksheet(df, ws):
-    """Copy a flat scenario DataFrame into an openpyxl worksheet.
-
-    Row 1 receives the column names with the header font and fill; data rows
-    start at row 2.  ``int``/``float`` values are written as numbers with the
-    currency format, strings are cut to 32767 characters, and anything else
-    is written as ``str(value)``.  A ``ValueError`` while assigning a value is
-    logged and that cell is left empty.
-
-    The cell is now fetched before the ``try`` block.  Previously it was
-    created and assigned in one call inside the ``try``, so on failure the
-    handler cleared the cell left over from the previous iteration (or hit an
-    unbound name on the very first cell).
-
-    Args:
-        df: DataFrame to export.
-        ws: openpyxl worksheet to fill.
-
-    Returns:
-        The same worksheet ``ws``.
-    """
-    heading_fill = PatternFill(start_color='FF11B6BD', end_color='FF11B6BD', fill_type='solid')
-    for j, header in enumerate(df.columns.values):
-        cell = ws.cell(row=1, column=j + 1, value=header)
-        cell.font = Font(bold=True, color='FF11B6BD')
-        cell.fill = heading_fill
-    for i, row in enumerate(df.itertuples()):
-        for j, value in enumerate(row[1:], start=1):  # Start from index 1 to skip the index column
-            cell = ws.cell(row=i + 2, column=j)
-            try:
-                if isinstance(value, (int, float)):
-                    cell.value = value
-                    cell.number_format = '$#,##0.0'
-                elif isinstance(value, str):
-                    cell.value = value[:32767]
-                else:
-                    cell.value = str(value)
-            except ValueError as e:
-                logging.error(f"Error assigning value '{value}' to cell {get_column_letter(j)}{i+2}: {e}")
-                cell.value = None  # Assign None to the cell where the error occurred
-    return ws
-def download_scenarios():
-    """
-    Build an Excel workbook for the scenarios chosen for download.
-
-    Reads the module-level ``scenarios_to_download`` and
-    ``st.session_state['saved_scenarios']``.  Each selected scenario gets its
-    own sheet (a 'Date' column, one spend column per channel and an 'NRPU'
-    column holding the summed channel sales), and a leading 'Summary' sheet
-    lists Spends, NRPU, ROI, MROI and Spends per NRPU per channel for every
-    scenario.  The workbook is saved into ``st.session_state['xlsx_buffer']``
-    (an in-memory ``io.BytesIO``, nothing is written to disk) and the
-    download button is re-enabled.  Returns early when nothing is selected.
-
-    Fixes over the previous version:
-      * ROI was ``sales / spends * rate``; it is now ``sales / (spends * rate)``
-        as in ``create_scenario_summary``.
-      * 'Spends per NRPU' reused the ROI formula; it is now the inverse ratio,
-        matching the 'Total' row.
-      * Sales were accumulated with ``+=`` on the first channel's own
-        ``modified_sales`` object, altering the saved scenario.
-    """
-    ## create summary page
-    if not scenarios_to_download:
-        return
-    wb = Workbook()
-    wb.iso_dates = True
-    wb.remove(wb.active)
-    st.session_state['xlsx_buffer'] = io.BytesIO()
-    summary_df = None
-    for scenario_name in scenarios_to_download:
-        scenario_dict =  st.session_state['saved_scenarios'][scenario_name]
-        _spends = []
-        column_names = ['Date']
-        _sales = None
-        dates = None
-        summary_rows = []
-        for channel in scenario_dict['channels']:
-            if dates is None:
-                dates = channel.get('dates')
-                _spends.append(dates)
-            if _sales is None:
-                _sales = channel.get('modified_sales')
-            else:
-                # Rebind rather than `+=` so the stored channel data is untouched.
-                _sales = _sales + channel.get('modified_sales')
-            _spends.append(channel.get('modified_spends') * channel.get('conversion_rate'))
-            column_names.append(channel.get('name'))
-            name_mod = channel_name_formating(channel['name'])
-            channel_spends = channel.get('modified_total_spends') * channel.get('conversion_rate')
-            channel_sales = channel.get('modified_total_sales')
-            summary_rows.append([name_mod,
-                                channel_spends,
-                                channel_sales,
-                                channel_sales / channel_spends,   # ROI
-                                channel.get('modified_mroi'),
-                                channel_spends / channel_sales])  # Spends per NRPU
-        _spends.append(_sales)
-        column_names.append('NRPU')
-        scenario_df = pd.DataFrame(_spends).T
-        scenario_df.columns = column_names
-        ## write to sheet
-        ws = wb.create_sheet(scenario_name)
-        scenario_df_to_worksheet(scenario_df, ws)
-        summary_rows.append(['Total',
-                        scenario_dict.get('modified_total_spends') ,
-                        scenario_dict.get('modified_total_sales'),
-                        scenario_dict.get('modified_total_sales') / scenario_dict.get('modified_total_spends'),
-                        '-',
-                        scenario_dict.get('modified_total_spends') / scenario_dict.get('modified_total_sales')])
-        columns_index = pd.MultiIndex.from_product([[''],['Channel']], names=["first", "second"])
-        columns_index = columns_index.append(pd.MultiIndex.from_product([[scenario_name],['Spends','NRPU','ROI','MROI','Spends per NRPU']], names=["first", "second"]))
-        # Each scenario contributes one column group, joined on the channel label.
-        _df = pd.DataFrame(summary_rows, columns = columns_index)
-        _df = _df.set_index(('','Channel'))
-        if summary_df is None:
-            summary_df = _df
-        else:
-            summary_df = summary_df.merge(_df, left_index=True, right_index=True)
-    ws = wb.create_sheet('Summary',0)
-    summary_df_to_worksheet(summary_df.reset_index(), ws)
-    wb.save(st.session_state['xlsx_buffer'])
-    st.session_state['disable_download_button'] = False
-def disable_download_button():
-    """Set the ``'disable_download_button'`` session-state flag to True."""
-    st.session_state['disable_download_button'] =True
-def transform(x):
-    """Format one column of the scenario summary for display.
-
-    The ``('', 'Channel')`` column is returned untouched.  In every other
-    column string cells are kept as they are; the remaining cells go through
-    ``format_numbers``, with 4 decimals plus ``decimal_formater`` for columns
-    whose first-level name is 'ROI' or 'MROI'.
-    """
-    if x.name == ("",'Channel'):
-        return x
-
-    if x.name[0] in ('ROI', 'MROI'):
-        def render(value):
-            rounded = format_numbers(value, include_indicator=False, n_decimals=4)
-            return decimal_formater(rounded, n_decimals=4)
-    else:
-        def render(value):
-            return format_numbers(value)
-
-    return x.apply(lambda y: y if isinstance(y, str) else render(y))
-def delete_scenario():
-    """Remove the currently selected scenario and persist the remaining ones.
-
-    Uses the module-level ``selected_scenario``.  When that name is present in
-    ``st.session_state['saved_scenarios']`` it is deleted and the mapping is
-    pickled to '../saved_scenarios.pkl'; otherwise nothing happens.
-    """
-    stored = st.session_state['saved_scenarios']
-    if selected_scenario not in stored:
-        return
-    del stored[selected_scenario]
-    with open('../saved_scenarios.pkl', 'wb') as f:
-        pickle.dump(st.session_state['saved_scenarios'], f)
-def load_scenario():
-    """Make the selected saved scenario the active one in session state.
-
-    Uses the module-level ``selected_scenario`` and
-    ``selected_scenario_details``; does nothing when the selected name is not
-    among the saved scenarios.
-    """
-    if selected_scenario not in st.session_state['saved_scenarios']:
-        return
-    st.session_state['scenario'] = class_from_dict(selected_scenario_details)
-# ---- Page body: everything below runs on each Streamlit rerun ----
-# Reuse the authenticator kept in session state, or build one, then show the login form.
-authenticator = st.session_state.get('authenticator')
-if authenticator is None:
-    authenticator = load_authenticator()
-name, authentication_status, username = authenticator.login('Login', 'main')
-# The branches below read the status from session state, not from the tuple returned above.
-auth_status = st.session_state.get('authentication_status')
-if auth_status == True:
-    # Load the shared data once per session.
-    is_state_initiaized = st.session_state.get('initialized',False)
-    if not is_state_initiaized:
-        #print("Scenario page state reloaded")
-        initialize_data()
-    saved_scenarios = st.session_state['saved_scenarios']
-    if len(saved_scenarios) ==0:
-        st.header('No saved scenarios')
-    else:
-        with st.sidebar:
-            # `selected_scenario` is read as a global by delete_scenario / load_scenario.
-            selected_scenario = st.radio(
-                'Pick a scenario to view details',
-                list(saved_scenarios.keys())
-            )
-            st.markdown("""<hr>""", unsafe_allow_html=True)
-            # `scenarios_to_download` is read as a global by download_scenarios.
-            scenarios_to_download = st.multiselect('Select scenarios to download',
-                        list(saved_scenarios.keys()))
-            st.button('Prepare download',on_click=download_scenarios)
-            # NOTE(review): the file is named .xlsx but the MIME type is the legacy
-            # .xls one - confirm whether the spreadsheetml type is wanted here.
-            st.download_button(
-                    label="Download Scenarios",
-                    data=st.session_state['xlsx_buffer'].getvalue(),
-                    file_name="scenarios.xlsx",
-                    mime="application/vnd.ms-excel",
-                    disabled= st.session_state['disable_download_button'],
-                    on_click= disable_download_button
-                )
-        # Title plus the delete / load actions for the selected scenario.
-        column_1, column_2,column_3 = st.columns((6,1,1))
-        with column_1:
-            st.header(selected_scenario)
-        with column_2:
-            st.button('Delete scenarios', on_click=delete_scenario)
-        with column_3:
-            st.button('Load Scenario', on_click=load_scenario)
-        # `selected_scenario_details` is read as a global by load_scenario.
-        selected_scenario_details = saved_scenarios[selected_scenario]
-        pd.set_option('display.max_colwidth', 100)
-        # Render the formatted summary as an HTML table with coloured header and even rows.
-        st.markdown(create_scenario_summary(selected_scenario_details).transform(transform).style.set_table_styles(
-    [{
-        'selector': 'th',
-        'props': [('background-color', '#11B6BD')]
-    },
-        {
-        'selector' : 'tr:nth-child(even)',
-        'props' : [('background-color', '#11B6BD')]
-        }
-        ]).to_html(),unsafe_allow_html=True)
-elif auth_status == False:
-    st.error('Username/Password is incorrect')
-# Offer the forgot-password form whenever the user is not logged in.
-if auth_status != True:
-    try:
-        username_forgot_pw, email_forgot_password, random_password = authenticator.forgot_password('Forgot password')
-        # NOTE(review): unlike the equivalent block at the end of
-        # 8_Scenario_Planner.py, nothing here stores or emails `random_password`
-        # before the success message is shown - confirm this is intended.
-        if username_forgot_pw:
-            st.success('New password sent securely')
-            # Random password to be transferred to user securely
-        elif username_forgot_pw == False:
-            st.error('Username not found')
-    except Exception as e:
-        st.error(e)

pages/Data_Import.py DELETED Viewed

@@ -1,891 +0,0 @@
-# Importing necessary libraries
-import streamlit as st
-# Configure the Streamlit page: wide layout with the sidebar collapsed.
-# NOTE(review): the title reads "Model Build" although this file is
-# pages/Data_Import.py - confirm the title is intended.
-st.set_page_config(
-    page_title="Model Build",
-    page_icon=":shark:",
-    layout="wide",
-    initial_sidebar_state="collapsed",
-)
-import numpy as np
-import pandas as pd
-from utilities import set_header, load_local_css, load_authenticator
-import pickle
-# Apply the stylesheet and header through the helpers imported from `utilities`.
-load_local_css("styles.css")
-set_header()
-# Reuse the authenticator kept in session state, or build one, then show the login form.
-authenticator = st.session_state.get("authenticator")
-if authenticator is None:
-    authenticator = load_authenticator()
-name, authentication_status, username = authenticator.login("Login", "main")
-auth_status = st.session_state.get("authentication_status")
-# Check for authentication status
-# Halt the script here unless the session-state status is True.
-if auth_status != True:
-    st.stop()
-# Function to validate date column in dataframe
-def validate_date_column(df):
-    """Parse ``df['date']`` as DD-MM-YYYY in place and report success.
-
-    On success the 'date' column of ``df`` is replaced by its datetime
-    version and True is returned.  Any ordinary failure (missing column,
-    unparseable value) returns False and leaves ``df`` unchanged.
-
-    ``except Exception`` replaces the former bare ``except`` so that
-    KeyboardInterrupt / SystemExit are no longer swallowed.
-    """
-    try:
-        # Attempt to convert the 'date' column to datetime
-        df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y")
-    except Exception:
-        return False
-    return True
-# Function to determine data interval
-def determine_data_interval(common_freq):
-    """Map the most common gap between dates (in days) to an interval name.
-
-    Returns "daily" for 1, "weekly" for 7, "monthly" for anything from 28 to
-    31 inclusive, and "irregular" otherwise.
-    """
-    if common_freq == 1:
-        return "daily"
-    if common_freq == 7:
-        return "weekly"
-    is_month_length = 28 <= common_freq <= 31
-    return "monthly" if is_month_length else "irregular"
-# Function to read each uploaded Excel file into a pandas DataFrame and store them in a dictionary
-st.cache_resource(show_spinner=False)
-def files_to_dataframes(uploaded_files):
-    """Read uploaded Excel files into DataFrames keyed by file name.
-
-    For each upload the extension is dropped from the name, the sheet is read
-    with ``pd.read_excel`` and the column names are lower-cased and stripped.
-    A file is skipped, with a Streamlit warning, when
-      * its name (without extension) was already seen,
-      * its 'date' column is missing or not DD-MM-YYYY, or it has no numeric
-        column,
-      * its dates are not daily, weekly or monthly.
-
-    Changes over the previous version: the duplicated
-    ``common_freq = common_freq = ...`` assignment is gone; the gaps are
-    measured between *sorted* distinct dates, so row order no longer affects
-    the detected interval; and a file with fewer than two distinct dates is
-    reported as irregular instead of failing on ``.mode()[0]``.
-
-    NOTE(review): the ``st.cache_resource(show_spinner=False)`` line directly
-    above this function is a plain call, not a decorator (there is no ``@``),
-    so nothing here is cached - confirm whether caching was intended.
-
-    Args:
-        uploaded_files: iterable of uploaded file objects exposing ``.name``.
-
-    Returns:
-        dict mapping file name to a dict with keys "numeric", "non_numeric",
-        "interval" and "df".
-    """
-    df_dict = {}
-    for uploaded_file in uploaded_files:
-        # Extract file name without extension
-        file_name = uploaded_file.name.rsplit(".", 1)[0]
-        # Check for duplicate file names
-        if file_name in df_dict:
-            st.warning(
-                f"Duplicate File: {file_name}. This file will be skipped.",
-                icon="⚠️",
-            )
-            continue
-        # Read the file into a DataFrame
-        df = pd.read_excel(uploaded_file)
-        # Convert all column names to lowercase
-        df.columns = df.columns.str.lower().str.strip()
-        # Separate numeric and non-numeric columns
-        numeric_cols = list(df.select_dtypes(include=["number"]).columns)
-        non_numeric_cols = [
-            col
-            for col in df.select_dtypes(exclude=["number"]).columns
-            if col.lower() != "date"
-        ]
-        # Check for 'date' column (parsed in place by validate_date_column)
-        if not (validate_date_column(df) and len(numeric_cols) > 0):
-            st.warning(
-                f"File Name: {file_name} ➜ Please upload data with Date column in 'DD-MM-YYYY' format and at least one media/exogenous column. This file will be skipped.",
-                icon="⚠️",
-            )
-            continue
-        # Gaps in days between consecutive distinct dates
-        day_gaps = (
-            pd.Series(df["date"].unique()).sort_values().diff().dt.days.dropna()
-        )
-        # Calculate the data interval (daily, weekly, monthly or irregular);
-        # with no gap to measure the interval cannot be determined.
-        if day_gaps.empty:
-            interval = "irregular"
-        else:
-            interval = determine_data_interval(day_gaps.mode()[0])
-        if interval == "irregular":
-            st.warning(
-                f"File Name: {file_name} ➜ Please upload data in daily, weekly or monthly interval. This file will be skipped.",
-                icon="⚠️",
-            )
-            continue
-        # Store the DataFrame and its column metadata under the file name
-        df_dict[file_name] = {
-            "numeric": numeric_cols,
-            "non_numeric": non_numeric_cols,
-            "interval": interval,
-            "df": df,
-        }
-    return df_dict
-# Function to adjust dataframe granularity
-# def adjust_dataframe_granularity(df, current_granularity, target_granularity):
-#     # Set index
-#     df.set_index("date", inplace=True)
-#     # Define aggregation rules for resampling
-#     aggregation_rules = {
-#         col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
-#         for col in df.columns
-#     }
-#     resampled_df = df
-#     if current_granularity == "daily" and target_granularity == "weekly":
-#         resampled_df = df.resample("W-MON").agg(aggregation_rules)
-#     elif current_granularity == "daily" and target_granularity == "monthly":
-#         resampled_df = df.resample("MS").agg(aggregation_rules)
-#     elif current_granularity == "daily" and target_granularity == "daily":
-#         resampled_df = df.resample("D").agg(aggregation_rules)
-#     elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
-#         # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
-#         expanded_data = []
-#         for _, row in df.iterrows():
-#             if current_granularity == "weekly":
-#                 period_range = pd.date_range(start=row.name, periods=7)
-#             elif current_granularity == "monthly":
-#                 period_range = pd.date_range(
-#                     start=row.name, periods=row.name.days_in_month
-#                 )
-#             for date in period_range:
-#                 new_row = {}
-#                 for col in df.columns:
-#                     if pd.api.types.is_numeric_dtype(df[col]):
-#                         if current_granularity == "weekly":
-#                             new_row[col] = row[col] / 7
-#                         elif current_granularity == "monthly":
-#                             new_row[col] = row[col] / row.name.days_in_month
-#                     else:
-#                         new_row[col] = row[col]
-#                 expanded_data.append((date, new_row))
-#         resampled_df = pd.DataFrame(
-#             [data for _, data in expanded_data],
-#             index=[date for date, _ in expanded_data],
-#         )
-#     # Reset index
-#     resampled_df = resampled_df.reset_index().rename(columns={"index": "date"})
-#     return resampled_df
-def adjust_dataframe_granularity(df, current_granularity, target_granularity):
-    """Return ``df`` converted from one time granularity to another.
-
-    Supported conversions:
-      * daily -> weekly: left-closed, left-labelled "W-MON" bins,
-      * daily -> monthly: left-closed, left-labelled "MS" bins,
-      * daily -> daily: resample on "D",
-      * weekly/monthly -> daily: each row is expanded to 7 days (weekly) or
-        the number of days in its month (monthly); numeric values are split
-        evenly across those days and other values are repeated.
-    Any other combination returns the data unchanged.  When aggregating,
-    numeric columns are summed and other columns keep their first value.
-
-    The input frame is no longer modified: the previous version called
-    ``set_index(..., inplace=True)`` on the caller's object.
-
-    Args:
-        df: DataFrame with a 'date' column.
-        current_granularity: "daily", "weekly" or "monthly".
-        target_granularity: "daily", "weekly" or "monthly".
-
-    Returns:
-        A new DataFrame with 'date' as a regular column.
-    """
-    # Work on an indexed copy so the caller's frame keeps its 'date' column
-    df = df.set_index("date")
-    numeric_cols = {
-        col for col in df.columns if pd.api.types.is_numeric_dtype(df[col])
-    }
-    # Define aggregation rules for resampling
-    aggregation_rules = {
-        col: "sum" if col in numeric_cols else "first" for col in df.columns
-    }
-    # Initialize resampled_df
-    resampled_df = df
-    if current_granularity == "daily" and target_granularity == "weekly":
-        resampled_df = df.resample("W-MON", closed="left", label="left").agg(
-            aggregation_rules
-        )
-    elif current_granularity == "daily" and target_granularity == "monthly":
-        resampled_df = df.resample("MS", closed="left", label="left").agg(
-            aggregation_rules
-        )
-    elif current_granularity == "daily" and target_granularity == "daily":
-        resampled_df = df.resample("D").agg(aggregation_rules)
-    elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
-        # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
-        expanded_data = []
-        for _, row in df.iterrows():
-            # row.name is the period's start date (the index value)
-            if current_granularity == "weekly":
-                n_days = 7
-            else:
-                n_days = row.name.days_in_month
-            for date in pd.date_range(start=row.name, periods=n_days):
-                new_row = {
-                    col: row[col] / n_days if col in numeric_cols else row[col]
-                    for col in df.columns
-                }
-                expanded_data.append((date, new_row))
-        resampled_df = pd.DataFrame(
-            [data for _, data in expanded_data],
-            index=[date for date, _ in expanded_data],
-        )
-    # Reset index (the expanded frame has an unnamed index, hence the rename)
-    resampled_df = resampled_df.reset_index().rename(columns={"index": "date"})
-    return resampled_df
-# Function to clean and extract unique values of DMA and Panel
-st.cache_resource(show_spinner=False)
-def clean_and_extract_unique_values(files_dict, selections):
-    """Normalise the chosen DMA / Panel columns and collect their values.
-
-    For every file, the columns named by ``selections[file_name]['DMA']`` and
-    ``['Panel']`` are lower-cased, stripped and have underscores replaced by
-    spaces, directly on the stored DataFrame.  A selection is ignored when it
-    is empty, equals "N/A" or is not a column of that file.
-
-    Returns:
-        Tuple ``(dma_values, panel_values)`` of sets holding the distinct
-        non-null cleaned values across all files.
-    """
-    collected = {"DMA": set(), "Panel": set()}
-    for file_name, file_data in files_dict.items():
-        df = file_data["df"]
-        chosen = {kind: selections[file_name].get(kind) for kind in collected}
-        for kind, column in chosen.items():
-            if not column or column == "N/A" or column not in df.columns:
-                continue
-            cleaned = df[column].str.lower().str.strip().str.replace("_", " ")
-            df[column] = cleaned
-            collected[kind].update(df[column].dropna().unique())
-        # Store the processed DataFrame back under the same file name
-        files_dict[file_name]["df"] = df
-    return collected["DMA"], collected["Panel"]
-# Function to format values for display
-st.cache_resource(show_spinner=False)
-def format_values_for_display(values_list):
-    """Turn a collection of raw values into one human-readable string.
-
-    Underscores become spaces and each word is title-cased.  Several values
-    are joined as "A, B, and C", a single value is returned on its own, and
-    an empty collection yields "No values available".
-    """
-    pretty = [item.replace("_", " ").title() for item in values_list]
-    if not pretty:
-        return "No values available"
-    if len(pretty) == 1:
-        return pretty[0]
-    *leading, final = pretty
-    return f"{', '.join(leading)}, and {final}"
-# Function to normalize all data within files_dict to a daily granularity
-st.cache(show_spinner=False, allow_output_mutation=True)
-def standardize_data_to_daily(files_dict, selections):
-    """Convert every dataset to daily granularity and label it as such.
-
-    Delegates the conversion to ``apply_granularity_to_all`` with "daily" as
-    the target, then sets each entry's "interval" to "daily".
-
-    Returns:
-        The dictionary returned by ``apply_granularity_to_all``.
-    """
-    daily_files = apply_granularity_to_all(files_dict, "daily", selections)
-    for entry in daily_files.values():
-        entry["interval"] = "daily"
-    return daily_files
-# Function to apply granularity transformation to all DataFrames in files_dict
-st.cache_resource(show_spinner=False)
-def apply_granularity_to_all(files_dict, granularity_selection, selections):
-    """Convert every file's DataFrame to ``granularity_selection``.
-
-    Each file is split into segments by the DMA and/or Panel column chosen in
-    ``selections[file_name]``; every segment is converted on its own with
-    ``adjust_dataframe_granularity`` and the results are concatenated.  When
-    neither column is selected the whole frame is converted at once.
-
-    Changes over the previous version: a missing or empty "DMA"/"Panel"
-    selection is treated as "N/A" (as ``merge_into_main_df`` does) instead of
-    failing on a ``None`` column name, and each segment is passed as an
-    explicit copy rather than a slice of the parent frame.
-
-    Args:
-        files_dict: mapping of file name to a dict with "df" and "interval".
-        granularity_selection: target granularity name.
-        selections: mapping of file name to a dict with "DMA" / "Panel".
-
-    Returns:
-        ``files_dict`` with each "df" replaced by its converted version.
-    """
-    for file_name, file_data in files_dict.items():
-        df = file_data["df"].copy()
-        file_selection = selections[file_name]
-        # Columns that define a segment; "N/A", None and "" mean "not selected"
-        segment_keys = [
-            column
-            for column in (
-                file_selection.get("DMA", "N/A"),
-                file_selection.get("Panel", "N/A"),
-            )
-            if column and column != "N/A"
-        ]
-        if not segment_keys:
-            # Nothing to segment by: process the entire dataframe as is
-            files_dict[file_name]["df"] = adjust_dataframe_granularity(
-                df, file_data["interval"], granularity_selection
-            )
-            continue  # Skip to the next file
-        transformed_segments = []
-        for _, combo in df[segment_keys].drop_duplicates().iterrows():
-            # Rows matching this combination on every segment column
-            mask = df[segment_keys[0]] == combo[segment_keys[0]]
-            for column in segment_keys[1:]:
-                mask &= df[column] == combo[column]
-            # Adjust granularity of the segment
-            transformed_segments.append(
-                adjust_dataframe_granularity(
-                    df[mask].copy(), file_data["interval"], granularity_selection
-                )
-            )
-        # Combine all transformed segments into a single DataFrame for this file
-        files_dict[file_name]["df"] = pd.concat(
-            transformed_segments, ignore_index=True
-        )
-    return files_dict
-# Function to create main dataframe structure
-st.cache_resource(show_spinner=False)
-def create_main_dataframe(
-    files_dict, all_dma_values, all_panel_values, granularity_selection
-):
-    """Build the empty skeleton frame that the uploaded files are merged into.
-
-    The result holds one row for every combination of Panel value, DMA value
-    and date.  Dates run from the earliest to the latest 'date' found in any
-    file, stepping weekly on Mondays ("weekly"), on month starts ("monthly")
-    or daily (anything else).  The "Panel" and "DMA" columns are only present
-    when the corresponding collection of values is non-empty; "date" is
-    always the last column.
-    """
-    frames = [entry["df"] for entry in files_dict.values()]
-    global_start = min(frame["date"].min() for frame in frames)
-    global_end = max(frame["date"].max() for frame in frames)
-
-    # Weekly ranges start on Monday, monthly ones on the first of the month
-    freq = {"weekly": "W-MON", "monthly": "MS"}.get(granularity_selection, "D")
-    date_range = pd.date_range(start=global_start, end=global_end, freq=freq)
-
-    # (column name, values) pairs in output column order
-    axes = []
-    if all_panel_values:
-        axes.append(("Panel", all_panel_values))
-    if all_dma_values:
-        axes.append(("DMA", all_dma_values))
-    axes.append(("date", date_range))  # Date range is always included
-
-    main_df = pd.MultiIndex.from_product(
-        [values for _, values in axes],
-        names=[label for label, _ in axes],
-    ).to_frame(index=False)
-    return main_df.reset_index(drop=True)
# Function to prepare and merge dataFrames
# Not cached: the page edits the returned frame afterwards, so every rerun
# must start from a freshly merged result.
def merge_into_main_df(main_df, files_dict, selections):
    """Left-join every file's DataFrame onto the scaffold ``main_df``.

    For each file, the columns chosen as DMA / Panel in ``selections`` are
    renamed to the canonical ``"DMA"`` / ``"Panel"`` names, then the frame is
    merged on ``date`` plus whichever of ``Panel`` / ``DMA`` it contains.
    Neither ``main_df`` nor the stored DataFrames are modified.

    Args:
        main_df: Scaffold DataFrame containing at least a ``date`` column.
        files_dict: Mapping of file name to a dict holding a ``"df"`` DataFrame.
        selections: Mapping of file name to ``{"DMA": ..., "Panel": ...}``;
            a missing entry or ``"N/A"`` means no such column for that file.

    Returns:
        The merged DataFrame sorted by ``date`` (then ``Panel``, then ``DMA``
        when present) with a fresh RangeIndex.
    """
    for file_name, file_data in files_dict.items():
        df = file_data["df"]
        choice = selections[file_name]

        # rename() returns a new frame, so the stored DataFrame stays untouched
        for role in ("DMA", "Panel"):
            chosen = choice.get(role, "N/A")
            if chosen != "N/A":
                df = df.rename(columns={chosen: role})

        # Merge on 'date' and, where applicable, 'Panel' and 'DMA'
        merge_keys = ["date"] + [
            key for key in ("Panel", "DMA") if key in df.columns
        ]
        main_df = pd.merge(main_df, df, on=merge_keys, how="left")

    # Sort without inplace=True so the caller's frame is never mutated,
    # even when files_dict is empty and no merge produced a new object
    sort_by = ["date"] + [
        key for key in ("Panel", "DMA") if key in main_df.columns
    ]
    return main_df.sort_values(by=sort_by).reset_index(drop=True)
# Function to categorize column
def categorize_column(column_name):
    """Guess a variable category from keywords contained in its name.

    Matching is a case-insensitive substring test. When a name matches
    keywords from both lists, the longest matching keyword decides, so the
    more specific ``"oil_prices"`` / ``"retail_sales"`` are not shadowed by
    the shorter internal keywords ``"Price"`` / ``"sales"``. On a tie the
    Internal category wins.

    Args:
        column_name: Column name to classify.

    Returns:
        ``"Internal"``, ``"Exogenous"``, or ``"Media"`` when nothing matches.
    """
    internal_keywords = (
        "Price",
        "Discount",
        "product_price",
        "cost",
        "margin",
        "inventory",
        "sales",
        "revenue",
        "turnover",
        "expense",
    )
    exogenous_keywords = (
        "GDP",
        "Tax",
        "Inflation",
        "interest_rate",
        "employment_rate",
        "exchange_rate",
        "consumer_spending",
        "retail_sales",
        "oil_prices",
        "weather",
    )
    lowered = column_name.lower()

    def longest_match(keywords):
        # Length of the longest keyword found in the name, 0 when none match
        return max(
            (len(keyword) for keyword in keywords if keyword.lower() in lowered),
            default=0,
        )

    internal_len = longest_match(internal_keywords)
    exogenous_len = longest_match(exogenous_keywords)

    if exogenous_len > internal_len:
        return "Exogenous"
    if internal_len:
        return "Internal"
    # Default to Media if no match found
    return "Media"
# Function to calculate missing stats and prepare for editable DataFrame
# Not cached: this runs on every script rerun.
def prepare_missing_stats_df(df):
    """Summarise missing values per data column for the editable table.

    The ``date``, ``DMA`` and ``Panel`` columns are skipped. Every other
    column gets one row with its missing count, missing percentage and the
    default choices ``"Fill with 0"`` / ``"Media"``.

    Args:
        df: DataFrame to inspect.

    Returns:
        DataFrame with the columns ``Column``, ``Missing Values``,
        ``Missing Percentage``, ``Impute Method`` and ``Category``. These
        columns are present even when there are no rows to report.
    """
    output_columns = [
        "Column",
        "Missing Values",
        "Missing Percentage",
        "Impute Method",
        "Category",
    ]
    key_columns = ("date", "DMA", "Panel")
    total_rows = len(df)

    missing_stats = []
    for column in df.columns:
        if column in key_columns:  # Skip Date, DMA and Panel column
            continue
        missing = df[column].isnull().sum()
        # A frame without rows has nothing missing; avoid dividing 0 by 0
        pct_missing = round((missing / total_rows) * 100, 2) if total_rows else 0.0
        missing_stats.append(
            {
                "Column": column,
                "Missing Values": missing,
                "Missing Percentage": pct_missing,
                "Impute Method": "Fill with 0",  # Default value
                # Category is fixed here; the user adjusts it in the editor
                "Category": "Media",
            }
        )

    # Explicit columns keep the schema intact when missing_stats is empty
    return pd.DataFrame(missing_stats, columns=output_columns)
# Function to add API DataFrame details to the files dictionary
# Not cached: this runs on every script rerun.
def add_api_dataframe_to_dict(main_df, files_dict):
    """Register the API DataFrame in ``files_dict`` under the key ``"API"``.

    The entry has the same keys the page reads from every other entry:
    ``numeric``, ``non_numeric`` (excluding any column named ``date``,
    case-insensitively), ``interval`` and ``df``.

    Args:
        main_df: API DataFrame; must contain a datetime ``date`` column.
        files_dict: Dictionary to update in place.

    Returns:
        The same ``files_dict`` object, now containing the ``"API"`` entry.
    """
    numeric_cols = list(main_df.select_dtypes(include=["number"]).columns)
    non_numeric_cols = [
        col
        for col in main_df.select_dtypes(exclude=["number"]).columns
        if col.lower() != "date"
    ]

    # Sort the distinct dates before differencing: unique() keeps
    # first-appearance order, which gives wrong (even negative) gaps when the
    # rows are not already in chronological order.
    unique_dates = pd.Series(main_df["date"].unique()).sort_values()
    most_common_gap_days = unique_dates.diff().dt.days.dropna().mode()[0]

    files_dict["API"] = {
        "numeric": numeric_cols,
        "non_numeric": non_numeric_cols,
        "interval": determine_data_interval(most_common_gap_days),
        "df": main_df,
    }
    return files_dict
# Function to read the API extract into a DataFrame, parsing the "Date" column as datetime
# st.cache_data (not st.cache_resource) is used because the page modifies the
# returned frame in place; cache_data hands every call its own copy, whereas
# cache_resource would share one mutable object across reruns and sessions.
@st.cache_data(show_spinner=False)
def read_API_data():
    """Load ``upf_data_converted.xlsx`` with its ``Date`` column parsed as datetime."""
    return pd.read_excel(r"upf_data_converted.xlsx", parse_dates=["Date"])
# Callback used as the widgets' on_change handler on this page
def set_DMA_Panel_Selected_false():
    """Reset the ``DMA_Panel_Selected`` session-state flag to ``False``."""
    st.session_state.DMA_Panel_Selected = False
# Seed the session-state entries used by this page; existing values are kept.
# Each default is built by calling its factory: an empty DataFrame, an empty
# dict and False respectively.
for state_key, make_default in (
    ("final_df", pd.DataFrame),
    ("bin_dict", dict),
    ("DMA_Panel_Selected", bool),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()

# Page Title
st.write("")  # Top padding
st.title("Data Import")
#########################################################################################################################################################
# Create a dictionary to hold all DataFrames and collect user input to specify "DMA" and "Panel" columns for each file
#########################################################################################################################################################

# Load the API extract and normalise its column names (lowercase, then stripped)
main_df = read_API_data()
lowercased_columns = main_df.columns.str.lower()
main_df.columns = lowercased_columns.str.strip()

# File uploader; any change invalidates a previously accepted selection
uploaded_files = st.file_uploader(
    "Upload additional data",
    type=["xlsx"],
    accept_multiple_files=True,
    on_change=set_DMA_Panel_Selected_false,
)

# Custom HTML for upload instructions
recommendation_html = """
<div style="text-align: justify;">
<strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets including DMA, Panel, media, internal, and exogenous data adhere to the following guidelines: Each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code>, be free of missing values.
</div>
"""
st.markdown(recommendation_html, unsafe_allow_html=True)

# Date granularity picker; the choice is stored in lowercase
st.markdown("#### Choose Date Granularity")
granularity_selection = str(
    st.selectbox(
        "Choose Date Granularity",
        ["Daily", "Weekly", "Monthly"],
        label_visibility="collapsed",
        on_change=set_DMA_Panel_Selected_false,
    )
).lower()

# Uploaded files first, then the API DataFrame under its own entry
files_dict = files_to_dataframes(uploaded_files)
if main_df is not None:
    files_dict = add_api_dataframe_to_dict(main_df, files_dict)

# Nothing to work with: warn and halt until a file is uploaded
if not files_dict:
    st.warning(
        "Please upload at least one file to proceed.",
        icon="⚠️",
    )
    st.stop()
# Select DMA and Panel columns
st.markdown("#### Select DMA and Panel columns")
selections = {}
with st.expander("Select DMA and Panel columns", expanded=False):
    # `count` numbers the rows actually rendered: it drives label visibility
    # (labels are shown on the first rendered row only) and keeps widget keys unique.
    count = 0
    for file_name, file_data in files_dict.items():
        label_visibility = "visible" if count == 0 else "collapsed"

        # Candidate columns are the file's non-numeric ones, plus "N/A"
        non_numeric_cols = file_data["non_numeric"]
        dma_values = non_numeric_cols + ["N/A"]
        panel_values = non_numeric_cols + ["N/A"]

        # Only "N/A" is available: nothing to choose, so no widgets are rendered
        if len(dma_values) == 1 and len(panel_values) == 1:
            selections[file_name] = {"DMA": "N/A", "Panel": "N/A"}
            continue

        # Create layout columns for File Name, DMA, and Panel selections
        file_name_col, DMA_col, Panel_col = st.columns([2, 4, 4])

        with file_name_col:
            # Header on the first row, an empty spacer on the others
            st.write("File Name" if count == 0 else "")
            st.write(file_name)

        with DMA_col:
            selected_dma = st.selectbox(
                "Select DMA",
                dma_values,
                on_change=set_DMA_Panel_Selected_false,
                label_visibility=label_visibility,
                key=f"DMA_selectbox{count}",  # Unique key for each selectbox
            )

        with Panel_col:
            selected_panel = st.selectbox(
                "Select Panel",
                panel_values,
                on_change=set_DMA_Panel_Selected_false,
                label_visibility=label_visibility,
                key=f"Panel_selectbox{count}",  # Unique key for each selectbox
            )

        # The same real column cannot be both Panel and DMA; halt the run so
        # the user can correct the selection ("N/A" for both is fine).
        if selected_panel == selected_dma and selected_dma != "N/A":
            st.warning(
                f"File: {file_name} → The same column cannot serve as both Panel and DMA. Please adjust your selections.",
            )
            st.stop()

        # Record the selections for DMA and Panel for the current file
        selections[file_name] = {
            "DMA": selected_dma,
            "Panel": selected_panel,
        }
        count += 1

    # Accept DMA and Panel selection
    if st.button("Accept and Process", use_container_width=True):
        # Only the documented `text` argument is passed to st.spinner
        with st.spinner("Processing..."):
            # Normalize all data to a daily granularity first; this
            # standardization simplifies conversion to other granularities
            files_dict = standardize_data_to_daily(files_dict, selections)
            # Convert the daily data to the granularity chosen by the user
            files_dict = apply_granularity_to_all(
                files_dict, granularity_selection, selections
            )
        st.session_state["files_dict"] = files_dict
        st.session_state["DMA_Panel_Selected"] = True
#########################################################################################################################################################
# Display unique DMA and Panel values
#########################################################################################################################################################

# Halt until DMA and Panel columns have been accepted; otherwise continue
# with the processed files kept in session state
if not ("files_dict" in st.session_state and st.session_state["DMA_Panel_Selected"]):
    st.stop()
files_dict = st.session_state["files_dict"]

# Collect the unique DMA and Panel values and prepare them for display
with st.spinner("Fetching DMA and Panel values..."):
    all_dma_values, all_panel_values = clean_and_extract_unique_values(
        files_dict, selections
    )
    list_of_all_dma_values = list(all_dma_values)
    list_of_all_panel_values = list(all_panel_values)
    formatted_dma_values = format_values_for_display(list_of_all_dma_values)
    formatted_panel_values = format_values_for_display(list_of_all_panel_values)

# Unique DMA and Panel values
st.markdown("#### Unique DMA and Panel values")
with st.expander("Unique DMA and Panel values"):
    # The values themselves, justified via an inline stylesheet
    values_html = f"""
    <style>
    .justify-text {{
    text-align: justify;
    }}
    </style>
    <div class="justify-text">
    <strong>Panel Values:</strong> {formatted_panel_values}<br>
    <strong>DMA Values:</strong> {formatted_dma_values}
    </div>
    """
    st.write("")
    st.markdown(values_html, unsafe_allow_html=True)

    # How many distinct DMAs and Panels were found
    dma_total = len(list_of_all_dma_values)
    panel_total = len(list_of_all_panel_values)
    totals_html = f"""
    <div style="text-align: justify;">
        <strong>Number of DMAs detected:</strong> {dma_total}<br>
        <strong>Number of Panels detected:</strong> {panel_total}
    </div>
    """
    st.write("")
    st.markdown(totals_html, unsafe_allow_html=True)
    st.write("")
#########################################################################################################################################################
# Merge all DataFrames
#########################################################################################################################################################
# Build the Panel x DMA x date scaffold and merge every file onto it
main_df = create_main_dataframe(
    files_dict, all_dma_values, all_panel_values, granularity_selection
)
merged_df = merge_into_main_df(main_df, files_dict, selections)
# # Display the merged DataFrame
# st.markdown("#### Merged DataFrame based on selected DMA and Panel")
# st.dataframe(merged_df)
#########################################################################################################################################################
# Categorize Variables and Impute Missing Values
#########################################################################################################################################################
# Create an editable DataFrame in Streamlit
st.markdown("#### Select Variables Category & Impute Missing Values")
# Prepare missing stats DataFrame for editing
missing_stats_df = prepare_missing_stats_df(merged_df)
edited_stats_df = st.data_editor(
    missing_stats_df,
    column_config={
        "Impute Method": st.column_config.SelectboxColumn(
            options=[
                "Drop Column",
                "Fill with Mean",
                "Fill with Median",
                "Fill with 0",
            ],
            required=True,
            default="Fill with 0",
        ),
        "Category": st.column_config.SelectboxColumn(
            options=[
                "Media",
                "Exogenous",
                "Internal",
                "Response_Metric",
            ],
            required=True,
            default="Media",
        ),
    },
    disabled=["Column", "Missing Values", "Missing Percentage"],
    hide_index=True,
    use_container_width=True,
)
# Apply the imputation chosen for each column.
# The filled Series is assigned back explicitly: calling
# `merged_df[column].fillna(..., inplace=True)` acts on an intermediate object,
# which pandas deprecates and which has no effect under Copy-on-Write.
for _, row in edited_stats_df.iterrows():
    column = row["Column"]
    impute_method = row["Impute Method"]
    if impute_method == "Drop Column":
        merged_df = merged_df.drop(columns=[column])
    elif impute_method == "Fill with Mean":
        merged_df[column] = merged_df[column].fillna(merged_df[column].mean())
    elif impute_method == "Fill with Median":
        merged_df[column] = merged_df[column].fillna(merged_df[column].median())
    elif impute_method == "Fill with 0":
        merged_df[column] = merged_df[column].fillna(0)
# Display the Final DataFrame
st.markdown("#### Final DataFrame")
final_df = merged_df
st.dataframe(final_df, hide_index=True)

# Group the variables by the category chosen in the editor.
# Columns removed through "Drop Column" are skipped, so the dictionary only
# ever names columns that still exist in final_df.
category_dict = {}
for _, row in edited_stats_df.iterrows():
    column = row["Column"]
    if column not in final_df.columns:
        continue
    category_dict.setdefault(row["Category"], []).append(column)

# Add Date, DMA and Panel in category dictionary
category_dict["Date"] = ["date"]
if "DMA" in final_df.columns:
    category_dict["DMA"] = ["DMA"]
if "Panel" in final_df.columns:
    category_dict["Panel"] = ["Panel"]

# Display the dictionary
st.markdown("#### Variable Category")
for category, variables in category_dict.items():
    if len(variables) > 1:
        # "a, b and c": comma-join all but the last, then add " and " + last
        variables_str = ", ".join(variables[:-1]) + " and " + variables[-1]
    else:
        # A single variable needs no "and"
        variables_str = variables[0]
    st.markdown(
        f"<div style='text-align: justify;'><strong>{category}:</strong> {variables_str}</div>",
        unsafe_allow_html=True,
    )

# Store final dataframe and bin dictionary into session state
st.session_state["final_df"], st.session_state["bin_dict"] = final_df, category_dict

# Persist both objects to disk on request
if st.button('Save Changes'):
    with open("Pickle_files/main_df", 'wb') as f:
        pickle.dump(st.session_state["final_df"], f)
    with open("Pickle_files/category_dict", 'wb') as c:
        pickle.dump(st.session_state["bin_dict"], c)
    st.success('Changes Saved!')

pages/actual_data.csv DELETED Viewed

@@ -1,158 +0,0 @@
-const,clicks_search_decay.2,impressions_tv_lag3,online_edu_trend_lag3,clicks_digital_lag2_decay.3,impressions_streaming_lag2_decay.4,covid_cases_lag3,unemployement_rate_lead4,season,flag_Aug_1,flag_Aug_2,flag_Aug_3,flag_dec_1,flag_dec_-1,flag_dec_-2,flag_dec_-3,flag_easter_-1,flag_easter_-2,flag_may_-1,flag_may_-2,flag_jun_-1,flag_jun_-2,covid_flag1,flag_june28,flag_aug13,flag_sep13,flag_mar_feb,date,total_prospect_id
-1.0,0.03264506089026503,0.0,0.0,0.0,0.11920857922376585,0.0,0.2448979591836735,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-10,3106
-1.0,0.1203178311529351,0.0,0.0,0.0,0.23575959332216032,0.0,0.2448979591836735,101,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-17,7809
-1.0,0.037674240888288246,0.0,0.0,0.30427286753070926,0.14866425214344534,0.0,0.2448979591836735,102,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-11-24,5658
-1.0,0.114056065999327,0.25459834519940233,0.5700000000000001,0.3210660307498862,0.06375317695001911,0.0,0.2448979591836735,103,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-01,7528
-1.0,0.15091848146432302,0.04759636387261456,0.58,0.2652143429433443,0.02550166207848893,0.0,0.2380952380952381,104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-08,8913
-1.0,0.09691798534505919,0.0,0.41000000000000003,0.27398476053158455,0.22803554179688423,0.0,0.2380952380952381,105,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-15,7974
-1.0,0.0,0.2185391903071715,0.53,0.3093665823461814,0.3016670242357716,0.0,0.2380952380952381,106,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-22,5034
-1.0,0.06818143419410627,0.0645557652165116,0.6,0.35005256364095544,0.3915886857834677,0.0,0.2380952380952381,107,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2019-12-29,8296
-1.0,0.19748095587743647,0.0,0.49,0.2866388037412839,0.4644891817948484,0.0,0.2380952380952381,108,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-05,10953
-1.0,0.2718903484441833,0.31632836028874944,0.42,0.38339772931601046,0.4758788391710054,0.0,0.2380952380952381,109,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-12,11583
-1.0,0.29329394272923165,0.710207473795361,0.56,0.4716341482535363,0.47415700741999534,0.0,0.2380952380952381,110,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2020-01-19,11650
-1.0,0.3150710926081645,0.6225458397661645,0.66,0.5560651882029227,0.2282082561307921,0.0,0.2380952380952381,111,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-01-26,10086
-1.0,0.23335326208386092,0.5093471390869946,0.65,0.5990392189890996,0.09128427138188955,0.0,0.2993197278911565,112,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-02,8454
-1.0,0.18339704064539092,0.46920681970876166,0.66,0.5097387360461574,0.03651393215188798,0.0,0.2993197278911565,113,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-09,7842
-1.0,0.1829206162885479,0.5702922924005152,0.64,0.3647117781342298,0.5333315970976881,0.0,0.2993197278911565,114,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-16,8528
-1.0,0.17708137647064887,0.4762803199026322,0.62,0.2994390381863003,0.9999999999999999,0.0,0.2993197278911565,115,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-02-23,9230
-1.0,0.2110785179466496,0.31643298954206356,0.65,0.318727924805625,0.5153399788387041,0.0,0.2993197278911565,116,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-01,8210
-1.0,0.1922309642774856,0.35110354589746834,0.65,0.3435805763353255,0.20613623376787482,0.0,1.0,117,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-08,6573
-1.0,0.1174971533357681,0.4397302099507956,0.64,0.37079693119819457,0.08245451214041095,0.0,1.0,118,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2020-03-15,4464
-1.0,0.04487177585471158,0.5651604986093057,0.66,0.3797815418753292,0.032981804856164386,3.6661729553753427e-06,1.0,119,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2020-03-22,5498
-1.0,0.04417426781579725,0.5142518574426083,0.77,0.3239901926717436,0.013192796475509808,0.00016497778299189042,1.0,120,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-03-29,7134
-1.0,0.09508966430933447,0.4246084040047787,1.0,0.22766051203571303,0.005277118590203924,0.01074555293220513,0.8979591836734694,121,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2020-04-05,6507
-1.0,0.1727148072921107,0.3306303340730278,0.92,0.2557126494916798,0.0021108474360815696,0.07506489126131015,0.8979591836734694,122,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2020-04-12,6752
-1.0,0.2757761792524949,0.9059477066272279,0.87,0.2910560761584964,0.0008443389744326279,0.11051311756683434,0.8979591836734694,123,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-04-19,7874
-1.0,0.46164669127102737,1.0,0.8200000000000001,0.29288325042575475,0.0003377355897730512,0.1323451775160945,0.8979591836734694,124,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-04-26,8706
-1.0,0.3631365926708698,0.8555262504044332,0.85,0.3143348639913703,0.00013509423590922048,0.12527679605813083,0.8979591836734694,125,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-03,9593
-1.0,0.3556269301486625,0.5998066602658987,0.8,0.3573452157072908,5.4838924587260594e-05,0.08418266340132861,0.7482993197278912,126,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-10,9554
-1.0,0.3898924329688705,0.31953123019194307,0.76,0.3492819601843694,0.08837696494340691,0.06699197841357364,0.7482993197278912,127,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-17,9461
-1.0,0.3270785638817633,0.5040802333471541,0.88,0.37224504100306005,0.12944061135952373,0.04806352744497074,0.7482993197278912,128,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-24,8347
-1.0,0.29596428185745655,0.6228739252579004,0.8300000000000001,0.3873711562094451,0.14079607140381442,0.028926104617911456,0.7482993197278912,129,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-05-31,7926
-1.0,0.23446621861142697,0.644779308361226,0.8,0.3519020717491842,0.15750706055823313,0.024482702995996537,0.6938775510204082,130,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-07,8606
-1.0,0.2202508917985891,0.726916988225644,0.71,0.32726146750928653,0.0797309833640819,0.022000703905207433,0.6938775510204082,131,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-14,7573
-1.0,0.18610614076735926,0.5963517592669729,0.73,0.31618831243754153,0.03501476889363339,0.015086301711369536,0.6938775510204082,132,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2020-06-21,6983
-1.0,0.1568177529621934,0.6764095796293655,0.75,0.2836099513597926,0.014005944823975384,0.011489786042146325,0.6938775510204082,133,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-06-28,6277
-1.0,0.22774801916471138,0.6466210070345804,0.72,0.25409997289933184,0.006272411362367827,0.00871449311492719,0.5714285714285715,134,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-05,7421
-1.0,0.24542124594101095,0.6580063264819511,0.73,0.2516667689694555,0.05947462601462651,0.008318546435746652,0.5714285714285715,135,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-12,7852
-1.0,0.24895270375190542,0.32749815383926373,0.68,0.2671053898526598,0.0888609058832765,0.008014254080450499,0.5714285714285715,136,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-19,7396
-1.0,0.16285259960994197,0.3666961464656464,0.78,0.26077100654286645,0.12420199588573878,0.008058248155915004,0.5714285714285715,137,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-07-26,7041
-1.0,0.16864346155569104,0.39341698388602436,0.84,0.25893225300958655,0.10423952696584138,0.00920209411799211,0.5714285714285715,138,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-02,7470
-1.0,0.22582910125625383,0.41507293852636135,0.8300000000000001,0.2528768986269057,0.08197739941078482,0.009315745479608745,0.5374149659863946,139,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-09,8725
-1.0,0.2778946696783185,0.7857143231388266,0.8,0.2772125371796957,0.07178679747906064,0.007237025413910927,0.5374149659863946,140,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-16,9657
-1.0,0.3062154076077969,0.434016630925742,0.87,0.33174759696083367,0.12078972986041582,0.006500124649880482,0.5374149659863946,141,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2020-08-23,10000
-1.0,0.2851073700683267,0.4051792323256236,0.8200000000000001,0.3621387745268235,0.1539969659046611,0.006118842662521447,0.5374149659863946,142,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-08-30,8941
-1.0,0.25999778433367665,0.4113785668398346,0.77,0.3604714968693371,0.1462622685965232,0.006375474769397721,0.4693877551020409,143,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-06,8507
-1.0,0.2947500457787596,0.43576671635701947,0.74,0.3084711376902622,0.1030893445960345,0.0060051913009048115,0.4693877551020409,144,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,2020-09-13,9887
-1.0,0.3239559328273078,0.40721834097732834,0.72,0.24061271129609485,0.08422768334333634,0.006456130574415978,0.4693877551020409,145,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-20,9627
-1.0,0.3189849597494306,0.4831656702512836,0.68,0.28577062852640756,0.054400116894051116,0.006401137980085348,0.4693877551020409,146,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-09-27,8735
-1.0,0.2930673557404469,0.5423730023996388,0.62,0.32330756771945346,0.02176006539088146,0.007566980979894707,0.45578231292517013,147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-04,8138
-1.0,0.27381401410957934,0.48862464971809444,0.59,0.33668984325037016,0.008704026156352586,0.009172764734349107,0.45578231292517013,148,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-11,7966
-1.0,0.21658154029531146,0.5162854532967293,0.55,0.44481231480084876,0.003481610462541034,0.012223020633221393,0.45578231292517013,149,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-18,8109
-1.0,0.21772903332032795,0.47368257634991157,0.6,0.46141705479304307,0.0013926441850164136,0.013601501664442522,0.45578231292517013,150,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-10-25,7848
-1.0,0.16712357438522701,0.5132571164009214,0.5,0.38402389059771924,0.0005570576740065655,0.012915927321787332,0.45578231292517013,151,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-01,6516
-1.0,0.1814031347156822,0.5409537987241609,0.5,0.2968208337801042,0.00022282306960262618,0.013091903623645349,0.45578231292517013,152,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-08,7233
-1.0,0.16852532779394064,0.49490997931858044,0.5,0.22663075929954526,8.912922784105048e-05,0.014624363918992243,0.45578231292517013,153,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-15,7409
-1.0,0.10492104198879731,0.4086344123814518,0.41000000000000003,0.21669561761817938,3.565169113642019e-05,0.016127494830696133,0.45578231292517013,154,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-22,6232
-1.0,0.16920169406380464,0.45151008168804235,0.49,0.21833619946593313,1.4260676454568076e-05,0.024849320291534072,0.45578231292517013,155,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-11-29,8170
-1.0,0.1305885456099783,0.4543635808918873,0.47000000000000003,0.1596898931167178,5.704270581827231e-06,0.03519159419864792,0.435374149659864,156,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-06,7075
-1.0,0.1214984593864375,0.35070760971315756,0.4,0.15417676852356046,2.2817082327308923e-06,0.041732046751037526,0.435374149659864,157,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-13,7379
-1.0,0.057042007816384965,0.32470890321593604,0.47000000000000003,0.15442387578570832,9.126832930923571e-07,0.049892947749703036,0.435374149659864,158,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-20,5442
-1.0,0.12406882983279183,0.3135816516054531,0.45,0.1671308209739812,3.650733172369429e-07,0.0686930826648678,0.435374149659864,159,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2020-12-27,7735
-1.0,0.24786523070013738,0.3102913429236421,0.42,0.16347790840061424,1.4602932689477716e-07,0.0732574679943101,0.435374149659864,160,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-03,9754
-1.0,0.26083059672146286,0.2649240941306087,0.34,0.25327016920452516,5.841173075791087e-08,0.07444897420480709,0.4217687074829932,161,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-10,10641
-1.0,0.24028847292133387,0.6513962629200784,0.38,0.3773812732234543,2.3364692303164347e-08,0.08318546435746653,0.4217687074829932,162,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2021-01-17,10230
-1.0,0.31526302386797916,0.531674302460824,0.47000000000000003,0.3527386460097067,9.345876921265738e-09,0.10258685163731283,0.4217687074829932,163,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-24,10352
-1.0,0.2966293410018717,0.44836670500794606,0.47000000000000003,0.3711695518795665,3.738350768506295e-09,0.13234151134313912,0.4217687074829932,164,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-01-31,9216
-1.0,0.20088776123137192,0.3815806999416851,0.45,0.33580461662371014,1.4953403074025183e-09,0.12043744775703538,0.40816326530612246,165,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-07,8421
-1.0,0.173394454128539,0.343687050600215,0.48,0.3277941002786073,5.981361229610074e-10,0.11271648751301491,0.40816326530612246,166,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-14,9281
-1.0,0.1777198044422716,0.33051072402008147,0.5,0.31487397296804576,2.3925444918440296e-10,0.109699227170741,0.40816326530612246,167,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-21,8891
-1.0,0.1850269016675808,0.30627520154343757,0.46,0.3133091660972597,9.570177967376119e-11,0.08255854878209734,0.40816326530612246,168,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-02-28,8169
-1.0,0.2529549962208855,0.298123038215738,0.42,0.3358964981168952,3.828071186950448e-11,0.08351908609640568,0.40816326530612246,169,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-07,8724
-1.0,0.213028120324469,0.3267901551549544,0.44,0.3038053348505854,1.531228474780179e-11,0.07285052279626343,0.40816326530612246,170,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-14,8194
-1.0,0.16441430466323353,0.25967469209260036,0.5,0.32087357753439977,6.124913899120717e-12,0.07822879852179906,0.40816326530612246,171,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-21,8254
-1.0,0.11053130189212229,0.260168451958828,0.42,0.3279459500984871,2.449965559648287e-12,0.07333812379932836,0.40816326530612246,172,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-03-28,7026
-1.0,0.06917021315146277,0.0,0.38,0.37411287881420296,9.799862238593149e-13,0.07465061371735272,0.39455782312925175,173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-04-04,6412
-1.0,0.06728264676731566,0.0,0.44,0.4347510050616973,3.9199448954372595e-13,0.0732721326861316,0.39455782312925175,174,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2021-04-11,6297
-1.0,0.10167805497311716,0.0,0.43,0.4574504815633023,1.5679779581749037e-13,0.07982724993034271,0.39455782312925175,175,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2021-04-18,6687
-1.0,0.1734619149834527,0.0,0.48,0.48912312446006045,6.271911832699615e-14,0.06941165256412136,0.39455782312925175,176,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-04-25,8430
-1.0,0.2040432878056308,0.0,0.46,0.44466429049983563,2.5087647330798465e-14,0.06276854716898124,0.39455782312925175,177,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-02,8025
-1.0,0.20788046814877387,0.0,0.48,0.5722675873212515,1.0035058932319387e-14,0.04882242524673344,0.40136054421768713,178,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-09,8242
-1.0,0.14929264058846564,0.0,0.5,0.45913415146070335,4.014023572927755e-15,0.033618806000791895,0.40136054421768713,179,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-16,8280
-1.0,0.11694210039888364,0.0,0.51,0.39528662679579885,1.6056094291711022e-15,0.025182942030473228,0.40136054421768713,180,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-23,7909
-1.0,0.055184035342337234,0.0,0.51,0.3880077087936407,6.422437716684409e-16,0.017652622780132275,0.40136054421768713,181,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-05-30,7574
-1.0,0.04358787034563821,0.0,0.5,0.3863265622647678,2.568975086673764e-16,0.012651962869000308,0.3673469387755103,182,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-06,7270
-1.0,0.03833609653008979,0.0,0.46,0.3784495643657444,1.0275900346695056e-16,0.008835476822454577,0.3673469387755103,183,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-13,6716
-1.0,0.06111263589867566,0.0,0.48,0.38862024435317233,4.1103601386780226e-17,0.005939200187708055,0.3673469387755103,184,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-06-20,6944
-1.0,0.07119833324643848,0.0,0.44,0.4039000969934476,1.644144055471209e-17,0.004967664354533589,0.3673469387755103,185,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2021-06-27,6803
-1.0,0.0659956847282599,0.0,0.45,0.4420872417106599,6.576576221884836e-18,0.004359079643941282,0.3537414965986395,186,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-04,7019
-1.0,0.12577031397293442,0.0,0.45,0.4950177419852857,2.630630488753935e-18,0.003977797656582247,0.3537414965986395,187,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-11,8254
-1.0,0.1502746019886232,0.0,0.45,0.5650602702260171,1.052252195501574e-18,0.0040621196345558795,0.3537414965986395,188,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-18,7804
-1.0,0.21001397285486328,0.0,0.42,0.594015126140436,4.209008782006296e-19,0.004952999662712088,0.3537414965986395,189,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-07-25,8212
-1.0,0.23464189851384848,0.0,0.46,0.5484130743981998,1.6836035128025183e-19,0.008076579020691881,0.3537414965986395,190,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-01,8378
-1.0,0.23496148203757855,0.0,0.47000000000000003,0.5324473242588711,6.734414051210074e-20,0.01220102359548914,0.3197278911564626,191,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-08,9496
-1.0,0.23319893582092505,0.0,0.53,0.5532778727756644,2.6937656204840295e-20,0.020152952735698258,0.3197278911564626,192,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-15,9511
-1.0,0.23262329847201318,0.0,0.49,0.7309984534528141,1.0775062481936118e-20,0.029028757460661962,0.3197278911564626,193,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-22,9569
-1.0,0.18495638415853394,0.0,0.46,0.8724050615489382,4.310024992774448e-21,0.03698435277382646,0.3197278911564626,194,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-08-29,7928
-1.0,0.2921700012245981,0.0,0.49,1.0,1.7240099971097793e-21,0.03982197064128697,0.3129251700680272,195,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-05,7840
-1.0,0.4172971677569805,0.0,0.48,0.8193686075762131,6.896039988439117e-22,0.03868179085216524,0.3129251700680272,196,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-12,9521
-1.0,0.5004920981884484,0.0,0.53,0.4496097944711011,2.758415995375647e-22,0.03902274493701515,0.3129251700680272,197,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-19,9451
-1.0,0.6383788968475093,0.0,0.47000000000000003,0.3701822126418114,1.1033663981502588e-22,0.03567186285580209,0.3129251700680272,198,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-09-26,8898
-1.0,0.6501651617929107,0.0,0.51,0.34258196039636274,4.413465592601035e-23,0.0352539191388893,0.3129251700680272,199,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-03,8441
-1.0,0.6649283374522998,0.0,0.51,0.31355701111053985,1.7653862370404143e-23,0.03635010485254652,0.28571428571428575,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-10,8788
-1.0,0.6097114754591861,0.0,0.51,0.32306971094469733,7.061544948161657e-24,0.031323781730726925,0.28571428571428575,201,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-17,9569
-1.0,0.3964279757062242,0.0,0.51,0.33051520280988034,2.8246179792646632e-24,0.02719933715592967,0.28571428571428575,202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-24,9008
-1.0,0.33105364706311086,0.0,0.47000000000000003,0.3259978333423606,1.1298471917058652e-24,0.025967503042923553,0.28571428571428575,203,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-10-31,8495
-1.0,0.31714045716637634,0.0,0.55,0.3045528431182349,4.519388766823461e-25,0.02263128565353199,0.2653061224489796,204,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-07,8807
-1.0,0.28268319082761023,0.0,0.49,0.31370309424641213,1.8077555067293845e-25,0.01786159463858867,0.2653061224489796,205,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-14,8385
-1.0,0.15774740707436136,0.0,0.51,0.37945364695975814,7.231022026917538e-26,0.016409790148260033,0.2653061224489796,206,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-21,6964
-1.0,0.2836203500514554,0.0,0.55,0.36793503370466,2.892408810767015e-26,0.01882946429880776,0.2653061224489796,207,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-11-28,9340
-1.0,0.33646919882766096,0.0,0.49,0.3299836196379579,1.1569635243068062e-26,0.023555161238286576,0.272108843537415,208,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-05,8632
-1.0,0.361268166630245,0.0,0.38,0.3243428164088717,4.6278540972272255e-27,0.029421037966887126,0.272108843537415,209,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-12,9271
-1.0,0.21850759166298056,0.0,0.51,0.34100191273497404,1.8511416388908902e-27,0.029549354020325262,0.272108843537415,210,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-19,7663
-1.0,0.2156152088113536,0.0,0.43,0.3876459690915292,7.404566555563562e-28,0.04853646375621416,0.272108843537415,211,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2021-12-26,7888
-1.0,0.4122692273972545,0.0,0.42,0.44121852053456856,2.961826622225425e-28,0.07303383144403221,0.272108843537415,212,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-02,11088
-1.0,0.5580863257308297,0.0,0.42,0.33648328199770844,1.18473064889017e-28,0.2914790808171166,0.2585034013605442,213,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-09,12850
-1.0,0.5441541455767391,0.0,0.45,0.5258301345263098,4.7389225955606806e-29,0.6228644542534939,0.2585034013605442,214,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-16,12768
-1.0,0.37953926965668333,0.0,0.51,0.6191133700101356,1.8955690382242722e-29,1.0,0.2585034013605442,215,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2022-01-23,11023
-1.0,0.3422525462363791,0.0,0.5,0.6600516747429145,7.582276152897087e-30,0.8603298089190655,0.2585034013605442,216,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-01-30,10317
-1.0,0.3679329127754763,0.0,0.49,0.6150147631969254,3.0329104611588346e-30,0.3851571321728674,0.2448979591836735,217,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-02-06,10109
-1.0,0.3530129569359208,0.0,0.49,0.5435710104633258,1.2131641844635335e-30,0.18207314748280565,0.2448979591836735,218,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-13,10233
-1.0,0.3628237688509028,0.0,0.48,0.5395383650448762,4.852656737854129e-31,0.08532284319045035,0.2448979591836735,219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-20,10660
-1.0,0.3535562124344392,0.0,0.49,0.3713089856353334,1.941062695141646e-31,0.04778123212740684,0.2448979591836735,220,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-02-27,9862
-1.0,0.35851767100446613,0.0,0.49,0.33021424233802193,7.764250780566529e-32,0.028365180155739026,0.2448979591836735,221,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-06,10393
-1.0,0.3648140365425708,0.0,0.53,0.29899648842829235,3.105700312226557e-32,0.019053100849085656,0.2448979591836735,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-13,9914
-1.0,0.417768904168966,0.0,0.46,0.30801461857263196,1.242280124890568e-32,0.014096435013418193,0.2448979591836735,223,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2022-03-20,11027
-1.0,0.45364666714531404,0.0,0.5,0.29874033139572204,4.9691204995617213e-33,0.013440190054406007,0.2448979591836735,224,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-03-27,10066
-1.0,0.45997433293937545,0.0,0.45,0.3080341285301519,1.9876481998241388e-33,0.014672024167412121,0.2448979591836735,225,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-04-03,8722
-1.0,0.4245480429075594,0.0,0.46,0.304189689538618,7.950592799291056e-34,0.01936472555029256,0.2448979591836735,226,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,2022-04-10,7805
-1.0,0.4463068738641009,0.0,0.54,0.307818077305473,3.1802371197109226e-34,0.027822586558343475,0.2448979591836735,227,0,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,2022-04-17,8519
-1.0,0.6012222981571669,0.0,0.53,0.29394180576819906,1.272094847878869e-34,0.033340176856183366,0.2448979591836735,228,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-04-24,10084
-1.0,0.6804106164543928,0.0,0.5,0.28219281269675367,5.088379391460478e-35,0.04576117082899503,0.2448979591836735,229,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-01,10291
-1.0,0.62805714350389,0.0,0.54,0.30839694661979145,2.035351756529193e-35,0.05172603422739071,0.2448979591836735,230,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-08,9743
-1.0,0.7470007501508245,0.0,0.54,0.3120111152265925,8.141407025566787e-36,0.04952999662712088,0.2448979591836735,231,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-15,10759
-1.0,0.6460736106378411,0.0,0.55,0.2905779236460707,3.25656280967673e-36,0.06457597043598129,0.2448979591836735,232,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-22,9845
-1.0,0.5732108245519132,0.0,0.52,0.38068837954927237,1.3026251233207076e-36,0.080201199571791,0.2448979591836735,233,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-05-29,9499
-1.0,0.5996683384067256,0.0,0.5,0.3940488499594224,5.210500487782985e-37,0.09049581323048496,0.40680272108843546,234,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-05,10021
-1.0,0.5630659455826548,0.0,0.54,0.4539755399873685,2.0842001896133483e-37,0.09128037424293528,0.40680272108843546,235,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-12,10112
-1.0,0.5482324249484887,0.0,0.45,0.48814019600803654,8.336800703454939e-38,0.08289217052103649,0.40680272108843546,236,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-06-19,10034
-1.0,0.5485743918729864,0.0,0.47000000000000003,0.475428506654356,3.3347202263835196e-38,0.06987359035649866,0.40680272108843546,237,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2022-06-26,9209
-1.0,0.5559932625646005,0.0,0.43,0.510072176038165,1.333888035554951e-38,0.06264756346145385,0.40680272108843546,238,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-03,10265
-1.0,0.6089718159266746,0.0,0.45,0.44215508529036335,5.33555159223524e-39,0.0627612148230705,0.40680272108843546,239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-10,10033
-1.0,0.6101706458097598,0.0,0.48,0.41550269661979555,2.1342200869095313e-39,0.07072780865510112,0.40680272108843546,240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-17,9790
-1.0,0.6111403594460636,0.0,0.44,0.437146146258812,8.536874847792479e-40,0.07964760745552932,0.40680272108843546,241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-24,9629
-1.0,0.6451477728019566,0.0,0.44,0.4975101423754845,3.4147444392713438e-40,0.0893739643061401,0.40680272108843546,242,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-07-31,10134
-1.0,0.7267513590970145,0.0,0.44,0.5042632593424633,1.3658922758628901e-40,0.09389435556011791,0.40680272108843546,243,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-07,12029
-1.0,0.832744074444703,0.0,0.46,0.5840915039533217,5.463514104995084e-41,0.08482790984147467,0.40680272108843546,244,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-14,12886
-1.0,0.8546151893753493,0.0,0.49,0.6374603327364593,2.1853506435415578e-41,0.07962194424484169,0.40680272108843546,245,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-21,12027
-1.0,0.9999999999999998,0.0,0.55,0.6022458246191313,8.740852589601472e-42,0.07178366646624922,0.40680272108843546,246,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-08-28,11375
-1.0,0.860672618209781,0.0,0.48,0.5735957859704555,3.495791051275827e-42,0.05725095687114135,0.40680272108843546,247,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-04,10824
-1.0,0.8622728019659036,0.0,0.54,0.5790428094946118,1.39776643594557e-42,0.050739833702394745,0.40680272108843546,248,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-11,12285
-1.0,0.7774120906393625,0.0,0.55,0.7618650061054455,5.585565898134668e-43,0.0440857297883885,0.40680272108843546,249,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-18,12146
-1.0,0.6580209603679659,0.0,0.52,0.8137272725878776,2.2287265136062566e-43,0.039975949905412735,0.40680272108843546,250,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-09-25,10881
-1.0,0.9480011027127861,0.0,0.52,0.7867690657367606,8.859907597948911e-44,0.03648941942485079,0.40680272108843546,251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-02,11373
-1.0,0.709096498806814,0.0,0.46,0.7292818780372798,3.4889645827034517e-44,0.04076784326377381,0.40680272108843546,252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-09,10230
-1.0,0.5414415970743589,0.0,0.45,0.6974583695681711,1.340587376605267e-44,0.04368978310920796,0.0,253,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-16,11557
-1.0,0.6081525119323576,0.0,0.54,0.6240593695822464,4.812364941659934e-45,0.041156457597043596,0.0,254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-23,10805
-1.0,0.5960421531458853,0.0,0.45,0.5899287906913332,1.3749614119028383e-45,0.03843982343711047,0.0,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-10-30,9709
-1.0,0.848521629204434,0.0,0.47000000000000003,0.6201930426013046,0.0,0.040723849188309305,0.0,256,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2022-11-06,10098