BlendMMM committed on
Commit bb080e9
1 Parent(s): 45dc323

Upload 28 files

Files changed (6)
  1. Data_Import.py +211 -846
  2. README.md +1 -1
  3. classes.py +106 -130
  4. upf_data_converted.csv +0 -0
  5. upf_data_converted.xlsx +2 -2
  6. utilities.py +263 -534
Data_Import.py CHANGED
@@ -1,58 +1,79 @@
1
  # Importing necessary libraries
2
  import streamlit as st
 
3
 
4
  st.set_page_config(
5
- page_title="Data Import",
6
  page_icon=":shark:",
7
  layout="wide",
8
  initial_sidebar_state="collapsed",
9
  )
10
 
11
- import pickle
 
12
  import pandas as pd
13
  from utilities import set_header, load_local_css
14
- import streamlit_authenticator as stauth
15
- import yaml
16
- from yaml import SafeLoader
17
 
18
  load_local_css("styles.css")
19
  set_header()
20
 
21
 
22
  for k, v in st.session_state.items():
23
- if k not in ["logout", "login", "config"] and not k.startswith(
24
- "FormSubmitter"
25
- ):
26
  st.session_state[k] = v
27
- with open("config.yaml") as file:
28
- config = yaml.load(file, Loader=SafeLoader)
29
- st.session_state["config"] = config
30
- authenticator = stauth.Authenticate(
31
- config["credentials"],
32
- config["cookie"]["name"],
33
- config["cookie"]["key"],
34
- config["cookie"]["expiry_days"],
35
- config["preauthorized"],
36
- )
37
- st.session_state["authenticator"] = authenticator
38
- name, authentication_status, username = authenticator.login("Login", "main")
39
- auth_status = st.session_state.get("authentication_status")
40
 
41
- if auth_status == True:
42
- authenticator.logout("Logout", "main")
43
- is_state_initiaized = st.session_state.get("initialized", False)
 
 
 
44
 
 
 
45
  if not is_state_initiaized:
46
-
47
- if 'session_name' not in st.session_state:
48
- st.session_state['session_name']=None
49
 
 
50
 
51
- # Function to validate date column in dataframe
 
52
  def validate_date_column(df):
53
  try:
54
  # Attempt to convert the 'Date' column to datetime
55
- df["date"] = pd.to_datetime(df["date"], format="%d-%m-%Y")
56
  return True
57
  except:
58
  return False
@@ -70,786 +91,196 @@ if auth_status == True:
70
  return "irregular"
71
 
72
 
73
- # Function to read each uploaded Excel file into a pandas DataFrame and store them in a dictionary
74
- st.cache_resource(show_spinner=False)
75
-
 
 
76
 
77
- def files_to_dataframes(uploaded_files):
78
- df_dict = {}
79
- for uploaded_file in uploaded_files:
80
- # Extract file name without extension
81
- file_name = uploaded_file.name.rsplit(".", 1)[0]
 
 
82
 
83
- # Check for duplicate file names
84
- if file_name in df_dict:
85
- st.warning(
86
- f"Duplicate File: {file_name}. This file will be skipped.",
87
- icon="⚠️",
88
- )
89
- continue
90
-
91
- # Read the file into a DataFrame
92
- df = pd.read_excel(uploaded_file)
93
-
94
- # Convert all column names to lowercase
95
- df.columns = df.columns.str.lower().str.strip()
96
-
97
- # Separate numeric and non-numeric columns
98
- numeric_cols = list(df.select_dtypes(include=["number"]).columns)
99
- non_numeric_cols = [
100
- col
101
- for col in df.select_dtypes(exclude=["number"]).columns
102
- if col.lower() != "date"
103
- ]
104
-
105
- # Check for 'Date' column
106
- if not (validate_date_column(df) and len(numeric_cols) > 0):
107
- st.warning(
108
- f"File Name: {file_name} ➜ Please upload data with Date column in 'DD-MM-YYYY' format and at least one media/exogenous column. This file will be skipped.",
109
- icon="⚠️",
110
- )
111
- continue
112
-
113
- # Check for interval
114
- common_freq = common_freq = (
115
- pd.Series(df["date"].unique()).diff().dt.days.dropna().mode()[0]
116
- )
117
- # Calculate the data interval (daily, weekly, monthly or irregular)
118
- interval = determine_data_interval(common_freq)
119
- if interval == "irregular":
120
- st.warning(
121
- f"File Name: {file_name} ➜ Please upload data in daily, weekly or monthly interval. This file will be skipped.",
122
- icon="⚠️",
123
- )
124
- continue
125
-
126
- # Store both DataFrames in the dictionary under their respective keys
127
- df_dict[file_name] = {
128
- "numeric": numeric_cols,
129
- "non_numeric": non_numeric_cols,
130
- "interval": interval,
131
- "df": df,
132
- }
133
-
134
- return df_dict
135
-
136
-
137
- # Function to adjust dataframe granularity
138
- def adjust_dataframe_granularity(df, current_granularity, target_granularity):
139
- # Set index
140
- df.set_index("date", inplace=True)
141
-
142
- # Define aggregation rules for resampling
143
- aggregation_rules = {
144
- col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
145
- for col in df.columns
146
- }
147
-
148
- # Initialize resampled_df
149
- resampled_df = df
150
- if current_granularity == "daily" and target_granularity == "weekly":
151
- resampled_df = df.resample("W-MON", closed="left", label="left").agg(
152
- aggregation_rules
153
- )
154
-
155
- elif current_granularity == "daily" and target_granularity == "monthly":
156
- resampled_df = df.resample("MS", closed="left", label="left").agg(
157
- aggregation_rules
158
- )
159
-
160
- elif current_granularity == "daily" and target_granularity == "daily":
161
- resampled_df = df.resample("D").agg(aggregation_rules)
162
-
163
- elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
164
- # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
165
- expanded_data = []
166
  for _, row in df.iterrows():
167
- if current_granularity == "weekly":
168
- period_range = pd.date_range(start=row.name, periods=7)
169
- elif current_granularity == "monthly":
170
- period_range = pd.date_range(
171
- start=row.name, periods=row.name.days_in_month
172
- )
173
-
174
- for date in period_range:
175
- new_row = {}
176
  for col in df.columns:
177
- if pd.api.types.is_numeric_dtype(df[col]):
178
- if current_granularity == "weekly":
179
- new_row[col] = row[col] / 7
180
- elif current_granularity == "monthly":
181
- new_row[col] = row[col] / row.name.days_in_month
182
- else:
183
- new_row[col] = row[col]
184
- expanded_data.append((date, new_row))
185
-
186
- resampled_df = pd.DataFrame(
187
- [data for _, data in expanded_data],
188
- index=[date for date, _ in expanded_data],
189
- )
190
 
191
- # Reset index
192
- resampled_df = resampled_df.reset_index().rename(columns={"index": "date"})
 
 
 
 
 
193
 
194
- return resampled_df
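For reference, the resampling in the removed adjust_dataframe_granularity helper hinges on an aggregation map that sums numeric columns and keeps the first value of anything else. A minimal, self-contained sketch of that pattern (toy column names, not from this repo):

```python
import pandas as pd

df = pd.DataFrame({
    "date": pd.date_range("2023-01-02", periods=14, freq="D"),
    "spend": [10.0] * 14,
    "region": ["north"] * 14,
}).set_index("date")

# Sum numeric columns, carry the first value of non-numeric ones,
# mirroring the aggregation_rules dict comprehension above
aggregation_rules = {
    col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
    for col in df.columns
}
weekly = df.resample("W-MON", closed="left", label="left").agg(aggregation_rules)
print(weekly)  # weekly 'spend' totals with 'region' carried through
```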
 
 
 
195
 
 
196
 
197
- # Function to clean and extract unique values of Panel_1 and Panel_2
198
- st.cache_resource(show_spinner=False)
199
 
 
 
 
 
200
 
201
- def clean_and_extract_unique_values(files_dict, selections):
202
- all_panel1_values = set()
203
- all_panel2_values = set()
 
204
 
205
- for file_name, file_data in files_dict.items():
206
- df = file_data["df"]
 
 
 
 
 
207
 
208
- # 'Panel_1' and 'Panel_2' selections
209
- selected_panel1 = selections[file_name].get("Panel_1")
210
- selected_panel2 = selections[file_name].get("Panel_2")
211
 
212
- # Clean and standardize Panel_1 column if it exists and is selected
213
- if (
214
- selected_panel1
215
- and selected_panel1 != "N/A"
216
- and selected_panel1 in df.columns
217
- ):
218
- df[selected_panel1] = (
219
- df[selected_panel1].str.lower().str.strip().str.replace("_", " ")
220
- )
221
- all_panel1_values.update(df[selected_panel1].dropna().unique())
222
 
223
- # Clean and standardize Panel_2 column if it exists and is selected
224
- if (
225
- selected_panel2
226
- and selected_panel2 != "N/A"
227
- and selected_panel2 in df.columns
228
- ):
229
- df[selected_panel2] = (
230
- df[selected_panel2].str.lower().str.strip().str.replace("_", " ")
231
- )
232
- all_panel2_values.update(df[selected_panel2].dropna().unique())
233
 
234
- # Update the processed DataFrame back in the dictionary
235
- files_dict[file_name]["df"] = df
 
 
 
 
 
236
 
237
- return all_panel1_values, all_panel2_values
 
 
 
238
 
 
 
239
 
240
- # Function to format values for display
241
- st.cache_resource(show_spinner=False)
242
 
 
 
 
 
243
 
244
- def format_values_for_display(values_list):
245
- # Capitalize the first letter of each word and replace underscores with spaces
246
- formatted_list = [value.replace("_", " ").title() for value in values_list]
247
- # Join values with commas and 'and' before the last value
248
- if len(formatted_list) > 1:
249
- return ", ".join(formatted_list[:-1]) + ", and " + formatted_list[-1]
250
- elif formatted_list:
251
- return formatted_list[0]
252
- return "No values available"
253
 
 
254
 
255
- # Function to normalize all data within files_dict to a daily granularity
256
- st.cache(show_spinner=False, allow_output_mutation=True)
257
 
 
 
 
 
258
 
259
- def standardize_data_to_daily(files_dict, selections):
260
- # Normalize all data to a daily granularity using a provided function
261
- files_dict = apply_granularity_to_all(files_dict, "daily", selections)
 
 
 
 
262
 
263
- # Update the "interval" attribute for each dataset to indicate the new granularity
264
- for files_name, files_data in files_dict.items():
265
- files_data["interval"] = "daily"
266
 
267
- return files_dict
 
 
268
 
 
 
269
 
270
- # Function to apply granularity transformation to all DataFrames in files_dict
271
- st.cache_resource(show_spinner=False)
272
 
 
 
 
 
 
 
273
 
274
- def apply_granularity_to_all(files_dict, granularity_selection, selections):
275
- for file_name, file_data in files_dict.items():
276
- df = file_data["df"].copy()
 
277
 
278
- # Handling when Panel_1 or Panel_2 might be 'N/A'
279
- selected_panel1 = selections[file_name].get("Panel_1")
280
- selected_panel2 = selections[file_name].get("Panel_2")
281
 
282
- # Correcting the segment selection logic & handling 'N/A'
283
- if selected_panel1 != "N/A" and selected_panel2 != "N/A":
284
- unique_combinations = df[
285
- [selected_panel1, selected_panel2]
286
- ].drop_duplicates()
287
- elif selected_panel1 != "N/A":
288
- unique_combinations = df[[selected_panel1]].drop_duplicates()
289
- selected_panel2 = None # Ensure Panel_2 is ignored if N/A
290
- elif selected_panel2 != "N/A":
291
- unique_combinations = df[[selected_panel2]].drop_duplicates()
292
- selected_panel1 = None # Ensure Panel_1 is ignored if N/A
293
  else:
294
- # If both are 'N/A', process the entire dataframe as is
295
- df = adjust_dataframe_granularity(
296
- df, file_data["interval"], granularity_selection
297
- )
298
- files_dict[file_name]["df"] = df
299
- continue # Skip to the next file
300
-
301
- transformed_segments = []
302
- for _, combo in unique_combinations.iterrows():
303
- if selected_panel1 and selected_panel2:
304
- segment = df[
305
- (df[selected_panel1] == combo[selected_panel1])
306
- & (df[selected_panel2] == combo[selected_panel2])
307
- ]
308
- elif selected_panel1:
309
- segment = df[df[selected_panel1] == combo[selected_panel1]]
310
- elif selected_panel2:
311
- segment = df[df[selected_panel2] == combo[selected_panel2]]
312
-
313
- # Adjust granularity of the segment
314
- transformed_segment = adjust_dataframe_granularity(
315
- segment, file_data["interval"], granularity_selection
316
  )
317
- transformed_segments.append(transformed_segment)
318
-
319
- # Combine all transformed segments into a single DataFrame for this file
320
- transformed_df = pd.concat(transformed_segments, ignore_index=True)
321
- files_dict[file_name]["df"] = transformed_df
322
-
323
- return files_dict
324
-
325
-
326
- # Function to create main dataframe structure
327
- st.cache_resource(show_spinner=False)
328
-
329
-
330
- def create_main_dataframe(
331
- files_dict, all_panel1_values, all_panel2_values, granularity_selection
332
- ):
333
- # Determine the global start and end dates across all DataFrames
334
- global_start = min(df["df"]["date"].min() for df in files_dict.values())
335
- global_end = max(df["df"]["date"].max() for df in files_dict.values())
336
-
337
- # Adjust the date_range generation based on the granularity_selection
338
- if granularity_selection == "weekly":
339
- # Generate a weekly range, with weeks starting on Monday
340
- date_range = pd.date_range(start=global_start, end=global_end, freq="W-MON")
341
- elif granularity_selection == "monthly":
342
- # Generate a monthly range, starting from the first day of each month
343
- date_range = pd.date_range(start=global_start, end=global_end, freq="MS")
344
- else: # Default to daily if not weekly or monthly
345
- date_range = pd.date_range(start=global_start, end=global_end, freq="D")
346
-
347
- # Collect all unique Panel_1 and Panel_2 values, excluding 'N/A'
348
- all_panel1s = all_panel1_values
349
- all_panel2s = all_panel2_values
350
-
351
- # Dynamically build the list of dimensions (Panel_1, Panel_2) to include in the main DataFrame based on availability
352
- dimensions, merge_keys = [], []
353
- if all_panel1s:
354
- dimensions.append(all_panel1s)
355
- merge_keys.append("Panel_1")
356
- if all_panel2s:
357
- dimensions.append(all_panel2s)
358
- merge_keys.append("Panel_2")
359
-
360
- dimensions.append(date_range) # Date range is always included
361
- merge_keys.append("date") # Date range is always included
362
-
363
- # Create a main DataFrame template with the dimensions
364
- main_df = pd.MultiIndex.from_product(
365
- dimensions,
366
- names=[name for name, _ in zip(merge_keys, dimensions)],
367
- ).to_frame(index=False)
368
-
369
- return main_df.reset_index(drop=True)
370
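The main-frame template built by create_main_dataframe is simply the cross product of the selected panel values and the date range. A small illustration with made-up panel names:

```python
import pandas as pd

panels = ["north", "south"]                        # hypothetical Panel_1 values
dates = pd.date_range("2023-01-01", periods=3, freq="D")

# Cross product of panel values and dates, as in create_main_dataframe,
# before each uploaded file is merged onto this template
main_df = pd.MultiIndex.from_product(
    [panels, dates], names=["Panel_1", "date"]
).to_frame(index=False)

print(main_df)  # 2 panels x 3 dates = 6 rows
```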
-
371
-
372
- # Function to prepare and merge dataFrames
373
- st.cache_resource(show_spinner=False)
374
-
375
-
376
- def merge_into_main_df(main_df, files_dict, selections):
377
- for file_name, file_data in files_dict.items():
378
- df = file_data["df"].copy()
379
-
380
- # Rename selected Panel_1 and Panel_2 columns if not 'N/A'
381
- selected_panel1 = selections[file_name].get("Panel_1", "N/A")
382
- selected_panel2 = selections[file_name].get("Panel_2", "N/A")
383
- if selected_panel1 != "N/A":
384
- df.rename(columns={selected_panel1: "Panel_1"}, inplace=True)
385
- if selected_panel2 != "N/A":
386
- df.rename(columns={selected_panel2: "Panel_2"}, inplace=True)
387
-
388
- # Merge current DataFrame into main_df based on 'date', and where applicable, 'Panel_1' and 'Panel_2'
389
- merge_keys = ["date"]
390
- if "Panel_1" in df.columns:
391
- merge_keys.append("Panel_1")
392
- if "Panel_2" in df.columns:
393
- merge_keys.append("Panel_2")
394
- main_df = pd.merge(main_df, df, on=merge_keys, how="left")
395
-
396
- # After all merges, sort by 'date' and reset index for cleanliness
397
- sort_by = ["date"]
398
- if "Panel_1" in main_df.columns:
399
- sort_by.append("Panel_1")
400
- if "Panel_2" in main_df.columns:
401
- sort_by.append("Panel_2")
402
- main_df.sort_values(by=sort_by, inplace=True)
403
- main_df.reset_index(drop=True, inplace=True)
404
-
405
- return main_df
406
-
407
-
408
- # Function to categorize column
409
- def categorize_column(column_name):
410
- # Define keywords for each category
411
- internal_keywords = [
412
- "Price",
413
- "Discount",
414
- "product_price",
415
- "cost",
416
- "margin",
417
- "inventory",
418
- "sales",
419
- "revenue",
420
- "turnover",
421
- "expense",
422
- ]
423
- exogenous_keywords = [
424
- "GDP",
425
- "Tax",
426
- "Inflation",
427
- "interest_rate",
428
- "employment_rate",
429
- "exchange_rate",
430
- "consumer_spending",
431
- "retail_sales",
432
- "oil_prices",
433
- "weather",
434
- ]
435
-
436
- # Check if the column name matches any of the keywords for Internal or Exogenous categories
437
- for keyword in internal_keywords:
438
- if keyword.lower() in column_name.lower():
439
- return "Internal"
440
- for keyword in exogenous_keywords:
441
- if keyword.lower() in column_name.lower():
442
- return "Exogenous"
443
-
444
- # Default to Media if no match found
445
- return "Media"
446
 
447
-
448
- # Function to calculate missing stats and prepare for editable DataFrame
449
- st.cache_resource(show_spinner=False)
450
 
451
 
 
452
  def prepare_missing_stats_df(df):
453
  missing_stats = []
454
  for column in df.columns:
455
  if (
456
- column == "date" or column == "Panel_2" or column == "Panel_1"
457
- ): # Skip Date, Panel_1 and Panel_2 column
458
  continue
459
 
460
  missing = df[column].isnull().sum()
461
  pct_missing = round((missing / len(df)) * 100, 2)
462
-
463
- # Dynamically assign category based on column name
464
- category = categorize_column(column)
465
- # category = "Media" # Keep default bin as Media
466
-
467
  missing_stats.append(
468
  {
469
  "Column": column,
470
  "Missing Values": missing,
471
  "Missing Percentage": pct_missing,
472
  "Impute Method": "Fill with 0", # Default value
473
- "Category": category,
474
  }
475
  )
476
  stats_df = pd.DataFrame(missing_stats)
477
-
478
  return stats_df
479
 
480
 
481
- # Function to add API DataFrame details to the files dictionary
482
- st.cache_resource(show_spinner=False)
483
-
484
-
485
- def add_api_dataframe_to_dict(main_df, files_dict):
486
- files_dict["API"] = {
487
- "numeric": list(main_df.select_dtypes(include=["number"]).columns),
488
- "non_numeric": [
489
- col
490
- for col in main_df.select_dtypes(exclude=["number"]).columns
491
- if col.lower() != "date"
492
- ],
493
- "interval": determine_data_interval(
494
- pd.Series(main_df["date"].unique()).diff().dt.days.dropna().mode()[0]
495
- ),
496
- "df": main_df,
497
- }
498
-
499
- return files_dict
500
-
501
-
502
- # Function to read API data into a DataFrame, parsing specified columns as datetime
503
- @st.cache_resource(show_spinner=False)
504
- def read_API_data():
505
- return pd.read_excel("upf_data_converted_randomized_resp_metrics.xlsx", parse_dates=["Date"])
506
-
507
-
508
- # Function to set the 'Panel_1_Panel_2_Selected' session state variable to False
509
- def set_Panel_1_Panel_2_Selected_false():
510
- st.session_state["Panel_1_Panel_2_Selected"] = False
511
-
512
-
513
- # Function to serialize and save the objects into a pickle file
514
- @st.cache_resource(show_spinner=False)
515
- def save_to_pickle(file_path, final_df, bin_dict):
516
- # Open the file in write-binary mode and dump the objects
517
- with open(file_path, "wb") as f:
518
- pickle.dump({"final_df": final_df, "bin_dict": bin_dict}, f)
519
- # Data is now saved to file
520
-
521
-
522
- # Function to processes the merged_df DataFrame based on operations defined in edited_df
523
- @st.cache_resource(show_spinner=False)
524
- def process_dataframes(merged_df, edited_df, edited_stats_df):
525
- # Ensure there are operations defined by the user
526
- if edited_df.empty:
527
- return merged_df, edited_stats_df # No operations to apply
528
-
529
- # Perform operations as defined by the user
530
- for index, row in edited_df.iterrows():
531
- result_column_name = f"{row['Column 1']}{row['Operator']}{row['Column 2']}"
532
- col1 = row["Column 1"]
533
- col2 = row["Column 2"]
534
- op = row["Operator"]
535
-
536
- # Apply the specified operation
537
- if op == "+":
538
- merged_df[result_column_name] = merged_df[col1] + merged_df[col2]
539
- elif op == "-":
540
- merged_df[result_column_name] = merged_df[col1] - merged_df[col2]
541
- elif op == "*":
542
- merged_df[result_column_name] = merged_df[col1] * merged_df[col2]
543
- elif op == "/":
544
- merged_df[result_column_name] = merged_df[col1] / merged_df[col2].replace(
545
- 0, 1e-9
546
- )
547
-
548
- # Add summary of operation to edited_stats_df
549
- new_row = {
550
- "Column": result_column_name,
551
- "Missing Values": None,
552
- "Missing Percentage": None,
553
- "Impute Method": None,
554
- "Category": row["Category"],
555
- }
556
- new_row_df = pd.DataFrame([new_row])
557
-
558
- # Use pd.concat to add the new_row_df to edited_stats_df
559
- edited_stats_df = pd.concat(
560
- [edited_stats_df, new_row_df], ignore_index=True, axis=0
561
- )
562
-
563
- # Combine column names from edited_df for cleanup
564
- combined_columns = set(edited_df["Column 1"]).union(set(edited_df["Column 2"]))
565
-
566
- # Filter out rows in edited_stats_df and drop columns from merged_df
567
- edited_stats_df = edited_stats_df[~edited_stats_df["Column"].isin(combined_columns)]
568
- merged_df.drop(columns=list(combined_columns), errors="ignore", inplace=True)
569
-
570
- return merged_df, edited_stats_df
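A compact sketch of the derived-column step performed by process_dataframes, using hypothetical column names: the new column is named "<col1><op><col2>" and the source columns are dropped afterwards.

```python
import pandas as pd

merged_df = pd.DataFrame({"tv_spend": [1.0, 2.0], "radio_spend": [3.0, 4.0]})
ops = [{"Column 1": "tv_spend", "Operator": "+", "Column 2": "radio_spend"}]

# Build each derived column, then drop the columns it consumed
for row in ops:
    name = f"{row['Column 1']}{row['Operator']}{row['Column 2']}"
    merged_df[name] = merged_df[row["Column 1"]] + merged_df[row["Column 2"]]

used = {r["Column 1"] for r in ops} | {r["Column 2"] for r in ops}
merged_df.drop(columns=list(used), errors="ignore", inplace=True)
print(merged_df)  # single column: tv_spend+radio_spend
```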
571
-
572
-
573
- # Function to prepare a list of numeric column names and initialize an empty DataFrame with predefined structure
574
- st.cache_resource(show_spinner=False)
575
-
576
-
577
- def prepare_numeric_columns_and_default_df(merged_df, edited_stats_df):
578
- # Get columns categorized as 'Response Metrics'
579
- columns_response_metrics = edited_stats_df[
580
- edited_stats_df["Category"] == "Response Metrics"
581
- ]["Column"].tolist()
582
-
583
- # Filter numeric columns, excluding those categorized as 'Response Metrics'
584
- numeric_columns = [
585
- col
586
- for col in merged_df.select_dtypes(include=["number"]).columns
587
- if col not in columns_response_metrics
588
- ]
589
-
590
- # Define the structure of the empty DataFrame
591
- data = {
592
- "Column 1": pd.Series([], dtype="str"),
593
- "Operator": pd.Series([], dtype="str"),
594
- "Column 2": pd.Series([], dtype="str"),
595
- "Category": pd.Series([], dtype="str"),
596
- }
597
- default_df = pd.DataFrame(data)
598
-
599
- return numeric_columns, default_df
600
-
601
-
602
- # Initialize 'final_df' in session state
603
- if "final_df" not in st.session_state:
604
- st.session_state["final_df"] = pd.DataFrame()
605
-
606
- # Initialize 'bin_dict' in session state
607
- if "bin_dict" not in st.session_state:
608
- st.session_state["bin_dict"] = {}
609
-
610
- # Initialize 'Panel_1_Panel_2_Selected' in session state
611
- if "Panel_1_Panel_2_Selected" not in st.session_state:
612
- st.session_state["Panel_1_Panel_2_Selected"] = False
613
-
614
-
615
- # Page Title
616
- st.write("") # Top padding
617
- st.title("Data Import")
618
-
619
-
620
- #########################################################################################################################################################
621
- # Create a dictionary to hold all DataFrames and collect user input to specify "Panel_2" and "Panel_1" columns for each file
622
- #########################################################################################################################################################
623
-
624
-
625
- # Read the Excel file, parsing 'Date' column as datetime
626
- main_df = read_API_data()
627
-
628
- # Convert all column names to lowercase
629
- main_df.columns = main_df.columns.str.lower().str.strip()
630
-
631
- # File uploader
632
- uploaded_files = st.file_uploader(
633
- "Upload additional data",
634
- type=["xlsx"],
635
- accept_multiple_files=True,
636
- on_change=set_Panel_1_Panel_2_Selected_false,
637
- )
638
-
639
- # Custom HTML for upload instructions
640
- recommendation_html = f"""
641
- <div style="text-align: justify;">
642
- <strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets including panel, media, internal, and exogenous data adhere to the following guidelines: Each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code>, be free of missing values.
643
- </div>
644
- """
645
- st.markdown(recommendation_html, unsafe_allow_html=True)
646
-
647
- # Choose Desired Granularity
648
- st.markdown("#### Choose Desired Granularity")
649
- # Granularity Selection
650
- granularity_selection = st.selectbox(
651
- "Choose Date Granularity",
652
- ["Daily", "Weekly", "Monthly"],
653
- label_visibility="collapsed",
654
- on_change=set_Panel_1_Panel_2_Selected_false,
655
- )
656
- granularity_selection = str(granularity_selection).lower()
657
-
658
- # Convert files to dataframes
659
- files_dict = files_to_dataframes(uploaded_files)
660
-
661
- # Add API Dataframe
662
- if main_df is not None:
663
- files_dict = add_api_dataframe_to_dict(main_df, files_dict)
664
-
665
- # Display a warning message if no files have been uploaded and halt further execution
666
- if not files_dict:
667
- st.warning(
668
- "Please upload at least one file to proceed.",
669
- icon="⚠️",
670
- )
671
- st.stop() # Halts further execution until file is uploaded
672
-
673
-
674
- # Select Panel_1 and Panel_2 columns
675
- st.markdown("#### Select Panel columns")
676
- selections = {}
677
- with st.expander("Select Panel columns", expanded=False):
678
- count = 0 # Initialize counter to manage the visibility of labels and keys
679
- for file_name, file_data in files_dict.items():
680
- # Determine visibility of the label based on the count
681
- if count == 0:
682
- label_visibility = "visible"
683
- else:
684
- label_visibility = "collapsed"
685
-
686
- # Extract non-numeric columns
687
- non_numeric_cols = file_data["non_numeric"]
688
-
689
- # Prepare Panel_1 and Panel_2 values for dropdown, adding "N/A" as an option
690
- panel1_values = non_numeric_cols + ["N/A"]
691
- panel2_values = non_numeric_cols + ["N/A"]
692
-
693
- # Skip if only one option is available
694
- if len(panel1_values) == 1 and len(panel2_values) == 1:
695
- selected_panel1, selected_panel2 = "N/A", "N/A"
696
- # Update the selections for Panel_1 and Panel_2 for the current file
697
- selections[file_name] = {
698
- "Panel_1": selected_panel1,
699
- "Panel_2": selected_panel2,
700
- }
701
- continue
702
-
703
- # Create layout columns for File Name, Panel_2, and Panel_1 selections
704
- file_name_col, Panel_1_col, Panel_2_col = st.columns([2, 4, 4])
705
-
706
- with file_name_col:
707
- # Display "File Name" label only for the first file
708
- if count == 0:
709
- st.write("File Name")
710
- else:
711
- st.write("")
712
- st.write(file_name) # Display the file name
713
-
714
- with Panel_1_col:
715
- # Display a selectbox for Panel_1 values
716
- selected_panel1 = st.selectbox(
717
- "Select Panel Level 1",
718
- panel2_values,
719
- on_change=set_Panel_1_Panel_2_Selected_false,
720
- label_visibility=label_visibility, # Control visibility of the label
721
- key=f"Panel_1_selectbox{count}", # Ensure unique key for each selectbox
722
- )
723
-
724
- with Panel_2_col:
725
- # Display a selectbox for Panel_2 values
726
- selected_panel2 = st.selectbox(
727
- "Select Panel Level 2",
728
- panel1_values,
729
- on_change=set_Panel_1_Panel_2_Selected_false,
730
- label_visibility=label_visibility, # Control visibility of the label
731
- key=f"Panel_2_selectbox{count}", # Ensure unique key for each selectbox
732
- )
733
-
734
- # Skip processing if the same column is selected for both Panel_1 and Panel_2 due to potential data integrity issues
735
- if selected_panel2 == selected_panel1 and not (
736
- selected_panel2 == "N/A" and selected_panel1 == "N/A"
737
- ):
738
- st.warning(
739
- f"File: {file_name} → The same column cannot serve as both Panel_1 and Panel_2. Please adjust your selections.",
740
- )
741
- selected_panel1, selected_panel2 = "N/A", "N/A"
742
- st.stop()
743
-
744
- # Update the selections for Panel_1 and Panel_2 for the current file
745
- selections[file_name] = {
746
- "Panel_1": selected_panel1,
747
- "Panel_2": selected_panel2,
748
- }
749
-
750
- count += 1 # Increment the counter after processing each file
751
-
752
- # Accept Panel_1 and Panel_2 selection
753
- if st.button("Accept and Process", use_container_width=True):
754
-
755
- # Normalize all data to a daily granularity. This initial standardization simplifies subsequent conversions to other levels of granularity
756
- with st.spinner("Processing..."):
757
- files_dict = standardize_data_to_daily(files_dict, selections)
758
-
759
- # Convert all data to daily level granularity
760
- files_dict = apply_granularity_to_all(
761
- files_dict, granularity_selection, selections
762
- )
763
-
764
- # Update the 'files_dict' in the session state
765
- st.session_state["files_dict"] = files_dict
766
-
767
- # Set a flag in the session state to indicate that selection has been made
768
- st.session_state["Panel_1_Panel_2_Selected"] = True
769
-
770
-
771
- #########################################################################################################################################################
772
- # Display unique Panel_1 and Panel_2 values
773
- #########################################################################################################################################################
774
-
775
-
776
- # Halts further execution until Panel_1 and Panel_2 columns are selected
777
- if "files_dict" in st.session_state and st.session_state["Panel_1_Panel_2_Selected"]:
778
- files_dict = st.session_state["files_dict"]
779
- else:
780
- st.stop()
781
-
782
- # Set to store unique values of Panel_1 and Panel_2
783
- with st.spinner("Fetching Panel values..."):
784
- all_panel1_values, all_panel2_values = clean_and_extract_unique_values(
785
- files_dict, selections
786
- )
787
-
788
- # List of Panel_1 and Panel_2 columns unique values
789
- list_of_all_panel1_values = list(all_panel1_values)
790
- list_of_all_panel2_values = list(all_panel2_values)
791
-
792
- # Format Panel_1 and Panel_2 values for display
793
- formatted_panel1_values = format_values_for_display(list_of_all_panel1_values)
794
- formatted_panel2_values = format_values_for_display(list_of_all_panel2_values)
795
-
796
- # Unique Panel_1 and Panel_2 values
797
- st.markdown("#### Unique Panel values")
798
- # Display Panel_1 and Panel_2 values
799
- with st.expander("Unique Panel values"):
800
- st.write("")
801
- st.markdown(
802
- f"""
803
- <style>
804
- .justify-text {{
805
- text-align: justify;
806
- }}
807
- </style>
808
- <div class="justify-text">
809
- <strong>Panel Level 1 Values:</strong> {formatted_panel1_values}<br>
810
- <strong>Panel Level 2 Values:</strong> {formatted_panel2_values}
811
- </div>
812
- """,
813
- unsafe_allow_html=True,
814
- )
815
-
816
- # Display total Panel_1 and Panel_2
817
- st.write("")
818
- st.markdown(
819
- f"""
820
- <div style="text-align: justify;">
821
- <strong>Number of Level 1 Panels detected:</strong> {len(list_of_all_panel1_values)}<br>
822
- <strong>Number of Level 2 Panels detected:</strong> {len(list_of_all_panel2_values)}
823
- </div>
824
- """,
825
- unsafe_allow_html=True,
826
- )
827
- st.write("")
828
-
829
-
830
- #########################################################################################################################################################
831
- # Merge all DataFrames
832
- #########################################################################################################################################################
833
-
834
-
835
- # Merge all DataFrames selected
836
- main_df = create_main_dataframe(
837
- files_dict, all_panel1_values, all_panel2_values, granularity_selection
838
- )
839
- merged_df = merge_into_main_df(main_df, files_dict, selections)
840
-
841
-
842
- #########################################################################################################################################################
843
- # Categorize Variables and Impute Missing Values
844
- #########################################################################################################################################################
845
-
846
 
847
  # Create an editable DataFrame in Streamlit
848
  st.markdown("#### Select Variables Category & Impute Missing Values")
849
 
850
- # Prepare missing stats DataFrame for editing
851
- missing_stats_df = prepare_missing_stats_df(merged_df)
852
-
853
  edited_stats_df = st.data_editor(
854
  missing_stats_df,
855
  column_config={
@@ -865,10 +296,12 @@ if auth_status == True:
865
  ),
866
  "Category": st.column_config.SelectboxColumn(
867
  options=[
 
868
  "Media",
869
  "Exogenous",
870
  "Internal",
871
- "Response Metrics",
 
872
  ],
873
  required=True,
874
  default="Media",
@@ -879,84 +312,31 @@ if auth_status == True:
879
  use_container_width=True,
880
  )
881
 
 
882
  # Apply changes based on edited DataFrame
883
  for i, row in edited_stats_df.iterrows():
884
  column = row["Column"]
885
  if row["Impute Method"] == "Drop Column":
886
- merged_df.drop(columns=[column], inplace=True)
887
 
888
  elif row["Impute Method"] == "Fill with Mean":
889
- merged_df[column].fillna(merged_df[column].mean(), inplace=True)
890
 
891
  elif row["Impute Method"] == "Fill with Median":
892
- merged_df[column].fillna(merged_df[column].median(), inplace=True)
893
 
894
  elif row["Impute Method"] == "Fill with 0":
895
- merged_df[column].fillna(0, inplace=True)
896
-
897
-
898
- #########################################################################################################################################################
899
- # Group columns
900
- #########################################################################################################################################################
901
 
902
 
903
- # Display Group columns header
904
- st.markdown("#### Feature engineering")
905
 
906
- # Prepare the numeric columns and an empty DataFrame for user input
907
- numeric_columns, default_df = prepare_numeric_columns_and_default_df(
908
- merged_df, edited_stats_df
909
- )
910
-
911
- # Display editable Dataframe
912
- edited_df = st.data_editor(
913
- default_df,
914
- column_config={
915
- "Column 1": st.column_config.SelectboxColumn(
916
- options=numeric_columns,
917
- required=True,
918
- default=numeric_columns[0],
919
- width=400,
920
- ),
921
- "Operator": st.column_config.SelectboxColumn(
922
- options=["+", "-", "*", "/"],
923
- required=True,
924
- default="+",
925
- width=100,
926
- ),
927
- "Column 2": st.column_config.SelectboxColumn(
928
- options=numeric_columns,
929
- required=True,
930
- default=numeric_columns[0],
931
- width=400,
932
- ),
933
- "Category": st.column_config.SelectboxColumn(
934
- options=[
935
- "Media",
936
- "Exogenous",
937
- "Internal",
938
- "Response Metrics",
939
- ],
940
- required=True,
941
- default="Media",
942
- width=200,
943
- ),
944
- },
945
- num_rows="dynamic",
946
- )
947
-
948
- # Process the DataFrame based on user inputs and operations specified in edited_df
949
- final_df, edited_stats_df = process_dataframes(merged_df, edited_df, edited_stats_df)
950
-
951
-
952
- #########################################################################################################################################################
953
- # Display the Final DataFrame and variables
954
- #########################################################################################################################################################
955
-
956
-
957
- # Display the Final DataFrame and variables
958
- st.markdown("#### Final DataFrame")
959
- st.dataframe(final_df, hide_index=True)
960
 
961
  # Initialize an empty dictionary to hold categories and their variables
962
  category_dict = {}
@@ -974,15 +354,8 @@ if auth_status == True:
974
  # If it exists, append the current column to the list of variables under this category
975
  category_dict[category].append(column)
976
 
977
- # Add Date, Panel_1 and Panel_12 in category dictionary
978
- category_dict.update({"Date": ["date"]})
979
- if "Panel_1" in final_df.columns:
980
- category_dict["Panel Level 1"] = ["Panel_1"]
981
- if "Panel_2" in final_df.columns:
982
- category_dict["Panel Level 2"] = ["Panel_2"]
983
-
984
  # Display the dictionary
985
- st.markdown("#### Variable Category")
986
  for category, variables in category_dict.items():
987
  # Check if there are multiple variables to handle "and" insertion correctly
988
  if len(variables) > 1:
@@ -993,27 +366,19 @@ if auth_status == True:
993
  variables_str = variables[0]
994
 
995
  # Display the category and its variables in the desired format
996
- st.markdown(
997
- f"<div style='text-align: justify;'><strong>{category}:</strong> {variables_str}</div>",
998
- unsafe_allow_html=True,
999
- )
1000
-
1001
- # Function to check if Response Metrics is selected
1002
- st.write("")
1003
- response_metrics_col = category_dict.get("Response Metrics", [])
1004
- if len(response_metrics_col) == 0:
1005
- st.warning("Please select Response Metrics column", icon="⚠️")
1006
- st.stop()
1007
- # elif len(response_metrics_col) > 1:
1008
- # st.warning("Please select only one Response Metrics column", icon="⚠️")
1009
- # st.stop()
1010
-
1011
- # Store final dataframe and bin dictionary into session state
1012
- st.session_state["final_df"], st.session_state["bin_dict"] = final_df, category_dict
1013
-
1014
- # Save the DataFrame and dictionary from the session state to the pickle file
1015
- if st.button("Accept and Save", use_container_width=True):
1016
- save_to_pickle(
1017
- "data_import.pkl", st.session_state["final_df"], st.session_state["bin_dict"]
1018
- )
1019
- st.toast("💾 Saved Successfully!")
 
1
  # Importing necessary libraries
2
  import streamlit as st
3
+ import pickle
4
 
5
  st.set_page_config(
6
+ page_title="Model Build",
7
  page_icon=":shark:",
8
  layout="wide",
9
  initial_sidebar_state="collapsed",
10
  )
11
 
12
+ from utilities import load_authenticator
13
+ import numpy as np
14
  import pandas as pd
15
  from utilities import set_header, load_local_css
 
 
 
16
 
17
  load_local_css("styles.css")
18
  set_header()
19
 
20
 
21
  for k, v in st.session_state.items():
22
+ if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
 
 
23
  st.session_state[k] = v
24
 
25
+ authenticator = st.session_state.get('authenticator')
26
+ if authenticator is None:
27
+ authenticator = load_authenticator()
28
+
29
+ name, authentication_status, username = authenticator.login('Login', 'main')
30
+ auth_status = st.session_state.get('authentication_status')
31
 
32
+ if auth_status == True:
33
+ is_state_initiaized = st.session_state.get('initialized',False)
34
  if not is_state_initiaized:
35
+ a=1
36
+
37
+
38
+ # Function to expand dataframe to daily
39
+ @st.cache_resource(show_spinner=False)
40
+ def expand_to_daily(df, granularity, start_date, end_date):
41
+ # Create a new DataFrame with a row for each day
42
+ all_dates = pd.date_range(start=start_date, end=end_date, freq="D")
43
+ daily_df = pd.DataFrame(all_dates, columns=["Date"])
44
+
45
+ if granularity == "daily":
46
+ # For daily data, simply merge to fill missing dates
47
+ daily_df = daily_df.merge(df, on="Date", how="left")
48
+ else:
49
+ # For weekly or monthly, distribute values to daily rows
50
+ for column in df.columns:
51
+ if column != "Date": # Skip 'Date' column
52
+ daily_df[column] = np.nan # Initialize with NaNs
53
+
54
+ # Group by the required frequency and distribute values
55
+ freq = "W-MON" if granularity == "weekly" else "MS"
56
+ for _, group in df.groupby(pd.Grouper(key="Date", freq=freq)):
57
+ num_days = len(
58
+ pd.date_range(group["Date"].min(), group["Date"].max(), freq="D")
59
+ )
60
+ for column in group.columns:
61
+ if column == "Date": # Skip 'Date' column
62
+ continue
63
+ value = group[column].sum() / num_days
64
+ date_range = pd.date_range(
65
+ group["Date"].min(), periods=num_days, freq="D"
66
+ )
67
+ daily_df.loc[daily_df["Date"].isin(date_range), column] = value
68
 
69
+ return daily_df
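expand_to_daily spreads each weekly or monthly total evenly across the days it covers. A toy illustration of that redistribution (hypothetical spend figures):

```python
import pandas as pd

# Hypothetical weekly spend, weeks starting on Monday
weekly = pd.DataFrame({
    "Date": pd.to_datetime(["2023-01-02", "2023-01-09"]),
    "tv_spend": [700.0, 1400.0],
})

# Spread each weekly total evenly over its seven days, as expand_to_daily does
daily_rows = []
for _, row in weekly.iterrows():
    for day in pd.date_range(row["Date"], periods=7, freq="D"):
        daily_rows.append({"Date": day, "tv_spend": row["tv_spend"] / 7})

daily = pd.DataFrame(daily_rows)
print(daily.head(8))  # 100.0 per day in week 1, 200.0 per day in week 2
```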
70
 
71
+
72
+ # Function to validate date column in dataframe
73
  def validate_date_column(df):
74
  try:
75
  # Attempt to convert the 'Date' column to datetime
76
+ df["Date"] = pd.to_datetime(df["Date"], format="%d-%m-%Y")
77
  return True
78
  except:
79
  return False
 
91
  return "irregular"
92
 
93
 
94
+ # Function to convert and fill dates in dataframe
95
+ def convert_and_fill_dates(df, start_date, end_date, interval):
96
+ # Create a date range for the desired period
97
+ all_dates = pd.date_range(start=start_date, end=end_date, freq="D")
98
+ new_df = pd.DataFrame(all_dates, columns=["Date"])
99
 
100
+ # Preprocess and aggregate data based on the original interval
101
+ if interval != "daily":
102
+ # Resample to start of each week/month, then sum values for the same period
103
+ if interval == "weekly":
104
+ df = df.resample("W-MON", on="Date").sum().reset_index()
105
+ elif interval == "monthly":
106
+ df = df.resample("MS", on="Date").sum().reset_index()
107
 
108
+ # Distribute values equally across the days in each week/month
109
+ expanded_rows = []
110
  for _, row in df.iterrows():
111
+ if interval == "weekly":
112
+ period_dates = pd.date_range(row["Date"], periods=7)
113
+ elif interval == "monthly":
114
+ period_end = row["Date"] + pd.offsets.MonthEnd(1)
115
+ period_dates = pd.date_range(row["Date"], period_end)
116
+
117
+ for date in period_dates:
118
+ new_row = row.copy()
119
+ new_row["Date"] = date
120
  for col in df.columns:
121
+ if col != "Date": # Skip 'Date' column
122
+ new_row[col] = row[col] / len(period_dates)
123
+ expanded_rows.append(new_row)
124
 
125
+ # Create a DataFrame from expanded rows
126
+ expanded_df = pd.DataFrame(expanded_rows)
127
+ new_df = pd.merge(new_df, expanded_df, how="left", on="Date")
128
+ else:
129
+ # Daily data, aggregate if there are multiple entries for the same day
130
+ df = df.groupby("Date").sum().reset_index()
131
+ new_df = pd.merge(new_df, df, how="left", on="Date")
132
 
133
+ # Ensure all dates from start to end are present, filling missing values with NaN
134
+ new_df["Date"] = pd.to_datetime(new_df["Date"]) # Ensure 'Date' is datetime type
135
+ new_df = new_df.set_index("Date").reindex(all_dates).reset_index()
136
+ new_df.rename(columns={"index": "Date"}, inplace=True)
137
 
138
+ return new_df
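The final reindex in convert_and_fill_dates is what guarantees one row per calendar day. A minimal sketch with a deliberately missing date:

```python
import pandas as pd

# Hypothetical daily data with a missing calendar day (2023-01-03)
df = pd.DataFrame({
    "Date": pd.to_datetime(["2023-01-01", "2023-01-02", "2023-01-04"]),
    "clicks": [10, 12, 9],
})
all_dates = pd.date_range("2023-01-01", "2023-01-05", freq="D")

# Reindex onto the full calendar, leaving NaN where no data exists
filled = (
    df.set_index("Date")
      .reindex(all_dates)
      .reset_index()
      .rename(columns={"index": "Date"})
)
print(filled)
```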
139
 
 
 
140
 
141
+ # Function to convert a DataFrame from daily level granularity to either weekly or monthly level
142
+ def convert_to_higher_granularity(df, required_granularity):
143
+ if required_granularity == "daily":
144
+ return df
145
 
146
+ # Ensure 'Date' is the index and is in datetime format
147
+ if not pd.api.types.is_datetime64_any_dtype(df["Date"]):
148
+ df["Date"] = pd.to_datetime(df["Date"])
149
+ df.set_index("Date", inplace=True)
150
 
151
+ # Resample and aggregate
152
+ if required_granularity == "weekly":
153
+ # Resample to weekly, using 'W-MON' to indicate weeks starting on Monday
154
+ df = df.resample("W-MON").sum()
155
+ elif required_granularity == "monthly":
156
+ # Resample to monthly, using 'MS' to indicate month start
157
+ df = df.resample("MS").sum()
158
 
159
+ # Reset index to move 'Date' back to a column
160
+ df.reset_index(inplace=True)
 
161
 
162
+ return df
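Rolling daily data back up in convert_to_higher_granularity is a plain resample-and-sum. For reference (note that pandas labels "W-MON" bins by the Monday on which they close):

```python
import pandas as pd

daily = pd.DataFrame({
    "Date": pd.date_range("2023-01-02", periods=14, freq="D"),
    "spend": [100.0] * 14,
})

# Weekly totals, as produced by convert_to_higher_granularity for "weekly"
weekly = daily.set_index("Date").resample("W-MON").sum().reset_index()
print(weekly)
```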
 
 
 
 
 
 
 
 
 
163
164
 
165
+ # Read the CSV file, parsing 'Date' column as datetime
166
+ main_df = pd.read_csv("Media_data_for_model_dma_level.csv", dayfirst=True, parse_dates=["Date"])
167
+ # st.write(main_df)
168
+
169
+ # Get the start date (minimum) and end date (maximum) from the 'Date' column
170
+ api_start_date = main_df["Date"].min()
171
+ api_end_date = main_df["Date"].max()
172
 
173
+ # Infer the granularity from the most common difference between consecutive dates
174
+ date_diffs = main_df["Date"].diff().dt.days.dropna()
175
+ common_diff = date_diffs.mode()[0]
176
+ api_granularity = determine_data_interval(common_diff)
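The interval is inferred from the modal gap between consecutive dates. Assuming the usual mapping of 1 day to daily, 7 to weekly and roughly 28-31 to monthly, a quick check of the inference looks like this:

```python
import pandas as pd

dates = pd.to_datetime(["2023-01-01", "2023-01-08", "2023-01-15", "2023-01-22"])
common_diff = pd.Series(dates).diff().dt.days.dropna().mode()[0]
print(common_diff)  # 7.0 -> would be classified as "weekly" by determine_data_interval
```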
177
 
178
+ # Convert the DataFrame to daily level granularity
179
+ main_df = expand_to_daily(main_df, api_granularity, api_start_date, api_end_date)
180
 
181
+ # Page Title
182
+ st.title("Data Import")
183
 
184
+ # File uploader
185
+ uploaded_files = st.file_uploader(
186
+ "Upload additional data", type=["xlsx"], accept_multiple_files=True
187
+ )
188
 
189
+ # Custom HTML for upload instructions
190
+ recommendation_html = f"""
191
+ <div style="text-align: justify;">
192
+ <strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets including media, internal, and exogenous data adhere to the following guidelines: Each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code>, be free of missing values, and aggregated to a {api_granularity} level.
193
+ </div>
194
+ """
 
 
 
195
 
196
+ st.markdown(recommendation_html, unsafe_allow_html=True)
197
 
198
+ # Initialize a list to collect all processed DataFrames
199
+ all_data_dfs = []
200
 
201
+ if uploaded_files:
202
+ for uploaded_file in uploaded_files:
203
+ # Extract the file name
204
+ file_name = uploaded_file.name
205
 
206
+ # Load the file into a DataFrame
207
+ data_df = pd.read_excel(
208
+ uploaded_file,
209
+ )
210
+
211
+ # Identify numeric columns in the DataFrame
212
+ numeric_columns = data_df.select_dtypes(include="number").columns.tolist()
213
 
214
+ # Validate the 'Date' column and ensure there's at least one numeric column
215
+ if validate_date_column(data_df) and len(numeric_columns) > 0:
216
+ data_df = data_df[["Date"] + numeric_columns]
217
 
218
+ # Ensure the 'Date' column is in datetime format and sorted
219
+ data_df["Date"] = pd.to_datetime(data_df["Date"], dayfirst=True)
220
+ data_df.sort_values("Date", inplace=True)
221
 
222
+ # Calculate the most common day difference between dates to determine frequency
223
+ common_freq = data_df["Date"].diff().dt.days.dropna().mode()[0]
224
 
225
+ # Calculate the data interval (daily, weekly, monthly or irregular)
226
+ interval = determine_data_interval(common_freq)
227
 
228
+ if interval == "irregular":
229
+ # Warn the user if the 'Date' column doesn't meet the format requirements
230
+ st.warning(
231
+ f"File Name: {file_name} ➜ Please upload data in daily, weekly or monthly interval."
232
+ )
233
+ continue
234
 
235
+ # Convert data to specified interval and redistribute to daily
236
+ data_df = convert_and_fill_dates(
237
+ data_df, api_start_date, api_end_date, interval
238
+ )
239
 
240
+ # Add the processed DataFrame to the list
241
+ all_data_dfs.append(data_df)
 
242
243
  else:
244
+ # Warn the user if the 'Date' column doesn't meet the format requirements
245
+ st.warning(
246
+ f"File Name: {file_name} ➜ Please upload data with Date column in 'DD-MM-YYYY' format and at least one media/exogenous column."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  )
248
 
249
+ # Sequentially merge each of the other DataFrames with the main DataFrame on 'Date'
250
+ for df in all_data_dfs:
251
+ main_df = pd.merge(main_df, df, on="Date", how="left")
252
 
253
 
254
+ # Function to calculate missing stats and prepare for editable DataFrame
255
  def prepare_missing_stats_df(df):
256
  missing_stats = []
257
  for column in df.columns:
258
  if (
259
+ column == "Date" or column == "Total Approved Accounts - Revenue"
260
+ ): # Skip Date and Revenue column
261
  continue
262
 
263
  missing = df[column].isnull().sum()
264
  pct_missing = round((missing / len(df)) * 100, 2)
265
  missing_stats.append(
266
  {
267
  "Column": column,
268
  "Missing Values": missing,
269
  "Missing Percentage": pct_missing,
270
  "Impute Method": "Fill with 0", # Default value
271
+ "Category": "Media", # Default value
272
  }
273
  )
274
  stats_df = pd.DataFrame(missing_stats)
 
275
  return stats_df
276
 
277
 
278
+ # Prepare missing stats DataFrame for editing
279
+ missing_stats_df = prepare_missing_stats_df(main_df)
280
 
281
  # Create an editable DataFrame in Streamlit
282
  st.markdown("#### Select Variables Category & Impute Missing Values")
283
 
 
 
 
284
  edited_stats_df = st.data_editor(
285
  missing_stats_df,
286
  column_config={
 
296
  ),
297
  "Category": st.column_config.SelectboxColumn(
298
  options=[
299
+ "Date",
300
  "Media",
301
  "Exogenous",
302
  "Internal",
303
+ "DMA/Panel",
304
+ "Response_Metric"
305
  ],
306
  required=True,
307
  default="Media",
 
312
  use_container_width=True,
313
  )
314
 
315
+
316
  # Apply changes based on edited DataFrame
317
  for i, row in edited_stats_df.iterrows():
318
  column = row["Column"]
319
  if row["Impute Method"] == "Drop Column":
320
+ main_df.drop(columns=[column], inplace=True)
321
 
322
  elif row["Impute Method"] == "Fill with Mean":
323
+ main_df[column].fillna(main_df[column].mean(), inplace=True)
324
 
325
  elif row["Impute Method"] == "Fill with Median":
326
+ main_df[column].fillna(main_df[column].median(), inplace=True)
327
 
328
  elif row["Impute Method"] == "Fill with 0":
329
+ main_df[column].fillna(0, inplace=True)
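The impute loop above dispatches on the method chosen per column in the editable table. The same dispatch on a toy frame, with hypothetical choices:

```python
import pandas as pd

df = pd.DataFrame({"tv_spend": [100.0, None, 300.0], "gdp": [None, 2.0, 2.1]})
plan = {"tv_spend": "Fill with Mean", "gdp": "Fill with 0"}  # hypothetical selections

# Same per-column dispatch as the loop over edited_stats_df above
for column, method in plan.items():
    if method == "Drop Column":
        df.drop(columns=[column], inplace=True)
    elif method == "Fill with Mean":
        df[column] = df[column].fillna(df[column].mean())
    elif method == "Fill with Median":
        df[column] = df[column].fillna(df[column].median())
    elif method == "Fill with 0":
        df[column] = df[column].fillna(0)

print(df)  # tv_spend gap filled with 200.0, gdp gap filled with 0
```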
 
 
 
 
 
330
 
331
 
332
+ # Convert the Final DataFrame to required granularity
333
+ main_df = convert_to_higher_granularity(main_df, api_granularity)
334
 
335
+ # Display the Final DataFrame and exogenous variables
336
+ st.markdown("#### Final DataFrame:")
337
+ st.dataframe(main_df)
338
+
339
+
340
 
341
  # Initialize an empty dictionary to hold categories and their variables
342
  category_dict = {}
 
354
  # If it exists, append the current column to the list of variables under this category
355
  category_dict[category].append(column)
356
357
  # Display the dictionary
358
+ st.markdown("#### Variable Category:")
359
  for category, variables in category_dict.items():
360
  # Check if there are multiple variables to handle "and" insertion correctly
361
  if len(variables) > 1:
 
366
  variables_str = variables[0]
367
 
368
  # Display the category and its variables in the desired format
369
+ st.markdown(f"**{category}:** {variables_str}\n\n", unsafe_allow_html=True)
370
+
371
+ # Storing main_df and category_dict in session_state
372
+ # st.write(main_df)
373
+
374
+
375
+ # st.session_state['Cleaned_data']=main_df
376
+
377
+ # st.session_state['category_dict']=category_dict
378
+ if st.button('Save Changes'):
379
+
380
+ with open("Pickle_files/main_df", 'wb') as f:
381
+ pickle.dump(main_df, f)
382
+ with open("Pickle_files/category_dict",'wb') as c:
383
+ pickle.dump(category_dict,c)
384
+ st.success('Changes Saved!')
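The saved pickles can be re-loaded by later pages using the same relative paths (assuming they run from the app's working directory):

```python
import pickle

# Re-load the artefacts written by the "Save Changes" button above
with open("Pickle_files/main_df", "rb") as f:
    main_df = pickle.load(f)
with open("Pickle_files/category_dict", "rb") as c:
    category_dict = pickle.load(c)

print(main_df.shape, list(category_dict.keys()))
```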
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: indigo
5
  colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.32.1
8
- app_file: Data_Import.py
9
  pinned: false
10
  ---
11
 
 
5
  colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.32.1
8
+ app_file: app.py
9
  pinned: false
10
  ---
11
 
classes.py CHANGED
@@ -16,15 +16,21 @@ def class_to_dict(class_instance):
16
  attr_dict["modified_spends"] = class_instance.modified_spends
17
  attr_dict["modified_sales"] = class_instance.modified_sales
18
  attr_dict["response_curve_type"] = class_instance.response_curve_type
19
- attr_dict["response_curve_params"] = class_instance.response_curve_params
 
 
20
  attr_dict["penalty"] = class_instance.penalty
21
  attr_dict["bounds"] = class_instance.bounds
22
  attr_dict["actual_total_spends"] = class_instance.actual_total_spends
23
  attr_dict["actual_total_sales"] = class_instance.actual_total_sales
24
- attr_dict["modified_total_spends"] = class_instance.modified_total_spends
 
 
25
  attr_dict["modified_total_sales"] = class_instance.modified_total_sales
26
  attr_dict["actual_mroi"] = class_instance.get_marginal_roi("actual")
27
- attr_dict["modified_mroi"] = class_instance.get_marginal_roi("modified")
 
 
28
 
29
  elif isinstance(class_instance, Scenario):
30
  attr_dict["type"] = "Scenario"
@@ -37,7 +43,9 @@ def class_to_dict(class_instance):
37
  attr_dict["correction"] = class_instance.correction
38
  attr_dict["actual_total_spends"] = class_instance.actual_total_spends
39
  attr_dict["actual_total_sales"] = class_instance.actual_total_sales
40
- attr_dict["modified_total_spends"] = class_instance.modified_total_spends
 
 
41
  attr_dict["modified_total_sales"] = class_instance.modified_total_sales
42
 
43
  return attr_dict
@@ -87,7 +95,9 @@ class Channel:
87
  self.modified_sales = self.calculate_sales()
88
  self.modified_total_spends = self.modified_spends.sum()
89
  self.modified_total_sales = self.modified_sales.sum()
90
- self.delta_spends = self.modified_total_spends - self.actual_total_spends
 
 
91
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
92
 
93
  def update_penalty(self, penalty):
@@ -109,7 +119,8 @@ class Channel:
109
  x = np.where(
110
  x < self.upper_limit,
111
  x,
112
- self.upper_limit + (x - self.upper_limit) * self.upper_limit / x,
 
113
  )
114
  if self.response_curve_type == "s-curve":
115
  if self.power >= 0:
@@ -158,7 +169,9 @@ class Channel:
158
  self.modified_sales = self.calculate_sales()
159
  self.modified_total_spends = self.modified_spends.sum()
160
  self.modified_total_sales = self.modified_sales.sum()
161
- self.delta_spends = self.modified_total_spends - self.actual_total_spends
 
 
162
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
163
 
164
  def intialize(self):
@@ -195,7 +208,9 @@ class Scenario:
195
  self.actual_total_sales = self.calculate_actual_total_sales()
196
  self.modified_total_sales = self.calculate_modified_total_sales()
197
  self.modified_total_spends = self.calculate_modified_total_spends()
198
- self.delta_spends = self.modified_total_spends - self.actual_total_spends
 
 
199
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
200
 
201
  def update_penalty(self, value):
@@ -205,7 +220,9 @@ class Scenario:
205
  def calculate_modified_total_spends(self):
206
  total_actual_spends = 0.0
207
  for channel in self.channels.values():
208
- total_actual_spends += channel.actual_total_spends * channel.conversion_rate
 
 
209
  return total_actual_spends
210
 
211
  def calculate_modified_total_spends(self):
@@ -234,47 +251,12 @@ class Scenario:
234
  self.channels[channel_name].update(modified_spends)
235
  self.modified_total_sales = self.calculate_modified_total_sales()
236
  self.modified_total_spends = self.calculate_modified_total_spends()
237
- self.delta_spends = self.modified_total_spends - self.actual_total_spends
 
 
238
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
239
 
240
- # def optimize_spends(self, sales_percent, channels_list, algo="COBYLA"):
241
- # desired_sales = self.actual_total_sales * (1 + sales_percent / 100.0)
242
-
243
- # def constraint(x):
244
- # for ch, spends in zip(channels_list, x):
245
- # self.update(ch, spends)
246
- # return self.modified_total_sales - desired_sales
247
-
248
- # bounds = []
249
- # for ch in channels_list:
250
- # bounds.append(
251
- # (1 + np.array([-50.0, 100.0]) / 100.0)
252
- # * self.channels[ch].actual_total_spends
253
- # )
254
-
255
- # initial_point = []
256
- # for bound in bounds:
257
- # initial_point.append(bound[0])
258
-
259
- # power = np.ceil(np.log(sum(initial_point)) / np.log(10))
260
-
261
- # constraints = [NonlinearConstraint(constraint, -1.0, 1.0)]
262
-
263
- # res = minimize(
264
- # lambda x: sum(x) / 10 ** (power),
265
- # bounds=bounds,
266
- # x0=initial_point,
267
- # constraints=constraints,
268
- # method=algo,
269
- # options={"maxiter": int(2e7), "catol": 1},
270
- # )
271
-
272
- # for channel_name, modified_spends in zip(channels_list, res.x):
273
- # self.update(channel_name, modified_spends)
274
-
275
- # return zip(channels_list, res.x)
276
-
277
- def optimize_spends(self, sales_percent, channels_list, algo="trust-constr"):
278
  desired_sales = self.actual_total_sales * (1 + sales_percent / 100.0)
279
 
280
  def constraint(x):
@@ -303,7 +285,7 @@ class Scenario:
303
  x0=initial_point,
304
  constraints=constraints,
305
  method=algo,
306
- options={"maxiter": int(2e7), "xtol": 100},
307
  )
308
 
309
  for channel_name, modified_spends in zip(channels_list, res.x):
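classes.py drives optimize_spends with scipy's trust-constr solver: minimise total spend subject to hitting a sales target, within per-channel bounds. A self-contained, simplified sketch with a toy response curve and made-up channel values; the real response and bounds come from the fitted Channel objects, and the real constraint updates the Scenario as a side effect:

```python
import numpy as np
from scipy.optimize import minimize, NonlinearConstraint

actual_spends = np.array([1000.0, 2000.0])           # current spends per channel

def total_sales(x):                                   # toy diminishing-returns response
    return float(np.sum(50.0 * np.sqrt(x)))

desired_sales = total_sales(actual_spends) * 1.10     # target: +10% sales
constraints = [NonlinearConstraint(lambda x: total_sales(x) - desired_sales, -1.0, 1.0)]
bounds = [(0.5 * s, 2.0 * s) for s in actual_spends]  # allow -50% / +100% per channel

res = minimize(
    lambda x: np.sum(x),                              # minimise total spend
    x0=actual_spends,
    bounds=bounds,
    constraints=constraints,
    method="trust-constr",
)
print(res.x, total_sales(res.x))
```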
@@ -335,11 +317,14 @@ class Scenario:
335
  for channel_name in channels_list:
336
  _channel_class = self.channels[channel_name]
337
  channel_bounds = _channel_class.bounds
338
- channel_actual_total_spends = _channel_class.actual_total_spends * (
339
- (1 + spends_percent / 100)
 
340
  )
341
  old_spends.append(channel_actual_total_spends)
342
- bounds.append((1 + channel_bounds / 100) * channel_actual_total_spends)
 
 
343
 
344
  def objective_function(x):
345
  for channel_name, modified_spends in zip(channels_list, x):
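The bounds built in this hunk are percentage windows around each channel's budget-adjusted spend: `channel_bounds` holds offsets in percent (for example `np.array([-10, 10])`), so multiplying `(1 + channel_bounds / 100)` by the scaled spend yields the lower and upper spend limits. A tiny worked example with made-up numbers:

```python
import numpy as np

channel_bounds = np.array([-10.0, 10.0])   # percent window stored on the channel
spends_percent = 5.0                       # overall budget change applied first
actual_total_spends = 120_000.0

scaled_spend = actual_total_spends * (1 + spends_percent / 100)   # 126000.0
lower, upper = (1 + channel_bounds / 100) * scaled_spend          # roughly 113400 and 138600
print(scaled_spend, lower, upper)
```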
@@ -347,12 +332,12 @@ class Scenario:
347
  return -1 * self.modified_total_sales
348
 
349
  res = minimize(
350
- lambda x: objective_function(x) / 1e8,
351
  method="trust-constr",
352
  x0=old_spends,
353
  constraints=constraint,
354
  bounds=bounds,
355
- options={"maxiter": int(1e7), "xtol": 100},
356
  )
357
  # res = dual_annealing(
358
  # objective_function,
@@ -376,91 +361,81 @@ class Scenario:
376
  channel_data = []
377
 
378
  summary_rows = []
379
- actual_list.append(
380
- {
381
- "name": "Total",
382
- "Spends": self.actual_total_spends,
383
- "Sales": self.actual_total_sales,
384
- }
385
- )
386
- modified_list.append(
387
- {
388
- "name": "Total",
389
- "Spends": self.modified_total_spends,
390
- "Sales": self.modified_total_sales,
391
- }
392
- )
393
  for channel in self.channels.values():
394
  name_mod = channel.name.replace("_", " ")
395
  if name_mod.lower().endswith(" imp"):
396
  name_mod = name_mod.replace("Imp", " Impressions")
397
- summary_rows.append(
398
- [
399
- name_mod,
400
- channel.actual_total_spends,
401
- channel.modified_total_spends,
402
- channel.actual_total_sales,
403
- channel.modified_total_sales,
404
- round(channel.actual_total_sales / channel.actual_total_spends, 2),
405
- round(
406
- channel.modified_total_sales / channel.modified_total_spends,
407
- 2,
408
- ),
409
- channel.get_marginal_roi("actual"),
410
- channel.get_marginal_roi("modified"),
411
- ]
412
- )
 
413
  data[channel.name] = channel.modified_spends
414
  data["Date"] = channel.dates
415
  data["Sales"] = (
416
  data.get("Sales", np.zeros((len(channel.dates),)))
417
  + channel.modified_sales
418
  )
419
- actual_list.append(
420
- {
421
- "name": channel.name,
422
- "Spends": channel.actual_total_spends,
423
- "Sales": channel.actual_total_sales,
424
- "ROI": round(
425
- channel.actual_total_sales / channel.actual_total_spends, 2
426
- ),
427
- }
428
- )
429
- modified_list.append(
430
- {
431
- "name": channel.name,
432
- "Spends": channel.modified_total_spends,
433
- "Sales": channel.modified_total_sales,
434
- "ROI": round(
435
- channel.modified_total_sales / channel.modified_total_spends,
436
- 2,
437
- ),
438
- "Marginal ROI": channel.get_marginal_roi("modified"),
439
- }
440
- )
441
-
442
- channel_data.append(
443
- {
444
- "channel": channel.name,
445
- "spends_act": channel.actual_total_spends,
446
- "spends_mod": channel.modified_total_spends,
447
- "sales_act": channel.actual_total_sales,
448
- "sales_mod": channel.modified_total_sales,
449
- }
450
- )
451
- summary_rows.append(
452
- [
453
- "Total",
454
- self.actual_total_spends,
455
- self.modified_total_spends,
456
- self.actual_total_sales,
457
- self.modified_total_sales,
458
- round(self.actual_total_sales / self.actual_total_spends, 2),
459
- round(self.modified_total_sales / self.modified_total_spends, 2),
460
- 0.0,
461
- 0.0,
462
- ]
463
- )
464
  details["Actual"] = actual_list
465
  details["Modified"] = modified_list
466
  columns_index = pd.MultiIndex.from_product(
@@ -492,7 +467,8 @@ class Scenario:
492
  def from_dict(cls, attr_dict):
493
  channels_list = attr_dict["channels"]
494
  channels = {
495
- channel["name"]: class_from_dict(channel) for channel in channels_list
 
496
  }
497
  return Scenario(
498
  name=attr_dict["name"],
 
16
  attr_dict["modified_spends"] = class_instance.modified_spends
17
  attr_dict["modified_sales"] = class_instance.modified_sales
18
  attr_dict["response_curve_type"] = class_instance.response_curve_type
19
+ attr_dict["response_curve_params"] = (
20
+ class_instance.response_curve_params
21
+ )
22
  attr_dict["penalty"] = class_instance.penalty
23
  attr_dict["bounds"] = class_instance.bounds
24
  attr_dict["actual_total_spends"] = class_instance.actual_total_spends
25
  attr_dict["actual_total_sales"] = class_instance.actual_total_sales
26
+ attr_dict["modified_total_spends"] = (
27
+ class_instance.modified_total_spends
28
+ )
29
  attr_dict["modified_total_sales"] = class_instance.modified_total_sales
30
  attr_dict["actual_mroi"] = class_instance.get_marginal_roi("actual")
31
+ attr_dict["modified_mroi"] = class_instance.get_marginal_roi(
32
+ "modified"
33
+ )
34
 
35
  elif isinstance(class_instance, Scenario):
36
  attr_dict["type"] = "Scenario"
 
43
  attr_dict["correction"] = class_instance.correction
44
  attr_dict["actual_total_spends"] = class_instance.actual_total_spends
45
  attr_dict["actual_total_sales"] = class_instance.actual_total_sales
46
+ attr_dict["modified_total_spends"] = (
47
+ class_instance.modified_total_spends
48
+ )
49
  attr_dict["modified_total_sales"] = class_instance.modified_total_sales
50
 
51
  return attr_dict
 
95
  self.modified_sales = self.calculate_sales()
96
  self.modified_total_spends = self.modified_spends.sum()
97
  self.modified_total_sales = self.modified_sales.sum()
98
+ self.delta_spends = (
99
+ self.modified_total_spends - self.actual_total_spends
100
+ )
101
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
102
 
103
  def update_penalty(self, penalty):
 
119
  x = np.where(
120
  x < self.upper_limit,
121
  x,
122
+ self.upper_limit
123
+ + (x - self.upper_limit) * self.upper_limit / x,
124
  )
125
  if self.response_curve_type == "s-curve":
126
  if self.power >= 0:
 
169
  self.modified_sales = self.calculate_sales()
170
  self.modified_total_spends = self.modified_spends.sum()
171
  self.modified_total_sales = self.modified_sales.sum()
172
+ self.delta_spends = (
173
+ self.modified_total_spends - self.actual_total_spends
174
+ )
175
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
176
 
177
  def intialize(self):
 
208
  self.actual_total_sales = self.calculate_actual_total_sales()
209
  self.modified_total_sales = self.calculate_modified_total_sales()
210
  self.modified_total_spends = self.calculate_modified_total_spends()
211
+ self.delta_spends = (
212
+ self.modified_total_spends - self.actual_total_spends
213
+ )
214
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
215
 
216
  def update_penalty(self, value):
 
220
  def calculate_modified_total_spends(self):
221
  total_actual_spends = 0.0
222
  for channel in self.channels.values():
223
+ total_actual_spends += (
224
+ channel.actual_total_spends * channel.conversion_rate
225
+ )
226
  return total_actual_spends
227
 
228
  def calculate_modified_total_spends(self):
 
251
  self.channels[channel_name].update(modified_spends)
252
  self.modified_total_sales = self.calculate_modified_total_sales()
253
  self.modified_total_spends = self.calculate_modified_total_spends()
254
+ self.delta_spends = (
255
+ self.modified_total_spends - self.actual_total_spends
256
+ )
257
  self.delta_sales = self.modified_total_sales - self.actual_total_sales
258
 
259
+ def optimize_spends(self, sales_percent, channels_list, algo="COBYLA"):
 
 
260
  desired_sales = self.actual_total_sales * (1 + sales_percent / 100.0)
261
 
262
  def constraint(x):
 
285
  x0=initial_point,
286
  constraints=constraints,
287
  method=algo,
288
+ options={"maxiter": int(2e7), "catol": 1},
289
  )
290
 
291
  for channel_name, modified_spends in zip(channels_list, res.x):
 
317
  for channel_name in channels_list:
318
  _channel_class = self.channels[channel_name]
319
  channel_bounds = _channel_class.bounds
320
+ channel_actual_total_spends = (
321
+ _channel_class.actual_total_spends
322
+ * ((1 + spends_percent / 100))
323
  )
324
  old_spends.append(channel_actual_total_spends)
325
+ bounds.append(
326
+ (1 + channel_bounds / 100) * channel_actual_total_spends
327
+ )
328
 
329
  def objective_function(x):
330
  for channel_name, modified_spends in zip(channels_list, x):
 
332
  return -1 * self.modified_total_sales
333
 
334
  res = minimize(
335
+ lambda x : objective_function(x) / 1e8,
336
  method="trust-constr",
337
  x0=old_spends,
338
  constraints=constraint,
339
  bounds=bounds,
340
+ options={"maxiter": int(1e7), 'xtol' : 100},
341
  )
342
  # res = dual_annealing(
343
  # objective_function,
 
361
  channel_data = []
362
 
363
  summary_rows = []
364
+ actual_list.append({
365
+ "name": "Total",
366
+ "Spends": self.actual_total_spends,
367
+ "Sales": self.actual_total_sales,
368
+ })
369
+ modified_list.append({
370
+ "name": "Total",
371
+ "Spends": self.modified_total_spends,
372
+ "Sales": self.modified_total_sales,
373
+ })
 
 
 
 
374
  for channel in self.channels.values():
375
  name_mod = channel.name.replace("_", " ")
376
  if name_mod.lower().endswith(" imp"):
377
  name_mod = name_mod.replace("Imp", " Impressions")
378
+ summary_rows.append([
379
+ name_mod,
380
+ channel.actual_total_spends,
381
+ channel.modified_total_spends,
382
+ channel.actual_total_sales,
383
+ channel.modified_total_sales,
384
+ round(
385
+ channel.actual_total_sales / channel.actual_total_spends, 2
386
+ ),
387
+ round(
388
+ channel.modified_total_sales
389
+ / channel.modified_total_spends,
390
+ 2,
391
+ ),
392
+ channel.get_marginal_roi("actual"),
393
+ channel.get_marginal_roi("modified"),
394
+ ])
395
  data[channel.name] = channel.modified_spends
396
  data["Date"] = channel.dates
397
  data["Sales"] = (
398
  data.get("Sales", np.zeros((len(channel.dates),)))
399
  + channel.modified_sales
400
  )
401
+ actual_list.append({
402
+ "name": channel.name,
403
+ "Spends": channel.actual_total_spends,
404
+ "Sales": channel.actual_total_sales,
405
+ "ROI": round(
406
+ channel.actual_total_sales / channel.actual_total_spends, 2
407
+ ),
408
+ })
409
+ modified_list.append({
410
+ "name": channel.name,
411
+ "Spends": channel.modified_total_spends,
412
+ "Sales": channel.modified_total_sales,
413
+ "ROI": round(
414
+ channel.modified_total_sales
415
+ / channel.modified_total_spends,
416
+ 2,
417
+ ),
418
+ "Marginal ROI": channel.get_marginal_roi("modified"),
419
+ })
420
+
421
+ channel_data.append({
422
+ "channel": channel.name,
423
+ "spends_act": channel.actual_total_spends,
424
+ "spends_mod": channel.modified_total_spends,
425
+ "sales_act": channel.actual_total_sales,
426
+ "sales_mod": channel.modified_total_sales,
427
+ })
428
+ summary_rows.append([
429
+ "Total",
430
+ self.actual_total_spends,
431
+ self.modified_total_spends,
432
+ self.actual_total_sales,
433
+ self.modified_total_sales,
434
+ round(self.actual_total_sales / self.actual_total_spends, 2),
435
+ round(self.modified_total_sales / self.modified_total_spends, 2),
436
+ 0.0,
437
+ 0.0,
438
+ ])
 
 
439
  details["Actual"] = actual_list
440
  details["Modified"] = modified_list
441
  columns_index = pd.MultiIndex.from_product(
 
467
  def from_dict(cls, attr_dict):
468
  channels_list = attr_dict["channels"]
469
  channels = {
470
+ channel["name"]: class_from_dict(channel)
471
+ for channel in channels_list
472
  }
473
  return Scenario(
474
  name=attr_dict["name"],
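Viewed as a whole, the classes.py changes in this commit are formatting-only: `class_to_dict` still flattens a Channel or Scenario into a plain dict, and `from_dict` rebuilds the channels with `class_from_dict`, exactly as the hunk above shows. A minimal sketch of that round-trip using generic stand-ins rather than the real constructors:

```python
from dataclasses import dataclass, asdict

@dataclass
class ChannelStub:
    name: str
    actual_total_spends: float
    response_curve_params: dict

def channel_to_dict(ch):
    d = asdict(ch)
    d["type"] = "Channel"          # the real helper tags the class type in a similar way
    return d

def channel_from_dict(d):
    return ChannelStub(
        name=d["name"],
        actual_total_spends=d["actual_total_spends"],
        response_curve_params=d["response_curve_params"],
    )

serialized = [channel_to_dict(ChannelStub("paid_search", 11300.0,
                                          {"K": 1.0, "b": 0.01, "a": 1e-5, "x0": 5.0}))]
channels = {d["name"]: channel_from_dict(d) for d in serialized}
print(channels["paid_search"])
```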
upf_data_converted.csv CHANGED
The diff for this file is too large to render. See raw diff
 
upf_data_converted.xlsx CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acafd6c7cb1d8d860d6f055632dced93b1c726f432b230504b869b3e19a5edbc
-size 1853475
+oid sha256:92716069afa2c16a8afb6494da6d5f93878558de0215b1b9334ffeb997fdc6b6
+size 1561111
utilities.py CHANGED
@@ -12,6 +12,7 @@ import io
12
  import plotly
13
  from pathlib import Path
14
  import pickle
 
15
  import yaml
16
  from yaml import SafeLoader
17
  from streamlit.components.v1 import html
@@ -23,59 +24,27 @@ import os
23
  import base64
24
 
25
 
26
- color_palette = [
27
- "#F3F3F0",
28
- "#5E7D7E",
29
- "#2FA1FF",
30
- "#00EDED",
31
- "#00EAE4",
32
- "#304550",
33
- "#EDEBEB",
34
- "#7FBEFD",
35
- "#003059",
36
- "#A2F3F3",
37
- "#E1D6E2",
38
- "#B6B6B6",
39
- ]
40
 
41
 
42
- CURRENCY_INDICATOR = "$"
43
 
44
- import streamlit_authenticator as stauth
45
 
 
46
 
47
  def load_authenticator():
48
- with open("config.yaml") as file:
49
  config = yaml.load(file, Loader=SafeLoader)
50
- st.session_state["config"] = config
51
  authenticator = stauth.Authenticate(
52
- credentials=config["credentials"],
53
- cookie_name=config["cookie"]["name"],
54
- key=config["cookie"]["key"],
55
- cookie_expiry_days=config["cookie"]["expiry_days"],
56
- preauthorized=config["preauthorized"],
57
  )
58
- st.session_state["authenticator"] = authenticator
59
  return authenticator
60
 
61
-
62
- # Authentication
63
- def authentication():
64
- with open("config.yaml") as file:
65
- config = yaml.load(file, Loader=SafeLoader)
66
-
67
- authenticator = stauth.Authenticate(
68
- config["credentials"],
69
- config["cookie"]["name"],
70
- config["cookie"]["key"],
71
- config["cookie"]["expiry_days"],
72
- config["preauthorized"],
73
- )
74
-
75
- name, authentication_status, username = authenticator.login("Login", "main")
76
- return authenticator, name, authentication_status, username
77
-
78
-
79
  def nav_page(page_name, timeout_secs=3):
80
  nav_script = """
81
  <script type="text/javascript">
@@ -98,10 +67,7 @@ def nav_page(page_name, timeout_secs=3):
98
  attempt_nav_page("%s", new Date(), %d);
99
  });
100
  </script>
101
- """ % (
102
- page_name,
103
- timeout_secs,
104
- )
105
  html(nav_script)
106
 
107
 
@@ -126,18 +92,23 @@ data_url = base64.b64encode(contents).decode("utf-8")
126
 
127
  file_.close()
128
 
 
129
 
130
- DATA_PATH = "./data"
131
 
132
- IMAGES_PATH = "./data/images_224_224"
133
 
 
134
 
135
  def load_local_css(file_name):
136
 
137
  with open(file_name) as f:
138
 
139
- st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
140
 
 
 
 
141
 
142
  # def set_header():
143
 
@@ -158,24 +129,24 @@ data_url1 = base64.b64encode(contents1).decode("utf-8")
158
 
159
  file_1.close()
160
 
 
161
 
162
- DATA_PATH1 = "./data"
163
 
164
- IMAGES_PATH1 = "./data/images_224_224"
165
 
166
 
167
  def set_header():
168
- return st.markdown(
169
- f"""<div class='main-header'>
170
  <!-- <h1></h1> -->
171
  <div >
172
  <img class='blend-logo' src="data:image;base64,{data_url1}", alt="Logo">
173
  </div>
174
  <img class='blend-logo' src="data:image;base64,{data_url}", alt="Logo">
175
- </div>""",
176
- unsafe_allow_html=True,
177
- )
178
-
179
 
180
  # def set_header():
181
  # logo_path = "./path/to/your/local/LIME_logo.png" # Replace with the actual file path
@@ -186,87 +157,51 @@ def set_header():
186
  # </div>""", unsafe_allow_html=True)
187
 
188
 
189
- def s_curve(x, K, b, a, x0):
190
- return K / (1 + b * np.exp(-a * (x - x0)))
191
-
192
-
193
- def panel_level(input_df, date_column="Date"):
194
- # Ensure 'Date' is set as the index
195
- if date_column not in input_df.index.names:
196
- input_df = input_df.set_index(date_column)
197
-
198
- # Select numeric columns only (excluding 'Date' since it's now the index)
199
- numeric_columns_df = input_df.select_dtypes(include="number")
200
 
201
- # Group by 'Date' (which is the index) and sum the numeric columns
202
- aggregated_df = numeric_columns_df.groupby(input_df.index).sum()
203
-
204
- # Reset index if you want 'Date' back as a column
205
- aggregated_df = aggregated_df.reset_index()
206
-
207
- return aggregated_df
208
-
209
-
210
- def initialize_data(
211
- panel=None, target_file="Overview_data_test.xlsx", updated_rcs=None, metrics=None
212
- ):
213
  # uopx_conv_rates = {'streaming_impressions' : 0.007,'digital_impressions' : 0.007,'search_clicks' : 0.00719,'tv_impressions' : 0.000173,
214
  # "digital_clicks":0.005,"streaming_clicks":0.004,'streaming_spends':1,"tv_spends":1,"search_spends":1,
215
  # "digital_spends":1}
216
- # print('State initialized')
217
-
218
- excel = pd.read_excel(target_file, sheet_name=None)
219
-
220
- # Extract dataframes for raw data, spend input, and contribution MMM
221
- raw_df = excel["RAW DATA MMM"]
222
- spend_df = excel["SPEND INPUT"]
223
- contri_df = excel["CONTRIBUTION MMM"]
224
-
225
- # Check if the panel is not None
226
- if panel is not None and panel != "Aggregated":
227
- raw_df = raw_df[raw_df["Panel"] == panel].drop(columns=["Panel"])
228
- spend_df = spend_df[spend_df["Panel"] == panel].drop(columns=["Panel"])
229
- contri_df = contri_df[contri_df["Panel"] == panel].drop(columns=["Panel"])
230
- elif panel == "Aggregated":
231
- raw_df = panel_level(raw_df, date_column="Date")
232
- spend_df = panel_level(spend_df, date_column="Week")
233
- contri_df = panel_level(contri_df, date_column="Date")
234
-
235
- # Revenue_df = excel['Revenue']
236
-
237
- ## remove sesonalities, indices etc ...
238
- exclude_columns = [
239
- "Date",
240
- "Region",
241
- "Controls_Grammarly_Index_SeasonalAVG",
242
- "Controls_Quillbot_Index",
243
- "Daily_Positive_Outliers",
244
- "External_RemoteClass_Index",
245
- "Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802",
246
- "Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206",
247
- "Intervals ON 20201005-20201019",
248
- "Promotion_PercentOff",
249
- "Promotion_TimeBased",
250
- "Seasonality_Indicator_Chirstmas",
251
- "Seasonality_Indicator_NewYears_Days",
252
- "Seasonality_Indicator_Thanksgiving",
253
- "Trend 20200302 / 20200803",
254
- ]
255
- raw_df["Date"] = pd.to_datetime(raw_df["Date"])
256
- contri_df["Date"] = pd.to_datetime(contri_df["Date"])
257
- input_df = raw_df.sort_values(by="Date")
258
- output_df = contri_df.sort_values(by="Date")
259
- spend_df["Week"] = pd.to_datetime(
260
- spend_df["Week"], format="%Y-%m-%d", errors="coerce"
261
- )
262
- spend_df.sort_values(by="Week", inplace=True)
263
 
264
  # spend_df['Week'] = pd.to_datetime(spend_df['Week'], errors='coerce')
265
  # spend_df = spend_df.sort_values(by='Week')
 
266
 
267
  channel_list = [col for col in input_df.columns if col not in exclude_columns]
268
- channel_list = list(set(channel_list) - set(["fb_level_achieved_tier_1", "ga_app"]))
269
-
270
  response_curves = {}
271
  mapes = {}
272
  rmses = {}
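Both the old and the new body of `initialize_data` fit each channel's contribution to the logistic `s_curve` defined in this file with `scipy.optimize.curve_fit`, using the same starting point, parameter bounds and MAPE check that appear in the hunks below. A compact, self-contained version of that fitting step on synthetic data (the spends/contribution arrays here are placeholders, not project data):

```python
import numpy as np
from scipy.optimize import curve_fit

def s_curve(x, K, b, a, x0):
    return K / (1 + b * np.exp(-a * (x - x0)))

rng = np.random.default_rng(0)
x = np.linspace(0, 500, 120)                                   # scaled weekly spends
y = s_curve(x, K=900, b=5, a=0.02, x0=250) + rng.normal(0, 10, x.size)

bounds = ((0, 0, 0, 0), (3 * y.max(), 1000, 1, x.max()))
params, _ = curve_fit(
    s_curve, x, y,
    p0=(2 * y.max(), 0.01, 1e-5, x.max()),
    bounds=bounds,
    maxfev=int(1e5),
)
mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean()
print(dict(zip(["K", "b", "a", "x0"], params)), round(mape, 2))
```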
@@ -280,14 +215,14 @@ def initialize_data(
280
  dates = input_df.Date.values
281
  actual_output_dic = {}
282
  actual_input_dic = {}
283
-
284
  for inp_col in channel_list:
285
- # st.write(inp_col)
286
  spends = input_df[inp_col].values
287
  x = spends.copy()
288
- # upper limit for penalty
289
- upper_limits[inp_col] = 2 * x.max()
290
-
291
  # contribution
292
  out_col = [_col for _col in output_df.columns if _col.startswith(inp_col)][0]
293
  y = output_df[out_col].values.copy()
@@ -295,141 +230,96 @@ def initialize_data(
295
  actual_input_dic[inp_col] = x.copy()
296
  ##output cols aggregation
297
  output_cols.append(out_col)
298
-
299
  ## scale the input
300
- power = np.ceil(np.log(x.max()) / np.log(10)) - 3
301
- if power >= 0:
302
  x = x / 10**power
303
-
304
- x = x.astype("float64")
305
- y = y.astype("float64")
306
- # print('#printing yyyyyyyyy')
307
- # print(inp_col)
308
- # print(x.max())
309
- # print(y.max())
310
- bounds = ((0, 0, 0, 0), (3 * y.max(), 1000, 1, x.max()))
311
-
312
- # bounds = ((y.max(), 3*y.max()),(0,1000),(0,1),(0,x.max()))
313
- params, _ = curve_fit(
314
- s_curve,
315
- x,
316
- y,
317
- p0=(2 * y.max(), 0.01, 1e-5, x.max()),
318
- bounds=bounds,
319
- maxfev=int(1e5),
320
- )
321
  mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean()
322
- rmse = np.sqrt(((y - s_curve(x, *params)) ** 2).mean())
323
- r2_ = r2_score(y, s_curve(x, *params))
324
-
325
- response_curves[inp_col] = {
326
- "K": params[0],
327
- "b": params[1],
328
- "a": params[2],
329
- "x0": params[3],
330
- }
331
-
332
- updated_rcs_key = f"{metrics}#@{panel}#@{inp_col}"
333
- if updated_rcs is not None and updated_rcs_key in list(updated_rcs.keys()):
334
- response_curves[inp_col] = updated_rcs[updated_rcs_key]
335
 
 
336
  mapes[inp_col] = mape
337
  rmses[inp_col] = rmse
338
  r2[inp_col] = r2_
339
  powers[inp_col] = power
340
-
 
341
  ## conversion rates
342
- spend_col = [
343
- _col
344
- for _col in spend_df.columns
345
- if _col.startswith(inp_col.rsplit("_", 1)[0])
346
- ][0]
347
-
348
- # print('#printing spendssss')
349
- # print(spend_col)
350
- conv = (
351
- spend_df.set_index("Week")[spend_col]
352
- / input_df.set_index("Date")[inp_col].clip(lower=1)
353
- ).reset_index()
354
- conv.rename(columns={"index": "Week"}, inplace=True)
355
- conv["year"] = conv.Week.dt.year
356
- conv_rates[inp_col] = list(conv.drop("Week", axis=1).mean().to_dict().values())[
357
- 0
358
- ]
359
  ##print('Before',conv_rates[inp_col])
360
  # conv_rates[inp_col] = uopx_conv_rates[inp_col]
361
  ##print('After',(conv_rates[inp_col]))
362
-
363
- channel = Channel(
364
- name=inp_col,
365
- dates=dates,
366
- spends=spends,
367
- # conversion_rate = np.mean(list(conv_rates[inp_col].values())),
368
- conversion_rate=conv_rates[inp_col],
369
- response_curve_type="s-curve",
370
- response_curve_params={
371
- "K": params[0],
372
- "b": params[1],
373
- "a": params[2],
374
- "x0": params[3],
375
- },
376
- bounds=np.array([-10, 10]),
377
- )
378
  channels[inp_col] = channel
379
  if sales is None:
380
  sales = channel.actual_sales
381
  else:
382
  sales += channel.actual_sales
383
- other_contributions = (
384
- output_df.drop([*output_cols], axis=1).sum(axis=1, numeric_only=True).values
385
- )
386
- correction = output_df.drop("Date", axis=1).sum(axis=1).values - (
387
- sales + other_contributions
388
- )
389
- scenario = Scenario(
390
- name="default",
391
- channels=channels,
392
- constant=other_contributions,
393
- correction=correction,
394
- )
395
  ## setting session variables
396
- st.session_state["initialized"] = True
397
- st.session_state["actual_df"] = input_df
398
- st.session_state["raw_df"] = raw_df
399
- st.session_state["contri_df"] = output_df
400
  default_scenario_dict = class_to_dict(scenario)
401
- st.session_state["default_scenario_dict"] = default_scenario_dict
402
- st.session_state["scenario"] = scenario
403
- st.session_state["channels_list"] = channel_list
404
- st.session_state["optimization_channels"] = {
405
- channel_name: False for channel_name in channel_list
406
- }
407
- st.session_state["rcs"] = response_curves
408
-
409
- st.session_state["powers"] = powers
410
- st.session_state["actual_contribution_df"] = pd.DataFrame(actual_output_dic)
411
- st.session_state["actual_input_df"] = pd.DataFrame(actual_input_dic)
412
-
413
  for channel in channels.values():
414
- st.session_state[channel.name] = numerize(
415
- channel.actual_total_spends * channel.conversion_rate, 1
416
- )
417
-
418
- st.session_state["xlsx_buffer"] = io.BytesIO()
419
-
420
- if Path("../saved_scenarios.pkl").exists():
421
- with open("../saved_scenarios.pkl", "rb") as f:
422
- st.session_state["saved_scenarios"] = pickle.load(f)
423
  else:
424
- st.session_state["saved_scenarios"] = OrderedDict()
425
-
426
- # st.session_state["total_spends_change"] = 0
427
- st.session_state["optimization_channels"] = {
428
- channel_name: False for channel_name in channel_list
429
- }
430
- st.session_state["disable_download_button"] = True
431
-
432
-
433
  # def initialize_data():
434
  # # fetch data from excel
435
  # output = pd.read_excel('data.xlsx',sheet_name=None)
@@ -445,17 +335,17 @@ def initialize_data(
445
  # channel_list.append(col)
446
  # else:
447
  # pass
448
-
449
  # ## NOTE : Considered only Desktop spends for all calculations
450
  # acutal_df = raw_df[raw_df.Region == 'Desktop'].copy()
451
  # ## NOTE : Considered one year of data
452
  # acutal_df = acutal_df[acutal_df.Date>'2020-12-31']
453
  # actual_df = acutal_df.drop('Region',axis=1).sort_values(by='Date')[[*channel_list,'Date']]
454
-
455
  # ##load response curves
456
  # with open('./grammarly_response_curves.json','r') as f:
457
  # response_curves = json.load(f)
458
-
459
  # ## create channel dict for scenario creation
460
  # dates = actual_df.Date.values
461
  # channels = {}
@@ -473,15 +363,15 @@ def initialize_data(
473
  # response_curve_type=response_curve_type,
474
  # response_curve_params=response_curve_params,
475
  # bounds=np.array([-30,30]))
476
-
477
  # channels[name] = channel
478
  # else:
479
  # constant = info_dict.get('value',0.) * len(dates)
480
-
481
  # ## create scenario
482
  # scenario = Scenario(name='default', channels=channels, constant=constant)
483
  # default_scenario_dict = class_to_dict(scenario)
484
-
485
 
486
  # ## setting session variables
487
  # st.session_state['initialized'] = True
@@ -495,7 +385,7 @@ def initialize_data(
495
  # for channel in channels.values():
496
  # if channel.name not in st.session_state:
497
  # st.session_state[channel.name] = float(channel.actual_total_spends)
498
-
499
  # if 'xlsx_buffer' not in st.session_state:
500
  # st.session_state['xlsx_buffer'] = io.BytesIO()
501
 
@@ -504,121 +394,51 @@ def initialize_data(
504
  # if Path('../saved_scenarios.pkl').exists():
505
  # with open('../saved_scenarios.pkl','rb') as f:
506
  # st.session_state['saved_scenarios'] = pickle.load(f)
507
-
508
  # else:
509
  # st.session_state['saved_scenarios'] = OrderedDict()
510
 
511
  # if 'total_spends_change' not in st.session_state:
512
  # st.session_state['total_spends_change'] = 0
513
-
514
  # if 'optimization_channels' not in st.session_state:
515
  # st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list}
516
-
517
  # if 'disable_download_button' not in st.session_state:
518
  # st.session_state['disable_download_button'] = True
519
-
520
-
521
  def create_channel_summary(scenario):
522
 
523
  # Provided data
524
  data = {
525
- "Channel": [
526
- "Paid Search",
527
- "Ga will cid baixo risco",
528
- "Digital tactic others",
529
- "Fb la tier 1",
530
- "Fb la tier 2",
531
- "Paid social others",
532
- "Programmatic",
533
- "Kwai",
534
- "Indicacao",
535
- "Infleux",
536
- "Influencer",
537
- ],
538
- "Spends": [
539
- "$ 11.3K",
540
- "$ 155.2K",
541
- "$ 50.7K",
542
- "$ 125.4K",
543
- "$ 125.2K",
544
- "$ 105K",
545
- "$ 3.3M",
546
- "$ 47.5K",
547
- "$ 55.9K",
548
- "$ 632.3K",
549
- "$ 48.3K",
550
- ],
551
- "Revenue": [
552
- "558.0K",
553
- "3.5M",
554
- "5.2M",
555
- "3.1M",
556
- "3.1M",
557
- "2.1M",
558
- "20.8M",
559
- "1.6M",
560
- "728.4K",
561
- "22.9M",
562
- "4.8M",
563
- ],
564
  }
565
 
566
  # Create DataFrame
567
  df = pd.DataFrame(data)
568
 
569
  # Convert currency strings to numeric values
570
- df["Spends"] = (
571
- df["Spends"]
572
- .replace({"\$": "", "K": "*1e3", "M": "*1e6"}, regex=True)
573
- .map(pd.eval)
574
- .astype(int)
575
- )
576
- df["Revenue"] = (
577
- df["Revenue"]
578
- .replace({"\$": "", "K": "*1e3", "M": "*1e6"}, regex=True)
579
- .map(pd.eval)
580
- .astype(int)
581
- )
582
 
583
  # Calculate ROI
584
- df["ROI"] = (df["Revenue"] - df["Spends"]) / df["Spends"]
585
 
586
  # Format columns
587
  format_currency = lambda x: f"${x:,.1f}"
588
  format_roi = lambda x: f"{x:.1f}"
589
 
590
- df["Spends"] = [
591
- "$ 11.3K",
592
- "$ 155.2K",
593
- "$ 50.7K",
594
- "$ 125.4K",
595
- "$ 125.2K",
596
- "$ 105K",
597
- "$ 3.3M",
598
- "$ 47.5K",
599
- "$ 55.9K",
600
- "$ 632.3K",
601
- "$ 48.3K",
602
- ]
603
- df["Revenue"] = [
604
- "$ 536.3K",
605
- "$ 3.4M",
606
- "$ 5M",
607
- "$ 3M",
608
- "$ 3M",
609
- "$ 2M",
610
- "$ 20M",
611
- "$ 1.5M",
612
- "$ 7.1M",
613
- "$ 22M",
614
- "$ 4.6M",
615
- ]
616
- df["ROI"] = df["ROI"].apply(format_roi)
617
-
618
  return df
619
 
620
 
621
- # @st.cache(allow_output_mutation=True)
622
  # def create_contribution_pie(scenario):
623
  # #c1f7dc
624
  # colors_map = {col:color for col,color in zip(st.session_state['channels_list'],plotly.colors.n_colors(plotly.colors.hex_to_rgb('#BE6468'), plotly.colors.hex_to_rgb('#E7B8B7'),23))}
@@ -650,23 +470,23 @@ def create_channel_summary(scenario):
650
  # weekly_spends_data = []
651
  # weekly_sales_data = []
652
  # for channel_name in st.session_state['channels_list']:
653
- # weekly_spends_data.append((go.Bar(x=x,
654
  # y=scenario.channels[channel_name].actual_spends * scenario.channels[channel_name].conversion_rate,
655
- # name=channel_name_formating(channel_name),
656
  # hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
657
  # legendgroup=channel_name)))
658
- # weekly_sales_data.append((go.Bar(x=x,
659
  # y=scenario.channels[channel_name].actual_sales,
660
- # name=channel_name_formating(channel_name),
661
  # hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
662
  # legendgroup=channel_name, showlegend=False)))
663
  # for _d in weekly_spends_data:
664
  # weekly_contribution_fig.add_trace(_d, row=1, col=1)
665
  # for _d in weekly_sales_data:
666
  # weekly_contribution_fig.add_trace(_d, row=1, col=2)
667
- # weekly_contribution_fig.add_trace(go.Bar(x=x,
668
  # y=scenario.constant + scenario.correction,
669
- # name='Non Media',
670
  # hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}"), row=1, col=2)
671
  # weekly_contribution_fig.update_layout(barmode='stack', title='Channel contribuion by week', xaxis_title='Date')
672
  # weekly_contribution_fig.update_xaxes(showgrid=False)
@@ -704,50 +524,14 @@ def create_channel_summary(scenario):
704
 
705
 
706
  def create_contribution_pie():
707
- color_palette = [
708
- "#F3F3F0",
709
- "#5E7D7E",
710
- "#2FA1FF",
711
- "#00EDED",
712
- "#00EAE4",
713
- "#304550",
714
- "#EDEBEB",
715
- "#7FBEFD",
716
- "#003059",
717
- "#A2F3F3",
718
- "#E1D6E2",
719
- "#B6B6B6",
720
- ]
721
- total_contribution_fig = make_subplots(
722
- rows=1,
723
- cols=2,
724
- subplot_titles=["Spends", "Revenue"],
725
- specs=[[{"type": "pie"}, {"type": "pie"}]],
726
- )
727
 
728
- channels_list = [
729
- "Paid Search",
730
- "Ga will cid baixo risco",
731
- "Digital tactic others",
732
- "Fb la tier 1",
733
- "Fb la tier 2",
734
- "Paid social others",
735
- "Programmatic",
736
- "Kwai",
737
- "Indicacao",
738
- "Infleux",
739
- "Influencer",
740
- "Non Media",
741
- ]
742
 
743
  # Assign colors from the limited palette to channels
744
- colors_map = {
745
- col: color_palette[i % len(color_palette)]
746
- for i, col in enumerate(channels_list)
747
- }
748
- colors_map["Non Media"] = color_palette[
749
- 5
750
- ] # Assign fixed green color for 'Non Media'
751
 
752
  # Hardcoded values for Spends and Revenue
753
  spends_values = [0.5, 3.36, 1.1, 2.7, 2.7, 2.27, 70.6, 1, 1, 13.7, 1, 0]
@@ -758,13 +542,10 @@ def create_contribution_pie():
758
  go.Pie(
759
  labels=[channel_name for channel_name in channels_list],
760
  values=spends_values,
761
- marker=dict(
762
- colors=[colors_map[channel_name] for channel_name in channels_list]
763
- ),
764
- hole=0.3,
765
  ),
766
- row=1,
767
- col=1,
768
  )
769
 
770
  # Add trace for Revenue pie chart
@@ -772,196 +553,144 @@ def create_contribution_pie():
772
  go.Pie(
773
  labels=[channel_name for channel_name in channels_list],
774
  values=revenue_values,
775
- marker=dict(
776
- colors=[colors_map[channel_name] for channel_name in channels_list]
777
- ),
778
- hole=0.3,
779
  ),
780
- row=1,
781
- col=2,
782
- )
783
-
784
- total_contribution_fig.update_traces(
785
- textposition="inside", texttemplate="%{percent:.1%}"
786
- )
787
- total_contribution_fig.update_layout(
788
- uniformtext_minsize=12, title="Channel contribution", uniformtext_mode="hide"
789
  )
 
 
 
790
  return total_contribution_fig
791
 
792
-
793
  def create_contribuion_stacked_plot(scenario):
794
- weekly_contribution_fig = make_subplots(
795
- rows=1,
796
- cols=2,
797
- subplot_titles=["Spends", "Revenue"],
798
- specs=[[{"type": "bar"}, {"type": "bar"}]],
799
- )
800
- raw_df = st.session_state["raw_df"]
801
- df = raw_df.sort_values(by="Date")
802
  x = df.Date
803
  weekly_spends_data = []
804
  weekly_sales_data = []
805
-
806
- for i, channel_name in enumerate(st.session_state["channels_list"]):
807
  color = color_palette[i % len(color_palette)]
808
-
809
- weekly_spends_data.append(
810
- go.Bar(
811
- x=x,
812
- y=scenario.channels[channel_name].actual_spends
813
- * scenario.channels[channel_name].conversion_rate,
814
- name=channel_name_formating(channel_name),
815
- hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
816
- legendgroup=channel_name,
817
- marker_color=color,
818
- )
819
- )
820
-
821
- weekly_sales_data.append(
822
- go.Bar(
823
- x=x,
824
- y=scenario.channels[channel_name].actual_sales,
825
- name=channel_name_formating(channel_name),
826
- hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
827
- legendgroup=channel_name,
828
- showlegend=False,
829
- marker_color=color,
830
- )
831
- )
832
-
833
  for _d in weekly_spends_data:
834
  weekly_contribution_fig.add_trace(_d, row=1, col=1)
835
  for _d in weekly_sales_data:
836
  weekly_contribution_fig.add_trace(_d, row=1, col=2)
837
-
838
- weekly_contribution_fig.add_trace(
839
- go.Bar(
840
- x=x,
841
- y=scenario.constant + scenario.correction,
842
- name="Non Media",
843
- hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
844
- marker_color=color_palette[-1],
845
- ),
846
- row=1,
847
- col=2,
848
- )
849
-
850
- weekly_contribution_fig.update_layout(
851
- barmode="stack", title="Channel contribution by week", xaxis_title="Date"
852
- )
853
  weekly_contribution_fig.update_xaxes(showgrid=False)
854
  weekly_contribution_fig.update_yaxes(showgrid=False)
855
  return weekly_contribution_fig
856
 
857
-
858
  def create_channel_spends_sales_plot(channel):
859
  if channel is not None:
860
  x = channel.dates
861
  _spends = channel.actual_spends * channel.conversion_rate
862
  _sales = channel.actual_sales
863
  channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
864
- channel_sales_spends_fig.add_trace(
865
- go.Bar(
866
- x=x,
867
- y=_sales,
868
- marker_color=color_palette[
869
- 3
870
- ], # You can choose a color from the palette
871
- name="Revenue",
872
- hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
873
- ),
874
- secondary_y=False,
875
- )
876
-
877
- channel_sales_spends_fig.add_trace(
878
- go.Scatter(
879
- x=x,
880
- y=_spends,
881
- line=dict(
882
- color=color_palette[2]
883
- ), # You can choose another color from the palette
884
- name="Spends",
885
- hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
886
- ),
887
- secondary_y=True,
888
- )
889
-
890
- channel_sales_spends_fig.update_layout(
891
- xaxis_title="Date",
892
- yaxis_title="Revenue",
893
- yaxis2_title="Spends ($)",
894
- title="Channel spends and Revenue week-wise",
895
- )
896
  channel_sales_spends_fig.update_xaxes(showgrid=False)
897
  channel_sales_spends_fig.update_yaxes(showgrid=False)
898
  else:
899
- raw_df = st.session_state["raw_df"]
900
- df = raw_df.sort_values(by="Date")
901
  x = df.Date
902
- scenario = class_from_dict(st.session_state["default_scenario_dict"])
903
  _sales = scenario.constant + scenario.correction
904
  channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
905
- channel_sales_spends_fig.add_trace(
906
- go.Bar(
907
- x=x,
908
- y=_sales,
909
- marker_color=color_palette[
910
- 0
911
- ], # You can choose a color from the palette
912
- name="Revenue",
913
- hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
914
- ),
915
- secondary_y=False,
916
- )
917
-
918
- channel_sales_spends_fig.update_layout(
919
- xaxis_title="Date",
920
- yaxis_title="Revenue",
921
- yaxis2_title="Spends ($)",
922
- title="Channel spends and Revenue week-wise",
923
- )
924
  channel_sales_spends_fig.update_xaxes(showgrid=False)
925
  channel_sales_spends_fig.update_yaxes(showgrid=False)
926
-
927
  return channel_sales_spends_fig
928
 
929
-
930
- def format_numbers(value, n_decimals=1, include_indicator=True):
931
  if include_indicator:
932
- return f"{CURRENCY_INDICATOR} {numerize(value,n_decimals)}"
933
  else:
934
- return f"{numerize(value,n_decimals)}"
935
 
936
 
937
- def decimal_formater(num_string, n_decimals=1):
938
- parts = num_string.split(".")
939
  if len(parts) == 1:
940
- return num_string + "." + "0" * n_decimals
941
  else:
942
  to_be_padded = n_decimals - len(parts[-1])
943
- if to_be_padded > 0:
944
- return num_string + "0" * to_be_padded
945
  else:
946
  return num_string
947
-
948
-
949
  def channel_name_formating(channel_name):
950
- name_mod = channel_name.replace("_", " ")
951
- if name_mod.lower().endswith(" imp"):
952
- name_mod = name_mod.replace("Imp", "Spend")
953
- elif name_mod.lower().endswith(" clicks"):
954
- name_mod = name_mod.replace("Clicks", "Spend")
955
  return name_mod
956
 
957
 
958
- def send_email(email, message):
959
- s = smtplib.SMTP("smtp.gmail.com", 587)
960
  s.starttls()
961
  s.login("geethu4444@gmail.com", "jgydhpfusuremcol")
962
  s.sendmail("geethu4444@gmail.com", email, message)
963
  s.quit()
964
 
965
-
966
  if __name__ == "__main__":
967
  initialize_data()
 
12
  import plotly
13
  from pathlib import Path
14
  import pickle
15
+ import streamlit_authenticator as stauth
16
  import yaml
17
  from yaml import SafeLoader
18
  from streamlit.components.v1 import html
 
24
  import base64
25
 
26
 
 
27
 
28
 
29
+ color_palette = ['#F3F3F0', '#5E7D7E', '#2FA1FF', '#00EDED', '#00EAE4', '#304550', '#EDEBEB', '#7FBEFD', '#003059', '#A2F3F3', '#E1D6E2', '#B6B6B6']
30
 
 
31
 
32
+ CURRENCY_INDICATOR = '$'
33
 
34
  def load_authenticator():
35
+ with open('config.yaml') as file:
36
  config = yaml.load(file, Loader=SafeLoader)
37
+ st.session_state['config'] = config
38
  authenticator = stauth.Authenticate(
39
+ config['credentials'],
40
+ config['cookie']['name'],
41
+ config['cookie']['key'],
42
+ config['cookie']['expiry_days'],
43
+ config['preauthorized']
44
  )
45
+ st.session_state['authenticator'] = authenticator
46
  return authenticator
47
 
 
 
48
  def nav_page(page_name, timeout_secs=3):
49
  nav_script = """
50
  <script type="text/javascript">
 
67
  attempt_nav_page("%s", new Date(), %d);
68
  });
69
  </script>
70
+ """ % (page_name, timeout_secs)
 
 
 
71
  html(nav_script)
72
 
73
 
 
92
 
93
  file_.close()
94
 
95
+
96
 
97
+ DATA_PATH = './data'
98
 
99
+ IMAGES_PATH = './data/images_224_224'
100
 
101
+
102
 
103
  def load_local_css(file_name):
104
 
105
  with open(file_name) as f:
106
 
107
+ st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
108
 
109
+
110
+
111
+
112
 
113
  # def set_header():
114
 
 
129
 
130
  file_1.close()
131
 
132
+
133
+
134
+ DATA_PATH1 = './data'
135
+
136
+ IMAGES_PATH1 = './data/images_224_224'
137
+
138
 
 
139
 
 
140
 
141
 
142
  def set_header():
143
+ return st.markdown(f"""<div class='main-header'>
 
144
  <!-- <h1></h1> -->
145
  <div >
146
  <img class='blend-logo' src="data:image;base64,{data_url1}", alt="Logo">
147
  </div>
148
  <img class='blend-logo' src="data:image;base64,{data_url}", alt="Logo">
149
+ </div>""", unsafe_allow_html=True)
 
 
 
150
 
151
  # def set_header():
152
  # logo_path = "./path/to/your/local/LIME_logo.png" # Replace with the actual file path
 
157
  # </div>""", unsafe_allow_html=True)
158
 
159
 
160
+ def s_curve(x,K,b,a,x0):
161
+ return K / (1 + b * np.exp(-a*(x-x0)))
 
 
162
 
163
+ def initialize_data():
 
 
164
  # uopx_conv_rates = {'streaming_impressions' : 0.007,'digital_impressions' : 0.007,'search_clicks' : 0.00719,'tv_impressions' : 0.000173,
165
  # "digital_clicks":0.005,"streaming_clicks":0.004,'streaming_spends':1,"tv_spends":1,"search_spends":1,
166
  # "digital_spends":1}
167
+ #print('State initialized')
168
+ excel = pd.read_excel("Overview_data_test.xlsx",sheet_name=None)
169
+ raw_df = excel['RAW DATA MMM']
170
+
171
+ spend_df = excel['SPEND INPUT']
172
+ contri_df = excel['CONTRIBUTION MMM']
173
+ #Revenue_df = excel['Revenue']
174
+
175
+ ## remove sesonalities, indices etc ...
176
+ exclude_columns = ['Date',
177
+ 'Region',
178
+ 'Controls_Grammarly_Index_SeasonalAVG',
179
+ 'Controls_Quillbot_Index',
180
+ 'Daily_Positive_Outliers',
181
+ 'External_RemoteClass_Index',
182
+ 'Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802',
183
+ 'Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206',
184
+ 'Intervals ON 20201005-20201019',
185
+ 'Promotion_PercentOff',
186
+ 'Promotion_TimeBased',
187
+ 'Seasonality_Indicator_Chirstmas',
188
+ 'Seasonality_Indicator_NewYears_Days',
189
+ 'Seasonality_Indicator_Thanksgiving',
190
+ 'Trend 20200302 / 20200803',
191
+ ]
192
+ raw_df['Date']=pd.to_datetime(raw_df['Date'])
193
+ contri_df['Date']=pd.to_datetime(contri_df['Date'])
194
+ input_df = raw_df.sort_values(by='Date')
195
+ output_df = contri_df.sort_values(by='Date')
196
+ spend_df['Week'] = pd.to_datetime(spend_df['Week'], format='%Y-%m-%d', errors='coerce')
197
+ spend_df.sort_values(by='Week', inplace=True)
 
 
198
 
199
  # spend_df['Week'] = pd.to_datetime(spend_df['Week'], errors='coerce')
200
  # spend_df = spend_df.sort_values(by='Week')
201
+
202
 
203
  channel_list = [col for col in input_df.columns if col not in exclude_columns]
204
+
 
205
  response_curves = {}
206
  mapes = {}
207
  rmses = {}
 
215
  dates = input_df.Date.values
216
  actual_output_dic = {}
217
  actual_input_dic = {}
218
+
219
  for inp_col in channel_list:
220
+ #st.write(inp_col)
221
  spends = input_df[inp_col].values
222
  x = spends.copy()
223
+ # upper limit for penalty
224
+ upper_limits[inp_col] = 2*x.max()
225
+
226
  # contribution
227
  out_col = [_col for _col in output_df.columns if _col.startswith(inp_col)][0]
228
  y = output_df[out_col].values.copy()
 
230
  actual_input_dic[inp_col] = x.copy()
231
  ##output cols aggregation
232
  output_cols.append(out_col)
233
+
234
  ## scale the input
235
+ power = (np.ceil(np.log(x.max()) / np.log(10) )- 3)
236
+ if power >= 0 :
237
  x = x / 10**power
238
+
239
+
240
+ x = x.astype('float64')
241
+ y = y.astype('float64')
242
+ #print('#printing yyyyyyyyy')
243
+ #print(inp_col)
244
+ #print(x.max())
245
+ #print(y.max())
246
+ bounds = ((0, 0, 0, 0), (3*y.max(), 1000, 1, x.max()))
247
+
248
+ #bounds = ((y.max(), 3*y.max()),(0,1000),(0,1),(0,x.max()))
249
+ params,_ = curve_fit(s_curve,x,y,p0=(2*y.max(),0.01,1e-5,x.max()),
250
+ bounds=bounds,
251
+ maxfev=int(1e5))
 
 
 
 
252
  mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean()
253
+ rmse = np.sqrt(((y - s_curve(x,*params))**2).mean())
254
+ r2_ = r2_score(y, s_curve(x,*params))
 
 
255
 
256
+ response_curves[inp_col] = {'K' : params[0], 'b' : params[1], 'a' : params[2], 'x0' : params[3]}
257
  mapes[inp_col] = mape
258
  rmses[inp_col] = rmse
259
  r2[inp_col] = r2_
260
  powers[inp_col] = power
261
+
262
+
263
  ## conversion rates
264
+ spend_col = [_col for _col in spend_df.columns if _col.startswith(inp_col.rsplit('_',1)[0])][0]
265
+
266
+ #print('#printing spendssss')
267
+ #print(spend_col)
268
+ conv = (spend_df.set_index('Week')[spend_col] / input_df.set_index('Date')[inp_col].clip(lower=1)).reset_index()
269
+ conv.rename(columns={'index':'Week'},inplace=True)
270
+ conv['year'] = conv.Week.dt.year
271
+ conv_rates[inp_col] = list(conv.drop('Week',axis=1).mean().to_dict().values())[0]
 
 
272
  ##print('Before',conv_rates[inp_col])
273
  # conv_rates[inp_col] = uopx_conv_rates[inp_col]
274
  ##print('After',(conv_rates[inp_col]))
275
+
276
+
277
+ channel = Channel(name=inp_col,dates=dates,
278
+ spends=spends,
279
+ # conversion_rate = np.mean(list(conv_rates[inp_col].values())),
280
+ conversion_rate = conv_rates[inp_col],
281
+ response_curve_type='s-curve',
282
+ response_curve_params={'K' : params[0], 'b' : params[1], 'a' : params[2], 'x0' : params[3]},
283
+ bounds=np.array([-10,10]))
 
 
284
  channels[inp_col] = channel
285
  if sales is None:
286
  sales = channel.actual_sales
287
  else:
288
  sales += channel.actual_sales
289
+ other_contributions = output_df.drop([*output_cols], axis=1).sum(axis=1, numeric_only = True).values
290
+ correction = output_df.drop('Date',axis=1).sum(axis=1).values - (sales + other_contributions)
291
+ scenario = Scenario(name='default', channels=channels, constant=other_contributions, correction = correction)
 
 
292
  ## setting session variables
293
+ st.session_state['initialized'] = True
294
+ st.session_state['actual_df'] = input_df
295
+ st.session_state['raw_df'] = raw_df
296
+ st.session_state['contri_df'] = output_df
297
  default_scenario_dict = class_to_dict(scenario)
298
+ st.session_state['default_scenario_dict'] = default_scenario_dict
299
+ st.session_state['scenario'] = scenario
300
+ st.session_state['channels_list'] = channel_list
301
+ st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list}
302
+ st.session_state['rcs'] = response_curves
303
+ st.session_state['powers'] = powers
304
+ st.session_state['actual_contribution_df'] = pd.DataFrame(actual_output_dic)
305
+ st.session_state['actual_input_df'] = pd.DataFrame(actual_input_dic)
306
+
 
 
 
307
  for channel in channels.values():
308
+ st.session_state[channel.name] = numerize(channel.actual_total_spends * channel.conversion_rate,1)
309
+
310
+ st.session_state['xlsx_buffer'] = io.BytesIO()
311
+
312
+
313
+ if Path('../saved_scenarios.pkl').exists():
314
+ with open('../saved_scenarios.pkl','rb') as f:
315
+ st.session_state['saved_scenarios'] = pickle.load(f)
 
316
  else:
317
+ st.session_state['saved_scenarios'] = OrderedDict()
318
+
319
+ st.session_state['total_spends_change'] = 0
320
+ st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list}
321
+ st.session_state['disable_download_button'] = True
322
+
 
 
 
323
  # def initialize_data():
324
  # # fetch data from excel
325
  # output = pd.read_excel('data.xlsx',sheet_name=None)
 
335
  # channel_list.append(col)
336
  # else:
337
  # pass
338
+
339
  # ## NOTE : Considered only Desktop spends for all calculations
340
  # acutal_df = raw_df[raw_df.Region == 'Desktop'].copy()
341
  # ## NOTE : Considered one year of data
342
  # acutal_df = acutal_df[acutal_df.Date>'2020-12-31']
343
  # actual_df = acutal_df.drop('Region',axis=1).sort_values(by='Date')[[*channel_list,'Date']]
344
+
345
  # ##load response curves
346
  # with open('./grammarly_response_curves.json','r') as f:
347
  # response_curves = json.load(f)
348
+
349
  # ## create channel dict for scenario creation
350
  # dates = actual_df.Date.values
351
  # channels = {}
 
363
  # response_curve_type=response_curve_type,
364
  # response_curve_params=response_curve_params,
365
  # bounds=np.array([-30,30]))
366
+
367
  # channels[name] = channel
368
  # else:
369
  # constant = info_dict.get('value',0.) * len(dates)
370
+
371
  # ## create scenario
372
  # scenario = Scenario(name='default', channels=channels, constant=constant)
373
  # default_scenario_dict = class_to_dict(scenario)
374
+
375
 
376
  # ## setting session variables
377
  # st.session_state['initialized'] = True
 
385
  # for channel in channels.values():
386
  # if channel.name not in st.session_state:
387
  # st.session_state[channel.name] = float(channel.actual_total_spends)
388
+
389
  # if 'xlsx_buffer' not in st.session_state:
390
  # st.session_state['xlsx_buffer'] = io.BytesIO()
391
 
 
394
  # if Path('../saved_scenarios.pkl').exists():
395
  # with open('../saved_scenarios.pkl','rb') as f:
396
  # st.session_state['saved_scenarios'] = pickle.load(f)
397
+
398
  # else:
399
  # st.session_state['saved_scenarios'] = OrderedDict()
400
 
401
  # if 'total_spends_change' not in st.session_state:
402
  # st.session_state['total_spends_change'] = 0
403
+
404
  # if 'optimization_channels' not in st.session_state:
405
  # st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list}
406
+
407
  # if 'disable_download_button' not in st.session_state:
408
  # st.session_state['disable_download_button'] = True
409
+
410
+
411
  def create_channel_summary(scenario):
412
 
413
  # Provided data
414
  data = {
415
+ 'Channel': ['Paid Search', 'Ga will cid baixo risco', 'Digital tactic others', 'Fb la tier 1', 'Fb la tier 2', 'Paid social others', 'Programmatic', 'Kwai', 'Indicacao', 'Infleux', 'Influencer'],
416
+ 'Spends': ['$ 11.3K', '$ 155.2K', '$ 50.7K', '$ 125.4K', '$ 125.2K', '$ 105K', '$ 3.3M', '$ 47.5K', '$ 55.9K', '$ 632.3K', '$ 48.3K'],
417
+ 'Revenue': ['558.0K', '3.5M', '5.2M', '3.1M', '3.1M', '2.1M', '20.8M', '1.6M', '728.4K', '22.9M', '4.8M']
 
 
418
  }
419
 
420
  # Create DataFrame
421
  df = pd.DataFrame(data)
422
 
423
  # Convert currency strings to numeric values
424
+ df['Spends'] = df['Spends'].replace({'\$': '', 'K': '*1e3', 'M': '*1e6'}, regex=True).map(pd.eval).astype(int)
425
+ df['Revenue'] = df['Revenue'].replace({'\$': '', 'K': '*1e3', 'M': '*1e6'}, regex=True).map(pd.eval).astype(int)
 
 
426
 
427
  # Calculate ROI
428
+ df['ROI'] = ((df['Revenue'] - df['Spends']) / df['Spends'])
429
 
430
  # Format columns
431
  format_currency = lambda x: f"${x:,.1f}"
432
  format_roi = lambda x: f"{x:.1f}"
433
 
434
+ df['Spends'] = ['$ 11.3K', '$ 155.2K', '$ 50.7K', '$ 125.4K', '$ 125.2K', '$ 105K', '$ 3.3M', '$ 47.5K', '$ 55.9K', '$ 632.3K', '$ 48.3K']
435
+ df['Revenue'] = ['$ 536.3K', '$ 3.4M', '$ 5M', '$ 3M', '$ 3M', '$ 2M', '$ 20M', '$ 1.5M', '$ 7.1M', '$ 22M', '$ 4.6M']
436
+ df['ROI'] = df['ROI'].apply(format_roi)
437
+
 
 
438
  return df
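The rewritten `create_channel_summary` above still parses display strings such as '$ 3.3M' into numbers by turning the K/M suffixes into arithmetic and evaluating them with `pd.eval`. A small isolated example of that trick:

```python
import pandas as pd

spends = pd.Series(["$ 11.3K", "$ 105K", "$ 1.5M"])
numeric = (
    spends.replace({r"\$": "", "K": "*1e3", "M": "*1e6"}, regex=True)
    .map(pd.eval)
    .astype(int)
)
print(numeric.tolist())  # [11300, 105000, 1500000]
```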
439
 
440
 
441
+ #@st.cache(allow_output_mutation=True)
442
  # def create_contribution_pie(scenario):
443
  # #c1f7dc
444
  # colors_map = {col:color for col,color in zip(st.session_state['channels_list'],plotly.colors.n_colors(plotly.colors.hex_to_rgb('#BE6468'), plotly.colors.hex_to_rgb('#E7B8B7'),23))}
 
470
  # weekly_spends_data = []
471
  # weekly_sales_data = []
472
  # for channel_name in st.session_state['channels_list']:
473
+ # weekly_spends_data.append((go.Bar(x=x,
474
  # y=scenario.channels[channel_name].actual_spends * scenario.channels[channel_name].conversion_rate,
475
+ # name=channel_name_formating(channel_name),
476
  # hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
477
  # legendgroup=channel_name)))
478
+ # weekly_sales_data.append((go.Bar(x=x,
479
  # y=scenario.channels[channel_name].actual_sales,
480
+ # name=channel_name_formating(channel_name),
481
  # hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
482
  # legendgroup=channel_name, showlegend=False)))
483
  # for _d in weekly_spends_data:
484
  # weekly_contribution_fig.add_trace(_d, row=1, col=1)
485
  # for _d in weekly_sales_data:
486
  # weekly_contribution_fig.add_trace(_d, row=1, col=2)
487
+ # weekly_contribution_fig.add_trace(go.Bar(x=x,
488
  # y=scenario.constant + scenario.correction,
489
+ # name='Non Media',
490
  # hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}"), row=1, col=2)
491
  # weekly_contribution_fig.update_layout(barmode='stack', title='Channel contribuion by week', xaxis_title='Date')
492
  # weekly_contribution_fig.update_xaxes(showgrid=False)
 
524
 
525
 
526
  def create_contribution_pie():
527
+ color_palette = ['#F3F3F0', '#5E7D7E', '#2FA1FF', '#00EDED', '#00EAE4', '#304550', '#EDEBEB', '#7FBEFD', '#003059', '#A2F3F3', '#E1D6E2', '#B6B6B6']
528
+ total_contribution_fig = make_subplots(rows=1, cols=2, subplot_titles=['Spends', 'Revenue'], specs=[[{"type": "pie"}, {"type": "pie"}]])
 
 
529
 
530
+ channels_list = ['Paid Search', 'Ga will cid baixo risco', 'Digital tactic others', 'Fb la tier 1', 'Fb la tier 2', 'Paid social others', 'Programmatic', 'Kwai', 'Indicacao', 'Infleux', 'Influencer', 'Non Media']
 
 
531
 
532
  # Assign colors from the limited palette to channels
533
+ colors_map = {col: color_palette[i % len(color_palette)] for i, col in enumerate(channels_list)}
534
+ colors_map['Non Media'] = color_palette[5] # Assign fixed green color for 'Non Media'
 
 
535
 
536
  # Hardcoded values for Spends and Revenue
537
  spends_values = [0.5, 3.36, 1.1, 2.7, 2.7, 2.27, 70.6, 1, 1, 13.7, 1, 0]
 
542
  go.Pie(
543
  labels=[channel_name for channel_name in channels_list],
544
  values=spends_values,
545
+ marker=dict(colors=[colors_map[channel_name] for channel_name in channels_list]),
546
+ hole=0.3
 
 
547
  ),
548
+ row=1, col=1
 
549
  )
550
 
551
  # Add trace for Revenue pie chart
 
553
  go.Pie(
554
  labels=[channel_name for channel_name in channels_list],
555
  values=revenue_values,
556
+ marker=dict(colors=[colors_map[channel_name] for channel_name in channels_list]),
557
+ hole=0.3
 
 
558
  ),
559
+ row=1, col=2
 
560
  )
561
+
562
+ total_contribution_fig.update_traces(textposition='inside', texttemplate='%{percent:.1%}')
563
+ total_contribution_fig.update_layout(uniformtext_minsize=12, title='Channel contribution', uniformtext_mode='hide')
564
  return total_contribution_fig
565
 
 
566
  def create_contribuion_stacked_plot(scenario):
567
+ weekly_contribution_fig = make_subplots(rows=1, cols=2, subplot_titles=['Spends', 'Revenue'], specs=[[{"type": "bar"}, {"type": "bar"}]])
568
+ raw_df = st.session_state['raw_df']
569
+ df = raw_df.sort_values(by='Date')
 
 
570
  x = df.Date
571
  weekly_spends_data = []
572
  weekly_sales_data = []
573
+
574
+ for i, channel_name in enumerate(st.session_state['channels_list']):
575
  color = color_palette[i % len(color_palette)]
576
+
577
+ weekly_spends_data.append(go.Bar(
578
+ x=x,
579
+ y=scenario.channels[channel_name].actual_spends * scenario.channels[channel_name].conversion_rate,
580
+ name=channel_name_formating(channel_name),
581
+ hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
582
+ legendgroup=channel_name,
583
+ marker_color=color,
584
+ ))
585
+
586
+ weekly_sales_data.append(go.Bar(
587
+ x=x,
588
+ y=scenario.channels[channel_name].actual_sales,
589
+ name=channel_name_formating(channel_name),
590
+ hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
591
+ legendgroup=channel_name,
592
+ showlegend=False,
593
+ marker_color=color,
594
+ ))
595
+
 
 
 
 
 
596
  for _d in weekly_spends_data:
597
  weekly_contribution_fig.add_trace(_d, row=1, col=1)
598
  for _d in weekly_sales_data:
599
  weekly_contribution_fig.add_trace(_d, row=1, col=2)
600
+
601
+ weekly_contribution_fig.add_trace(go.Bar(
602
+ x=x,
603
+ y=scenario.constant + scenario.correction,
604
+ name='Non Media',
605
+ hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
606
+ marker_color=color_palette[-1],
607
+ ), row=1, col=2)
608
+
609
+ weekly_contribution_fig.update_layout(barmode='stack', title='Channel contribution by week', xaxis_title='Date')
 
 
610
  weekly_contribution_fig.update_xaxes(showgrid=False)
611
  weekly_contribution_fig.update_yaxes(showgrid=False)
612
  return weekly_contribution_fig
613
 
 
614
  def create_channel_spends_sales_plot(channel):
615
  if channel is not None:
616
  x = channel.dates
617
  _spends = channel.actual_spends * channel.conversion_rate
618
  _sales = channel.actual_sales
619
  channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
620
+ channel_sales_spends_fig.add_trace(go.Bar(
621
+ x=x,
622
+ y=_sales,
623
+ marker_color=color_palette[3], # You can choose a color from the palette
624
+ name='Revenue',
625
+ hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
626
+ ), secondary_y=False)
627
+
628
+ channel_sales_spends_fig.add_trace(go.Scatter(
629
+ x=x,
630
+ y=_spends,
631
+ line=dict(color=color_palette[2]), # You can choose another color from the palette
632
+ name='Spends',
633
+ hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
634
+ ), secondary_y=True)
635
+
636
+ channel_sales_spends_fig.update_layout(xaxis_title='Date', yaxis_title='Revenue', yaxis2_title='Spends ($)', title='Channel spends and Revenue week-wise')
 
 
637
  channel_sales_spends_fig.update_xaxes(showgrid=False)
638
  channel_sales_spends_fig.update_yaxes(showgrid=False)
639
  else:
640
+ raw_df = st.session_state['raw_df']
641
+ df = raw_df.sort_values(by='Date')
642
  x = df.Date
643
+ scenario = class_from_dict(st.session_state['default_scenario_dict'])
644
  _sales = scenario.constant + scenario.correction
645
  channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
646
+ channel_sales_spends_fig.add_trace(go.Bar(
647
+ x=x,
648
+ y=_sales,
649
+ marker_color=color_palette[0], # You can choose a color from the palette
650
+ name='Revenue',
651
+ hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
652
+ ), secondary_y=False)
653
+
654
+ channel_sales_spends_fig.update_layout(xaxis_title='Date', yaxis_title='Revenue', yaxis2_title='Spends ($)', title='Channel spends and Revenue week-wise')
 
 
655
  channel_sales_spends_fig.update_xaxes(showgrid=False)
656
  channel_sales_spends_fig.update_yaxes(showgrid=False)
657
+
658
  return channel_sales_spends_fig
659
 
660
+ def format_numbers(value, n_decimals=1,include_indicator = True):
 
661
  if include_indicator:
662
+ return f'{CURRENCY_INDICATOR} {numerize(value,n_decimals)}'
663
  else:
664
+ return f'{numerize(value,n_decimals)}'
665
 
666
 
667
+ def decimal_formater(num_string,n_decimals=1):
668
+ parts = num_string.split('.')
669
  if len(parts) == 1:
670
+ return num_string+'.' + '0'*n_decimals
671
  else:
672
  to_be_padded = n_decimals - len(parts[-1])
673
+ if to_be_padded > 0 :
674
+ return num_string+'0'*to_be_padded
675
  else:
676
  return num_string
677
+
678
+
679
  def channel_name_formating(channel_name):
680
+ name_mod = channel_name.replace('_', ' ')
681
+ if name_mod.lower().endswith(' imp'):
682
+ name_mod = name_mod.replace('Imp','Spend')
683
+ elif name_mod.lower().endswith(' clicks'):
684
+ name_mod = name_mod.replace('Clicks','Spend')
685
  return name_mod
686
 
687
 
688
+ def send_email(email,message):
689
+ s = smtplib.SMTP('smtp.gmail.com', 587)
690
  s.starttls()
691
  s.login("geethu4444@gmail.com", "jgydhpfusuremcol")
692
  s.sendmail("geethu4444@gmail.com", email, message)
693
  s.quit()
694
 
 
695
  if __name__ == "__main__":
696
  initialize_data()