Upload 28 files
- Data_Import.py +211 -846
- README.md +1 -1
- classes.py +106 -130
- upf_data_converted.csv +0 -0
- upf_data_converted.xlsx +2 -2
- utilities.py +263 -534
Data_Import.py
CHANGED
@@ -1,58 +1,79 @@
  # Importing necessary libraries
  import streamlit as st

  st.set_page_config(
-     page_title="
      page_icon=":shark:",
      layout="wide",
      initial_sidebar_state="collapsed",
  )

- import
  import pandas as pd
  from utilities import set_header, load_local_css
- import streamlit_authenticator as stauth
- import yaml
- from yaml import SafeLoader

  load_local_css("styles.css")
  set_header()


  for k, v in st.session_state.items():
-     if k not in [
-         "FormSubmitter"
-     ):
          st.session_state[k] = v
- with open("config.yaml") as file:
-     config = yaml.load(file, Loader=SafeLoader)
- st.session_state["config"] = config
- authenticator = stauth.Authenticate(
-     config["credentials"],
-     config["cookie"]["name"],
-     config["cookie"]["key"],
-     config["cookie"]["expiry_days"],
-     config["preauthorized"],
- )
- st.session_state["authenticator"] = authenticator
- name, authentication_status, username = authenticator.login("Login", "main")
- auth_status = st.session_state.get("authentication_status")

-

  if not is_state_initiaized:
-

-
  def validate_date_column(df):
      try:
          # Attempt to convert the 'Date' column to datetime
-         df["
          return True
      except:
          return False
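As context for the Date handling in both versions of this page: the app accepts only a DD-MM-YYYY Date column, and validate_date_column simply attempts the conversion and reports success. A minimal, standalone sketch of that check (the sample frame is made up; the format string matches the new code later in this diff):

import pandas as pd

sample = pd.DataFrame({"Date": ["01-02-2024", "08-02-2024"], "spend": [100, 120]})

def validate_date_column(df):
    try:
        # Same conversion the app attempts: day-first DD-MM-YYYY
        df["Date"] = pd.to_datetime(df["Date"], format="%d-%m-%Y")
        return True
    except Exception:
        return False

print(validate_date_column(sample))  # True only when every date parses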
@@ -70,786 +91,196 @@ if auth_status == True:
        return "irregular"


-     # Function to
-
-         #
-             st.warning(
-                 f"Duplicate File: {file_name}. This file will be skipped.",
-                 icon="⚠️",
-             )
-             continue
-
-         # Read the file into a DataFrame
-         df = pd.read_excel(uploaded_file)
-
-         # Convert all column names to lowercase
-         df.columns = df.columns.str.lower().str.strip()
-
-         # Separate numeric and non-numeric columns
-         numeric_cols = list(df.select_dtypes(include=["number"]).columns)
-         non_numeric_cols = [
-             col
-             for col in df.select_dtypes(exclude=["number"]).columns
-             if col.lower() != "date"
-         ]
-
-         # Check for 'Date' column
-         if not (validate_date_column(df) and len(numeric_cols) > 0):
-             st.warning(
-                 f"File Name: {file_name} ➜ Please upload data with Date column in 'DD-MM-YYYY' format and at least one media/exogenous column. This file will be skipped.",
-                 icon="⚠️",
-             )
-             continue
-
-         # Check for interval
-         common_freq = common_freq = (
-             pd.Series(df["date"].unique()).diff().dt.days.dropna().mode()[0]
-         )
-         # Calculate the data interval (daily, weekly, monthly or irregular)
-         interval = determine_data_interval(common_freq)
-         if interval == "irregular":
-             st.warning(
-                 f"File Name: {file_name} ➜ Please upload data in daily, weekly or monthly interval. This file will be skipped.",
-                 icon="⚠️",
-             )
-             continue
-
-         # Store both DataFrames in the dictionary under their respective keys
-         df_dict[file_name] = {
-             "numeric": numeric_cols,
-             "non_numeric": non_numeric_cols,
-             "interval": interval,
-             "df": df,
-         }
-
-     return df_dict
-
-
- # Function to adjust dataframe granularity
- def adjust_dataframe_granularity(df, current_granularity, target_granularity):
-     # Set index
-     df.set_index("date", inplace=True)
-
-     # Define aggregation rules for resampling
-     aggregation_rules = {
-         col: "sum" if pd.api.types.is_numeric_dtype(df[col]) else "first"
-         for col in df.columns
-     }
-
-     # Initialize resampled_df
-     resampled_df = df
-     if current_granularity == "daily" and target_granularity == "weekly":
-         resampled_df = df.resample("W-MON", closed="left", label="left").agg(
-             aggregation_rules
-         )
-
-     elif current_granularity == "daily" and target_granularity == "monthly":
-         resampled_df = df.resample("MS", closed="left", label="left").agg(
-             aggregation_rules
-         )
-
-     elif current_granularity == "daily" and target_granularity == "daily":
-         resampled_df = df.resample("D").agg(aggregation_rules)
-
-     elif current_granularity in ["weekly", "monthly"] and target_granularity == "daily":
-         # For higher to lower granularity, distribute numeric and replicate non-numeric values equally across the new period
-         expanded_data = []
        for _, row in df.iterrows():
-             if
-             elif
-
-             new_row =
            for col in df.columns:
-                 if
-
-                 elif current_granularity == "monthly":
-                     new_row[col] = row[col] / row.name.days_in_month
-                 else:
-                     new_row[col] = row[col]
-             expanded_data.append((date, new_row))
-
-         resampled_df = pd.DataFrame(
-             [data for _, data in expanded_data],
-             index=[date for date, _ in expanded_data],
-         )

-

- # Function to clean and extract unique values of Panel_1 and Panel_2
- st.cache_resource(show_spinner=False)

-
-         selected_panel2 = selections[file_name].get("Panel_2")

-         if (
-             selected_panel1
-             and selected_panel1 != "N/A"
-             and selected_panel1 in df.columns
-         ):
-             df[selected_panel1] = (
-                 df[selected_panel1].str.lower().str.strip().str.replace("_", " ")
-             )
-             all_panel1_values.update(df[selected_panel1].dropna().unique())

-         # Clean and standardize Panel_2 column if it exists and is selected
-         if (
-             selected_panel2
-             and selected_panel2 != "N/A"
-             and selected_panel2 in df.columns
-         ):
-             df[selected_panel2] = (
-                 df[selected_panel2].str.lower().str.strip().str.replace("_", " ")
-             )
-             all_panel2_values.update(df[selected_panel2].dropna().unique())

-

- #
- st.

-
-         elif formatted_list:
-             return formatted_list[0]
-         return "No values available"

- #
-

-
-         selected_panel2 = selections[file_name].get("Panel_2")

-         # Correcting the segment selection logic & handling 'N/A'
-         if selected_panel1 != "N/A" and selected_panel2 != "N/A":
-             unique_combinations = df[
-                 [selected_panel1, selected_panel2]
-             ].drop_duplicates()
-         elif selected_panel1 != "N/A":
-             unique_combinations = df[[selected_panel1]].drop_duplicates()
-             selected_panel2 = None  # Ensure Panel_2 is ignored if N/A
-         elif selected_panel2 != "N/A":
-             unique_combinations = df[[selected_panel2]].drop_duplicates()
-             selected_panel1 = None  # Ensure Panel_1 is ignored if N/A
        else:
-             #
-
-             )
-             files_dict[file_name]["df"] = df
-             continue  # Skip to the next file
-
-         transformed_segments = []
-         for _, combo in unique_combinations.iterrows():
-             if selected_panel1 and selected_panel2:
-                 segment = df[
-                     (df[selected_panel1] == combo[selected_panel1])
-                     & (df[selected_panel2] == combo[selected_panel2])
-                 ]
-             elif selected_panel1:
-                 segment = df[df[selected_panel1] == combo[selected_panel1]]
-             elif selected_panel2:
-                 segment = df[df[selected_panel2] == combo[selected_panel2]]
-
-             # Adjust granularity of the segment
-             transformed_segment = adjust_dataframe_granularity(
-                 segment, file_data["interval"], granularity_selection
            )
-             transformed_segments.append(transformed_segment)
-
-         # Combine all transformed segments into a single DataFrame for this file
-         transformed_df = pd.concat(transformed_segments, ignore_index=True)
-         files_dict[file_name]["df"] = transformed_df
-
-     return files_dict
-
-
- # Function to create main dataframe structure
- st.cache_resource(show_spinner=False)
-
-
- def create_main_dataframe(
-     files_dict, all_panel1_values, all_panel2_values, granularity_selection
- ):
-     # Determine the global start and end dates across all DataFrames
-     global_start = min(df["df"]["date"].min() for df in files_dict.values())
-     global_end = max(df["df"]["date"].max() for df in files_dict.values())
-
-     # Adjust the date_range generation based on the granularity_selection
-     if granularity_selection == "weekly":
-         # Generate a weekly range, with weeks starting on Monday
-         date_range = pd.date_range(start=global_start, end=global_end, freq="W-MON")
-     elif granularity_selection == "monthly":
-         # Generate a monthly range, starting from the first day of each month
-         date_range = pd.date_range(start=global_start, end=global_end, freq="MS")
-     else:  # Default to daily if not weekly or monthly
-         date_range = pd.date_range(start=global_start, end=global_end, freq="D")
-
-     # Collect all unique Panel_1 and Panel_2 values, excluding 'N/A'
-     all_panel1s = all_panel1_values
-     all_panel2s = all_panel2_values
-
-     # Dynamically build the list of dimensions (Panel_1, Panel_2) to include in the main DataFrame based on availability
-     dimensions, merge_keys = [], []
-     if all_panel1s:
-         dimensions.append(all_panel1s)
-         merge_keys.append("Panel_1")
-     if all_panel2s:
-         dimensions.append(all_panel2s)
-         merge_keys.append("Panel_2")
-
-     dimensions.append(date_range)  # Date range is always included
-     merge_keys.append("date")  # Date range is always included
-
-     # Create a main DataFrame template with the dimensions
-     main_df = pd.MultiIndex.from_product(
-         dimensions,
-         names=[name for name, _ in zip(merge_keys, dimensions)],
-     ).to_frame(index=False)
-
-     return main_df.reset_index(drop=True)
-
-
- # Function to prepare and merge dataFrames
- st.cache_resource(show_spinner=False)
-
-
- def merge_into_main_df(main_df, files_dict, selections):
-     for file_name, file_data in files_dict.items():
-         df = file_data["df"].copy()
-
-         # Rename selected Panel_1 and Panel_2 columns if not 'N/A'
-         selected_panel1 = selections[file_name].get("Panel_1", "N/A")
-         selected_panel2 = selections[file_name].get("Panel_2", "N/A")
-         if selected_panel1 != "N/A":
-             df.rename(columns={selected_panel1: "Panel_1"}, inplace=True)
-         if selected_panel2 != "N/A":
-             df.rename(columns={selected_panel2: "Panel_2"}, inplace=True)
-
-         # Merge current DataFrame into main_df based on 'date', and where applicable, 'Panel_1' and 'Panel_2'
-         merge_keys = ["date"]
-         if "Panel_1" in df.columns:
-             merge_keys.append("Panel_1")
-         if "Panel_2" in df.columns:
-             merge_keys.append("Panel_2")
-         main_df = pd.merge(main_df, df, on=merge_keys, how="left")
-
-     # After all merges, sort by 'date' and reset index for cleanliness
-     sort_by = ["date"]
-     if "Panel_1" in main_df.columns:
-         sort_by.append("Panel_1")
-     if "Panel_2" in main_df.columns:
-         sort_by.append("Panel_2")
-     main_df.sort_values(by=sort_by, inplace=True)
-     main_df.reset_index(drop=True, inplace=True)
-
-     return main_df
-
-
- # Function to categorize column
- def categorize_column(column_name):
-     # Define keywords for each category
-     internal_keywords = [
-         "Price",
-         "Discount",
-         "product_price",
-         "cost",
-         "margin",
-         "inventory",
-         "sales",
-         "revenue",
-         "turnover",
-         "expense",
-     ]
-     exogenous_keywords = [
-         "GDP",
-         "Tax",
-         "Inflation",
-         "interest_rate",
-         "employment_rate",
-         "exchange_rate",
-         "consumer_spending",
-         "retail_sales",
-         "oil_prices",
-         "weather",
-     ]
-
-     # Check if the column name matches any of the keywords for Internal or Exogenous categories
-     for keyword in internal_keywords:
-         if keyword.lower() in column_name.lower():
-             return "Internal"
-     for keyword in exogenous_keywords:
-         if keyword.lower() in column_name.lower():
-             return "Exogenous"
-
-     # Default to Media if no match found
-     return "Media"

-
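For reference, the keyword matching that the removed categorize_column helper performed can be exercised on its own. A small sketch with made-up column names and shortened keyword lists (the full lists are in the code above):

internal_keywords = ["price", "discount", "cost", "margin", "inventory", "sales", "revenue"]
exogenous_keywords = ["gdp", "tax", "inflation", "interest_rate", "weather"]

def categorize_column(column_name):
    name = column_name.lower()
    # Internal takes precedence, then Exogenous; anything else falls into Media
    if any(keyword in name for keyword in internal_keywords):
        return "Internal"
    if any(keyword in name for keyword in exogenous_keywords):
        return "Exogenous"
    return "Media"

print(categorize_column("product_price_usd"))     # Internal
print(categorize_column("gdp_growth"))            # Exogenous
print(categorize_column("facebook_impressions"))  # Media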
    def prepare_missing_stats_df(df):
        missing_stats = []
        for column in df.columns:
            if (
-                 column == "
-             ):  # Skip Date
                continue

            missing = df[column].isnull().sum()
            pct_missing = round((missing / len(df)) * 100, 2)
-
-             # Dynamically assign category based on column name
-             category = categorize_column(column)
-             # category = "Media"  # Keep default bin as Media
-
            missing_stats.append(
                {
                    "Column": column,
                    "Missing Values": missing,
                    "Missing Percentage": pct_missing,
                    "Impute Method": "Fill with 0",  # Default value
-                     "Category":
                }
            )
        stats_df = pd.DataFrame(missing_stats)
-
        return stats_df


- #
-
- def add_api_dataframe_to_dict(main_df, files_dict):
-     files_dict["API"] = {
-         "numeric": list(main_df.select_dtypes(include=["number"]).columns),
-         "non_numeric": [
-             col
-             for col in main_df.select_dtypes(exclude=["number"]).columns
-             if col.lower() != "date"
-         ],
-         "interval": determine_data_interval(
-             pd.Series(main_df["date"].unique()).diff().dt.days.dropna().mode()[0]
-         ),
-         "df": main_df,
-     }
-
-     return files_dict
-
-
- # Function to reads an API into a DataFrame, parsing specified columns as datetime
- @st.cache_resource(show_spinner=False)
- def read_API_data():
-     return pd.read_excel("upf_data_converted_randomized_resp_metrics.xlsx", parse_dates=["Date"])
-
-
- # Function to set the 'Panel_1_Panel_2_Selected' session state variable to False
- def set_Panel_1_Panel_2_Selected_false():
-     st.session_state["Panel_1_Panel_2_Selected"] = False
-
-
- # Function to serialize and save the objects into a pickle file
- @st.cache_resource(show_spinner=False)
- def save_to_pickle(file_path, final_df, bin_dict):
-     # Open the file in write-binary mode and dump the objects
-     with open(file_path, "wb") as f:
-         pickle.dump({"final_df": final_df, "bin_dict": bin_dict}, f)
-     # Data is now saved to file
-
-
- # Function to processes the merged_df DataFrame based on operations defined in edited_df
- @st.cache_resource(show_spinner=False)
- def process_dataframes(merged_df, edited_df, edited_stats_df):
-     # Ensure there are operations defined by the user
-     if edited_df.empty:
-         return merged_df, edited_stats_df  # No operations to apply
-
-     # Perform operations as defined by the user
-     for index, row in edited_df.iterrows():
-         result_column_name = f"{row['Column 1']}{row['Operator']}{row['Column 2']}"
-         col1 = row["Column 1"]
-         col2 = row["Column 2"]
-         op = row["Operator"]
-
-         # Apply the specified operation
-         if op == "+":
-             merged_df[result_column_name] = merged_df[col1] + merged_df[col2]
-         elif op == "-":
-             merged_df[result_column_name] = merged_df[col1] - merged_df[col2]
-         elif op == "*":
-             merged_df[result_column_name] = merged_df[col1] * merged_df[col2]
-         elif op == "/":
-             merged_df[result_column_name] = merged_df[col1] / merged_df[col2].replace(
-                 0, 1e-9
-             )
-
-         # Add summary of operation to edited_stats_df
-         new_row = {
-             "Column": result_column_name,
-             "Missing Values": None,
-             "Missing Percentage": None,
-             "Impute Method": None,
-             "Category": row["Category"],
-         }
-         new_row_df = pd.DataFrame([new_row])
-
-         # Use pd.concat to add the new_row_df to edited_stats_df
-         edited_stats_df = pd.concat(
-             [edited_stats_df, new_row_df], ignore_index=True, axis=0
-         )
-
-     # Combine column names from edited_df for cleanup
-     combined_columns = set(edited_df["Column 1"]).union(set(edited_df["Column 2"]))
-
-     # Filter out rows in edited_stats_df and drop columns from merged_df
-     edited_stats_df = edited_stats_df[~edited_stats_df["Column"].isin(combined_columns)]
-     merged_df.drop(columns=list(combined_columns), errors="ignore", inplace=True)
-
-     return merged_df, edited_stats_df
-
-
- # Function to prepare a list of numeric column names and initialize an empty DataFrame with predefined structure
- st.cache_resource(show_spinner=False)
-
-
- def prepare_numeric_columns_and_default_df(merged_df, edited_stats_df):
-     # Get columns categorized as 'Response Metrics'
-     columns_response_metrics = edited_stats_df[
-         edited_stats_df["Category"] == "Response Metrics"
-     ]["Column"].tolist()
-
-     # Filter numeric columns, excluding those categorized as 'Response Metrics'
-     numeric_columns = [
-         col
-         for col in merged_df.select_dtypes(include=["number"]).columns
-         if col not in columns_response_metrics
-     ]
-
-     # Define the structure of the empty DataFrame
-     data = {
-         "Column 1": pd.Series([], dtype="str"),
-         "Operator": pd.Series([], dtype="str"),
-         "Column 2": pd.Series([], dtype="str"),
-         "Category": pd.Series([], dtype="str"),
-     }
-     default_df = pd.DataFrame(data)
-
-     return numeric_columns, default_df
-
-
- # Initialize 'final_df' in session state
- if "final_df" not in st.session_state:
-     st.session_state["final_df"] = pd.DataFrame()
-
- # Initialize 'bin_dict' in session state
- if "bin_dict" not in st.session_state:
-     st.session_state["bin_dict"] = {}
-
- # Initialize 'Panel_1_Panel_2_Selected' in session state
- if "Panel_1_Panel_2_Selected" not in st.session_state:
-     st.session_state["Panel_1_Panel_2_Selected"] = False
-
-
- # Page Title
- st.write("")  # Top padding
- st.title("Data Import")
-
-
- #########################################################################################################################################################
- # Create a dictionary to hold all DataFrames and collect user input to specify "Panel_2" and "Panel_1" columns for each file
- #########################################################################################################################################################
-
-
- # Read the Excel file, parsing 'Date' column as datetime
- main_df = read_API_data()
-
- # Convert all column names to lowercase
- main_df.columns = main_df.columns.str.lower().str.strip()
-
- # File uploader
- uploaded_files = st.file_uploader(
-     "Upload additional data",
-     type=["xlsx"],
-     accept_multiple_files=True,
-     on_change=set_Panel_1_Panel_2_Selected_false,
- )
-
- # Custom HTML for upload instructions
- recommendation_html = f"""
- <div style="text-align: justify;">
-     <strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets including panel, media, internal, and exogenous data adhere to the following guidelines: Each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code>, be free of missing values.
- </div>
- """
- st.markdown(recommendation_html, unsafe_allow_html=True)
-
- # Choose Desired Granularity
- st.markdown("#### Choose Desired Granularity")
- # Granularity Selection
- granularity_selection = st.selectbox(
-     "Choose Date Granularity",
-     ["Daily", "Weekly", "Monthly"],
-     label_visibility="collapsed",
-     on_change=set_Panel_1_Panel_2_Selected_false,
- )
- granularity_selection = str(granularity_selection).lower()
-
- # Convert files to dataframes
- files_dict = files_to_dataframes(uploaded_files)
-
- # Add API Dataframe
- if main_df is not None:
-     files_dict = add_api_dataframe_to_dict(main_df, files_dict)
-
- # Display a warning message if no files have been uploaded and halt further execution
- if not files_dict:
-     st.warning(
-         "Please upload at least one file to proceed.",
-         icon="⚠️",
-     )
-     st.stop()  # Halts further execution until file is uploaded
-
-
- # Select Panel_1 and Panel_2 columns
- st.markdown("#### Select Panel columns")
- selections = {}
- with st.expander("Select Panel columns", expanded=False):
-     count = 0  # Initialize counter to manage the visibility of labels and keys
-     for file_name, file_data in files_dict.items():
-         # Determine visibility of the label based on the count
-         if count == 0:
-             label_visibility = "visible"
-         else:
-             label_visibility = "collapsed"
-
-         # Extract non-numeric columns
-         non_numeric_cols = file_data["non_numeric"]
-
-         # Prepare Panel_1 and Panel_2 values for dropdown, adding "N/A" as an option
-         panel1_values = non_numeric_cols + ["N/A"]
-         panel2_values = non_numeric_cols + ["N/A"]
-
-         # Skip if only one option is available
-         if len(panel1_values) == 1 and len(panel2_values) == 1:
-             selected_panel1, selected_panel2 = "N/A", "N/A"
-             # Update the selections for Panel_1 and Panel_2 for the current file
-             selections[file_name] = {
-                 "Panel_1": selected_panel1,
-                 "Panel_2": selected_panel2,
-             }
-             continue
-
-         # Create layout columns for File Name, Panel_2, and Panel_1 selections
-         file_name_col, Panel_1_col, Panel_2_col = st.columns([2, 4, 4])
-
-         with file_name_col:
-             # Display "File Name" label only for the first file
-             if count == 0:
-                 st.write("File Name")
-             else:
-                 st.write("")
-             st.write(file_name)  # Display the file name
-
-         with Panel_1_col:
-             # Display a selectbox for Panel_1 values
-             selected_panel1 = st.selectbox(
-                 "Select Panel Level 1",
-                 panel2_values,
-                 on_change=set_Panel_1_Panel_2_Selected_false,
-                 label_visibility=label_visibility,  # Control visibility of the label
-                 key=f"Panel_1_selectbox{count}",  # Ensure unique key for each selectbox
-             )
-
-         with Panel_2_col:
-             # Display a selectbox for Panel_2 values
-             selected_panel2 = st.selectbox(
-                 "Select Panel Level 2",
-                 panel1_values,
-                 on_change=set_Panel_1_Panel_2_Selected_false,
-                 label_visibility=label_visibility,  # Control visibility of the label
-                 key=f"Panel_2_selectbox{count}",  # Ensure unique key for each selectbox
-             )
-
-         # Skip processing if the same column is selected for both Panel_1 and Panel_2 due to potential data integrity issues
-         if selected_panel2 == selected_panel1 and not (
-             selected_panel2 == "N/A" and selected_panel1 == "N/A"
-         ):
-             st.warning(
-                 f"File: {file_name} → The same column cannot serve as both Panel_1 and Panel_2. Please adjust your selections.",
-             )
-             selected_panel1, selected_panel2 = "N/A", "N/A"
-             st.stop()
-
-         # Update the selections for Panel_1 and Panel_2 for the current file
-         selections[file_name] = {
-             "Panel_1": selected_panel1,
-             "Panel_2": selected_panel2,
-         }
-
-         count += 1  # Increment the counter after processing each file
-
- # Accept Panel_1 and Panel_2 selection
- if st.button("Accept and Process", use_container_width=True):
-
-     # Normalize all data to a daily granularity. This initial standardization simplifies subsequent conversions to other levels of granularity
-     with st.spinner("Processing..."):
-         files_dict = standardize_data_to_daily(files_dict, selections)
-
-         # Convert all data to daily level granularity
-         files_dict = apply_granularity_to_all(
-             files_dict, granularity_selection, selections
-         )
-
-         # Update the 'files_dict' in the session state
-         st.session_state["files_dict"] = files_dict
-
-         # Set a flag in the session state to indicate that selection has been made
-         st.session_state["Panel_1_Panel_2_Selected"] = True
-
-
- #########################################################################################################################################################
- # Display unique Panel_1 and Panel_2 values
- #########################################################################################################################################################
-
-
- # Halts further execution until Panel_1 and Panel_2 columns are selected
- if "files_dict" in st.session_state and st.session_state["Panel_1_Panel_2_Selected"]:
-     files_dict = st.session_state["files_dict"]
- else:
-     st.stop()
-
- # Set to store unique values of Panel_1 and Panel_2
- with st.spinner("Fetching Panel values..."):
-     all_panel1_values, all_panel2_values = clean_and_extract_unique_values(
-         files_dict, selections
-     )
-
- # List of Panel_1 and Panel_2 columns unique values
- list_of_all_panel1_values = list(all_panel1_values)
- list_of_all_panel2_values = list(all_panel2_values)
-
- # Format Panel_1 and Panel_2 values for display
- formatted_panel1_values = format_values_for_display(list_of_all_panel1_values)
- formatted_panel2_values = format_values_for_display(list_of_all_panel2_values)
-
- # Unique Panel_1 and Panel_2 values
- st.markdown("#### Unique Panel values")
- # Display Panel_1 and Panel_2 values
- with st.expander("Unique Panel values"):
-     st.write("")
-     st.markdown(
-         f"""
-         <style>
-         .justify-text {{
-             text-align: justify;
-         }}
-         </style>
-         <div class="justify-text">
-             <strong>Panel Level 1 Values:</strong> {formatted_panel1_values}<br>
-             <strong>Panel Level 2 Values:</strong> {formatted_panel2_values}
-         </div>
-         """,
-         unsafe_allow_html=True,
-     )
-
-     # Display total Panel_1 and Panel_2
-     st.write("")
-     st.markdown(
-         f"""
-         <div style="text-align: justify;">
-             <strong>Number of Level 1 Panels detected:</strong> {len(list_of_all_panel1_values)}<br>
-             <strong>Number of Level 2 Panels detected:</strong> {len(list_of_all_panel2_values)}
-         </div>
-         """,
-         unsafe_allow_html=True,
-     )
-     st.write("")
-
-
- #########################################################################################################################################################
- # Merge all DataFrames
- #########################################################################################################################################################
-
-
- # Merge all DataFrames selected
- main_df = create_main_dataframe(
-     files_dict, all_panel1_values, all_panel2_values, granularity_selection
- )
- merged_df = merge_into_main_df(main_df, files_dict, selections)
-
-
- #########################################################################################################################################################
- # Categorize Variables and Impute Missing Values
- #########################################################################################################################################################
-

  # Create an editable DataFrame in Streamlit
  st.markdown("#### Select Variables Category & Impute Missing Values")

- # Prepare missing stats DataFrame for editing
- missing_stats_df = prepare_missing_stats_df(merged_df)
-
  edited_stats_df = st.data_editor(
      missing_stats_df,
      column_config={

@@ -865,10 +296,12 @@ if auth_status == True:

          ),
          "Category": st.column_config.SelectboxColumn(
              options=[
                  "Media",
                  "Exogenous",
                  "Internal",
-                 "
              ],
              required=True,
              default="Media",

@@ -879,84 +312,31 @@ if auth_status == True:

      use_container_width=True,
  )

  # Apply changes based on edited DataFrame
  for i, row in edited_stats_df.iterrows():
      column = row["Column"]
      if row["Impute Method"] == "Drop Column":
-

      elif row["Impute Method"] == "Fill with Mean":
-

      elif row["Impute Method"] == "Fill with Median":
-

      elif row["Impute Method"] == "Fill with 0":
-
-
- #########################################################################################################################################################
- # Group columns
- #########################################################################################################################################################


- #
-

- #
-
- # Display editable Dataframe
- edited_df = st.data_editor(
-     default_df,
-     column_config={
-         "Column 1": st.column_config.SelectboxColumn(
-             options=numeric_columns,
-             required=True,
-             default=numeric_columns[0],
-             width=400,
-         ),
-         "Operator": st.column_config.SelectboxColumn(
-             options=["+", "-", "*", "/"],
-             required=True,
-             default="+",
-             width=100,
-         ),
-         "Column 2": st.column_config.SelectboxColumn(
-             options=numeric_columns,
-             required=True,
-             default=numeric_columns[0],
-             width=400,
-         ),
-         "Category": st.column_config.SelectboxColumn(
-             options=[
-                 "Media",
-                 "Exogenous",
-                 "Internal",
-                 "Response Metrics",
-             ],
-             required=True,
-             default="Media",
-             width=200,
-         ),
-     },
-     num_rows="dynamic",
- )
-
- # Process the DataFrame based on user inputs and operations specified in edited_df
- final_df, edited_stats_df = process_dataframes(merged_df, edited_df, edited_stats_df)
-
-
- #########################################################################################################################################################
- # Display the Final DataFrame and variables
- #########################################################################################################################################################
-
-
- # Display the Final DataFrame and variables
- st.markdown("#### Final DataFrame")
- st.dataframe(final_df, hide_index=True)

  # Initialize an empty dictionary to hold categories and their variables
  category_dict = {}

@@ -974,15 +354,8 @@ if auth_status == True:

      # If it exists, append the current column to the list of variables under this category
      category_dict[category].append(column)

- # Add Date, Panel_1 and Panel_2 in category dictionary
- category_dict.update({"Date": ["date"]})
- if "Panel_1" in final_df.columns:
-     category_dict["Panel Level 1"] = ["Panel_1"]
- if "Panel_2" in final_df.columns:
-     category_dict["Panel Level 2"] = ["Panel_2"]
-
  # Display the dictionary
- st.markdown("#### Variable Category")
  for category, variables in category_dict.items():
      # Check if there are multiple variables to handle "and" insertion correctly
      if len(variables) > 1:

@@ -993,27 +366,19 @@ if auth_status == True:

      variables_str = variables[0]

  # Display the category and its variables in the desired format
- st.markdown(
-
- st.
-
- st.session_state["final_df"], st.session_state["bin_dict"] = final_df, category_dict
-
- # Save the DataFrame and dictionary from the session state to the pickle file
- if st.button("Accept and Save", use_container_width=True):
-     save_to_pickle(
-         "data_import.pkl", st.session_state["final_df"], st.session_state["bin_dict"]
-     )
-     st.toast("💾 Saved Successfully!")
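Both the removed and the added code persist their results with pickle (save_to_pickle above, the Save Changes handler below). A quick, generic round-trip sketch, with a made-up file name:

import pickle
import pandas as pd

final_df = pd.DataFrame({"date": pd.date_range("2024-01-01", periods=3), "spend": [1, 2, 3]})
bin_dict = {"Media": ["spend"]}

# Dump both objects into one file, as save_to_pickle does
with open("example_data_import.pkl", "wb") as f:
    pickle.dump({"final_df": final_df, "bin_dict": bin_dict}, f)

# Read them back
with open("example_data_import.pkl", "rb") as f:
    restored = pickle.load(f)
print(restored["bin_dict"])  # {'Media': ['spend']}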
  # Importing necessary libraries
  import streamlit as st
+ import pickle

  st.set_page_config(
+     page_title="Model Build",
      page_icon=":shark:",
      layout="wide",
      initial_sidebar_state="collapsed",
  )

+ from utilities import load_authenticator
+ import numpy as np
  import pandas as pd
  from utilities import set_header, load_local_css

  load_local_css("styles.css")
  set_header()


  for k, v in st.session_state.items():
+     if k not in ['logout', 'login','config'] and not k.startswith('FormSubmitter'):
          st.session_state[k] = v

+ authenticator = st.session_state.get('authenticator')
+ if authenticator is None:
+     authenticator = load_authenticator()
+
+ name, authentication_status, username = authenticator.login('Login', 'main')
+ auth_status = st.session_state.get('authentication_status')

+ if auth_status == True:
+     is_state_initiaized = st.session_state.get('initialized', False)
      if not is_state_initiaized:
+         a=1
+
+
+     # Function to expand dataframe to daily
+     @st.cache_resource(show_spinner=False)
+     def expand_to_daily(df, granularity, start_date, end_date):
+         # Create a new DataFrame with a row for each day
+         all_dates = pd.date_range(start=start_date, end=end_date, freq="D")
+         daily_df = pd.DataFrame(all_dates, columns=["Date"])
+
+         if granularity == "daily":
+             # For daily data, simply merge to fill missing dates
+             daily_df = daily_df.merge(df, on="Date", how="left")
+         else:
+             # For weekly or monthly, distribute values to daily rows
+             for column in df.columns:
+                 if column != "Date":  # Skip 'Date' column
+                     daily_df[column] = np.nan  # Initialize with NaNs
+
+             # Group by the required frequency and distribute values
+             freq = "W-MON" if granularity == "weekly" else "MS"
+             for _, group in df.groupby(pd.Grouper(key="Date", freq=freq)):
+                 num_days = len(
+                     pd.date_range(group["Date"].min(), group["Date"].max(), freq="D")
+                 )
+                 for column in group.columns:
+                     if column == "Date":  # Skip 'Date' column
+                         continue
+                     value = group[column].sum() / num_days
+                     date_range = pd.date_range(
+                         group["Date"].min(), periods=num_days, freq="D"
+                     )
+                     daily_df.loc[daily_df["Date"].isin(date_range), column] = value

+         return daily_df

+
+     # Function to validate date column in dataframe
      def validate_date_column(df):
          try:
              # Attempt to convert the 'Date' column to datetime
+             df["Date"] = pd.to_datetime(df["Date"], format="%d-%m-%Y")
              return True
          except:
              return False

          return "irregular"


+     # Function to convert and fill dates in dataframe
+     def convert_and_fill_dates(df, start_date, end_date, interval):
+         # Create a date range for the desired period
+         all_dates = pd.date_range(start=start_date, end=end_date, freq="D")
+         new_df = pd.DataFrame(all_dates, columns=["Date"])

+         # Preprocess and aggregate data based on the original interval
+         if interval != "daily":
+             # Resample to start of each week/month, then sum values for the same period
+             if interval == "weekly":
+                 df = df.resample("W-MON", on="Date").sum().reset_index()
+             elif interval == "monthly":
+                 df = df.resample("MS", on="Date").sum().reset_index()

+             # Distribute values equally across the days in each week/month
+             expanded_rows = []
+             for _, row in df.iterrows():
+                 if interval == "weekly":
+                     period_dates = pd.date_range(row["Date"], periods=7)
+                 elif interval == "monthly":
+                     period_end = row["Date"] + pd.offsets.MonthEnd(1)
+                     period_dates = pd.date_range(row["Date"], period_end)
+
+                 for date in period_dates:
+                     new_row = row.copy()
+                     new_row["Date"] = date
+                     for col in df.columns:
+                         if col != "Date":  # Skip 'Date' column
+                             new_row[col] = row[col] / len(period_dates)
+                     expanded_rows.append(new_row)

+         # Create a DataFrame from expanded rows
+         expanded_df = pd.DataFrame(expanded_rows)
+         new_df = pd.merge(new_df, expanded_df, how="left", on="Date")
+     else:
+         # Daily data, aggregate if there are multiple entries for the same day
+         df = df.groupby("Date").sum().reset_index()
+         new_df = pd.merge(new_df, df, how="left", on="Date")

+     # Ensure all dates from start to end are present, filling missing values with NaN
+     new_df["Date"] = pd.to_datetime(new_df["Date"])  # Ensure 'Date' is datetime type
+     new_df = new_df.set_index("Date").reindex(all_dates).reset_index()
+     new_df.rename(columns={"index": "Date"}, inplace=True)

+     return new_df

+ # Function to convert a DataFrame from daily level granularity to either weekly or monthly level
+ def convert_to_higher_granularity(df, required_granularity):
+     if required_granularity == "daily":
+         return df

+     # Ensure 'Date' is the index and is in datetime format
+     if not pd.api.types.is_datetime64_any_dtype(df["Date"]):
+         df["Date"] = pd.to_datetime(df["Date"])
+     df.set_index("Date", inplace=True)

+     # Resample and aggregate
+     if required_granularity == "weekly":
+         # Resample to weekly, using 'W-MON' to indicate weeks starting on Monday
+         df = df.resample("W-MON").sum()
+     elif required_granularity == "monthly":
+         # Resample to monthly, using 'MS' to indicate month start
+         df = df.resample("MS").sum()

+     # Reset index to move 'Date' back to a column
+     df.reset_index(inplace=True)

+     return df


+ # # Read the CSV file, parsing 'Date' column as datetime
+ main_df = pd.read_csv("Media_data_for_model_dma_level.csv", dayfirst=True, parse_dates=["Date"])
+ # st.write(main_df)
+
+ # Get the start date (minimum) and end date (maximum) from the 'Date' column
+ api_start_date = main_df["Date"].min()
+ api_end_date = main_df["Date"].max()

+ # Infer the granularity from the most common difference between consecutive dates
+ date_diffs = main_df["Date"].diff().dt.days.dropna()
+ common_diff = date_diffs.mode()[0]
+ api_granularity = determine_data_interval(common_diff)

+ # Convert the DataFrame to daily level granularity
+ main_df = expand_to_daily(main_df, api_granularity, api_start_date, api_end_date)

+ # Page Title
+ st.title("Data Import")

+ # File uploader
+ uploaded_files = st.file_uploader(
+     "Upload additional data", type=["xlsx"], accept_multiple_files=True
+ )

+ # Custom HTML for upload instructions
+ recommendation_html = f"""
+ <div style="text-align: justify;">
+     <strong>Recommendation:</strong> For optimal processing, please ensure that all uploaded datasets including media, internal, and exogenous data adhere to the following guidelines: Each dataset must include a <code>Date</code> column formatted as <code>DD-MM-YYYY</code>, be free of missing values, and aggregated to a {api_granularity} level.
+ </div>
+ """

+ st.markdown(recommendation_html, unsafe_allow_html=True)

+ # Initialize a list to collect all processed DataFrames
+ all_data_dfs = []

+ if uploaded_files:
+     for uploaded_file in uploaded_files:
+         # Extract the file name
+         file_name = uploaded_file.name

+         # Load the file into a DataFrame
+         data_df = pd.read_excel(
+             uploaded_file,
+         )
+
+         # Identify numeric columns in the DataFrame
+         numeric_columns = data_df.select_dtypes(include="number").columns.tolist()

+         # Validate the 'Date' column and ensure there's at least one numeric column
+         if validate_date_column(data_df) and len(numeric_columns) > 0:
+             data_df = data_df[["Date"] + numeric_columns]

+             # Ensure the 'Date' column is in datetime format and sorted
+             data_df["Date"] = pd.to_datetime(data_df["Date"], dayfirst=True)
+             data_df.sort_values("Date", inplace=True)

+             # Calculate the most common day difference between dates to determine frequency
+             common_freq = data_df["Date"].diff().dt.days.dropna().mode()[0]

+             # Calculate the data interval (daily, weekly, monthly or irregular)
+             interval = determine_data_interval(common_freq)

+             if interval == "irregular":
+                 # Warn the user if the 'Date' column doesn't meet the format requirements
+                 st.warning(
+                     f"File Name: {file_name} ➜ Please upload data in daily, weekly or monthly interval."
+                 )
+                 continue

+             # Convert data to specified interval and redistribute to daily
+             data_df = convert_and_fill_dates(
+                 data_df, api_start_date, api_end_date, interval
+             )

+             # Add the processed DataFrame to the list
+             all_data_dfs.append(data_df)

          else:
+             # Warn the user if the 'Date' column doesn't meet the format requirements
+             st.warning(
+                 f"File Name: {file_name} ➜ Please upload data with Date column in 'DD-MM-YYYY' format and at least one media/exogenous column."
              )

+ # Sequentially merge each of the other DataFrames with the main DataFrame on 'Date'
+ for df in all_data_dfs:
+     main_df = pd.merge(main_df, df, on="Date", how="left")


+ # Function to calculate missing stats and prepare for editable DataFrame
  def prepare_missing_stats_df(df):
      missing_stats = []
      for column in df.columns:
          if (
+             column == "Date" or column == "Total Approved Accounts - Revenue"
+         ):  # Skip Date and Revenue column
              continue

          missing = df[column].isnull().sum()
          pct_missing = round((missing / len(df)) * 100, 2)
          missing_stats.append(
              {
                  "Column": column,
                  "Missing Values": missing,
                  "Missing Percentage": pct_missing,
                  "Impute Method": "Fill with 0",  # Default value
+                 "Category": "Media",  # Default value
              }
          )
      stats_df = pd.DataFrame(missing_stats)
      return stats_df


+ # Prepare missing stats DataFrame for editing
+ missing_stats_df = prepare_missing_stats_df(main_df)

  # Create an editable DataFrame in Streamlit
  st.markdown("#### Select Variables Category & Impute Missing Values")

  edited_stats_df = st.data_editor(
      missing_stats_df,
      column_config={

          ),
          "Category": st.column_config.SelectboxColumn(
              options=[
+                 "Date",
                  "Media",
                  "Exogenous",
                  "Internal",
+                 "DMA/Panel",
+                 "Response_Metric"
              ],
              required=True,
              default="Media",

      use_container_width=True,
  )

+
  # Apply changes based on edited DataFrame
  for i, row in edited_stats_df.iterrows():
      column = row["Column"]
      if row["Impute Method"] == "Drop Column":
+         main_df.drop(columns=[column], inplace=True)

      elif row["Impute Method"] == "Fill with Mean":
+         main_df[column].fillna(main_df[column].mean(), inplace=True)

      elif row["Impute Method"] == "Fill with Median":
+         main_df[column].fillna(main_df[column].median(), inplace=True)

      elif row["Impute Method"] == "Fill with 0":
+         main_df[column].fillna(0, inplace=True)


+ # Convert the Final DataFrame to required granularity
+ main_df = convert_to_higher_granularity(main_df, api_granularity)

+ # Display the Final DataFrame and exogenous variables
+ st.markdown("#### Final DataFrame:")
+ st.dataframe(main_df)
+
+

  # Initialize an empty dictionary to hold categories and their variables
  category_dict = {}

      # If it exists, append the current column to the list of variables under this category
      category_dict[category].append(column)

  # Display the dictionary
+ st.markdown("#### Variable Category:")
  for category, variables in category_dict.items():
      # Check if there are multiple variables to handle "and" insertion correctly
      if len(variables) > 1:

          variables_str = variables[0]

      # Display the category and its variables in the desired format
+     st.markdown(f"**{category}:** {variables_str}\n\n", unsafe_allow_html=True)
+
+ # storing maindf and categories in session_state
+ # st.write(main_df)
+
+
+ # st.session_state['Cleaned_data']=main_df
+
+ # st.session_state['category_dict']=category_dict
+ if st.button('Save Changes'):
+
+     with open("Pickle_files/main_df", 'wb') as f:
+         pickle.dump(main_df, f)
+     with open("Pickle_files/category_dict",'wb') as c:
+         pickle.dump(category_dict,c)
+     st.success('Changes Saved!')
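Taken together, the rewritten page reads the base CSV, infers its granularity from the most common day gap, expands everything to daily, imputes, and finally rolls the merged frame back up with convert_to_higher_granularity. A rough end-to-end sketch of that flow outside Streamlit; the determine_data_interval thresholds are assumptions (its body is collapsed in this diff), and the daily spread below is a simplification of expand_to_daily:

import pandas as pd

def determine_data_interval(common_diff):
    # Assumed cut-offs; the real helper is not shown in this diff
    if common_diff == 1:
        return "daily"
    if common_diff == 7:
        return "weekly"
    if common_diff in (28, 29, 30, 31):
        return "monthly"
    return "irregular"

# Stand-in data for Media_data_for_model_dma_level.csv
main_df = pd.DataFrame({
    "Date": pd.date_range("2024-01-01", periods=8, freq="W-MON"),
    "tv_spend": range(8),
})

common_diff = main_df["Date"].diff().dt.days.dropna().mode()[0]
granularity = determine_data_interval(common_diff)  # "weekly"

# Crude daily spread: repeat each weekly value across days, dividing by 7
daily = main_df.set_index("Date").resample("D").ffill().div(7)
# Roll back up to weeks starting on Monday, as convert_to_higher_granularity does
weekly_again = daily.resample("W-MON").sum()
print(granularity, daily.shape, weekly_again.shape)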
README.md
CHANGED
@@ -5,7 +5,7 @@ colorFrom: indigo
  colorTo: pink
  sdk: streamlit
  sdk_version: 1.32.1
- app_file:
+ app_file: app.py
  pinned: false
  ---
classes.py
CHANGED
@@ -16,15 +16,21 @@ def class_to_dict(class_instance):
      attr_dict["modified_spends"] = class_instance.modified_spends
      attr_dict["modified_sales"] = class_instance.modified_sales
      attr_dict["response_curve_type"] = class_instance.response_curve_type
-     attr_dict["response_curve_params"] =
      attr_dict["penalty"] = class_instance.penalty
      attr_dict["bounds"] = class_instance.bounds
      attr_dict["actual_total_spends"] = class_instance.actual_total_spends
      attr_dict["actual_total_sales"] = class_instance.actual_total_sales
-     attr_dict["modified_total_spends"] =
      attr_dict["modified_total_sales"] = class_instance.modified_total_sales
      attr_dict["actual_mroi"] = class_instance.get_marginal_roi("actual")
-     attr_dict["modified_mroi"] = class_instance.get_marginal_roi(

  elif isinstance(class_instance, Scenario):
      attr_dict["type"] = "Scenario"

@@ -37,7 +43,9 @@ def class_to_dict(class_instance):

      attr_dict["correction"] = class_instance.correction
      attr_dict["actual_total_spends"] = class_instance.actual_total_spends
      attr_dict["actual_total_sales"] = class_instance.actual_total_sales
-     attr_dict["modified_total_spends"] =
      attr_dict["modified_total_sales"] = class_instance.modified_total_sales

  return attr_dict

@@ -87,7 +95,9 @@ class Channel:

      self.modified_sales = self.calculate_sales()
      self.modified_total_spends = self.modified_spends.sum()
      self.modified_total_sales = self.modified_sales.sum()
-     self.delta_spends =
      self.delta_sales = self.modified_total_sales - self.actual_total_sales

  def update_penalty(self, penalty):

@@ -109,7 +119,8 @@ class Channel:

      x = np.where(
          x < self.upper_limit,
          x,
-         self.upper_limit
      )
      if self.response_curve_type == "s-curve":
          if self.power >= 0:

@@ -158,7 +169,9 @@ class Channel:

      self.modified_sales = self.calculate_sales()
      self.modified_total_spends = self.modified_spends.sum()
      self.modified_total_sales = self.modified_sales.sum()
-     self.delta_spends =
      self.delta_sales = self.modified_total_sales - self.actual_total_sales

  def intialize(self):

@@ -195,7 +208,9 @@ class Scenario:

      self.actual_total_sales = self.calculate_actual_total_sales()
      self.modified_total_sales = self.calculate_modified_total_sales()
      self.modified_total_spends = self.calculate_modified_total_spends()
-     self.delta_spends =
      self.delta_sales = self.modified_total_sales - self.actual_total_sales

  def update_penalty(self, value):

@@ -205,7 +220,9 @@ class Scenario:

  def calculate_modified_total_spends(self):
      total_actual_spends = 0.0
      for channel in self.channels.values():
-         total_actual_spends +=
      return total_actual_spends

  def calculate_modified_total_spends(self):

@@ -234,47 +251,12 @@ class Scenario:

      self.channels[channel_name].update(modified_spends)
      self.modified_total_sales = self.calculate_modified_total_sales()
      self.modified_total_spends = self.calculate_modified_total_spends()
-     self.delta_spends =
      self.delta_sales = self.modified_total_sales - self.actual_total_sales

-
-     # desired_sales = self.actual_total_sales * (1 + sales_percent / 100.0)
-
-     # def constraint(x):
-     #     for ch, spends in zip(channels_list, x):
-     #         self.update(ch, spends)
-     #     return self.modified_total_sales - desired_sales
-
-     # bounds = []
-     # for ch in channels_list:
-     #     bounds.append(
-     #         (1 + np.array([-50.0, 100.0]) / 100.0)
-     #         * self.channels[ch].actual_total_spends
-     #     )
-
-     # initial_point = []
-     # for bound in bounds:
-     #     initial_point.append(bound[0])
-
-     # power = np.ceil(np.log(sum(initial_point)) / np.log(10))
-
-     # constraints = [NonlinearConstraint(constraint, -1.0, 1.0)]
-
-     # res = minimize(
-     #     lambda x: sum(x) / 10 ** (power),
-     #     bounds=bounds,
-     #     x0=initial_point,
-     #     constraints=constraints,
-     #     method=algo,
-     #     options={"maxiter": int(2e7), "catol": 1},
-     # )
-
-     # for channel_name, modified_spends in zip(channels_list, res.x):
-     #     self.update(channel_name, modified_spends)
-
-     # return zip(channels_list, res.x)
-
  def optimize_spends(self, sales_percent, channels_list, algo="trust-constr"):
      desired_sales = self.actual_total_sales * (1 + sales_percent / 100.0)

      def constraint(x):

@@ -303,7 +285,7 @@ class Scenario:

          x0=initial_point,
          constraints=constraints,
          method=algo,
-         options={"maxiter": int(2e7), "
      )

      for channel_name, modified_spends in zip(channels_list, res.x):

@@ -335,11 +317,14 @@ class Scenario:

      for channel_name in channels_list:
          _channel_class = self.channels[channel_name]
          channel_bounds = _channel_class.bounds
-         channel_actual_total_spends =
-
      )
      old_spends.append(channel_actual_total_spends)
-     bounds.append(

      def objective_function(x):
          for channel_name, modified_spends in zip(channels_list, x):

@@ -347,12 +332,12 @@ class Scenario:

          return -1 * self.modified_total_sales

      res = minimize(
-         lambda x: objective_function(x) / 1e8,
          method="trust-constr",
          x0=old_spends,
          constraints=constraint,
          bounds=bounds,
-         options={"maxiter": int(1e7),
      )
      # res = dual_annealing(
      #     objective_function,

@@ -376,91 +361,81 @@ class Scenario:

      channel_data = []

      summary_rows = []
-     actual_list.append(
-
-         "Spends": self.modified_total_spends,
-         "Sales": self.modified_total_sales,
-     }
-     )
      for channel in self.channels.values():
          name_mod = channel.name.replace("_", " ")
          if name_mod.lower().endswith(" imp"):
              name_mod = name_mod.replace("Imp", " Impressions")
-         summary_rows.append(
-
          data[channel.name] = channel.modified_spends
          data["Date"] = channel.dates
          data["Sales"] = (
              data.get("Sales", np.zeros((len(channel.dates),)))
              + channel.modified_sales
          )
-         actual_list.append(
-
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
447 |
-
|
448 |
-
|
449 |
-
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
|
454 |
-
|
455 |
-
|
456 |
-
|
457 |
-
self.modified_total_sales,
|
458 |
-
round(self.actual_total_sales / self.actual_total_spends, 2),
|
459 |
-
round(self.modified_total_sales / self.modified_total_spends, 2),
|
460 |
-
0.0,
|
461 |
-
0.0,
|
462 |
-
]
|
463 |
-
)
|
464 |
details["Actual"] = actual_list
|
465 |
details["Modified"] = modified_list
|
466 |
columns_index = pd.MultiIndex.from_product(
|
@@ -492,7 +467,8 @@ class Scenario:
|
|
492 |
def from_dict(cls, attr_dict):
|
493 |
channels_list = attr_dict["channels"]
|
494 |
channels = {
|
495 |
-
channel["name"]: class_from_dict(channel)
|
|
|
496 |
}
|
497 |
return Scenario(
|
498 |
name=attr_dict["name"],
|
|
|
16 |
attr_dict["modified_spends"] = class_instance.modified_spends
|
17 |
attr_dict["modified_sales"] = class_instance.modified_sales
|
18 |
attr_dict["response_curve_type"] = class_instance.response_curve_type
|
19 |
+
attr_dict["response_curve_params"] = (
|
20 |
+
class_instance.response_curve_params
|
21 |
+
)
|
22 |
attr_dict["penalty"] = class_instance.penalty
|
23 |
attr_dict["bounds"] = class_instance.bounds
|
24 |
attr_dict["actual_total_spends"] = class_instance.actual_total_spends
|
25 |
attr_dict["actual_total_sales"] = class_instance.actual_total_sales
|
26 |
+
attr_dict["modified_total_spends"] = (
|
27 |
+
class_instance.modified_total_spends
|
28 |
+
)
|
29 |
attr_dict["modified_total_sales"] = class_instance.modified_total_sales
|
30 |
attr_dict["actual_mroi"] = class_instance.get_marginal_roi("actual")
|
31 |
+
attr_dict["modified_mroi"] = class_instance.get_marginal_roi(
|
32 |
+
"modified"
|
33 |
+
)
|
34 |
|
35 |
elif isinstance(class_instance, Scenario):
|
36 |
attr_dict["type"] = "Scenario"
|
|
|
43 |
attr_dict["correction"] = class_instance.correction
|
44 |
attr_dict["actual_total_spends"] = class_instance.actual_total_spends
|
45 |
attr_dict["actual_total_sales"] = class_instance.actual_total_sales
|
46 |
+
attr_dict["modified_total_spends"] = (
|
47 |
+
class_instance.modified_total_spends
|
48 |
+
)
|
49 |
attr_dict["modified_total_sales"] = class_instance.modified_total_sales
|
50 |
|
51 |
return attr_dict
|
|
|
95 |
self.modified_sales = self.calculate_sales()
|
96 |
self.modified_total_spends = self.modified_spends.sum()
|
97 |
self.modified_total_sales = self.modified_sales.sum()
|
98 |
+
self.delta_spends = (
|
99 |
+
self.modified_total_spends - self.actual_total_spends
|
100 |
+
)
|
101 |
self.delta_sales = self.modified_total_sales - self.actual_total_sales
|
102 |
|
103 |
def update_penalty(self, penalty):
|
|
|
119 |
x = np.where(
|
120 |
x < self.upper_limit,
|
121 |
x,
|
122 |
+
self.upper_limit
|
123 |
+
+ (x - self.upper_limit) * self.upper_limit / x,
|
124 |
)
|
125 |
if self.response_curve_type == "s-curve":
|
126 |
if self.power >= 0:
|
|
|
169 |
self.modified_sales = self.calculate_sales()
|
170 |
self.modified_total_spends = self.modified_spends.sum()
|
171 |
self.modified_total_sales = self.modified_sales.sum()
|
172 |
+
self.delta_spends = (
|
173 |
+
self.modified_total_spends - self.actual_total_spends
|
174 |
+
)
|
175 |
self.delta_sales = self.modified_total_sales - self.actual_total_sales
|
176 |
|
177 |
def intialize(self):
|
|
|
208 |
self.actual_total_sales = self.calculate_actual_total_sales()
|
209 |
self.modified_total_sales = self.calculate_modified_total_sales()
|
210 |
self.modified_total_spends = self.calculate_modified_total_spends()
|
211 |
+
self.delta_spends = (
|
212 |
+
self.modified_total_spends - self.actual_total_spends
|
213 |
+
)
|
214 |
self.delta_sales = self.modified_total_sales - self.actual_total_sales
|
215 |
|
216 |
def update_penalty(self, value):
|
|
|
220 |
def calculate_modified_total_spends(self):
|
221 |
total_actual_spends = 0.0
|
222 |
for channel in self.channels.values():
|
223 |
+
total_actual_spends += (
|
224 |
+
channel.actual_total_spends * channel.conversion_rate
|
225 |
+
)
|
226 |
return total_actual_spends
|
227 |
|
228 |
def calculate_modified_total_spends(self):
|
|
|
251 |
self.channels[channel_name].update(modified_spends)
|
252 |
self.modified_total_sales = self.calculate_modified_total_sales()
|
253 |
self.modified_total_spends = self.calculate_modified_total_spends()
|
254 |
+
self.delta_spends = (
|
255 |
+
self.modified_total_spends - self.actual_total_spends
|
256 |
+
)
|
257 |
self.delta_sales = self.modified_total_sales - self.actual_total_sales
|
258 |
|
259 |
+
def optimize_spends(self, sales_percent, channels_list, algo="COBYLA"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
desired_sales = self.actual_total_sales * (1 + sales_percent / 100.0)
|
261 |
|
262 |
def constraint(x):
|
|
|
285 |
x0=initial_point,
|
286 |
constraints=constraints,
|
287 |
method=algo,
|
288 |
+
options={"maxiter": int(2e7), "catol": 1},
|
289 |
)
|
290 |
|
291 |
for channel_name, modified_spends in zip(channels_list, res.x):
|
|
|
317 |
for channel_name in channels_list:
|
318 |
_channel_class = self.channels[channel_name]
|
319 |
channel_bounds = _channel_class.bounds
|
320 |
+
channel_actual_total_spends = (
|
321 |
+
_channel_class.actual_total_spends
|
322 |
+
* ((1 + spends_percent / 100))
|
323 |
)
|
324 |
old_spends.append(channel_actual_total_spends)
|
325 |
+
bounds.append(
|
326 |
+
(1 + channel_bounds / 100) * channel_actual_total_spends
|
327 |
+
)
|
328 |
|
329 |
def objective_function(x):
|
330 |
for channel_name, modified_spends in zip(channels_list, x):
|
|
|
332 |
return -1 * self.modified_total_sales
|
333 |
|
334 |
res = minimize(
|
335 |
+
lambda x : objective_function(x) / 1e8,
|
336 |
method="trust-constr",
|
337 |
x0=old_spends,
|
338 |
constraints=constraint,
|
339 |
bounds=bounds,
|
340 |
+
options={"maxiter": int(1e7), 'xtol' : 100},
|
341 |
)
|
342 |
# res = dual_annealing(
|
343 |
# objective_function,
|
|
|
361 |
channel_data = []
|
362 |
|
363 |
summary_rows = []
|
364 |
+
actual_list.append({
|
365 |
+
"name": "Total",
|
366 |
+
"Spends": self.actual_total_spends,
|
367 |
+
"Sales": self.actual_total_sales,
|
368 |
+
})
|
369 |
+
modified_list.append({
|
370 |
+
"name": "Total",
|
371 |
+
"Spends": self.modified_total_spends,
|
372 |
+
"Sales": self.modified_total_sales,
|
373 |
+
})
|
|
|
|
|
|
|
|
|
374 |
for channel in self.channels.values():
|
375 |
name_mod = channel.name.replace("_", " ")
|
376 |
if name_mod.lower().endswith(" imp"):
|
377 |
name_mod = name_mod.replace("Imp", " Impressions")
|
378 |
+
summary_rows.append([
|
379 |
+
name_mod,
|
380 |
+
channel.actual_total_spends,
|
381 |
+
channel.modified_total_spends,
|
382 |
+
channel.actual_total_sales,
|
383 |
+
channel.modified_total_sales,
|
384 |
+
round(
|
385 |
+
channel.actual_total_sales / channel.actual_total_spends, 2
|
386 |
+
),
|
387 |
+
round(
|
388 |
+
channel.modified_total_sales
|
389 |
+
/ channel.modified_total_spends,
|
390 |
+
2,
|
391 |
+
),
|
392 |
+
channel.get_marginal_roi("actual"),
|
393 |
+
channel.get_marginal_roi("modified"),
|
394 |
+
])
|
395 |
data[channel.name] = channel.modified_spends
|
396 |
data["Date"] = channel.dates
|
397 |
data["Sales"] = (
|
398 |
data.get("Sales", np.zeros((len(channel.dates),)))
|
399 |
+ channel.modified_sales
|
400 |
)
|
401 |
+
actual_list.append({
|
402 |
+
"name": channel.name,
|
403 |
+
"Spends": channel.actual_total_spends,
|
404 |
+
"Sales": channel.actual_total_sales,
|
405 |
+
"ROI": round(
|
406 |
+
channel.actual_total_sales / channel.actual_total_spends, 2
|
407 |
+
),
|
408 |
+
})
|
409 |
+
modified_list.append({
|
410 |
+
"name": channel.name,
|
411 |
+
"Spends": channel.modified_total_spends,
|
412 |
+
"Sales": channel.modified_total_sales,
|
413 |
+
"ROI": round(
|
414 |
+
channel.modified_total_sales
|
415 |
+
/ channel.modified_total_spends,
|
416 |
+
2,
|
417 |
+
),
|
418 |
+
"Marginal ROI": channel.get_marginal_roi("modified"),
|
419 |
+
})
|
420 |
+
|
421 |
+
channel_data.append({
|
422 |
+
"channel": channel.name,
|
423 |
+
"spends_act": channel.actual_total_spends,
|
424 |
+
"spends_mod": channel.modified_total_spends,
|
425 |
+
"sales_act": channel.actual_total_sales,
|
426 |
+
"sales_mod": channel.modified_total_sales,
|
427 |
+
})
|
428 |
+
summary_rows.append([
|
429 |
+
"Total",
|
430 |
+
self.actual_total_spends,
|
431 |
+
self.modified_total_spends,
|
432 |
+
self.actual_total_sales,
|
433 |
+
self.modified_total_sales,
|
434 |
+
round(self.actual_total_sales / self.actual_total_spends, 2),
|
435 |
+
round(self.modified_total_sales / self.modified_total_spends, 2),
|
436 |
+
0.0,
|
437 |
+
0.0,
|
438 |
+
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
439 |
details["Actual"] = actual_list
|
440 |
details["Modified"] = modified_list
|
441 |
columns_index = pd.MultiIndex.from_product(
|
|
|
467 |
def from_dict(cls, attr_dict):
|
468 |
channels_list = attr_dict["channels"]
|
469 |
channels = {
|
470 |
+
channel["name"]: class_from_dict(channel)
|
471 |
+
for channel in channels_list
|
472 |
}
|
473 |
return Scenario(
|
474 |
name=attr_dict["name"],
|
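The optimize_spends hunks above switch the default solver from trust-constr to COBYLA and pass a catol constraint tolerance. As a rough, self-contained sketch of the same pattern (reach a revenue target while spending as little as possible, subject to per-channel bounds), assuming invented square-root response curves and made-up channel numbers rather than the app's fitted s-curves:

```python
# Sketch only: the response curves, channel spends and bounds below are
# hypothetical, not the values used by the app.
import numpy as np
from scipy.optimize import minimize, NonlinearConstraint

k = np.array([120.0, 80.0, 45.0])                    # made-up diminishing-returns coefficients
actual_spends = np.array([1_000.0, 2_000.0, 500.0])  # made-up historical spends
desired_sales = 1.10 * np.sum(k * np.sqrt(actual_spends))  # +10% sales target

def total_sales(x):
    # sales_i = k_i * sqrt(spend_i), summed over channels
    return float(np.sum(k * np.sqrt(x)))

# sales must reach the target: total_sales(x) - desired_sales >= 0
sales_constraint = NonlinearConstraint(lambda x: total_sales(x) - desired_sales, 0.0, np.inf)

# each channel may move between -50% and +100% of its historical spend
bounds = [(0.5 * s, 2.0 * s) for s in actual_spends]

res = minimize(
    lambda x: np.sum(x) / 1e4,     # scaled total spend, echoing the app's objective scaling
    x0=actual_spends,
    method="trust-constr",         # the hunk's new default is "COBYLA" with options={"catol": 1}
    constraints=[sales_constraint],
    bounds=bounds,
    options={"maxiter": 10_000},
)
print(res.x.round(0), round(total_sales(res.x), 1))
```

COBYLA only handles inequality constraints and, in older SciPy releases, ignores the bounds argument, which is why this sketch runs with trust-constr; swapping method for "COBYLA" and passing options={"catol": 1} mirrors the new default in the diff.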
upf_data_converted.csv
CHANGED
The diff for this file is too large to render. See raw diff.
upf_data_converted.xlsx
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:92716069afa2c16a8afb6494da6d5f93878558de0215b1b9334ffeb997fdc6b6
+size 1561111
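Before the utilities.py diff below, note that classes.py above and utilities.py share one response shape: a logistic s-curve, with spends above a historical ceiling damped by a soft penalty before they enter the curve (the np.where hunk in Channel). A minimal sketch of that shape with invented parameter values, not the fitted ones:

```python
# Sketch only: parameter values and spends are illustrative.
import numpy as np

def s_curve(x, K, b, a, x0):
    # logistic response: saturates at K
    return K / (1 + b * np.exp(-a * (x - x0)))

def penalised(x, upper_limit):
    # below the ceiling spends pass through unchanged; above it the excess
    # is scaled by upper_limit / x, so the effective input grows only slowly
    return np.where(x < upper_limit, x, upper_limit + (x - upper_limit) * upper_limit / x)

spends = np.array([0.5e5, 1.0e5, 2.5e5])   # hypothetical weekly spends
upper = 2 * 1.0e5                          # a "2 * x.max()" style ceiling
sales = s_curve(penalised(spends, upper), K=5e6, b=100.0, a=3e-5, x0=4e4)
print(sales.round(0))
```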
utilities.py
CHANGED
@@ -12,6 +12,7 @@ import io
 import plotly
 from pathlib import Path
 import pickle
+import streamlit_authenticator as stauth
 import yaml
 from yaml import SafeLoader
 from streamlit.components.v1 import html
@@ -23,59 +24,27 @@ import os
 import base64

-color_palette = [
-    "#F3F3F0",
-    "#5E7D7E",
-    "#2FA1FF",
-    "#00EDED",
-    "#00EAE4",
-    "#304550",
-    "#EDEBEB",
-    "#7FBEFD",
-    "#003059",
-    "#A2F3F3",
-    "#E1D6E2",
-    "#B6B6B6",
-]
-
-import streamlit_authenticator as stauth
+color_palette = ['#F3F3F0', '#5E7D7E', '#2FA1FF', '#00EDED', '#00EAE4', '#304550', '#EDEBEB', '#7FBEFD', '#003059', '#A2F3F3', '#E1D6E2', '#B6B6B6']
+
+CURRENCY_INDICATOR = '$'

 def load_authenticator():
+    with open('config.yaml') as file:
         config = yaml.load(file, Loader=SafeLoader)
+    st.session_state['config'] = config
     authenticator = stauth.Authenticate(
+        config['credentials'],
+        config['cookie']['name'],
+        config['cookie']['key'],
+        config['cookie']['expiry_days'],
+        config['preauthorized']
     )
+    st.session_state['authenticator'] = authenticator
     return authenticator

-
-# Authentication
-def authentication():
-    with open("config.yaml") as file:
-        config = yaml.load(file, Loader=SafeLoader)
-
-    authenticator = stauth.Authenticate(
-        config["credentials"],
-        config["cookie"]["name"],
-        config["cookie"]["key"],
-        config["cookie"]["expiry_days"],
-        config["preauthorized"],
-    )
-
-    name, authentication_status, username = authenticator.login("Login", "main")
-    return authenticator, name, authentication_status, username
-
-
 def nav_page(page_name, timeout_secs=3):
     nav_script = """
     <script type="text/javascript">
@@ -98,10 +67,7 @@ def nav_page(page_name, timeout_secs=3):
         attempt_nav_page("%s", new Date(), %d);
     });
     </script>
-    """ % (
-        page_name,
-        timeout_secs,
-    )
+    """ % (page_name, timeout_secs)
     html(nav_script)


@@ -126,18 +92,23 @@ data_url = base64.b64encode(contents).decode("utf-8")

 file_.close()

+DATA_PATH = './data'

+IMAGES_PATH = './data/images_224_224'


 def load_local_css(file_name):

     with open(file_name) as f:

+        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)


 # def set_header():
@@ -158,24 +129,24 @@ data_url1 = base64.b64encode(contents1).decode("utf-8")

 file_1.close()

-DATA_PATH1 = "./data"
+DATA_PATH1 = './data'

-IMAGES_PATH1 = "./data/images_224_224"
+IMAGES_PATH1 = './data/images_224_224'


 def set_header():
-    return st.markdown(
-        f"""<div class='main-header'>
+    return st.markdown(f"""<div class='main-header'>
     <!-- <h1></h1> -->
     <div >
     <img class='blend-logo' src="data:image;base64,{data_url1}", alt="Logo">
     </div>
     <img class='blend-logo' src="data:image;base64,{data_url}", alt="Logo">
-    </div>""",
-        unsafe_allow_html=True,
-    )
+    </div>""", unsafe_allow_html=True)

 # def set_header():
 #     logo_path = "./path/to/your/local/LIME_logo.png"  # Replace with the actual file path
@@ -186,87 +157,51 @@ def set_header():
 #     </div>""", unsafe_allow_html=True)


-def panel_level(input_df, date_column="Date"):
-    # Ensure 'Date' is set as the index
-    if date_column not in input_df.index.names:
-        input_df = input_df.set_index(date_column)
-
-    # Select numeric columns only (excluding 'Date' since it's now the index)
-    numeric_columns_df = input_df.select_dtypes(include="number")
-
-    aggregated_df = numeric_columns_df.groupby(input_df.index).sum()
-
-    # Reset index if you want 'Date' back as a column
-    aggregated_df = aggregated_df.reset_index()
-
-    return aggregated_df
-
-
-def initialize_data(
-    panel=None, target_file="Overview_data_test.xlsx", updated_rcs=None, metrics=None
-):
+def s_curve(x,K,b,a,x0):
+    return K / (1 + b * np.exp(-a*(x-x0)))
+
+def initialize_data():
     # uopx_conv_rates = {'streaming_impressions' : 0.007,'digital_impressions' : 0.007,'search_clicks' : 0.00719,'tv_impressions' : 0.000173,
     # "digital_clicks":0.005,"streaming_clicks":0.004,'streaming_spends':1,"tv_spends":1,"search_spends":1,
     # "digital_spends":1}
-    raw_df["Date"] = pd.to_datetime(raw_df["Date"])
-    contri_df["Date"] = pd.to_datetime(contri_df["Date"])
-    input_df = raw_df.sort_values(by="Date")
-    output_df = contri_df.sort_values(by="Date")
-    spend_df["Week"] = pd.to_datetime(
-        spend_df["Week"], format="%Y-%m-%d", errors="coerce"
-    )
-    spend_df.sort_values(by="Week", inplace=True)
+    #print('State initialized')
+    excel = pd.read_excel("Overview_data_test.xlsx",sheet_name=None)
+    raw_df = excel['RAW DATA MMM']
+
+    spend_df = excel['SPEND INPUT']
+    contri_df = excel['CONTRIBUTION MMM']
+    #Revenue_df = excel['Revenue']
+
+    ## remove sesonalities, indices etc ...
+    exclude_columns = ['Date',
+        'Region',
+        'Controls_Grammarly_Index_SeasonalAVG',
+        'Controls_Quillbot_Index',
+        'Daily_Positive_Outliers',
+        'External_RemoteClass_Index',
+        'Intervals ON 20190520-20190805 | 20200518-20200803 | 20210517-20210802',
+        'Intervals ON 20190826-20191209 | 20200824-20201207 | 20210823-20211206',
+        'Intervals ON 20201005-20201019',
+        'Promotion_PercentOff',
+        'Promotion_TimeBased',
+        'Seasonality_Indicator_Chirstmas',
+        'Seasonality_Indicator_NewYears_Days',
+        'Seasonality_Indicator_Thanksgiving',
+        'Trend 20200302 / 20200803',
+    ]
+    raw_df['Date']=pd.to_datetime(raw_df['Date'])
+    contri_df['Date']=pd.to_datetime(contri_df['Date'])
+    input_df = raw_df.sort_values(by='Date')
+    output_df = contri_df.sort_values(by='Date')
+    spend_df['Week'] = pd.to_datetime(spend_df['Week'], format='%Y-%m-%d', errors='coerce')
+    spend_df.sort_values(by='Week', inplace=True)

     # spend_df['Week'] = pd.to_datetime(spend_df['Week'], errors='coerce')
     # spend_df = spend_df.sort_values(by='Week')

     channel_list = [col for col in input_df.columns if col not in exclude_columns]

     response_curves = {}
     mapes = {}
     rmses = {}
@@ -280,14 +215,14 @@ def initialize_data(
     dates = input_df.Date.values
     actual_output_dic = {}
     actual_input_dic = {}

     for inp_col in channel_list:
+        #st.write(inp_col)
         spends = input_df[inp_col].values
         x = spends.copy()
+        # upper limit for penalty
+        upper_limits[inp_col] = 2*x.max()

         # contribution
         out_col = [_col for _col in output_df.columns if _col.startswith(inp_col)][0]
         y = output_df[out_col].values.copy()
@@ -295,141 +230,96 @@ def initialize_data(
         actual_input_dic[inp_col] = x.copy()
         ##output cols aggregation
         output_cols.append(out_col)

         ## scale the input
-        power = np.ceil(np.log(x.max()) / np.log(10))
-        if power >= 0:
+        power = (np.ceil(np.log(x.max()) / np.log(10) )- 3)
+        if power >= 0 :
             x = x / 10**power

+        x = x.astype('float64')
+        y = y.astype('float64')
+        #print('#printing yyyyyyyyy')
+        #print(inp_col)
+        #print(x.max())
+        #print(y.max())
+        bounds = ((0, 0, 0, 0), (3*y.max(), 1000, 1, x.max()))
+
+        #bounds = ((y.max(), 3*y.max()),(0,1000),(0,1),(0,x.max()))
+        params,_ = curve_fit(s_curve,x,y,p0=(2*y.max(),0.01,1e-5,x.max()),
+            bounds=bounds,
+            maxfev=int(1e5))
         mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean()
+        rmse = np.sqrt(((y - s_curve(x,*params))**2).mean())
+        r2_ = r2_score(y, s_curve(x,*params))

-        updated_rcs_key = f"{metrics}#@{panel}#@{inp_col}"
-        if updated_rcs is not None and updated_rcs_key in list(updated_rcs.keys()):
-            response_curves[inp_col] = updated_rcs[updated_rcs_key]
+        response_curves[inp_col] = {'K' : params[0], 'b' : params[1], 'a' : params[2], 'x0' : params[3]}
         mapes[inp_col] = mape
         rmses[inp_col] = rmse
         r2[inp_col] = r2_
         powers[inp_col] = power

         ## conversion rates
+        spend_col = [_col for _col in spend_df.columns if _col.startswith(inp_col.rsplit('_',1)[0])][0]
+
+        #print('#printing spendssss')
+        #print(spend_col)
+        conv = (spend_df.set_index('Week')[spend_col] / input_df.set_index('Date')[inp_col].clip(lower=1)).reset_index()
+        conv.rename(columns={'index':'Week'},inplace=True)
+        conv['year'] = conv.Week.dt.year
+        conv_rates[inp_col] = list(conv.drop('Week',axis=1).mean().to_dict().values())[0]
         ##print('Before',conv_rates[inp_col])
         # conv_rates[inp_col] = uopx_conv_rates[inp_col]
         ##print('After',(conv_rates[inp_col]))

+        channel = Channel(name=inp_col,dates=dates,
+            spends=spends,
+            # conversion_rate = np.mean(list(conv_rates[inp_col].values())),
+            conversion_rate = conv_rates[inp_col],
+            response_curve_type='s-curve',
+            response_curve_params={'K' : params[0], 'b' : params[1], 'a' : params[2], 'x0' : params[3]},
+            bounds=np.array([-10,10]))
         channels[inp_col] = channel
         if sales is None:
             sales = channel.actual_sales
         else:
             sales += channel.actual_sales
+    other_contributions = output_df.drop([*output_cols], axis=1).sum(axis=1, numeric_only = True).values
+    correction = output_df.drop('Date',axis=1).sum(axis=1).values - (sales + other_contributions)
+    scenario = Scenario(name='default', channels=channels, constant=other_contributions, correction = correction)
     ## setting session variables
+    st.session_state['initialized'] = True
+    st.session_state['actual_df'] = input_df
+    st.session_state['raw_df'] = raw_df
+    st.session_state['contri_df'] = output_df
     default_scenario_dict = class_to_dict(scenario)
+    st.session_state['default_scenario_dict'] = default_scenario_dict
+    st.session_state['scenario'] = scenario
+    st.session_state['channels_list'] = channel_list
+    st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list}
+    st.session_state['rcs'] = response_curves
+    st.session_state['powers'] = powers
+    st.session_state['actual_contribution_df'] = pd.DataFrame(actual_output_dic)
+    st.session_state['actual_input_df'] = pd.DataFrame(actual_input_dic)

     for channel in channels.values():
+        st.session_state[channel.name] = numerize(channel.actual_total_spends * channel.conversion_rate,1)
+
+    st.session_state['xlsx_buffer'] = io.BytesIO()
+
+    if Path('../saved_scenarios.pkl').exists():
+        with open('../saved_scenarios.pkl','rb') as f:
+            st.session_state['saved_scenarios'] = pickle.load(f)
     else:
+        st.session_state['saved_scenarios'] = OrderedDict()
+
+    st.session_state['total_spends_change'] = 0
+    st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list}
+    st.session_state['disable_download_button'] = True

 # def initialize_data():
 #     # fetch data from excel
 #     output = pd.read_excel('data.xlsx',sheet_name=None)
@@ -445,17 +335,17 @@ def initialize_data(
 #             channel_list.append(col)
 #         else:
 #             pass

 #     ## NOTE : Considered only Desktop spends for all calculations
 #     acutal_df = raw_df[raw_df.Region == 'Desktop'].copy()
 #     ## NOTE : Considered one year of data
 #     acutal_df = acutal_df[acutal_df.Date>'2020-12-31']
 #     actual_df = acutal_df.drop('Region',axis=1).sort_values(by='Date')[[*channel_list,'Date']]

 #     ##load response curves
 #     with open('./grammarly_response_curves.json','r') as f:
 #         response_curves = json.load(f)

 #     ## create channel dict for scenario creation
 #     dates = actual_df.Date.values
 #     channels = {}
@@ -473,15 +363,15 @@ def initialize_data(
 #             response_curve_type=response_curve_type,
 #             response_curve_params=response_curve_params,
 #             bounds=np.array([-30,30]))

 #         channels[name] = channel
 #     else:
 #         constant = info_dict.get('value',0.) * len(dates)

 #     ## create scenario
 #     scenario = Scenario(name='default', channels=channels, constant=constant)
 #     default_scenario_dict = class_to_dict(scenario)

 #     ## setting session variables
 #     st.session_state['initialized'] = True
@@ -495,7 +385,7 @@ def initialize_data(
 #     for channel in channels.values():
 #         if channel.name not in st.session_state:
 #             st.session_state[channel.name] = float(channel.actual_total_spends)

 #     if 'xlsx_buffer' not in st.session_state:
 #         st.session_state['xlsx_buffer'] = io.BytesIO()

@@ -504,121 +394,51 @@ def initialize_data(
 #     if Path('../saved_scenarios.pkl').exists():
 #         with open('../saved_scenarios.pkl','rb') as f:
 #             st.session_state['saved_scenarios'] = pickle.load(f)
 #     else:
 #         st.session_state['saved_scenarios'] = OrderedDict()

 #     if 'total_spends_change' not in st.session_state:
 #         st.session_state['total_spends_change'] = 0

 #     if 'optimization_channels' not in st.session_state:
 #         st.session_state['optimization_channels'] = {channel_name : False for channel_name in channel_list}

 #     if 'disable_download_button' not in st.session_state:
 #         st.session_state['disable_download_button'] = True


 def create_channel_summary(scenario):

     # Provided data
     data = {
+        'Channel': ['Paid Search', 'Ga will cid baixo risco', 'Digital tactic others', 'Fb la tier 1', 'Fb la tier 2', 'Paid social others', 'Programmatic', 'Kwai', 'Indicacao', 'Infleux', 'Influencer'],
+        'Spends': ['$ 11.3K', '$ 155.2K', '$ 50.7K', '$ 125.4K', '$ 125.2K', '$ 105K', '$ 3.3M', '$ 47.5K', '$ 55.9K', '$ 632.3K', '$ 48.3K'],
+        'Revenue': ['558.0K', '3.5M', '5.2M', '3.1M', '3.1M', '2.1M', '20.8M', '1.6M', '728.4K', '22.9M', '4.8M']
     }

     # Create DataFrame
     df = pd.DataFrame(data)

     # Convert currency strings to numeric values
+    df['Spends'] = df['Spends'].replace({'\$': '', 'K': '*1e3', 'M': '*1e6'}, regex=True).map(pd.eval).astype(int)
+    df['Revenue'] = df['Revenue'].replace({'\$': '', 'K': '*1e3', 'M': '*1e6'}, regex=True).map(pd.eval).astype(int)

     # Calculate ROI
+    df['ROI'] = ((df['Revenue'] - df['Spends']) / df['Spends'])

     # Format columns
     format_currency = lambda x: f"${x:,.1f}"
     format_roi = lambda x: f"{x:.1f}"

+    df['Spends'] = ['$ 11.3K', '$ 155.2K', '$ 50.7K', '$ 125.4K', '$ 125.2K', '$ 105K', '$ 3.3M', '$ 47.5K', '$ 55.9K', '$ 632.3K', '$ 48.3K']
+    df['Revenue'] = ['$ 536.3K', '$ 3.4M', '$ 5M', '$ 3M', '$ 3M', '$ 2M', '$ 20M', '$ 1.5M', '$ 7.1M', '$ 22M', '$ 4.6M']
+    df['ROI'] = df['ROI'].apply(format_roi)

     return df


+#@st.cache(allow_output_mutation=True)
 # def create_contribution_pie(scenario):
 #     #c1f7dc
 #     colors_map = {col:color for col,color in zip(st.session_state['channels_list'],plotly.colors.n_colors(plotly.colors.hex_to_rgb('#BE6468'), plotly.colors.hex_to_rgb('#E7B8B7'),23))}
@@ -650,23 +470,23 @@ def create_channel_summary(scenario):
 #     weekly_spends_data = []
 #     weekly_sales_data = []
 #     for channel_name in st.session_state['channels_list']:
 #         weekly_spends_data.append((go.Bar(x=x,
 #             y=scenario.channels[channel_name].actual_spends * scenario.channels[channel_name].conversion_rate,
 #             name=channel_name_formating(channel_name),
 #             hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
 #             legendgroup=channel_name)))
 #         weekly_sales_data.append((go.Bar(x=x,
 #             y=scenario.channels[channel_name].actual_sales,
 #             name=channel_name_formating(channel_name),
 #             hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
 #             legendgroup=channel_name, showlegend=False)))
 #     for _d in weekly_spends_data:
 #         weekly_contribution_fig.add_trace(_d, row=1, col=1)
 #     for _d in weekly_sales_data:
 #         weekly_contribution_fig.add_trace(_d, row=1, col=2)
 #     weekly_contribution_fig.add_trace(go.Bar(x=x,
 #         y=scenario.constant + scenario.correction,
 #         name='Non Media',
 #         hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}"), row=1, col=2)
 #     weekly_contribution_fig.update_layout(barmode='stack', title='Channel contribuion by week', xaxis_title='Date')
 #     weekly_contribution_fig.update_xaxes(showgrid=False)
@@ -704,50 +524,14 @@ def create_channel_summary(scenario):


 def create_contribution_pie():
+    color_palette = ['#F3F3F0', '#5E7D7E', '#2FA1FF', '#00EDED', '#00EAE4', '#304550', '#EDEBEB', '#7FBEFD', '#003059', '#A2F3F3', '#E1D6E2', '#B6B6B6']
+    total_contribution_fig = make_subplots(rows=1, cols=2, subplot_titles=['Spends', 'Revenue'], specs=[[{"type": "pie"}, {"type": "pie"}]])

+    channels_list = ['Paid Search', 'Ga will cid baixo risco', 'Digital tactic others', 'Fb la tier 1', 'Fb la tier 2', 'Paid social others', 'Programmatic', 'Kwai', 'Indicacao', 'Infleux', 'Influencer', 'Non Media']

     # Assign colors from the limited palette to channels
+    colors_map = {col: color_palette[i % len(color_palette)] for i, col in enumerate(channels_list)}
+    colors_map['Non Media'] = color_palette[5]  # Assign fixed green color for 'Non Media'

     # Hardcoded values for Spends and Revenue
     spends_values = [0.5, 3.36, 1.1, 2.7, 2.7, 2.27, 70.6, 1, 1, 13.7, 1, 0]
@@ -758,13 +542,10 @@ def create_contribution_pie():
         go.Pie(
             labels=[channel_name for channel_name in channels_list],
             values=spends_values,
-            marker=dict(
-            ),
-            hole=0.3,
+            marker=dict(colors=[colors_map[channel_name] for channel_name in channels_list]),
+            hole=0.3
         ),
-        row=1,
-        col=1,
+        row=1, col=1
     )

     # Add trace for Revenue pie chart
@@ -772,196 +553,144 @@ def create_contribution_pie():
         go.Pie(
             labels=[channel_name for channel_name in channels_list],
             values=revenue_values,
+            marker=dict(colors=[colors_map[channel_name] for channel_name in channels_list]),
+            hole=0.3
         ),
+        row=1, col=2
     )

+    total_contribution_fig.update_traces(textposition='inside', texttemplate='%{percent:.1%}')
+    total_contribution_fig.update_layout(uniformtext_minsize=12, title='Channel contribution', uniformtext_mode='hide')
     return total_contribution_fig

 def create_contribuion_stacked_plot(scenario):
+    weekly_contribution_fig = make_subplots(rows=1, cols=2, subplot_titles=['Spends', 'Revenue'], specs=[[{"type": "bar"}, {"type": "bar"}]])
+    raw_df = st.session_state['raw_df']
+    df = raw_df.sort_values(by='Date')
     x = df.Date
     weekly_spends_data = []
     weekly_sales_data = []

+    for i, channel_name in enumerate(st.session_state['channels_list']):
         color = color_palette[i % len(color_palette)]

+        weekly_spends_data.append(go.Bar(
+            x=x,
+            y=scenario.channels[channel_name].actual_spends * scenario.channels[channel_name].conversion_rate,
+            name=channel_name_formating(channel_name),
+            hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
+            legendgroup=channel_name,
+            marker_color=color,
+        ))

+        weekly_sales_data.append(go.Bar(
+            x=x,
+            y=scenario.channels[channel_name].actual_sales,
+            name=channel_name_formating(channel_name),
+            hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
+            legendgroup=channel_name,
+            showlegend=False,
+            marker_color=color,
+        ))

     for _d in weekly_spends_data:
         weekly_contribution_fig.add_trace(_d, row=1, col=1)
     for _d in weekly_sales_data:
         weekly_contribution_fig.add_trace(_d, row=1, col=2)

+    weekly_contribution_fig.add_trace(go.Bar(
+        x=x,
+        y=scenario.constant + scenario.correction,
+        name='Non Media',
+        hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
+        marker_color=color_palette[-1],
+    ), row=1, col=2)

+    weekly_contribution_fig.update_layout(barmode='stack', title='Channel contribution by week', xaxis_title='Date')
     weekly_contribution_fig.update_xaxes(showgrid=False)
     weekly_contribution_fig.update_yaxes(showgrid=False)
     return weekly_contribution_fig

 def create_channel_spends_sales_plot(channel):
     if channel is not None:
         x = channel.dates
         _spends = channel.actual_spends * channel.conversion_rate
         _sales = channel.actual_sales
         channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
+        channel_sales_spends_fig.add_trace(go.Bar(
+            x=x,
+            y=_sales,
+            marker_color=color_palette[3],  # You can choose a color from the palette
+            name='Revenue',
+            hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
+        ), secondary_y=False)

+        channel_sales_spends_fig.add_trace(go.Scatter(
+            x=x,
+            y=_spends,
+            line=dict(color=color_palette[2]),  # You can choose another color from the palette
+            name='Spends',
+            hovertemplate="Date:%{x}<br>Spend:%{y:$.2s}",
+        ), secondary_y=True)

+        channel_sales_spends_fig.update_layout(xaxis_title='Date', yaxis_title='Revenue', yaxis2_title='Spends ($)', title='Channel spends and Revenue week-wise')
         channel_sales_spends_fig.update_xaxes(showgrid=False)
         channel_sales_spends_fig.update_yaxes(showgrid=False)
     else:
+        raw_df = st.session_state['raw_df']
+        df = raw_df.sort_values(by='Date')
         x = df.Date
+        scenario = class_from_dict(st.session_state['default_scenario_dict'])
         _sales = scenario.constant + scenario.correction
         channel_sales_spends_fig = make_subplots(specs=[[{"secondary_y": True}]])
+        channel_sales_spends_fig.add_trace(go.Bar(
+            x=x,
+            y=_sales,
+            marker_color=color_palette[0],  # You can choose a color from the palette
+            name='Revenue',
+            hovertemplate="Date:%{x}<br>Revenue:%{y:$.2s}",
+        ), secondary_y=False)

+        channel_sales_spends_fig.update_layout(xaxis_title='Date', yaxis_title='Revenue', yaxis2_title='Spends ($)', title='Channel spends and Revenue week-wise')
         channel_sales_spends_fig.update_xaxes(showgrid=False)
         channel_sales_spends_fig.update_yaxes(showgrid=False)

     return channel_sales_spends_fig

+def format_numbers(value, n_decimals=1,include_indicator = True):
     if include_indicator:
+        return f'{CURRENCY_INDICATOR} {numerize(value,n_decimals)}'
     else:
+        return f'{numerize(value,n_decimals)}'


+def decimal_formater(num_string,n_decimals=1):
+    parts = num_string.split('.')
     if len(parts) == 1:
+        return num_string+'.' + '0'*n_decimals
     else:
         to_be_padded = n_decimals - len(parts[-1])
+        if to_be_padded > 0 :
+            return num_string+'0'*to_be_padded
         else:
             return num_string


 def channel_name_formating(channel_name):
+    name_mod = channel_name.replace('_', ' ')
+    if name_mod.lower().endswith(' imp'):
+        name_mod = name_mod.replace('Imp','Spend')
+    elif name_mod.lower().endswith(' clicks'):
+        name_mod = name_mod.replace('Clicks','Spend')
     return name_mod


+def send_email(email,message):
+    s = smtplib.SMTP('smtp.gmail.com', 587)
     s.starttls()
     s.login("geethu4444@gmail.com", "jgydhpfusuremcol")
     s.sendmail("geethu4444@gmail.com", email, message)
     s.quit()

 if __name__ == "__main__":
     initialize_data()
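The core of the utilities.py change is the per-channel fitting loop inside initialize_data: scale spends to a convenient power of ten, fit the s-curve with scipy.optimize.curve_fit, then score the fit with MAPE, RMSE and R² (r2_score is assumed to come from sklearn.metrics, as the file implies). A self-contained sketch of that loop on synthetic data, where the series, seed and true parameters are invented for illustration:

```python
# Sketch only: synthetic spend/contribution series stand in for the real data.
import numpy as np
from scipy.optimize import curve_fit
from sklearn.metrics import r2_score

def s_curve(x, K, b, a, x0):
    return K / (1 + b * np.exp(-a * (x - x0)))

rng = np.random.default_rng(0)
x = np.linspace(1e3, 5e5, 120)                                      # weekly spends (synthetic)
y = s_curve(x, 2e6, 50, 2e-5, 1e5) * rng.normal(1, 0.05, x.size)    # noisy contribution

# scale the input the same way the app does (power-of-ten scaling)
power = np.ceil(np.log(x.max()) / np.log(10)) - 3
if power >= 0:
    x = x / 10**power

bounds = ((0, 0, 0, 0), (3 * y.max(), 1000, 1, x.max()))
params, _ = curve_fit(s_curve, x, y, p0=(2 * y.max(), 0.01, 1e-5, x.max()),
                      bounds=bounds, maxfev=int(1e5))

mape = (100 * abs(1 - s_curve(x, *params) / y.clip(min=1))).mean()
rmse = np.sqrt(((y - s_curve(x, *params)) ** 2).mean())
r2 = r2_score(y, s_curve(x, *params))
print(dict(zip("K b a x0".split(), params)), round(mape, 2), round(rmse, 2), round(r2, 3))
```

The power-of-ten scaling keeps curve_fit's parameters in comparable ranges, which is presumably why the app also stores powers[inp_col] in session state so spends can be mapped back later.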