Causion / src /data_ingestion.py
tappyness1
update to charts
ae3451c
import pandas as pd
import numpy as np
def remove_previous_view(counts_df):
    """Keep only the rows whose ``view`` is one of the directional camera views.

    Args:
        counts_df: DataFrame with a ``view`` column.

    Returns:
        The subset of ``counts_df`` whose ``view`` value appears in the
        allow-list below; row order and index labels are preserved.
    """
    directional_views = [
        'View_from_Second_Link_at_Tuas_to_sg',
        'View_from_Second_Link_at_Tuas_to_jh',
        'View_from_Tuas_Checkpoint_to_sg',
        'View_from_Tuas_Checkpoint_to_jh',
        'View_from_Woodlands_Causeway_Towards_Johor_to_sg',
        'View_from_Woodlands_Causeway_Towards_Johor_to_jh',
        'View_from_Woodlands_Checkpoint_Towards_BKE_to_sg',
        'View_from_Woodlands_Checkpoint_Towards_BKE_to_jh',
    ]
    # Boolean mask: True where the row's view is in the allow-list.
    is_directional = counts_df['view'].isin(directional_views)
    return counts_df[is_directional]
def merge_volumes(counts_df):
    """Sum the numeric columns of paired camera views per date and time.

    Each camera view is mapped to one of four merged labels ("Tuas - to SG",
    "Tuas - to Johor", "Woodlands - to SG", "Woodlands - to Johor"); rows
    sharing a label, ``date`` and ``time`` are summed together.

    Args:
        counts_df: DataFrame with ``view``, ``date`` and ``time`` columns plus
            the numeric columns to be summed. It is not modified.

    Returns:
        A new DataFrame with columns ``view`` (the merged label), ``date``,
        ``time`` and the summed numeric columns. Rows whose ``view`` belongs
        to no merge group are excluded, because ``groupby`` drops missing
        keys by default.
    """
    merge_groups = {
        "Tuas - to SG": ["View_from_Second_Link_at_Tuas_to_sg", "View_from_Tuas_Checkpoint_to_sg"],
        "Tuas - to Johor": ['View_from_Second_Link_at_Tuas_to_jh', 'View_from_Tuas_Checkpoint_to_jh'],
        "Woodlands - to SG": ['View_from_Woodlands_Causeway_Towards_Johor_to_sg', 'View_from_Woodlands_Checkpoint_Towards_BKE_to_sg'],
        "Woodlands - to Johor": ['View_from_Woodlands_Causeway_Towards_Johor_to_jh', 'View_from_Woodlands_Checkpoint_Towards_BKE_to_jh'],
    }
    # Invert to {view: label} so labelling is a single vectorised lookup
    # instead of scanning every group for every row.
    view_to_group = {
        view: label
        for label, views in merge_groups.items()
        for view in views
    }

    # assign() works on a copy: the caller's frame does not gain a
    # 'merge_group' column, and no SettingWithCopyWarning is raised when
    # counts_df is itself a filtered slice of another frame.
    labelled = counts_df.assign(merge_group=counts_df['view'].map(view_to_group))

    merged = (
        labelled
        .groupby(by=['merge_group', 'date', 'time'])
        .sum(numeric_only=True)
        .reset_index()
    )
    return merged.rename(columns={"merge_group": "view"})
def daily_average(counts_df):
filtered_views_list = ['View_from_Second_Link_at_Tuas_to_sg',
'View_from_Second_Link_at_Tuas_to_jh',
'View_from_Tuas_Checkpoint_to_sg',
'View_from_Tuas_Checkpoint_to_jh',
'View_from_Woodlands_Causeway_Towards_Johor_to_sg',
'View_from_Woodlands_Causeway_Towards_Johor_to_jh',
'View_from_Woodlands_Checkpoint_Towards_BKE_to_sg',
'View_from_Woodlands_Checkpoint_Towards_BKE_to_jh']
counts_df_filter_views = counts_df[counts_df['view'].isin(filtered_views_list)]
counts_df_filter_views['date'] = pd.to_datetime(counts_df_filter_views['date'])
counts_df_filter_views['day_of_week'] = counts_df_filter_views['date'].dt.day_of_week
date_view_group = counts_df_filter_views.groupby(by=['view', 'day_of_week']).mean()
date_view_group = date_view_group.reset_index()