phitoduck committed on
Commit
adc9f0c
1 Parent(s): 02f17d0

structured repo for huggingface spaces

Browse files
streamlit_app.py → app.py RENAMED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
3
  from datetime import time, date
4
- from utils import generate_random_data, evaluate_alarm_state, aggregate_data
5
  from textwrap import dedent
6
  from matplotlib import pyplot as plt
7
 
@@ -20,7 +20,7 @@ def main():
20
 
21
  if not st.session_state.df.empty:
22
  display_dataframe("Raw Event Data", st.session_state.df)
23
- st.scatter_chart(st.session_state.df.set_index("Timestamp"))
24
 
25
  # Section 2 - Calculate Aggregations
26
  st.header("Section 2 - Calculate Aggregations")
@@ -34,7 +34,7 @@ def main():
34
  key='aggregation_function_input__storage',
35
  help="Select the aggregation function for visualizing the data."
36
  )
37
- st.line_chart(st.session_state.aggregated_df.set_index("Timestamp")[st.session_state.aggregation_function_input__storage])
38
 
39
  # Section 3 - Summary Data Aggregated by Period
40
  st.header("Section 3 - Summary Data Aggregated by Period")
@@ -48,7 +48,7 @@ def main():
48
  key='aggregation_function_input__alarm',
49
  help="Select the aggregation function for visualizing the data."
50
  )
51
- st.line_chart(st.session_state.summary_by_period_df.set_index("Timestamp")[st.session_state.aggregation_function_input__alarm])
52
 
53
  # Section 4 - Evaluate Alarm State
54
  st.header("Section 4 - Evaluate Alarm State")
@@ -96,8 +96,8 @@ def aggregation_form() -> None:
96
 
97
  def summary_by_period_form() -> None:
98
  period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
99
- if not st.session_state.df.empty:
100
- st.session_state.summary_by_period_df = aggregate_data(st.session_state.df, period_length_input)
101
 
102
  def alarm_state_form() -> None:
103
  threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
@@ -238,3 +238,4 @@ def display_key_tables() -> None:
238
 
239
  if __name__ == "__main__":
240
  main()
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  from datetime import time, date
4
+ from utils import generate_random_data, evaluate_alarm_state, aggregate_data, re_aggregate_data
5
  from textwrap import dedent
6
  from matplotlib import pyplot as plt
7
 
 
20
 
21
  if not st.session_state.df.empty:
22
  display_dataframe("Raw Event Data", st.session_state.df)
23
+ st.line_chart(st.session_state.df.set_index("Timestamp"))
24
 
25
  # Section 2 - Calculate Aggregations
26
  st.header("Section 2 - Calculate Aggregations")
 
34
  key='aggregation_function_input__storage',
35
  help="Select the aggregation function for visualizing the data."
36
  )
37
+ st.line_chart(st.session_state.aggregated_df.set_index("Timestamp")[aggregation_function_input__storage])
38
 
39
  # Section 3 - Summary Data Aggregated by Period
40
  st.header("Section 3 - Summary Data Aggregated by Period")
 
48
  key='aggregation_function_input__alarm',
49
  help="Select the aggregation function for visualizing the data."
50
  )
51
+ st.line_chart(st.session_state.summary_by_period_df.set_index("Timestamp")[aggregation_function_input__alarm])
52
 
53
  # Section 4 - Evaluate Alarm State
54
  st.header("Section 4 - Evaluate Alarm State")
 
96
 
97
  def summary_by_period_form() -> None:
98
  period_length_input = st.selectbox("Period Length", ['1min', '5min', '15min'], key='period_length_input', help="Select the period length for aggregating the summary data.")
99
+ if not st.session_state.aggregated_df.empty:
100
+ st.session_state.summary_by_period_df = re_aggregate_data(st.session_state.aggregated_df, period_length_input)
101
 
102
  def alarm_state_form() -> None:
103
  threshold_input = st.slider("Threshold (ms)", min_value=50, max_value=300, value=150, key='threshold_input', help="Specify the threshold value for evaluating the alarm state.")
 
238
 
239
  if __name__ == "__main__":
240
  main()
241
+
requirements.dev.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ipykernel
2
+ jupyterlab
3
+ watchdog
requirements.txt CHANGED
@@ -1,6 +1,4 @@
1
  pandas
2
  numpy
3
- ipykernel
4
- jupyterlab
5
  streamlit
6
  matplotlib
 
1
  pandas
2
  numpy
 
 
3
  streamlit
4
  matplotlib
utils.py CHANGED
@@ -42,13 +42,14 @@ def calculate_percentile(
42
  freq: str,
43
  percentile: float
44
  ) -> pd.DataFrame:
45
- percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"] .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
 
46
  percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
47
  return percentile_df
48
 
49
  def aggregate_data(
50
  df: pd.DataFrame,
51
- period_length: str
52
  ) -> pd.DataFrame:
53
  if df.empty:
54
  return pd.DataFrame() # Return an empty DataFrame if input is empty
@@ -72,6 +73,33 @@ def aggregate_data(
72
  ).reset_index()
73
  return summary_df
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def chunk_list(input_list: List[Any], size: int = 3) -> Iterator[List[Any]]:
76
  while input_list:
77
  chunk: List[Any] = input_list[:size]
 
42
  freq: str,
43
  percentile: float
44
  ) -> pd.DataFrame:
45
+ percentile_df: pd.DataFrame = df.groupby(pd.Grouper(key='Timestamp', freq=freq))["ResponseTime(ms)"]\
46
+ .quantile(percentile).reset_index(name=f"p{int(percentile * 100)}_ResponseTime(ms)")
47
  percentile_df.replace(to_replace=np.nan, value=None, inplace=True)
48
  return percentile_df
49
 
50
  def aggregate_data(
51
  df: pd.DataFrame,
52
+ period_length: str,
53
  ) -> pd.DataFrame:
54
  if df.empty:
55
  return pd.DataFrame() # Return an empty DataFrame if input is empty
 
73
  ).reset_index()
74
  return summary_df
75
 
76
def _skip_missing(reducer):
    """Wrap *reducer* so it ignores missing values.

    The returned callable drops missing entries from the series it is given,
    applies *reducer* to what remains, and returns NaN when nothing remains.
    """

    def apply(series):
        # Drop once and reuse the result for both the emptiness check and the reduction.
        present = series.dropna()
        return reducer(present) if not present.empty else np.nan

    return apply


def re_aggregate_data(
    df: pd.DataFrame,
    period_length: str,
) -> pd.DataFrame:
    """Roll an already-aggregated frame up into coarser time periods.

    Rows are grouped on the ``Timestamp`` column using *period_length* as the
    grouping frequency. Within each group, each of the columns ``p50``,
    ``p95``, ``p99``, ``max``, ``min`` and ``average`` is reduced with the
    statistic matching its name (50th/95th/99th percentile, maximum, minimum,
    mean), ignoring missing values. A group with no non-missing values for a
    column yields NaN for that column.

    NOTE(review): the percentile columns are computed as percentiles of the
    input column's values. If the input columns already hold per-period
    percentiles (presumably the case -- confirm against the caller), the
    result is a percentile of percentiles, not a percentile of raw events.

    Args:
        df: Frame with a ``Timestamp`` column and the six statistic columns.
        period_length: Frequency string passed to ``pd.Grouper`` (e.g. "5min").

    Returns:
        One row per period with ``Timestamp`` plus the six statistic columns,
        or an empty ``DataFrame`` when *df* is empty.
    """
    if df.empty:
        return pd.DataFrame()  # Return an empty DataFrame if input is empty

    # One reducer per output column; the column is both the source and the result name.
    reducers = {
        'p50': _skip_missing(lambda values: np.percentile(values, 50)),
        'p95': _skip_missing(lambda values: np.percentile(values, 95)),
        'p99': _skip_missing(lambda values: np.percentile(values, 99)),
        'max': _skip_missing(np.max),
        'min': _skip_missing(np.min),
        'average': _skip_missing(np.mean),
    }

    grouped = df.groupby(pd.Grouper(key='Timestamp', freq=period_length))
    summary_df = grouped.agg(
        **{column: (column, reducer) for column, reducer in reducers.items()}
    ).reset_index()
    return summary_df
101
+
102
+
103
  def chunk_list(input_list: List[Any], size: int = 3) -> Iterator[List[Any]]:
104
  while input_list:
105
  chunk: List[Any] = input_list[:size]