Mihkelmj committed on
Commit
647d992
1 Parent(s): 5064f83

connected real data to everything displayed; modified the layout a bit; added better graphs and expanders

Browse files
__pycache__/data_api_calls.cpython-312.pyc CHANGED
Binary files a/__pycache__/data_api_calls.cpython-312.pyc and b/__pycache__/data_api_calls.cpython-312.pyc differ
 
app.py CHANGED
@@ -9,7 +9,7 @@ from src.models_loading import run_model
9
 
10
  st.set_page_config(
11
  page_title="Utrecht Pollution Dashboard",
12
- page_icon="🏂��🌱",
13
  layout="wide",
14
  initial_sidebar_state="expanded",
15
  )
@@ -18,46 +18,13 @@ alt.themes.enable("dark")
18
 
19
  get_data()
20
  dataset = pd.read_csv("dataset.csv")
 
 
21
  prediction = run_model("O3", data=dataset)
22
  pred1 = prediction[0][0]
23
  pred2 = prediction[0][1]
24
  pred3 = prediction[0][2]
25
 
26
- # App Title
27
- st.title("Utrecht Pollution Dashboard🌱")
28
-
29
- col1, col2 = st.columns((1, 1))
30
- # Create a 3-column layout
31
- with col1:
32
- st.subheader("Current Weather")
33
- col1, col2, col3 = st.columns(3)
34
-
35
- # First column
36
- with col1:
37
- custom_metric_box(label="Temperature", value="2 °C", delta="-3 °C")
38
- custom_metric_box(label="Humidity", value="60 %", delta="-1 %")
39
-
40
- # Second column
41
- with col2:
42
- custom_metric_box(label="Pressure", value="1010 hPa", delta="+2 hPa")
43
- custom_metric_box(label="Precipitation", value="5 mm", delta="-1 mm")
44
-
45
- # Third column
46
- with col3:
47
- custom_metric_box(label="Solar Radiation", value="200 W/m²", delta="-20 W/m²")
48
- custom_metric_box(label="Wind Speed", value="15 km/h", delta="-2 km/h")
49
-
50
- st.subheader("Current Pollution Levels")
51
- col1, col2 = st.columns((1, 1))
52
- # Display the prediction
53
- # st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
54
- with col1:
55
- pollution_box(label="O<sub>3</sub>", value="37 µg/m³", delta="+2 µg/m³")
56
- with col2:
57
- pollution_box(label="NO<sub>2</sub>", value="28 µg/m³", delta="+3 µg/m³")
58
-
59
- # Sample data (replace with your actual data)
60
- # Sample data (replace with your actual data)
61
  dates_past = pd.date_range(end=pd.Timestamp.today(), periods=8).to_list()
62
  dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
63
 
@@ -79,11 +46,48 @@ dates = dates_past + dates_future
79
  # Create a DataFrame
80
  df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
81
 
82
- st.subheader("O3 and NO2 Prediction")
83
- # Create two columns for two separate graphs
84
- subcol1, subcol2 = st.columns(2)
85
- # Plot O3 in the first subcolumn
86
- with subcol1:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  fig_o3 = go.Figure()
88
  fig_o3.add_trace(
89
  go.Scatter(
@@ -92,9 +96,9 @@ with subcol1:
92
  mode="lines+markers",
93
  name="O3",
94
  line=dict(color="rgb(0, 191, 255)", width=4),
 
95
  )
96
- ) # Bright blue
97
- # Add a vertical line for predictions (today's date)
98
  fig_o3.add_shape(
99
  dict(
100
  type="line",
@@ -106,16 +110,23 @@ with subcol1:
106
  )
107
  )
108
  fig_o3.update_layout(
109
- plot_bgcolor="rgba(0, 0, 0, 0)", # Transparent background
110
- paper_bgcolor="rgba(0, 0, 0, 0)", # Transparent paper background
111
  yaxis_title="O3 Concentration (µg/m³)",
112
  font=dict(size=14),
113
- hovermode="x unified",
 
 
 
 
 
 
 
 
 
114
  )
115
- st.plotly_chart(fig_o3)
116
 
117
- # Plot NO2 in the second subcolumn
118
- with subcol2:
119
  fig_no2 = go.Figure()
120
  fig_no2.add_trace(
121
  go.Scatter(
@@ -125,8 +136,7 @@ with subcol2:
125
  name="NO2",
126
  line=dict(color="rgb(255, 20, 147)", width=4),
127
  )
128
- ) # Bright pink
129
- # Add a vertical line for predictions (today's date)
130
  fig_no2.add_shape(
131
  dict(
132
  type="line",
@@ -134,14 +144,23 @@ with subcol2:
134
  x1=pd.Timestamp.today(),
135
  y0=min(no2_values),
136
  y1=max(no2_values),
137
- line=dict(color="White", width=3, dash="dash"),
138
  )
139
  )
140
  fig_no2.update_layout(
141
- plot_bgcolor="rgba(0, 0, 0, 0)", # Transparent background
142
- paper_bgcolor="rgba(0, 0, 0, 0)", # Transparent paper background
143
- yaxis_title="NO2 Concentration (µg/m³)",
144
  font=dict(size=14),
145
- hovermode="x unified",
 
 
 
 
 
 
 
 
 
146
  )
147
- st.plotly_chart(fig_no2)
 
9
 
10
  st.set_page_config(
11
  page_title="Utrecht Pollution Dashboard",
12
+ page_icon="������🌱",
13
  layout="wide",
14
  initial_sidebar_state="expanded",
15
  )
 
18
 
19
  get_data()
20
  dataset = pd.read_csv("dataset.csv")
21
+ today = dataset.iloc[-1]
22
+ previous_day = dataset.iloc[-2]
23
  prediction = run_model("O3", data=dataset)
24
  pred1 = prediction[0][0]
25
  pred2 = prediction[0][1]
26
  pred3 = prediction[0][2]
27
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  dates_past = pd.date_range(end=pd.Timestamp.today(), periods=8).to_list()
29
  dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
30
 
 
46
  # Create a DataFrame
47
  df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
48
 
49
+
50
+ # App Title
51
+ st.title("Utrecht Pollution Dashboard🌱")
52
+
53
+ col1, col2 = st.columns((2, 3))
54
+ # Create a 3-column layout
55
+ with col1:
56
+ st.subheader("Current Weather")
57
+ subcol1, subcol2 = st.columns((1, 1))
58
+ with subcol1:
59
+ custom_metric_box(label="Temperature", value=f"{round(today['mean_temp'] * 0.1)} °C", delta=f"{round(today['mean_temp'] * 0.1) - round(previous_day['mean_temp'] * 0.1)} °C")
60
+ custom_metric_box(label="Humidity", value=f"{round(today['humidity'])} %", delta=f"{round(today['humidity']) - round(previous_day['humidity'])} %")
61
+ custom_metric_box(label="Pressure", value=f"{round(today['pressure'] * 0.1)} hPa", delta=f"{round(today['pressure'] * 0.1) - round(previous_day['pressure'] * 0.1)} hPa")
62
+ with subcol2:
63
+ custom_metric_box(label="Precipitation", value=f"{round(today['percipitation'] * 0.1)} mm", delta=f"{round(today['percipitation'] * 0.1) - round(previous_day['percipitation'] * 0.1)} mm")
64
+ custom_metric_box(label="Solar Radiation", value=f"{round(today['global_radiation'])} J/m²", delta=f"{round(today['global_radiation']) - round(previous_day['global_radiation'])} J/m²")
65
+ custom_metric_box(label="Wind Speed", value=f"{round(today['wind_speed'] * 0.1, 1)} m/s", delta=f"{round(today['wind_speed'] * 0.1, 1) - round(previous_day['wind_speed'] * 0.1, 1)} m/s")
66
+
67
+ with col2:
68
+ st.subheader("Current Pollution Levels")
69
+ sub1, sub2 = st.columns((1, 1))
70
+ # Display the prediction
71
+ # st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
72
+ with sub1:
73
+ pollution_box(label="O<sub>3</sub>", value=f"{round(today["O3"])} µg/m³", delta=f"{round(int(today["O3"]) - int(previous_day["O3"]))} µg/m³")
74
+ with st.expander("Learn more about O3", expanded=False):
75
+ st.markdown(
76
+ "*Ozone (O<sub>3</sub>)*: A harmful gas at ground level, contributing to respiratory issues and aggravating asthma.",
77
+ unsafe_allow_html=True,
78
+ )
79
+ with sub2:
80
+ pollution_box(label="NO<sub>2</sub>", value=f"{round(today["NO2"])} µg/m³", delta=f"{round(int(today["NO2"]) - int(previous_day["NO2"]))} µg/m³")
81
+ with st.expander("Learn more about O3", expanded=False):
82
+ st.markdown(
83
+ "*Wadeva particle (NO<sub>2</sub>)*: A harmful gas at ground level, contributing to respiratory issues and aggravating asthma.",
84
+ unsafe_allow_html=True,
85
+ )
86
+
87
+ # Create two columns for two separate graphs
88
+ # Plot O3 in the first subcolumn
89
+ st.subheader("O3 and NO2 Prediction")
90
+ # Plot NO2 in the second subcolumn
91
  fig_o3 = go.Figure()
92
  fig_o3.add_trace(
93
  go.Scatter(
 
96
  mode="lines+markers",
97
  name="O3",
98
  line=dict(color="rgb(0, 191, 255)", width=4),
99
+ hovertemplate="%{x|%d-%b-%Y}<br> %{y} µg/m³<extra></extra>",
100
  )
101
+ )
 
102
  fig_o3.add_shape(
103
  dict(
104
  type="line",
 
110
  )
111
  )
112
  fig_o3.update_layout(
113
+ plot_bgcolor="rgba(0, 0, 0, 0)",
114
+ paper_bgcolor="rgba(0, 0, 0, 0)",
115
  yaxis_title="O3 Concentration (µg/m³)",
116
  font=dict(size=14),
117
+ hovermode="x",
118
+ xaxis=dict(
119
+ title="Date",
120
+ type="date",
121
+ tickmode="array",
122
+ tickvals=df["Date"],
123
+ tickformat="%d-%b",
124
+ tickangle=-45,
125
+ tickcolor="gray",
126
+ ),
127
  )
128
+ st.plotly_chart(fig_o3, key="fig_o3")
129
 
 
 
130
  fig_no2 = go.Figure()
131
  fig_no2.add_trace(
132
  go.Scatter(
 
136
  name="NO2",
137
  line=dict(color="rgb(255, 20, 147)", width=4),
138
  )
139
+ )
 
140
  fig_no2.add_shape(
141
  dict(
142
  type="line",
 
144
  x1=pd.Timestamp.today(),
145
  y0=min(no2_values),
146
  y1=max(no2_values),
147
+ line=dict(color="gray", width=3, dash="dash"),
148
  )
149
  )
150
  fig_no2.update_layout(
151
+ plot_bgcolor="rgba(0, 0, 0, 0)",
152
+ paper_bgcolor="rgba(0, 0, 0, 0)",
153
+ yaxis_title="NO<sub>2</sub> Concentration (µg/m³)",
154
  font=dict(size=14),
155
+ hovermode="x",
156
+ xaxis=dict(
157
+ title="Date",
158
+ type="date",
159
+ tickmode="array",
160
+ tickvals=df["Date"],
161
+ tickformat="%d-%b",
162
+ tickangle=-45,
163
+ tickcolor="gray",
164
+ ),
165
  )
166
+ st.plotly_chart(fig_no2, key="fig_no2")
daily_api__pollution.py DELETED
File without changes
data_api_calls.py CHANGED
@@ -93,12 +93,11 @@ def add_columns():
93
  df.insert(2, 'O3', None)
94
  df.insert(10, 'weekday', None)
95
 
96
- df.to_csv('combined_data.csv', index=False)
97
 
98
 
99
- def scale():
100
- file_path = 'combined_data.csv'
101
- df = pd.read_csv(file_path)
102
  columns = list(df.columns)
103
 
104
 
@@ -142,11 +141,10 @@ def scale():
142
  df['humidity'] = df['humidity'].astype(int)
143
  df['global_radiation'] = df['global_radiation'].astype(int)
144
 
145
- df.to_csv('dataset.csv', index=False)
146
 
147
- def insert_pollution(NO2, O3):
148
- file_path = 'dataset.csv'
149
- df = pd.read_csv(file_path)
150
  start_index = 0
151
  while NO2:
152
  df.loc[start_index, 'NO2'] = NO2.pop()
@@ -186,8 +184,7 @@ def get_data():
186
  weather_data()
187
  pollution_data()
188
  NO2, O3 = clean_values()
189
- add_columns()
190
- scale()
191
- insert_pollution(NO2, O3)
192
- os.remove('combined_data.csv')
193
  os.remove('weather_data.csv')
 
93
  df.insert(2, 'O3', None)
94
  df.insert(10, 'weekday', None)
95
 
96
+ return df
97
 
98
 
99
+ def scale(data):
100
+ df = data
 
101
  columns = list(df.columns)
102
 
103
 
 
141
  df['humidity'] = df['humidity'].astype(int)
142
  df['global_radiation'] = df['global_radiation'].astype(int)
143
 
144
+ return df
145
 
146
+ def insert_pollution(NO2, O3, data):
147
+ df = data
 
148
  start_index = 0
149
  while NO2:
150
  df.loc[start_index, 'NO2'] = NO2.pop()
 
184
  weather_data()
185
  pollution_data()
186
  NO2, O3 = clean_values()
187
+ df = add_columns()
188
+ scaled_df = scale(df)
189
+ insert_pollution(NO2, O3, scaled_df)
 
190
  os.remove('weather_data.csv')
dataset.csv CHANGED
@@ -1,9 +1,9 @@
1
  date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum_visibility,humidity,weekday
2
- 2024-10-15,22.853627569528417,22.52299076212471,51,87,71,0,10194,290,86,Tuesday
3
- 2024-10-16,22.4144459833795,22.78109803921569,61,151,40,0,10103,358,82,Wednesday
4
- 2024-10-17,22.990465489566613,22.928154311649017,51,169,43,6,10100,371,86,Thursday
5
- 2024-10-18,23.659013539651834,23.700536672629696,21,156,42,39,10140,64,97,Friday
6
- 2024-10-19,24.727853658536585,23.52574561403509,43,147,43,28,10140,236,92,Saturday
7
- 2024-10-20,22.700366666666664,24.317572254335257,68,145,0,0,10160,241,82,Sunday
8
- 2024-10-21,19.763439153439155,25.661659574468086,66,142,27,39,10201,110,90,Monday
9
- 2024-10-22,20.281666666666666,25.787520661157025,76,121,54,97,10265,110,86,Tuesday
 
1
  date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum_visibility,humidity,weekday
2
+ 2024-10-16,22.602711656441716,22.88128805620609,61,151,40,0,10103,358,82,Wednesday
3
+ 2024-10-17,23.104327323162277,23.038637566137567,51,169,43,6,10100,371,86,Thursday
4
+ 2024-10-18,23.68285714285714,23.71661094224924,21,156,42,39,10140,64,97,Friday
5
+ 2024-10-19,24.532038834951457,23.604722719141325,43,147,43,28,10140,236,92,Saturday
6
+ 2024-10-20,23.019101941747575,24.173377192982453,68,145,0,0,10160,241,82,Sunday
7
+ 2024-10-21,21.275629139072848,25.05873563218391,58,144,27,43,10206,220,92,Monday
8
+ 2024-10-22,22.334374999999998,24.5942194092827,76,123,60,12,10265,100,87,Tuesday
9
+ 2024-10-23,24.261733333333336,23.56,31,115,7,0,10328,112,95,Wednesday
linear_regression_model.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbe290cfbb7bbd4766aba92ca738296536a79b435b9d9d51e0541d88340261dc
3
- size 593
 
 
 
 
src/daily_api__pollution.py DELETED
@@ -1,161 +0,0 @@
1
- import http.client
2
- from datetime import date, timedelta
3
- import pandas as pd
4
- from io import StringIO
5
- import os
6
- import re
7
- import csv
8
-
9
- def api_call():
10
- particles = ["NO2", "O3"]
11
- stations = ["NL10636", "NL10639", "NL10643"]
12
- all_dataframes = []
13
- today = date.today().isoformat() + "T09:00:00Z"
14
- yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
15
- latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
16
- days_today = 0
17
- days_yesterday = 1
18
- while(today != latest_date):
19
- days_today += 1
20
- days_yesterday += 1
21
- for particle in particles:
22
- for station in stations:
23
- conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
24
- payload = ''
25
- headers = {}
26
- conn.request("GET", f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}", payload, headers)
27
- res = conn.getresponse()
28
- data = res.read()
29
- decoded_data = data.decode("utf-8")
30
- df = pd.read_csv(StringIO(decoded_data))
31
- df = df.filter(like='value')
32
- all_dataframes.append(df)
33
- combined_data = pd.concat(all_dataframes, ignore_index=True)
34
- combined_data.to_csv(f'{particle}_{today}.csv', index=False)
35
- today = (date.today() - timedelta(days_today)).isoformat() + "T09:00:00Z"
36
- yesterday = (date.today() - timedelta(days_yesterday)).isoformat() + "T09:00:00Z"
37
-
38
- def delete_csv(csvs):
39
- for csv in csvs:
40
- if(os.path.exists(csv) and os.path.isfile(csv)):
41
- os.remove(csv)
42
-
43
- def clean_values():
44
- particles = ["NO2", "O3"]
45
- csvs = []
46
- NO2 = []
47
- O3 = []
48
- today = date.today().isoformat() + "T09:00:00Z"
49
- yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
50
- latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
51
- days_today = 0
52
- while(today != latest_date):
53
- for particle in particles:
54
- name = f'{particle}_{today}.csv'
55
- csvs.append(name)
56
- days_today += 1
57
- today = (date.today() - timedelta(days_today)).isoformat() + "T09:00:00Z"
58
- for csv_file in csvs:
59
- values = [] # Reset values for each CSV file
60
- # Open the CSV file and read the values
61
- with open(csv_file, 'r') as file:
62
- reader = csv.reader(file)
63
- for row in reader:
64
- for value in row:
65
- # Use regular expressions to extract numeric part
66
- cleaned_value = re.findall(r"[-+]?\d*\.\d+|\d+", value)
67
- if cleaned_value: # If we successfully extract a number
68
- values.append(float(cleaned_value[0])) # Convert the first match to float
69
-
70
- # Compute the average if the values list is not empty
71
- if values:
72
- avg = sum(values) / len(values)
73
- if "NO2" in csv_file:
74
- NO2.append(avg)
75
- else:
76
- O3.append(avg)
77
-
78
- delete_csv(csvs)
79
-
80
- return NO2, O3
81
-
82
-
83
- def add_columns():
84
- file_path = 'weather_data.csv'
85
- df = pd.read_csv(file_path)
86
-
87
- df.insert(1, 'NO2', None)
88
- df.insert(2, 'O3', None)
89
- df.insert(10, 'weekday', None)
90
-
91
- df.to_csv('combined_data.csv', index=False)
92
-
93
-
94
- def scale():
95
- file_path = 'combined_data.csv'
96
- df = pd.read_csv(file_path)
97
- columns = list(df.columns)
98
-
99
-
100
- columns.insert(3, columns.pop(6))
101
-
102
- df = df[columns]
103
-
104
- columns.insert(5, columns.pop(9))
105
-
106
- df = df[columns]
107
-
108
- columns.insert(9, columns.pop(6))
109
-
110
- df = df[columns]
111
-
112
- df = df.rename(columns={
113
- 'datetime':'date',
114
- 'windspeed': 'wind_speed',
115
- 'temp': 'mean_temp',
116
- 'solarradiation':'global_radiation',
117
- 'precip':'percipitation',
118
- 'sealevelpressure':'pressure',
119
- 'visibility':'minimum_visibility'
120
- })
121
-
122
- df['date'] = pd.to_datetime(df['date'])
123
- df['weekday'] = df['date'].dt.day_name()
124
-
125
-
126
- df['wind_speed'] = (df['wind_speed'] / 3.6) * 10
127
- df['mean_temp'] = df['mean_temp'] * 10
128
- df['minimum_visibility'] = df['minimum_visibility'] * 10
129
- df['percipitation'] = df['percipitation'] * 10
130
- df['pressure'] = df['pressure'] * 10
131
-
132
- df['wind_speed'] = df['wind_speed'].astype(int)
133
- df['mean_temp'] = df['mean_temp'].astype(int)
134
- df['minimum_visibility'] = df['minimum_visibility'].astype(int)
135
- df['percipitation'] = df['percipitation'].astype(int)
136
- df['pressure'] = df['pressure'].astype(int)
137
- df['humidity'] = df['humidity'].astype(int)
138
- df['global_radiation'] = df['global_radiation'].astype(int)
139
-
140
- df.to_csv('recorded_data.csv', index=False)
141
-
142
- def insert_pollution(NO2, O3):
143
- file_path = 'recorded_data.csv'
144
- df = pd.read_csv(file_path)
145
- start_index = 0
146
- while NO2:
147
- df.loc[start_index, 'NO2'] = NO2.pop()
148
- start_index += 1
149
- start_index = 0
150
- while O3:
151
- df.loc[start_index, 'O3'] = O3.pop()
152
- start_index += 1
153
- df.to_csv('recorded_data.csv', index=False)
154
-
155
- api_call()
156
- NO2, O3 = clean_values()
157
- add_columns()
158
- scale()
159
- insert_pollution(NO2, O3)
160
- os.remove('combined_data.csv')
161
- os.remove('weather_data.csv')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/helper_functions.py CHANGED
@@ -1,24 +1,26 @@
1
  import streamlit as st
2
 
3
- # Custom function to create styled metric boxes with subscripts, smaller label, and larger metric
 
4
  def custom_metric_box(label, value, delta):
5
  st.markdown(f"""
6
  <div style="
7
- background: rgba(255, 255, 255, 0.05);
8
- border-radius: 16px;
9
- box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
10
- backdrop-filter: blur(6px);
11
- -webkit-backdrop-filter: blur(6px);
12
- border: 1px solid rgba(255, 255, 255, 0.15);
13
- padding: 15px;
14
- margin-bottom: 10px;
15
- width: 200px; /* Fixed width */
16
  ">
17
- <h4 style="font-size: 18px; font-weight: normal; margin: 0;">{label}</h4> <!-- Smaller label -->
18
- <p style="font-size: 36px; font-weight: bold; margin: 0;">{value}</p> <!-- Larger metric -->
19
- <p style="color: {'green' if '+' in delta else 'orange'}; margin: 0;">{delta}</p>
 
 
 
 
20
  </div>
21
- """, unsafe_allow_html=True)
22
 
23
  # Custom function to create pollution metric boxes with side-by-side layout for label and value
24
  # Custom function to create pollution metric boxes with side-by-side layout and fixed width
 
1
  import streamlit as st
2
 
3
+
4
+ # Custom function to create styled metric boxes with compact layout
5
  def custom_metric_box(label, value, delta):
6
  st.markdown(f"""
7
  <div style="
8
+ padding: 5px;
9
+ margin-bottom: 5px;
10
+ width: 100%; /* Full width */
11
+ display: flex;
12
+ flex-direction: column; /* Align items vertically */
13
+ align-items: flex-start; /* Align all content to the left */
 
 
 
14
  ">
15
+ <div>
16
+ <h4 style="font-size: 14px; font-weight: normal; margin: 0;">{label}</h4> <!-- Smaller label -->
17
+ </div>
18
+ <div>
19
+ <p style="font-size: 18px; font-weight: bold; margin: 0;">{value}</p> <!-- Smaller metric -->
20
+ <p style="color: {'green' if '+' in delta else 'orange'}; font-size: 12px; margin: 0;">{delta}</p> <!-- Smaller delta text -->
21
+ </div>
22
  </div>
23
+ """, unsafe_allow_html=True)
24
 
25
  # Custom function to create pollution metric boxes with side-by-side layout for label and value
26
  # Custom function to create pollution metric boxes with side-by-side layout and fixed width
test.ipynb DELETED
@@ -1,158 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 1,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "from data_loading import create_features_and_targets\n",
10
- "from data_api_calls import get_data\n",
11
- "import pandas as pd"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": 2,
17
- "metadata": {},
18
- "outputs": [],
19
- "source": [
20
- "dataset = pd.read_csv(\"dataset.csv\")"
21
- ]
22
- },
23
- {
24
- "cell_type": "code",
25
- "execution_count": 3,
26
- "metadata": {},
27
- "outputs": [
28
- {
29
- "name": "stdout",
30
- "output_type": "stream",
31
- "text": [
32
- "Number of rows with missing values dropped: 7\n"
33
- ]
34
- }
35
- ],
36
- "source": [
37
- "test_data = create_features_and_targets(\n",
38
- " data=dataset,\n",
39
- " target_particle=\"NO2\",\n",
40
- " lag_days=7,\n",
41
- " sma_days=7,\n",
42
- " days_ahead=3,\n",
43
- ")"
44
- ]
45
- },
46
- {
47
- "cell_type": "code",
48
- "execution_count": 11,
49
- "metadata": {},
50
- "outputs": [
51
- {
52
- "data": {
53
- "text/plain": [
54
- "Index(['NO2', 'O3', 'wind_speed', 'mean_temp', 'global_radiation',\n",
55
- " 'percipitation', 'pressure', 'minimum_visibility', 'humidity',\n",
56
- " 'weekday_sin',\n",
57
- " ...\n",
58
- " 'O3_last_year_4_days_before', 'NO2_last_year_4_days_before',\n",
59
- " 'O3_last_year_5_days_before', 'NO2_last_year_5_days_before',\n",
60
- " 'O3_last_year_6_days_before', 'NO2_last_year_6_days_before',\n",
61
- " 'O3_last_year_7_days_before', 'NO2_last_year_7_days_before',\n",
62
- " 'O3_last_year_3_days_after', 'NO2_last_year_3_days_after'],\n",
63
- " dtype='object', length=103)"
64
- ]
65
- },
66
- "execution_count": 11,
67
- "metadata": {},
68
- "output_type": "execute_result"
69
- }
70
- ],
71
- "source": [
72
- "test_data.columns"
73
- ]
74
- },
75
- {
76
- "cell_type": "code",
77
- "execution_count": 8,
78
- "metadata": {},
79
- "outputs": [],
80
- "source": [
81
- "from src.models_loading import run_model"
82
- ]
83
- },
84
- {
85
- "cell_type": "code",
86
- "execution_count": null,
87
- "metadata": {},
88
- "outputs": [],
89
- "source": []
90
- },
91
- {
92
- "cell_type": "code",
93
- "execution_count": 12,
94
- "metadata": {},
95
- "outputs": [
96
- {
97
- "name": "stderr",
98
- "output_type": "stream",
99
- "text": [
100
- "2024-10-22 21:43:37.935 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
101
- "2024-10-22 21:43:37.938 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
102
- "2024-10-22 21:43:37.939 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
103
- "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
104
- "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
105
- ]
106
- },
107
- {
108
- "name": "stdout",
109
- "output_type": "stream",
110
- "text": [
111
- "Number of rows with missing values dropped: 7\n"
112
- ]
113
- },
114
- {
115
- "ename": "FileNotFoundError",
116
- "evalue": "[Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'",
117
- "output_type": "error",
118
- "traceback": [
119
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
120
- "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
121
- "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mrun_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m)\u001b[49m\n",
122
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:210\u001b[0m, in \u001b[0;36mCachedFunc.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m spinner(message, _cache\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_or_create_cached_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_or_create_cached_value(args, kwargs)\n",
123
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:235\u001b[0m, in \u001b[0;36mCachedFunc._get_or_create_cached_value\u001b[0;34m(self, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m 233\u001b[0m cached_result \u001b[38;5;241m=\u001b[39m cache\u001b[38;5;241m.\u001b[39mread_result(value_key)\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handle_cache_hit(cached_result)\n\u001b[0;32m--> 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle_cache_miss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue_key\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
124
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:292\u001b[0m, in \u001b[0;36mCachedFunc._handle_cache_miss\u001b[0;34m(self, cache, value_key, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;66;03m# We acquired the lock before any other thread. Compute the value!\u001b[39;00m\n\u001b[1;32m 289\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39mcalling_cached_function(\n\u001b[1;32m 290\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mfunc\n\u001b[1;32m 291\u001b[0m ):\n\u001b[0;32m--> 292\u001b[0m computed_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_info\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;66;03m# We've computed our value, and now we need to write it back to the cache\u001b[39;00m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;66;03m# along with any \"replay messages\" that were generated during value computation.\u001b[39;00m\n\u001b[1;32m 296\u001b[0m messages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39m_most_recent_messages\n",
125
- "File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/models_loading.py:28\u001b[0m, in \u001b[0;36mrun_model\u001b[0;34m(particle, data)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;129m@st\u001b[39m\u001b[38;5;241m.\u001b[39mcache_resource(ttl\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m6\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m300\u001b[39m) \u001b[38;5;66;03m# Reruns every 6 hours\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_model\u001b[39m(particle, data):\n\u001b[0;32m---> 28\u001b[0m input_data \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparticle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 29\u001b[0m model \u001b[38;5;241m=\u001b[39m load_model(particle)\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# Run the model with static input\u001b[39;00m\n",
126
- "File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/data_loading.py:125\u001b[0m, in \u001b[0;36mcreate_features\u001b[0;34m(data, target_particle, lag_days, sma_days)\u001b[0m\n\u001b[1;32m 121\u001b[0m x \u001b[38;5;241m=\u001b[39m data[feature_cols]\n\u001b[1;32m 124\u001b[0m \u001b[38;5;66;03m# Initialize scalers\u001b[39;00m\n\u001b[0;32m--> 125\u001b[0m feature_scaler \u001b[38;5;241m=\u001b[39m \u001b[43mjoblib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m../scalers/feature_scaler_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.joblib\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;66;03m# Fit the scalers on the training data\u001b[39;00m\n\u001b[1;32m 128\u001b[0m X_scaled \u001b[38;5;241m=\u001b[39m feature_scaler\u001b[38;5;241m.\u001b[39mfit_transform(x)\n",
127
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/joblib/numpy_pickle.py:650\u001b[0m, in \u001b[0;36mload\u001b[0;34m(filename, mmap_mode)\u001b[0m\n\u001b[1;32m 648\u001b[0m obj \u001b[38;5;241m=\u001b[39m _unpickle(fobj)\n\u001b[1;32m 649\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 650\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 651\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _read_fileobject(f, filename, mmap_mode) \u001b[38;5;28;01mas\u001b[39;00m fobj:\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fobj, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 653\u001b[0m \u001b[38;5;66;03m# if the returned file object is a string, this means we\u001b[39;00m\n\u001b[1;32m 654\u001b[0m \u001b[38;5;66;03m# try to load a pickle file generated with an version of\u001b[39;00m\n\u001b[1;32m 655\u001b[0m \u001b[38;5;66;03m# Joblib so we load it with joblib compatibility function.\u001b[39;00m\n",
128
- "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'"
129
- ]
130
- }
131
- ],
132
- "source": [
133
- "prediction = run_model(\"O3\", data=dataset)"
134
- ]
135
- }
136
- ],
137
- "metadata": {
138
- "kernelspec": {
139
- "display_name": "ml-industry",
140
- "language": "python",
141
- "name": "python3"
142
- },
143
- "language_info": {
144
- "codemirror_mode": {
145
- "name": "ipython",
146
- "version": 3
147
- },
148
- "file_extension": ".py",
149
- "mimetype": "text/x-python",
150
- "name": "python",
151
- "nbconvert_exporter": "python",
152
- "pygments_lexer": "ipython3",
153
- "version": "3.12.5"
154
- }
155
- },
156
- "nbformat": 4,
157
- "nbformat_minor": 2
158
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test.py DELETED
@@ -1,7 +0,0 @@
1
- import pandas as pd
2
-
3
- from src.models_loading import run_model
4
- dataset = pd.read_csv("dataset.csv")
5
- prediction = run_model("O3", data=dataset)
6
- print(type(prediction))
7
- print(prediction)