elisaklunder committed on
Commit
472271b
•
2 Parent(s): eeaf86d 5c6dd58

Merge branch 'elisa'

Browse files
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .venv/
2
+ .env
3
+ __pycache__/
4
+ *.pyc
README.md CHANGED
@@ -11,4 +11,3 @@ short_description: 'Demo: Model to predict O3 and NO2 concentrations in Utrecht'
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
- hhhrhehheehehehe
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py CHANGED
@@ -1,35 +1,32 @@
1
- import time
2
  import altair as alt
3
- import joblib
4
- import numpy as np
5
  import pandas as pd
6
- import streamlit as st
7
- from sklearn.linear_model import LinearRegression
8
- import matplotlib.pyplot as plt
9
  import plotly.graph_objects as go
10
- from helper_functions import custom_metric_box, pollution_box, run_model
 
 
11
  from data_api_calls import get_data
12
 
13
  st.set_page_config(
14
  page_title="Utrecht Pollution Dashboard",
15
  page_icon="πŸ‚οΏ½οΏ½πŸŒ±",
16
  layout="wide",
17
- initial_sidebar_state="expanded")
 
18
 
19
  alt.themes.enable("dark")
20
 
21
- prediction = run_model() # Assuming you have a function run_model()
22
  get_data()
23
 
24
  data = pd.read_csv("dataset.csv")
25
 
26
  # App Title
27
- st.title("Utrecht Pollution Dashboard 🌱")
28
 
29
- col1, col2 = st.columns((1,1))
30
  # Create a 3-column layout
31
  with col1:
32
- st.subheader('Current Weather')
33
  col1, col2, col3 = st.columns(3)
34
 
35
  # First column
@@ -47,10 +44,10 @@ with col1:
47
  custom_metric_box(label="Solar Radiation", value="200 W/mΒ²", delta="-20 W/mΒ²")
48
  custom_metric_box(label="Wind Speed", value="15 km/h", delta="-2 km/h")
49
 
50
- st.subheader('Current Pollution Levels')
51
- col1, col2 = st.columns((1,1))
52
  # Display the prediction
53
- #st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
54
  with col1:
55
  pollution_box(label="O<sub>3</sub>", value="37 Β΅g/mΒ³", delta="+2 Β΅g/mΒ³")
56
  with col2:
@@ -58,7 +55,9 @@ with col1:
58
 
59
  # Sample data (replace with your actual data)
60
  dates_past = pd.date_range(end=pd.Timestamp.today(), periods=7).to_list()
61
- dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
 
 
62
 
63
  # O3 and NO2 values for the past 7 days
64
  o3_past_values = [30, 32, 34, 33, 31, 35, 36]
@@ -74,61 +73,71 @@ o3_values = o3_past_values + o3_future_values
74
  no2_values = no2_past_values + no2_future_values
75
 
76
  # Create a DataFrame
77
- df = pd.DataFrame({
78
- 'Date': dates,
79
- 'O3': o3_values,
80
- 'NO2': no2_values
81
- })
82
 
83
- st.subheader('O3 and NO2 Prediction')
84
  # Create two columns for two separate graphs
85
  subcol1, subcol2 = st.columns(2)
86
  # Plot O3 in the first subcolumn
87
  with subcol1:
88
  fig_o3 = go.Figure()
89
- fig_o3.add_trace(go.Scatter(x=df['Date'], y=df['O3'],
90
- mode='lines+markers',
91
- name='O3',
92
- line=dict(color='rgb(0, 191, 255)', width=4))) # Bright blue
 
 
 
 
 
93
  # Add a vertical line for predictions (today's date)
94
  fig_o3.add_shape(
95
  dict(
96
  type="line",
97
- x0=pd.Timestamp.today(), x1=pd.Timestamp.today(),
98
- y0=min(o3_values), y1=max(o3_values),
 
 
99
  line=dict(color="White", width=3, dash="dash"),
100
  )
101
  )
102
  fig_o3.update_layout(
103
- plot_bgcolor='rgba(0, 0, 0, 0)', # Transparent background
104
- paper_bgcolor='rgba(0, 0, 0, 0)', # Transparent paper background
105
  yaxis_title="O3 Concentration (Β΅g/mΒ³)",
106
  font=dict(size=14),
107
- hovermode="x unified"
108
  )
109
  st.plotly_chart(fig_o3)
110
 
111
  # Plot NO2 in the second subcolumn
112
  with subcol2:
113
  fig_no2 = go.Figure()
114
- fig_no2.add_trace(go.Scatter(x=df['Date'], y=df['NO2'],
115
- mode='lines+markers',
116
- name='NO2',
117
- line=dict(color='rgb(255, 20, 147)', width=4))) # Bright pink
 
 
 
 
 
118
  # Add a vertical line for predictions (today's date)
119
  fig_no2.add_shape(
120
  dict(
121
  type="line",
122
- x0=pd.Timestamp.today(), x1=pd.Timestamp.today(),
123
- y0=min(no2_values), y1=max(no2_values),
 
 
124
  line=dict(color="White", width=3, dash="dash"),
125
  )
126
  )
127
  fig_no2.update_layout(
128
- plot_bgcolor='rgba(0, 0, 0, 0)', # Transparent background
129
- paper_bgcolor='rgba(0, 0, 0, 0)', # Transparent paper background
130
  yaxis_title="NO2 Concentration (Β΅g/mΒ³)",
131
  font=dict(size=14),
132
- hovermode="x unified"
133
  )
134
- st.plotly_chart(fig_no2)
 
 
1
  import altair as alt
 
 
2
  import pandas as pd
 
 
 
3
  import plotly.graph_objects as go
4
+ import streamlit as st
5
+ from src.helper_functions import custom_metric_box, pollution_box
6
+ from src.models_loading import run_model
7
  from data_api_calls import get_data
8
 
9
  st.set_page_config(
10
  page_title="Utrecht Pollution Dashboard",
11
  page_icon="πŸ‚οΏ½οΏ½πŸŒ±",
12
  layout="wide",
13
+ initial_sidebar_state="expanded",
14
+ )
15
 
16
  alt.themes.enable("dark")
17
 
18
+ test_predictions = run_model("O3")
19
  get_data()
20
 
21
  data = pd.read_csv("dataset.csv")
22
 
23
  # App Title
24
+ st.title("Utrecht Pollution Dashboard🌱")
25
 
26
+ col1, col2 = st.columns((1, 1))
27
  # Create a 3-column layout
28
  with col1:
29
+ st.subheader("Current Weather")
30
  col1, col2, col3 = st.columns(3)
31
 
32
  # First column
 
44
  custom_metric_box(label="Solar Radiation", value="200 W/mΒ²", delta="-20 W/mΒ²")
45
  custom_metric_box(label="Wind Speed", value="15 km/h", delta="-2 km/h")
46
 
47
+ st.subheader("Current Pollution Levels")
48
+ col1, col2 = st.columns((1, 1))
49
  # Display the prediction
50
+ # st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
51
  with col1:
52
  pollution_box(label="O<sub>3</sub>", value="37 Β΅g/mΒ³", delta="+2 Β΅g/mΒ³")
53
  with col2:
 
55
 
56
  # Sample data (replace with your actual data)
57
  dates_past = pd.date_range(end=pd.Timestamp.today(), periods=7).to_list()
58
+ dates_future = pd.date_range(
59
+ start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3
60
+ ).to_list()
61
 
62
  # O3 and NO2 values for the past 7 days
63
  o3_past_values = [30, 32, 34, 33, 31, 35, 36]
 
73
  no2_values = no2_past_values + no2_future_values
74
 
75
  # Create a DataFrame
76
+ df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
 
 
 
 
77
 
78
+ st.subheader("O3 and NO2 Prediction")
79
  # Create two columns for two separate graphs
80
  subcol1, subcol2 = st.columns(2)
81
  # Plot O3 in the first subcolumn
82
  with subcol1:
83
  fig_o3 = go.Figure()
84
+ fig_o3.add_trace(
85
+ go.Scatter(
86
+ x=df["Date"],
87
+ y=df["O3"],
88
+ mode="lines+markers",
89
+ name="O3",
90
+ line=dict(color="rgb(0, 191, 255)", width=4),
91
+ )
92
+ ) # Bright blue
93
  # Add a vertical line for predictions (today's date)
94
  fig_o3.add_shape(
95
  dict(
96
  type="line",
97
+ x0=pd.Timestamp.today(),
98
+ x1=pd.Timestamp.today(),
99
+ y0=min(o3_values),
100
+ y1=max(o3_values),
101
  line=dict(color="White", width=3, dash="dash"),
102
  )
103
  )
104
  fig_o3.update_layout(
105
+ plot_bgcolor="rgba(0, 0, 0, 0)", # Transparent background
106
+ paper_bgcolor="rgba(0, 0, 0, 0)", # Transparent paper background
107
  yaxis_title="O3 Concentration (Β΅g/mΒ³)",
108
  font=dict(size=14),
109
+ hovermode="x unified",
110
  )
111
  st.plotly_chart(fig_o3)
112
 
113
  # Plot NO2 in the second subcolumn
114
  with subcol2:
115
  fig_no2 = go.Figure()
116
+ fig_no2.add_trace(
117
+ go.Scatter(
118
+ x=df["Date"],
119
+ y=df["NO2"],
120
+ mode="lines+markers",
121
+ name="NO2",
122
+ line=dict(color="rgb(255, 20, 147)", width=4),
123
+ )
124
+ ) # Bright pink
125
  # Add a vertical line for predictions (today's date)
126
  fig_no2.add_shape(
127
  dict(
128
  type="line",
129
+ x0=pd.Timestamp.today(),
130
+ x1=pd.Timestamp.today(),
131
+ y0=min(no2_values),
132
+ y1=max(no2_values),
133
  line=dict(color="White", width=3, dash="dash"),
134
  )
135
  )
136
  fig_no2.update_layout(
137
+ plot_bgcolor="rgba(0, 0, 0, 0)", # Transparent background
138
+ paper_bgcolor="rgba(0, 0, 0, 0)", # Transparent paper background
139
  yaxis_title="NO2 Concentration (Β΅g/mΒ³)",
140
  font=dict(size=14),
141
+ hovermode="x unified",
142
  )
143
+ st.plotly_chart(fig_no2)
daily_api__pollution.py ADDED
File without changes
requirements.txt CHANGED
@@ -7,4 +7,5 @@ altair
7
  matplotlib
8
  plotly
9
  http.client
10
- datetime
 
 
7
  matplotlib
8
  plotly
9
  http.client
10
+ datetime
11
+ huggingface-hub
scalers/target_scaler_NO2.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:255a0d1dd1d8673ce03e838e9fc1a7df4dab1248ca70f6cb73b66aea83ed6316
3
+ size 1023
scalers/target_scaler_O3.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad485897b59228f1c1efd8c76cc2fa771d10efd379297f163ceba32dbacbab6
3
+ size 1023
src/daily_api__pollution.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import http.client
2
+ from datetime import date, timedelta
3
+ import pandas as pd
4
+ from io import StringIO
5
+ import os
6
+ import re
7
+ import csv
8
+
9
def api_call():
    """Download the last seven days of NO2 and O3 measurements as CSV files.

    For each of the seven most recent 24-hour windows (09:00Z to 09:00Z),
    each particle and each station, one request is sent to
    api.luchtmeetnet.nl. Every response body is parsed with ``pd.read_csv``
    and reduced to the columns whose label contains ``value``. The frames of
    all stations for one particle and window are concatenated and written to
    ``{particle}_{window_end}.csv`` in the current working directory.
    """
    particles = ["NO2", "O3"]
    stations = ["NL10636", "NL10639", "NL10643"]
    # Read the reference date once: re-reading it inside the loop would let a
    # run that crosses midnight shift the windows and miss the stop condition.
    base_day = date.today()

    for offset in range(7):
        end = (base_day - timedelta(offset)).isoformat() + "T09:00:00Z"
        start = (base_day - timedelta(offset + 1)).isoformat() + "T09:00:00Z"
        for particle in particles:
            # Start from an empty list for every file; a shared list would
            # leak the frames of earlier particles and days into this one.
            frames = []
            for station in stations:
                conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
                try:
                    conn.request(
                        "GET",
                        f"/open_api/measurements?station_number={station}"
                        f"&formula={particle}&page=1"
                        "&order_by=timestamp_measured&order_direction=desc"
                        f"&end={end}&start={start}",
                        "",
                        {},
                    )
                    decoded_data = conn.getresponse().read().decode("utf-8")
                finally:
                    # Always release the socket, even when the request fails.
                    conn.close()
                df = pd.read_csv(StringIO(decoded_data))
                frames.append(df.filter(like="value"))
            combined_data = pd.concat(frames, ignore_index=True)
            combined_data.to_csv(f"{particle}_{end}.csv", index=False)
37
+
38
def delete_csv(csvs):
    """Remove every path in ``csvs`` that currently is a regular file.

    Paths that do not exist, or that refer to something other than a regular
    file (for example a directory), are skipped without an error.
    """
    for path in csvs:
        # isfile() is already False for missing paths, so one check suffices.
        if not os.path.isfile(path):
            continue
        os.remove(path)
42
+
43
def clean_values():
    """Average the downloaded measurement files into one value per file.

    For each of the last seven days (today first) and for both particles,
    the file ``{particle}_{date}T09:00:00Z.csv`` is read from the working
    directory. The first number found in every cell is collected and the
    numbers of one file are averaged. All listed files are removed
    afterwards through ``delete_csv``.

    Returns:
        A ``(NO2, O3)`` tuple of lists holding the per-file averages in the
        order the files were listed (today first). A file without any
        numeric cell contributes no entry.

    Raises:
        FileNotFoundError: If one of the expected files is missing.
    """
    particles = ["NO2", "O3"]
    # Read the reference date once so the list of file names cannot drift
    # (or the loop fail to terminate) when a run crosses midnight.
    base_day = date.today()
    # Matches a signed decimal such as "-1.5" or ".5", or a plain integer.
    number = re.compile(r"[-+]?\d*\.\d+|\d+")

    csvs = []
    for offset in range(7):
        stamp = (base_day - timedelta(offset)).isoformat() + "T09:00:00Z"
        for particle in particles:
            csvs.append(f"{particle}_{stamp}.csv")

    NO2 = []
    O3 = []
    for csv_file in csvs:
        values = []
        # newline="" is what the csv module expects from its input file.
        with open(csv_file, "r", newline="", encoding="utf-8") as file:
            for row in csv.reader(file):
                for cell in row:
                    match = number.search(cell)
                    if match:
                        values.append(float(match.group(0)))

        # Skip files that yielded no numbers instead of dividing by zero.
        if values:
            avg = sum(values) / len(values)
            if "NO2" in csv_file:
                NO2.append(avg)
            else:
                O3.append(avg)

    delete_csv(csvs)

    return NO2, O3
81
+
82
+
83
def add_columns():
    """Add empty ``NO2``, ``O3`` and ``weekday`` columns to the weather data.

    Reads ``weather_data.csv`` from the working directory, inserts the three
    placeholder columns at positions 1, 2 and 10 (in that order), and writes
    the result to ``combined_data.csv`` without the index column.
    """
    weather = pd.read_csv('weather_data.csv')

    # Positions refer to the frame as it looks after the previous insertion.
    placeholders = ((1, 'NO2'), (2, 'O3'), (10, 'weekday'))
    for position, column in placeholders:
        weather.insert(position, column, None)

    weather.to_csv('combined_data.csv', index=False)
92
+
93
+
94
def scale():
    """Reorder, rename and rescale the columns of ``combined_data.csv``.

    Reads ``combined_data.csv`` from the working directory, moves three
    columns to new positions, renames the weather columns, fills ``weekday``
    with the day name of ``date``, rescales several measurements, truncates
    the numeric weather columns to integers and writes the result to
    ``recorded_data.csv`` without the index column.
    """
    frame = pd.read_csv('combined_data.csv')

    # Each (destination, source) pair moves one column. The moves are applied
    # in sequence, so later indices refer to the already-shuffled order.
    order = list(frame.columns)
    for destination, source in ((3, 6), (5, 9), (9, 6)):
        order.insert(destination, order.pop(source))
    frame = frame[order]

    new_names = {
        'datetime': 'date',
        'windspeed': 'wind_speed',
        'temp': 'mean_temp',
        'solarradiation': 'global_radiation',
        'precip': 'percipitation',
        'sealevelpressure': 'pressure',
        'visibility': 'minimum_visibility',
    }
    frame = frame.rename(columns=new_names)

    frame['date'] = pd.to_datetime(frame['date'])
    frame['weekday'] = frame['date'].dt.day_name()

    # NOTE(review): presumably converts km/h to tenths of m/s - confirm units.
    frame['wind_speed'] = (frame['wind_speed'] / 3.6) * 10
    for column in ('mean_temp', 'minimum_visibility', 'percipitation', 'pressure'):
        frame[column] = frame[column] * 10

    integer_columns = (
        'wind_speed',
        'mean_temp',
        'minimum_visibility',
        'percipitation',
        'pressure',
        'humidity',
        'global_radiation',
    )
    for column in integer_columns:
        # astype(int) truncates towards zero rather than rounding.
        frame[column] = frame[column].astype(int)

    frame.to_csv('recorded_data.csv', index=False)
141
+
142
def insert_pollution(NO2, O3):
    """Write the pollutant values into ``recorded_data.csv``.

    The values are stored in reverse list order: the last element of each
    list goes to row 0, the one before it to row 1, and so on. The file is
    read from and written back to the working directory.

    Args:
        NO2: Sequence of NO2 values to store in the ``NO2`` column.
        O3: Sequence of O3 values to store in the ``O3`` column.

    The input sequences are only read; they are no longer emptied as a side
    effect of the call.
    """
    file_path = 'recorded_data.csv'
    df = pd.read_csv(file_path)
    for column, values in (('NO2', NO2), ('O3', O3)):
        # reversed() reproduces the order of repeated pop() calls without
        # consuming the caller's list.
        for row, value in enumerate(reversed(values)):
            df.loc[row, column] = value
    df.to_csv(file_path, index=False)
154
+
155
# Pipeline: these statements run whenever the module is executed or imported.
# 1. download the raw measurement files, 2. average them per day,
# 3. prepare and rescale the weather table, 4. merge the averages into it.
api_call()
NO2, O3 = clean_values()
add_columns()
scale()
insert_pollution(NO2, O3)

# Remove the intermediate files; only recorded_data.csv is kept.
for leftover in ('combined_data.csv', 'weather_data.csv'):
    os.remove(leftover)
data_loading.py → src/data_loading.py RENAMED
File without changes
helper_functions.py → src/helper_functions.py RENAMED
@@ -1,22 +1,4 @@
1
  import streamlit as st
2
- import joblib
3
- import pandas as pd
4
-
5
- @st.cache_resource(ttl=6*300) # Reruns every 6 hours
6
- def run_model():
7
- # Load or train your model (pretrained model in this case)
8
- model = joblib.load("linear_regression_model.pkl")
9
-
10
- # Static input values
11
- input_data = pd.DataFrame({
12
- 'Temperature': [20.0],
13
- 'Wind Speed': [10.0],
14
- 'Humidity': [50.0]
15
- })
16
-
17
- # Run the model with static input
18
- prediction = model.predict(input_data)
19
- return prediction
20
 
21
  # Custom function to create styled metric boxes with subscripts, smaller label, and larger metric
22
  def custom_metric_box(label, value, delta):
 
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  # Custom function to create styled metric boxes with subscripts, smaller label, and larger metric
4
  def custom_metric_box(label, value, delta):
src/models_loading.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import joblib
4
+ import pandas as pd
5
+ import streamlit as st
6
+ from dotenv import load_dotenv
7
+ from huggingface_hub import hf_hub_download, login
8
+
9
+
10
def load_model(particle):
    """Download and deserialize the forecasting model for ``particle``.

    The model file is fetched from the Hugging Face repository
    ``elisaklunder/Utrecht-{particle}-Forecasting-Model`` and loaded with
    ``joblib``. The access token is read from the
    ``HUGGINGFACE_DOWNLOAD_TOKEN`` environment variable (a ``.env`` file is
    honoured through ``load_dotenv``).

    Args:
        particle: Pollutant identifier, ``"O3"`` or ``"NO2"``.

    Returns:
        The object stored in the downloaded model file.

    Raises:
        ValueError: If ``particle`` is not a known pollutant.
    """
    # FIXME: "hehehe" is the placeholder that was committed for NO2; replace
    # it with the real file name in the NO2 model repository.
    model_files = {"O3": "O3_svr_model.pkl", "NO2": "hehehe"}
    try:
        file_name = model_files[particle]
    except KeyError:
        # Fail before any network access, with a message naming the input.
        raise ValueError(
            f"Unknown particle {particle!r}; expected one of {sorted(model_files)}"
        ) from None

    load_dotenv()
    token = os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN")
    if token:
        # Without a token, login() falls back to an interactive prompt, which
        # cannot be answered inside a server process, so it is skipped.
        login(token=token)

    repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
    model_path = hf_hub_download(repo_id=repo_id, filename=file_name)
    # NOTE(security): joblib.load unpickles the file and can execute
    # arbitrary code; only load models from repositories you trust.
    model = joblib.load(model_path)

    return model
24
+
25
+
26
@st.cache_resource(ttl=6 * 60 * 60)  # Cached result expires after 6 hours
def run_model(particle):
    """Return the model prediction for ``particle`` on fixed sample input.

    The model is obtained through ``load_model`` and evaluated on a single
    hard-coded row (temperature 20.0, wind speed 10.0, humidity 50.0). The
    result is cached by Streamlit; ``ttl`` is given in seconds, so the
    previous ``6 * 300`` only kept it for 30 minutes instead of the intended
    6 hours.

    Args:
        particle: Pollutant identifier forwarded to ``load_model``.

    Returns:
        Whatever ``model.predict`` returns for the sample row.
    """
    model = load_model(particle)

    # Static input values
    input_data = pd.DataFrame(
        {"Temperature": [20.0], "Wind Speed": [10.0], "Humidity": [50.0]}
    )

    # Run the model with static input
    return model.predict(input_data)