|
import codecs |
|
import csv |
|
import http.client |
|
import os |
|
import re |
|
import sys |
|
import urllib.request |
|
from datetime import date, timedelta |
|
from io import StringIO |
|
|
|
import pandas as pd |
|
|
|
WEATHER_DATA_FILE = "weather_data.csv" |
|
POLLUTION_DATA_FILE = "pollution_data.csv" |
|
|
|
|
|
def update_weather_data(): |
|
today = date.today().isoformat() |
|
|
|
if os.path.exists(WEATHER_DATA_FILE): |
|
df = pd.read_csv(WEATHER_DATA_FILE) |
|
last_date = pd.to_datetime(df["date"]).max() |
|
start_date = (last_date + timedelta(1)).isoformat() |
|
else: |
|
df = pd.DataFrame() |
|
start_date = (date.today() - timedelta(7)).isoformat() |
|
|
|
try: |
|
ResultBytes = urllib.request.urlopen( |
|
f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Utrecht/{start_date}/{today}?unitGroup=metric&elements=datetime%2Cwindspeed%2Ctemp%2Csolarradiation%2Cprecip%2Cpressure%2Cvisibility%2Chumidity&include=days&key=7Y6AY56M6RWVNHQ3SAVHNJWFS&maxStations=1&contentType=csv" |
|
) |
|
CSVText = csv.reader(codecs.iterdecode(ResultBytes, "utf-8")) |
|
|
|
new_data = pd.DataFrame(list(CSVText)) |
|
new_data.columns = new_data.iloc[0] |
|
new_data = new_data[1:] |
|
new_data = new_data.rename(columns={"datetime": "date"}) |
|
|
|
updated_df = pd.concat([df, new_data], ignore_index=True) |
|
updated_df.drop_duplicates(subset="date", keep="last", inplace=True) |
|
updated_df.to_csv(WEATHER_DATA_FILE, index=False) |
|
|
|
except urllib.error.HTTPError as e: |
|
ErrorInfo = e.read().decode() |
|
print("Error code: ", e.code, ErrorInfo) |
|
sys.exit() |
|
except urllib.error.URLError as e: |
|
ErrorInfo = e.read().decode() |
|
print("Error code: ", e.code, ErrorInfo) |
|
sys.exit() |
|
|
|
|
|
def update_pollution_data(): |
|
O3 = [] |
|
NO2 = [] |
|
particles = ["NO2", "O3"] |
|
stations = ["NL10636", "NL10639", "NL10643"] |
|
all_dataframes = [] |
|
today = date.today().isoformat() + "T09:00:00Z" |
|
yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z" |
|
|
|
if os.path.exists(POLLUTION_DATA_FILE): |
|
existing_data = pd.read_csv(POLLUTION_DATA_FILE) |
|
last_date = pd.to_datetime(existing_data["date"]).max() |
|
if last_date >= pd.Timestamp(date.today()): |
|
print("Data is already up to date.") |
|
return |
|
|
|
|
|
for particle in particles: |
|
for station in stations: |
|
conn = http.client.HTTPSConnection("api.luchtmeetnet.nl") |
|
payload = "" |
|
headers = {} |
|
conn.request( |
|
"GET", |
|
f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}", |
|
payload, |
|
headers, |
|
) |
|
res = conn.getresponse() |
|
data = res.read() |
|
decoded_data = data.decode("utf-8") |
|
df = pd.read_csv(StringIO(decoded_data)) |
|
df = df.filter(like="value") |
|
all_dataframes.append(df) |
|
combined_data = pd.concat(all_dataframes, ignore_index=True) |
|
values = [] |
|
|
|
for row in combined_data: |
|
cleaned_value = re.findall(r"[-+]?\d*\.\d+|\d+", row) |
|
if cleaned_value: |
|
values.append(float(cleaned_value[0])) |
|
|
|
if values: |
|
avg = sum(values) / len(values) |
|
if particle == "NO2": |
|
NO2.append(avg) |
|
else: |
|
O3.append(avg) |
|
|
|
new_data = pd.DataFrame( |
|
{ |
|
"date": [date.today()], |
|
"NO2": NO2, |
|
"O3": O3, |
|
} |
|
) |
|
|
|
updated_data = pd.concat([existing_data, new_data], ignore_index=True) |
|
updated_data.drop_duplicates(subset="date", keep="last", inplace=True) |
|
|
|
updated_data.to_csv(POLLUTION_DATA_FILE, index=False) |
|
|
|
|
|
def get_combined_data(): |
|
update_weather_data() |
|
update_pollution_data() |
|
|
|
weather_df = pd.read_csv(WEATHER_DATA_FILE) |
|
|
|
weather_df.insert(1, "NO2", None) |
|
weather_df.insert(2, "O3", None) |
|
weather_df.insert(10, "weekday", None) |
|
columns = list(weather_df.columns) |
|
columns.insert(3, columns.pop(6)) |
|
weather_df = weather_df[columns] |
|
columns.insert(5, columns.pop(9)) |
|
weather_df = weather_df[columns] |
|
columns.insert(9, columns.pop(6)) |
|
weather_df = weather_df[columns] |
|
|
|
combined_df = weather_df |
|
|
|
|
|
combined_df = combined_df.rename( |
|
columns={ |
|
"date": "date", |
|
"windspeed": "wind_speed", |
|
"temp": "mean_temp", |
|
"solarradiation": "global_radiation", |
|
"precip": "percipitation", |
|
"sealevelpressure": "pressure", |
|
"visibility": "minimum_visibility", |
|
} |
|
) |
|
|
|
combined_df["date"] = pd.to_datetime(combined_df["date"]) |
|
combined_df["weekday"] = combined_df["date"].dt.day_name() |
|
|
|
combined_df["wind_speed"] = (combined_df["wind_speed"] / 3.6) * 10 |
|
combined_df["mean_temp"] = combined_df["mean_temp"] * 10 |
|
combined_df["minimum_visibility"] = combined_df["minimum_visibility"] * 10 |
|
combined_df["percipitation"] = combined_df["percipitation"] * 10 |
|
combined_df["pressure"] = combined_df["pressure"] * 10 |
|
|
|
combined_df["wind_speed"] = combined_df["wind_speed"].astype(int) |
|
combined_df["mean_temp"] = combined_df["mean_temp"].astype(int) |
|
combined_df["minimum_visibility"] = combined_df["minimum_visibility"].astype(int) |
|
combined_df["percipitation"] = combined_df["percipitation"].astype(int) |
|
combined_df["pressure"] = combined_df["pressure"].astype(int) |
|
combined_df["humidity"] = combined_df["humidity"].astype(int) |
|
combined_df["global_radiation"] = combined_df["global_radiation"].astype(int) |
|
|
|
pollution_df = pd.read_csv(POLLUTION_DATA_FILE) |
|
combined_df["NO2"] = pollution_df["NO2"] |
|
combined_df["O3"] = pollution_df["O3"] |
|
|
|
return combined_df |
|
|