|
import codecs |
|
import csv |
|
import http.client |
|
import os |
|
import re |
|
import sys |
|
import urllib.request |
|
from datetime import date, timedelta |
|
from io import StringIO |
|
|
|
import pandas as pd |
|
|
|
|
|
def pollution_data():
    """Download eight daily windows of NO2/O3 measurements into CSV files.

    For each of the eight most recent 24-hour windows (every window ends at
    09:00:00Z; the newest one ends today) and for each particle, the three
    configured stations are queried on ``api.luchtmeetnet.nl``. Each response
    body is parsed with ``pd.read_csv``, reduced to the columns whose label
    contains ``value``, and the per-station frames are concatenated and
    written to ``{particle}_{window_end}.csv`` in the current working
    directory.

    Every output file contains only the frames fetched for its own particle
    and window. (Previously a single list was shared by all iterations, so
    each file also contained everything downloaded before it.)

    The HTTP status of the responses is not inspected.
    """
    particles = ["NO2", "O3"]
    stations = ["NL10636", "NL10639", "NL10643"]

    # Resolve "today" once so all windows are derived from the same day even
    # if the run crosses midnight.
    base_day = date.today()

    for offset in range(8):
        window_end = (base_day - timedelta(offset)).isoformat() + "T09:00:00Z"
        window_start = (base_day - timedelta(offset + 1)).isoformat() + "T09:00:00Z"

        for particle in particles:
            # Fresh list per particle and window: the file written below must
            # not pick up frames from other particles or other days.
            station_frames = []

            for station in stations:
                query = (
                    f"/open_api/measurements?station_number={station}"
                    f"&formula={particle}&page=1"
                    "&order_by=timestamp_measured&order_direction=desc"
                    f"&end={window_end}&start={window_start}"
                )
                conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
                try:
                    conn.request("GET", query, '', {})
                    decoded_data = conn.getresponse().read().decode("utf-8")
                finally:
                    # Release the socket even when the request fails.
                    conn.close()

                frame = pd.read_csv(StringIO(decoded_data))
                station_frames.append(frame.filter(like='value'))

            combined_data = pd.concat(station_frames, ignore_index=True)
            combined_data.to_csv(f'{particle}_{window_end}.csv', index=False)
|
|
|
def delete_csv(csvs):
    """Remove every path in ``csvs`` that refers to an existing regular file.

    Args:
        csvs: Iterable of file paths. Entries that do not exist, or that are
            not regular files (for example directories), are skipped.
    """
    # The loop variable is intentionally not named ``csv`` so that it does
    # not shadow the ``csv`` module imported by this file.
    for path in csvs:
        if not os.path.isfile(path):
            continue
        try:
            os.remove(path)
        except FileNotFoundError:
            # The file disappeared between the check and the removal; the
            # desired end state is already reached.
            pass
|
|
|
def clean_values():
    """Average the numbers in the per-day pollution CSVs, then delete the files.

    The files ``{particle}_{day}T09:00:00Z.csv`` for ``NO2`` and ``O3``, for
    today and each of the seven preceding days, are read from the current
    working directory. From every cell of every row (the first row included)
    the first number matched by the pattern below is collected, and the mean
    of those numbers becomes that file's average.

    Returns:
        A ``(NO2, O3)`` tuple of lists holding one average per day, newest
        day first. A file without any numeric cell contributes ``NaN`` so
        that list positions keep corresponding to days.

    Raises:
        FileNotFoundError: If one of the expected files is missing. No file
            is deleted in that case.
    """
    # Compiled once instead of being looked up again for every cell.
    number_pattern = re.compile(r"[-+]?\d*\.\d+|\d+")

    averages = {"NO2": [], "O3": []}
    csvs = []

    # Resolve "today" once so every file name is derived from the same day.
    base_day = date.today()

    for offset in range(8):
        stamp = (base_day - timedelta(offset)).isoformat() + "T09:00:00Z"

        for particle, daily_averages in averages.items():
            csv_file = f'{particle}_{stamp}.csv'
            csvs.append(csv_file)

            values = []
            with open(csv_file, 'r', newline='', encoding='utf-8') as file:
                for row in csv.reader(file):
                    for value in row:
                        match = number_pattern.search(value)
                        if match:
                            values.append(float(match.group()))

            if values:
                daily_averages.append(sum(values) / len(values))
            else:
                # Keep a placeholder instead of skipping the day; dropping
                # it would shift all older days by one position.
                daily_averages.append(float('nan'))

    delete_csv(csvs)

    return averages["NO2"], averages["O3"]
|
|
|
|
|
def add_columns(file_path='weather_data.csv'):
    """Load the weather CSV and add empty ``NO2``, ``O3`` and ``weekday`` columns.

    Args:
        file_path: CSV file to read. Defaults to ``'weather_data.csv'``
            relative to the current working directory.

    Returns:
        A DataFrame with ``NO2`` inserted at position 1, ``O3`` at position 2
        and ``weekday`` at position 10, all filled with ``None``. Inserting
        at position 10 requires the file to provide at least eight columns.
    """
    df = pd.read_csv(file_path)

    # Order matters: each insertion shifts the columns to its right.
    for position, column in ((1, 'NO2'), (2, 'O3'), (10, 'weekday')):
        df.insert(position, column, None)

    return df
|
|
|
|
|
def scale(data):
    """Reorder, rename and rescale the weather columns of ``data``.

    Args:
        data: DataFrame with at least ten columns, including ``datetime``,
            ``windspeed``, ``temp``, ``solarradiation``, ``precip``,
            ``sealevelpressure``, ``visibility`` and ``humidity``. It is not
            modified.

    Returns:
        A new DataFrame in which
        * three columns have been moved by position (see the moves below),
        * the weather columns are renamed (``datetime`` -> ``date``, ...),
        * ``date`` is parsed to datetimes and ``weekday`` holds its day name,
        * ``wind_speed`` is divided by 3.6 and multiplied by 10, the other
          scaled columns are multiplied by 10,
        * all numeric weather columns are cast to ``int`` (which truncates
          the fractional part).
    """
    columns = list(data.columns)

    # (destination, source) moves applied one after another; each source
    # index refers to the list as left by the previous move.
    for destination, source in ((3, 6), (5, 9), (9, 6)):
        columns.insert(destination, columns.pop(source))

    # One reindex for the final order instead of one copy per move.
    df = data[columns].rename(columns={
        'datetime': 'date',
        'windspeed': 'wind_speed',
        'temp': 'mean_temp',
        'solarradiation': 'global_radiation',
        'precip': 'percipitation',
        'sealevelpressure': 'pressure',
        'visibility': 'minimum_visibility',
    })

    df['date'] = pd.to_datetime(df['date'])
    df['weekday'] = df['date'].dt.day_name()

    # NOTE(review): presumably converts km/h to m/s before the x10 scaling
    # below - confirm the unit of the source column.
    df['wind_speed'] = df['wind_speed'] / 3.6

    for column in ('wind_speed', 'mean_temp', 'minimum_visibility',
                   'percipitation', 'pressure'):
        df[column] = df[column] * 10

    for column in ('wind_speed', 'mean_temp', 'minimum_visibility',
                   'percipitation', 'pressure', 'humidity',
                   'global_radiation'):
        df[column] = df[column].astype(int)

    return df
|
|
|
def insert_pollution(NO2, O3, data, output_path='dataset.csv'):
    """Fill the ``NO2`` and ``O3`` columns of ``data`` and write it to CSV.

    The last element of each list goes to row label 0, the second to last to
    row label 1, and so on. ``data`` is modified in place; the input lists
    are left untouched.

    Args:
        NO2: List of NO2 values; the last element is written first.
        O3: List of O3 values; the last element is written first.
        data: DataFrame that receives the values in its ``NO2`` and ``O3``
            columns.
        output_path: Destination CSV file. Defaults to ``'dataset.csv'``
            relative to the current working directory.
    """
    df = data

    for column, averages in (('NO2', NO2), ('O3', O3)):
        # Walk the list back to front rather than popping from it, so the
        # caller's list is not emptied as a side effect.
        for row_label, average in enumerate(reversed(averages)):
            df.loc[row_label, column] = average

    df.to_csv(output_path, index=False)
|
|
|
def weather_data():
    """Download eight days of daily weather for Utrecht into ``weather_data.csv``.

    The Visual Crossing timeline API is queried for the range from seven days
    ago up to and including today, and the CSV response is written to
    ``weather_data.csv`` in the directory that contains this script.

    On an HTTP or URL error a message is printed and the process exits with
    status 1.

    NOTE(review): ``add_columns()`` and ``get_data()`` in this file refer to
    ``'weather_data.csv'`` relative to the current working directory, which
    only matches the location used here when the script is run from its own
    directory - confirm that this is always the case.
    """
    base_day = date.today()
    today = base_day.isoformat()
    seven_days = (base_day - timedelta(7)).isoformat()

    # NOTE(review): the API key is embedded in the source; consider loading
    # it from configuration or the environment instead.
    url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Utrecht/{seven_days}/{today}?unitGroup=metric&elements=datetime%2Cwindspeed%2Ctemp%2Csolarradiation%2Cprecip%2Cpressure%2Cvisibility%2Chumidity&include=days&key=7Y6AY56M6RWVNHQ3SAVHNJWFS&maxStations=1&contentType=csv"

    current_dir = os.path.dirname(os.path.realpath(__file__))
    file_path = os.path.join(current_dir, 'weather_data.csv')

    try:
        # The context manager closes the HTTP response once it has been
        # streamed to disk.
        with urllib.request.urlopen(url) as ResultBytes:
            CSVText = csv.reader(codecs.iterdecode(ResultBytes, 'utf-8'))
            with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
                csv.writer(csvfile).writerows(CSVText)
    except urllib.error.HTTPError as e:
        # HTTPError must be handled before URLError, its base class.
        ErrorInfo = e.read().decode()
        print('Error code: ', e.code, ErrorInfo)
        # Non-zero status so callers can tell the download failed.
        sys.exit(1)
    except urllib.error.URLError as e:
        # A plain URLError has neither ``read()`` nor ``code``; only
        # ``reason`` is available.
        print('Error: ', e.reason)
        sys.exit(1)
|
|
|
|
|
def get_data():
    """Run the whole collection pipeline and leave ``dataset.csv`` behind.

    Steps, in order: download the weather CSV, download the pollution CSVs,
    reduce the pollution CSVs to daily averages, load and extend the weather
    table, rescale it, merge the averages in and write the result, and
    finally remove the temporary ``weather_data.csv``.
    """
    # Fetch the raw inputs.
    weather_data()
    pollution_data()

    # Pollution averages are computed before the weather table is loaded.
    no2_averages, o3_averages = clean_values()
    prepared = scale(add_columns())

    insert_pollution(no2_averages, o3_averages, prepared)

    # The weather file is only an intermediate artifact.
    os.remove('weather_data.csv')
|
|