|
import numpy as np |
|
import pandas as pd |
|
|
|
|
|
|
|
def map_weather_to_numerical(weather, ordinal_values):
    """Return the highest ordinal found among comma-separated weather labels.

    Args:
        weather: Comma-separated labels, e.g. ``"Rain, Thunderstorm"``.
            Whitespace around each label is ignored. ``None`` and float NaN
            (how pandas represents an empty cell) are treated as "no labels".
        ordinal_values: Mapping from label to its numeric rank.

    Returns:
        The largest rank among the labels present in ``ordinal_values``,
        never less than 0. Returns 0 when nothing matches.
    """
    # A missing cell has no labels; without this guard ``.split`` raises
    # AttributeError on None/NaN.
    if weather is None or (isinstance(weather, float) and weather != weather):
        return 0

    labels = (part.strip() for part in weather.split(","))
    ranks = (ordinal_values[label] for label in labels if label in ordinal_values)
    # The outer max keeps 0 as the floor even if every matched rank is negative.
    return max(max(ranks, default=0), 0)
|
|
|
def standardizeX(X, mean, std):
    """Standardize the leading features of ``X`` in place and return it.

    For each feature index ``i < len(mean)``, ``X[:, :, i]`` becomes
    ``(X[:, :, i] - mean[i]) / std[i]``. Features at index ``len(mean)`` and
    beyond are left untouched.

    Args:
        X: Array indexed as ``X[:, :, i]``, with features on the third axis.
            It is modified in place.
            NOTE(review): presumably a float NumPy array of shape
            (samples, timesteps, features) - confirm against the caller.
        mean: Per-feature means, one per feature to standardize.
        std: Per-feature standard deviations, at least ``len(mean)`` long.

    Returns:
        The same object ``X``, after modification.
    """
    n_features = len(mean)
    center = np.asarray(mean, dtype=float)
    # np.array copies, so patching zeros below never touches the caller's std.
    scale = np.array(std, dtype=float)[:n_features]
    # A constant feature has std 0; dividing by it would fill the column with
    # NaN/inf. Scaling by 1 leaves such a feature centered only.
    scale[scale == 0] = 1.0

    # One broadcast over the last axis replaces the per-feature index loop.
    X[:, :, :n_features] = (X[:, :, :n_features] - center) / scale
    return X
|
|
|
def process_data(df):
    """Clean a raw weather table and derive ordinal and cyclical features.

    Steps, in order:

    1. Drop a fixed list of unused columns (only those actually present).
    2. Fill gaps in the numeric columns with ``interpolate()``.
    3. Replace missing ``Conditions`` with ``"Clear"`` and missing
       ``Weather Type`` with ``""``.
    4. Rename ``Relative Humidity`` to ``Humidity``.
    5. Encode ``Weather Type`` as ordinal ``Rain`` and ``Storm`` columns and
       ``Conditions`` as an ordinal ``Overview`` column, then drop the two
       text columns.
    6. Replace ``Date time`` with ``sin(day)``, ``cos(day)``, ``sin(year)``
       and ``cos(year)`` computed from the POSIX timestamp.

    Args:
        df: DataFrame containing at least the ``Date time``, ``Conditions``
            and ``Weather Type`` columns.

    Returns:
        A new DataFrame. Surviving input columns keep their order, followed
        by ``Rain``, ``Storm``, ``Overview`` and the four cyclical columns.
        The caller's frame is not modified.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    redundant_cols = [
        "Unnamed: 0", "Minimum Temperature", "Maximum Temperature",
        "Snow Depth", "Heat Index", "Precipitation Cover", "Wind Gust",
        "Wind Chill", "Info", "Latitude", "Longitude", "Address",
        "Resolved Address", "Name",
    ]
    # drop() without inplace returns a new frame, so the caller's data is
    # never half-modified by this function.
    df = df.drop(columns=[col for col in redundant_cols if col in df.columns])

    # Interpolate numeric columns only: the text columns cannot be
    # interpolated, and recent pandas versions warn or raise when
    # interpolate() is called on object-dtype data.
    numeric_cols = df.select_dtypes(include="number").columns
    if len(numeric_cols) > 0:
        df[numeric_cols] = df[numeric_cols].interpolate()

    # Assign the filled column back. ``df[col].fillna(..., inplace=True)`` acts
    # on a temporary Series and leaves df unchanged under pandas Copy-on-Write.
    df["Conditions"] = df["Conditions"].fillna("Clear")
    df["Weather Type"] = df["Weather Type"].fillna("")

    df = df.rename(columns={"Relative Humidity": "Humidity"})

    # POSIX seconds per row; kept as a local so no temporary column is needed.
    seconds = pd.to_datetime(df["Date time"]).map(pd.Timestamp.timestamp)
    df = df.drop(columns=["Date time"])

    # Ordinal encodings: when a cell lists several labels the highest rank wins.
    # NOTE(review): 'Light Drizzle' ranks above 'Drizzle', and 'Rain' shares
    # rank 2 with 'Partially cloudy' in the overview map - confirm intended.
    rain_values = {'Heavy Rain': 7, 'Snow And Rain Showers': 6, 'Rain Showers': 5, 'Rain': 4, 'Light Rain': 3, 'Light Drizzle': 2, 'Drizzle': 1}
    storm_values = {'Dust storm': 1, 'Lightning Without Thunder': 2, 'Thunderstorm Without Precipitation': 3, 'Thunderstorm': 4}
    overview = {'Clear': 1, 'Partially cloudy': 2, 'Rain': 2, 'Overcast': 3}

    df["Rain"] = df["Weather Type"].apply(lambda s: map_weather_to_numerical(s, rain_values))
    df["Storm"] = df["Weather Type"].apply(lambda s: map_weather_to_numerical(s, storm_values))
    df["Overview"] = df["Conditions"].apply(lambda s: map_weather_to_numerical(s, overview))
    df = df.drop(columns=["Weather Type", "Conditions"])

    # Encode time of day and time of year as points on a circle so that
    # 23:59 sits next to 00:00 and 31 December sits next to 1 January.
    day_in_seconds = 24 * 3600
    year_in_seconds = day_in_seconds * 365.2425  # mean Gregorian year length
    for label, period in (("day", day_in_seconds), ("year", year_in_seconds)):
        phase = (seconds * (2 * np.pi)) / period
        df[f"sin({label})"] = np.sin(phase)
        df[f"cos({label})"] = np.cos(phase)

    return df
|
|
|
|