Spaces:
Sleeping
Sleeping
import numpy as np
import pandas as pd
# Function to map weather values to numerical values
def map_weather_to_numerical(weather, ordinal_values):
    """Return the highest ordinal value among comma-separated weather labels.

    Args:
        weather: Comma-separated labels, e.g. ``"Rain, Overcast"``.
            Whitespace around each label is ignored. A non-string value
            (such as the NaN pandas uses for a missing cell, or ``None``)
            is treated as containing no labels.
        ordinal_values: Mapping from label to its numerical rank.

    Returns:
        The largest mapped value among the recognised labels, or 0 when no
        label is recognised. Because 0 is the starting value, the result is
        never below 0.
    """
    # Missing cells are not strings and have no ``split``; treat them as
    # "nothing recognised" instead of raising AttributeError.
    if not isinstance(weather, str):
        return 0

    val = 0
    for label in weather.split(","):
        label = label.strip()
        if label in ordinal_values:
            val = max(ordinal_values[label], val)
    return val
def standardizeX(X, mean, std):
    """Standardize the leading features on the last axis of a 3-D array.

    For each feature index ``i`` in ``range(len(mean))`` this computes
    ``(X[:, :, i] - mean[i]) / std[i]``. Features beyond ``len(mean)`` are
    left unchanged.

    Args:
        X: Array indexed as ``X[:, :, feature]``. A floating-point ndarray is
            modified in place and returned. Any other input (integer array,
            nested list) is first converted to a float array, because
            writing standardized values into an integer array would silently
            truncate them; in that case the input itself is not modified.
        mean: 1-D sequence of per-feature means.
        std: 1-D sequence of per-feature standard deviations, at least as
            long as ``mean``. A zero entry is treated as 1 so that a constant
            feature does not produce inf/nan.

    Returns:
        The standardized array (the same object as ``X`` when ``X`` is a
        floating-point ndarray).
    """
    X = np.asarray(X)  # no copy when X is already an ndarray
    if not np.issubdtype(X.dtype, np.inexact):
        X = X.astype(float)

    mean = np.asarray(mean, dtype=float)
    n_features = len(mean)
    std = np.asarray(std, dtype=float)[:n_features]
    # Guard against division by zero for constant features.
    safe_std = np.where(std == 0, 1.0, std)

    # Broadcasting over the last axis replaces the per-feature Python loop.
    X[:, :, :n_features] = (X[:, :, :n_features] - mean) / safe_std
    return X
def process_data(df):
    """Convert a raw weather DataFrame into purely numerical features.

    Steps, in order:
      1. Drop redundant columns that are present.
      2. Linearly interpolate missing values in numeric columns.
      3. Fill missing 'Conditions' with "Clear" and 'Weather Type' with "".
      4. Rename 'Relative Humidity' to 'Humidity'.
      5. Replace 'Wind Direction' with 'sin(wind)' / 'cos(wind)'.
      6. Replace 'Weather Type' / 'Conditions' with the ordinal columns
         'Rain', 'Storm' and 'Overview'.
      7. Replace 'Date time' with 'sin(day)', 'cos(day)', 'sin(year)' and
         'cos(year)'.

    Args:
        df: Raw DataFrame. Must contain the columns 'Conditions',
            'Weather Type', 'Date time' and 'Wind Direction'. It is not
            modified; all work happens on a new frame.

    Returns:
        A new DataFrame holding the remaining original columns followed by
        the derived columns in the order listed above. No temporary columns
        are added along the way.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Drop redundant columns. drop() without inplace returns a new frame, so
    # the caller's DataFrame is left untouched.
    redundant_cols = [
        "Unnamed: 0", "Minimum Temperature", "Maximum Temperature",
        "Snow Depth", "Heat Index", "Precipitation Cover", "Wind Gust",
        "Wind Chill", "Info", "Latitude", "Longitude", "Address",
        "Resolved Address", "Name",
    ]
    df = df.drop(columns=[col for col in redundant_cols if col in df.columns])

    # Interpolate missing values in numeric columns only; calling
    # DataFrame.interpolate() on a frame that still holds object (string)
    # columns is deprecated in recent pandas.
    numeric_cols = df.select_dtypes(include="number").columns
    if len(numeric_cols):
        df[numeric_cols] = df[numeric_cols].interpolate()

    # Fill missing values in 'Conditions' and 'Weather Type'. Assign the
    # result back: fillna(inplace=True) on a selected column is chained
    # assignment and does not update the frame under pandas Copy-on-Write.
    df["Conditions"] = df["Conditions"].fillna("Clear")
    df["Weather Type"] = df["Weather Type"].fillna("")

    df = df.rename(columns={"Relative Humidity": "Humidity"})

    # Parse the timestamp column and remove it from the frame.
    timestamps = pd.to_datetime(df.pop("Date time"))

    # Processing wind direction.
    # NOTE(review): if 'Wind Direction' is in degrees, the radians conversion
    # is pi/180; pi/360 maps a full turn onto half a circle, so 0 and 360
    # get opposite cos values. Left unchanged because already-fitted models
    # or normalisation statistics may depend on the current feature values.
    # Confirm the units and the downstream consumers before changing this.
    angle = (df.pop("Wind Direction") * np.pi) / 360
    df["sin(wind)"] = np.sin(angle)
    df["cos(wind)"] = np.cos(angle)

    # Map weather labels to ordinal values (highest matching label wins).
    rain_values = {'Heavy Rain': 7, 'Snow And Rain Showers': 6, 'Rain Showers': 5, 'Rain': 4, 'Light Rain': 3, 'Light Drizzle': 2, 'Drizzle': 1}
    storm_values = {'Dust storm': 1, 'Lightning Without Thunder': 2, 'Thunderstorm Without Precipitation': 3, 'Thunderstorm': 4}
    overview = {'Clear': 1, 'Partially cloudy': 2, 'Rain': 2, 'Overcast': 3}
    weather_type = df.pop("Weather Type")
    conditions = df.pop("Conditions")
    df["Rain"] = weather_type.apply(map_weather_to_numerical, args=(rain_values,))
    df["Storm"] = weather_type.apply(map_weather_to_numerical, args=(storm_values,))
    df["Overview"] = conditions.apply(map_weather_to_numerical, args=(overview,))

    # Convert timestamps to seconds since the epoch.
    seconds = timestamps.map(pd.Timestamp.timestamp)

    # Encode the periodic nature of days and years as sin/cos pairs.
    day_in_seconds = 24 * 3600
    year_in_seconds = day_in_seconds * 365.2425
    df["sin(day)"] = np.sin((seconds * (2 * np.pi)) / day_in_seconds)
    df["cos(day)"] = np.cos((seconds * (2 * np.pi)) / day_in_seconds)
    df["sin(year)"] = np.sin((seconds * (2 * np.pi)) / year_in_seconds)
    df["cos(year)"] = np.cos((seconds * (2 * np.pi)) / year_in_seconds)
    return df