Spaces:
Sleeping
Sleeping
Commit
·
66accd2
1
Parent(s):
a8eee80
changes to project
Browse files- .github/workflows/features-and-predictions.yml +35 -0
- .github/workflows/sync_to_hf.yml +8 -4
- app.py +136 -106
- features/__pycache__/calendar.cpython-311.pyc +0 -0
- features/__pycache__/electricity_prices.cpython-311.pyc +0 -0
- features/__pycache__/weather_measures.cpython-311.pyc +0 -0
- features/calendar.py +2 -2
- features/plots.py +0 -208
- hide/Old/1_feature_backfill_OLD.ipynb +0 -1404
- hide/Old/2_feature_pipeline_OLD.ipynb +0 -561
- hide/Old/3_training_pipeline copy.ipynb +0 -0
- hide/Old/3_training_pipeline_OLD.ipynb +0 -349
- hide/Old/4_batch_inference_OLD.ipynb +0 -80
- hide/Old/predict_example.py +0 -33
- hide/notebooks_dev/3_training_pipeline_dev_prophet.ipynb +0 -943
- hide/notebooks_dev/3_training_pipeline_dev_pytorch.ipynb +0 -874
- hide/notebooks_dev/3_training_pipeline_dev_tensorflow.ipynb +0 -818
- hide/notebooks_dev/3_training_pipeline_dev_windowtensor.ipynb +0 -0
- notebooks/1_feature_backfill.ipynb +391 -709
- notebooks/2_feature_pipeline.ipynb +293 -672
- notebooks/3_training_pipeline.ipynb +0 -0
- notebooks/4_batch_inference.ipynb +0 -0
- notebooks/model/dk_electricity_model.pkl +0 -0
- notebooks/test.ipynb +0 -0
- scripts/run_feature_and_prediction_pipelines.sh +11 -0
.github/workflows/features-and-predictions.yml
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: electricity-feature-and-prediction-pipelines
|
2 |
+
|
3 |
+
on:
|
4 |
+
|
5 |
+
# To run this workflow manually from the Actions tab
|
6 |
+
workflow_dispatch:
|
7 |
+
|
8 |
+
# Schedule the workflow to run at 23:50 UTC every day
|
9 |
+
schedule:
|
10 |
+
- cron: '50 23 * * *'
|
11 |
+
|
12 |
+
jobs:
|
13 |
+
test_schedule:
|
14 |
+
runs-on: ubuntu-latest
|
15 |
+
steps:
|
16 |
+
- name: checkout repo content
|
17 |
+
uses: actions/checkout@v2
|
18 |
+
|
19 |
+
- name: setup python
|
20 |
+
uses: actions/setup-python@v2
|
21 |
+
with:
|
22 |
+
python-version: '3.11.5'
|
23 |
+
|
24 |
+
- name: install python packages
|
25 |
+
run: |
|
26 |
+
python -m pip install --upgrade pip
|
27 |
+
pip install -r requirements.txt
|
28 |
+
|
29 |
+
- name: execute python workflows from bash script
|
30 |
+
env:
|
31 |
+
HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY }}
|
32 |
+
run: ./scripts/run_feature_and_prediction_pipelines.sh
|
33 |
+
|
34 |
+
|
35 |
+
|
.github/workflows/sync_to_hf.yml
CHANGED
@@ -1,13 +1,17 @@
|
|
1 |
name: Sync to Hugging Face hub
|
2 |
on:
|
|
|
|
|
|
|
|
|
|
|
3 |
schedule:
|
4 |
-
- cron: '
|
|
|
|
|
5 |
push:
|
6 |
branches: [main]
|
7 |
|
8 |
-
# to run this workflow manually from the Actions tab
|
9 |
-
workflow_dispatch:
|
10 |
-
|
11 |
jobs:
|
12 |
sync-to-hub:
|
13 |
runs-on: ubuntu-latest
|
|
|
1 |
name: Sync to Hugging Face hub
|
2 |
on:
|
3 |
+
|
4 |
+
# To run this workflow manually from the Actions tab
|
5 |
+
workflow_dispatch:
|
6 |
+
|
7 |
+
# Schedule the workflow to run at 23:59 UTC every day
|
8 |
schedule:
|
9 |
+
- cron: '59 23 * * *'
|
10 |
+
|
11 |
+
# Push events to the main branch
|
12 |
push:
|
13 |
branches: [main]
|
14 |
|
|
|
|
|
|
|
15 |
jobs:
|
16 |
sync-to-hub:
|
17 |
runs-on: ubuntu-latest
|
app.py
CHANGED
@@ -19,8 +19,12 @@ from streamlit_folium import st_folium
|
|
19 |
# These are the functions we have created to generate features for electricity prices and weather measures
|
20 |
from features import electricity_prices, weather_measures, calendar
|
21 |
|
22 |
-
def print_fancy_header(text, font_size=22, color="#
|
23 |
-
res = f'<span style="color:{color}; font-size:
|
|
|
|
|
|
|
|
|
24 |
st.markdown(res, unsafe_allow_html=True)
|
25 |
|
26 |
# I want to cache this so streamlit would run much faster after restart (it restarts a lot)
|
@@ -47,67 +51,102 @@ def download_model(name="electricity_price_prediction_model",
|
|
47 |
saved_model_dir = retrieved_model.download()
|
48 |
return saved_model_dir
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
|
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
'paper_bgcolor': 'rgba(0, 0, 0, 0)',
|
61 |
-
'legend_title': 'type',
|
62 |
-
'legend_font': {'size': 12},
|
63 |
-
'legend_bgcolor': 'rgba(0, 0, 0, 0)',
|
64 |
-
'xaxis': {'title': 'Date'},
|
65 |
-
'yaxis': {'title': 'dk1_spotpricedkk_kwh'},
|
66 |
-
'shapes': [{
|
67 |
-
'type': 'line',
|
68 |
-
'x0': datetime.datetime.now().strftime('%Y-%m-%d'),
|
69 |
-
'y0': 0,
|
70 |
-
'x1': datetime.datetime.now().strftime('%Y-%m-%d'),
|
71 |
-
'y1': df['dk1_spotpricedkk_kwh'].max(),
|
72 |
-
'line': {'color': 'red', 'width': 2, 'dash': 'dashdot'}
|
73 |
-
}]
|
74 |
-
})
|
75 |
-
|
76 |
-
# show plot
|
77 |
-
st.plotly_chart(fig, use_container_width=True)
|
78 |
-
|
79 |
-
with open('data/calendar_incl_holiday.csv') as csv_file:
|
80 |
-
target_days = csv.reader(csv_file)
|
81 |
|
82 |
#########################
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
st.write(3 * "-")
|
86 |
-
|
87 |
-
|
88 |
-
st.
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
# Retrieve the model registry
|
96 |
-
mr = project.get_model_registry()
|
97 |
-
|
98 |
-
# Retrieving the model from the Model Registry
|
99 |
-
retrieved_model = mr.get_model(
|
100 |
-
name="electricity_price_prediction_model",
|
101 |
-
version=1,
|
102 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
-
#
|
108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
|
111 |
|
112 |
# I am going to load data for the last 60 days (for feature engineering)
|
113 |
today = datetime.date.today()
|
@@ -115,70 +154,61 @@ date_threshold = today - datetime.timedelta(days=60)
|
|
115 |
|
116 |
st.write(3 * "-")
|
117 |
print_fancy_header('\n☁️ Retriving batch data from Feature Store...')
|
118 |
-
# Fetching weather forecast measures for the next 5 days
|
119 |
-
weather_forecast_df = weather_measures.forecast_weather_measures(
|
120 |
-
forecast_length=5
|
121 |
-
)
|
122 |
-
|
123 |
-
# Fetching danish calendar
|
124 |
-
calendar_df = calendar.get_calendar()
|
125 |
|
126 |
-
|
127 |
-
new_data = pd.merge(weather_forecast_df, calendar_df, how='inner', left_on='date', right_on='date')
|
128 |
|
129 |
-
|
130 |
-
st.write(new_data.sample(5))
|
131 |
|
132 |
-
#
|
133 |
-
|
|
|
|
|
|
|
|
|
134 |
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
}
|
141 |
|
142 |
-
#
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
-
|
147 |
-
st.
|
148 |
|
|
|
|
|
|
|
149 |
|
150 |
-
|
151 |
-
st.write(3 * '-')
|
152 |
-
st.write("\n")
|
153 |
|
154 |
-
|
|
|
|
|
155 |
|
156 |
-
# Reshape the predictions data to a Table format, where each row represents a hour and each column a day
|
157 |
-
table_df = predictions_df['prediction'].values.reshape(-1, 24)
|
158 |
-
table_df = pd.DataFrame(table_df, columns=[f'{i}:00' for i in range(24)], index = [f'Day {i}' for i in range(table_df.shape[0])])
|
159 |
|
160 |
-
st.write(table_df.T.style.set_properties(**{'width': '100%', 'max-width': 'none'}))
|
161 |
|
162 |
-
#########################
|
163 |
-
st.write(3 * '-')
|
164 |
-
st.write("\n")
|
165 |
-
|
166 |
-
# Create a slider for selecting the number of hours to display
|
167 |
-
num_hours = st.slider("Select number of hours to display", min_value=1, max_value=120, value=48)
|
168 |
-
|
169 |
-
# Filter the predictions dataframe based on the selected number of hours
|
170 |
-
filtered_predictions_df = predictions_df.head(num_hours)
|
171 |
-
|
172 |
-
# Create Altair chart with line and dots
|
173 |
-
chart = alt.Chart(filtered_predictions_df).mark_line(point=True).encode(
|
174 |
-
x='time:T',
|
175 |
-
y='prediction:Q',
|
176 |
-
tooltip=[alt.Tooltip('time:T', title='Date', format='%d-%m-%Y'),
|
177 |
-
alt.Tooltip('time:T', title='Time', format='%H:%M'),
|
178 |
-
alt.Tooltip('prediction:Q', title='Spot Price (DKK)', format='.2f')
|
179 |
-
]
|
180 |
-
)
|
181 |
|
182 |
-
# Display the chart
|
183 |
-
st.altair_chart(chart, use_container_width=True)
|
184 |
|
|
|
19 |
# These are the functions we have created to generate features for electricity prices and weather measures
|
20 |
from features import electricity_prices, weather_measures, calendar
|
21 |
|
22 |
+
def print_fancy_header(text, font_width="bold", font_size=22, color="#2656a3"):
|
23 |
+
res = f'<span style="font-width:{font_width}; color:{color}; font-size:{font_size}px;">{text}</span>'
|
24 |
+
st.markdown(res, unsafe_allow_html=True)
|
25 |
+
|
26 |
+
def print_fancy_subheader(text, font_width="bold", font_size=22, color="#333"):
|
27 |
+
res = f'<span style="font-width:{font_width}; color:{color}; font-size:{font_size}px;">{text}</span>'
|
28 |
st.markdown(res, unsafe_allow_html=True)
|
29 |
|
30 |
# I want to cache this so streamlit would run much faster after restart (it restarts a lot)
|
|
|
51 |
saved_model_dir = retrieved_model.download()
|
52 |
return saved_model_dir
|
53 |
|
54 |
+
# with open('data/calendar_incl_holiday.csv') as csv_file:
|
55 |
+
# target_days = csv.reader(csv_file)
|
56 |
+
|
57 |
+
# Function to load the dataset
|
58 |
+
@st.cache_data # Cache the function to enhance performance
|
59 |
+
def load_data():
|
60 |
+
# Fetching weather forecast measures for the next 5 days
|
61 |
+
weather_forecast_df = weather_measures.forecast_weather_measures(
|
62 |
+
forecast_length=5
|
63 |
+
)
|
64 |
+
|
65 |
+
# Fetching danish calendar
|
66 |
+
calendar_df = calendar.get_calendar()
|
67 |
+
|
68 |
+
# Merging the weather forecast and calendar dataframes
|
69 |
+
new_data = pd.merge(weather_forecast_df, calendar_df, how='inner', left_on='date', right_on='date')
|
70 |
+
|
71 |
+
st.write("New data:")
|
72 |
+
st.write(new_data.sample(5))
|
73 |
+
|
74 |
+
# Drop columns 'date', 'datetime', 'timestamp' from the DataFrame 'new_data'
|
75 |
+
data = new_data.drop(columns=['date', 'datetime', 'timestamp'])
|
76 |
|
77 |
+
predictions = retrieved_xgboost_model.predict(data)
|
78 |
|
79 |
+
predictions_data = {
|
80 |
+
'prediction': predictions,
|
81 |
+
'time': new_data["datetime"],
|
82 |
+
}
|
83 |
+
|
84 |
+
predictions_df = pd.DataFrame(predictions_data).sort_values(by='time')
|
85 |
+
|
86 |
+
return predictions_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
#########################
|
89 |
+
|
90 |
+
progress_bar = st.sidebar.header('⚙️ Working Progress')
|
91 |
+
progress_bar = st.sidebar.progress(0)
|
92 |
+
|
93 |
+
# Title for the streamlit app
|
94 |
+
st.title('Electricity Price Prediction 🌦')
|
95 |
+
|
96 |
+
# Subtitle
|
97 |
+
st.markdown("""
|
98 |
+
Welcome to the electricity price predicter for DK1.
|
99 |
+
""")
|
100 |
|
101 |
st.write(3 * "-")
|
102 |
+
|
103 |
+
with st.expander("📊 **Data Engineering and Machine Learning Operations in Business**"):
|
104 |
+
st.markdown("""
|
105 |
+
LEARNING OBJECTIVES
|
106 |
+
- Using our skills for designing, implementing, and managing data pipelines and ML systems.
|
107 |
+
- Focus on practical applications within a business context.
|
108 |
+
- Cover topics such as data ingestion, preprocessing, model deployment, monitoring, and maintenance.
|
109 |
+
- Emphasize industry best practices for effective operation of ML systems.
|
110 |
+
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
)
|
112 |
+
|
113 |
+
with st.expander("📊 **This assigment**"):
|
114 |
+
st.markdown("""
|
115 |
+
The objective of this assignment is to build a prediction system that predicts the electricity prices in Denmark (area DK1) based on weather conditions, previous prices, and the Danish holidays.
|
116 |
+
"""
|
117 |
+
)
|
118 |
+
|
119 |
+
with st.sidebar:
|
120 |
+
# st.write("This code will be printed to the sidebar.")
|
121 |
+
|
122 |
+
print_fancy_header('\n📡 Connecting to Hopsworks Feature Store...')
|
123 |
|
124 |
+
st.write("Logging... ")
|
125 |
+
# Please enter your Hopsworks API key in the command prompt.
|
126 |
+
# project = hopsworks.login(project = "camillah", api_key_value=os.environ['HOPSWORKS_API_KEY'])
|
127 |
+
project = hopsworks.login()
|
128 |
+
fs = project.get_feature_store()
|
129 |
+
progress_bar.progress(40)
|
130 |
+
st.write("✅ Logged in successfully!")
|
131 |
|
132 |
+
# Retrieve the model registry
|
133 |
+
mr = project.get_model_registry()
|
134 |
+
|
135 |
+
# Retrieving the model from the Model Registry
|
136 |
+
retrieved_model = mr.get_model(
|
137 |
+
name="electricity_price_prediction_model",
|
138 |
+
version=1,
|
139 |
+
)
|
140 |
+
|
141 |
+
# Downloading the saved model to a local directory
|
142 |
+
saved_model_dir = retrieved_model.download()
|
143 |
+
|
144 |
+
# Loading the saved XGB model
|
145 |
+
retrieved_xgboost_model = joblib.load(saved_model_dir + "/dk_electricity_model.pkl")
|
146 |
+
|
147 |
+
st.write("✅ Model successfully loaded!")
|
148 |
|
149 |
+
progress_bar.progress(80)
|
150 |
|
151 |
# I am going to load data for the last 60 days (for feature engineering)
|
152 |
today = datetime.date.today()
|
|
|
154 |
|
155 |
st.write(3 * "-")
|
156 |
print_fancy_header('\n☁️ Retriving batch data from Feature Store...')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
+
predictions_df = load_data()
|
|
|
159 |
|
160 |
+
progress_bar.progress(100)
|
|
|
161 |
|
162 |
+
# Sidebar filter: Date range
|
163 |
+
min_value = 1
|
164 |
+
max_value = int(len(predictions_df['time'].unique()) / 24)
|
165 |
+
default = int(48 / 24)
|
166 |
+
date_range = st.sidebar.slider("Select Date Range", min_value=min_value, max_value=max_value, value=default)
|
167 |
+
filtered_predictions_df = predictions_df.head(date_range * 24)
|
168 |
|
169 |
+
visualization_option = st.selectbox(
|
170 |
+
"Select Visualization 🎨",
|
171 |
+
["Matrix",
|
172 |
+
"Linechart"]
|
173 |
+
)
|
|
|
174 |
|
175 |
+
# Visualizations based on user selection
|
176 |
+
if visualization_option == "Matrix":
|
177 |
+
data = filtered_predictions_df
|
178 |
+
data['date'] = data['time'].dt.strftime('%Y-%m-%d')
|
179 |
+
data['time_of_day'] = data['time'].dt.strftime('%H:%M')
|
180 |
+
data.drop(columns=['time'], inplace=True)
|
181 |
+
|
182 |
+
# Pivot the DataFrame
|
183 |
+
pivot_df = data.pivot(index='time_of_day', columns='date', values='prediction')
|
184 |
+
|
185 |
+
st.write(pivot_df)
|
186 |
+
|
187 |
+
elif visualization_option == "Linechart":
|
188 |
+
# Create Altair chart with line and dots
|
189 |
+
chart = alt.Chart(filtered_predictions_df).mark_line(point=True).encode(
|
190 |
+
x='time:T',
|
191 |
+
y='prediction:Q',
|
192 |
+
tooltip=[alt.Tooltip('time:T', title='Date', format='%d-%m-%Y'),
|
193 |
+
alt.Tooltip('time:T', title='Time', format='%H:%M'),
|
194 |
+
alt.Tooltip('prediction:Q', title='Spot Price (DKK)', format='.2f')
|
195 |
+
]
|
196 |
+
)
|
197 |
|
198 |
+
# Display the chart
|
199 |
+
st.altair_chart(chart, use_container_width=True)
|
200 |
|
201 |
+
# #########################
|
202 |
+
# st.write(3 * '-')
|
203 |
+
# st.write("\n")
|
204 |
|
205 |
+
# print_fancy_header('\n📈 Predictions Table for today and 4 days ahead')
|
|
|
|
|
206 |
|
207 |
+
# #########################
|
208 |
+
# st.write(3 * '-')
|
209 |
+
# st.write("\n")
|
210 |
|
|
|
|
|
|
|
211 |
|
|
|
212 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
213 |
|
|
|
|
|
214 |
|
features/__pycache__/calendar.cpython-311.pyc
CHANGED
Binary files a/features/__pycache__/calendar.cpython-311.pyc and b/features/__pycache__/calendar.cpython-311.pyc differ
|
|
features/__pycache__/electricity_prices.cpython-311.pyc
CHANGED
Binary files a/features/__pycache__/electricity_prices.cpython-311.pyc and b/features/__pycache__/electricity_prices.cpython-311.pyc differ
|
|
features/__pycache__/weather_measures.cpython-311.pyc
CHANGED
Binary files a/features/__pycache__/weather_measures.cpython-311.pyc and b/features/__pycache__/weather_measures.cpython-311.pyc differ
|
|
features/calendar.py
CHANGED
@@ -3,7 +3,7 @@ import numpy as np
|
|
3 |
import pandas as pd
|
4 |
|
5 |
|
6 |
-
def
|
7 |
"""
|
8 |
Fetches calendar for Denmark.
|
9 |
|
@@ -25,7 +25,7 @@ def get_calendar() -> pd.DataFrame:
|
|
25 |
df['day'] = df['date_'].dt.day
|
26 |
df['month'] = df['date_'].dt.month
|
27 |
df['year'] = df['date_'].dt.year
|
28 |
-
df['
|
29 |
|
30 |
# Drop the columns 'type' and 'date_' to finalize the calendar dataframe
|
31 |
calendar = df.drop(['type','date_'], axis=1)
|
|
|
3 |
import pandas as pd
|
4 |
|
5 |
|
6 |
+
def dk_calendar() -> pd.DataFrame:
|
7 |
"""
|
8 |
Fetches calendar for Denmark.
|
9 |
|
|
|
25 |
df['day'] = df['date_'].dt.day
|
26 |
df['month'] = df['date_'].dt.month
|
27 |
df['year'] = df['date_'].dt.year
|
28 |
+
df['workday'] = np.where(df['type'] == 'Not a Workday', 0, 1)
|
29 |
|
30 |
# Drop the columns 'type' and 'date_' to finalize the calendar dataframe
|
31 |
calendar = df.drop(['type','date_'], axis=1)
|
features/plots.py
DELETED
@@ -1,208 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
import numpy as np
|
3 |
-
from datetime import date, timedelta
|
4 |
-
import datetime
|
5 |
-
from tqdm import tqdm
|
6 |
-
import plotly.express as px
|
7 |
-
import plotly.graph_objects as go
|
8 |
-
import plotly.colors as pc
|
9 |
-
from typing import List, Union, Optional, Tuple, Dict
|
10 |
-
|
11 |
-
|
12 |
-
def plot_historical_id(ids_to_show: List[int], data: pd.DataFrame) -> go.Figure:
|
13 |
-
"""
|
14 |
-
Plots time series data for a specified list of IDs.
|
15 |
-
|
16 |
-
Parameters:
|
17 |
-
- ids_to_show (list): A list of IDs for which time series data should be plotted.
|
18 |
-
- data (pd.DataFrame): The DataFrame containing the data to be plotted, with columns ['date', 'id', 'price'].
|
19 |
-
|
20 |
-
Returns:
|
21 |
-
- Figure
|
22 |
-
"""
|
23 |
-
# Filter the DataFrame to include only the specified IDs
|
24 |
-
filtered_df = data[data['id'].isin(ids_to_show)]
|
25 |
-
|
26 |
-
# Convert the 'date' column to datetime type
|
27 |
-
filtered_df['date'] = pd.to_datetime(filtered_df['date'], format='%Y-%m-%d')
|
28 |
-
filtered_df.sort_values('date', inplace=True)
|
29 |
-
|
30 |
-
# Generate a colormap with distinct colors based on the number of unique IDs
|
31 |
-
unique_ids = filtered_df['id'].unique()
|
32 |
-
num_ids = len(unique_ids)
|
33 |
-
colors = pc.qualitative.Set1 * (num_ids // len(pc.qualitative.Set1) + 1)
|
34 |
-
|
35 |
-
# Create a dictionary to map IDs to colors
|
36 |
-
color_map = dict(zip(unique_ids, colors[:num_ids]))
|
37 |
-
|
38 |
-
# Create a time series plot using Plotly Express
|
39 |
-
fig = px.line(
|
40 |
-
filtered_df,
|
41 |
-
x='date',
|
42 |
-
y='price',
|
43 |
-
color='id',
|
44 |
-
title=f'Historical Prices for {ids_to_show} IDs',
|
45 |
-
labels={'date': 'Date', 'price': 'Price'},
|
46 |
-
line_group='id',
|
47 |
-
color_discrete_map=color_map,
|
48 |
-
)
|
49 |
-
|
50 |
-
return fig
|
51 |
-
|
52 |
-
|
53 |
-
def plot_prediction_test(
|
54 |
-
id_to_show: int,
|
55 |
-
X_train: pd.DataFrame,
|
56 |
-
X_test: pd.DataFrame,
|
57 |
-
y_train: Union[pd.Series, pd.DataFrame],
|
58 |
-
y_test: Union[pd.Series, pd.DataFrame],
|
59 |
-
train_date: pd.Series,
|
60 |
-
test_date: pd.Series,
|
61 |
-
predictions: Optional[pd.Series] = None
|
62 |
-
) -> go.Figure:
|
63 |
-
"""
|
64 |
-
Plots a time series for a specific ID, showing training and test data on the same plot.
|
65 |
-
|
66 |
-
Parameters:
|
67 |
-
- id_to_show (int): The ID to be displayed in the plot.
|
68 |
-
- X_train (pd.DataFrame): The feature data for the training set.
|
69 |
-
- X_test (pd.DataFrame): The feature data for the test set.
|
70 |
-
- y_train (pd.Series or pd.DataFrame): The target data for the training set.
|
71 |
-
- y_test (pd.Series or pd.DataFrame): The target data for the test set.
|
72 |
-
- train_date (pd.Series): The date column for the training data.
|
73 |
-
- test_date (pd.Series): The date column for the test data.
|
74 |
-
- predictions (pd.Series or None): Predicted values for the test data. Default is None.
|
75 |
-
|
76 |
-
Returns:
|
77 |
-
- Figure
|
78 |
-
"""
|
79 |
-
# Combine features and target data for training and test sets
|
80 |
-
train = pd.concat([train_date, X_train, y_train], axis=1)
|
81 |
-
test = pd.concat([test_date, X_test, y_test], axis=1)
|
82 |
-
|
83 |
-
# Filter and sort data for the specified ID
|
84 |
-
train_sorted = train[train.id == id_to_show].sort_values('date')
|
85 |
-
test_sorted = test[test.id == id_to_show].sort_values('date')
|
86 |
-
|
87 |
-
# Create a Plotly figure
|
88 |
-
fig = go.Figure()
|
89 |
-
|
90 |
-
# Add a trace for training data (blue)
|
91 |
-
fig.add_trace(go.Scatter(
|
92 |
-
x=train_sorted['date'],
|
93 |
-
y=train_sorted['price'],
|
94 |
-
mode='lines',
|
95 |
-
name='Training Data',
|
96 |
-
line=dict(color='blue')
|
97 |
-
))
|
98 |
-
|
99 |
-
# Add a trace for test data (green)
|
100 |
-
fig.add_trace(go.Scatter(
|
101 |
-
x=test_sorted['date'],
|
102 |
-
y=test_sorted['price'],
|
103 |
-
mode='lines',
|
104 |
-
name='Test Data',
|
105 |
-
line=dict(color='green')
|
106 |
-
))
|
107 |
-
|
108 |
-
if predictions is not None:
|
109 |
-
pred_df = pd.DataFrame()
|
110 |
-
pred_df['date'] = test_sorted['date']
|
111 |
-
pred_df['price'] = predictions
|
112 |
-
fig.add_trace(go.Scatter(
|
113 |
-
x=pred_df['date'],
|
114 |
-
y=pred_df['price'],
|
115 |
-
mode='lines',
|
116 |
-
name='Prediction',
|
117 |
-
line=dict(color='red')
|
118 |
-
))
|
119 |
-
|
120 |
-
|
121 |
-
# Set X-axis range to span the entire date range from both training and test data
|
122 |
-
fig.update_xaxes(range=[train_sorted['date'].min(), test_sorted['date'].max()])
|
123 |
-
|
124 |
-
# Customize plot layout
|
125 |
-
fig.update_layout(
|
126 |
-
title=f'Time Series for the {id_to_show} ID',
|
127 |
-
xaxis_title='Date',
|
128 |
-
yaxis_title='Price',
|
129 |
-
legend_title='Data Type'
|
130 |
-
)
|
131 |
-
|
132 |
-
return fig
|
133 |
-
|
134 |
-
|
135 |
-
def plot_prediction(
|
136 |
-
id_to_show: int,
|
137 |
-
data: pd.DataFrame,
|
138 |
-
week_ago: str,
|
139 |
-
predictions: Optional[pd.Series] = None,
|
140 |
-
) -> go.Figure:
|
141 |
-
"""
|
142 |
-
Display a time series plot for a specific ID, showcasing historical data, real prices, and predicted prices.
|
143 |
-
|
144 |
-
Parameters:
|
145 |
-
- id_to_show (int): The unique identifier for the data series to be displayed.
|
146 |
-
- data (pd.DataFrame): A DataFrame containing time series data.
|
147 |
-
- week_ago (str): A string representing a date one week ago (in 'YYYY-MM-DD' format).
|
148 |
-
- predictions (pd.Series or None, optional): Predicted price values for the test data. Default is None.
|
149 |
-
|
150 |
-
Returns:
|
151 |
-
- fig (plotly.graph_objs.Figure): A Plotly figure object containing the generated time series plot.
|
152 |
-
"""
|
153 |
-
data_sorted = data[data.id == id_to_show].sort_values('date')
|
154 |
-
data_sorted['date'] = pd.to_datetime(data_sorted['date'])
|
155 |
-
|
156 |
-
time_ago = (datetime.datetime.strptime(week_ago, '%Y-%m-%d') - timedelta(days=210)).strftime("%Y-%m-%d")
|
157 |
-
data_historical = data_sorted.loc[
|
158 |
-
(data_sorted['date'] <= datetime.datetime.strptime(week_ago, "%Y-%m-%d")) &
|
159 |
-
(data_sorted['date'] >= datetime.datetime.strptime(time_ago, "%Y-%m-%d"))
|
160 |
-
]
|
161 |
-
data_last_week = data_sorted[data_sorted.date > week_ago]
|
162 |
-
|
163 |
-
# Create a Plotly figure
|
164 |
-
fig = go.Figure()
|
165 |
-
|
166 |
-
# Add a trace for historical data (blue)
|
167 |
-
fig.add_trace(go.Scatter(
|
168 |
-
x=data_historical['date'],
|
169 |
-
y=data_historical['price'],
|
170 |
-
mode='lines',
|
171 |
-
name='Historical Data',
|
172 |
-
line=dict(color='blue')
|
173 |
-
))
|
174 |
-
|
175 |
-
# Add a trace for real price data (green)
|
176 |
-
fig.add_trace(go.Scatter(
|
177 |
-
x=data_last_week['date'],
|
178 |
-
y=data_last_week['price'],
|
179 |
-
mode='lines',
|
180 |
-
name='Real Price',
|
181 |
-
line=dict(color='green')
|
182 |
-
))
|
183 |
-
|
184 |
-
if predictions is not None:
|
185 |
-
pred_df = pd.DataFrame()
|
186 |
-
pred_df['date'] = data_last_week['date']
|
187 |
-
pred_df['price'] = predictions
|
188 |
-
fig.add_trace(go.Scatter(
|
189 |
-
x=pred_df['date'],
|
190 |
-
y=pred_df['price'],
|
191 |
-
mode='lines',
|
192 |
-
name='Predicted Price',
|
193 |
-
line=dict(color='red')
|
194 |
-
))
|
195 |
-
|
196 |
-
|
197 |
-
# Set X-axis range to span the entire date range from both training and test data
|
198 |
-
fig.update_xaxes(range=[data_historical['date'].min(), data_last_week['date'].max()])
|
199 |
-
|
200 |
-
# Customize plot layout
|
201 |
-
fig.update_layout(
|
202 |
-
title=f'Predicted price for the {id_to_show} ID',
|
203 |
-
xaxis_title='Date',
|
204 |
-
yaxis_title='Price',
|
205 |
-
legend_title='Data Type'
|
206 |
-
)
|
207 |
-
|
208 |
-
return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/Old/1_feature_backfill_OLD.ipynb
DELETED
@@ -1,1404 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 01: Feature Backfill</span>"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## 🗒️ This notebook is divided into the following sections:\n",
|
15 |
-
"1. Load the data and process features\n",
|
16 |
-
"2. Connect to the Hopsworks feature store\n",
|
17 |
-
"3. Create feature groups and upload them to the feature store"
|
18 |
-
]
|
19 |
-
},
|
20 |
-
{
|
21 |
-
"cell_type": "markdown",
|
22 |
-
"metadata": {},
|
23 |
-
"source": [
|
24 |
-
"## <span style='color:#2656a3'> ⚙️ Import of libraries and packages\n",
|
25 |
-
"\n",
|
26 |
-
"First, we'll install the Python packages required for this notebook. We'll use the `--quiet` flag after specifying the names of the libraries to ensure a silent installation process. Then, we'll proceed to import all the necessary libraries."
|
27 |
-
]
|
28 |
-
},
|
29 |
-
{
|
30 |
-
"cell_type": "code",
|
31 |
-
"execution_count": 1,
|
32 |
-
"metadata": {},
|
33 |
-
"outputs": [],
|
34 |
-
"source": [
|
35 |
-
"# Install of the packages for hopsworks\n",
|
36 |
-
"# !pip install -U hopsworks --quiet"
|
37 |
-
]
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"cell_type": "code",
|
41 |
-
"execution_count": 3,
|
42 |
-
"metadata": {},
|
43 |
-
"outputs": [],
|
44 |
-
"source": [
|
45 |
-
"# Importing of the packages for the needed libraries for the Jupyter notebook\n",
|
46 |
-
"import pandas as pd\n",
|
47 |
-
"import requests\n",
|
48 |
-
"\n",
|
49 |
-
"# Ignore warnings\n",
|
50 |
-
"import warnings \n",
|
51 |
-
"warnings.filterwarnings('ignore')"
|
52 |
-
]
|
53 |
-
},
|
54 |
-
{
|
55 |
-
"cell_type": "markdown",
|
56 |
-
"metadata": {},
|
57 |
-
"source": [
|
58 |
-
"## <span style=\"color:#2656a3;\"> 💽 Load the historical data\n",
|
59 |
-
"\n",
|
60 |
-
"The data you will use comes from three different sources:\n",
|
61 |
-
"\n",
|
62 |
-
"- Electricity prices in Denmark per day from [Energinet](https://www.energidataservice.dk).\n",
|
63 |
-
"- Different meteorological observations from [Open meteo](https://www.open-meteo.com).\n",
|
64 |
-
"- A Danish calendar with a type indicating whether or not each date is a national holiday. This file was made manually by the group and is located in the \"*data*\" folder inside this repository."
|
65 |
-
]
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"cell_type": "markdown",
|
69 |
-
"metadata": {},
|
70 |
-
"source": [
|
71 |
-
"### <span style=\"color:#2656a3;\">💸 Electricity prices per day from Energinet\n",
|
72 |
-
"*Hvis vi skal have tariffer med i modellen, anbefales det at vi sætter en faktor på 0.2 i tidsrummet 22 - 16 og en faktor på 0.6 eller 0.7 i tidsrummet 17 - 21.*\n",
|
73 |
-
"\n",
|
74 |
-
"This first dataset is Electricity prices per day from Energinet/Dataservice. Here we use "
|
75 |
-
]
|
76 |
-
},
|
77 |
-
{
|
78 |
-
"cell_type": "code",
|
79 |
-
"execution_count": 4,
|
80 |
-
"metadata": {},
|
81 |
-
"outputs": [],
|
82 |
-
"source": [
|
83 |
-
"# Defining the URL for the API call to the electricity price data\n",
|
84 |
-
"electricity_api_url = ('https://api.energidataservice.dk/dataset/Elspotprices?offset=0&start=2022-01-01T00:00&end=2023-12-31T23:59&filter=%7B%22PriceArea%22:[%22DK1%22]%7D&sort=HourUTC%20DESC')"
|
85 |
-
]
|
86 |
-
},
|
87 |
-
{
|
88 |
-
"cell_type": "code",
|
89 |
-
"execution_count": 5,
|
90 |
-
"metadata": {},
|
91 |
-
"outputs": [
|
92 |
-
{
|
93 |
-
"name": "stdout",
|
94 |
-
"output_type": "stream",
|
95 |
-
"text": [
|
96 |
-
"<Response [200]>\n"
|
97 |
-
]
|
98 |
-
}
|
99 |
-
],
|
100 |
-
"source": [
|
101 |
-
"# Fetch data from the API and make the output to a pandas dataframe\n",
|
102 |
-
"electricity_data_response = requests.get(electricity_api_url)\n",
|
103 |
-
"electricity_data = electricity_data_response.json()\n",
|
104 |
-
"electricity_df = pd.DataFrame(electricity_data['records'])\n",
|
105 |
-
"\n",
|
106 |
-
"# Checking the result of the API call. If the response is 200 then the API call was successful\n",
|
107 |
-
"print(electricity_data_response)"
|
108 |
-
]
|
109 |
-
},
|
110 |
-
{
|
111 |
-
"cell_type": "code",
|
112 |
-
"execution_count": 6,
|
113 |
-
"metadata": {},
|
114 |
-
"outputs": [
|
115 |
-
{
|
116 |
-
"data": {
|
117 |
-
"text/html": [
|
118 |
-
"<div>\n",
|
119 |
-
"<style scoped>\n",
|
120 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
121 |
-
" vertical-align: middle;\n",
|
122 |
-
" }\n",
|
123 |
-
"\n",
|
124 |
-
" .dataframe tbody tr th {\n",
|
125 |
-
" vertical-align: top;\n",
|
126 |
-
" }\n",
|
127 |
-
"\n",
|
128 |
-
" .dataframe thead th {\n",
|
129 |
-
" text-align: right;\n",
|
130 |
-
" }\n",
|
131 |
-
"</style>\n",
|
132 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
133 |
-
" <thead>\n",
|
134 |
-
" <tr style=\"text-align: right;\">\n",
|
135 |
-
" <th></th>\n",
|
136 |
-
" <th>HourUTC</th>\n",
|
137 |
-
" <th>HourDK</th>\n",
|
138 |
-
" <th>PriceArea</th>\n",
|
139 |
-
" <th>SpotPriceDKK</th>\n",
|
140 |
-
" <th>SpotPriceEUR</th>\n",
|
141 |
-
" </tr>\n",
|
142 |
-
" </thead>\n",
|
143 |
-
" <tbody>\n",
|
144 |
-
" <tr>\n",
|
145 |
-
" <th>0</th>\n",
|
146 |
-
" <td>2023-12-31T22:00:00</td>\n",
|
147 |
-
" <td>2023-12-31T23:00:00</td>\n",
|
148 |
-
" <td>DK1</td>\n",
|
149 |
-
" <td>200.309998</td>\n",
|
150 |
-
" <td>26.870001</td>\n",
|
151 |
-
" </tr>\n",
|
152 |
-
" <tr>\n",
|
153 |
-
" <th>1</th>\n",
|
154 |
-
" <td>2023-12-31T21:00:00</td>\n",
|
155 |
-
" <td>2023-12-31T22:00:00</td>\n",
|
156 |
-
" <td>DK1</td>\n",
|
157 |
-
" <td>213.729996</td>\n",
|
158 |
-
" <td>28.670000</td>\n",
|
159 |
-
" </tr>\n",
|
160 |
-
" <tr>\n",
|
161 |
-
" <th>2</th>\n",
|
162 |
-
" <td>2023-12-31T20:00:00</td>\n",
|
163 |
-
" <td>2023-12-31T21:00:00</td>\n",
|
164 |
-
" <td>DK1</td>\n",
|
165 |
-
" <td>220.660004</td>\n",
|
166 |
-
" <td>29.600000</td>\n",
|
167 |
-
" </tr>\n",
|
168 |
-
" <tr>\n",
|
169 |
-
" <th>3</th>\n",
|
170 |
-
" <td>2023-12-31T19:00:00</td>\n",
|
171 |
-
" <td>2023-12-31T20:00:00</td>\n",
|
172 |
-
" <td>DK1</td>\n",
|
173 |
-
" <td>260.100006</td>\n",
|
174 |
-
" <td>34.889999</td>\n",
|
175 |
-
" </tr>\n",
|
176 |
-
" <tr>\n",
|
177 |
-
" <th>4</th>\n",
|
178 |
-
" <td>2023-12-31T18:00:00</td>\n",
|
179 |
-
" <td>2023-12-31T19:00:00</td>\n",
|
180 |
-
" <td>DK1</td>\n",
|
181 |
-
" <td>295.510010</td>\n",
|
182 |
-
" <td>39.639999</td>\n",
|
183 |
-
" </tr>\n",
|
184 |
-
" </tbody>\n",
|
185 |
-
"</table>\n",
|
186 |
-
"</div>"
|
187 |
-
],
|
188 |
-
"text/plain": [
|
189 |
-
" HourUTC HourDK PriceArea SpotPriceDKK \\\n",
|
190 |
-
"0 2023-12-31T22:00:00 2023-12-31T23:00:00 DK1 200.309998 \n",
|
191 |
-
"1 2023-12-31T21:00:00 2023-12-31T22:00:00 DK1 213.729996 \n",
|
192 |
-
"2 2023-12-31T20:00:00 2023-12-31T21:00:00 DK1 220.660004 \n",
|
193 |
-
"3 2023-12-31T19:00:00 2023-12-31T20:00:00 DK1 260.100006 \n",
|
194 |
-
"4 2023-12-31T18:00:00 2023-12-31T19:00:00 DK1 295.510010 \n",
|
195 |
-
"\n",
|
196 |
-
" SpotPriceEUR \n",
|
197 |
-
"0 26.870001 \n",
|
198 |
-
"1 28.670000 \n",
|
199 |
-
"2 29.600000 \n",
|
200 |
-
"3 34.889999 \n",
|
201 |
-
"4 39.639999 "
|
202 |
-
]
|
203 |
-
},
|
204 |
-
"execution_count": 6,
|
205 |
-
"metadata": {},
|
206 |
-
"output_type": "execute_result"
|
207 |
-
}
|
208 |
-
],
|
209 |
-
"source": [
|
210 |
-
"# Display the first 5 rows of the dataframe\n",
|
211 |
-
"electricity_df.head()"
|
212 |
-
]
|
213 |
-
},
|
214 |
-
{
|
215 |
-
"cell_type": "code",
|
216 |
-
"execution_count": 7,
|
217 |
-
"metadata": {},
|
218 |
-
"outputs": [],
|
219 |
-
"source": [
|
220 |
-
"# Datapreprocessing by making the spotprice per kwh instead of mwh\n",
|
221 |
-
"electricity_df['SpotPriceDKK_KWH'] = electricity_df['SpotPriceDKK'] / 1000"
|
222 |
-
]
|
223 |
-
},
|
224 |
-
{
|
225 |
-
"cell_type": "code",
|
226 |
-
"execution_count": 8,
|
227 |
-
"metadata": {},
|
228 |
-
"outputs": [],
|
229 |
-
"source": [
|
230 |
-
"# Datacleaning by removing the columns that are not needed\n",
|
231 |
-
"electricity_df.drop('SpotPriceDKK', axis=1, inplace=True)\n",
|
232 |
-
"electricity_df.drop('SpotPriceEUR', axis=1, inplace=True)\n",
|
233 |
-
"electricity_df.drop('HourUTC', axis=1, inplace=True)"
|
234 |
-
]
|
235 |
-
},
|
236 |
-
{
|
237 |
-
"cell_type": "code",
|
238 |
-
"execution_count": 9,
|
239 |
-
"metadata": {},
|
240 |
-
"outputs": [],
|
241 |
-
"source": [
|
242 |
-
"# Renaming the columns and reformatting the time column\n",
|
243 |
-
"electricity_df.rename(columns={'HourDK': 'time'}, inplace=True)"
|
244 |
-
]
|
245 |
-
},
|
246 |
-
{
|
247 |
-
"cell_type": "code",
|
248 |
-
"execution_count": 10,
|
249 |
-
"metadata": {},
|
250 |
-
"outputs": [],
|
251 |
-
"source": [
|
252 |
-
"# Formatting the date column\n",
|
253 |
-
"electricity_df['time'] = electricity_df['time'].astype(str).str[:-3]\n",
|
254 |
-
"electricity_df['date'] = electricity_df['time'].str[:10]"
|
255 |
-
]
|
256 |
-
},
|
257 |
-
{
|
258 |
-
"cell_type": "code",
|
259 |
-
"execution_count": 11,
|
260 |
-
"metadata": {},
|
261 |
-
"outputs": [],
|
262 |
-
"source": [
|
263 |
-
"# Creating a new column for the date called electricity_temporary_date_column and insert it as the first column in the dataframe\n",
|
264 |
-
"electricity_temporary_date_column = electricity_df.pop('date')\n",
|
265 |
-
"electricity_df.insert(0, 'date', electricity_temporary_date_column)"
|
266 |
-
]
|
267 |
-
},
|
268 |
-
{
|
269 |
-
"cell_type": "code",
|
270 |
-
"execution_count": 12,
|
271 |
-
"metadata": {},
|
272 |
-
"outputs": [],
|
273 |
-
"source": [
|
274 |
-
"# Convert string 'date' column to date type and 'time' column to datetime format\n",
|
275 |
-
"electricity_df['date'] = pd.to_datetime(electricity_df['date'], format='%Y-%m-%d').dt.date\n",
|
276 |
-
"electricity_df['time'] = pd.to_datetime(electricity_df['time'])"
|
277 |
-
]
|
278 |
-
},
|
279 |
-
{
|
280 |
-
"cell_type": "code",
|
281 |
-
"execution_count": 13,
|
282 |
-
"metadata": {},
|
283 |
-
"outputs": [
|
284 |
-
{
|
285 |
-
"data": {
|
286 |
-
"text/html": [
|
287 |
-
"<div>\n",
|
288 |
-
"<style scoped>\n",
|
289 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
290 |
-
" vertical-align: middle;\n",
|
291 |
-
" }\n",
|
292 |
-
"\n",
|
293 |
-
" .dataframe tbody tr th {\n",
|
294 |
-
" vertical-align: top;\n",
|
295 |
-
" }\n",
|
296 |
-
"\n",
|
297 |
-
" .dataframe thead th {\n",
|
298 |
-
" text-align: right;\n",
|
299 |
-
" }\n",
|
300 |
-
"</style>\n",
|
301 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
302 |
-
" <thead>\n",
|
303 |
-
" <tr style=\"text-align: right;\">\n",
|
304 |
-
" <th></th>\n",
|
305 |
-
" <th>date</th>\n",
|
306 |
-
" <th>time</th>\n",
|
307 |
-
" <th>PriceArea</th>\n",
|
308 |
-
" <th>SpotPriceDKK_KWH</th>\n",
|
309 |
-
" </tr>\n",
|
310 |
-
" </thead>\n",
|
311 |
-
" <tbody>\n",
|
312 |
-
" <tr>\n",
|
313 |
-
" <th>0</th>\n",
|
314 |
-
" <td>2023-12-31</td>\n",
|
315 |
-
" <td>2023-12-31 23:00:00</td>\n",
|
316 |
-
" <td>DK1</td>\n",
|
317 |
-
" <td>0.20031</td>\n",
|
318 |
-
" </tr>\n",
|
319 |
-
" <tr>\n",
|
320 |
-
" <th>1</th>\n",
|
321 |
-
" <td>2023-12-31</td>\n",
|
322 |
-
" <td>2023-12-31 22:00:00</td>\n",
|
323 |
-
" <td>DK1</td>\n",
|
324 |
-
" <td>0.21373</td>\n",
|
325 |
-
" </tr>\n",
|
326 |
-
" <tr>\n",
|
327 |
-
" <th>2</th>\n",
|
328 |
-
" <td>2023-12-31</td>\n",
|
329 |
-
" <td>2023-12-31 21:00:00</td>\n",
|
330 |
-
" <td>DK1</td>\n",
|
331 |
-
" <td>0.22066</td>\n",
|
332 |
-
" </tr>\n",
|
333 |
-
" <tr>\n",
|
334 |
-
" <th>3</th>\n",
|
335 |
-
" <td>2023-12-31</td>\n",
|
336 |
-
" <td>2023-12-31 20:00:00</td>\n",
|
337 |
-
" <td>DK1</td>\n",
|
338 |
-
" <td>0.26010</td>\n",
|
339 |
-
" </tr>\n",
|
340 |
-
" <tr>\n",
|
341 |
-
" <th>4</th>\n",
|
342 |
-
" <td>2023-12-31</td>\n",
|
343 |
-
" <td>2023-12-31 19:00:00</td>\n",
|
344 |
-
" <td>DK1</td>\n",
|
345 |
-
" <td>0.29551</td>\n",
|
346 |
-
" </tr>\n",
|
347 |
-
" </tbody>\n",
|
348 |
-
"</table>\n",
|
349 |
-
"</div>"
|
350 |
-
],
|
351 |
-
"text/plain": [
|
352 |
-
" date time PriceArea SpotPriceDKK_KWH\n",
|
353 |
-
"0 2023-12-31 2023-12-31 23:00:00 DK1 0.20031\n",
|
354 |
-
"1 2023-12-31 2023-12-31 22:00:00 DK1 0.21373\n",
|
355 |
-
"2 2023-12-31 2023-12-31 21:00:00 DK1 0.22066\n",
|
356 |
-
"3 2023-12-31 2023-12-31 20:00:00 DK1 0.26010\n",
|
357 |
-
"4 2023-12-31 2023-12-31 19:00:00 DK1 0.29551"
|
358 |
-
]
|
359 |
-
},
|
360 |
-
"execution_count": 13,
|
361 |
-
"metadata": {},
|
362 |
-
"output_type": "execute_result"
|
363 |
-
}
|
364 |
-
],
|
365 |
-
"source": [
|
366 |
-
"# Display the first 5 rows of the dataframe\n",
|
367 |
-
"electricity_df.head()"
|
368 |
-
]
|
369 |
-
},
|
370 |
-
{
|
371 |
-
"cell_type": "code",
|
372 |
-
"execution_count": 14,
|
373 |
-
"metadata": {},
|
374 |
-
"outputs": [
|
375 |
-
{
|
376 |
-
"name": "stdout",
|
377 |
-
"output_type": "stream",
|
378 |
-
"text": [
|
379 |
-
"<class 'pandas.core.frame.DataFrame'>\n",
|
380 |
-
"RangeIndex: 17520 entries, 0 to 17519\n",
|
381 |
-
"Data columns (total 4 columns):\n",
|
382 |
-
" # Column Non-Null Count Dtype \n",
|
383 |
-
"--- ------ -------------- ----- \n",
|
384 |
-
" 0 date 17520 non-null object \n",
|
385 |
-
" 1 time 17520 non-null datetime64[ns]\n",
|
386 |
-
" 2 PriceArea 17520 non-null object \n",
|
387 |
-
" 3 SpotPriceDKK_KWH 17520 non-null float64 \n",
|
388 |
-
"dtypes: datetime64[ns](1), float64(1), object(2)\n",
|
389 |
-
"memory usage: 547.6+ KB\n"
|
390 |
-
]
|
391 |
-
}
|
392 |
-
],
|
393 |
-
"source": [
|
394 |
-
"# Showing the information for the electricity dataframe\n",
|
395 |
-
"electricity_df.info()"
|
396 |
-
]
|
397 |
-
},
|
398 |
-
{
|
399 |
-
"cell_type": "code",
|
400 |
-
"execution_count": 15,
|
401 |
-
"metadata": {},
|
402 |
-
"outputs": [
|
403 |
-
{
|
404 |
-
"data": {
|
405 |
-
"text/html": [
|
406 |
-
"<div>\n",
|
407 |
-
"<style scoped>\n",
|
408 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
409 |
-
" vertical-align: middle;\n",
|
410 |
-
" }\n",
|
411 |
-
"\n",
|
412 |
-
" .dataframe tbody tr th {\n",
|
413 |
-
" vertical-align: top;\n",
|
414 |
-
" }\n",
|
415 |
-
"\n",
|
416 |
-
" .dataframe thead th {\n",
|
417 |
-
" text-align: right;\n",
|
418 |
-
" }\n",
|
419 |
-
"</style>\n",
|
420 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
421 |
-
" <thead>\n",
|
422 |
-
" <tr style=\"text-align: right;\">\n",
|
423 |
-
" <th></th>\n",
|
424 |
-
" <th>date</th>\n",
|
425 |
-
" <th>time</th>\n",
|
426 |
-
" <th>PriceArea</th>\n",
|
427 |
-
" <th>SpotPriceDKK_KWH</th>\n",
|
428 |
-
" </tr>\n",
|
429 |
-
" </thead>\n",
|
430 |
-
" <tbody>\n",
|
431 |
-
" <tr>\n",
|
432 |
-
" <th>0</th>\n",
|
433 |
-
" <td>2023-12-31</td>\n",
|
434 |
-
" <td>2023-12-31 23:00:00</td>\n",
|
435 |
-
" <td>DK1</td>\n",
|
436 |
-
" <td>0.20031</td>\n",
|
437 |
-
" </tr>\n",
|
438 |
-
" <tr>\n",
|
439 |
-
" <th>2</th>\n",
|
440 |
-
" <td>2023-12-31</td>\n",
|
441 |
-
" <td>2023-12-31 22:00:00</td>\n",
|
442 |
-
" <td>DK1</td>\n",
|
443 |
-
" <td>0.21373</td>\n",
|
444 |
-
" </tr>\n",
|
445 |
-
" <tr>\n",
|
446 |
-
" <th>4</th>\n",
|
447 |
-
" <td>2023-12-31</td>\n",
|
448 |
-
" <td>2023-12-31 21:00:00</td>\n",
|
449 |
-
" <td>DK1</td>\n",
|
450 |
-
" <td>0.22066</td>\n",
|
451 |
-
" </tr>\n",
|
452 |
-
" <tr>\n",
|
453 |
-
" <th>6</th>\n",
|
454 |
-
" <td>2023-12-31</td>\n",
|
455 |
-
" <td>2023-12-31 20:00:00</td>\n",
|
456 |
-
" <td>DK1</td>\n",
|
457 |
-
" <td>0.26010</td>\n",
|
458 |
-
" </tr>\n",
|
459 |
-
" <tr>\n",
|
460 |
-
" <th>8</th>\n",
|
461 |
-
" <td>2023-12-31</td>\n",
|
462 |
-
" <td>2023-12-31 19:00:00</td>\n",
|
463 |
-
" <td>DK1</td>\n",
|
464 |
-
" <td>0.29551</td>\n",
|
465 |
-
" </tr>\n",
|
466 |
-
" </tbody>\n",
|
467 |
-
"</table>\n",
|
468 |
-
"</div>"
|
469 |
-
],
|
470 |
-
"text/plain": [
|
471 |
-
" date time PriceArea SpotPriceDKK_KWH\n",
|
472 |
-
"0 2023-12-31 2023-12-31 23:00:00 DK1 0.20031\n",
|
473 |
-
"2 2023-12-31 2023-12-31 22:00:00 DK1 0.21373\n",
|
474 |
-
"4 2023-12-31 2023-12-31 21:00:00 DK1 0.22066\n",
|
475 |
-
"6 2023-12-31 2023-12-31 20:00:00 DK1 0.26010\n",
|
476 |
-
"8 2023-12-31 2023-12-31 19:00:00 DK1 0.29551"
|
477 |
-
]
|
478 |
-
},
|
479 |
-
"execution_count": 15,
|
480 |
-
"metadata": {},
|
481 |
-
"output_type": "execute_result"
|
482 |
-
}
|
483 |
-
],
|
484 |
-
"source": [
|
485 |
-
"# Fetching historical electricity prices data\n",
|
486 |
-
"electricity = electricity_prices.fetch_electricity_prices(historical=True, start='2022-01-01', end='2023-12-31')\n",
|
487 |
-
"electricity = electricity[(electricity['PriceArea'] == \"DK1\")]\n",
|
488 |
-
"electricity.head()"
|
489 |
-
]
|
490 |
-
},
|
491 |
-
{
|
492 |
-
"cell_type": "markdown",
|
493 |
-
"metadata": {},
|
494 |
-
"source": [
|
495 |
-
"### <span style=\"color:#2656a3;\"> 🌤 Weather measurements from Open Meteo\n",
|
496 |
-
"\n",
|
497 |
-
"Burde have enddate 2023-12-31. url = (\"https://archive-api.open-meteo.com/v1/archive?latitude=57.048&longitude=9.9187&start_date=2022-01-01&end_date=2023-12-31&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m\")"
|
498 |
-
]
|
499 |
-
},
|
500 |
-
{
|
501 |
-
"cell_type": "code",
|
502 |
-
"execution_count": 16,
|
503 |
-
"metadata": {},
|
504 |
-
"outputs": [],
|
505 |
-
"source": [
|
506 |
-
"# Defining the URL for the API call to the weather data \n",
|
507 |
-
"weather_api_url = (\"https://archive-api.open-meteo.com/v1/archive?latitude=57.048&longitude=9.9187&start_date=2022-01-01&end_date=2023-12-31&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m\")"
|
508 |
-
]
|
509 |
-
},
|
510 |
-
{
|
511 |
-
"cell_type": "code",
|
512 |
-
"execution_count": 17,
|
513 |
-
"metadata": {},
|
514 |
-
"outputs": [
|
515 |
-
{
|
516 |
-
"name": "stdout",
|
517 |
-
"output_type": "stream",
|
518 |
-
"text": [
|
519 |
-
"<Response [200]>\n"
|
520 |
-
]
|
521 |
-
}
|
522 |
-
],
|
523 |
-
"source": [
|
524 |
-
"# Fetch data from the API and make the output to a pandas dataframe\n",
|
525 |
-
"weather_data_response = requests.get(weather_api_url)\n",
|
526 |
-
"weather_data = weather_data_response.json()\n",
|
527 |
-
"weather_df = pd.DataFrame(weather_data['hourly'])\n",
|
528 |
-
"\n",
|
529 |
-
"# Checking the result of the API call. If the response is 200 then the API call was successful\n",
|
530 |
-
"print(weather_data_response)"
|
531 |
-
]
|
532 |
-
},
|
533 |
-
{
|
534 |
-
"cell_type": "code",
|
535 |
-
"execution_count": 18,
|
536 |
-
"metadata": {},
|
537 |
-
"outputs": [],
|
538 |
-
"source": [
|
539 |
-
"# Formatting the date column\n",
|
540 |
-
"weather_df['date'] = weather_df['time'].str[:10]"
|
541 |
-
]
|
542 |
-
},
|
543 |
-
{
|
544 |
-
"cell_type": "code",
|
545 |
-
"execution_count": 19,
|
546 |
-
"metadata": {},
|
547 |
-
"outputs": [],
|
548 |
-
"source": [
|
549 |
-
"# Creating a new column for the date called weather_temporary_date_column and insert it as the first column in the dataframe\n",
|
550 |
-
"weather_temporary_date_column = weather_df.pop('date')\n",
|
551 |
-
"weather_df.insert(0, 'date', weather_temporary_date_column)"
|
552 |
-
]
|
553 |
-
},
|
554 |
-
{
|
555 |
-
"cell_type": "code",
|
556 |
-
"execution_count": 20,
|
557 |
-
"metadata": {},
|
558 |
-
"outputs": [],
|
559 |
-
"source": [
|
560 |
-
"# Convert string 'date' column to date type and 'time' column to datetime format\n",
|
561 |
-
"weather_df['date'] = pd.to_datetime(weather_df['date'], format='%Y-%m-%d').dt.date\n",
|
562 |
-
"weather_df['time'] = pd.to_datetime(weather_df['time'])"
|
563 |
-
]
|
564 |
-
},
|
565 |
-
{
|
566 |
-
"cell_type": "code",
|
567 |
-
"execution_count": 21,
|
568 |
-
"metadata": {},
|
569 |
-
"outputs": [
|
570 |
-
{
|
571 |
-
"data": {
|
572 |
-
"text/html": [
|
573 |
-
"<div>\n",
|
574 |
-
"<style scoped>\n",
|
575 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
576 |
-
" vertical-align: middle;\n",
|
577 |
-
" }\n",
|
578 |
-
"\n",
|
579 |
-
" .dataframe tbody tr th {\n",
|
580 |
-
" vertical-align: top;\n",
|
581 |
-
" }\n",
|
582 |
-
"\n",
|
583 |
-
" .dataframe thead th {\n",
|
584 |
-
" text-align: right;\n",
|
585 |
-
" }\n",
|
586 |
-
"</style>\n",
|
587 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
588 |
-
" <thead>\n",
|
589 |
-
" <tr style=\"text-align: right;\">\n",
|
590 |
-
" <th></th>\n",
|
591 |
-
" <th>date</th>\n",
|
592 |
-
" <th>time</th>\n",
|
593 |
-
" <th>temperature_2m</th>\n",
|
594 |
-
" <th>relative_humidity_2m</th>\n",
|
595 |
-
" <th>precipitation</th>\n",
|
596 |
-
" <th>rain</th>\n",
|
597 |
-
" <th>snowfall</th>\n",
|
598 |
-
" <th>weather_code</th>\n",
|
599 |
-
" <th>cloud_cover</th>\n",
|
600 |
-
" <th>wind_speed_10m</th>\n",
|
601 |
-
" <th>wind_gusts_10m</th>\n",
|
602 |
-
" </tr>\n",
|
603 |
-
" </thead>\n",
|
604 |
-
" <tbody>\n",
|
605 |
-
" <tr>\n",
|
606 |
-
" <th>0</th>\n",
|
607 |
-
" <td>2022-01-01</td>\n",
|
608 |
-
" <td>2022-01-01 00:00:00</td>\n",
|
609 |
-
" <td>6.7</td>\n",
|
610 |
-
" <td>100</td>\n",
|
611 |
-
" <td>0.0</td>\n",
|
612 |
-
" <td>0.0</td>\n",
|
613 |
-
" <td>0.0</td>\n",
|
614 |
-
" <td>3</td>\n",
|
615 |
-
" <td>100</td>\n",
|
616 |
-
" <td>16.2</td>\n",
|
617 |
-
" <td>36.0</td>\n",
|
618 |
-
" </tr>\n",
|
619 |
-
" <tr>\n",
|
620 |
-
" <th>1</th>\n",
|
621 |
-
" <td>2022-01-01</td>\n",
|
622 |
-
" <td>2022-01-01 01:00:00</td>\n",
|
623 |
-
" <td>6.6</td>\n",
|
624 |
-
" <td>100</td>\n",
|
625 |
-
" <td>0.0</td>\n",
|
626 |
-
" <td>0.0</td>\n",
|
627 |
-
" <td>0.0</td>\n",
|
628 |
-
" <td>3</td>\n",
|
629 |
-
" <td>100</td>\n",
|
630 |
-
" <td>16.2</td>\n",
|
631 |
-
" <td>30.2</td>\n",
|
632 |
-
" </tr>\n",
|
633 |
-
" <tr>\n",
|
634 |
-
" <th>2</th>\n",
|
635 |
-
" <td>2022-01-01</td>\n",
|
636 |
-
" <td>2022-01-01 02:00:00</td>\n",
|
637 |
-
" <td>6.7</td>\n",
|
638 |
-
" <td>99</td>\n",
|
639 |
-
" <td>0.0</td>\n",
|
640 |
-
" <td>0.0</td>\n",
|
641 |
-
" <td>0.0</td>\n",
|
642 |
-
" <td>3</td>\n",
|
643 |
-
" <td>100</td>\n",
|
644 |
-
" <td>15.5</td>\n",
|
645 |
-
" <td>30.6</td>\n",
|
646 |
-
" </tr>\n",
|
647 |
-
" <tr>\n",
|
648 |
-
" <th>3</th>\n",
|
649 |
-
" <td>2022-01-01</td>\n",
|
650 |
-
" <td>2022-01-01 03:00:00</td>\n",
|
651 |
-
" <td>6.7</td>\n",
|
652 |
-
" <td>100</td>\n",
|
653 |
-
" <td>0.0</td>\n",
|
654 |
-
" <td>0.0</td>\n",
|
655 |
-
" <td>0.0</td>\n",
|
656 |
-
" <td>3</td>\n",
|
657 |
-
" <td>100</td>\n",
|
658 |
-
" <td>12.7</td>\n",
|
659 |
-
" <td>28.8</td>\n",
|
660 |
-
" </tr>\n",
|
661 |
-
" <tr>\n",
|
662 |
-
" <th>4</th>\n",
|
663 |
-
" <td>2022-01-01</td>\n",
|
664 |
-
" <td>2022-01-01 04:00:00</td>\n",
|
665 |
-
" <td>6.7</td>\n",
|
666 |
-
" <td>99</td>\n",
|
667 |
-
" <td>0.0</td>\n",
|
668 |
-
" <td>0.0</td>\n",
|
669 |
-
" <td>0.0</td>\n",
|
670 |
-
" <td>3</td>\n",
|
671 |
-
" <td>100</td>\n",
|
672 |
-
" <td>10.6</td>\n",
|
673 |
-
" <td>23.8</td>\n",
|
674 |
-
" </tr>\n",
|
675 |
-
" </tbody>\n",
|
676 |
-
"</table>\n",
|
677 |
-
"</div>"
|
678 |
-
],
|
679 |
-
"text/plain": [
|
680 |
-
" date time temperature_2m relative_humidity_2m \\\n",
|
681 |
-
"0 2022-01-01 2022-01-01 00:00:00 6.7 100 \n",
|
682 |
-
"1 2022-01-01 2022-01-01 01:00:00 6.6 100 \n",
|
683 |
-
"2 2022-01-01 2022-01-01 02:00:00 6.7 99 \n",
|
684 |
-
"3 2022-01-01 2022-01-01 03:00:00 6.7 100 \n",
|
685 |
-
"4 2022-01-01 2022-01-01 04:00:00 6.7 99 \n",
|
686 |
-
"\n",
|
687 |
-
" precipitation rain snowfall weather_code cloud_cover wind_speed_10m \\\n",
|
688 |
-
"0 0.0 0.0 0.0 3 100 16.2 \n",
|
689 |
-
"1 0.0 0.0 0.0 3 100 16.2 \n",
|
690 |
-
"2 0.0 0.0 0.0 3 100 15.5 \n",
|
691 |
-
"3 0.0 0.0 0.0 3 100 12.7 \n",
|
692 |
-
"4 0.0 0.0 0.0 3 100 10.6 \n",
|
693 |
-
"\n",
|
694 |
-
" wind_gusts_10m \n",
|
695 |
-
"0 36.0 \n",
|
696 |
-
"1 30.2 \n",
|
697 |
-
"2 30.6 \n",
|
698 |
-
"3 28.8 \n",
|
699 |
-
"4 23.8 "
|
700 |
-
]
|
701 |
-
},
|
702 |
-
"execution_count": 21,
|
703 |
-
"metadata": {},
|
704 |
-
"output_type": "execute_result"
|
705 |
-
}
|
706 |
-
],
|
707 |
-
"source": [
|
708 |
-
"# Display the first 5 rows of the dataframe\n",
|
709 |
-
"weather_df.head()"
|
710 |
-
]
|
711 |
-
},
|
712 |
-
{
|
713 |
-
"cell_type": "code",
|
714 |
-
"execution_count": 22,
|
715 |
-
"metadata": {},
|
716 |
-
"outputs": [
|
717 |
-
{
|
718 |
-
"name": "stdout",
|
719 |
-
"output_type": "stream",
|
720 |
-
"text": [
|
721 |
-
"<class 'pandas.core.frame.DataFrame'>\n",
|
722 |
-
"RangeIndex: 17520 entries, 0 to 17519\n",
|
723 |
-
"Data columns (total 11 columns):\n",
|
724 |
-
" # Column Non-Null Count Dtype \n",
|
725 |
-
"--- ------ -------------- ----- \n",
|
726 |
-
" 0 date 17520 non-null object \n",
|
727 |
-
" 1 time 17520 non-null datetime64[ns]\n",
|
728 |
-
" 2 temperature_2m 17520 non-null float64 \n",
|
729 |
-
" 3 relative_humidity_2m 17520 non-null int64 \n",
|
730 |
-
" 4 precipitation 17520 non-null float64 \n",
|
731 |
-
" 5 rain 17520 non-null float64 \n",
|
732 |
-
" 6 snowfall 17520 non-null float64 \n",
|
733 |
-
" 7 weather_code 17520 non-null int64 \n",
|
734 |
-
" 8 cloud_cover 17520 non-null int64 \n",
|
735 |
-
" 9 wind_speed_10m 17520 non-null float64 \n",
|
736 |
-
" 10 wind_gusts_10m 17520 non-null float64 \n",
|
737 |
-
"dtypes: datetime64[ns](1), float64(6), int64(3), object(1)\n",
|
738 |
-
"memory usage: 1.5+ MB\n"
|
739 |
-
]
|
740 |
-
}
|
741 |
-
],
|
742 |
-
"source": [
|
743 |
-
"# Showing the information for the weather dataframe\n",
|
744 |
-
"weather_df.info()"
|
745 |
-
]
|
746 |
-
},
|
747 |
-
{
|
748 |
-
"cell_type": "code",
|
749 |
-
"execution_count": 23,
|
750 |
-
"metadata": {},
|
751 |
-
"outputs": [
|
752 |
-
{
|
753 |
-
"data": {
|
754 |
-
"text/html": [
|
755 |
-
"<div>\n",
|
756 |
-
"<style scoped>\n",
|
757 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
758 |
-
" vertical-align: middle;\n",
|
759 |
-
" }\n",
|
760 |
-
"\n",
|
761 |
-
" .dataframe tbody tr th {\n",
|
762 |
-
" vertical-align: top;\n",
|
763 |
-
" }\n",
|
764 |
-
"\n",
|
765 |
-
" .dataframe thead th {\n",
|
766 |
-
" text-align: right;\n",
|
767 |
-
" }\n",
|
768 |
-
"</style>\n",
|
769 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
770 |
-
" <thead>\n",
|
771 |
-
" <tr style=\"text-align: right;\">\n",
|
772 |
-
" <th></th>\n",
|
773 |
-
" <th>date</th>\n",
|
774 |
-
" <th>time</th>\n",
|
775 |
-
" <th>temperature_2m</th>\n",
|
776 |
-
" <th>relative_humidity_2m</th>\n",
|
777 |
-
" <th>precipitation</th>\n",
|
778 |
-
" <th>rain</th>\n",
|
779 |
-
" <th>snowfall</th>\n",
|
780 |
-
" <th>weather_code</th>\n",
|
781 |
-
" <th>cloud_cover</th>\n",
|
782 |
-
" <th>wind_speed_10m</th>\n",
|
783 |
-
" <th>wind_gusts_10m</th>\n",
|
784 |
-
" </tr>\n",
|
785 |
-
" </thead>\n",
|
786 |
-
" <tbody>\n",
|
787 |
-
" <tr>\n",
|
788 |
-
" <th>0</th>\n",
|
789 |
-
" <td>2022-01-01</td>\n",
|
790 |
-
" <td>2022-01-01 00:00:00</td>\n",
|
791 |
-
" <td>6.7</td>\n",
|
792 |
-
" <td>100</td>\n",
|
793 |
-
" <td>0.0</td>\n",
|
794 |
-
" <td>0.0</td>\n",
|
795 |
-
" <td>0.0</td>\n",
|
796 |
-
" <td>3</td>\n",
|
797 |
-
" <td>100</td>\n",
|
798 |
-
" <td>16.2</td>\n",
|
799 |
-
" <td>36.0</td>\n",
|
800 |
-
" </tr>\n",
|
801 |
-
" <tr>\n",
|
802 |
-
" <th>1</th>\n",
|
803 |
-
" <td>2022-01-01</td>\n",
|
804 |
-
" <td>2022-01-01 01:00:00</td>\n",
|
805 |
-
" <td>6.6</td>\n",
|
806 |
-
" <td>100</td>\n",
|
807 |
-
" <td>0.0</td>\n",
|
808 |
-
" <td>0.0</td>\n",
|
809 |
-
" <td>0.0</td>\n",
|
810 |
-
" <td>3</td>\n",
|
811 |
-
" <td>100</td>\n",
|
812 |
-
" <td>16.2</td>\n",
|
813 |
-
" <td>30.2</td>\n",
|
814 |
-
" </tr>\n",
|
815 |
-
" <tr>\n",
|
816 |
-
" <th>2</th>\n",
|
817 |
-
" <td>2022-01-01</td>\n",
|
818 |
-
" <td>2022-01-01 02:00:00</td>\n",
|
819 |
-
" <td>6.7</td>\n",
|
820 |
-
" <td>99</td>\n",
|
821 |
-
" <td>0.0</td>\n",
|
822 |
-
" <td>0.0</td>\n",
|
823 |
-
" <td>0.0</td>\n",
|
824 |
-
" <td>3</td>\n",
|
825 |
-
" <td>100</td>\n",
|
826 |
-
" <td>15.5</td>\n",
|
827 |
-
" <td>30.6</td>\n",
|
828 |
-
" </tr>\n",
|
829 |
-
" <tr>\n",
|
830 |
-
" <th>3</th>\n",
|
831 |
-
" <td>2022-01-01</td>\n",
|
832 |
-
" <td>2022-01-01 03:00:00</td>\n",
|
833 |
-
" <td>6.7</td>\n",
|
834 |
-
" <td>100</td>\n",
|
835 |
-
" <td>0.0</td>\n",
|
836 |
-
" <td>0.0</td>\n",
|
837 |
-
" <td>0.0</td>\n",
|
838 |
-
" <td>3</td>\n",
|
839 |
-
" <td>100</td>\n",
|
840 |
-
" <td>12.7</td>\n",
|
841 |
-
" <td>28.8</td>\n",
|
842 |
-
" </tr>\n",
|
843 |
-
" <tr>\n",
|
844 |
-
" <th>4</th>\n",
|
845 |
-
" <td>2022-01-01</td>\n",
|
846 |
-
" <td>2022-01-01 04:00:00</td>\n",
|
847 |
-
" <td>6.7</td>\n",
|
848 |
-
" <td>99</td>\n",
|
849 |
-
" <td>0.0</td>\n",
|
850 |
-
" <td>0.0</td>\n",
|
851 |
-
" <td>0.0</td>\n",
|
852 |
-
" <td>3</td>\n",
|
853 |
-
" <td>100</td>\n",
|
854 |
-
" <td>10.6</td>\n",
|
855 |
-
" <td>23.8</td>\n",
|
856 |
-
" </tr>\n",
|
857 |
-
" </tbody>\n",
|
858 |
-
"</table>\n",
|
859 |
-
"</div>"
|
860 |
-
],
|
861 |
-
"text/plain": [
|
862 |
-
" date time temperature_2m relative_humidity_2m \\\n",
|
863 |
-
"0 2022-01-01 2022-01-01 00:00:00 6.7 100 \n",
|
864 |
-
"1 2022-01-01 2022-01-01 01:00:00 6.6 100 \n",
|
865 |
-
"2 2022-01-01 2022-01-01 02:00:00 6.7 99 \n",
|
866 |
-
"3 2022-01-01 2022-01-01 03:00:00 6.7 100 \n",
|
867 |
-
"4 2022-01-01 2022-01-01 04:00:00 6.7 99 \n",
|
868 |
-
"\n",
|
869 |
-
" precipitation rain snowfall weather_code cloud_cover wind_speed_10m \\\n",
|
870 |
-
"0 0.0 0.0 0.0 3 100 16.2 \n",
|
871 |
-
"1 0.0 0.0 0.0 3 100 16.2 \n",
|
872 |
-
"2 0.0 0.0 0.0 3 100 15.5 \n",
|
873 |
-
"3 0.0 0.0 0.0 3 100 12.7 \n",
|
874 |
-
"4 0.0 0.0 0.0 3 100 10.6 \n",
|
875 |
-
"\n",
|
876 |
-
" wind_gusts_10m \n",
|
877 |
-
"0 36.0 \n",
|
878 |
-
"1 30.2 \n",
|
879 |
-
"2 30.6 \n",
|
880 |
-
"3 28.8 \n",
|
881 |
-
"4 23.8 "
|
882 |
-
]
|
883 |
-
},
|
884 |
-
"execution_count": 23,
|
885 |
-
"metadata": {},
|
886 |
-
"output_type": "execute_result"
|
887 |
-
}
|
888 |
-
],
|
889 |
-
"source": [
|
890 |
-
"# Fetching historical weather measurements data\n",
|
891 |
-
"weater = weater_measures.fetch_weater_measures()\n",
|
892 |
-
"weater.head()"
|
893 |
-
]
|
894 |
-
},
|
895 |
-
{
|
896 |
-
"cell_type": "markdown",
|
897 |
-
"metadata": {},
|
898 |
-
"source": [
|
899 |
-
"### <span style=\"color:#2656a3;\"> 🗓️ Calendar of Danish workdays and holidays "
|
900 |
-
]
|
901 |
-
},
|
902 |
-
{
|
903 |
-
"cell_type": "code",
|
904 |
-
"execution_count": 24,
|
905 |
-
"metadata": {},
|
906 |
-
"outputs": [
|
907 |
-
{
|
908 |
-
"data": {
|
909 |
-
"text/html": [
|
910 |
-
"<div>\n",
|
911 |
-
"<style scoped>\n",
|
912 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
913 |
-
" vertical-align: middle;\n",
|
914 |
-
" }\n",
|
915 |
-
"\n",
|
916 |
-
" .dataframe tbody tr th {\n",
|
917 |
-
" vertical-align: top;\n",
|
918 |
-
" }\n",
|
919 |
-
"\n",
|
920 |
-
" .dataframe thead th {\n",
|
921 |
-
" text-align: right;\n",
|
922 |
-
" }\n",
|
923 |
-
"</style>\n",
|
924 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
925 |
-
" <thead>\n",
|
926 |
-
" <tr style=\"text-align: right;\">\n",
|
927 |
-
" <th></th>\n",
|
928 |
-
" <th>date</th>\n",
|
929 |
-
" <th>type</th>\n",
|
930 |
-
" </tr>\n",
|
931 |
-
" </thead>\n",
|
932 |
-
" <tbody>\n",
|
933 |
-
" <tr>\n",
|
934 |
-
" <th>0</th>\n",
|
935 |
-
" <td>01/01/2022</td>\n",
|
936 |
-
" <td>Not a Workday</td>\n",
|
937 |
-
" </tr>\n",
|
938 |
-
" <tr>\n",
|
939 |
-
" <th>1</th>\n",
|
940 |
-
" <td>02/01/2022</td>\n",
|
941 |
-
" <td>Not a Workday</td>\n",
|
942 |
-
" </tr>\n",
|
943 |
-
" <tr>\n",
|
944 |
-
" <th>2</th>\n",
|
945 |
-
" <td>03/01/2022</td>\n",
|
946 |
-
" <td>Workday</td>\n",
|
947 |
-
" </tr>\n",
|
948 |
-
" <tr>\n",
|
949 |
-
" <th>3</th>\n",
|
950 |
-
" <td>04/01/2022</td>\n",
|
951 |
-
" <td>Workday</td>\n",
|
952 |
-
" </tr>\n",
|
953 |
-
" <tr>\n",
|
954 |
-
" <th>4</th>\n",
|
955 |
-
" <td>05/01/2022</td>\n",
|
956 |
-
" <td>Workday</td>\n",
|
957 |
-
" </tr>\n",
|
958 |
-
" <tr>\n",
|
959 |
-
" <th>...</th>\n",
|
960 |
-
" <td>...</td>\n",
|
961 |
-
" <td>...</td>\n",
|
962 |
-
" </tr>\n",
|
963 |
-
" <tr>\n",
|
964 |
-
" <th>1091</th>\n",
|
965 |
-
" <td>27/12/2024</td>\n",
|
966 |
-
" <td>Workday</td>\n",
|
967 |
-
" </tr>\n",
|
968 |
-
" <tr>\n",
|
969 |
-
" <th>1092</th>\n",
|
970 |
-
" <td>28/12/2024</td>\n",
|
971 |
-
" <td>Not a Workday</td>\n",
|
972 |
-
" </tr>\n",
|
973 |
-
" <tr>\n",
|
974 |
-
" <th>1093</th>\n",
|
975 |
-
" <td>29/12/2024</td>\n",
|
976 |
-
" <td>Not a Workday</td>\n",
|
977 |
-
" </tr>\n",
|
978 |
-
" <tr>\n",
|
979 |
-
" <th>1094</th>\n",
|
980 |
-
" <td>30/12/2024</td>\n",
|
981 |
-
" <td>Workday</td>\n",
|
982 |
-
" </tr>\n",
|
983 |
-
" <tr>\n",
|
984 |
-
" <th>1095</th>\n",
|
985 |
-
" <td>31/12/2024</td>\n",
|
986 |
-
" <td>Workday</td>\n",
|
987 |
-
" </tr>\n",
|
988 |
-
" </tbody>\n",
|
989 |
-
"</table>\n",
|
990 |
-
"<p>1096 rows × 2 columns</p>\n",
|
991 |
-
"</div>"
|
992 |
-
],
|
993 |
-
"text/plain": [
|
994 |
-
" date type\n",
|
995 |
-
"0 01/01/2022 Not a Workday\n",
|
996 |
-
"1 02/01/2022 Not a Workday\n",
|
997 |
-
"2 03/01/2022 Workday\n",
|
998 |
-
"3 04/01/2022 Workday\n",
|
999 |
-
"4 05/01/2022 Workday\n",
|
1000 |
-
"... ... ...\n",
|
1001 |
-
"1091 27/12/2024 Workday\n",
|
1002 |
-
"1092 28/12/2024 Not a Workday\n",
|
1003 |
-
"1093 29/12/2024 Not a Workday\n",
|
1004 |
-
"1094 30/12/2024 Workday\n",
|
1005 |
-
"1095 31/12/2024 Workday\n",
|
1006 |
-
"\n",
|
1007 |
-
"[1096 rows x 2 columns]"
|
1008 |
-
]
|
1009 |
-
},
|
1010 |
-
"execution_count": 24,
|
1011 |
-
"metadata": {},
|
1012 |
-
"output_type": "execute_result"
|
1013 |
-
}
|
1014 |
-
],
|
1015 |
-
"source": [
|
1016 |
-
"# Read csv file with calender\n",
|
1017 |
-
"calender_df = pd.read_csv('https://raw.githubusercontent.com/Camillahannesbo/MLOPs-Assignment-/main/data/calendar_incl_holiday.csv', delimiter=';', usecols=['date', 'type'])\n",
|
1018 |
-
" \n",
|
1019 |
-
"# Display the DataFrame\n",
|
1020 |
-
"calender_df"
|
1021 |
-
]
|
1022 |
-
},
|
1023 |
-
{
|
1024 |
-
"cell_type": "code",
|
1025 |
-
"execution_count": 25,
|
1026 |
-
"metadata": {},
|
1027 |
-
"outputs": [],
|
1028 |
-
"source": [
|
1029 |
-
"# Formatting the date column by replacing the / with -\n",
|
1030 |
-
"calender_df['date'] = calender_df['date'].str.replace('/', '-')"
|
1031 |
-
]
|
1032 |
-
},
|
1033 |
-
{
|
1034 |
-
"cell_type": "code",
|
1035 |
-
"execution_count": 26,
|
1036 |
-
"metadata": {},
|
1037 |
-
"outputs": [],
|
1038 |
-
"source": [
|
1039 |
-
"# Defining the function for flipping the date to the left\n",
|
1040 |
-
"\n",
|
1041 |
-
"# Flip the date to the left\n",
|
1042 |
-
"def flip_date_left(date):\n",
|
1043 |
-
" parts = date.split(\"-\") # Assuming the date format is \"YYYY-MM-DD\"\n",
|
1044 |
-
" flipped_date = \"-\".join(parts[::-1])\n",
|
1045 |
-
" return flipped_date\n",
|
1046 |
-
"\n",
|
1047 |
-
"# Flip the date to the left for the date column\n",
|
1048 |
-
"def flip_dates_left_in_column(column):\n",
|
1049 |
-
" flipped_column = [flip_date_left(date) for date in column]\n",
|
1050 |
-
" return flipped_column"
|
1051 |
-
]
|
1052 |
-
},
|
1053 |
-
{
|
1054 |
-
"cell_type": "code",
|
1055 |
-
"execution_count": 27,
|
1056 |
-
"metadata": {},
|
1057 |
-
"outputs": [],
|
1058 |
-
"source": [
|
1059 |
-
"# Make the new flipped_dates_column based on the function above and insert it as the 'date' column in the dataframe\n",
|
1060 |
-
"flipped_dates_column = flip_dates_left_in_column(calender_df['date'])\n",
|
1061 |
-
"calender_df['date'] = flipped_dates_column"
|
1062 |
-
]
|
1063 |
-
},
|
1064 |
-
{
|
1065 |
-
"cell_type": "code",
|
1066 |
-
"execution_count": 28,
|
1067 |
-
"metadata": {},
|
1068 |
-
"outputs": [],
|
1069 |
-
"source": [
|
1070 |
-
"# Convert string 'date' column to date type\n",
|
1071 |
-
"calender_df['date'] = pd.to_datetime(calender_df['date'], format='%Y-%m-%d').dt.date"
|
1072 |
-
]
|
1073 |
-
},
|
1074 |
-
{
|
1075 |
-
"cell_type": "code",
|
1076 |
-
"execution_count": 29,
|
1077 |
-
"metadata": {},
|
1078 |
-
"outputs": [
|
1079 |
-
{
|
1080 |
-
"data": {
|
1081 |
-
"text/html": [
|
1082 |
-
"<div>\n",
|
1083 |
-
"<style scoped>\n",
|
1084 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
1085 |
-
" vertical-align: middle;\n",
|
1086 |
-
" }\n",
|
1087 |
-
"\n",
|
1088 |
-
" .dataframe tbody tr th {\n",
|
1089 |
-
" vertical-align: top;\n",
|
1090 |
-
" }\n",
|
1091 |
-
"\n",
|
1092 |
-
" .dataframe thead th {\n",
|
1093 |
-
" text-align: right;\n",
|
1094 |
-
" }\n",
|
1095 |
-
"</style>\n",
|
1096 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
1097 |
-
" <thead>\n",
|
1098 |
-
" <tr style=\"text-align: right;\">\n",
|
1099 |
-
" <th></th>\n",
|
1100 |
-
" <th>date</th>\n",
|
1101 |
-
" <th>type</th>\n",
|
1102 |
-
" </tr>\n",
|
1103 |
-
" </thead>\n",
|
1104 |
-
" <tbody>\n",
|
1105 |
-
" <tr>\n",
|
1106 |
-
" <th>0</th>\n",
|
1107 |
-
" <td>2022-01-01</td>\n",
|
1108 |
-
" <td>Not a Workday</td>\n",
|
1109 |
-
" </tr>\n",
|
1110 |
-
" <tr>\n",
|
1111 |
-
" <th>1</th>\n",
|
1112 |
-
" <td>2022-01-02</td>\n",
|
1113 |
-
" <td>Not a Workday</td>\n",
|
1114 |
-
" </tr>\n",
|
1115 |
-
" <tr>\n",
|
1116 |
-
" <th>2</th>\n",
|
1117 |
-
" <td>2022-01-03</td>\n",
|
1118 |
-
" <td>Workday</td>\n",
|
1119 |
-
" </tr>\n",
|
1120 |
-
" <tr>\n",
|
1121 |
-
" <th>3</th>\n",
|
1122 |
-
" <td>2022-01-04</td>\n",
|
1123 |
-
" <td>Workday</td>\n",
|
1124 |
-
" </tr>\n",
|
1125 |
-
" <tr>\n",
|
1126 |
-
" <th>4</th>\n",
|
1127 |
-
" <td>2022-01-05</td>\n",
|
1128 |
-
" <td>Workday</td>\n",
|
1129 |
-
" </tr>\n",
|
1130 |
-
" </tbody>\n",
|
1131 |
-
"</table>\n",
|
1132 |
-
"</div>"
|
1133 |
-
],
|
1134 |
-
"text/plain": [
|
1135 |
-
" date type\n",
|
1136 |
-
"0 2022-01-01 Not a Workday\n",
|
1137 |
-
"1 2022-01-02 Not a Workday\n",
|
1138 |
-
"2 2022-01-03 Workday\n",
|
1139 |
-
"3 2022-01-04 Workday\n",
|
1140 |
-
"4 2022-01-05 Workday"
|
1141 |
-
]
|
1142 |
-
},
|
1143 |
-
"execution_count": 29,
|
1144 |
-
"metadata": {},
|
1145 |
-
"output_type": "execute_result"
|
1146 |
-
}
|
1147 |
-
],
|
1148 |
-
"source": [
|
1149 |
-
"# Display the first 5 rows of the dataframe\n",
|
1150 |
-
"calender_df.head()"
|
1151 |
-
]
|
1152 |
-
},
|
1153 |
-
{
|
1154 |
-
"cell_type": "code",
|
1155 |
-
"execution_count": 30,
|
1156 |
-
"metadata": {},
|
1157 |
-
"outputs": [
|
1158 |
-
{
|
1159 |
-
"name": "stdout",
|
1160 |
-
"output_type": "stream",
|
1161 |
-
"text": [
|
1162 |
-
"<class 'pandas.core.frame.DataFrame'>\n",
|
1163 |
-
"RangeIndex: 1096 entries, 0 to 1095\n",
|
1164 |
-
"Data columns (total 2 columns):\n",
|
1165 |
-
" # Column Non-Null Count Dtype \n",
|
1166 |
-
"--- ------ -------------- ----- \n",
|
1167 |
-
" 0 date 1096 non-null object\n",
|
1168 |
-
" 1 type 1096 non-null object\n",
|
1169 |
-
"dtypes: object(2)\n",
|
1170 |
-
"memory usage: 17.3+ KB\n"
|
1171 |
-
]
|
1172 |
-
}
|
1173 |
-
],
|
1174 |
-
"source": [
|
1175 |
-
"# Showing the information for the calender dataframe\n",
|
1176 |
-
"calender_df.info()"
|
1177 |
-
]
|
1178 |
-
},
|
1179 |
-
{
|
1180 |
-
"cell_type": "markdown",
|
1181 |
-
"metadata": {},
|
1182 |
-
"source": [
|
1183 |
-
"## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store\n",
|
1184 |
-
"\n",
|
1185 |
-
"First we will connect to Hopsworks Feature Store so we can access and create Feature Groups.\n",
|
1186 |
-
"Feature groups can also be used to define a namespace for features. For instance, in a real-life setting you would likely want to experiment with different window lengths. In that case, you can create feature groups with identical schema for each window length. \n",
|
1187 |
-
"\n",
|
1188 |
-
"Before you can create a feature group you need to connect to our feature store."
|
1189 |
-
]
|
1190 |
-
},
|
1191 |
-
{
|
1192 |
-
"cell_type": "code",
|
1193 |
-
"execution_count": null,
|
1194 |
-
"metadata": {},
|
1195 |
-
"outputs": [],
|
1196 |
-
"source": [
|
1197 |
-
"import hopsworks\n",
|
1198 |
-
"\n",
|
1199 |
-
"project = hopsworks.login()\n",
|
1200 |
-
"\n",
|
1201 |
-
"fs = project.get_feature_store()"
|
1202 |
-
]
|
1203 |
-
},
|
1204 |
-
{
|
1205 |
-
"cell_type": "markdown",
|
1206 |
-
"metadata": {},
|
1207 |
-
"source": [
|
1208 |
-
"### <span style=\"color:#2656a3;\"> 🪄 Creating Feature Groups\n",
|
1209 |
-
"\n",
|
1210 |
-
"When creating a feature group, you must name it and designate a primary key. Additionally, it's helpful to include a description of the feature group's contents and a version number; if not defined, it will default to `1`. \n",
|
1211 |
-
"\n",
|
1212 |
-
"We've configured `online_enabled` as `True` to enable the feature group to be read via the Online API for a Feature View."
|
1213 |
-
]
|
1214 |
-
},
|
1215 |
-
{
|
1216 |
-
"cell_type": "code",
|
1217 |
-
"execution_count": null,
|
1218 |
-
"metadata": {},
|
1219 |
-
"outputs": [],
|
1220 |
-
"source": [
|
1221 |
-
"# Creating the feature group for the weater data\n",
|
1222 |
-
"weather_fg = fs.get_or_create_feature_group(\n",
|
1223 |
-
" name=\"weather_measurements\",\n",
|
1224 |
-
" version=1,\n",
|
1225 |
-
" description=\"Weather measurements from Open Meteo API\",\n",
|
1226 |
-
" primary_key=[\"date\"],\n",
|
1227 |
-
" event_time=\"time\",\n",
|
1228 |
-
" online_enabled=True,\n",
|
1229 |
-
")"
|
1230 |
-
]
|
1231 |
-
},
|
1232 |
-
{
|
1233 |
-
"cell_type": "markdown",
|
1234 |
-
"metadata": {},
|
1235 |
-
"source": [
|
1236 |
-
"By now, you've only outlined metadata for the feature group. There's no data stored, nor is there a defined schema for it. To establish persistence for the feature group, you'll need to populate it with its associated data using the `insert` function"
|
1237 |
-
]
|
1238 |
-
},
|
1239 |
-
{
|
1240 |
-
"cell_type": "code",
|
1241 |
-
"execution_count": null,
|
1242 |
-
"metadata": {},
|
1243 |
-
"outputs": [],
|
1244 |
-
"source": [
|
1245 |
-
"# Inserting the weather_df into the feature group named weather_fg\n",
|
1246 |
-
"weather_fg.insert(weather_df)"
|
1247 |
-
]
|
1248 |
-
},
|
1249 |
-
{
|
1250 |
-
"cell_type": "markdown",
|
1251 |
-
"metadata": {},
|
1252 |
-
"source": [
|
1253 |
-
"We make a descriptions for each feature we put into the feature group. In this way we are adding more information and documentation to the user"
|
1254 |
-
]
|
1255 |
-
},
|
1256 |
-
{
|
1257 |
-
"cell_type": "code",
|
1258 |
-
"execution_count": null,
|
1259 |
-
"metadata": {},
|
1260 |
-
"outputs": [],
|
1261 |
-
"source": [
|
1262 |
-
"weather_feature_descriptions = [\n",
|
1263 |
-
" {\"name\": \"date\", \"description\": \"Date of the weather measurement\"},\n",
|
1264 |
-
" {\"name\": \"time\", \"description\": \"Time of the weather measurement\"},\n",
|
1265 |
-
" {\"name\": \"temperature_2m\", \"description\": \"Temperature at 2m above ground\"},\n",
|
1266 |
-
" {\"name\": \"relative_humidity_2m\", \"description\": \"Relative humidity at 2m above ground\"},\n",
|
1267 |
-
" {\"name\": \"precipitation\", \"description\": \"Precipitation\"},\n",
|
1268 |
-
" {\"name\": \"rain\", \"description\": \"Rain\"},\n",
|
1269 |
-
" {\"name\": \"snowfall\", \"description\": \"Snowfall\"}, \n",
|
1270 |
-
" {\"name\": \"weather_code\", \"description\": \"Weather code\"}, \n",
|
1271 |
-
" {\"name\": \"cloud_cover\", \"description\": \"Cloud cover\"}, \n",
|
1272 |
-
" {\"name\": \"wind_speed_10m\", \"description\": \"Wind speed at 10m above ground\"}, \n",
|
1273 |
-
" {\"name\": \"wind_gusts_10m\", \"description\": \"Wind gusts at 10m above ground\"}, \n",
|
1274 |
-
"]\n",
|
1275 |
-
"\n",
|
1276 |
-
"for desc in weather_feature_descriptions: \n",
|
1277 |
-
" weather_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
|
1278 |
-
]
|
1279 |
-
},
|
1280 |
-
{
|
1281 |
-
"cell_type": "markdown",
|
1282 |
-
"metadata": {},
|
1283 |
-
"source": [
|
1284 |
-
"We replicate the process for both the `electricity_fg` and `danish_holidays_fg` by establishing feature groups and inserting the dataframes into their respective feature groups."
|
1285 |
-
]
|
1286 |
-
},
|
1287 |
-
{
|
1288 |
-
"cell_type": "code",
|
1289 |
-
"execution_count": null,
|
1290 |
-
"metadata": {},
|
1291 |
-
"outputs": [],
|
1292 |
-
"source": [
|
1293 |
-
"# Creating the feature group for the electricity prices\n",
|
1294 |
-
"electricity_fg = fs.get_or_create_feature_group(\n",
|
1295 |
-
" name=\"electricity_prices\",\n",
|
1296 |
-
" version=1,\n",
|
1297 |
-
" description=\"Electricity prices from Energidata API\",\n",
|
1298 |
-
" primary_key=[\"date\"],\n",
|
1299 |
-
" online_enabled=True,\n",
|
1300 |
-
" event_time=\"time\",\n",
|
1301 |
-
")"
|
1302 |
-
]
|
1303 |
-
},
|
1304 |
-
{
|
1305 |
-
"cell_type": "code",
|
1306 |
-
"execution_count": null,
|
1307 |
-
"metadata": {},
|
1308 |
-
"outputs": [],
|
1309 |
-
"source": [
|
1310 |
-
"# Inserting the electricity_df into the feature group named electricity_fg\n",
|
1311 |
-
"electricity_fg.insert(electricity_df)"
|
1312 |
-
]
|
1313 |
-
},
|
1314 |
-
{
|
1315 |
-
"cell_type": "code",
|
1316 |
-
"execution_count": null,
|
1317 |
-
"metadata": {},
|
1318 |
-
"outputs": [],
|
1319 |
-
"source": [
|
1320 |
-
"electricity_feature_descriptions = [\n",
|
1321 |
-
" {\"name\": \"date\", \"description\": \"Date of the electricity measurement\"},\n",
|
1322 |
-
" {\"name\": \"time\", \"description\": \"Time of the electricity measurement\"},\n",
|
1323 |
-
" {\"name\": \"PriceArea\", \"description\": \"Price area for the electricity measurement\"},\n",
|
1324 |
-
" {\"name\": \"SpotPriceDKK_KWH\", \"description\": \"Spot price in DKK per KWH\"}, \n",
|
1325 |
-
"]\n",
|
1326 |
-
"\n",
|
1327 |
-
"for desc in electricity_feature_descriptions: \n",
|
1328 |
-
" electricity_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
|
1329 |
-
]
|
1330 |
-
},
|
1331 |
-
{
|
1332 |
-
"cell_type": "code",
|
1333 |
-
"execution_count": null,
|
1334 |
-
"metadata": {},
|
1335 |
-
"outputs": [],
|
1336 |
-
"source": [
|
1337 |
-
"# Creating the feature group for the danish holidays\n",
|
1338 |
-
"danish_holidays_fg = fs.get_or_create_feature_group(\n",
|
1339 |
-
" name=\"danish_holidays\",\n",
|
1340 |
-
" version=1,\n",
|
1341 |
-
" description=\"Danish holidays calendar.\",\n",
|
1342 |
-
" online_enabled=True,\n",
|
1343 |
-
" primary_key=[\"date\"],\n",
|
1344 |
-
")"
|
1345 |
-
]
|
1346 |
-
},
|
1347 |
-
{
|
1348 |
-
"cell_type": "code",
|
1349 |
-
"execution_count": null,
|
1350 |
-
"metadata": {},
|
1351 |
-
"outputs": [],
|
1352 |
-
"source": [
|
1353 |
-
"# Inserting the calender_df into the feature group named danish_holidays_fg\n",
|
1354 |
-
"danish_holidays_fg.insert(calender_df)"
|
1355 |
-
]
|
1356 |
-
},
|
1357 |
-
{
|
1358 |
-
"cell_type": "code",
|
1359 |
-
"execution_count": null,
|
1360 |
-
"metadata": {},
|
1361 |
-
"outputs": [],
|
1362 |
-
"source": [
|
1363 |
-
"danish_holidays_feature_descriptions = [\n",
|
1364 |
-
" {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
|
1365 |
-
" {\"name\": \"type\", \"description\": \"Holyday or not holyday\"},\n",
|
1366 |
-
"]\n",
|
1367 |
-
"\n",
|
1368 |
-
"for desc in danish_holidays_feature_descriptions: \n",
|
1369 |
-
" danish_holidays_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
|
1370 |
-
]
|
1371 |
-
},
|
1372 |
-
{
|
1373 |
-
"cell_type": "markdown",
|
1374 |
-
"metadata": {},
|
1375 |
-
"source": [
|
1376 |
-
"---\n",
|
1377 |
-
"## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 02: Feature Pipeline </span>\n",
|
1378 |
-
"\n",
|
1379 |
-
"In the next notebook, you will be generating new data for the Feature Groups."
|
1380 |
-
]
|
1381 |
-
}
|
1382 |
-
],
|
1383 |
-
"metadata": {
|
1384 |
-
"kernelspec": {
|
1385 |
-
"display_name": "bds-streamlit",
|
1386 |
-
"language": "python",
|
1387 |
-
"name": "python3"
|
1388 |
-
},
|
1389 |
-
"language_info": {
|
1390 |
-
"codemirror_mode": {
|
1391 |
-
"name": "ipython",
|
1392 |
-
"version": 3
|
1393 |
-
},
|
1394 |
-
"file_extension": ".py",
|
1395 |
-
"mimetype": "text/x-python",
|
1396 |
-
"name": "python",
|
1397 |
-
"nbconvert_exporter": "python",
|
1398 |
-
"pygments_lexer": "ipython3",
|
1399 |
-
"version": "3.11.8"
|
1400 |
-
}
|
1401 |
-
},
|
1402 |
-
"nbformat": 4,
|
1403 |
-
"nbformat_minor": 2
|
1404 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/Old/2_feature_pipeline_OLD.ipynb
DELETED
@@ -1,561 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# <span style=\"font-width:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-width:bold; font-size: 3rem; color:#333;\">- Part 02: Feature Pipeline</span>"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## 🗒️ This notebook is divided into the following sections:\n",
|
15 |
-
"1. Parse new aata.\n",
|
16 |
-
"2. Insert new data into the Feature Store."
|
17 |
-
]
|
18 |
-
},
|
19 |
-
{
|
20 |
-
"cell_type": "markdown",
|
21 |
-
"metadata": {},
|
22 |
-
"source": [
|
23 |
-
"## <span style='color:#2656a3'> ⚙️ Import of libraries and packages"
|
24 |
-
]
|
25 |
-
},
|
26 |
-
{
|
27 |
-
"cell_type": "code",
|
28 |
-
"execution_count": 1,
|
29 |
-
"metadata": {},
|
30 |
-
"outputs": [],
|
31 |
-
"source": [
|
32 |
-
"# Importing of the packages for the needed libraries for the Jupyter notebook\n",
|
33 |
-
"import pandas as pd\n",
|
34 |
-
"import requests\n",
|
35 |
-
"\n",
|
36 |
-
"# Ignore warnings\n",
|
37 |
-
"import warnings \n",
|
38 |
-
"warnings.filterwarnings('ignore')"
|
39 |
-
]
|
40 |
-
},
|
41 |
-
{
|
42 |
-
"cell_type": "markdown",
|
43 |
-
"metadata": {},
|
44 |
-
"source": [
|
45 |
-
"## <span style='color:#2656a3'> 🪄 Parsing new data"
|
46 |
-
]
|
47 |
-
},
|
48 |
-
{
|
49 |
-
"cell_type": "markdown",
|
50 |
-
"metadata": {},
|
51 |
-
"source": [
|
52 |
-
"### <span style=\"color:#2656a3;\">💸 Electricity prices per day from Energinet"
|
53 |
-
]
|
54 |
-
},
|
55 |
-
{
|
56 |
-
"cell_type": "code",
|
57 |
-
"execution_count": 2,
|
58 |
-
"metadata": {},
|
59 |
-
"outputs": [],
|
60 |
-
"source": [
|
61 |
-
"# Defining the URL for the API call to the electricity price data\n",
|
62 |
-
"electricity_api_url = ('https://api.energidataservice.dk/dataset/Elspotprices?offset=0&start=2024-01-01T00:00&end=2024-04-08T00:00&filter=%7B%22PriceArea%22:[%22DK1%22]%7D&sort=HourUTC%20DESC')"
|
63 |
-
]
|
64 |
-
},
|
65 |
-
{
|
66 |
-
"cell_type": "code",
|
67 |
-
"execution_count": 3,
|
68 |
-
"metadata": {},
|
69 |
-
"outputs": [
|
70 |
-
{
|
71 |
-
"name": "stdout",
|
72 |
-
"output_type": "stream",
|
73 |
-
"text": [
|
74 |
-
"<Response [200]>\n"
|
75 |
-
]
|
76 |
-
}
|
77 |
-
],
|
78 |
-
"source": [
|
79 |
-
"# Fetch data from the API and make the output to a pandas dataframe\n",
|
80 |
-
"electricity_data_response = requests.get(electricity_api_url)\n",
|
81 |
-
"electricity_data = electricity_data_response.json()\n",
|
82 |
-
"electricity_df = pd.DataFrame(electricity_data['records'])\n",
|
83 |
-
"\n",
|
84 |
-
"# Checking the result of the API call. If the response if 200 then the API call was successfull\n",
|
85 |
-
"print(electricity_data_response)"
|
86 |
-
]
|
87 |
-
},
|
88 |
-
{
|
89 |
-
"cell_type": "code",
|
90 |
-
"execution_count": 4,
|
91 |
-
"metadata": {},
|
92 |
-
"outputs": [],
|
93 |
-
"source": [
|
94 |
-
"# Datapreprocessing by making the spotprice per kwh instead of mwh\n",
|
95 |
-
"electricity_df['SpotPriceDKK_KWH'] = electricity_df['SpotPriceDKK'] / 1000"
|
96 |
-
]
|
97 |
-
},
|
98 |
-
{
|
99 |
-
"cell_type": "code",
|
100 |
-
"execution_count": 5,
|
101 |
-
"metadata": {},
|
102 |
-
"outputs": [],
|
103 |
-
"source": [
|
104 |
-
"# Datacleaning by removing the columns that are not needed\n",
|
105 |
-
"electricity_df.drop('SpotPriceDKK', axis=1, inplace=True)\n",
|
106 |
-
"electricity_df.drop('SpotPriceEUR', axis=1, inplace=True)\n",
|
107 |
-
"electricity_df.drop('HourUTC', axis=1, inplace=True)"
|
108 |
-
]
|
109 |
-
},
|
110 |
-
{
|
111 |
-
"cell_type": "code",
|
112 |
-
"execution_count": 6,
|
113 |
-
"metadata": {},
|
114 |
-
"outputs": [],
|
115 |
-
"source": [
|
116 |
-
"# Renaming the columns and reformating the time column\n",
|
117 |
-
"electricity_df.rename(columns={'HourDK': 'time'}, inplace=True)"
|
118 |
-
]
|
119 |
-
},
|
120 |
-
{
|
121 |
-
"cell_type": "code",
|
122 |
-
"execution_count": 7,
|
123 |
-
"metadata": {},
|
124 |
-
"outputs": [],
|
125 |
-
"source": [
|
126 |
-
"# Formatting the date column\n",
|
127 |
-
"electricity_df['time'] = electricity_df['time'].astype(str).str[:-3]\n",
|
128 |
-
"electricity_df['date'] = electricity_df['time'].str[:10]"
|
129 |
-
]
|
130 |
-
},
|
131 |
-
{
|
132 |
-
"cell_type": "code",
|
133 |
-
"execution_count": 8,
|
134 |
-
"metadata": {},
|
135 |
-
"outputs": [],
|
136 |
-
"source": [
|
137 |
-
"# Creating a new column for the date called electricity_temporary_date_column and insert it as the first column in the dataframe\n",
|
138 |
-
"electricity_temporary_date_column = electricity_df.pop('date')\n",
|
139 |
-
"electricity_df.insert(0, 'date', electricity_temporary_date_column)"
|
140 |
-
]
|
141 |
-
},
|
142 |
-
{
|
143 |
-
"cell_type": "code",
|
144 |
-
"execution_count": 9,
|
145 |
-
"metadata": {},
|
146 |
-
"outputs": [],
|
147 |
-
"source": [
|
148 |
-
"# Convert string 'date' column to date type and 'time' column to datetime format\n",
|
149 |
-
"electricity_df['date'] = pd.to_datetime(electricity_df['date'], format='%Y-%m-%d').dt.date\n",
|
150 |
-
"electricity_df['time'] = pd.to_datetime(electricity_df['time'])"
|
151 |
-
]
|
152 |
-
},
|
153 |
-
{
|
154 |
-
"cell_type": "code",
|
155 |
-
"execution_count": 10,
|
156 |
-
"metadata": {},
|
157 |
-
"outputs": [
|
158 |
-
{
|
159 |
-
"data": {
|
160 |
-
"text/html": [
|
161 |
-
"<div>\n",
|
162 |
-
"<style scoped>\n",
|
163 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
164 |
-
" vertical-align: middle;\n",
|
165 |
-
" }\n",
|
166 |
-
"\n",
|
167 |
-
" .dataframe tbody tr th {\n",
|
168 |
-
" vertical-align: top;\n",
|
169 |
-
" }\n",
|
170 |
-
"\n",
|
171 |
-
" .dataframe thead th {\n",
|
172 |
-
" text-align: right;\n",
|
173 |
-
" }\n",
|
174 |
-
"</style>\n",
|
175 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
176 |
-
" <thead>\n",
|
177 |
-
" <tr style=\"text-align: right;\">\n",
|
178 |
-
" <th></th>\n",
|
179 |
-
" <th>date</th>\n",
|
180 |
-
" <th>time</th>\n",
|
181 |
-
" <th>PriceArea</th>\n",
|
182 |
-
" <th>SpotPriceDKK_KWH</th>\n",
|
183 |
-
" </tr>\n",
|
184 |
-
" </thead>\n",
|
185 |
-
" <tbody>\n",
|
186 |
-
" <tr>\n",
|
187 |
-
" <th>0</th>\n",
|
188 |
-
" <td>2024-04-07</td>\n",
|
189 |
-
" <td>2024-04-07 23:00:00</td>\n",
|
190 |
-
" <td>DK1</td>\n",
|
191 |
-
" <td>0.31886</td>\n",
|
192 |
-
" </tr>\n",
|
193 |
-
" <tr>\n",
|
194 |
-
" <th>1</th>\n",
|
195 |
-
" <td>2024-04-07</td>\n",
|
196 |
-
" <td>2024-04-07 22:00:00</td>\n",
|
197 |
-
" <td>DK1</td>\n",
|
198 |
-
" <td>0.34078</td>\n",
|
199 |
-
" </tr>\n",
|
200 |
-
" <tr>\n",
|
201 |
-
" <th>2</th>\n",
|
202 |
-
" <td>2024-04-07</td>\n",
|
203 |
-
" <td>2024-04-07 21:00:00</td>\n",
|
204 |
-
" <td>DK1</td>\n",
|
205 |
-
" <td>0.35958</td>\n",
|
206 |
-
" </tr>\n",
|
207 |
-
" <tr>\n",
|
208 |
-
" <th>3</th>\n",
|
209 |
-
" <td>2024-04-07</td>\n",
|
210 |
-
" <td>2024-04-07 20:00:00</td>\n",
|
211 |
-
" <td>DK1</td>\n",
|
212 |
-
" <td>0.35645</td>\n",
|
213 |
-
" </tr>\n",
|
214 |
-
" <tr>\n",
|
215 |
-
" <th>4</th>\n",
|
216 |
-
" <td>2024-04-07</td>\n",
|
217 |
-
" <td>2024-04-07 19:00:00</td>\n",
|
218 |
-
" <td>DK1</td>\n",
|
219 |
-
" <td>0.34399</td>\n",
|
220 |
-
" </tr>\n",
|
221 |
-
" </tbody>\n",
|
222 |
-
"</table>\n",
|
223 |
-
"</div>"
|
224 |
-
],
|
225 |
-
"text/plain": [
|
226 |
-
" date time PriceArea SpotPriceDKK_KWH\n",
|
227 |
-
"0 2024-04-07 2024-04-07 23:00:00 DK1 0.31886\n",
|
228 |
-
"1 2024-04-07 2024-04-07 22:00:00 DK1 0.34078\n",
|
229 |
-
"2 2024-04-07 2024-04-07 21:00:00 DK1 0.35958\n",
|
230 |
-
"3 2024-04-07 2024-04-07 20:00:00 DK1 0.35645\n",
|
231 |
-
"4 2024-04-07 2024-04-07 19:00:00 DK1 0.34399"
|
232 |
-
]
|
233 |
-
},
|
234 |
-
"execution_count": 10,
|
235 |
-
"metadata": {},
|
236 |
-
"output_type": "execute_result"
|
237 |
-
}
|
238 |
-
],
|
239 |
-
"source": [
|
240 |
-
"# Display the first 5 rows of the dataframe\n",
|
241 |
-
"electricity_df.head()"
|
242 |
-
]
|
243 |
-
},
|
244 |
-
{
|
245 |
-
"cell_type": "markdown",
|
246 |
-
"metadata": {},
|
247 |
-
"source": [
|
248 |
-
"### <span style=\"color:#2656a3;\"> 🌤 Weather measurements from Open Meteo"
|
249 |
-
]
|
250 |
-
},
|
251 |
-
{
|
252 |
-
"cell_type": "code",
|
253 |
-
"execution_count": 11,
|
254 |
-
"metadata": {},
|
255 |
-
"outputs": [],
|
256 |
-
"source": [
|
257 |
-
"# Defining the URL for the API call to the electricity price data\n",
|
258 |
-
"weather_api_url = ('https://archive-api.open-meteo.com/v1/archive?latitude=57.048&longitude=9.9187&start_date=2024-01-01&end_date=2024-04-08&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m&timezone=auto')"
|
259 |
-
]
|
260 |
-
},
|
261 |
-
{
|
262 |
-
"cell_type": "code",
|
263 |
-
"execution_count": 12,
|
264 |
-
"metadata": {},
|
265 |
-
"outputs": [
|
266 |
-
{
|
267 |
-
"name": "stdout",
|
268 |
-
"output_type": "stream",
|
269 |
-
"text": [
|
270 |
-
"<Response [200]>\n"
|
271 |
-
]
|
272 |
-
}
|
273 |
-
],
|
274 |
-
"source": [
|
275 |
-
"# Fetch data from the API and make the output to a pandas dataframe\n",
|
276 |
-
"weather_data_response = requests.get(weather_api_url)\n",
|
277 |
-
"weather_data = weather_data_response.json()\n",
|
278 |
-
"weather_df = pd.DataFrame(weather_data['hourly'])\n",
|
279 |
-
"\n",
|
280 |
-
"# Checking the result of the API call\n",
|
281 |
-
"print(weather_data_response)\n"
|
282 |
-
]
|
283 |
-
},
|
284 |
-
{
|
285 |
-
"cell_type": "code",
|
286 |
-
"execution_count": 13,
|
287 |
-
"metadata": {},
|
288 |
-
"outputs": [],
|
289 |
-
"source": [
|
290 |
-
"# Formatting the date column\n",
|
291 |
-
"weather_df['date'] = weather_df['time'].str[:10]"
|
292 |
-
]
|
293 |
-
},
|
294 |
-
{
|
295 |
-
"cell_type": "code",
|
296 |
-
"execution_count": 14,
|
297 |
-
"metadata": {},
|
298 |
-
"outputs": [],
|
299 |
-
"source": [
|
300 |
-
"# Creating a new column for the date called weather_temporary_date_column and insert it as the first column in the dataframe\n",
|
301 |
-
"weather_temporary_date_column = weather_df.pop('date')\n",
|
302 |
-
"weather_df.insert(0, 'date', weather_temporary_date_column)"
|
303 |
-
]
|
304 |
-
},
|
305 |
-
{
|
306 |
-
"cell_type": "code",
|
307 |
-
"execution_count": 15,
|
308 |
-
"metadata": {},
|
309 |
-
"outputs": [],
|
310 |
-
"source": [
|
311 |
-
"# Convert string 'date' column to date type\n",
|
312 |
-
"weather_df['date'] = pd.to_datetime(weather_df['date'], format='%Y-%m-%d').dt.date\n",
|
313 |
-
"weather_df['time'] = pd.to_datetime(weather_df['time'])"
|
314 |
-
]
|
315 |
-
},
|
316 |
-
{
|
317 |
-
"cell_type": "code",
|
318 |
-
"execution_count": 16,
|
319 |
-
"metadata": {},
|
320 |
-
"outputs": [
|
321 |
-
{
|
322 |
-
"data": {
|
323 |
-
"text/html": [
|
324 |
-
"<div>\n",
|
325 |
-
"<style scoped>\n",
|
326 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
327 |
-
" vertical-align: middle;\n",
|
328 |
-
" }\n",
|
329 |
-
"\n",
|
330 |
-
" .dataframe tbody tr th {\n",
|
331 |
-
" vertical-align: top;\n",
|
332 |
-
" }\n",
|
333 |
-
"\n",
|
334 |
-
" .dataframe thead th {\n",
|
335 |
-
" text-align: right;\n",
|
336 |
-
" }\n",
|
337 |
-
"</style>\n",
|
338 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
339 |
-
" <thead>\n",
|
340 |
-
" <tr style=\"text-align: right;\">\n",
|
341 |
-
" <th></th>\n",
|
342 |
-
" <th>date</th>\n",
|
343 |
-
" <th>time</th>\n",
|
344 |
-
" <th>temperature_2m</th>\n",
|
345 |
-
" <th>relative_humidity_2m</th>\n",
|
346 |
-
" <th>precipitation</th>\n",
|
347 |
-
" <th>rain</th>\n",
|
348 |
-
" <th>snowfall</th>\n",
|
349 |
-
" <th>weather_code</th>\n",
|
350 |
-
" <th>cloud_cover</th>\n",
|
351 |
-
" <th>wind_speed_10m</th>\n",
|
352 |
-
" <th>wind_gusts_10m</th>\n",
|
353 |
-
" </tr>\n",
|
354 |
-
" </thead>\n",
|
355 |
-
" <tbody>\n",
|
356 |
-
" <tr>\n",
|
357 |
-
" <th>0</th>\n",
|
358 |
-
" <td>2024-01-01</td>\n",
|
359 |
-
" <td>2024-01-01 00:00:00</td>\n",
|
360 |
-
" <td>4.8</td>\n",
|
361 |
-
" <td>95</td>\n",
|
362 |
-
" <td>1.8</td>\n",
|
363 |
-
" <td>1.8</td>\n",
|
364 |
-
" <td>0.0</td>\n",
|
365 |
-
" <td>61</td>\n",
|
366 |
-
" <td>100</td>\n",
|
367 |
-
" <td>23.6</td>\n",
|
368 |
-
" <td>49.0</td>\n",
|
369 |
-
" </tr>\n",
|
370 |
-
" <tr>\n",
|
371 |
-
" <th>1</th>\n",
|
372 |
-
" <td>2024-01-01</td>\n",
|
373 |
-
" <td>2024-01-01 01:00:00</td>\n",
|
374 |
-
" <td>4.9</td>\n",
|
375 |
-
" <td>95</td>\n",
|
376 |
-
" <td>1.2</td>\n",
|
377 |
-
" <td>1.2</td>\n",
|
378 |
-
" <td>0.0</td>\n",
|
379 |
-
" <td>55</td>\n",
|
380 |
-
" <td>100</td>\n",
|
381 |
-
" <td>21.6</td>\n",
|
382 |
-
" <td>43.2</td>\n",
|
383 |
-
" </tr>\n",
|
384 |
-
" <tr>\n",
|
385 |
-
" <th>2</th>\n",
|
386 |
-
" <td>2024-01-01</td>\n",
|
387 |
-
" <td>2024-01-01 02:00:00</td>\n",
|
388 |
-
" <td>4.8</td>\n",
|
389 |
-
" <td>96</td>\n",
|
390 |
-
" <td>0.6</td>\n",
|
391 |
-
" <td>0.6</td>\n",
|
392 |
-
" <td>0.0</td>\n",
|
393 |
-
" <td>53</td>\n",
|
394 |
-
" <td>100</td>\n",
|
395 |
-
" <td>18.4</td>\n",
|
396 |
-
" <td>39.2</td>\n",
|
397 |
-
" </tr>\n",
|
398 |
-
" <tr>\n",
|
399 |
-
" <th>3</th>\n",
|
400 |
-
" <td>2024-01-01</td>\n",
|
401 |
-
" <td>2024-01-01 03:00:00</td>\n",
|
402 |
-
" <td>4.3</td>\n",
|
403 |
-
" <td>96</td>\n",
|
404 |
-
" <td>0.8</td>\n",
|
405 |
-
" <td>0.8</td>\n",
|
406 |
-
" <td>0.0</td>\n",
|
407 |
-
" <td>53</td>\n",
|
408 |
-
" <td>100</td>\n",
|
409 |
-
" <td>16.7</td>\n",
|
410 |
-
" <td>33.8</td>\n",
|
411 |
-
" </tr>\n",
|
412 |
-
" <tr>\n",
|
413 |
-
" <th>4</th>\n",
|
414 |
-
" <td>2024-01-01</td>\n",
|
415 |
-
" <td>2024-01-01 04:00:00</td>\n",
|
416 |
-
" <td>4.4</td>\n",
|
417 |
-
" <td>97</td>\n",
|
418 |
-
" <td>0.3</td>\n",
|
419 |
-
" <td>0.3</td>\n",
|
420 |
-
" <td>0.0</td>\n",
|
421 |
-
" <td>51</td>\n",
|
422 |
-
" <td>100</td>\n",
|
423 |
-
" <td>15.4</td>\n",
|
424 |
-
" <td>30.2</td>\n",
|
425 |
-
" </tr>\n",
|
426 |
-
" </tbody>\n",
|
427 |
-
"</table>\n",
|
428 |
-
"</div>"
|
429 |
-
],
|
430 |
-
"text/plain": [
|
431 |
-
" date time temperature_2m relative_humidity_2m \\\n",
|
432 |
-
"0 2024-01-01 2024-01-01 00:00:00 4.8 95 \n",
|
433 |
-
"1 2024-01-01 2024-01-01 01:00:00 4.9 95 \n",
|
434 |
-
"2 2024-01-01 2024-01-01 02:00:00 4.8 96 \n",
|
435 |
-
"3 2024-01-01 2024-01-01 03:00:00 4.3 96 \n",
|
436 |
-
"4 2024-01-01 2024-01-01 04:00:00 4.4 97 \n",
|
437 |
-
"\n",
|
438 |
-
" precipitation rain snowfall weather_code cloud_cover wind_speed_10m \\\n",
|
439 |
-
"0 1.8 1.8 0.0 61 100 23.6 \n",
|
440 |
-
"1 1.2 1.2 0.0 55 100 21.6 \n",
|
441 |
-
"2 0.6 0.6 0.0 53 100 18.4 \n",
|
442 |
-
"3 0.8 0.8 0.0 53 100 16.7 \n",
|
443 |
-
"4 0.3 0.3 0.0 51 100 15.4 \n",
|
444 |
-
"\n",
|
445 |
-
" wind_gusts_10m \n",
|
446 |
-
"0 49.0 \n",
|
447 |
-
"1 43.2 \n",
|
448 |
-
"2 39.2 \n",
|
449 |
-
"3 33.8 \n",
|
450 |
-
"4 30.2 "
|
451 |
-
]
|
452 |
-
},
|
453 |
-
"execution_count": 16,
|
454 |
-
"metadata": {},
|
455 |
-
"output_type": "execute_result"
|
456 |
-
}
|
457 |
-
],
|
458 |
-
"source": [
|
459 |
-
"# Display the first 5 rows of the dataframe\n",
|
460 |
-
"weather_df.head()"
|
461 |
-
]
|
462 |
-
},
|
463 |
-
{
|
464 |
-
"cell_type": "markdown",
|
465 |
-
"metadata": {},
|
466 |
-
"source": [
|
467 |
-
"## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
|
468 |
-
]
|
469 |
-
},
|
470 |
-
{
|
471 |
-
"cell_type": "code",
|
472 |
-
"execution_count": null,
|
473 |
-
"metadata": {},
|
474 |
-
"outputs": [],
|
475 |
-
"source": [
|
476 |
-
"import hopsworks\n",
|
477 |
-
"\n",
|
478 |
-
"project = hopsworks.login()\n",
|
479 |
-
"\n",
|
480 |
-
"fs = project.get_feature_store()"
|
481 |
-
]
|
482 |
-
},
|
483 |
-
{
|
484 |
-
"cell_type": "code",
|
485 |
-
"execution_count": null,
|
486 |
-
"metadata": {},
|
487 |
-
"outputs": [],
|
488 |
-
"source": [
|
489 |
-
"# Retrieve feature groups\n",
|
490 |
-
"weather_fg = fs.get_feature_group(\n",
|
491 |
-
" name=\"weather_measurements\",\n",
|
492 |
-
" version=1,\n",
|
493 |
-
")\n",
|
494 |
-
"\n",
|
495 |
-
"electricity_fg = fs.get_feature_group(\n",
|
496 |
-
" name=\"electricity_prices\",\n",
|
497 |
-
" version=1,\n",
|
498 |
-
")"
|
499 |
-
]
|
500 |
-
},
|
501 |
-
{
|
502 |
-
"cell_type": "markdown",
|
503 |
-
"metadata": {},
|
504 |
-
"source": [
|
505 |
-
"### <span style=\"color:#2656a3;\"> ⬆️ Uploading new data to the Feature Store"
|
506 |
-
]
|
507 |
-
},
|
508 |
-
{
|
509 |
-
"cell_type": "code",
|
510 |
-
"execution_count": null,
|
511 |
-
"metadata": {},
|
512 |
-
"outputs": [],
|
513 |
-
"source": [
|
514 |
-
"# Inserting the weather_df into the feature group named weather_fg\n",
|
515 |
-
"weather_fg.insert(weather_df)"
|
516 |
-
]
|
517 |
-
},
|
518 |
-
{
|
519 |
-
"cell_type": "code",
|
520 |
-
"execution_count": null,
|
521 |
-
"metadata": {},
|
522 |
-
"outputs": [],
|
523 |
-
"source": [
|
524 |
-
"# Inserting the electricity_df into the feature group named electricity_fg\n",
|
525 |
-
"electricity_fg.insert(electricity_df)"
|
526 |
-
]
|
527 |
-
},
|
528 |
-
{
|
529 |
-
"cell_type": "markdown",
|
530 |
-
"metadata": {},
|
531 |
-
"source": [
|
532 |
-
"---\n",
|
533 |
-
"## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 03: Training </span>\n",
|
534 |
-
"\n",
|
535 |
-
"In the next notebook, you will train a model on the data stored in the Feature Groups."
|
536 |
-
]
|
537 |
-
}
|
538 |
-
],
|
539 |
-
"metadata": {
|
540 |
-
"kernelspec": {
|
541 |
-
"display_name": "bds-mlops",
|
542 |
-
"language": "python",
|
543 |
-
"name": "python3"
|
544 |
-
},
|
545 |
-
"language_info": {
|
546 |
-
"codemirror_mode": {
|
547 |
-
"name": "ipython",
|
548 |
-
"version": 3
|
549 |
-
},
|
550 |
-
"file_extension": ".py",
|
551 |
-
"mimetype": "text/x-python",
|
552 |
-
"name": "python",
|
553 |
-
"nbconvert_exporter": "python",
|
554 |
-
"pygments_lexer": "ipython3",
|
555 |
-
"version": "3.11.8"
|
556 |
-
},
|
557 |
-
"orig_nbformat": 4
|
558 |
-
},
|
559 |
-
"nbformat": 4,
|
560 |
-
"nbformat_minor": 2
|
561 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/Old/3_training_pipeline copy.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
hide/Old/3_training_pipeline_OLD.ipynb
DELETED
@@ -1,349 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## 🗒️ This notebook is divided into the following sections:\n",
|
15 |
-
"1. Feature selection.\n",
|
16 |
-
"2. Feature transformations.\n",
|
17 |
-
"3. Training datasets creation.\n",
|
18 |
-
"4. Loading the training data.\n",
|
19 |
-
"5. Train the model.\n",
|
20 |
-
"6. Register model to Hopsworks model registry."
|
21 |
-
]
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"cell_type": "markdown",
|
25 |
-
"metadata": {},
|
26 |
-
"source": [
|
27 |
-
"## <span style='color:#2656a3'> ⚙️ Import of libraries and packages"
|
28 |
-
]
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"cell_type": "code",
|
32 |
-
"execution_count": 1,
|
33 |
-
"metadata": {},
|
34 |
-
"outputs": [],
|
35 |
-
"source": [
|
36 |
-
"!pip install tensorflow --quiet"
|
37 |
-
]
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"cell_type": "code",
|
41 |
-
"execution_count": 2,
|
42 |
-
"metadata": {},
|
43 |
-
"outputs": [
|
44 |
-
{
|
45 |
-
"name": "stderr",
|
46 |
-
"output_type": "stream",
|
47 |
-
"text": [
|
48 |
-
"2024-04-16 16:06:19.917866: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
49 |
-
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
|
50 |
-
]
|
51 |
-
}
|
52 |
-
],
|
53 |
-
"source": [
|
54 |
-
"import inspect \n",
|
55 |
-
"import datetime\n",
|
56 |
-
"\n",
|
57 |
-
"import pandas as pd\n",
|
58 |
-
"import numpy as np\n",
|
59 |
-
"import matplotlib.pyplot as plt\n",
|
60 |
-
"import tensorflow as tf\n",
|
61 |
-
"\n",
|
62 |
-
"#ignore warnings\n",
|
63 |
-
"import warnings\n",
|
64 |
-
"warnings.filterwarnings('ignore')"
|
65 |
-
]
|
66 |
-
},
|
67 |
-
{
|
68 |
-
"cell_type": "markdown",
|
69 |
-
"metadata": {},
|
70 |
-
"source": [
|
71 |
-
"## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
|
72 |
-
]
|
73 |
-
},
|
74 |
-
{
|
75 |
-
"cell_type": "code",
|
76 |
-
"execution_count": 3,
|
77 |
-
"metadata": {},
|
78 |
-
"outputs": [
|
79 |
-
{
|
80 |
-
"name": "stdout",
|
81 |
-
"output_type": "stream",
|
82 |
-
"text": [
|
83 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
84 |
-
"\n",
|
85 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/550040\n",
|
86 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
87 |
-
]
|
88 |
-
}
|
89 |
-
],
|
90 |
-
"source": [
|
91 |
-
"import hopsworks\n",
|
92 |
-
"\n",
|
93 |
-
"project = hopsworks.login()\n",
|
94 |
-
"\n",
|
95 |
-
"fs = project.get_feature_store() "
|
96 |
-
]
|
97 |
-
},
|
98 |
-
{
|
99 |
-
"cell_type": "code",
|
100 |
-
"execution_count": 4,
|
101 |
-
"metadata": {},
|
102 |
-
"outputs": [],
|
103 |
-
"source": [
|
104 |
-
"# Retrieve feature groups\n",
|
105 |
-
"electricity_fg = fs.get_feature_group(\n",
|
106 |
-
" name='electricity_prices',\n",
|
107 |
-
" version=1,\n",
|
108 |
-
")\n",
|
109 |
-
"\n",
|
110 |
-
"weather_fg = fs.get_feature_group(\n",
|
111 |
-
" name='weather_measurements',\n",
|
112 |
-
" version=1,\n",
|
113 |
-
")\n",
|
114 |
-
"\n",
|
115 |
-
"danish_holidays_fg = fs.get_feature_group(\n",
|
116 |
-
" name='danish_holidays',\n",
|
117 |
-
" version=1,\n",
|
118 |
-
")"
|
119 |
-
]
|
120 |
-
},
|
121 |
-
{
|
122 |
-
"cell_type": "markdown",
|
123 |
-
"metadata": {},
|
124 |
-
"source": [
|
125 |
-
"## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
|
126 |
-
"\n",
|
127 |
-
"Let's start by selecting all the features you want to include for model training/inference."
|
128 |
-
]
|
129 |
-
},
|
130 |
-
{
|
131 |
-
"cell_type": "code",
|
132 |
-
"execution_count": 5,
|
133 |
-
"metadata": {},
|
134 |
-
"outputs": [],
|
135 |
-
"source": [
|
136 |
-
"# Select features for training data\n",
|
137 |
-
"selected_features = electricity_fg.select_all()\\\n",
|
138 |
-
" .join(\n",
|
139 |
-
" weather_fg\\\n",
|
140 |
-
" .select_except([\"timestamp\"])\n",
|
141 |
-
" )\\\n",
|
142 |
-
" .join(\n",
|
143 |
-
" danish_holidays_fg.select_all()\n",
|
144 |
-
" )"
|
145 |
-
]
|
146 |
-
},
|
147 |
-
{
|
148 |
-
"cell_type": "markdown",
|
149 |
-
"metadata": {},
|
150 |
-
"source": [
|
151 |
-
"### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
|
152 |
-
"\n",
|
153 |
-
"Hopsworks Feature Store provides functionality to attach transformation functions to feature views and comes with built-in transformation functions such as `min_max_scaler`, `standard_scaler`, `robust_scaler` and `label_encoder`.\n",
|
154 |
-
"\n",
|
155 |
-
"You will preprocess your data using *min-max scaling* on numerical features and *label encoding* on categorical features. To do this you simply define a mapping between our features and transformation functions. This ensures that transformation functions such as *min-max scaling* are fitted only on the training data (and not the validation/test data), which ensures that there is no data leakage."
|
156 |
-
]
|
157 |
-
},
|
158 |
-
{
|
159 |
-
"cell_type": "code",
|
160 |
-
"execution_count": 6,
|
161 |
-
"metadata": {},
|
162 |
-
"outputs": [],
|
163 |
-
"source": [
|
164 |
-
"transformation_functions = {\n",
|
165 |
-
" \"SpotPriceDKK_KWH\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
166 |
-
" \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
167 |
-
" \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
168 |
-
" \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
169 |
-
" \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
170 |
-
" \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
171 |
-
" \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
172 |
-
" \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
173 |
-
" \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
|
174 |
-
" \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\")\n",
|
175 |
-
" }"
|
176 |
-
]
|
177 |
-
},
|
178 |
-
{
|
179 |
-
"cell_type": "markdown",
|
180 |
-
"metadata": {},
|
181 |
-
"source": [
|
182 |
-
"`Feature Views` stand between **Feature Groups** and **Training Datasets**. By combining **Feature Groups** we can create **Feature Views**, which store metadata about our data. From **Feature Views** we can create **Training Datasets**.\n",
|
183 |
-
"\n",
|
184 |
-
"A Feature View lets you define a schema in the form of a query with filters, a model target feature/label, and additional transformation functions.\n",
|
185 |
-
"\n",
|
186 |
-
"In order to create Feature View we can use `FeatureStore.get_or_create_feature_view()` method.\n",
|
187 |
-
"\n",
|
188 |
-
"We can specify the following parameters:\n",
|
189 |
-
"\n",
|
190 |
-
"- `name` - name of the feature view.\n",
|
191 |
-
"\n",
|
192 |
-
"- `version` - version of the feature view.\n",
|
193 |
-
"\n",
|
194 |
-
"- `labels`- our target variable.\n",
|
195 |
-
"\n",
|
196 |
-
"- `transformation_functions` - functions to transform our features.\n",
|
197 |
-
"\n",
|
198 |
-
"- `query` - query object with data."
|
199 |
-
]
|
200 |
-
},
|
201 |
-
{
|
202 |
-
"cell_type": "code",
|
203 |
-
"execution_count": 7,
|
204 |
-
"metadata": {},
|
205 |
-
"outputs": [],
|
206 |
-
"source": [
|
207 |
-
"feature_view = fs.get_or_create_feature_view(\n",
|
208 |
-
" name='electricity_feature_view',\n",
|
209 |
-
" version=1,\n",
|
210 |
-
" labels=[], # you will define our 'y' later manually\n",
|
211 |
-
" transformation_functions=transformation_functions,\n",
|
212 |
-
" query=selected_features,\n",
|
213 |
-
")"
|
214 |
-
]
|
215 |
-
},
|
216 |
-
{
|
217 |
-
"cell_type": "markdown",
|
218 |
-
"metadata": {},
|
219 |
-
"source": [
|
220 |
-
"## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>"
|
221 |
-
]
|
222 |
-
},
|
223 |
-
{
|
224 |
-
"cell_type": "markdown",
|
225 |
-
"metadata": {},
|
226 |
-
"source": [
|
227 |
-
"### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
|
228 |
-
]
|
229 |
-
},
|
230 |
-
{
|
231 |
-
"cell_type": "code",
|
232 |
-
"execution_count": null,
|
233 |
-
"metadata": {},
|
234 |
-
"outputs": [],
|
235 |
-
"source": [
|
236 |
-
"# since you didn't specify 'labels' in feature view creation, it will return None for Y.\n",
|
237 |
-
"X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
|
238 |
-
" train_start=\"2021-01-01\",\n",
|
239 |
-
" train_end=\"2022-02-28\",\n",
|
240 |
-
" validation_start=\"2022-03-01\",\n",
|
241 |
-
" validation_end=\"2022-05-31\",\n",
|
242 |
-
" test_start=\"2022-06-01\",\n",
|
243 |
-
" test_end=\"2022-09-09\",\n",
|
244 |
-
" description='Electricity price prediction dataset',\n",
|
245 |
-
")"
|
246 |
-
]
|
247 |
-
},
|
248 |
-
{
|
249 |
-
"cell_type": "code",
|
250 |
-
"execution_count": null,
|
251 |
-
"metadata": {},
|
252 |
-
"outputs": [],
|
253 |
-
"source": [
|
254 |
-
"# Sorting the training, validation, and test datasets based on the 'time' column\n",
|
255 |
-
"X_train.sort_values([\"time\"], inplace=True)\n",
|
256 |
-
"X_val.sort_values([\"time\"], inplace=True)\n",
|
257 |
-
"X_test.sort_values([\"time\"], inplace=True)"
|
258 |
-
]
|
259 |
-
},
|
260 |
-
{
|
261 |
-
"cell_type": "code",
|
262 |
-
"execution_count": null,
|
263 |
-
"metadata": {},
|
264 |
-
"outputs": [],
|
265 |
-
"source": [
|
266 |
-
"# Define 'y_train', 'y_val' and 'y_test'\n",
|
267 |
-
"y_train = X_train[[\"SpotPriceDKK_KWH\"]]\n",
|
268 |
-
"y_val = X_val[[\"SpotPriceDKK_KWH\"]]\n",
|
269 |
-
"y_test = X_test[[\"SpotPriceDKK_KWH\"]]"
|
270 |
-
]
|
271 |
-
},
|
272 |
-
{
|
273 |
-
"cell_type": "code",
|
274 |
-
"execution_count": null,
|
275 |
-
"metadata": {},
|
276 |
-
"outputs": [],
|
277 |
-
"source": [
|
278 |
-
"# Dropping the 'date' and 'time' columns from the training, validation, and test datasets\n",
|
279 |
-
"X_train.drop([\"date\", \"time\"], axis=1, inplace=True)\n",
|
280 |
-
"X_val.drop([\"date\", \"time\"], axis=1, inplace=True)\n",
|
281 |
-
"X_test.drop([\"date\", \"time\"], axis=1, inplace=True)"
|
282 |
-
]
|
283 |
-
},
|
284 |
-
{
|
285 |
-
"cell_type": "code",
|
286 |
-
"execution_count": null,
|
287 |
-
"metadata": {},
|
288 |
-
"outputs": [],
|
289 |
-
"source": [
|
290 |
-
"# Displaying the first 5 rows of the test dataset (X_test)\n",
|
291 |
-
"X_test.head()"
|
292 |
-
]
|
293 |
-
},
|
294 |
-
{
|
295 |
-
"cell_type": "markdown",
|
296 |
-
"metadata": {},
|
297 |
-
"source": [
|
298 |
-
"## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
|
299 |
-
]
|
300 |
-
},
|
301 |
-
{
|
302 |
-
"cell_type": "markdown",
|
303 |
-
"metadata": {},
|
304 |
-
"source": [
|
305 |
-
"## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
|
306 |
-
]
|
307 |
-
},
|
308 |
-
{
|
309 |
-
"cell_type": "markdown",
|
310 |
-
"metadata": {},
|
311 |
-
"source": [
|
312 |
-
"## <span style='color:#2656a3'>🗄 Model Registry</span>"
|
313 |
-
]
|
314 |
-
},
|
315 |
-
{
|
316 |
-
"cell_type": "markdown",
|
317 |
-
"metadata": {},
|
318 |
-
"source": [
|
319 |
-
"---\n",
|
320 |
-
"\n",
|
321 |
-
"## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
|
322 |
-
"\n",
|
323 |
-
"In the next notebook you will use your registered model to predict batch data."
|
324 |
-
]
|
325 |
-
}
|
326 |
-
],
|
327 |
-
"metadata": {
|
328 |
-
"kernelspec": {
|
329 |
-
"display_name": "bds-mlops",
|
330 |
-
"language": "python",
|
331 |
-
"name": "python3"
|
332 |
-
},
|
333 |
-
"language_info": {
|
334 |
-
"codemirror_mode": {
|
335 |
-
"name": "ipython",
|
336 |
-
"version": 3
|
337 |
-
},
|
338 |
-
"file_extension": ".py",
|
339 |
-
"mimetype": "text/x-python",
|
340 |
-
"name": "python",
|
341 |
-
"nbconvert_exporter": "python",
|
342 |
-
"pygments_lexer": "ipython3",
|
343 |
-
"version": "3.11.8"
|
344 |
-
},
|
345 |
-
"orig_nbformat": 4
|
346 |
-
},
|
347 |
-
"nbformat": 4,
|
348 |
-
"nbformat_minor": 2
|
349 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/Old/4_batch_inference_OLD.ipynb
DELETED
@@ -1,80 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 04: Batch Inference</span>"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## 🗒️ This notebook is divided into the following sections:\n",
|
15 |
-
"\n",
|
16 |
-
"1. Load batch data.\n",
|
17 |
-
"2. Predict using model from Model Registry."
|
18 |
-
]
|
19 |
-
},
|
20 |
-
{
|
21 |
-
"cell_type": "markdown",
|
22 |
-
"metadata": {},
|
23 |
-
"source": [
|
24 |
-
"## <span style='color:#2656a3'> ⚙️ Import of libraries and packages\n",
|
25 |
-
"\n",
|
26 |
-
"First, we'll install the Python packages required for this notebook. We'll use the --quiet command after specifying the names of the libraries to ensure a silent installation process. Then, we'll proceed to import all the necessary libraries."
|
27 |
-
]
|
28 |
-
},
|
29 |
-
{
|
30 |
-
"cell_type": "markdown",
|
31 |
-
"metadata": {},
|
32 |
-
"source": [
|
33 |
-
"## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
|
34 |
-
]
|
35 |
-
},
|
36 |
-
{
|
37 |
-
"cell_type": "markdown",
|
38 |
-
"metadata": {},
|
39 |
-
"source": [
|
40 |
-
"## <span style='color:#2656a3'> ⚙️ Feature View Retrieval"
|
41 |
-
]
|
42 |
-
},
|
43 |
-
{
|
44 |
-
"cell_type": "markdown",
|
45 |
-
"metadata": {},
|
46 |
-
"source": [
|
47 |
-
"## <span style='color:#2656a3'> 🗄 Model Registry"
|
48 |
-
]
|
49 |
-
},
|
50 |
-
{
|
51 |
-
"cell_type": "markdown",
|
52 |
-
"metadata": {},
|
53 |
-
"source": [
|
54 |
-
"## <span style='color:#2656a3'> 📮 Retrieving model from Model Registry"
|
55 |
-
]
|
56 |
-
},
|
57 |
-
{
|
58 |
-
"cell_type": "markdown",
|
59 |
-
"metadata": {},
|
60 |
-
"source": [
|
61 |
-
"## <span style='color:#2656a3'> ✨ Load Batch Data"
|
62 |
-
]
|
63 |
-
},
|
64 |
-
{
|
65 |
-
"cell_type": "markdown",
|
66 |
-
"metadata": {},
|
67 |
-
"source": [
|
68 |
-
"## <span style='color:#2656a3'> 🤖 Making the predictions"
|
69 |
-
]
|
70 |
-
}
|
71 |
-
],
|
72 |
-
"metadata": {
|
73 |
-
"language_info": {
|
74 |
-
"name": "python"
|
75 |
-
},
|
76 |
-
"orig_nbformat": 4
|
77 |
-
},
|
78 |
-
"nbformat": 4,
|
79 |
-
"nbformat_minor": 2
|
80 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/Old/predict_example.py
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import numpy as np
|
3 |
-
import pandas as pd
|
4 |
-
import hsfs
|
5 |
-
import joblib
|
6 |
-
|
7 |
-
|
8 |
-
class Predict(object):
    """Serve electricity price predictions from a feature view and a pickled model.

    On construction the class connects to the feature store, fetches version 1
    of the ``electricity_feature_view`` feature view, initializes it for
    serving, and loads the trained model from ``ARTIFACT_FILES_PATH``.
    """

    def __init__(self):
        """Initialize the serving state and read the trained model.

        Raises:
            KeyError: If the ``ARTIFACT_FILES_PATH`` environment variable is
                not set.
        """
        # get feature store handle
        fs_conn = hsfs.connection()
        self.fs = fs_conn.get_feature_store()

        # get feature view
        self.fv = self.fs.get_feature_view("electricity_feature_view", 1)

        # initialize serving
        self.fv.init_serving(1)

        # load the trained model
        self.model = joblib.load(os.environ["ARTIFACT_FILES_PATH"] + "/dk_electricity_model.pkl")
        print("Initialization Complete")

    def predict(self, timestamp_value, date_value):
        """Serve a prediction request using the trained model.

        Args:
            timestamp_value: Indexable container; only element 0 is used as
                the ``timestamp`` key of the lookup entry.
            date_value: Indexable container; only element 0 is used as the
                ``date`` key of the lookup entry.

        Returns:
            The model's prediction converted to a plain Python list.
        """
        # Retrieve the feature vector. The entry maps each key name to its
        # value; a list cannot be used as a dict key (it is unhashable), so
        # the keys are spelled out individually.
        feature_vector = self.fv.get_feature_vector(
            entry={"timestamp": timestamp_value[0], "date": date_value[0]}
        )
        # The first element of the vector is skipped before predicting
        # NOTE(review): presumably it is a key/non-feature column; confirm
        # against the feature view's column order.
        return self.model.predict(np.asarray(feature_vector[1:]).reshape(1, -1)).tolist()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/notebooks_dev/3_training_pipeline_dev_prophet.ipynb
DELETED
@@ -1,943 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## 🗒️ This notebook is divided into the following sections:\n",
|
15 |
-
"1. Feature selection.\n",
|
16 |
-
"2. Feature transformations.\n",
|
17 |
-
"3. Training datasets creation.\n",
|
18 |
-
"4. Loading the training data.\n",
|
19 |
-
"5. Train the model.\n",
|
20 |
-
"6. Register model to Hopsworks model registry."
|
21 |
-
]
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"cell_type": "markdown",
|
25 |
-
"metadata": {},
|
26 |
-
"source": [
|
27 |
-
"## <span style='color:#2656a3'> ⚙️ Import of libraries and packages</span>"
|
28 |
-
]
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"cell_type": "code",
|
32 |
-
"execution_count": 1,
|
33 |
-
"metadata": {},
|
34 |
-
"outputs": [],
|
35 |
-
"source": [
|
36 |
-
"!pip install tensorflow --quiet"
|
37 |
-
]
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"cell_type": "code",
|
41 |
-
"execution_count": 2,
|
42 |
-
"metadata": {},
|
43 |
-
"outputs": [],
|
44 |
-
"source": [
|
45 |
-
"# Importing the packages for the needed libraries for the Jupyter notebook\n",
|
46 |
-
"import inspect \n",
|
47 |
-
"import datetime\n",
|
48 |
-
"\n",
|
49 |
-
"import pandas as pd\n",
|
50 |
-
"import numpy as np\n",
|
51 |
-
"import matplotlib.pyplot as plt\n",
|
52 |
-
"import tensorflow as tf\n",
|
53 |
-
"\n",
|
54 |
-
"#ignore warnings\n",
|
55 |
-
"import warnings\n",
|
56 |
-
"warnings.filterwarnings('ignore')"
|
57 |
-
]
|
58 |
-
},
|
59 |
-
{
|
60 |
-
"cell_type": "markdown",
|
61 |
-
"metadata": {},
|
62 |
-
"source": [
|
63 |
-
"## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store</span>"
|
64 |
-
]
|
65 |
-
},
|
66 |
-
{
|
67 |
-
"cell_type": "code",
|
68 |
-
"execution_count": 3,
|
69 |
-
"metadata": {},
|
70 |
-
"outputs": [
|
71 |
-
{
|
72 |
-
"name": "stdout",
|
73 |
-
"output_type": "stream",
|
74 |
-
"text": [
|
75 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
76 |
-
"\n",
|
77 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
|
78 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
79 |
-
]
|
80 |
-
}
|
81 |
-
],
|
82 |
-
"source": [
|
83 |
-
"# Importing the hopsworks module\n",
|
84 |
-
"import hopsworks\n",
|
85 |
-
"\n",
|
86 |
-
"# Logging in to the Hopsworks project\n",
|
87 |
-
"project = hopsworks.login()\n",
|
88 |
-
"\n",
|
89 |
-
"# Getting the feature store from the project\n",
|
90 |
-
"fs = project.get_feature_store() "
|
91 |
-
]
|
92 |
-
},
|
93 |
-
{
|
94 |
-
"cell_type": "code",
|
95 |
-
"execution_count": 4,
|
96 |
-
"metadata": {},
|
97 |
-
"outputs": [],
|
98 |
-
"source": [
|
99 |
-
"# Retrieve the feature groups\n",
|
100 |
-
"electricity_fg = fs.get_feature_group(\n",
|
101 |
-
" name='electricity_prices',\n",
|
102 |
-
" version=1,\n",
|
103 |
-
")\n",
|
104 |
-
"\n",
|
105 |
-
"weather_fg = fs.get_feature_group(\n",
|
106 |
-
" name='weather_measurements',\n",
|
107 |
-
" version=1,\n",
|
108 |
-
")\n",
|
109 |
-
"\n",
|
110 |
-
"danish_holidays_fg = fs.get_feature_group(\n",
|
111 |
-
" name='danish_holidayss',\n",
|
112 |
-
" version=1,\n",
|
113 |
-
")\n",
|
114 |
-
"forecast_renewable_energy_fg = fs.get_feature_group(\n",
|
115 |
-
" name='forecast_renewable_energy',\n",
|
116 |
-
" version=1\n",
|
117 |
-
")"
|
118 |
-
]
|
119 |
-
},
|
120 |
-
{
|
121 |
-
"cell_type": "markdown",
|
122 |
-
"metadata": {},
|
123 |
-
"source": [
|
124 |
-
"## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
|
125 |
-
"\n",
|
126 |
-
"We first select the features that we want to include for model training.\n",
|
127 |
-
"\n",
|
128 |
-
"Since we specified `primary_key` as `date` and `event_time` as `timestamp` in Part 01, we can now join `electricity_fg`, `weather_fg` and `forecast_renewable_energy_fg` together."
|
129 |
-
]
|
130 |
-
},
|
131 |
-
{
|
132 |
-
"cell_type": "markdown",
|
133 |
-
"metadata": {},
|
134 |
-
"source": [
|
135 |
-
"hmmm skal 'time' egentlig være 'date'???"
|
136 |
-
]
|
137 |
-
},
|
138 |
-
{
|
139 |
-
"cell_type": "code",
|
140 |
-
"execution_count": 5,
|
141 |
-
"metadata": {},
|
142 |
-
"outputs": [],
|
143 |
-
"source": [
|
144 |
-
"# Select features for training data\n",
|
145 |
-
"selected_features = electricity_fg.select_all()\\\n",
|
146 |
-
" .join(weather_fg.select_except([\"timestamp\", \"time\"]))\\\n",
|
147 |
-
" .join(forecast_renewable_energy_fg.select_except([\"timestamp\", \"time\"]))\\\n",
|
148 |
-
" .join(danish_holidays_fg.select_all())"
|
149 |
-
]
|
150 |
-
},
|
151 |
-
{
|
152 |
-
"cell_type": "code",
|
153 |
-
"execution_count": 6,
|
154 |
-
"metadata": {},
|
155 |
-
"outputs": [],
|
156 |
-
"source": [
|
157 |
-
"# Uncomment this if you would like to view your selected features\n",
|
158 |
-
"# selected_features.show(5)"
|
159 |
-
]
|
160 |
-
},
|
161 |
-
{
|
162 |
-
"cell_type": "markdown",
|
163 |
-
"metadata": {},
|
164 |
-
"source": [
|
165 |
-
"### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
|
166 |
-
"\n",
|
167 |
-
"We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
|
168 |
-
"To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
|
169 |
-
"\n",
|
170 |
-
"To achieve this, we create a mapping between our features and transformation functions - ved ikke om man kan sige det her?"
|
171 |
-
]
|
172 |
-
},
|
173 |
-
{
|
174 |
-
"cell_type": "code",
|
175 |
-
"execution_count": 7,
|
176 |
-
"metadata": {},
|
177 |
-
"outputs": [],
|
178 |
-
"source": [
|
179 |
-
"# Defining transformation functions for feature scaling and encoding\n",
|
180 |
-
"transformation_functions = {\n",
|
181 |
-
" \"dk1_spotpricedkk_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
182 |
-
" \"dk1_offshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
183 |
-
" \"dk1_onshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
184 |
-
" \"dk1_solar_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
185 |
-
" \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
186 |
-
" \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
187 |
-
" \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
188 |
-
" \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
189 |
-
" \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
190 |
-
" \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
191 |
-
" \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
192 |
-
" \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
|
193 |
-
" \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
|
194 |
-
" \"type\": fs.get_transformation_function(name=\"label_encoder\"),\n",
|
195 |
-
" }"
|
196 |
-
]
|
197 |
-
},
|
198 |
-
{
|
199 |
-
"cell_type": "markdown",
|
200 |
-
"metadata": {},
|
201 |
-
"source": [
|
202 |
-
"`Feature Views` stand between **Feature Groups** and **Training Dataset**. Combining **Feature Groups** we can create **Feature Views** which store metadata of our data. Having **Feature Views** we can create a **Training Dataset**.\n",
|
203 |
-
"\n",
|
204 |
-
"Feature Views allow us to define a schema in the form of a query with filters, a model target feature/label, and additional transformation functions.\n",
|
205 |
-
"\n",
|
206 |
-
"In order to create a Feature View we can use the `FeatureStore.get_or_create_feature_view()` method.\n",
|
207 |
-
"\n",
|
208 |
-
"We can specify the following parameters:\n",
|
209 |
-
"\n",
|
210 |
-
"- `name` - name of the feature view.\n",
|
211 |
-
"\n",
|
212 |
-
"- `version` - version of the feature view.\n",
|
213 |
-
"\n",
|
214 |
-
"- `labels`- our target variable.\n",
|
215 |
-
"\n",
|
216 |
-
"- `transformation_functions` - functions to transform our features.\n",
|
217 |
-
"\n",
|
218 |
-
"- `query` - query object with data."
|
219 |
-
]
|
220 |
-
},
|
221 |
-
{
|
222 |
-
"cell_type": "markdown",
|
223 |
-
"metadata": {},
|
224 |
-
"source": [
|
225 |
-
"ved ikke om den her omformulering af botten går an?"
|
226 |
-
]
|
227 |
-
},
|
228 |
-
{
|
229 |
-
"cell_type": "markdown",
|
230 |
-
"metadata": {},
|
231 |
-
"source": [
|
232 |
-
"`Feature Views` serve as an intermediary between **Feature Groups** and the **Training Dataset**. By combining various **Feature Groups**, we can construct **Feature Views**, which retain metadata about our data. Utilizing **Feature Views**, we can subsequently generate a **Training Dataset**.\n",
|
233 |
-
"\n",
|
234 |
-
"Feature Views facilitate the definition of schema through queries with filters, identification of the model's target feature or label, and application of additional transformation functions.\n",
|
235 |
-
"\n",
|
236 |
-
"To create a Feature View, we employ the `FeatureStore.get_or_create_feature_view()` method, where we specify the following parameters:\n",
|
237 |
-
"\n",
|
238 |
-
"- `name`: The name of the feature view.\n",
|
239 |
-
"\n",
|
240 |
-
"- `version`: The version of the feature view.\n",
|
241 |
-
"\n",
|
242 |
-
"- `labels`: Our target variable.\n",
|
243 |
-
"\n",
|
244 |
-
"- `transformation_functions`: Functions to transform our features.\n",
|
245 |
-
"\n",
|
246 |
-
"- `query`: A query object containing the relevant data."
|
247 |
-
]
|
248 |
-
},
|
249 |
-
{
|
250 |
-
"cell_type": "code",
|
251 |
-
"execution_count": 8,
|
252 |
-
"metadata": {},
|
253 |
-
"outputs": [],
|
254 |
-
"source": [
|
255 |
-
"# Getting or creating a feature view named 'electricity_feature_view'\n",
|
256 |
-
"version = 1 # Defining the version for the feature view\n",
|
257 |
-
"feature_view = fs.get_or_create_feature_view(\n",
|
258 |
-
" name='electricity_feature_view',\n",
|
259 |
-
" version=version,\n",
|
260 |
-
" labels=[], # Labels will be defined manually later for our 'y'\n",
|
261 |
-
" transformation_functions=transformation_functions,\n",
|
262 |
-
" query=selected_features,\n",
|
263 |
-
")"
|
264 |
-
]
|
265 |
-
},
|
266 |
-
{
|
267 |
-
"cell_type": "markdown",
|
268 |
-
"metadata": {},
|
269 |
-
"source": [
|
270 |
-
"## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>\n",
|
271 |
-
"\n",
|
272 |
-
"In Hopsworks training data is a query where the projection (set of features) is determined by the parent FeatureView with an optional snapshot on disk of the data returned by the query.\n",
|
273 |
-
"\n",
|
274 |
-
"**Training Dataset may contain splits such as:** \n",
|
275 |
-
"* Training set - the subset of training data used to train a model.\n",
|
276 |
-
"* Validation set - the subset of training data used to evaluate hparams when training a model\n",
|
277 |
-
"* Test set - the holdout subset of training data used to evaluate a model\n",
|
278 |
-
"\n",
|
279 |
-
"Training dataset is created using `fs.create_training_dataset()` method.\n",
|
280 |
-
"\n",
|
281 |
-
"**From feature view APIs you can also create training datasets based on event time filters specifying `start_time` and `end_time`** "
|
282 |
-
]
|
283 |
-
},
|
284 |
-
{
|
285 |
-
"cell_type": "markdown",
|
286 |
-
"metadata": {},
|
287 |
-
"source": [
|
288 |
-
"### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
|
289 |
-
]
|
290 |
-
},
|
291 |
-
{
|
292 |
-
"cell_type": "code",
|
293 |
-
"execution_count": 9,
|
294 |
-
"metadata": {},
|
295 |
-
"outputs": [
|
296 |
-
{
|
297 |
-
"name": "stdout",
|
298 |
-
"output_type": "stream",
|
299 |
-
"text": [
|
300 |
-
"Finished: Reading data from Hopsworks, using ArrowFlight (199.29s) \n"
|
301 |
-
]
|
302 |
-
},
|
303 |
-
{
|
304 |
-
"name": "stderr",
|
305 |
-
"output_type": "stream",
|
306 |
-
"text": [
|
307 |
-
"VersionWarning: Incremented version to `19`.\n"
|
308 |
-
]
|
309 |
-
}
|
310 |
-
],
|
311 |
-
"source": [
|
312 |
-
"# Splitting the feature view data into train, validation, and test sets\n",
|
313 |
-
"# We didn't specify 'labels' in feature view creation, it will therefore return 'None' for Y\n",
|
314 |
-
"X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
|
315 |
-
" train_start=\"2022-01-01\",\n",
|
316 |
-
" train_end=\"2023-06-30\",\n",
|
317 |
-
" validation_start=\"2023-07-01\",\n",
|
318 |
-
" validation_end=\"2023-09-30\",\n",
|
319 |
-
" test_start=\"2023-10-01\",\n",
|
320 |
-
" test_end=\"2023-12-31\",\n",
|
321 |
-
" description='Electricity price prediction dataset',\n",
|
322 |
-
")"
|
323 |
-
]
|
324 |
-
},
|
325 |
-
{
|
326 |
-
"cell_type": "code",
|
327 |
-
"execution_count": 10,
|
328 |
-
"metadata": {},
|
329 |
-
"outputs": [],
|
330 |
-
"source": [
|
331 |
-
"# Sorting the training, validation, and test datasets based on the 'timestamp' column\n",
|
332 |
-
"X_train.sort_values([\"timestamp\"], inplace=True)\n",
|
333 |
-
"X_val.sort_values([\"timestamp\"], inplace=True)\n",
|
334 |
-
"X_test.sort_values([\"timestamp\"], inplace=True)"
|
335 |
-
]
|
336 |
-
},
|
337 |
-
{
|
338 |
-
"cell_type": "code",
|
339 |
-
"execution_count": 11,
|
340 |
-
"metadata": {},
|
341 |
-
"outputs": [],
|
342 |
-
"source": [
|
343 |
-
"# Extracting the target variable 'dk1_spotpricedkk_kwh' and defining 'y_train', 'y_val' and 'y_test' \n",
|
344 |
-
"y_train = X_train[[\"dk1_spotpricedkk_kwh\"]]\n",
|
345 |
-
"y_val = X_val[[\"dk1_spotpricedkk_kwh\"]]\n",
|
346 |
-
"y_test = X_test[[\"dk1_spotpricedkk_kwh\"]]"
|
347 |
-
]
|
348 |
-
},
|
349 |
-
{
|
350 |
-
"cell_type": "code",
|
351 |
-
"execution_count": null,
|
352 |
-
"metadata": {},
|
353 |
-
"outputs": [],
|
354 |
-
"source": [
|
355 |
-
"# # Dropping the 'date', 'time' and 'timestamp' columns from the training, validation, and test datasets\n",
|
356 |
-
"# X_train.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
|
357 |
-
"# X_val.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
|
358 |
-
"# X_test.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)"
|
359 |
-
]
|
360 |
-
},
|
361 |
-
{
|
362 |
-
"cell_type": "code",
|
363 |
-
"execution_count": null,
|
364 |
-
"metadata": {},
|
365 |
-
"outputs": [],
|
366 |
-
"source": [
|
367 |
-
"# # Dropping the dependent variable (y) column from the training, validation, and test datasets\n",
|
368 |
-
"# X_train.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
|
369 |
-
"# X_val.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
|
370 |
-
"# X_test.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)"
|
371 |
-
]
|
372 |
-
},
|
373 |
-
{
|
374 |
-
"cell_type": "code",
|
375 |
-
"execution_count": 12,
|
376 |
-
"metadata": {},
|
377 |
-
"outputs": [
|
378 |
-
{
|
379 |
-
"data": {
|
380 |
-
"text/html": [
|
381 |
-
"<div>\n",
|
382 |
-
"<style scoped>\n",
|
383 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
384 |
-
" vertical-align: middle;\n",
|
385 |
-
" }\n",
|
386 |
-
"\n",
|
387 |
-
" .dataframe tbody tr th {\n",
|
388 |
-
" vertical-align: top;\n",
|
389 |
-
" }\n",
|
390 |
-
"\n",
|
391 |
-
" .dataframe thead th {\n",
|
392 |
-
" text-align: right;\n",
|
393 |
-
" }\n",
|
394 |
-
"</style>\n",
|
395 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
396 |
-
" <thead>\n",
|
397 |
-
" <tr style=\"text-align: right;\">\n",
|
398 |
-
" <th></th>\n",
|
399 |
-
" <th>timestamp</th>\n",
|
400 |
-
" <th>time</th>\n",
|
401 |
-
" <th>date</th>\n",
|
402 |
-
" <th>dk1_spotpricedkk_kwh</th>\n",
|
403 |
-
" <th>temperature_2m</th>\n",
|
404 |
-
" <th>relative_humidity_2m</th>\n",
|
405 |
-
" <th>precipitation</th>\n",
|
406 |
-
" <th>rain</th>\n",
|
407 |
-
" <th>snowfall</th>\n",
|
408 |
-
" <th>weather_code</th>\n",
|
409 |
-
" <th>cloud_cover</th>\n",
|
410 |
-
" <th>wind_speed_10m</th>\n",
|
411 |
-
" <th>wind_gusts_10m</th>\n",
|
412 |
-
" <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
|
413 |
-
" <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
|
414 |
-
" <th>dk1_solar_forecastintraday_kwh</th>\n",
|
415 |
-
" <th>type</th>\n",
|
416 |
-
" </tr>\n",
|
417 |
-
" </thead>\n",
|
418 |
-
" <tbody>\n",
|
419 |
-
" <tr>\n",
|
420 |
-
" <th>5905751</th>\n",
|
421 |
-
" <td>1640995200000</td>\n",
|
422 |
-
" <td>2022-01-01 00:00:00+00:00</td>\n",
|
423 |
-
" <td>2022-01-01</td>\n",
|
424 |
-
" <td>0.179988</td>\n",
|
425 |
-
" <td>0.435268</td>\n",
|
426 |
-
" <td>0.986667</td>\n",
|
427 |
-
" <td>0.011364</td>\n",
|
428 |
-
" <td>0.011364</td>\n",
|
429 |
-
" <td>0.0</td>\n",
|
430 |
-
" <td>0.68</td>\n",
|
431 |
-
" <td>1.0</td>\n",
|
432 |
-
" <td>0.315152</td>\n",
|
433 |
-
" <td>0.272633</td>\n",
|
434 |
-
" <td>0.945277</td>\n",
|
435 |
-
" <td>0.481878</td>\n",
|
436 |
-
" <td>0.000000</td>\n",
|
437 |
-
" <td>1</td>\n",
|
438 |
-
" </tr>\n",
|
439 |
-
" <tr>\n",
|
440 |
-
" <th>19398</th>\n",
|
441 |
-
" <td>1640995200000</td>\n",
|
442 |
-
" <td>2022-01-01 00:00:00+00:00</td>\n",
|
443 |
-
" <td>2022-01-01</td>\n",
|
444 |
-
" <td>0.179988</td>\n",
|
445 |
-
" <td>0.435268</td>\n",
|
446 |
-
" <td>0.986667</td>\n",
|
447 |
-
" <td>0.011364</td>\n",
|
448 |
-
" <td>0.011364</td>\n",
|
449 |
-
" <td>0.0</td>\n",
|
450 |
-
" <td>0.68</td>\n",
|
451 |
-
" <td>1.0</td>\n",
|
452 |
-
" <td>0.315152</td>\n",
|
453 |
-
" <td>0.272633</td>\n",
|
454 |
-
" <td>0.934795</td>\n",
|
455 |
-
" <td>0.446702</td>\n",
|
456 |
-
" <td>0.000008</td>\n",
|
457 |
-
" <td>1</td>\n",
|
458 |
-
" </tr>\n",
|
459 |
-
" <tr>\n",
|
460 |
-
" <th>5919627</th>\n",
|
461 |
-
" <td>1640995200000</td>\n",
|
462 |
-
" <td>2022-01-01 00:00:00+00:00</td>\n",
|
463 |
-
" <td>2022-01-01</td>\n",
|
464 |
-
" <td>0.179988</td>\n",
|
465 |
-
" <td>0.417411</td>\n",
|
466 |
-
" <td>0.933333</td>\n",
|
467 |
-
" <td>0.000000</td>\n",
|
468 |
-
" <td>0.000000</td>\n",
|
469 |
-
" <td>0.0</td>\n",
|
470 |
-
" <td>0.04</td>\n",
|
471 |
-
" <td>1.0</td>\n",
|
472 |
-
" <td>0.082828</td>\n",
|
473 |
-
" <td>0.074922</td>\n",
|
474 |
-
" <td>0.773045</td>\n",
|
475 |
-
" <td>0.264375</td>\n",
|
476 |
-
" <td>0.000018</td>\n",
|
477 |
-
" <td>1</td>\n",
|
478 |
-
" </tr>\n",
|
479 |
-
" <tr>\n",
|
480 |
-
" <th>4719247</th>\n",
|
481 |
-
" <td>1640995200000</td>\n",
|
482 |
-
" <td>2022-01-01 00:00:00+00:00</td>\n",
|
483 |
-
" <td>2022-01-01</td>\n",
|
484 |
-
" <td>0.179988</td>\n",
|
485 |
-
" <td>0.426339</td>\n",
|
486 |
-
" <td>0.933333</td>\n",
|
487 |
-
" <td>0.000000</td>\n",
|
488 |
-
" <td>0.000000</td>\n",
|
489 |
-
" <td>0.0</td>\n",
|
490 |
-
" <td>0.04</td>\n",
|
491 |
-
" <td>1.0</td>\n",
|
492 |
-
" <td>0.195960</td>\n",
|
493 |
-
" <td>0.187305</td>\n",
|
494 |
-
" <td>0.913059</td>\n",
|
495 |
-
" <td>0.358547</td>\n",
|
496 |
-
" <td>0.000012</td>\n",
|
497 |
-
" <td>1</td>\n",
|
498 |
-
" </tr>\n",
|
499 |
-
" <tr>\n",
|
500 |
-
" <th>4743896</th>\n",
|
501 |
-
" <td>1640995200000</td>\n",
|
502 |
-
" <td>2022-01-01 00:00:00+00:00</td>\n",
|
503 |
-
" <td>2022-01-01</td>\n",
|
504 |
-
" <td>0.179988</td>\n",
|
505 |
-
" <td>0.417411</td>\n",
|
506 |
-
" <td>0.933333</td>\n",
|
507 |
-
" <td>0.000000</td>\n",
|
508 |
-
" <td>0.000000</td>\n",
|
509 |
-
" <td>0.0</td>\n",
|
510 |
-
" <td>0.04</td>\n",
|
511 |
-
" <td>1.0</td>\n",
|
512 |
-
" <td>0.082828</td>\n",
|
513 |
-
" <td>0.074922</td>\n",
|
514 |
-
" <td>0.493641</td>\n",
|
515 |
-
" <td>0.133456</td>\n",
|
516 |
-
" <td>0.005406</td>\n",
|
517 |
-
" <td>1</td>\n",
|
518 |
-
" </tr>\n",
|
519 |
-
" </tbody>\n",
|
520 |
-
"</table>\n",
|
521 |
-
"</div>"
|
522 |
-
],
|
523 |
-
"text/plain": [
|
524 |
-
" timestamp time date \\\n",
|
525 |
-
"5905751 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
|
526 |
-
"19398 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
|
527 |
-
"5919627 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
|
528 |
-
"4719247 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
|
529 |
-
"4743896 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
|
530 |
-
"\n",
|
531 |
-
" dk1_spotpricedkk_kwh temperature_2m relative_humidity_2m \\\n",
|
532 |
-
"5905751 0.179988 0.435268 0.986667 \n",
|
533 |
-
"19398 0.179988 0.435268 0.986667 \n",
|
534 |
-
"5919627 0.179988 0.417411 0.933333 \n",
|
535 |
-
"4719247 0.179988 0.426339 0.933333 \n",
|
536 |
-
"4743896 0.179988 0.417411 0.933333 \n",
|
537 |
-
"\n",
|
538 |
-
" precipitation rain snowfall weather_code cloud_cover \\\n",
|
539 |
-
"5905751 0.011364 0.011364 0.0 0.68 1.0 \n",
|
540 |
-
"19398 0.011364 0.011364 0.0 0.68 1.0 \n",
|
541 |
-
"5919627 0.000000 0.000000 0.0 0.04 1.0 \n",
|
542 |
-
"4719247 0.000000 0.000000 0.0 0.04 1.0 \n",
|
543 |
-
"4743896 0.000000 0.000000 0.0 0.04 1.0 \n",
|
544 |
-
"\n",
|
545 |
-
" wind_speed_10m wind_gusts_10m \\\n",
|
546 |
-
"5905751 0.315152 0.272633 \n",
|
547 |
-
"19398 0.315152 0.272633 \n",
|
548 |
-
"5919627 0.082828 0.074922 \n",
|
549 |
-
"4719247 0.195960 0.187305 \n",
|
550 |
-
"4743896 0.082828 0.074922 \n",
|
551 |
-
"\n",
|
552 |
-
" dk1_offshore_wind_forecastintraday_kwh \\\n",
|
553 |
-
"5905751 0.945277 \n",
|
554 |
-
"19398 0.934795 \n",
|
555 |
-
"5919627 0.773045 \n",
|
556 |
-
"4719247 0.913059 \n",
|
557 |
-
"4743896 0.493641 \n",
|
558 |
-
"\n",
|
559 |
-
" dk1_onshore_wind_forecastintraday_kwh \\\n",
|
560 |
-
"5905751 0.481878 \n",
|
561 |
-
"19398 0.446702 \n",
|
562 |
-
"5919627 0.264375 \n",
|
563 |
-
"4719247 0.358547 \n",
|
564 |
-
"4743896 0.133456 \n",
|
565 |
-
"\n",
|
566 |
-
" dk1_solar_forecastintraday_kwh type \n",
|
567 |
-
"5905751 0.000000 1 \n",
|
568 |
-
"19398 0.000008 1 \n",
|
569 |
-
"5919627 0.000018 1 \n",
|
570 |
-
"4719247 0.000012 1 \n",
|
571 |
-
"4743896 0.005406 1 "
|
572 |
-
]
|
573 |
-
},
|
574 |
-
"execution_count": 12,
|
575 |
-
"metadata": {},
|
576 |
-
"output_type": "execute_result"
|
577 |
-
}
|
578 |
-
],
|
579 |
-
"source": [
|
580 |
-
"# Displaying the first 5 rows of the train dataset (X_train)\n",
|
581 |
-
"X_train.head()"
|
582 |
-
]
|
583 |
-
},
|
584 |
-
{
|
585 |
-
"cell_type": "code",
|
586 |
-
"execution_count": 14,
|
587 |
-
"metadata": {},
|
588 |
-
"outputs": [
|
589 |
-
{
|
590 |
-
"data": {
|
591 |
-
"text/html": [
|
592 |
-
"<div>\n",
|
593 |
-
"<style scoped>\n",
|
594 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
595 |
-
" vertical-align: middle;\n",
|
596 |
-
" }\n",
|
597 |
-
"\n",
|
598 |
-
" .dataframe tbody tr th {\n",
|
599 |
-
" vertical-align: top;\n",
|
600 |
-
" }\n",
|
601 |
-
"\n",
|
602 |
-
" .dataframe thead th {\n",
|
603 |
-
" text-align: right;\n",
|
604 |
-
" }\n",
|
605 |
-
"</style>\n",
|
606 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
607 |
-
" <thead>\n",
|
608 |
-
" <tr style=\"text-align: right;\">\n",
|
609 |
-
" <th></th>\n",
|
610 |
-
" <th>date</th>\n",
|
611 |
-
" <th>dk1_spotpricedkk_kwh</th>\n",
|
612 |
-
" </tr>\n",
|
613 |
-
" </thead>\n",
|
614 |
-
" <tbody>\n",
|
615 |
-
" <tr>\n",
|
616 |
-
" <th>5905751</th>\n",
|
617 |
-
" <td>2022-01-01</td>\n",
|
618 |
-
" <td>0.179988</td>\n",
|
619 |
-
" </tr>\n",
|
620 |
-
" <tr>\n",
|
621 |
-
" <th>19398</th>\n",
|
622 |
-
" <td>2022-01-01</td>\n",
|
623 |
-
" <td>0.179988</td>\n",
|
624 |
-
" </tr>\n",
|
625 |
-
" <tr>\n",
|
626 |
-
" <th>5919627</th>\n",
|
627 |
-
" <td>2022-01-01</td>\n",
|
628 |
-
" <td>0.179988</td>\n",
|
629 |
-
" </tr>\n",
|
630 |
-
" <tr>\n",
|
631 |
-
" <th>4719247</th>\n",
|
632 |
-
" <td>2022-01-01</td>\n",
|
633 |
-
" <td>0.179988</td>\n",
|
634 |
-
" </tr>\n",
|
635 |
-
" <tr>\n",
|
636 |
-
" <th>4743896</th>\n",
|
637 |
-
" <td>2022-01-01</td>\n",
|
638 |
-
" <td>0.179988</td>\n",
|
639 |
-
" </tr>\n",
|
640 |
-
" </tbody>\n",
|
641 |
-
"</table>\n",
|
642 |
-
"</div>"
|
643 |
-
],
|
644 |
-
"text/plain": [
|
645 |
-
" date dk1_spotpricedkk_kwh\n",
|
646 |
-
"5905751 2022-01-01 0.179988\n",
|
647 |
-
"19398 2022-01-01 0.179988\n",
|
648 |
-
"5919627 2022-01-01 0.179988\n",
|
649 |
-
"4719247 2022-01-01 0.179988\n",
|
650 |
-
"4743896 2022-01-01 0.179988"
|
651 |
-
]
|
652 |
-
},
|
653 |
-
"execution_count": 14,
|
654 |
-
"metadata": {},
|
655 |
-
"output_type": "execute_result"
|
656 |
-
}
|
657 |
-
],
|
658 |
-
"source": [
|
659 |
-
"df = X_train[[\"date\", \"dk1_spotpricedkk_kwh\"]]"
|
660 |
-
]
|
661 |
-
},
|
662 |
-
{
|
663 |
-
"cell_type": "code",
|
664 |
-
"execution_count": 25,
|
665 |
-
"metadata": {},
|
666 |
-
"outputs": [
|
667 |
-
{
|
668 |
-
"data": {
|
669 |
-
"text/html": [
|
670 |
-
"<div>\n",
|
671 |
-
"<style scoped>\n",
|
672 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
673 |
-
" vertical-align: middle;\n",
|
674 |
-
" }\n",
|
675 |
-
"\n",
|
676 |
-
" .dataframe tbody tr th {\n",
|
677 |
-
" vertical-align: top;\n",
|
678 |
-
" }\n",
|
679 |
-
"\n",
|
680 |
-
" .dataframe thead th {\n",
|
681 |
-
" text-align: right;\n",
|
682 |
-
" }\n",
|
683 |
-
"</style>\n",
|
684 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
685 |
-
" <thead>\n",
|
686 |
-
" <tr style=\"text-align: right;\">\n",
|
687 |
-
" <th></th>\n",
|
688 |
-
" <th>ds</th>\n",
|
689 |
-
" <th>y</th>\n",
|
690 |
-
" </tr>\n",
|
691 |
-
" </thead>\n",
|
692 |
-
" <tbody>\n",
|
693 |
-
" <tr>\n",
|
694 |
-
" <th>5905751</th>\n",
|
695 |
-
" <td>2022-01-01</td>\n",
|
696 |
-
" <td>0.179988</td>\n",
|
697 |
-
" </tr>\n",
|
698 |
-
" <tr>\n",
|
699 |
-
" <th>19398</th>\n",
|
700 |
-
" <td>2022-01-01</td>\n",
|
701 |
-
" <td>0.179988</td>\n",
|
702 |
-
" </tr>\n",
|
703 |
-
" <tr>\n",
|
704 |
-
" <th>5919627</th>\n",
|
705 |
-
" <td>2022-01-01</td>\n",
|
706 |
-
" <td>0.179988</td>\n",
|
707 |
-
" </tr>\n",
|
708 |
-
" <tr>\n",
|
709 |
-
" <th>4719247</th>\n",
|
710 |
-
" <td>2022-01-01</td>\n",
|
711 |
-
" <td>0.179988</td>\n",
|
712 |
-
" </tr>\n",
|
713 |
-
" <tr>\n",
|
714 |
-
" <th>4743896</th>\n",
|
715 |
-
" <td>2022-01-01</td>\n",
|
716 |
-
" <td>0.179988</td>\n",
|
717 |
-
" </tr>\n",
|
718 |
-
" </tbody>\n",
|
719 |
-
"</table>\n",
|
720 |
-
"</div>"
|
721 |
-
],
|
722 |
-
"text/plain": [
|
723 |
-
" ds y\n",
|
724 |
-
"5905751 2022-01-01 0.179988\n",
|
725 |
-
"19398 2022-01-01 0.179988\n",
|
726 |
-
"5919627 2022-01-01 0.179988\n",
|
727 |
-
"4719247 2022-01-01 0.179988\n",
|
728 |
-
"4743896 2022-01-01 0.179988"
|
729 |
-
]
|
730 |
-
},
|
731 |
-
"execution_count": 25,
|
732 |
-
"metadata": {},
|
733 |
-
"output_type": "execute_result"
|
734 |
-
}
|
735 |
-
],
|
736 |
-
"source": [
|
737 |
-
"df.columns = [\"ds\", \"y\"]\n",
|
738 |
-
"df.head()"
|
739 |
-
]
|
740 |
-
},
|
741 |
-
{
|
742 |
-
"cell_type": "markdown",
|
743 |
-
"metadata": {},
|
744 |
-
"source": [
|
745 |
-
"## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
|
746 |
-
]
|
747 |
-
},
|
748 |
-
{
|
749 |
-
"cell_type": "markdown",
|
750 |
-
"metadata": {},
|
751 |
-
"source": [
|
752 |
-
"## <span style=\"color:#2656a3;\">🧬 Modeling Testing</span>"
|
753 |
-
]
|
754 |
-
},
|
755 |
-
{
|
756 |
-
"cell_type": "code",
|
757 |
-
"execution_count": 22,
|
758 |
-
"metadata": {},
|
759 |
-
"outputs": [],
|
760 |
-
"source": [
|
761 |
-
"from prophet import Prophet"
|
762 |
-
]
|
763 |
-
},
|
764 |
-
{
|
765 |
-
"cell_type": "code",
|
766 |
-
"execution_count": 26,
|
767 |
-
"metadata": {},
|
768 |
-
"outputs": [
|
769 |
-
{
|
770 |
-
"name": "stderr",
|
771 |
-
"output_type": "stream",
|
772 |
-
"text": [
|
773 |
-
"14:24:30 - cmdstanpy - INFO - Chain [1] start processing\n"
|
774 |
-
]
|
775 |
-
}
|
776 |
-
],
|
777 |
-
"source": [
|
778 |
-
"m = Prophet(interval_width=0.95, daily_seasonality=True)\n",
|
779 |
-
"model = m.fit(df)"
|
780 |
-
]
|
781 |
-
},
|
782 |
-
{
|
783 |
-
"cell_type": "code",
|
784 |
-
"execution_count": null,
|
785 |
-
"metadata": {},
|
786 |
-
"outputs": [],
|
787 |
-
"source": [
|
788 |
-
"future = m.make_future_dataframe(periods=100,freq='D')\n",
|
789 |
-
"forecast = m.predict(future)\n",
|
790 |
-
"forecast.head()"
|
791 |
-
]
|
792 |
-
},
|
793 |
-
{
|
794 |
-
"cell_type": "code",
|
795 |
-
"execution_count": null,
|
796 |
-
"metadata": {},
|
797 |
-
"outputs": [],
|
798 |
-
"source": [
|
799 |
-
"plot1 = m.plot(forecast)\n"
|
800 |
-
]
|
801 |
-
},
|
802 |
-
{
|
803 |
-
"cell_type": "markdown",
|
804 |
-
"metadata": {},
|
805 |
-
"source": [
|
806 |
-
"## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
|
807 |
-
]
|
808 |
-
},
|
809 |
-
{
|
810 |
-
"cell_type": "code",
|
811 |
-
"execution_count": null,
|
812 |
-
"metadata": {},
|
813 |
-
"outputs": [],
|
814 |
-
"source": [
|
815 |
-
"# import pandas as pd\n",
|
816 |
-
"# import numpy as np\n",
|
817 |
-
"# import xgboost as xgb\n",
|
818 |
-
"# from sklearn.metrics import mean_squared_error\n",
|
819 |
-
"# import os"
|
820 |
-
]
|
821 |
-
},
|
822 |
-
{
|
823 |
-
"cell_type": "code",
|
824 |
-
"execution_count": null,
|
825 |
-
"metadata": {},
|
826 |
-
"outputs": [],
|
827 |
-
"source": [
|
828 |
-
"# # Initialize the XGBoost regressor\n",
|
829 |
-
"# model = xgb.XGBRegressor()\n",
|
830 |
-
"# model_val = xgb.XGBRegressor()"
|
831 |
-
]
|
832 |
-
},
|
833 |
-
{
|
834 |
-
"cell_type": "code",
|
835 |
-
"execution_count": null,
|
836 |
-
"metadata": {},
|
837 |
-
"outputs": [],
|
838 |
-
"source": [
|
839 |
-
"# # Train the model on the training data\n",
|
840 |
-
"# model.fit(X_train, y_train)"
|
841 |
-
]
|
842 |
-
},
|
843 |
-
{
|
844 |
-
"cell_type": "code",
|
845 |
-
"execution_count": null,
|
846 |
-
"metadata": {},
|
847 |
-
"outputs": [],
|
848 |
-
"source": [
|
849 |
-
"# # Make predictions on the test set\n",
|
850 |
-
"# y_test_pred = model.predict(X_test)"
|
851 |
-
]
|
852 |
-
},
|
853 |
-
{
|
854 |
-
"cell_type": "code",
|
855 |
-
"execution_count": null,
|
856 |
-
"metadata": {},
|
857 |
-
"outputs": [],
|
858 |
-
"source": [
|
859 |
-
"# # Calculate RMSE on the test set\n",
|
860 |
-
"# mse = mean_squared_error(y_test, y_test_pred, squared=False)\n",
|
861 |
-
"# print(f\"Mean Squared Error (MSE): {mse}\")"
|
862 |
-
]
|
863 |
-
},
|
864 |
-
{
|
865 |
-
"cell_type": "markdown",
|
866 |
-
"metadata": {},
|
867 |
-
"source": [
|
868 |
-
"## <span style='color:#2656a3'>🗄 Model Registry</span>"
|
869 |
-
]
|
870 |
-
},
|
871 |
-
{
|
872 |
-
"cell_type": "code",
|
873 |
-
"execution_count": null,
|
874 |
-
"metadata": {},
|
875 |
-
"outputs": [],
|
876 |
-
"source": [
|
877 |
-
"# Exporting the trained model to a directory\n",
|
878 |
-
"model_dir = \"electricity_price_model\"\n",
|
879 |
-
"print('Exporting trained model to: {}'.format(model_dir))\n",
|
880 |
-
"\n",
|
881 |
-
"# Saving the model using TensorFlow's saved_model.save function\n",
|
882 |
-
"tf.saved_model.save(model, model_dir)"
|
883 |
-
]
|
884 |
-
},
|
885 |
-
{
|
886 |
-
"cell_type": "code",
|
887 |
-
"execution_count": null,
|
888 |
-
"metadata": {},
|
889 |
-
"outputs": [],
|
890 |
-
"source": [
|
891 |
-
"# Retrieving the Model Registry\n",
|
892 |
-
"mr = project.get_model_registry()\n",
|
893 |
-
"\n",
|
894 |
-
"# Extracting loss value from the training history\n",
|
895 |
-
"metrics = {'loss': history_dict['val_loss'][0]} \n",
|
896 |
-
"\n",
|
897 |
-
"# Creating a TensorFlow model in the Model Registry\n",
|
898 |
-
"tf_model = mr.tensorflow.create_model(\n",
|
899 |
-
" name=\"DK_electricity_price_prediction_model\",\n",
|
900 |
-
" metrics=metrics,\n",
|
901 |
-
" description=\"Hourly electricity price prediction model.\",\n",
|
902 |
-
" input_example=n_step_window.example[0].numpy(),\n",
|
903 |
-
")\n",
|
904 |
-
"\n",
|
905 |
-
"# Saving the model to the specified directory\n",
|
906 |
-
"tf_model.save(model_dir)"
|
907 |
-
]
|
908 |
-
},
|
909 |
-
{
|
910 |
-
"cell_type": "markdown",
|
911 |
-
"metadata": {},
|
912 |
-
"source": [
|
913 |
-
"---\n",
|
914 |
-
"\n",
|
915 |
-
"## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
|
916 |
-
"\n",
|
917 |
-
"In the next notebook you will use your registered model to predict batch data."
|
918 |
-
]
|
919 |
-
}
|
920 |
-
],
|
921 |
-
"metadata": {
|
922 |
-
"kernelspec": {
|
923 |
-
"display_name": "bds-mlops",
|
924 |
-
"language": "python",
|
925 |
-
"name": "python3"
|
926 |
-
},
|
927 |
-
"language_info": {
|
928 |
-
"codemirror_mode": {
|
929 |
-
"name": "ipython",
|
930 |
-
"version": 3
|
931 |
-
},
|
932 |
-
"file_extension": ".py",
|
933 |
-
"mimetype": "text/x-python",
|
934 |
-
"name": "python",
|
935 |
-
"nbconvert_exporter": "python",
|
936 |
-
"pygments_lexer": "ipython3",
|
937 |
-
"version": "3.11.9"
|
938 |
-
},
|
939 |
-
"orig_nbformat": 4
|
940 |
-
},
|
941 |
-
"nbformat": 4,
|
942 |
-
"nbformat_minor": 2
|
943 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/notebooks_dev/3_training_pipeline_dev_pytorch.ipynb
DELETED
@@ -1,874 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## 🗒️ This notebook is divided into the following sections:\n",
|
15 |
-
"1. Feature selection.\n",
|
16 |
-
"2. Feature transformations.\n",
|
17 |
-
"3. Training datasets creation.\n",
|
18 |
-
"4. Loading the training data.\n",
|
19 |
-
"5. Train the model.\n",
|
20 |
-
"6. Register model to Hopsworks model registry."
|
21 |
-
]
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"cell_type": "markdown",
|
25 |
-
"metadata": {},
|
26 |
-
"source": [
|
27 |
-
"## <span style='color:#2656a3'> ⚙️ Import of libraries and packages</span>"
|
28 |
-
]
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"cell_type": "code",
|
32 |
-
"execution_count": 1,
|
33 |
-
"metadata": {},
|
34 |
-
"outputs": [],
|
35 |
-
"source": [
|
36 |
-
"!pip install tensorflow --quiet"
|
37 |
-
]
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"cell_type": "code",
|
41 |
-
"execution_count": 22,
|
42 |
-
"metadata": {},
|
43 |
-
"outputs": [],
|
44 |
-
"source": [
|
45 |
-
"# Importing the packages for the needed libraries for the Jupyter notebook\n",
|
46 |
-
"import inspect \n",
|
47 |
-
"import datetime\n",
|
48 |
-
"\n",
|
49 |
-
"import pandas as pd\n",
|
50 |
-
"import numpy as np\n",
|
51 |
-
"import matplotlib.pyplot as plt\n",
|
52 |
-
"import torch\n",
|
53 |
-
"import torch.nn as nn\n",
|
54 |
-
"\n",
|
55 |
-
"#ignore warnings\n",
|
56 |
-
"import warnings\n",
|
57 |
-
"warnings.filterwarnings('ignore')"
|
58 |
-
]
|
59 |
-
},
|
60 |
-
{
|
61 |
-
"cell_type": "code",
|
62 |
-
"execution_count": 2,
|
63 |
-
"metadata": {},
|
64 |
-
"outputs": [
|
65 |
-
{
|
66 |
-
"data": {
|
67 |
-
"text/plain": [
|
68 |
-
"'cuda:0'"
|
69 |
-
]
|
70 |
-
},
|
71 |
-
"execution_count": 2,
|
72 |
-
"metadata": {},
|
73 |
-
"output_type": "execute_result"
|
74 |
-
}
|
75 |
-
],
|
76 |
-
"source": [
|
77 |
-
"device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
|
78 |
-
"device"
|
79 |
-
]
|
80 |
-
},
|
81 |
-
{
|
82 |
-
"cell_type": "markdown",
|
83 |
-
"metadata": {},
|
84 |
-
"source": [
|
85 |
-
"## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store</span>"
|
86 |
-
]
|
87 |
-
},
|
88 |
-
{
|
89 |
-
"cell_type": "code",
|
90 |
-
"execution_count": 3,
|
91 |
-
"metadata": {},
|
92 |
-
"outputs": [
|
93 |
-
{
|
94 |
-
"name": "stdout",
|
95 |
-
"output_type": "stream",
|
96 |
-
"text": [
|
97 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
98 |
-
"\n",
|
99 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
|
100 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
101 |
-
]
|
102 |
-
}
|
103 |
-
],
|
104 |
-
"source": [
|
105 |
-
"# Importing the hopsworks module\n",
|
106 |
-
"import hopsworks\n",
|
107 |
-
"\n",
|
108 |
-
"# Logging in to the Hopsworks project\n",
|
109 |
-
"project = hopsworks.login()\n",
|
110 |
-
"\n",
|
111 |
-
"# Getting the feature store from the project\n",
|
112 |
-
"fs = project.get_feature_store() "
|
113 |
-
]
|
114 |
-
},
|
115 |
-
{
|
116 |
-
"cell_type": "code",
|
117 |
-
"execution_count": 4,
|
118 |
-
"metadata": {},
|
119 |
-
"outputs": [],
|
120 |
-
"source": [
|
121 |
-
"# Retrieve the feature groups\n",
|
122 |
-
"electricity_fg = fs.get_feature_group(\n",
|
123 |
-
" name='electricity_prices',\n",
|
124 |
-
" version=1,\n",
|
125 |
-
")\n",
|
126 |
-
"\n",
|
127 |
-
"weather_fg = fs.get_feature_group(\n",
|
128 |
-
" name='weather_measurements',\n",
|
129 |
-
" version=1,\n",
|
130 |
-
")\n",
|
131 |
-
"\n",
|
132 |
-
"danish_holidays_fg = fs.get_feature_group(\n",
|
133 |
-
" name='danish_holidayss',\n",
|
134 |
-
" version=1,\n",
|
135 |
-
")\n",
|
136 |
-
"forecast_renewable_energy_fg = fs.get_feature_group(\n",
|
137 |
-
" name='forecast_renewable_energy',\n",
|
138 |
-
" version=1\n",
|
139 |
-
")"
|
140 |
-
]
|
141 |
-
},
|
142 |
-
{
|
143 |
-
"cell_type": "markdown",
|
144 |
-
"metadata": {},
|
145 |
-
"source": [
|
146 |
-
"## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
|
147 |
-
"\n",
|
148 |
-
"We first select the features that we want to include for model training.\n",
|
149 |
-
"\n",
|
150 |
-
"Since we specified `primary_key` as `date` and `event_time` as `timestamp` in part 01, we can now join them together for the `electricity_fg`, `weather_fg` and `forecast_renewable_energy_fg`."
|
151 |
-
]
|
152 |
-
},
|
153 |
-
{
|
154 |
-
"cell_type": "markdown",
|
155 |
-
"metadata": {},
|
156 |
-
"source": [
|
157 |
-
"TODO: should 'time' actually be 'date' here?"
|
158 |
-
]
|
159 |
-
},
|
160 |
-
{
|
161 |
-
"cell_type": "code",
|
162 |
-
"execution_count": 5,
|
163 |
-
"metadata": {},
|
164 |
-
"outputs": [],
|
165 |
-
"source": [
|
166 |
-
"# Select features for training data\n",
|
167 |
-
"selected_features = electricity_fg.select_all()\\\n",
|
168 |
-
" .join(weather_fg.select_except([\"timestamp\", \"time\"]))\\\n",
|
169 |
-
" .join(forecast_renewable_energy_fg.select_except([\"timestamp\", \"time\"]))\\\n",
|
170 |
-
" .join(danish_holidays_fg.select_all())"
|
171 |
-
]
|
172 |
-
},
|
173 |
-
{
|
174 |
-
"cell_type": "code",
|
175 |
-
"execution_count": 7,
|
176 |
-
"metadata": {},
|
177 |
-
"outputs": [],
|
178 |
-
"source": [
|
179 |
-
"# Uncomment this if you would like to view your selected features\n",
|
180 |
-
"# selected_features.show(5)"
|
181 |
-
]
|
182 |
-
},
|
183 |
-
{
|
184 |
-
"cell_type": "markdown",
|
185 |
-
"metadata": {},
|
186 |
-
"source": [
|
187 |
-
"### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
|
188 |
-
"\n",
|
189 |
-
"We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
|
190 |
-
"To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
|
191 |
-
"\n",
|
192 |
-
"To achieve this, we create a mapping between our features and transformation functions - TODO: not sure whether this can be stated like this?"
|
193 |
-
]
|
194 |
-
},
|
195 |
-
{
|
196 |
-
"cell_type": "code",
|
197 |
-
"execution_count": 6,
|
198 |
-
"metadata": {},
|
199 |
-
"outputs": [],
|
200 |
-
"source": [
|
201 |
-
"# Defining transformation functions for feature scaling and encoding\n",
|
202 |
-
"transformation_functions = {\n",
|
203 |
-
" \"dk1_spotpricedkk_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
204 |
-
" \"dk1_offshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
205 |
-
" \"dk1_onshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
206 |
-
" \"dk1_solar_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
207 |
-
" \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
208 |
-
" \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
209 |
-
" \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
210 |
-
" \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
211 |
-
" \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
212 |
-
" \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
213 |
-
" \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
214 |
-
" \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
|
215 |
-
" \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
|
216 |
-
" \"type\": fs.get_transformation_function(name=\"label_encoder\"),\n",
|
217 |
-
" }"
|
218 |
-
]
|
219 |
-
},
|
220 |
-
{
|
221 |
-
"cell_type": "markdown",
|
222 |
-
"metadata": {},
|
223 |
-
"source": [
|
224 |
-
"`Feature Views` stand between **Feature Groups** and the **Training Dataset**. By combining **Feature Groups** we can create **Feature Views**, which store metadata about our data. With **Feature Views** we can create a **Training Dataset**.\n",
|
225 |
-
"\n",
|
226 |
-
"Feature Views let us define a schema in the form of a query with filters, a model target feature/label, and additional transformation functions.\n",
|
227 |
-
"\n",
|
228 |
-
"In order to create a Feature View we can use the `FeatureStore.get_or_create_feature_view()` method.\n",
|
229 |
-
"\n",
|
230 |
-
"We can specify the following parameters:\n",
|
231 |
-
"\n",
|
232 |
-
"- `name` - name of the feature view.\n",
|
233 |
-
"\n",
|
234 |
-
"- `version` - version of the feature view.\n",
|
235 |
-
"\n",
|
236 |
-
"- `labels`- our target variable.\n",
|
237 |
-
"\n",
|
238 |
-
"- `transformation_functions` - functions to transform our features.\n",
|
239 |
-
"\n",
|
240 |
-
"- `query` - query object with data."
|
241 |
-
]
|
242 |
-
},
|
243 |
-
{
|
244 |
-
"cell_type": "markdown",
|
245 |
-
"metadata": {},
|
246 |
-
"source": [
|
247 |
-
"Note: not sure whether this rewording by the bot is acceptable?"
|
248 |
-
]
|
249 |
-
},
|
250 |
-
{
|
251 |
-
"cell_type": "markdown",
|
252 |
-
"metadata": {},
|
253 |
-
"source": [
|
254 |
-
"`Feature Views` serve as an intermediary between **Feature Groups** and the **Training Dataset**. By combining various **Feature Groups**, we can construct **Feature Views**, which retain metadata about our data. Utilizing **Feature Views**, we can subsequently generate a **Training Dataset**.\n",
|
255 |
-
"\n",
|
256 |
-
"Feature Views facilitate the definition of schema through queries with filters, identification of the model's target feature or label, and application of additional transformation functions.\n",
|
257 |
-
"\n",
|
258 |
-
"To create a Feature View, we employ the `FeatureStore.get_or_create_feature_view()` method, where we specify the following parameters:\n",
|
259 |
-
"\n",
|
260 |
-
"- `name`: The name of the feature view.\n",
|
261 |
-
"\n",
|
262 |
-
"- `version`: The version of the feature view.\n",
|
263 |
-
"\n",
|
264 |
-
"- `labels`: Our target variable.\n",
|
265 |
-
"\n",
|
266 |
-
"- `transformation_functions`: Functions to transform our features.\n",
|
267 |
-
"\n",
|
268 |
-
"- `query`: A query object containing the relevant data."
|
269 |
-
]
|
270 |
-
},
|
271 |
-
{
|
272 |
-
"cell_type": "code",
|
273 |
-
"execution_count": 7,
|
274 |
-
"metadata": {},
|
275 |
-
"outputs": [],
|
276 |
-
"source": [
|
277 |
-
"# Getting or creating a feature view named 'electricity_feature_view'\n",
|
278 |
-
"version = 1 # Defining the version for the feature view\n",
|
279 |
-
"feature_view = fs.get_or_create_feature_view(\n",
|
280 |
-
" name='electricity_feature_view',\n",
|
281 |
-
" version=version,\n",
|
282 |
-
" labels=[], # Labels will be defined manually later for our 'y'\n",
|
283 |
-
" transformation_functions=transformation_functions,\n",
|
284 |
-
" query=selected_features,\n",
|
285 |
-
")"
|
286 |
-
]
|
287 |
-
},
|
288 |
-
{
|
289 |
-
"cell_type": "markdown",
|
290 |
-
"metadata": {},
|
291 |
-
"source": [
|
292 |
-
"## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>\n",
|
293 |
-
"\n",
|
294 |
-
"In Hopsworks training data is a query where the projection (set of features) is determined by the parent FeatureView with an optional snapshot on disk of the data returned by the query.\n",
|
295 |
-
"\n",
|
296 |
-
"**Training Dataset may contain splits such as:** \n",
|
297 |
-
"* Training set - the subset of training data used to train a model.\n",
|
298 |
-
"* Validation set - the subset of training data used to evaluate hparams when training a model\n",
|
299 |
-
"* Test set - the holdout subset of training data used to evaluate a model\n",
|
300 |
-
"\n",
|
301 |
-
"Training dataset is created using `fs.create_training_dataset()` method.\n",
|
302 |
-
"\n",
|
303 |
-
"**From the feature view APIs you can also create training datasets based on event time filters by specifying `start_time` and `end_time`.** "
|
304 |
-
]
|
305 |
-
},
|
306 |
-
{
|
307 |
-
"cell_type": "markdown",
|
308 |
-
"metadata": {},
|
309 |
-
"source": [
|
310 |
-
"### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
|
311 |
-
]
|
312 |
-
},
|
313 |
-
{
|
314 |
-
"cell_type": "code",
|
315 |
-
"execution_count": 8,
|
316 |
-
"metadata": {},
|
317 |
-
"outputs": [
|
318 |
-
{
|
319 |
-
"name": "stdout",
|
320 |
-
"output_type": "stream",
|
321 |
-
"text": [
|
322 |
-
"Finished: Reading data from Hopsworks, using ArrowFlight (211.16s) \n"
|
323 |
-
]
|
324 |
-
},
|
325 |
-
{
|
326 |
-
"name": "stderr",
|
327 |
-
"output_type": "stream",
|
328 |
-
"text": [
|
329 |
-
"VersionWarning: Incremented version to `21`.\n"
|
330 |
-
]
|
331 |
-
}
|
332 |
-
],
|
333 |
-
"source": [
|
334 |
-
"# Splitting the feature view data into train, validation, and test sets\n",
|
335 |
-
"# We didn't specify 'labels' in feature view creation, it will therefore return 'None' for Y\n",
|
336 |
-
"X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
|
337 |
-
" train_start=\"2022-01-01\",\n",
|
338 |
-
" train_end=\"2023-06-30\",\n",
|
339 |
-
" validation_start=\"2023-07-01\",\n",
|
340 |
-
" validation_end=\"2023-09-30\",\n",
|
341 |
-
" test_start=\"2023-10-01\",\n",
|
342 |
-
" test_end=\"2023-12-31\",\n",
|
343 |
-
" description='Electricity price prediction dataset',\n",
|
344 |
-
")"
|
345 |
-
]
|
346 |
-
},
|
347 |
-
{
|
348 |
-
"cell_type": "code",
|
349 |
-
"execution_count": 9,
|
350 |
-
"metadata": {},
|
351 |
-
"outputs": [],
|
352 |
-
"source": [
|
353 |
-
"# Sorting the training, validation, and test datasets based on the 'timestamp' column\n",
|
354 |
-
"X_train.sort_values([\"timestamp\"], inplace=True)\n",
|
355 |
-
"X_val.sort_values([\"timestamp\"], inplace=True)\n",
|
356 |
-
"X_test.sort_values([\"timestamp\"], inplace=True)"
|
357 |
-
]
|
358 |
-
},
|
359 |
-
{
|
360 |
-
"cell_type": "code",
|
361 |
-
"execution_count": 10,
|
362 |
-
"metadata": {},
|
363 |
-
"outputs": [],
|
364 |
-
"source": [
|
365 |
-
"# Extracting the target variable 'dk1_spotpricedkk_kwh' and defining 'y_train', 'y_val' and 'y_test' \n",
|
366 |
-
"y_train = X_train[[\"dk1_spotpricedkk_kwh\"]]\n",
|
367 |
-
"y_val = X_val[[\"dk1_spotpricedkk_kwh\"]]\n",
|
368 |
-
"y_test = X_test[[\"dk1_spotpricedkk_kwh\"]]"
|
369 |
-
]
|
370 |
-
},
|
371 |
-
{
|
372 |
-
"cell_type": "code",
|
373 |
-
"execution_count": 11,
|
374 |
-
"metadata": {},
|
375 |
-
"outputs": [],
|
376 |
-
"source": [
|
377 |
-
"# Dropping the 'date', 'time' and 'timestamp' columns from the training, validation, and test datasets\n",
|
378 |
-
"X_train.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
|
379 |
-
"X_val.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
|
380 |
-
"X_test.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)"
|
381 |
-
]
|
382 |
-
},
|
383 |
-
{
|
384 |
-
"cell_type": "code",
|
385 |
-
"execution_count": 12,
|
386 |
-
"metadata": {},
|
387 |
-
"outputs": [],
|
388 |
-
"source": [
|
389 |
-
"# Dropping the dependent variable (y) column 'dk1_spotpricedkk_kwh' from the training, validation, and test datasets\n",
|
390 |
-
"X_train.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
|
391 |
-
"X_val.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
|
392 |
-
"X_test.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)"
|
393 |
-
]
|
394 |
-
},
|
395 |
-
{
|
396 |
-
"cell_type": "code",
|
397 |
-
"execution_count": 13,
|
398 |
-
"metadata": {},
|
399 |
-
"outputs": [
|
400 |
-
{
|
401 |
-
"data": {
|
402 |
-
"text/html": [
|
403 |
-
"<div>\n",
|
404 |
-
"<style scoped>\n",
|
405 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
406 |
-
" vertical-align: middle;\n",
|
407 |
-
" }\n",
|
408 |
-
"\n",
|
409 |
-
" .dataframe tbody tr th {\n",
|
410 |
-
" vertical-align: top;\n",
|
411 |
-
" }\n",
|
412 |
-
"\n",
|
413 |
-
" .dataframe thead th {\n",
|
414 |
-
" text-align: right;\n",
|
415 |
-
" }\n",
|
416 |
-
"</style>\n",
|
417 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
418 |
-
" <thead>\n",
|
419 |
-
" <tr style=\"text-align: right;\">\n",
|
420 |
-
" <th></th>\n",
|
421 |
-
" <th>temperature_2m</th>\n",
|
422 |
-
" <th>relative_humidity_2m</th>\n",
|
423 |
-
" <th>precipitation</th>\n",
|
424 |
-
" <th>rain</th>\n",
|
425 |
-
" <th>snowfall</th>\n",
|
426 |
-
" <th>weather_code</th>\n",
|
427 |
-
" <th>cloud_cover</th>\n",
|
428 |
-
" <th>wind_speed_10m</th>\n",
|
429 |
-
" <th>wind_gusts_10m</th>\n",
|
430 |
-
" <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
|
431 |
-
" <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
|
432 |
-
" <th>dk1_solar_forecastintraday_kwh</th>\n",
|
433 |
-
" <th>type</th>\n",
|
434 |
-
" </tr>\n",
|
435 |
-
" </thead>\n",
|
436 |
-
" <tbody>\n",
|
437 |
-
" <tr>\n",
|
438 |
-
" <th>5905751</th>\n",
|
439 |
-
" <td>0.435268</td>\n",
|
440 |
-
" <td>0.986667</td>\n",
|
441 |
-
" <td>0.011364</td>\n",
|
442 |
-
" <td>0.011364</td>\n",
|
443 |
-
" <td>0.0</td>\n",
|
444 |
-
" <td>0.68</td>\n",
|
445 |
-
" <td>1.0</td>\n",
|
446 |
-
" <td>0.315152</td>\n",
|
447 |
-
" <td>0.272633</td>\n",
|
448 |
-
" <td>0.945277</td>\n",
|
449 |
-
" <td>0.481878</td>\n",
|
450 |
-
" <td>0.000000</td>\n",
|
451 |
-
" <td>1</td>\n",
|
452 |
-
" </tr>\n",
|
453 |
-
" <tr>\n",
|
454 |
-
" <th>19398</th>\n",
|
455 |
-
" <td>0.435268</td>\n",
|
456 |
-
" <td>0.986667</td>\n",
|
457 |
-
" <td>0.011364</td>\n",
|
458 |
-
" <td>0.011364</td>\n",
|
459 |
-
" <td>0.0</td>\n",
|
460 |
-
" <td>0.68</td>\n",
|
461 |
-
" <td>1.0</td>\n",
|
462 |
-
" <td>0.315152</td>\n",
|
463 |
-
" <td>0.272633</td>\n",
|
464 |
-
" <td>0.934795</td>\n",
|
465 |
-
" <td>0.446702</td>\n",
|
466 |
-
" <td>0.000008</td>\n",
|
467 |
-
" <td>1</td>\n",
|
468 |
-
" </tr>\n",
|
469 |
-
" <tr>\n",
|
470 |
-
" <th>5919627</th>\n",
|
471 |
-
" <td>0.417411</td>\n",
|
472 |
-
" <td>0.933333</td>\n",
|
473 |
-
" <td>0.000000</td>\n",
|
474 |
-
" <td>0.000000</td>\n",
|
475 |
-
" <td>0.0</td>\n",
|
476 |
-
" <td>0.04</td>\n",
|
477 |
-
" <td>1.0</td>\n",
|
478 |
-
" <td>0.082828</td>\n",
|
479 |
-
" <td>0.074922</td>\n",
|
480 |
-
" <td>0.773045</td>\n",
|
481 |
-
" <td>0.264375</td>\n",
|
482 |
-
" <td>0.000018</td>\n",
|
483 |
-
" <td>1</td>\n",
|
484 |
-
" </tr>\n",
|
485 |
-
" <tr>\n",
|
486 |
-
" <th>4719247</th>\n",
|
487 |
-
" <td>0.426339</td>\n",
|
488 |
-
" <td>0.933333</td>\n",
|
489 |
-
" <td>0.000000</td>\n",
|
490 |
-
" <td>0.000000</td>\n",
|
491 |
-
" <td>0.0</td>\n",
|
492 |
-
" <td>0.04</td>\n",
|
493 |
-
" <td>1.0</td>\n",
|
494 |
-
" <td>0.195960</td>\n",
|
495 |
-
" <td>0.187305</td>\n",
|
496 |
-
" <td>0.913059</td>\n",
|
497 |
-
" <td>0.358547</td>\n",
|
498 |
-
" <td>0.000012</td>\n",
|
499 |
-
" <td>1</td>\n",
|
500 |
-
" </tr>\n",
|
501 |
-
" <tr>\n",
|
502 |
-
" <th>4743896</th>\n",
|
503 |
-
" <td>0.417411</td>\n",
|
504 |
-
" <td>0.933333</td>\n",
|
505 |
-
" <td>0.000000</td>\n",
|
506 |
-
" <td>0.000000</td>\n",
|
507 |
-
" <td>0.0</td>\n",
|
508 |
-
" <td>0.04</td>\n",
|
509 |
-
" <td>1.0</td>\n",
|
510 |
-
" <td>0.082828</td>\n",
|
511 |
-
" <td>0.074922</td>\n",
|
512 |
-
" <td>0.493641</td>\n",
|
513 |
-
" <td>0.133456</td>\n",
|
514 |
-
" <td>0.005406</td>\n",
|
515 |
-
" <td>1</td>\n",
|
516 |
-
" </tr>\n",
|
517 |
-
" </tbody>\n",
|
518 |
-
"</table>\n",
|
519 |
-
"</div>"
|
520 |
-
],
|
521 |
-
"text/plain": [
|
522 |
-
" temperature_2m relative_humidity_2m precipitation rain \\\n",
|
523 |
-
"5905751 0.435268 0.986667 0.011364 0.011364 \n",
|
524 |
-
"19398 0.435268 0.986667 0.011364 0.011364 \n",
|
525 |
-
"5919627 0.417411 0.933333 0.000000 0.000000 \n",
|
526 |
-
"4719247 0.426339 0.933333 0.000000 0.000000 \n",
|
527 |
-
"4743896 0.417411 0.933333 0.000000 0.000000 \n",
|
528 |
-
"\n",
|
529 |
-
" snowfall weather_code cloud_cover wind_speed_10m wind_gusts_10m \\\n",
|
530 |
-
"5905751 0.0 0.68 1.0 0.315152 0.272633 \n",
|
531 |
-
"19398 0.0 0.68 1.0 0.315152 0.272633 \n",
|
532 |
-
"5919627 0.0 0.04 1.0 0.082828 0.074922 \n",
|
533 |
-
"4719247 0.0 0.04 1.0 0.195960 0.187305 \n",
|
534 |
-
"4743896 0.0 0.04 1.0 0.082828 0.074922 \n",
|
535 |
-
"\n",
|
536 |
-
" dk1_offshore_wind_forecastintraday_kwh \\\n",
|
537 |
-
"5905751 0.945277 \n",
|
538 |
-
"19398 0.934795 \n",
|
539 |
-
"5919627 0.773045 \n",
|
540 |
-
"4719247 0.913059 \n",
|
541 |
-
"4743896 0.493641 \n",
|
542 |
-
"\n",
|
543 |
-
" dk1_onshore_wind_forecastintraday_kwh \\\n",
|
544 |
-
"5905751 0.481878 \n",
|
545 |
-
"19398 0.446702 \n",
|
546 |
-
"5919627 0.264375 \n",
|
547 |
-
"4719247 0.358547 \n",
|
548 |
-
"4743896 0.133456 \n",
|
549 |
-
"\n",
|
550 |
-
" dk1_solar_forecastintraday_kwh type \n",
|
551 |
-
"5905751 0.000000 1 \n",
|
552 |
-
"19398 0.000008 1 \n",
|
553 |
-
"5919627 0.000018 1 \n",
|
554 |
-
"4719247 0.000012 1 \n",
|
555 |
-
"4743896 0.005406 1 "
|
556 |
-
]
|
557 |
-
},
|
558 |
-
"execution_count": 13,
|
559 |
-
"metadata": {},
|
560 |
-
"output_type": "execute_result"
|
561 |
-
}
|
562 |
-
],
|
563 |
-
"source": [
|
564 |
-
"# Displaying the first 5 rows of the train dataset (X_train)\n",
|
565 |
-
"X_train.head()"
|
566 |
-
]
|
567 |
-
},
|
568 |
-
{
|
569 |
-
"cell_type": "markdown",
|
570 |
-
"metadata": {},
|
571 |
-
"source": [
|
572 |
-
"## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
|
573 |
-
]
|
574 |
-
},
|
575 |
-
{
|
576 |
-
"cell_type": "code",
|
577 |
-
"execution_count": 21,
|
578 |
-
"metadata": {},
|
579 |
-
"outputs": [
|
580 |
-
{
|
581 |
-
"data": {
|
582 |
-
"text/plain": [
|
583 |
-
"(5012736, 13)"
|
584 |
-
]
|
585 |
-
},
|
586 |
-
"execution_count": 21,
|
587 |
-
"metadata": {},
|
588 |
-
"output_type": "execute_result"
|
589 |
-
}
|
590 |
-
],
|
591 |
-
"source": [
|
592 |
-
"X_train.shape"
|
593 |
-
]
|
594 |
-
},
|
595 |
-
{
|
596 |
-
"cell_type": "markdown",
|
597 |
-
"metadata": {},
|
598 |
-
"source": [
|
599 |
-
"## <span style=\"color:#2656a3;\">🧬 Modeling Testing</span>"
|
600 |
-
]
|
601 |
-
},
|
602 |
-
{
|
603 |
-
"cell_type": "code",
|
604 |
-
"execution_count": 25,
|
605 |
-
"metadata": {},
|
606 |
-
"outputs": [
|
607 |
-
{
|
608 |
-
"ename": "AttributeError",
|
609 |
-
"evalue": "'DataFrame' object has no attribute 'reshape'",
|
610 |
-
"output_type": "error",
|
611 |
-
"traceback": [
|
612 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
613 |
-
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
614 |
-
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_15496\\1411499862.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mX_train\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m13\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
615 |
-
"\u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\cudatest\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6200\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6201\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6202\u001b[0m ):\n\u001b[0;32m 6203\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 6204\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
616 |
-
"\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'reshape'"
|
617 |
-
]
|
618 |
-
}
|
619 |
-
],
|
620 |
-
"source": [
|
621 |
-
"X_train = X_train.reshape((-1, 1, 13))\n"
|
622 |
-
]
|
623 |
-
},
|
624 |
-
{
|
625 |
-
"cell_type": "code",
|
626 |
-
"execution_count": 23,
|
627 |
-
"metadata": {},
|
628 |
-
"outputs": [
|
629 |
-
{
|
630 |
-
"ename": "AttributeError",
|
631 |
-
"evalue": "'DataFrame' object has no attribute 'reshape'",
|
632 |
-
"output_type": "error",
|
633 |
-
"traceback": [
|
634 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
635 |
-
"\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
|
636 |
-
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_15496\\1311144430.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mX_train\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m13\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mX_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m13\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0my_train\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
|
637 |
-
"\u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\cudatest\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6200\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6201\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6202\u001b[0m ):\n\u001b[0;32m 6203\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 6204\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
|
638 |
-
"\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'reshape'"
|
639 |
-
]
|
640 |
-
}
|
641 |
-
],
|
642 |
-
"source": [
|
643 |
-
"X_train = X_train.reshape((-1, 13, 1))\n",
|
644 |
-
"X_test = X_test.reshape((-1, 13, 1))\n",
|
645 |
-
"\n",
|
646 |
-
"y_train = y_train.reshape((-1, 1))\n",
|
647 |
-
"y_test = y_test.reshape((-1, 1))\n",
|
648 |
-
"\n",
|
649 |
-
"X_train.shape, X_test.shape, y_train.shape, y_test.shape"
|
650 |
-
]
|
651 |
-
},
|
652 |
-
{
|
653 |
-
"cell_type": "code",
|
654 |
-
"execution_count": 17,
|
655 |
-
"metadata": {},
|
656 |
-
"outputs": [
|
657 |
-
{
|
658 |
-
"ename": "ValueError",
|
659 |
-
"evalue": "could not determine the shape of object type 'DataFrame'",
|
660 |
-
"output_type": "error",
|
661 |
-
"traceback": [
|
662 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
663 |
-
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
664 |
-
"Cell \u001b[1;32mIn[17], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m X_train \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mfloat\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mfloat()\n\u001b[0;32m 2\u001b[0m y_train \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(y_train)\u001b[38;5;241m.\u001b[39mfloat()\n\u001b[0;32m 3\u001b[0m X_test \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(X_test\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m))\u001b[38;5;241m.\u001b[39mfloat()\n",
|
665 |
-
"\u001b[1;31mValueError\u001b[0m: could not determine the shape of object type 'DataFrame'"
|
666 |
-
]
|
667 |
-
}
|
668 |
-
],
|
669 |
-
"source": [
|
670 |
-
"X_train = torch.tensor(X_train.astype(float)).float()\n",
|
671 |
-
"y_train = torch.tensor(y_train).float()\n",
|
672 |
-
"X_test = torch.tensor(X_test.astype(float)).float()\n",
|
673 |
-
"y_test = torch.tensor(y_test).float()\n",
|
674 |
-
"\n",
|
675 |
-
"X_train.shape, X_test.shape, y_train.shape, y_test.shape"
|
676 |
-
]
|
677 |
-
},
|
678 |
-
{
|
679 |
-
"cell_type": "markdown",
|
680 |
-
"metadata": {},
|
681 |
-
"source": [
|
682 |
-
"## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
|
683 |
-
]
|
684 |
-
},
|
685 |
-
{
|
686 |
-
"cell_type": "code",
|
687 |
-
"execution_count": null,
|
688 |
-
"metadata": {},
|
689 |
-
"outputs": [],
|
690 |
-
"source": [
|
691 |
-
"# import pandas as pd\n",
|
692 |
-
"# import numpy as np\n",
|
693 |
-
"# import xgboost as xgb\n",
|
694 |
-
"# from sklearn.metrics import mean_squared_error\n",
|
695 |
-
"# import os"
|
696 |
-
]
|
697 |
-
},
|
698 |
-
{
|
699 |
-
"cell_type": "code",
|
700 |
-
"execution_count": null,
|
701 |
-
"metadata": {},
|
702 |
-
"outputs": [],
|
703 |
-
"source": [
|
704 |
-
"# # Initialize the XGBoost regressor\n",
|
705 |
-
"# model = xgb.XGBRegressor()\n",
|
706 |
-
"# model_val = xgb.XGBRegressor()"
|
707 |
-
]
|
708 |
-
},
|
709 |
-
{
|
710 |
-
"cell_type": "code",
|
711 |
-
"execution_count": null,
|
712 |
-
"metadata": {},
|
713 |
-
"outputs": [],
|
714 |
-
"source": [
|
715 |
-
"# # Train the model on the training data\n",
|
716 |
-
"# model.fit(X_train, y_train)"
|
717 |
-
]
|
718 |
-
},
|
719 |
-
{
|
720 |
-
"cell_type": "code",
|
721 |
-
"execution_count": null,
|
722 |
-
"metadata": {},
|
723 |
-
"outputs": [],
|
724 |
-
"source": [
|
725 |
-
"# # Make predictions on the validation set\n",
|
726 |
-
"# y_test_pred = model.predict(X_test)"
|
727 |
-
]
|
728 |
-
},
|
729 |
-
{
|
730 |
-
"cell_type": "code",
|
731 |
-
"execution_count": null,
|
732 |
-
"metadata": {},
|
733 |
-
"outputs": [],
|
734 |
-
"source": [
|
735 |
-
"# # Calculate RMSE on the validation set\n",
|
736 |
-
"# mse = mean_squared_error(y_test, y_test_pred, squared=False)\n",
|
737 |
-
"# print(f\"Mean Squared Error (MSE): {mse}\")"
|
738 |
-
]
|
739 |
-
},
|
740 |
-
{
|
741 |
-
"cell_type": "markdown",
|
742 |
-
"metadata": {},
|
743 |
-
"source": [
|
744 |
-
"## <span style='color:#2656a3'>🗄 Model Registry</span>"
|
745 |
-
]
|
746 |
-
},
|
747 |
-
{
|
748 |
-
"cell_type": "code",
|
749 |
-
"execution_count": 39,
|
750 |
-
"metadata": {},
|
751 |
-
"outputs": [
|
752 |
-
{
|
753 |
-
"name": "stdout",
|
754 |
-
"output_type": "stream",
|
755 |
-
"text": [
|
756 |
-
"Exporting trained model to: electricity_price_model\n",
|
757 |
-
"INFO:tensorflow:Assets written to: electricity_price_model\\assets\n"
|
758 |
-
]
|
759 |
-
}
|
760 |
-
],
|
761 |
-
"source": [
|
762 |
-
"# Exporting the trained model to a directory\n",
|
763 |
-
"model_dir = \"electricity_price_model\"\n",
|
764 |
-
"print('Exporting trained model to: {}'.format(model_dir))\n",
|
765 |
-
"\n",
|
766 |
-
"# Saving the model using TensorFlow's saved_model.save function\n",
|
767 |
-
"tf.saved_model.save(model, model_dir)"
|
768 |
-
]
|
769 |
-
},
|
770 |
-
{
|
771 |
-
"cell_type": "code",
|
772 |
-
"execution_count": 44,
|
773 |
-
"metadata": {},
|
774 |
-
"outputs": [
|
775 |
-
{
|
776 |
-
"name": "stdout",
|
777 |
-
"output_type": "stream",
|
778 |
-
"text": [
|
779 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
780 |
-
]
|
781 |
-
},
|
782 |
-
{
|
783 |
-
"name": "stderr",
|
784 |
-
"output_type": "stream",
|
785 |
-
"text": [
|
786 |
-
"Uploading: 100.000%|██████████| 59/59 elapsed<00:01 remaining<00:001<00:01, 3.38it/s]\n",
|
787 |
-
"Uploading: 100.000%|██████████| 397272/397272 elapsed<00:02 remaining<00:00 3.38it/s]\n",
|
788 |
-
"Uploading: 0.000%| | 0/112411 elapsed<00:01 remaining<?0:04<00:01, 3.38it/s]\n",
|
789 |
-
"Uploading model files (2 dirs, 2 files): 17%|█▋ | 1/6 [00:07<00:35, 7.08s/it]\n"
|
790 |
-
]
|
791 |
-
},
|
792 |
-
{
|
793 |
-
"ename": "RestAPIError",
|
794 |
-
"evalue": "Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''",
|
795 |
-
"output_type": "error",
|
796 |
-
"traceback": [
|
797 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
798 |
-
"\u001b[1;31mRestAPIError\u001b[0m Traceback (most recent call last)",
|
799 |
-
"Cell \u001b[1;32mIn[44], line 16\u001b[0m\n\u001b[0;32m 8\u001b[0m tf_model \u001b[38;5;241m=\u001b[39m mr\u001b[38;5;241m.\u001b[39mtensorflow\u001b[38;5;241m.\u001b[39mcreate_model(\n\u001b[0;32m 9\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDK_electricity_price_prediction_model\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 10\u001b[0m metrics\u001b[38;5;241m=\u001b[39mmetrics,\n\u001b[0;32m 11\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHourly electricity price prediction model.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 12\u001b[0m input_example\u001b[38;5;241m=\u001b[39mn_step_window\u001b[38;5;241m.\u001b[39mexample[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mnumpy(),\n\u001b[0;32m 13\u001b[0m )\n\u001b[0;32m 15\u001b[0m \u001b[38;5;66;03m# Saving the model to the specified directory\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m \u001b[43mtf_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_dir\u001b[49m\u001b[43m)\u001b[49m\n",
|
800 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\model.py:101\u001b[0m, in \u001b[0;36mModel.save\u001b[1;34m(self, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave\u001b[39m(\u001b[38;5;28mself\u001b[39m, model_path, await_registration\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m480\u001b[39m, keep_original_files\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m 91\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Persist this model including model files and metadata to the model registry.\u001b[39;00m\n\u001b[0;32m 92\u001b[0m \n\u001b[0;32m 93\u001b[0m \u001b[38;5;124;03m # Arguments\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 99\u001b[0m \u001b[38;5;124;03m `Model`: The model metadata object.\u001b[39;00m\n\u001b[0;32m 100\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 102\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 103\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 104\u001b[0m \u001b[43m \u001b[49m\u001b[43mawait_registration\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mawait_registration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 105\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 106\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
801 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:421\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 419\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m be:\n\u001b[0;32m 420\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mrm(model_instance\u001b[38;5;241m.\u001b[39mversion_path)\n\u001b[1;32m--> 421\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m be\n\u001b[0;32m 423\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel created, explore it at \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m model_instance\u001b[38;5;241m.\u001b[39mget_url())\n\u001b[0;32m 425\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_instance\n",
|
802 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:385\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 381\u001b[0m \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[0;32m 382\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\n\u001b[0;32m 383\u001b[0m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(os\u001b[38;5;241m.\u001b[39mgetcwd(), model_path)\n\u001b[0;32m 384\u001b[0m ): \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[1;32m--> 385\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_model_from_local_or_hopsfs_mount\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetcwd\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 
391\u001b[0m \u001b[38;5;66;03m# check project relative\u001b[39;00m\n\u001b[0;32m 392\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mpath_exists(\n\u001b[0;32m 393\u001b[0m model_path\n\u001b[0;32m 394\u001b[0m ): \u001b[38;5;66;03m# check hdfs relative and absolute\u001b[39;00m\n",
|
803 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:249\u001b[0m, in \u001b[0;36mModelEngine._save_model_from_local_or_hopsfs_mount\u001b[1;34m(self, model_instance, model_path, keep_original_files, update_upload_progress)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_or_move_hopsfs_model(\n\u001b[0;32m 241\u001b[0m from_hdfs_model_path\u001b[38;5;241m=\u001b[39mmodel_path\u001b[38;5;241m.\u001b[39mreplace(\n\u001b[0;32m 242\u001b[0m constants\u001b[38;5;241m.\u001b[39mMODEL_REGISTRY\u001b[38;5;241m.\u001b[39mHOPSFS_MOUNT_PREFIX, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 246\u001b[0m update_upload_progress\u001b[38;5;241m=\u001b[39mupdate_upload_progress,\n\u001b[0;32m 247\u001b[0m )\n\u001b[0;32m 248\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 249\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_local_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mfrom_local_model_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_model_version_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mversion_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
804 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:225\u001b[0m, in \u001b[0;36mModelEngine._upload_local_model\u001b[1;34m(self, from_local_model_path, to_model_version_path, update_upload_progress)\u001b[0m\n\u001b[0;32m 223\u001b[0m update_upload_progress(n_dirs, n_files)\n\u001b[0;32m 224\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m f_name \u001b[38;5;129;01min\u001b[39;00m files:\n\u001b[1;32m--> 225\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mf_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_base_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 226\u001b[0m n_files \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 227\u001b[0m update_upload_progress(n_dirs, n_files)\n",
|
805 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\local_engine.py:38\u001b[0m, in \u001b[0;36mLocalEngine.upload\u001b[1;34m(self, local_path, remote_path)\u001b[0m\n\u001b[0;32m 36\u001b[0m local_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_abs_path(local_path)\n\u001b[0;32m 37\u001b[0m remote_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepend_project_path(remote_path)\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dataset_api\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_path\u001b[49m\u001b[43m)\u001b[49m\n",
|
806 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:152\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 151\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m--> 152\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[0;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 155\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n",
|
807 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfuture\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
808 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m [\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m future \u001b[38;5;129;01min\u001b[39;00m futures]\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
809 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:449\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n\u001b[0;32m 453\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n",
|
810 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:401\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 399\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception:\n\u001b[0;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 401\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[0;32m 402\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 403\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[0;32m 404\u001b[0m \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
811 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\thread.py:58\u001b[0m, in \u001b[0;36m_WorkItem.run\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 59\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfuture\u001b[38;5;241m.\u001b[39mset_exception(exc)\n",
|
812 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:189\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 184\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 185\u001b[0m re\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;129;01min\u001b[39;00m DatasetApi\u001b[38;5;241m.\u001b[39mFLOW_PERMANENT_ERRORS\n\u001b[0;32m 186\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m chunk\u001b[38;5;241m.\u001b[39mretries \u001b[38;5;241m>\u001b[39m max_chunk_retries\n\u001b[0;32m 187\u001b[0m ):\n\u001b[0;32m 188\u001b[0m chunk\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfailed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 189\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m re\n\u001b[0;32m 190\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(chunk_retry_interval)\n\u001b[0;32m 191\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n",
|
813 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:178\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 177\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 178\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 179\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mupload_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontent\u001b[49m\n\u001b[0;32m 180\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m 182\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RestAPIError \u001b[38;5;28;01mas\u001b[39;00m re:\n",
|
814 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:214\u001b[0m, in \u001b[0;36mDatasetApi._upload_request\u001b[1;34m(self, params, path, file_name, chunk)\u001b[0m\n\u001b[0;32m 211\u001b[0m path_params \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproject\u001b[39m\u001b[38;5;124m\"\u001b[39m, _client\u001b[38;5;241m.\u001b[39m_project_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataset\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupload\u001b[39m\u001b[38;5;124m\"\u001b[39m, path]\n\u001b[0;32m 213\u001b[0m \u001b[38;5;66;03m# Flow configuration params are sent as form data\u001b[39;00m\n\u001b[1;32m--> 214\u001b[0m \u001b[43m_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\n\u001b[0;32m 216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
815 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\decorators.py:35\u001b[0m, in \u001b[0;36mconnected.<locals>.if_connected\u001b[1;34m(inst, *args, **kwargs)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m inst\u001b[38;5;241m.\u001b[39m_connected:\n\u001b[0;32m 34\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m NoHopsworksConnectionError\n\u001b[1;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
816 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\client\\base.py:108\u001b[0m, in \u001b[0;36mClient._send_request\u001b[1;34m(self, method, path_params, query_params, headers, data, stream, files)\u001b[0m\n\u001b[0;32m 105\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_session\u001b[38;5;241m.\u001b[39msend(prepped, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_verify, stream\u001b[38;5;241m=\u001b[39mstream)\n\u001b[0;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m--> 108\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mRestAPIError(url, response)\n\u001b[0;32m 110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream:\n\u001b[0;32m 111\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
|
817 |
-
"\u001b[1;31mRestAPIError\u001b[0m: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''"
|
818 |
-
]
|
819 |
-
}
|
820 |
-
],
|
821 |
-
"source": [
|
822 |
-
"# Retrieving the Model Registry\n",
|
823 |
-
"mr = project.get_model_registry()\n",
|
824 |
-
"\n",
|
825 |
-
"# Extracting loss value from the training history\n",
|
826 |
-
"metrics = {'loss': history_dict['val_loss'][0]} \n",
|
827 |
-
"\n",
|
828 |
-
"# Creating a TensorFlow model in the Model Registry\n",
|
829 |
-
"tf_model = mr.tensorflow.create_model(\n",
|
830 |
-
" name=\"DK_electricity_price_prediction_model\",\n",
|
831 |
-
" metrics=metrics,\n",
|
832 |
-
" description=\"Hourly electricity price prediction model.\",\n",
|
833 |
-
" input_example=n_step_window.example[0].numpy(),\n",
|
834 |
-
")\n",
|
835 |
-
"\n",
|
836 |
-
"# Saving the model to the specified directory\n",
|
837 |
-
"tf_model.save(model_dir)"
|
838 |
-
]
|
839 |
-
},
|
840 |
-
{
|
841 |
-
"cell_type": "markdown",
|
842 |
-
"metadata": {},
|
843 |
-
"source": [
|
844 |
-
"---\n",
|
845 |
-
"\n",
|
846 |
-
"## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
|
847 |
-
"\n",
|
848 |
-
"In the next notebook you will use your registered model to predict batch data."
|
849 |
-
]
|
850 |
-
}
|
851 |
-
],
|
852 |
-
"metadata": {
|
853 |
-
"kernelspec": {
|
854 |
-
"display_name": "bds-mlops",
|
855 |
-
"language": "python",
|
856 |
-
"name": "python3"
|
857 |
-
},
|
858 |
-
"language_info": {
|
859 |
-
"codemirror_mode": {
|
860 |
-
"name": "ipython",
|
861 |
-
"version": 3
|
862 |
-
},
|
863 |
-
"file_extension": ".py",
|
864 |
-
"mimetype": "text/x-python",
|
865 |
-
"name": "python",
|
866 |
-
"nbconvert_exporter": "python",
|
867 |
-
"pygments_lexer": "ipython3",
|
868 |
-
"version": "3.7.16"
|
869 |
-
},
|
870 |
-
"orig_nbformat": 4
|
871 |
-
},
|
872 |
-
"nbformat": 4,
|
873 |
-
"nbformat_minor": 2
|
874 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/notebooks_dev/3_training_pipeline_dev_tensorflow.ipynb
DELETED
@@ -1,818 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"# <span style=\"font-width:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-width:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
|
8 |
-
]
|
9 |
-
},
|
10 |
-
{
|
11 |
-
"cell_type": "markdown",
|
12 |
-
"metadata": {},
|
13 |
-
"source": [
|
14 |
-
"## 🗒️ This notebook is divided into the following sections:\n",
|
15 |
-
"1. Feature selection.\n",
|
16 |
-
"2. Feature transformations.\n",
|
17 |
-
"3. Training datasets creation.\n",
|
18 |
-
"4. Loading the training data.\n",
|
19 |
-
"5. Train the model.\n",
|
20 |
-
"6. Register model to Hopsworks model registry."
|
21 |
-
]
|
22 |
-
},
|
23 |
-
{
|
24 |
-
"cell_type": "markdown",
|
25 |
-
"metadata": {},
|
26 |
-
"source": [
|
27 |
-
"## <span style='color:#2656a3'> ⚙️ Import of libraries and packages"
|
28 |
-
]
|
29 |
-
},
|
30 |
-
{
|
31 |
-
"cell_type": "code",
|
32 |
-
"execution_count": 1,
|
33 |
-
"metadata": {},
|
34 |
-
"outputs": [],
|
35 |
-
"source": [
|
36 |
-
"!pip install tensorflow --quiet"
|
37 |
-
]
|
38 |
-
},
|
39 |
-
{
|
40 |
-
"cell_type": "code",
|
41 |
-
"execution_count": 11,
|
42 |
-
"metadata": {},
|
43 |
-
"outputs": [],
|
44 |
-
"source": [
|
45 |
-
"# Importing the packages for the needed libraries for the Jupyter notebook\n",
|
46 |
-
"import inspect \n",
|
47 |
-
"import datetime\n",
|
48 |
-
"\n",
|
49 |
-
"import pandas as pd\n",
|
50 |
-
"import numpy as np\n",
|
51 |
-
"import matplotlib.pyplot as plt\n",
|
52 |
-
"import tensorflow as tf\n",
|
53 |
-
"\n",
|
54 |
-
"#ignore warnings\n",
|
55 |
-
"import warnings\n",
|
56 |
-
"warnings.filterwarnings('ignore')"
|
57 |
-
]
|
58 |
-
},
|
59 |
-
{
|
60 |
-
"cell_type": "markdown",
|
61 |
-
"metadata": {},
|
62 |
-
"source": [
|
63 |
-
"## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
|
64 |
-
]
|
65 |
-
},
|
66 |
-
{
|
67 |
-
"cell_type": "code",
|
68 |
-
"execution_count": 1,
|
69 |
-
"metadata": {},
|
70 |
-
"outputs": [
|
71 |
-
{
|
72 |
-
"name": "stderr",
|
73 |
-
"output_type": "stream",
|
74 |
-
"text": [
|
75 |
-
"c:\\Users\\Benj3\\anaconda3\\envs\\tensor\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
76 |
-
" from .autonotebook import tqdm as notebook_tqdm\n"
|
77 |
-
]
|
78 |
-
},
|
79 |
-
{
|
80 |
-
"name": "stdout",
|
81 |
-
"output_type": "stream",
|
82 |
-
"text": [
|
83 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
84 |
-
"\n",
|
85 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
|
86 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
87 |
-
]
|
88 |
-
}
|
89 |
-
],
|
90 |
-
"source": [
|
91 |
-
"# Importing the hopsworks module\n",
|
92 |
-
"import hopsworks\n",
|
93 |
-
"\n",
|
94 |
-
"# Logging in to the Hopsworks project\n",
|
95 |
-
"project = hopsworks.login()\n",
|
96 |
-
"\n",
|
97 |
-
"# Getting the feature store from the project\n",
|
98 |
-
"fs = project.get_feature_store() "
|
99 |
-
]
|
100 |
-
},
|
101 |
-
{
|
102 |
-
"cell_type": "code",
|
103 |
-
"execution_count": 3,
|
104 |
-
"metadata": {},
|
105 |
-
"outputs": [],
|
106 |
-
"source": [
|
107 |
-
"# Retrieve the feature groups\n",
|
108 |
-
"electricity_fg = fs.get_feature_group(\n",
|
109 |
-
" name='electricity_prices',\n",
|
110 |
-
" version=1,\n",
|
111 |
-
")\n",
|
112 |
-
"\n",
|
113 |
-
"weather_fg = fs.get_feature_group(\n",
|
114 |
-
" name='weather_measurements',\n",
|
115 |
-
" version=1,\n",
|
116 |
-
")\n",
|
117 |
-
"\n",
|
118 |
-
"danish_holidays_fg = fs.get_feature_group(\n",
|
119 |
-
" name='danish_holidayss',\n",
|
120 |
-
" version=1,\n",
|
121 |
-
")\n",
|
122 |
-
"forecast_renewable_energy_fg = fs.get_feature_group(\n",
|
123 |
-
" name='forecast_renewable_energy',\n",
|
124 |
-
" version=1\n",
|
125 |
-
")"
|
126 |
-
]
|
127 |
-
},
|
128 |
-
{
|
129 |
-
"cell_type": "markdown",
|
130 |
-
"metadata": {},
|
131 |
-
"source": [
|
132 |
-
"## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
|
133 |
-
"\n",
|
134 |
-
"We first select the features that we want to include for model training.\n",
|
135 |
-
"\n",
|
136 |
-
"Since we specified `primary_key`as `date` and `event_time` as `timestamp` in part 01 we can now join them together for the `electricity_fg`, `weather_fg` and `forecast_renewable_energy_fg`."
|
137 |
-
]
|
138 |
-
},
|
139 |
-
{
|
140 |
-
"cell_type": "markdown",
|
141 |
-
"metadata": {},
|
142 |
-
"source": [
|
143 |
-
"hmmm skal 'time' egentlig være 'date'???"
|
144 |
-
]
|
145 |
-
},
|
146 |
-
{
|
147 |
-
"cell_type": "code",
|
148 |
-
"execution_count": 4,
|
149 |
-
"metadata": {},
|
150 |
-
"outputs": [],
|
151 |
-
"source": [
|
152 |
-
"# Select features for training data\n",
|
153 |
-
"selected_features = electricity_fg.select_all()\\\n",
|
154 |
-
" .join(weather_fg.select_except([\"timestamp\", \"time\"]))\\\n",
|
155 |
-
" .join(forecast_renewable_energy_fg.select_except([\"timestamp\", \"time\"]))\\\n",
|
156 |
-
" .join(danish_holidays_fg.select_all())"
|
157 |
-
]
|
158 |
-
},
|
159 |
-
{
|
160 |
-
"cell_type": "code",
|
161 |
-
"execution_count": 7,
|
162 |
-
"metadata": {},
|
163 |
-
"outputs": [],
|
164 |
-
"source": [
|
165 |
-
"# Uncomment this if you would like to view your selected features\n",
|
166 |
-
"# selected_features.show(5)"
|
167 |
-
]
|
168 |
-
},
|
169 |
-
{
|
170 |
-
"cell_type": "markdown",
|
171 |
-
"metadata": {},
|
172 |
-
"source": [
|
173 |
-
"### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
|
174 |
-
"\n",
|
175 |
-
"We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
|
176 |
-
"To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
|
177 |
-
"\n",
|
178 |
-
"To achieve this, we create a mapping between our features and transformation functions - ved ikke om man kan sige det her?"
|
179 |
-
]
|
180 |
-
},
|
181 |
-
{
|
182 |
-
"cell_type": "code",
|
183 |
-
"execution_count": 5,
|
184 |
-
"metadata": {},
|
185 |
-
"outputs": [],
|
186 |
-
"source": [
|
187 |
-
"# Defining transformation functions for feature scaling and encoding\n",
|
188 |
-
"transformation_functions = {\n",
|
189 |
-
" \"dk1_spotpricedkk_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
190 |
-
" \"dk1_offshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
191 |
-
" \"dk1_onshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
192 |
-
" \"dk1_solar_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
193 |
-
" \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
194 |
-
" \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
195 |
-
" \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
196 |
-
" \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
197 |
-
" \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
198 |
-
" \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
199 |
-
" \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
|
200 |
-
" \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
|
201 |
-
" \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
|
202 |
-
" \"type\": fs.get_transformation_function(name=\"label_encoder\"),\n",
|
203 |
-
" }"
|
204 |
-
]
|
205 |
-
},
|
206 |
-
{
|
207 |
-
"cell_type": "markdown",
|
208 |
-
"metadata": {},
|
209 |
-
"source": [
|
210 |
-
"`Feature Views` stands between **Feature Groups** and **Training Dataset**. Сombining **Feature Groups** we can create **Feature Views** which store a metadata of our data. Having **Feature Views** we can create **Training Dataset**.\n",
|
211 |
-
"\n",
|
212 |
-
"The Feature Views allows schema in form of a query with filters, define a model target feature/label and additional transformation functions.\n",
|
213 |
-
"\n",
|
214 |
-
"In order to create Feature View we can use `FeatureStore.get_or_create_feature_view()` method.\n",
|
215 |
-
"\n",
|
216 |
-
"We can specify next parameters:\n",
|
217 |
-
"\n",
|
218 |
-
"- `name` - name of a feature group.\n",
|
219 |
-
"\n",
|
220 |
-
"- `version` - version of a feature group.\n",
|
221 |
-
"\n",
|
222 |
-
"- `labels`- our target variable.\n",
|
223 |
-
"\n",
|
224 |
-
"- `transformation_functions` - functions to transform our features.\n",
|
225 |
-
"\n",
|
226 |
-
"- `query` - query object with data."
|
227 |
-
]
|
228 |
-
},
|
229 |
-
{
|
230 |
-
"cell_type": "markdown",
|
231 |
-
"metadata": {},
|
232 |
-
"source": [
|
233 |
-
"ved ikke om den her omformulering af botten går an?"
|
234 |
-
]
|
235 |
-
},
|
236 |
-
{
|
237 |
-
"cell_type": "markdown",
|
238 |
-
"metadata": {},
|
239 |
-
"source": [
|
240 |
-
"`Feature Views` serve as an intermediary between **Feature Groups** and the **Training Dataset**. By combining various **Feature Groups**, we can construct **Feature Views**, which retain metadata about our data. Utilizing **Feature Views**, we can subsequently generate a **Training Dataset**.\n",
|
241 |
-
"\n",
|
242 |
-
"Feature Views facilitate the definition of schema through queries with filters, identification of the model's target feature or label, and application of additional transformation functions.\n",
|
243 |
-
"\n",
|
244 |
-
"To create a Feature View, we employ the `FeatureStore.get_or_create_feature_view()` method, where we specify the following parameters:\n",
|
245 |
-
"\n",
|
246 |
-
"- `name`: The name of the feature group.\n",
|
247 |
-
"\n",
|
248 |
-
"- `version`: The version of the feature group.\n",
|
249 |
-
"\n",
|
250 |
-
"- `labels`: Our target variable.\n",
|
251 |
-
"\n",
|
252 |
-
"- `transformation_functions`: Functions to transform our features.\n",
|
253 |
-
"\n",
|
254 |
-
"- `query`: A query object containing the relevant data."
|
255 |
-
]
|
256 |
-
},
|
257 |
-
{
|
258 |
-
"cell_type": "code",
|
259 |
-
"execution_count": 6,
|
260 |
-
"metadata": {},
|
261 |
-
"outputs": [],
|
262 |
-
"source": [
|
263 |
-
"# Getting or creating a feature view named 'electricity_feature_view'\n",
|
264 |
-
"version = 1 # Defining the version for the feature view\n",
|
265 |
-
"feature_view = fs.get_or_create_feature_view(\n",
|
266 |
-
" name='electricity_feature_view',\n",
|
267 |
-
" version=version,\n",
|
268 |
-
" labels=[], # Labels will be defined manually later for our 'y'\n",
|
269 |
-
" transformation_functions=transformation_functions,\n",
|
270 |
-
" query=selected_features,\n",
|
271 |
-
")"
|
272 |
-
]
|
273 |
-
},
|
274 |
-
{
|
275 |
-
"cell_type": "markdown",
|
276 |
-
"metadata": {},
|
277 |
-
"source": [
|
278 |
-
"## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>\n",
|
279 |
-
"\n",
|
280 |
-
"In Hopsworks training data is a query where the projection (set of features) is determined by the parent FeatureView with an optional snapshot on disk of the data returned by the query.\n",
|
281 |
-
"\n",
|
282 |
-
"**Training Dataset may contain splits such as:** \n",
|
283 |
-
"* Training set - the subset of training data used to train a model.\n",
|
284 |
-
"* Validation set - the subset of training data used to evaluate hparams when training a model\n",
|
285 |
-
"* Test set - the holdout subset of training data used to evaluate a mode\n",
|
286 |
-
"\n",
|
287 |
-
"Training dataset is created using `fs.create_training_dataset()` method.\n",
|
288 |
-
"\n",
|
289 |
-
"**From feature view APIs you can also create training datasts based on even time filters specifing `start_time` and `end_time`** "
|
290 |
-
]
|
291 |
-
},
|
292 |
-
{
|
293 |
-
"cell_type": "markdown",
|
294 |
-
"metadata": {},
|
295 |
-
"source": [
|
296 |
-
"### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
|
297 |
-
]
|
298 |
-
},
|
299 |
-
{
|
300 |
-
"cell_type": "code",
|
301 |
-
"execution_count": 7,
|
302 |
-
"metadata": {},
|
303 |
-
"outputs": [
|
304 |
-
{
|
305 |
-
"name": "stdout",
|
306 |
-
"output_type": "stream",
|
307 |
-
"text": [
|
308 |
-
"Finished: Reading data from Hopsworks, using ArrowFlight (198.29s) \n"
|
309 |
-
]
|
310 |
-
},
|
311 |
-
{
|
312 |
-
"name": "stderr",
|
313 |
-
"output_type": "stream",
|
314 |
-
"text": [
|
315 |
-
"VersionWarning: Incremented version to `15`.\n"
|
316 |
-
]
|
317 |
-
}
|
318 |
-
],
|
319 |
-
"source": [
|
320 |
-
"# Splitting the feature view data into train, validation, and test sets\n",
|
321 |
-
"# We didn't specify 'labels' in feature view creation, it will therefore return 'None' for Y\n",
|
322 |
-
"X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
|
323 |
-
" train_start=\"2022-01-01\",\n",
|
324 |
-
" train_end=\"2023-06-30\",\n",
|
325 |
-
" validation_start=\"2023-07-01\",\n",
|
326 |
-
" validation_end=\"2023-09-30\",\n",
|
327 |
-
" test_start=\"2023-10-01\",\n",
|
328 |
-
" test_end=\"2023-12-31\",\n",
|
329 |
-
" description='Electricity price prediction dataset',\n",
|
330 |
-
")"
|
331 |
-
]
|
332 |
-
},
|
333 |
-
{
|
334 |
-
"cell_type": "code",
|
335 |
-
"execution_count": 13,
|
336 |
-
"metadata": {},
|
337 |
-
"outputs": [],
|
338 |
-
"source": [
|
339 |
-
"# Sorting the training, validation, and test datasets based on the 'timestamp' column\n",
|
340 |
-
"X_train.sort_values([\"timestamp\"], inplace=True)\n",
|
341 |
-
"X_val.sort_values([\"timestamp\"], inplace=True)\n",
|
342 |
-
"X_test.sort_values([\"timestamp\"], inplace=True)"
|
343 |
-
]
|
344 |
-
},
|
345 |
-
{
|
346 |
-
"cell_type": "code",
|
347 |
-
"execution_count": 14,
|
348 |
-
"metadata": {},
|
349 |
-
"outputs": [],
|
350 |
-
"source": [
|
351 |
-
"# Extracting the target variable 'dk1_spotpricedkk_kwh' and defineing 'y_train', 'y_val' and 'y_test' \n",
|
352 |
-
"y_train = X_train[[\"dk1_spotpricedkk_kwh\"]]\n",
|
353 |
-
"y_val = X_val[[\"dk1_spotpricedkk_kwh\"]]\n",
|
354 |
-
"y_test = X_test[[\"dk1_spotpricedkk_kwh\"]]"
|
355 |
-
]
|
356 |
-
},
|
357 |
-
{
|
358 |
-
"cell_type": "code",
|
359 |
-
"execution_count": 15,
|
360 |
-
"metadata": {},
|
361 |
-
"outputs": [],
|
362 |
-
"source": [
|
363 |
-
"# Dropping the 'date', 'time' and 'timestamp' columns from the training, validation, and test datasets\n",
|
364 |
-
"X_train.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
|
365 |
-
"X_val.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
|
366 |
-
"X_test.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)"
|
367 |
-
]
|
368 |
-
},
|
369 |
-
{
|
370 |
-
"cell_type": "code",
|
371 |
-
"execution_count": 16,
|
372 |
-
"metadata": {},
|
373 |
-
"outputs": [],
|
374 |
-
"source": [
|
375 |
-
"# Dropping the 'dare', 'time' and 'timestamp' and dependent variable (y) columns from the training, validation, and test datasets\n",
|
376 |
-
"X_train.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
|
377 |
-
"X_val.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
|
378 |
-
"X_test.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)"
|
379 |
-
]
|
380 |
-
},
|
381 |
-
{
|
382 |
-
"cell_type": "code",
|
383 |
-
"execution_count": 17,
|
384 |
-
"metadata": {},
|
385 |
-
"outputs": [
|
386 |
-
{
|
387 |
-
"data": {
|
388 |
-
"text/html": [
|
389 |
-
"<div>\n",
|
390 |
-
"<style scoped>\n",
|
391 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
392 |
-
" vertical-align: middle;\n",
|
393 |
-
" }\n",
|
394 |
-
"\n",
|
395 |
-
" .dataframe tbody tr th {\n",
|
396 |
-
" vertical-align: top;\n",
|
397 |
-
" }\n",
|
398 |
-
"\n",
|
399 |
-
" .dataframe thead th {\n",
|
400 |
-
" text-align: right;\n",
|
401 |
-
" }\n",
|
402 |
-
"</style>\n",
|
403 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
404 |
-
" <thead>\n",
|
405 |
-
" <tr style=\"text-align: right;\">\n",
|
406 |
-
" <th></th>\n",
|
407 |
-
" <th>temperature_2m</th>\n",
|
408 |
-
" <th>relative_humidity_2m</th>\n",
|
409 |
-
" <th>precipitation</th>\n",
|
410 |
-
" <th>rain</th>\n",
|
411 |
-
" <th>snowfall</th>\n",
|
412 |
-
" <th>weather_code</th>\n",
|
413 |
-
" <th>cloud_cover</th>\n",
|
414 |
-
" <th>wind_speed_10m</th>\n",
|
415 |
-
" <th>wind_gusts_10m</th>\n",
|
416 |
-
" <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
|
417 |
-
" <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
|
418 |
-
" <th>dk1_solar_forecastintraday_kwh</th>\n",
|
419 |
-
" <th>type</th>\n",
|
420 |
-
" </tr>\n",
|
421 |
-
" </thead>\n",
|
422 |
-
" <tbody>\n",
|
423 |
-
" <tr>\n",
|
424 |
-
" <th>5905751</th>\n",
|
425 |
-
" <td>0.435268</td>\n",
|
426 |
-
" <td>0.986667</td>\n",
|
427 |
-
" <td>0.011364</td>\n",
|
428 |
-
" <td>0.011364</td>\n",
|
429 |
-
" <td>0.0</td>\n",
|
430 |
-
" <td>0.68</td>\n",
|
431 |
-
" <td>1.0</td>\n",
|
432 |
-
" <td>0.315152</td>\n",
|
433 |
-
" <td>0.272633</td>\n",
|
434 |
-
" <td>0.945277</td>\n",
|
435 |
-
" <td>0.481878</td>\n",
|
436 |
-
" <td>0.000000</td>\n",
|
437 |
-
" <td>1</td>\n",
|
438 |
-
" </tr>\n",
|
439 |
-
" <tr>\n",
|
440 |
-
" <th>19398</th>\n",
|
441 |
-
" <td>0.435268</td>\n",
|
442 |
-
" <td>0.986667</td>\n",
|
443 |
-
" <td>0.011364</td>\n",
|
444 |
-
" <td>0.011364</td>\n",
|
445 |
-
" <td>0.0</td>\n",
|
446 |
-
" <td>0.68</td>\n",
|
447 |
-
" <td>1.0</td>\n",
|
448 |
-
" <td>0.315152</td>\n",
|
449 |
-
" <td>0.272633</td>\n",
|
450 |
-
" <td>0.934795</td>\n",
|
451 |
-
" <td>0.446702</td>\n",
|
452 |
-
" <td>0.000008</td>\n",
|
453 |
-
" <td>1</td>\n",
|
454 |
-
" </tr>\n",
|
455 |
-
" <tr>\n",
|
456 |
-
" <th>5919627</th>\n",
|
457 |
-
" <td>0.417411</td>\n",
|
458 |
-
" <td>0.933333</td>\n",
|
459 |
-
" <td>0.000000</td>\n",
|
460 |
-
" <td>0.000000</td>\n",
|
461 |
-
" <td>0.0</td>\n",
|
462 |
-
" <td>0.04</td>\n",
|
463 |
-
" <td>1.0</td>\n",
|
464 |
-
" <td>0.082828</td>\n",
|
465 |
-
" <td>0.074922</td>\n",
|
466 |
-
" <td>0.773045</td>\n",
|
467 |
-
" <td>0.264375</td>\n",
|
468 |
-
" <td>0.000018</td>\n",
|
469 |
-
" <td>1</td>\n",
|
470 |
-
" </tr>\n",
|
471 |
-
" <tr>\n",
|
472 |
-
" <th>4719247</th>\n",
|
473 |
-
" <td>0.426339</td>\n",
|
474 |
-
" <td>0.933333</td>\n",
|
475 |
-
" <td>0.000000</td>\n",
|
476 |
-
" <td>0.000000</td>\n",
|
477 |
-
" <td>0.0</td>\n",
|
478 |
-
" <td>0.04</td>\n",
|
479 |
-
" <td>1.0</td>\n",
|
480 |
-
" <td>0.195960</td>\n",
|
481 |
-
" <td>0.187305</td>\n",
|
482 |
-
" <td>0.913059</td>\n",
|
483 |
-
" <td>0.358547</td>\n",
|
484 |
-
" <td>0.000012</td>\n",
|
485 |
-
" <td>1</td>\n",
|
486 |
-
" </tr>\n",
|
487 |
-
" <tr>\n",
|
488 |
-
" <th>4743896</th>\n",
|
489 |
-
" <td>0.417411</td>\n",
|
490 |
-
" <td>0.933333</td>\n",
|
491 |
-
" <td>0.000000</td>\n",
|
492 |
-
" <td>0.000000</td>\n",
|
493 |
-
" <td>0.0</td>\n",
|
494 |
-
" <td>0.04</td>\n",
|
495 |
-
" <td>1.0</td>\n",
|
496 |
-
" <td>0.082828</td>\n",
|
497 |
-
" <td>0.074922</td>\n",
|
498 |
-
" <td>0.493641</td>\n",
|
499 |
-
" <td>0.133456</td>\n",
|
500 |
-
" <td>0.005406</td>\n",
|
501 |
-
" <td>1</td>\n",
|
502 |
-
" </tr>\n",
|
503 |
-
" </tbody>\n",
|
504 |
-
"</table>\n",
|
505 |
-
"</div>"
|
506 |
-
],
|
507 |
-
"text/plain": [
|
508 |
-
" temperature_2m relative_humidity_2m precipitation rain \\\n",
|
509 |
-
"5905751 0.435268 0.986667 0.011364 0.011364 \n",
|
510 |
-
"19398 0.435268 0.986667 0.011364 0.011364 \n",
|
511 |
-
"5919627 0.417411 0.933333 0.000000 0.000000 \n",
|
512 |
-
"4719247 0.426339 0.933333 0.000000 0.000000 \n",
|
513 |
-
"4743896 0.417411 0.933333 0.000000 0.000000 \n",
|
514 |
-
"\n",
|
515 |
-
" snowfall weather_code cloud_cover wind_speed_10m wind_gusts_10m \\\n",
|
516 |
-
"5905751 0.0 0.68 1.0 0.315152 0.272633 \n",
|
517 |
-
"19398 0.0 0.68 1.0 0.315152 0.272633 \n",
|
518 |
-
"5919627 0.0 0.04 1.0 0.082828 0.074922 \n",
|
519 |
-
"4719247 0.0 0.04 1.0 0.195960 0.187305 \n",
|
520 |
-
"4743896 0.0 0.04 1.0 0.082828 0.074922 \n",
|
521 |
-
"\n",
|
522 |
-
" dk1_offshore_wind_forecastintraday_kwh \\\n",
|
523 |
-
"5905751 0.945277 \n",
|
524 |
-
"19398 0.934795 \n",
|
525 |
-
"5919627 0.773045 \n",
|
526 |
-
"4719247 0.913059 \n",
|
527 |
-
"4743896 0.493641 \n",
|
528 |
-
"\n",
|
529 |
-
" dk1_onshore_wind_forecastintraday_kwh \\\n",
|
530 |
-
"5905751 0.481878 \n",
|
531 |
-
"19398 0.446702 \n",
|
532 |
-
"5919627 0.264375 \n",
|
533 |
-
"4719247 0.358547 \n",
|
534 |
-
"4743896 0.133456 \n",
|
535 |
-
"\n",
|
536 |
-
" dk1_solar_forecastintraday_kwh type \n",
|
537 |
-
"5905751 0.000000 1 \n",
|
538 |
-
"19398 0.000008 1 \n",
|
539 |
-
"5919627 0.000018 1 \n",
|
540 |
-
"4719247 0.000012 1 \n",
|
541 |
-
"4743896 0.005406 1 "
|
542 |
-
]
|
543 |
-
},
|
544 |
-
"execution_count": 17,
|
545 |
-
"metadata": {},
|
546 |
-
"output_type": "execute_result"
|
547 |
-
}
|
548 |
-
],
|
549 |
-
"source": [
|
550 |
-
"# Displaying the first 5 rows of the train dataset (X_train)\n",
|
551 |
-
"X_train.head()"
|
552 |
-
]
|
553 |
-
},
|
554 |
-
{
|
555 |
-
"cell_type": "code",
|
556 |
-
"execution_count": 18,
|
557 |
-
"metadata": {},
|
558 |
-
"outputs": [
|
559 |
-
{
|
560 |
-
"data": {
|
561 |
-
"text/html": [
|
562 |
-
"<div>\n",
|
563 |
-
"<style scoped>\n",
|
564 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
565 |
-
" vertical-align: middle;\n",
|
566 |
-
" }\n",
|
567 |
-
"\n",
|
568 |
-
" .dataframe tbody tr th {\n",
|
569 |
-
" vertical-align: top;\n",
|
570 |
-
" }\n",
|
571 |
-
"\n",
|
572 |
-
" .dataframe thead th {\n",
|
573 |
-
" text-align: right;\n",
|
574 |
-
" }\n",
|
575 |
-
"</style>\n",
|
576 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
577 |
-
" <thead>\n",
|
578 |
-
" <tr style=\"text-align: right;\">\n",
|
579 |
-
" <th></th>\n",
|
580 |
-
" <th>dk1_spotpricedkk_kwh</th>\n",
|
581 |
-
" </tr>\n",
|
582 |
-
" </thead>\n",
|
583 |
-
" <tbody>\n",
|
584 |
-
" <tr>\n",
|
585 |
-
" <th>5905751</th>\n",
|
586 |
-
" <td>0.179988</td>\n",
|
587 |
-
" </tr>\n",
|
588 |
-
" <tr>\n",
|
589 |
-
" <th>19398</th>\n",
|
590 |
-
" <td>0.179988</td>\n",
|
591 |
-
" </tr>\n",
|
592 |
-
" <tr>\n",
|
593 |
-
" <th>5919627</th>\n",
|
594 |
-
" <td>0.179988</td>\n",
|
595 |
-
" </tr>\n",
|
596 |
-
" <tr>\n",
|
597 |
-
" <th>4719247</th>\n",
|
598 |
-
" <td>0.179988</td>\n",
|
599 |
-
" </tr>\n",
|
600 |
-
" <tr>\n",
|
601 |
-
" <th>4743896</th>\n",
|
602 |
-
" <td>0.179988</td>\n",
|
603 |
-
" </tr>\n",
|
604 |
-
" </tbody>\n",
|
605 |
-
"</table>\n",
|
606 |
-
"</div>"
|
607 |
-
],
|
608 |
-
"text/plain": [
|
609 |
-
" dk1_spotpricedkk_kwh\n",
|
610 |
-
"5905751 0.179988\n",
|
611 |
-
"19398 0.179988\n",
|
612 |
-
"5919627 0.179988\n",
|
613 |
-
"4719247 0.179988\n",
|
614 |
-
"4743896 0.179988"
|
615 |
-
]
|
616 |
-
},
|
617 |
-
"execution_count": 18,
|
618 |
-
"metadata": {},
|
619 |
-
"output_type": "execute_result"
|
620 |
-
}
|
621 |
-
],
|
622 |
-
"source": [
|
623 |
-
"\n",
|
624 |
-
"# Displaying the first 5 rows of the train dataset (y_train)\n",
|
625 |
-
"y_train.head()"
|
626 |
-
]
|
627 |
-
},
|
628 |
-
{
|
629 |
-
"cell_type": "markdown",
|
630 |
-
"metadata": {},
|
631 |
-
"source": [
|
632 |
-
"## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
|
633 |
-
]
|
634 |
-
},
|
635 |
-
{
|
636 |
-
"cell_type": "markdown",
|
637 |
-
"metadata": {},
|
638 |
-
"source": [
|
639 |
-
"## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
|
640 |
-
]
|
641 |
-
},
|
642 |
-
{
|
643 |
-
"cell_type": "code",
|
644 |
-
"execution_count": 43,
|
645 |
-
"metadata": {},
|
646 |
-
"outputs": [],
|
647 |
-
"source": [
|
648 |
-
"from keras.preprocessing.sequence import TimeseriesGenerator\n"
|
649 |
-
]
|
650 |
-
},
|
651 |
-
{
|
652 |
-
"cell_type": "code",
|
653 |
-
"execution_count": 55,
|
654 |
-
"metadata": {},
|
655 |
-
"outputs": [],
|
656 |
-
"source": [
|
657 |
-
"# define generator\n",
|
658 |
-
"n_input = 12\n",
|
659 |
-
"n_features = 13\n",
|
660 |
-
"generator = TimeseriesGenerator(X_train, X_train, length=n_input, batch_size=32)"
|
661 |
-
]
|
662 |
-
},
|
663 |
-
{
|
664 |
-
"cell_type": "code",
|
665 |
-
"execution_count": null,
|
666 |
-
"metadata": {},
|
667 |
-
"outputs": [
|
668 |
-
{
|
669 |
-
"ename": "",
|
670 |
-
"evalue": "",
|
671 |
-
"output_type": "error",
|
672 |
-
"traceback": [
|
673 |
-
"\u001b[1;31mThe kernel failed to start as 'TypeAliasType' could not be imported from 'c:\\Users\\Benj3\\anaconda3\\envs\\tensor\\Lib\\site-packages\\typing_extensions.py'.\n",
|
674 |
-
"\u001b[1;31mClick <a href='https://aka.ms/kernelFailuresModuleImportErrFromFile'>here</a> for more info."
|
675 |
-
]
|
676 |
-
}
|
677 |
-
],
|
678 |
-
"source": [
|
679 |
-
"X, y = generator[1]\n",
|
680 |
-
"print(f'Given the Array: \\n{X.flatten()}')\n",
|
681 |
-
"print(f'Predict this y: \\n {y}')"
|
682 |
-
]
|
683 |
-
},
|
684 |
-
{
|
685 |
-
"cell_type": "markdown",
|
686 |
-
"metadata": {},
|
687 |
-
"source": [
|
688 |
-
"## <span style='color:#2656a3'>🗄 Model Registry</span>"
|
689 |
-
]
|
690 |
-
},
|
691 |
-
{
|
692 |
-
"cell_type": "code",
|
693 |
-
"execution_count": 39,
|
694 |
-
"metadata": {},
|
695 |
-
"outputs": [
|
696 |
-
{
|
697 |
-
"name": "stdout",
|
698 |
-
"output_type": "stream",
|
699 |
-
"text": [
|
700 |
-
"Exporting trained model to: electricity_price_model\n",
|
701 |
-
"INFO:tensorflow:Assets written to: electricity_price_model\\assets\n"
|
702 |
-
]
|
703 |
-
}
|
704 |
-
],
|
705 |
-
"source": [
|
706 |
-
"# Exporting the trained model to a directory\n",
|
707 |
-
"model_dir = \"electricity_price_model\"\n",
|
708 |
-
"print('Exporting trained model to: {}'.format(model_dir))\n",
|
709 |
-
"\n",
|
710 |
-
"# Saving the model using TensorFlow's saved_model.save function\n",
|
711 |
-
"tf.saved_model.save(model, model_dir)"
|
712 |
-
]
|
713 |
-
},
|
714 |
-
{
|
715 |
-
"cell_type": "code",
|
716 |
-
"execution_count": 44,
|
717 |
-
"metadata": {},
|
718 |
-
"outputs": [
|
719 |
-
{
|
720 |
-
"name": "stdout",
|
721 |
-
"output_type": "stream",
|
722 |
-
"text": [
|
723 |
-
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
724 |
-
]
|
725 |
-
},
|
726 |
-
{
|
727 |
-
"name": "stderr",
|
728 |
-
"output_type": "stream",
|
729 |
-
"text": [
|
730 |
-
"Uploading: 100.000%|██████████| 59/59 elapsed<00:01 remaining<00:001<00:01, 3.38it/s]\n",
|
731 |
-
"Uploading: 100.000%|██████████| 397272/397272 elapsed<00:02 remaining<00:00 3.38it/s]\n",
|
732 |
-
"Uploading: 0.000%| | 0/112411 elapsed<00:01 remaining<?0:04<00:01, 3.38it/s]\n",
|
733 |
-
"Uploading model files (2 dirs, 2 files): 17%|█▋ | 1/6 [00:07<00:35, 7.08s/it]\n"
|
734 |
-
]
|
735 |
-
},
|
736 |
-
{
|
737 |
-
"ename": "RestAPIError",
|
738 |
-
"evalue": "Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''",
|
739 |
-
"output_type": "error",
|
740 |
-
"traceback": [
|
741 |
-
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
742 |
-
"\u001b[1;31mRestAPIError\u001b[0m Traceback (most recent call last)",
|
743 |
-
"Cell \u001b[1;32mIn[44], line 16\u001b[0m\n\u001b[0;32m 8\u001b[0m tf_model \u001b[38;5;241m=\u001b[39m mr\u001b[38;5;241m.\u001b[39mtensorflow\u001b[38;5;241m.\u001b[39mcreate_model(\n\u001b[0;32m 9\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDK_electricity_price_prediction_model\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 10\u001b[0m metrics\u001b[38;5;241m=\u001b[39mmetrics,\n\u001b[0;32m 11\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHourly electricity price prediction model.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 12\u001b[0m input_example\u001b[38;5;241m=\u001b[39mn_step_window\u001b[38;5;241m.\u001b[39mexample[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mnumpy(),\n\u001b[0;32m 13\u001b[0m )\n\u001b[0;32m 15\u001b[0m \u001b[38;5;66;03m# Saving the model to the specified directory\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m \u001b[43mtf_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_dir\u001b[49m\u001b[43m)\u001b[49m\n",
|
744 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\model.py:101\u001b[0m, in \u001b[0;36mModel.save\u001b[1;34m(self, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave\u001b[39m(\u001b[38;5;28mself\u001b[39m, model_path, await_registration\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m480\u001b[39m, keep_original_files\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m 91\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Persist this model including model files and metadata to the model registry.\u001b[39;00m\n\u001b[0;32m 92\u001b[0m \n\u001b[0;32m 93\u001b[0m \u001b[38;5;124;03m # Arguments\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 99\u001b[0m \u001b[38;5;124;03m `Model`: The model metadata object.\u001b[39;00m\n\u001b[0;32m 100\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 102\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 103\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 104\u001b[0m \u001b[43m \u001b[49m\u001b[43mawait_registration\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mawait_registration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 105\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 106\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
745 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:421\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 419\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m be:\n\u001b[0;32m 420\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mrm(model_instance\u001b[38;5;241m.\u001b[39mversion_path)\n\u001b[1;32m--> 421\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m be\n\u001b[0;32m 423\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel created, explore it at \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m model_instance\u001b[38;5;241m.\u001b[39mget_url())\n\u001b[0;32m 425\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_instance\n",
|
746 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:385\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 381\u001b[0m \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[0;32m 382\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\n\u001b[0;32m 383\u001b[0m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(os\u001b[38;5;241m.\u001b[39mgetcwd(), model_path)\n\u001b[0;32m 384\u001b[0m ): \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[1;32m--> 385\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_model_from_local_or_hopsfs_mount\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetcwd\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 390\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 
391\u001b[0m \u001b[38;5;66;03m# check project relative\u001b[39;00m\n\u001b[0;32m 392\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mpath_exists(\n\u001b[0;32m 393\u001b[0m model_path\n\u001b[0;32m 394\u001b[0m ): \u001b[38;5;66;03m# check hdfs relative and absolute\u001b[39;00m\n",
|
747 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:249\u001b[0m, in \u001b[0;36mModelEngine._save_model_from_local_or_hopsfs_mount\u001b[1;34m(self, model_instance, model_path, keep_original_files, update_upload_progress)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_or_move_hopsfs_model(\n\u001b[0;32m 241\u001b[0m from_hdfs_model_path\u001b[38;5;241m=\u001b[39mmodel_path\u001b[38;5;241m.\u001b[39mreplace(\n\u001b[0;32m 242\u001b[0m constants\u001b[38;5;241m.\u001b[39mMODEL_REGISTRY\u001b[38;5;241m.\u001b[39mHOPSFS_MOUNT_PREFIX, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 246\u001b[0m update_upload_progress\u001b[38;5;241m=\u001b[39mupdate_upload_progress,\n\u001b[0;32m 247\u001b[0m )\n\u001b[0;32m 248\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 249\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_local_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mfrom_local_model_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_model_version_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mversion_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
748 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:225\u001b[0m, in \u001b[0;36mModelEngine._upload_local_model\u001b[1;34m(self, from_local_model_path, to_model_version_path, update_upload_progress)\u001b[0m\n\u001b[0;32m 223\u001b[0m update_upload_progress(n_dirs, n_files)\n\u001b[0;32m 224\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m f_name \u001b[38;5;129;01min\u001b[39;00m files:\n\u001b[1;32m--> 225\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mf_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_base_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 226\u001b[0m n_files \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 227\u001b[0m update_upload_progress(n_dirs, n_files)\n",
|
749 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\local_engine.py:38\u001b[0m, in \u001b[0;36mLocalEngine.upload\u001b[1;34m(self, local_path, remote_path)\u001b[0m\n\u001b[0;32m 36\u001b[0m local_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_abs_path(local_path)\n\u001b[0;32m 37\u001b[0m remote_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepend_project_path(remote_path)\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dataset_api\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_path\u001b[49m\u001b[43m)\u001b[49m\n",
|
750 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:152\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 151\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m--> 152\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[0;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 155\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n",
|
751 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfuture\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
752 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m [\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m future \u001b[38;5;129;01min\u001b[39;00m futures]\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
|
753 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:449\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n\u001b[0;32m 453\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n",
|
754 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:401\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 399\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception:\n\u001b[0;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 401\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[0;32m 402\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 403\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[0;32m 404\u001b[0m \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
755 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\thread.py:58\u001b[0m, in \u001b[0;36m_WorkItem.run\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 59\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfuture\u001b[38;5;241m.\u001b[39mset_exception(exc)\n",
|
756 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:189\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 184\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 185\u001b[0m re\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;129;01min\u001b[39;00m DatasetApi\u001b[38;5;241m.\u001b[39mFLOW_PERMANENT_ERRORS\n\u001b[0;32m 186\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m chunk\u001b[38;5;241m.\u001b[39mretries \u001b[38;5;241m>\u001b[39m max_chunk_retries\n\u001b[0;32m 187\u001b[0m ):\n\u001b[0;32m 188\u001b[0m chunk\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfailed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 189\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m re\n\u001b[0;32m 190\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(chunk_retry_interval)\n\u001b[0;32m 191\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n",
|
757 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:178\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 177\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 178\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 179\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mupload_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontent\u001b[49m\n\u001b[0;32m 180\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m 182\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RestAPIError \u001b[38;5;28;01mas\u001b[39;00m re:\n",
|
758 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:214\u001b[0m, in \u001b[0;36mDatasetApi._upload_request\u001b[1;34m(self, params, path, file_name, chunk)\u001b[0m\n\u001b[0;32m 211\u001b[0m path_params \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproject\u001b[39m\u001b[38;5;124m\"\u001b[39m, _client\u001b[38;5;241m.\u001b[39m_project_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataset\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupload\u001b[39m\u001b[38;5;124m\"\u001b[39m, path]\n\u001b[0;32m 213\u001b[0m \u001b[38;5;66;03m# Flow configuration params are sent as form data\u001b[39;00m\n\u001b[1;32m--> 214\u001b[0m \u001b[43m_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\n\u001b[0;32m 216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
759 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\decorators.py:35\u001b[0m, in \u001b[0;36mconnected.<locals>.if_connected\u001b[1;34m(inst, *args, **kwargs)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m inst\u001b[38;5;241m.\u001b[39m_connected:\n\u001b[0;32m 34\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m NoHopsworksConnectionError\n\u001b[1;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
|
760 |
-
"File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\client\\base.py:108\u001b[0m, in \u001b[0;36mClient._send_request\u001b[1;34m(self, method, path_params, query_params, headers, data, stream, files)\u001b[0m\n\u001b[0;32m 105\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_session\u001b[38;5;241m.\u001b[39msend(prepped, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_verify, stream\u001b[38;5;241m=\u001b[39mstream)\n\u001b[0;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m--> 108\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mRestAPIError(url, response)\n\u001b[0;32m 110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream:\n\u001b[0;32m 111\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
|
761 |
-
"\u001b[1;31mRestAPIError\u001b[0m: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''"
|
762 |
-
]
|
763 |
-
}
|
764 |
-
],
|
765 |
-
"source": [
|
766 |
-
"# Retrieving the Model Registry\n",
|
767 |
-
"mr = project.get_model_registry()\n",
|
768 |
-
"\n",
|
769 |
-
"# Extracting loss value from the training history\n",
|
770 |
-
"metrics = {'loss': history_dict['val_loss'][0]} \n",
|
771 |
-
"\n",
|
772 |
-
"# Creating a TensorFlow model in the Model Registry\n",
|
773 |
-
"tf_model = mr.tensorflow.create_model(\n",
|
774 |
-
" name=\"DK_electricity_price_prediction_model\",\n",
|
775 |
-
" metrics=metrics,\n",
|
776 |
-
" description=\"Hourly electricity price prediction model.\",\n",
|
777 |
-
" input_example=n_step_window.example[0].numpy(),\n",
|
778 |
-
")\n",
|
779 |
-
"\n",
|
780 |
-
"# Saving the model to the specified directory\n",
|
781 |
-
"tf_model.save(model_dir)"
|
782 |
-
]
|
783 |
-
},
|
784 |
-
{
|
785 |
-
"cell_type": "markdown",
|
786 |
-
"metadata": {},
|
787 |
-
"source": [
|
788 |
-
"---\n",
|
789 |
-
"\n",
|
790 |
-
"## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
|
791 |
-
"\n",
|
792 |
-
"In the next notebook you will use your registered model to predict batch data."
|
793 |
-
]
|
794 |
-
}
|
795 |
-
],
|
796 |
-
"metadata": {
|
797 |
-
"kernelspec": {
|
798 |
-
"display_name": "bds-mlops",
|
799 |
-
"language": "python",
|
800 |
-
"name": "python3"
|
801 |
-
},
|
802 |
-
"language_info": {
|
803 |
-
"codemirror_mode": {
|
804 |
-
"name": "ipython",
|
805 |
-
"version": 3
|
806 |
-
},
|
807 |
-
"file_extension": ".py",
|
808 |
-
"mimetype": "text/x-python",
|
809 |
-
"name": "python",
|
810 |
-
"nbconvert_exporter": "python",
|
811 |
-
"pygments_lexer": "ipython3",
|
812 |
-
"version": "3.11.9"
|
813 |
-
},
|
814 |
-
"orig_nbformat": 4
|
815 |
-
},
|
816 |
-
"nbformat": 4,
|
817 |
-
"nbformat_minor": 2
|
818 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hide/notebooks_dev/3_training_pipeline_dev_windowtensor.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/1_feature_backfill.ipynb
CHANGED
@@ -28,7 +28,7 @@
|
|
28 |
},
|
29 |
{
|
30 |
"cell_type": "code",
|
31 |
-
"execution_count":
|
32 |
"metadata": {},
|
33 |
"outputs": [],
|
34 |
"source": [
|
@@ -38,15 +38,15 @@
|
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
41 |
-
"execution_count":
|
42 |
"metadata": {},
|
43 |
"outputs": [
|
44 |
{
|
45 |
"name": "stdout",
|
46 |
"output_type": "stream",
|
47 |
"text": [
|
48 |
-
"
|
49 |
-
"
|
50 |
]
|
51 |
}
|
52 |
],
|
@@ -64,7 +64,7 @@
|
|
64 |
},
|
65 |
{
|
66 |
"cell_type": "code",
|
67 |
-
"execution_count":
|
68 |
"metadata": {},
|
69 |
"outputs": [],
|
70 |
"source": [
|
@@ -80,6 +80,18 @@
|
|
80 |
"warnings.filterwarnings('ignore', category=DeprecationWarning)"
|
81 |
]
|
82 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
{
|
84 |
"cell_type": "markdown",
|
85 |
"metadata": {},
|
@@ -91,7 +103,6 @@
|
|
91 |
"- Electricity prices in Denmark on hourly basis per day from [Energinet](https://www.energidataservice.dk). Loacated in the *featuresfolder* under electricity_prices.\n",
|
92 |
"- Different meteorological observations based on Aalborg Denmark from [Open Meteo](https://www.open-meteo.com). Loacated in the *featuresfolder* under weather_measures.\n",
|
93 |
"- Danish calendar that categorizes dates into types based on whether it is a weekday or not. This files is made manually by the group and is located in the *datafolder* inside this repository.\n",
|
94 |
-
"- Forecast Renewable Energy next day from [Energinet](https://www.energidataservice.dk). Loacated in the *featuresfolder* under electricity_prices.\n",
|
95 |
"- Weather Forecast based on Aalborg Denmark from [Open Meteo](https://www.open-meteo.com). Loacated in the *featuresfolder* under weather_measures. (This data is used later to parse in new real-time weather data)\n"
|
96 |
]
|
97 |
},
|
@@ -105,7 +116,7 @@
|
|
105 |
},
|
106 |
{
|
107 |
"cell_type": "code",
|
108 |
-
"execution_count":
|
109 |
"metadata": {},
|
110 |
"outputs": [],
|
111 |
"source": [
|
@@ -122,7 +133,7 @@
|
|
122 |
},
|
123 |
{
|
124 |
"cell_type": "code",
|
125 |
-
"execution_count":
|
126 |
"metadata": {},
|
127 |
"outputs": [
|
128 |
{
|
@@ -207,7 +218,7 @@
|
|
207 |
"4 1641009600000 2022-01-01 04:00:00 2022-01-01 4 0.28013"
|
208 |
]
|
209 |
},
|
210 |
-
"execution_count":
|
211 |
"metadata": {},
|
212 |
"output_type": "execute_result"
|
213 |
}
|
@@ -219,7 +230,7 @@
|
|
219 |
},
|
220 |
{
|
221 |
"cell_type": "code",
|
222 |
-
"execution_count":
|
223 |
"metadata": {},
|
224 |
"outputs": [
|
225 |
{
|
@@ -252,44 +263,44 @@
|
|
252 |
" </thead>\n",
|
253 |
" <tbody>\n",
|
254 |
" <tr>\n",
|
255 |
-
" <th>
|
256 |
-
" <td>
|
257 |
-
" <td>2024-05-
|
258 |
-
" <td>2024-05-
|
259 |
" <td>19</td>\n",
|
260 |
-
" <td>0.
|
261 |
" </tr>\n",
|
262 |
" <tr>\n",
|
263 |
-
" <th>
|
264 |
-
" <td>
|
265 |
-
" <td>2024-05-
|
266 |
-
" <td>2024-05-
|
267 |
" <td>20</td>\n",
|
268 |
-
" <td>0.
|
269 |
" </tr>\n",
|
270 |
" <tr>\n",
|
271 |
-
" <th>
|
272 |
-
" <td>
|
273 |
-
" <td>2024-05-
|
274 |
-
" <td>2024-05-
|
275 |
" <td>21</td>\n",
|
276 |
-
" <td>0.
|
277 |
" </tr>\n",
|
278 |
" <tr>\n",
|
279 |
-
" <th>
|
280 |
-
" <td>
|
281 |
-
" <td>2024-05-
|
282 |
-
" <td>2024-05-
|
283 |
" <td>22</td>\n",
|
284 |
-
" <td>0.
|
285 |
" </tr>\n",
|
286 |
" <tr>\n",
|
287 |
-
" <th>
|
288 |
-
" <td>
|
289 |
-
" <td>2024-05-
|
290 |
-
" <td>2024-05-
|
291 |
" <td>23</td>\n",
|
292 |
-
" <td>0.
|
293 |
" </tr>\n",
|
294 |
" </tbody>\n",
|
295 |
"</table>\n",
|
@@ -297,21 +308,21 @@
|
|
297 |
],
|
298 |
"text/plain": [
|
299 |
" timestamp datetime date hour \\\n",
|
300 |
-
"
|
301 |
-
"
|
302 |
-
"
|
303 |
-
"
|
304 |
-
"
|
305 |
"\n",
|
306 |
" dk1_spotpricedkk_kwh \n",
|
307 |
-
"
|
308 |
-
"
|
309 |
-
"
|
310 |
-
"
|
311 |
-
"
|
312 |
]
|
313 |
},
|
314 |
-
"execution_count":
|
315 |
"metadata": {},
|
316 |
"output_type": "execute_result"
|
317 |
}
|
@@ -323,7 +334,7 @@
|
|
323 |
},
|
324 |
{
|
325 |
"cell_type": "code",
|
326 |
-
"execution_count":
|
327 |
"metadata": {},
|
328 |
"outputs": [
|
329 |
{
|
@@ -331,17 +342,17 @@
|
|
331 |
"output_type": "stream",
|
332 |
"text": [
|
333 |
"<class 'pandas.core.frame.DataFrame'>\n",
|
334 |
-
"RangeIndex:
|
335 |
"Data columns (total 5 columns):\n",
|
336 |
" # Column Non-Null Count Dtype \n",
|
337 |
"--- ------ -------------- ----- \n",
|
338 |
-
" 0 timestamp
|
339 |
-
" 1 datetime
|
340 |
-
" 2 date
|
341 |
-
" 3 hour
|
342 |
-
" 4 dk1_spotpricedkk_kwh
|
343 |
-
"dtypes: datetime64[ns](1), float64(1),
|
344 |
-
"memory usage:
|
345 |
]
|
346 |
}
|
347 |
],
|
@@ -350,308 +361,6 @@
|
|
350 |
"electricity_df.info()"
|
351 |
]
|
352 |
},
|
353 |
-
{
|
354 |
-
"cell_type": "markdown",
|
355 |
-
"metadata": {},
|
356 |
-
"source": [
|
357 |
-
"### <span style=\"color:#2656a3;\">☀️💨 Forecast Renewable Energy next day from Energinet\n",
|
358 |
-
"Second dataset is Forecast Renewable Energy next day also on hourly basis from Energinet"
|
359 |
-
]
|
360 |
-
},
|
361 |
-
{
|
362 |
-
"cell_type": "code",
|
363 |
-
"execution_count": 45,
|
364 |
-
"metadata": {},
|
365 |
-
"outputs": [],
|
366 |
-
"source": [
|
367 |
-
"# Fetching historical forecast of renewable energy data for area DK1 from January 1, 2022\n",
|
368 |
-
"# Note: The end date is currently left out to retrieve data up to the present date of yesterday \n",
|
369 |
-
"# Today is not included in the data as it is not historical data\n",
|
370 |
-
"forecast_renewable_energy_df = electricity_prices.forecast_renewable_energy(\n",
|
371 |
-
" historical=True, \n",
|
372 |
-
" area = [\"DK1\"],\n",
|
373 |
-
" start= '2022-01-01', \n",
|
374 |
-
" #end='2023-12-31'\n",
|
375 |
-
")"
|
376 |
-
]
|
377 |
-
},
|
378 |
-
{
|
379 |
-
"cell_type": "code",
|
380 |
-
"execution_count": 46,
|
381 |
-
"metadata": {},
|
382 |
-
"outputs": [
|
383 |
-
{
|
384 |
-
"data": {
|
385 |
-
"text/html": [
|
386 |
-
"<div>\n",
|
387 |
-
"<style scoped>\n",
|
388 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
389 |
-
" vertical-align: middle;\n",
|
390 |
-
" }\n",
|
391 |
-
"\n",
|
392 |
-
" .dataframe tbody tr th {\n",
|
393 |
-
" vertical-align: top;\n",
|
394 |
-
" }\n",
|
395 |
-
"\n",
|
396 |
-
" .dataframe thead th {\n",
|
397 |
-
" text-align: right;\n",
|
398 |
-
" }\n",
|
399 |
-
"</style>\n",
|
400 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
401 |
-
" <thead>\n",
|
402 |
-
" <tr style=\"text-align: right;\">\n",
|
403 |
-
" <th></th>\n",
|
404 |
-
" <th>timestamp</th>\n",
|
405 |
-
" <th>datetime</th>\n",
|
406 |
-
" <th>date</th>\n",
|
407 |
-
" <th>hour</th>\n",
|
408 |
-
" <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
|
409 |
-
" <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
|
410 |
-
" <th>dk1_solar_forecastintraday_kwh</th>\n",
|
411 |
-
" </tr>\n",
|
412 |
-
" </thead>\n",
|
413 |
-
" <tbody>\n",
|
414 |
-
" <tr>\n",
|
415 |
-
" <th>0</th>\n",
|
416 |
-
" <td>1641024000000</td>\n",
|
417 |
-
" <td>2022-01-01 08:00:00</td>\n",
|
418 |
-
" <td>2022-01-01</td>\n",
|
419 |
-
" <td>8</td>\n",
|
420 |
-
" <td>611708.313</td>\n",
|
421 |
-
" <td>236791.672</td>\n",
|
422 |
-
" <td>49.583</td>\n",
|
423 |
-
" </tr>\n",
|
424 |
-
" <tr>\n",
|
425 |
-
" <th>1</th>\n",
|
426 |
-
" <td>1641027600000</td>\n",
|
427 |
-
" <td>2022-01-01 09:00:00</td>\n",
|
428 |
-
" <td>2022-01-01</td>\n",
|
429 |
-
" <td>9</td>\n",
|
430 |
-
" <td>459708.344</td>\n",
|
431 |
-
" <td>196666.672</td>\n",
|
432 |
-
" <td>4841.250</td>\n",
|
433 |
-
" </tr>\n",
|
434 |
-
" <tr>\n",
|
435 |
-
" <th>2</th>\n",
|
436 |
-
" <td>1641031200000</td>\n",
|
437 |
-
" <td>2022-01-01 10:00:00</td>\n",
|
438 |
-
" <td>2022-01-01</td>\n",
|
439 |
-
" <td>10</td>\n",
|
440 |
-
" <td>310375.000</td>\n",
|
441 |
-
" <td>178500.000</td>\n",
|
442 |
-
" <td>20352.501</td>\n",
|
443 |
-
" </tr>\n",
|
444 |
-
" <tr>\n",
|
445 |
-
" <th>3</th>\n",
|
446 |
-
" <td>1641034800000</td>\n",
|
447 |
-
" <td>2022-01-01 11:00:00</td>\n",
|
448 |
-
" <td>2022-01-01</td>\n",
|
449 |
-
" <td>11</td>\n",
|
450 |
-
" <td>320750.000</td>\n",
|
451 |
-
" <td>201125.000</td>\n",
|
452 |
-
" <td>35718.750</td>\n",
|
453 |
-
" </tr>\n",
|
454 |
-
" <tr>\n",
|
455 |
-
" <th>4</th>\n",
|
456 |
-
" <td>1641038400000</td>\n",
|
457 |
-
" <td>2022-01-01 12:00:00</td>\n",
|
458 |
-
" <td>2022-01-01</td>\n",
|
459 |
-
" <td>12</td>\n",
|
460 |
-
" <td>355666.656</td>\n",
|
461 |
-
" <td>277666.656</td>\n",
|
462 |
-
" <td>38026.669</td>\n",
|
463 |
-
" </tr>\n",
|
464 |
-
" </tbody>\n",
|
465 |
-
"</table>\n",
|
466 |
-
"</div>"
|
467 |
-
],
|
468 |
-
"text/plain": [
|
469 |
-
" timestamp datetime date hour \\\n",
|
470 |
-
"0 1641024000000 2022-01-01 08:00:00 2022-01-01 8 \n",
|
471 |
-
"1 1641027600000 2022-01-01 09:00:00 2022-01-01 9 \n",
|
472 |
-
"2 1641031200000 2022-01-01 10:00:00 2022-01-01 10 \n",
|
473 |
-
"3 1641034800000 2022-01-01 11:00:00 2022-01-01 11 \n",
|
474 |
-
"4 1641038400000 2022-01-01 12:00:00 2022-01-01 12 \n",
|
475 |
-
"\n",
|
476 |
-
" dk1_offshore_wind_forecastintraday_kwh \\\n",
|
477 |
-
"0 611708.313 \n",
|
478 |
-
"1 459708.344 \n",
|
479 |
-
"2 310375.000 \n",
|
480 |
-
"3 320750.000 \n",
|
481 |
-
"4 355666.656 \n",
|
482 |
-
"\n",
|
483 |
-
" dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \n",
|
484 |
-
"0 236791.672 49.583 \n",
|
485 |
-
"1 196666.672 4841.250 \n",
|
486 |
-
"2 178500.000 20352.501 \n",
|
487 |
-
"3 201125.000 35718.750 \n",
|
488 |
-
"4 277666.656 38026.669 "
|
489 |
-
]
|
490 |
-
},
|
491 |
-
"execution_count": 46,
|
492 |
-
"metadata": {},
|
493 |
-
"output_type": "execute_result"
|
494 |
-
}
|
495 |
-
],
|
496 |
-
"source": [
|
497 |
-
"# Display the first 5 rows of the forecast_renewable_energy dataframe\n",
|
498 |
-
"forecast_renewable_energy_df.head(5)"
|
499 |
-
]
|
500 |
-
},
|
501 |
-
{
|
502 |
-
"cell_type": "code",
|
503 |
-
"execution_count": 47,
|
504 |
-
"metadata": {},
|
505 |
-
"outputs": [
|
506 |
-
{
|
507 |
-
"data": {
|
508 |
-
"text/html": [
|
509 |
-
"<div>\n",
|
510 |
-
"<style scoped>\n",
|
511 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
512 |
-
" vertical-align: middle;\n",
|
513 |
-
" }\n",
|
514 |
-
"\n",
|
515 |
-
" .dataframe tbody tr th {\n",
|
516 |
-
" vertical-align: top;\n",
|
517 |
-
" }\n",
|
518 |
-
"\n",
|
519 |
-
" .dataframe thead th {\n",
|
520 |
-
" text-align: right;\n",
|
521 |
-
" }\n",
|
522 |
-
"</style>\n",
|
523 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
524 |
-
" <thead>\n",
|
525 |
-
" <tr style=\"text-align: right;\">\n",
|
526 |
-
" <th></th>\n",
|
527 |
-
" <th>timestamp</th>\n",
|
528 |
-
" <th>datetime</th>\n",
|
529 |
-
" <th>date</th>\n",
|
530 |
-
" <th>hour</th>\n",
|
531 |
-
" <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
|
532 |
-
" <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
|
533 |
-
" <th>dk1_solar_forecastintraday_kwh</th>\n",
|
534 |
-
" </tr>\n",
|
535 |
-
" </thead>\n",
|
536 |
-
" <tbody>\n",
|
537 |
-
" <tr>\n",
|
538 |
-
" <th>14426</th>\n",
|
539 |
-
" <td>1714590000000</td>\n",
|
540 |
-
" <td>2024-05-01 19:00:00</td>\n",
|
541 |
-
" <td>2024-05-01</td>\n",
|
542 |
-
" <td>19</td>\n",
|
543 |
-
" <td>816250.000</td>\n",
|
544 |
-
" <td>1382208.374</td>\n",
|
545 |
-
" <td>272910.828</td>\n",
|
546 |
-
" </tr>\n",
|
547 |
-
" <tr>\n",
|
548 |
-
" <th>14427</th>\n",
|
549 |
-
" <td>1714593600000</td>\n",
|
550 |
-
" <td>2024-05-01 20:00:00</td>\n",
|
551 |
-
" <td>2024-05-01</td>\n",
|
552 |
-
" <td>20</td>\n",
|
553 |
-
" <td>848500.000</td>\n",
|
554 |
-
" <td>1388583.374</td>\n",
|
555 |
-
" <td>46086.666</td>\n",
|
556 |
-
" </tr>\n",
|
557 |
-
" <tr>\n",
|
558 |
-
" <th>14428</th>\n",
|
559 |
-
" <td>1714597200000</td>\n",
|
560 |
-
" <td>2024-05-01 21:00:00</td>\n",
|
561 |
-
" <td>2024-05-01</td>\n",
|
562 |
-
" <td>21</td>\n",
|
563 |
-
" <td>886041.687</td>\n",
|
564 |
-
" <td>1554791.626</td>\n",
|
565 |
-
" <td>1338.750</td>\n",
|
566 |
-
" </tr>\n",
|
567 |
-
" <tr>\n",
|
568 |
-
" <th>14429</th>\n",
|
569 |
-
" <td>1714600800000</td>\n",
|
570 |
-
" <td>2024-05-01 22:00:00</td>\n",
|
571 |
-
" <td>2024-05-01</td>\n",
|
572 |
-
" <td>22</td>\n",
|
573 |
-
" <td>919416.687</td>\n",
|
574 |
-
" <td>1698875.000</td>\n",
|
575 |
-
" <td>0.000</td>\n",
|
576 |
-
" </tr>\n",
|
577 |
-
" <tr>\n",
|
578 |
-
" <th>14430</th>\n",
|
579 |
-
" <td>1714604400000</td>\n",
|
580 |
-
" <td>2024-05-01 23:00:00</td>\n",
|
581 |
-
" <td>2024-05-01</td>\n",
|
582 |
-
" <td>23</td>\n",
|
583 |
-
" <td>934708.313</td>\n",
|
584 |
-
" <td>1739375.000</td>\n",
|
585 |
-
" <td>0.000</td>\n",
|
586 |
-
" </tr>\n",
|
587 |
-
" </tbody>\n",
|
588 |
-
"</table>\n",
|
589 |
-
"</div>"
|
590 |
-
],
|
591 |
-
"text/plain": [
|
592 |
-
" timestamp datetime date hour \\\n",
|
593 |
-
"14426 1714590000000 2024-05-01 19:00:00 2024-05-01 19 \n",
|
594 |
-
"14427 1714593600000 2024-05-01 20:00:00 2024-05-01 20 \n",
|
595 |
-
"14428 1714597200000 2024-05-01 21:00:00 2024-05-01 21 \n",
|
596 |
-
"14429 1714600800000 2024-05-01 22:00:00 2024-05-01 22 \n",
|
597 |
-
"14430 1714604400000 2024-05-01 23:00:00 2024-05-01 23 \n",
|
598 |
-
"\n",
|
599 |
-
" dk1_offshore_wind_forecastintraday_kwh \\\n",
|
600 |
-
"14426 816250.000 \n",
|
601 |
-
"14427 848500.000 \n",
|
602 |
-
"14428 886041.687 \n",
|
603 |
-
"14429 919416.687 \n",
|
604 |
-
"14430 934708.313 \n",
|
605 |
-
"\n",
|
606 |
-
" dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \n",
|
607 |
-
"14426 1382208.374 272910.828 \n",
|
608 |
-
"14427 1388583.374 46086.666 \n",
|
609 |
-
"14428 1554791.626 1338.750 \n",
|
610 |
-
"14429 1698875.000 0.000 \n",
|
611 |
-
"14430 1739375.000 0.000 "
|
612 |
-
]
|
613 |
-
},
|
614 |
-
"execution_count": 47,
|
615 |
-
"metadata": {},
|
616 |
-
"output_type": "execute_result"
|
617 |
-
}
|
618 |
-
],
|
619 |
-
"source": [
|
620 |
-
"# Display the last 5 rows of the forecast_renewable_energy dataframe\n",
|
621 |
-
"forecast_renewable_energy_df.tail(5)"
|
622 |
-
]
|
623 |
-
},
|
624 |
-
{
|
625 |
-
"cell_type": "code",
|
626 |
-
"execution_count": 48,
|
627 |
-
"metadata": {},
|
628 |
-
"outputs": [
|
629 |
-
{
|
630 |
-
"name": "stdout",
|
631 |
-
"output_type": "stream",
|
632 |
-
"text": [
|
633 |
-
"<class 'pandas.core.frame.DataFrame'>\n",
|
634 |
-
"RangeIndex: 14431 entries, 0 to 14430\n",
|
635 |
-
"Data columns (total 7 columns):\n",
|
636 |
-
" # Column Non-Null Count Dtype \n",
|
637 |
-
"--- ------ -------------- ----- \n",
|
638 |
-
" 0 timestamp 14431 non-null int64 \n",
|
639 |
-
" 1 datetime 14431 non-null datetime64[ns]\n",
|
640 |
-
" 2 date 14431 non-null object \n",
|
641 |
-
" 3 hour 14431 non-null int32 \n",
|
642 |
-
" 4 dk1_offshore_wind_forecastintraday_kwh 14415 non-null float64 \n",
|
643 |
-
" 5 dk1_onshore_wind_forecastintraday_kwh 14415 non-null float64 \n",
|
644 |
-
" 6 dk1_solar_forecastintraday_kwh 14415 non-null float64 \n",
|
645 |
-
"dtypes: datetime64[ns](1), float64(3), int32(1), int64(1), object(1)\n",
|
646 |
-
"memory usage: 733.0+ KB\n"
|
647 |
-
]
|
648 |
-
}
|
649 |
-
],
|
650 |
-
"source": [
|
651 |
-
"# Showing the information for the forecast_renewable_energy dataframe\n",
|
652 |
-
"forecast_renewable_energy_df.info()"
|
653 |
-
]
|
654 |
-
},
|
655 |
{
|
656 |
"cell_type": "markdown",
|
657 |
"metadata": {},
|
@@ -669,7 +378,7 @@
|
|
669 |
},
|
670 |
{
|
671 |
"cell_type": "code",
|
672 |
-
"execution_count":
|
673 |
"metadata": {},
|
674 |
"outputs": [],
|
675 |
"source": [
|
@@ -685,7 +394,7 @@
|
|
685 |
},
|
686 |
{
|
687 |
"cell_type": "code",
|
688 |
-
"execution_count":
|
689 |
"metadata": {},
|
690 |
"outputs": [
|
691 |
{
|
@@ -832,7 +541,7 @@
|
|
832 |
"4 100.0 10.6 23.8 "
|
833 |
]
|
834 |
},
|
835 |
-
"execution_count":
|
836 |
"metadata": {},
|
837 |
"output_type": "execute_result"
|
838 |
}
|
@@ -844,7 +553,7 @@
|
|
844 |
},
|
845 |
{
|
846 |
"cell_type": "code",
|
847 |
-
"execution_count":
|
848 |
"metadata": {},
|
849 |
"outputs": [
|
850 |
{
|
@@ -885,84 +594,84 @@
|
|
885 |
" </thead>\n",
|
886 |
" <tbody>\n",
|
887 |
" <tr>\n",
|
888 |
-
" <th>
|
889 |
-
" <td>
|
890 |
-
" <td>2024-
|
891 |
-
" <td>2024-
|
892 |
" <td>19</td>\n",
|
893 |
-
" <td>
|
894 |
-
" <td>
|
895 |
" <td>0.0</td>\n",
|
896 |
" <td>0.0</td>\n",
|
897 |
" <td>0.0</td>\n",
|
898 |
" <td>0.0</td>\n",
|
899 |
-
" <td>
|
900 |
-
" <td>
|
901 |
-
" <td>
|
902 |
" </tr>\n",
|
903 |
" <tr>\n",
|
904 |
-
" <th>
|
905 |
-
" <td>
|
906 |
-
" <td>2024-
|
907 |
-
" <td>2024-
|
908 |
" <td>20</td>\n",
|
909 |
-
" <td>
|
910 |
-
" <td>
|
911 |
" <td>0.0</td>\n",
|
912 |
" <td>0.0</td>\n",
|
913 |
" <td>0.0</td>\n",
|
914 |
" <td>0.0</td>\n",
|
915 |
-
" <td>
|
916 |
-
" <td>
|
917 |
-
" <td>
|
918 |
" </tr>\n",
|
919 |
" <tr>\n",
|
920 |
-
" <th>
|
921 |
-
" <td>
|
922 |
-
" <td>2024-
|
923 |
-
" <td>2024-
|
924 |
" <td>21</td>\n",
|
925 |
-
" <td>
|
926 |
" <td>67.0</td>\n",
|
927 |
" <td>0.0</td>\n",
|
928 |
" <td>0.0</td>\n",
|
929 |
" <td>0.0</td>\n",
|
930 |
" <td>0.0</td>\n",
|
931 |
-
" <td>
|
932 |
-
" <td>
|
933 |
-
" <td>
|
934 |
" </tr>\n",
|
935 |
" <tr>\n",
|
936 |
-
" <th>
|
937 |
-
" <td>
|
938 |
-
" <td>2024-
|
939 |
-
" <td>2024-
|
940 |
" <td>22</td>\n",
|
941 |
-
" <td>
|
942 |
-
" <td>
|
943 |
" <td>0.0</td>\n",
|
944 |
" <td>0.0</td>\n",
|
945 |
" <td>0.0</td>\n",
|
946 |
" <td>0.0</td>\n",
|
947 |
-
" <td>
|
948 |
-
" <td>
|
949 |
-
" <td>
|
950 |
" </tr>\n",
|
951 |
" <tr>\n",
|
952 |
-
" <th>
|
953 |
-
" <td>
|
954 |
-
" <td>2024-
|
955 |
-
" <td>2024-
|
956 |
" <td>23</td>\n",
|
957 |
-
" <td>12.
|
958 |
" <td>70.0</td>\n",
|
959 |
" <td>0.0</td>\n",
|
960 |
" <td>0.0</td>\n",
|
961 |
" <td>0.0</td>\n",
|
962 |
" <td>0.0</td>\n",
|
963 |
-
" <td>
|
964 |
-
" <td>
|
965 |
-
" <td>
|
966 |
" </tr>\n",
|
967 |
" </tbody>\n",
|
968 |
"</table>\n",
|
@@ -970,28 +679,28 @@
|
|
970 |
],
|
971 |
"text/plain": [
|
972 |
" timestamp datetime date hour temperature_2m \\\n",
|
973 |
-
"
|
974 |
-
"
|
975 |
-
"
|
976 |
-
"
|
977 |
-
"
|
978 |
"\n",
|
979 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
980 |
-
"
|
981 |
-
"
|
982 |
-
"
|
983 |
-
"
|
984 |
-
"
|
985 |
"\n",
|
986 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
987 |
-
"
|
988 |
-
"
|
989 |
-
"
|
990 |
-
"
|
991 |
-
"
|
992 |
]
|
993 |
},
|
994 |
-
"execution_count":
|
995 |
"metadata": {},
|
996 |
"output_type": "execute_result"
|
997 |
}
|
@@ -1003,7 +712,7 @@
|
|
1003 |
},
|
1004 |
{
|
1005 |
"cell_type": "code",
|
1006 |
-
"execution_count":
|
1007 |
"metadata": {},
|
1008 |
"outputs": [
|
1009 |
{
|
@@ -1011,25 +720,25 @@
|
|
1011 |
"output_type": "stream",
|
1012 |
"text": [
|
1013 |
"<class 'pandas.core.frame.DataFrame'>\n",
|
1014 |
-
"
|
1015 |
"Data columns (total 13 columns):\n",
|
1016 |
" # Column Non-Null Count Dtype \n",
|
1017 |
"--- ------ -------------- ----- \n",
|
1018 |
-
" 0 timestamp
|
1019 |
-
" 1 datetime
|
1020 |
-
" 2 date
|
1021 |
-
" 3 hour
|
1022 |
-
" 4 temperature_2m
|
1023 |
-
" 5 relative_humidity_2m
|
1024 |
-
" 6 precipitation
|
1025 |
-
" 7 rain
|
1026 |
-
" 8 snowfall
|
1027 |
-
" 9 weather_code
|
1028 |
-
" 10 cloud_cover
|
1029 |
-
" 11 wind_speed_10m
|
1030 |
-
" 12 wind_gusts_10m
|
1031 |
-
"dtypes: datetime64[ns](1), float64(9),
|
1032 |
-
"memory usage: 2.
|
1033 |
]
|
1034 |
}
|
1035 |
],
|
@@ -1042,13 +751,13 @@
|
|
1042 |
"cell_type": "markdown",
|
1043 |
"metadata": {},
|
1044 |
"source": [
|
1045 |
-
"#### <span style=\"color:#2656a3;\"> 🌈 Weather
|
1046 |
"Weather Forecast from Open Meteo is now being fetched. This data is used in part 02 the feature_pipeline to parse in new real-time weather data."
|
1047 |
]
|
1048 |
},
|
1049 |
{
|
1050 |
"cell_type": "code",
|
1051 |
-
"execution_count":
|
1052 |
"metadata": {},
|
1053 |
"outputs": [],
|
1054 |
"source": [
|
@@ -1060,7 +769,7 @@
|
|
1060 |
},
|
1061 |
{
|
1062 |
"cell_type": "code",
|
1063 |
-
"execution_count":
|
1064 |
"metadata": {},
|
1065 |
"outputs": [
|
1066 |
{
|
@@ -1102,83 +811,83 @@
|
|
1102 |
" <tbody>\n",
|
1103 |
" <tr>\n",
|
1104 |
" <th>0</th>\n",
|
1105 |
-
" <td>
|
1106 |
-
" <td>2024-05-
|
1107 |
-
" <td>2024-05-
|
1108 |
" <td>0</td>\n",
|
1109 |
-
" <td>14.
|
1110 |
-
" <td>
|
1111 |
-
" <td>0.0</td>\n",
|
1112 |
" <td>0.0</td>\n",
|
1113 |
" <td>0.0</td>\n",
|
1114 |
" <td>0.0</td>\n",
|
1115 |
-
" <td>
|
1116 |
-
" <td>
|
1117 |
-
" <td>
|
|
|
1118 |
" </tr>\n",
|
1119 |
" <tr>\n",
|
1120 |
" <th>1</th>\n",
|
1121 |
-
" <td>
|
1122 |
-
" <td>2024-05-
|
1123 |
-
" <td>2024-05-
|
1124 |
" <td>1</td>\n",
|
1125 |
-
" <td>
|
1126 |
-
" <td>
|
1127 |
" <td>0.0</td>\n",
|
1128 |
" <td>0.0</td>\n",
|
1129 |
" <td>0.0</td>\n",
|
1130 |
" <td>0.0</td>\n",
|
1131 |
-
" <td>
|
1132 |
-
" <td>
|
1133 |
-
" <td>37.
|
1134 |
" </tr>\n",
|
1135 |
" <tr>\n",
|
1136 |
" <th>2</th>\n",
|
1137 |
-
" <td>
|
1138 |
-
" <td>2024-05-
|
1139 |
-
" <td>2024-05-
|
1140 |
" <td>2</td>\n",
|
1141 |
-
" <td>13.
|
1142 |
-
" <td>
|
1143 |
" <td>0.0</td>\n",
|
1144 |
" <td>0.0</td>\n",
|
1145 |
" <td>0.0</td>\n",
|
1146 |
-
" <td>
|
1147 |
-
" <td>
|
1148 |
-
" <td>
|
1149 |
-
" <td>
|
1150 |
" </tr>\n",
|
1151 |
" <tr>\n",
|
1152 |
" <th>3</th>\n",
|
1153 |
-
" <td>
|
1154 |
-
" <td>2024-05-
|
1155 |
-
" <td>2024-05-
|
1156 |
" <td>3</td>\n",
|
1157 |
-
" <td>
|
1158 |
-
" <td>
|
1159 |
-
" <td>0.1</td>\n",
|
1160 |
-
" <td>0.1</td>\n",
|
1161 |
" <td>0.0</td>\n",
|
1162 |
-
" <td>
|
1163 |
-
" <td>
|
1164 |
-
" <td>
|
1165 |
-
" <td>
|
|
|
|
|
1166 |
" </tr>\n",
|
1167 |
" <tr>\n",
|
1168 |
" <th>4</th>\n",
|
1169 |
-
" <td>
|
1170 |
-
" <td>2024-05-
|
1171 |
-
" <td>2024-05-
|
1172 |
" <td>4</td>\n",
|
1173 |
-
" <td>12.
|
1174 |
" <td>73.0</td>\n",
|
1175 |
" <td>0.0</td>\n",
|
1176 |
" <td>0.0</td>\n",
|
1177 |
" <td>0.0</td>\n",
|
1178 |
" <td>2.0</td>\n",
|
1179 |
-
" <td>
|
1180 |
-
" <td>
|
1181 |
-
" <td>
|
1182 |
" </tr>\n",
|
1183 |
" </tbody>\n",
|
1184 |
"</table>\n",
|
@@ -1186,28 +895,28 @@
|
|
1186 |
],
|
1187 |
"text/plain": [
|
1188 |
" timestamp datetime date hour temperature_2m \\\n",
|
1189 |
-
"0
|
1190 |
-
"1
|
1191 |
-
"2
|
1192 |
-
"3
|
1193 |
-
"4
|
1194 |
"\n",
|
1195 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
1196 |
-
"0
|
1197 |
-
"1
|
1198 |
-
"2
|
1199 |
-
"3
|
1200 |
"4 73.0 0.0 0.0 0.0 2.0 \n",
|
1201 |
"\n",
|
1202 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
1203 |
-
"0
|
1204 |
-
"1
|
1205 |
-
"2
|
1206 |
-
"3
|
1207 |
-
"4
|
1208 |
]
|
1209 |
},
|
1210 |
-
"execution_count":
|
1211 |
"metadata": {},
|
1212 |
"output_type": "execute_result"
|
1213 |
}
|
@@ -1219,7 +928,7 @@
|
|
1219 |
},
|
1220 |
{
|
1221 |
"cell_type": "code",
|
1222 |
-
"execution_count":
|
1223 |
"metadata": {},
|
1224 |
"outputs": [
|
1225 |
{
|
@@ -1261,83 +970,83 @@
|
|
1261 |
" <tbody>\n",
|
1262 |
" <tr>\n",
|
1263 |
" <th>115</th>\n",
|
1264 |
-
" <td>
|
1265 |
-
" <td>2024-05-
|
1266 |
-
" <td>2024-05-
|
1267 |
" <td>19</td>\n",
|
1268 |
-
" <td>
|
1269 |
-
" <td>
|
1270 |
-
" <td>1.4</td>\n",
|
1271 |
-
" <td>1.4</td>\n",
|
1272 |
" <td>0.0</td>\n",
|
1273 |
-
" <td>
|
1274 |
-
" <td>
|
1275 |
-
" <td>
|
1276 |
-
" <td>
|
|
|
|
|
1277 |
" </tr>\n",
|
1278 |
" <tr>\n",
|
1279 |
" <th>116</th>\n",
|
1280 |
-
" <td>
|
1281 |
-
" <td>2024-05-
|
1282 |
-
" <td>2024-05-
|
1283 |
" <td>20</td>\n",
|
1284 |
-
" <td>10.
|
1285 |
-
" <td>
|
1286 |
-
" <td>1.4</td>\n",
|
1287 |
-
" <td>1.4</td>\n",
|
1288 |
" <td>0.0</td>\n",
|
1289 |
-
" <td>
|
1290 |
-
" <td>
|
1291 |
-
" <td>
|
1292 |
-
" <td>
|
|
|
|
|
1293 |
" </tr>\n",
|
1294 |
" <tr>\n",
|
1295 |
" <th>117</th>\n",
|
1296 |
-
" <td>
|
1297 |
-
" <td>2024-05-
|
1298 |
-
" <td>2024-05-
|
1299 |
" <td>21</td>\n",
|
1300 |
-
" <td>9.
|
1301 |
-
" <td>
|
1302 |
-
" <td>1.4</td>\n",
|
1303 |
-
" <td>1.4</td>\n",
|
1304 |
" <td>0.0</td>\n",
|
1305 |
-
" <td>
|
1306 |
-
" <td>
|
1307 |
-
" <td>
|
1308 |
-
" <td>
|
|
|
|
|
1309 |
" </tr>\n",
|
1310 |
" <tr>\n",
|
1311 |
" <th>118</th>\n",
|
1312 |
-
" <td>
|
1313 |
-
" <td>2024-05-
|
1314 |
-
" <td>2024-05-
|
1315 |
" <td>22</td>\n",
|
1316 |
-
" <td>
|
1317 |
-
" <td>
|
1318 |
-
" <td>0.6</td>\n",
|
1319 |
-
" <td>0.6</td>\n",
|
1320 |
" <td>0.0</td>\n",
|
1321 |
-
" <td>
|
1322 |
-
" <td>
|
1323 |
-
" <td>
|
1324 |
-
" <td>
|
|
|
|
|
1325 |
" </tr>\n",
|
1326 |
" <tr>\n",
|
1327 |
" <th>119</th>\n",
|
1328 |
-
" <td>
|
1329 |
-
" <td>2024-05-
|
1330 |
-
" <td>2024-05-
|
1331 |
" <td>23</td>\n",
|
1332 |
-
" <td>9
|
1333 |
-
" <td>
|
1334 |
-
" <td>0.6</td>\n",
|
1335 |
-
" <td>0.6</td>\n",
|
1336 |
" <td>0.0</td>\n",
|
1337 |
-
" <td>
|
1338 |
-
" <td>
|
1339 |
-
" <td>
|
1340 |
-
" <td>
|
|
|
|
|
1341 |
" </tr>\n",
|
1342 |
" </tbody>\n",
|
1343 |
"</table>\n",
|
@@ -1345,28 +1054,28 @@
|
|
1345 |
],
|
1346 |
"text/plain": [
|
1347 |
" timestamp datetime date hour temperature_2m \\\n",
|
1348 |
-
"115
|
1349 |
-
"116
|
1350 |
-
"117
|
1351 |
-
"118
|
1352 |
-
"119
|
1353 |
"\n",
|
1354 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
1355 |
-
"115
|
1356 |
-
"116
|
1357 |
-
"117
|
1358 |
-
"118
|
1359 |
-
"119
|
1360 |
"\n",
|
1361 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
1362 |
-
"115
|
1363 |
-
"116
|
1364 |
-
"117
|
1365 |
-
"118
|
1366 |
-
"119
|
1367 |
]
|
1368 |
},
|
1369 |
-
"execution_count":
|
1370 |
"metadata": {},
|
1371 |
"output_type": "execute_result"
|
1372 |
}
|
@@ -1378,7 +1087,7 @@
|
|
1378 |
},
|
1379 |
{
|
1380 |
"cell_type": "code",
|
1381 |
-
"execution_count":
|
1382 |
"metadata": {},
|
1383 |
"outputs": [
|
1384 |
{
|
@@ -1393,7 +1102,7 @@
|
|
1393 |
" 0 timestamp 120 non-null int64 \n",
|
1394 |
" 1 datetime 120 non-null datetime64[ns]\n",
|
1395 |
" 2 date 120 non-null object \n",
|
1396 |
-
" 3 hour 120 non-null
|
1397 |
" 4 temperature_2m 120 non-null float64 \n",
|
1398 |
" 5 relative_humidity_2m 120 non-null float64 \n",
|
1399 |
" 6 precipitation 120 non-null float64 \n",
|
@@ -1403,8 +1112,8 @@
|
|
1403 |
" 10 cloud_cover 120 non-null float64 \n",
|
1404 |
" 11 wind_speed_10m 120 non-null float64 \n",
|
1405 |
" 12 wind_gusts_10m 120 non-null float64 \n",
|
1406 |
-
"dtypes: datetime64[ns](1), float64(9),
|
1407 |
-
"memory usage:
|
1408 |
]
|
1409 |
}
|
1410 |
],
|
@@ -1423,16 +1132,16 @@
|
|
1423 |
},
|
1424 |
{
|
1425 |
"cell_type": "code",
|
1426 |
-
"execution_count":
|
1427 |
"metadata": {},
|
1428 |
"outputs": [],
|
1429 |
"source": [
|
1430 |
-
"calender_df = calendar.
|
1431 |
]
|
1432 |
},
|
1433 |
{
|
1434 |
"cell_type": "code",
|
1435 |
-
"execution_count":
|
1436 |
"metadata": {},
|
1437 |
"outputs": [
|
1438 |
{
|
@@ -1461,7 +1170,7 @@
|
|
1461 |
" <th>day</th>\n",
|
1462 |
" <th>month</th>\n",
|
1463 |
" <th>year</th>\n",
|
1464 |
-
" <th>
|
1465 |
" </tr>\n",
|
1466 |
" </thead>\n",
|
1467 |
" <tbody>\n",
|
@@ -1472,7 +1181,7 @@
|
|
1472 |
" <td>1</td>\n",
|
1473 |
" <td>1</td>\n",
|
1474 |
" <td>2022</td>\n",
|
1475 |
-
" <td>
|
1476 |
" </tr>\n",
|
1477 |
" <tr>\n",
|
1478 |
" <th>1</th>\n",
|
@@ -1481,7 +1190,7 @@
|
|
1481 |
" <td>2</td>\n",
|
1482 |
" <td>1</td>\n",
|
1483 |
" <td>2022</td>\n",
|
1484 |
-
" <td>
|
1485 |
" </tr>\n",
|
1486 |
" <tr>\n",
|
1487 |
" <th>2</th>\n",
|
@@ -1490,7 +1199,7 @@
|
|
1490 |
" <td>3</td>\n",
|
1491 |
" <td>1</td>\n",
|
1492 |
" <td>2022</td>\n",
|
1493 |
-
" <td>
|
1494 |
" </tr>\n",
|
1495 |
" <tr>\n",
|
1496 |
" <th>3</th>\n",
|
@@ -1499,7 +1208,7 @@
|
|
1499 |
" <td>4</td>\n",
|
1500 |
" <td>1</td>\n",
|
1501 |
" <td>2022</td>\n",
|
1502 |
-
" <td>
|
1503 |
" </tr>\n",
|
1504 |
" <tr>\n",
|
1505 |
" <th>4</th>\n",
|
@@ -1508,22 +1217,22 @@
|
|
1508 |
" <td>5</td>\n",
|
1509 |
" <td>1</td>\n",
|
1510 |
" <td>2022</td>\n",
|
1511 |
-
" <td>
|
1512 |
" </tr>\n",
|
1513 |
" </tbody>\n",
|
1514 |
"</table>\n",
|
1515 |
"</div>"
|
1516 |
],
|
1517 |
"text/plain": [
|
1518 |
-
" date dayofweek day month year
|
1519 |
-
"0 2022-01-01 5 1 1 2022
|
1520 |
-
"1 2022-01-02 6 2 1 2022
|
1521 |
-
"2 2022-01-03 0 3 1 2022
|
1522 |
-
"3 2022-01-04 1 4 1 2022
|
1523 |
-
"4 2022-01-05 2 5 1 2022
|
1524 |
]
|
1525 |
},
|
1526 |
-
"execution_count":
|
1527 |
"metadata": {},
|
1528 |
"output_type": "execute_result"
|
1529 |
}
|
@@ -1535,7 +1244,7 @@
|
|
1535 |
},
|
1536 |
{
|
1537 |
"cell_type": "code",
|
1538 |
-
"execution_count":
|
1539 |
"metadata": {},
|
1540 |
"outputs": [
|
1541 |
{
|
@@ -1564,7 +1273,7 @@
|
|
1564 |
" <th>day</th>\n",
|
1565 |
" <th>month</th>\n",
|
1566 |
" <th>year</th>\n",
|
1567 |
-
" <th>
|
1568 |
" </tr>\n",
|
1569 |
" </thead>\n",
|
1570 |
" <tbody>\n",
|
@@ -1575,7 +1284,7 @@
|
|
1575 |
" <td>27</td>\n",
|
1576 |
" <td>12</td>\n",
|
1577 |
" <td>2024</td>\n",
|
1578 |
-
" <td>
|
1579 |
" </tr>\n",
|
1580 |
" <tr>\n",
|
1581 |
" <th>1092</th>\n",
|
@@ -1584,7 +1293,7 @@
|
|
1584 |
" <td>28</td>\n",
|
1585 |
" <td>12</td>\n",
|
1586 |
" <td>2024</td>\n",
|
1587 |
-
" <td>
|
1588 |
" </tr>\n",
|
1589 |
" <tr>\n",
|
1590 |
" <th>1093</th>\n",
|
@@ -1593,7 +1302,7 @@
|
|
1593 |
" <td>29</td>\n",
|
1594 |
" <td>12</td>\n",
|
1595 |
" <td>2024</td>\n",
|
1596 |
-
" <td>
|
1597 |
" </tr>\n",
|
1598 |
" <tr>\n",
|
1599 |
" <th>1094</th>\n",
|
@@ -1602,7 +1311,7 @@
|
|
1602 |
" <td>30</td>\n",
|
1603 |
" <td>12</td>\n",
|
1604 |
" <td>2024</td>\n",
|
1605 |
-
" <td>
|
1606 |
" </tr>\n",
|
1607 |
" <tr>\n",
|
1608 |
" <th>1095</th>\n",
|
@@ -1611,22 +1320,22 @@
|
|
1611 |
" <td>31</td>\n",
|
1612 |
" <td>12</td>\n",
|
1613 |
" <td>2024</td>\n",
|
1614 |
-
" <td>
|
1615 |
" </tr>\n",
|
1616 |
" </tbody>\n",
|
1617 |
"</table>\n",
|
1618 |
"</div>"
|
1619 |
],
|
1620 |
"text/plain": [
|
1621 |
-
" date dayofweek day month year
|
1622 |
-
"1091 2024-12-27 4 27 12 2024
|
1623 |
-
"1092 2024-12-28 5 28 12 2024
|
1624 |
-
"1093 2024-12-29 6 29 12 2024
|
1625 |
-
"1094 2024-12-30 0 30 12 2024
|
1626 |
-
"1095 2024-12-31 1 31 12 2024
|
1627 |
]
|
1628 |
},
|
1629 |
-
"execution_count":
|
1630 |
"metadata": {},
|
1631 |
"output_type": "execute_result"
|
1632 |
}
|
@@ -1638,7 +1347,7 @@
|
|
1638 |
},
|
1639 |
{
|
1640 |
"cell_type": "code",
|
1641 |
-
"execution_count":
|
1642 |
"metadata": {},
|
1643 |
"outputs": [
|
1644 |
{
|
@@ -1651,13 +1360,13 @@
|
|
1651 |
" # Column Non-Null Count Dtype \n",
|
1652 |
"--- ------ -------------- ----- \n",
|
1653 |
" 0 date 1096 non-null object\n",
|
1654 |
-
" 1 dayofweek 1096 non-null
|
1655 |
-
" 2 day 1096 non-null
|
1656 |
-
" 3 month 1096 non-null
|
1657 |
-
" 4 year 1096 non-null
|
1658 |
-
" 5
|
1659 |
-
"dtypes:
|
1660 |
-
"memory usage:
|
1661 |
]
|
1662 |
}
|
1663 |
],
|
@@ -1668,7 +1377,7 @@
|
|
1668 |
},
|
1669 |
{
|
1670 |
"cell_type": "code",
|
1671 |
-
"execution_count":
|
1672 |
"metadata": {},
|
1673 |
"outputs": [
|
1674 |
{
|
@@ -1681,13 +1390,13 @@
|
|
1681 |
" # Column Non-Null Count Dtype \n",
|
1682 |
"--- ------ -------------- ----- \n",
|
1683 |
" 0 date 1096 non-null object\n",
|
1684 |
-
" 1 dayofweek 1096 non-null
|
1685 |
-
" 2 day 1096 non-null
|
1686 |
-
" 3 month 1096 non-null
|
1687 |
-
" 4 year 1096 non-null
|
1688 |
-
" 5
|
1689 |
-
"dtypes:
|
1690 |
-
"memory usage:
|
1691 |
]
|
1692 |
}
|
1693 |
],
|
@@ -1707,17 +1416,16 @@
|
|
1707 |
},
|
1708 |
{
|
1709 |
"cell_type": "code",
|
1710 |
-
"execution_count":
|
1711 |
"metadata": {},
|
1712 |
"outputs": [
|
1713 |
{
|
1714 |
"name": "stdout",
|
1715 |
"output_type": "stream",
|
1716 |
"text": [
|
1717 |
-
"Connection closed.\n",
|
1718 |
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
1719 |
"\n",
|
1720 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/
|
1721 |
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
1722 |
]
|
1723 |
}
|
@@ -1754,7 +1462,7 @@
|
|
1754 |
},
|
1755 |
{
|
1756 |
"cell_type": "code",
|
1757 |
-
"execution_count":
|
1758 |
"metadata": {},
|
1759 |
"outputs": [],
|
1760 |
"source": [
|
@@ -1778,7 +1486,7 @@
|
|
1778 |
},
|
1779 |
{
|
1780 |
"cell_type": "code",
|
1781 |
-
"execution_count":
|
1782 |
"metadata": {},
|
1783 |
"outputs": [
|
1784 |
{
|
@@ -1786,15 +1494,22 @@
|
|
1786 |
"output_type": "stream",
|
1787 |
"text": [
|
1788 |
"Feature Group created successfully, explore it at \n",
|
1789 |
-
"https://c.app.hopsworks.ai:443/p/
|
1790 |
]
|
1791 |
},
|
1792 |
{
|
1793 |
-
"
|
1794 |
-
|
1795 |
-
|
1796 |
-
|
1797 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1798 |
},
|
1799 |
{
|
1800 |
"name": "stdout",
|
@@ -1802,16 +1517,16 @@
|
|
1802 |
"text": [
|
1803 |
"Launching job: electricity_prices_1_offline_fg_materialization\n",
|
1804 |
"Job started successfully, you can follow the progress at \n",
|
1805 |
-
"https://c.app.hopsworks.ai/p/
|
1806 |
]
|
1807 |
},
|
1808 |
{
|
1809 |
"data": {
|
1810 |
"text/plain": [
|
1811 |
-
"(<hsfs.core.job.Job at
|
1812 |
]
|
1813 |
},
|
1814 |
-
"execution_count":
|
1815 |
"metadata": {},
|
1816 |
"output_type": "execute_result"
|
1817 |
}
|
@@ -1830,7 +1545,7 @@
|
|
1830 |
},
|
1831 |
{
|
1832 |
"cell_type": "code",
|
1833 |
-
"execution_count":
|
1834 |
"metadata": {},
|
1835 |
"outputs": [],
|
1836 |
"source": [
|
@@ -1852,59 +1567,12 @@
|
|
1852 |
"cell_type": "markdown",
|
1853 |
"metadata": {},
|
1854 |
"source": [
|
1855 |
-
"We replicate the process for both the `
|
1856 |
]
|
1857 |
},
|
1858 |
{
|
1859 |
"cell_type": "code",
|
1860 |
-
"execution_count":
|
1861 |
-
"metadata": {},
|
1862 |
-
"outputs": [],
|
1863 |
-
"source": [
|
1864 |
-
"# # Creating the feature group for the electricity prices\n",
|
1865 |
-
"# forecast_renewable_energy_fg = fs.get_or_create_feature_group(\n",
|
1866 |
-
"# name=\"forecast_renewable_energy\",\n",
|
1867 |
-
"# version=1,\n",
|
1868 |
-
"# description=\"Forecast on Renewable Energy on ForecastType from Energidata API\",\n",
|
1869 |
-
"# primary_key=[\"date\",\"timestamp\"], \n",
|
1870 |
-
"# online_enabled=True,\n",
|
1871 |
-
"# event_time=\"timestamp\",\n",
|
1872 |
-
"# )"
|
1873 |
-
]
|
1874 |
-
},
|
1875 |
-
{
|
1876 |
-
"cell_type": "code",
|
1877 |
-
"execution_count": null,
|
1878 |
-
"metadata": {},
|
1879 |
-
"outputs": [],
|
1880 |
-
"source": [
|
1881 |
-
"# # Inserting the electricity_df into the feature group named electricity_fg\n",
|
1882 |
-
"# forecast_renewable_energy_fg.insert(forecast_renewable_energy_df)"
|
1883 |
-
]
|
1884 |
-
},
|
1885 |
-
{
|
1886 |
-
"cell_type": "code",
|
1887 |
-
"execution_count": null,
|
1888 |
-
"metadata": {},
|
1889 |
-
"outputs": [],
|
1890 |
-
"source": [
|
1891 |
-
"# # List of descriptions for forecast_renewable_energy features\n",
|
1892 |
-
"# forecast_renewable_energy_feature_descriptions = [\n",
|
1893 |
-
"# {\"name\": \"timestamp\", \"description\": \"Timestamp for the event_time\"},\n",
|
1894 |
-
"# {\"name\": \"date\", \"description\": \"Date of the forecast\"},\n",
|
1895 |
-
"# {\"name\": \"datetime\", \"description\": \"Date and time for the forecast\"},\n",
|
1896 |
-
"# {\"name\": \"hour\", \"description\": \"Hour of day\"},\n",
|
1897 |
-
"# {\"name\": \"dk1_offshore_wind_forecastintraday_kwh\", \"description\": \"The forecast for the coming day at 6am Danish time zone\"},\n",
|
1898 |
-
"# ]\n",
|
1899 |
-
"\n",
|
1900 |
-
"# # Updating feature descriptions\n",
|
1901 |
-
"# for desc in forecast_renewable_energy_feature_descriptions: \n",
|
1902 |
-
"# forecast_renewable_energy_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
|
1903 |
-
]
|
1904 |
-
},
|
1905 |
-
{
|
1906 |
-
"cell_type": "code",
|
1907 |
-
"execution_count": 77,
|
1908 |
"metadata": {},
|
1909 |
"outputs": [],
|
1910 |
"source": [
|
@@ -1921,7 +1589,7 @@
|
|
1921 |
},
|
1922 |
{
|
1923 |
"cell_type": "code",
|
1924 |
-
"execution_count":
|
1925 |
"metadata": {},
|
1926 |
"outputs": [
|
1927 |
{
|
@@ -1929,15 +1597,22 @@
|
|
1929 |
"output_type": "stream",
|
1930 |
"text": [
|
1931 |
"Feature Group created successfully, explore it at \n",
|
1932 |
-
"https://c.app.hopsworks.ai:443/p/
|
1933 |
]
|
1934 |
},
|
1935 |
{
|
1936 |
-
"
|
1937 |
-
|
1938 |
-
|
1939 |
-
|
1940 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1941 |
},
|
1942 |
{
|
1943 |
"name": "stdout",
|
@@ -1945,16 +1620,16 @@
|
|
1945 |
"text": [
|
1946 |
"Launching job: weather_measurements_1_offline_fg_materialization\n",
|
1947 |
"Job started successfully, you can follow the progress at \n",
|
1948 |
-
"https://c.app.hopsworks.ai/p/
|
1949 |
]
|
1950 |
},
|
1951 |
{
|
1952 |
"data": {
|
1953 |
"text/plain": [
|
1954 |
-
"(<hsfs.core.job.Job at
|
1955 |
]
|
1956 |
},
|
1957 |
-
"execution_count":
|
1958 |
"metadata": {},
|
1959 |
"output_type": "execute_result"
|
1960 |
}
|
@@ -1966,7 +1641,7 @@
|
|
1966 |
},
|
1967 |
{
|
1968 |
"cell_type": "code",
|
1969 |
-
"execution_count":
|
1970 |
"metadata": {},
|
1971 |
"outputs": [],
|
1972 |
"source": [
|
@@ -1994,7 +1669,7 @@
|
|
1994 |
},
|
1995 |
{
|
1996 |
"cell_type": "code",
|
1997 |
-
"execution_count":
|
1998 |
"metadata": {},
|
1999 |
"outputs": [],
|
2000 |
"source": [
|
@@ -2010,7 +1685,7 @@
|
|
2010 |
},
|
2011 |
{
|
2012 |
"cell_type": "code",
|
2013 |
-
"execution_count":
|
2014 |
"metadata": {},
|
2015 |
"outputs": [
|
2016 |
{
|
@@ -2018,15 +1693,22 @@
|
|
2018 |
"output_type": "stream",
|
2019 |
"text": [
|
2020 |
"Feature Group created successfully, explore it at \n",
|
2021 |
-
"https://c.app.hopsworks.ai:443/p/
|
2022 |
]
|
2023 |
},
|
2024 |
{
|
2025 |
-
"
|
2026 |
-
|
2027 |
-
|
2028 |
-
|
2029 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2030 |
},
|
2031 |
{
|
2032 |
"name": "stdout",
|
@@ -2034,16 +1716,16 @@
|
|
2034 |
"text": [
|
2035 |
"Launching job: dk_calendar_1_offline_fg_materialization\n",
|
2036 |
"Job started successfully, you can follow the progress at \n",
|
2037 |
-
"https://c.app.hopsworks.ai/p/
|
2038 |
]
|
2039 |
},
|
2040 |
{
|
2041 |
"data": {
|
2042 |
"text/plain": [
|
2043 |
-
"(<hsfs.core.job.Job at
|
2044 |
]
|
2045 |
},
|
2046 |
-
"execution_count":
|
2047 |
"metadata": {},
|
2048 |
"output_type": "execute_result"
|
2049 |
}
|
@@ -2055,7 +1737,7 @@
|
|
2055 |
},
|
2056 |
{
|
2057 |
"cell_type": "code",
|
2058 |
-
"execution_count":
|
2059 |
"metadata": {},
|
2060 |
"outputs": [],
|
2061 |
"source": [
|
@@ -2064,7 +1746,7 @@
|
|
2064 |
" {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
|
2065 |
" {\"name\": \"day\", \"description\": \"Day number of the week. Monday is 0 and Sunday is 6\"},\n",
|
2066 |
" {\"name\": \"month\", \"description\": \"Month number of the year\"},\n",
|
2067 |
-
" {\"name\": \"
|
2068 |
"]\n",
|
2069 |
"\n",
|
2070 |
"# Updating feature descriptions\n",
|
|
|
28 |
},
|
29 |
{
|
30 |
"cell_type": "code",
|
31 |
+
"execution_count": 1,
|
32 |
"metadata": {},
|
33 |
"outputs": [],
|
34 |
"source": [
|
|
|
38 |
},
|
39 |
{
|
40 |
"cell_type": "code",
|
41 |
+
"execution_count": 2,
|
42 |
"metadata": {},
|
43 |
"outputs": [
|
44 |
{
|
45 |
"name": "stdout",
|
46 |
"output_type": "stream",
|
47 |
"text": [
|
48 |
+
"/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-\n",
|
49 |
+
"/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-/notebooks\n"
|
50 |
]
|
51 |
}
|
52 |
],
|
|
|
64 |
},
|
65 |
{
|
66 |
"cell_type": "code",
|
67 |
+
"execution_count": 3,
|
68 |
"metadata": {},
|
69 |
"outputs": [],
|
70 |
"source": [
|
|
|
80 |
"warnings.filterwarnings('ignore', category=DeprecationWarning)"
|
81 |
]
|
82 |
},
|
83 |
+
{
|
84 |
+
"cell_type": "markdown",
|
85 |
+
"metadata": {},
|
86 |
+
"source": [
|
87 |
+
"# <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
|
88 |
+
"\n",
|
89 |
+
"We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
|
90 |
+
"To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
|
91 |
+
"\n",
|
92 |
+
""
|
93 |
+
]
|
94 |
+
},
|
95 |
{
|
96 |
"cell_type": "markdown",
|
97 |
"metadata": {},
|
|
|
103 |
"- Electricity prices in Denmark on an hourly basis per day from [Energinet](https://www.energidataservice.dk). Located in the *features* folder under electricity_prices.\n",
|
104 |
"- Different meteorological observations for Aalborg, Denmark from [Open Meteo](https://www.open-meteo.com). Located in the *features* folder under weather_measures.\n",
|
105 |
"- Danish calendar that categorizes dates into types based on whether it is a weekday or not. This file was made manually by the group and is located in the *data* folder inside this repository.\n",
|
|
|
106 |
"- Weather forecast for Aalborg, Denmark from [Open Meteo](https://www.open-meteo.com). Located in the *features* folder under weather_measures. (This data is used later to bring in new real-time weather data.)\n"
|
107 |
]
|
108 |
},
|
|
|
116 |
},
|
117 |
{
|
118 |
"cell_type": "code",
|
119 |
+
"execution_count": 4,
|
120 |
"metadata": {},
|
121 |
"outputs": [],
|
122 |
"source": [
|
|
|
133 |
},
|
134 |
{
|
135 |
"cell_type": "code",
|
136 |
+
"execution_count": 5,
|
137 |
"metadata": {},
|
138 |
"outputs": [
|
139 |
{
|
|
|
218 |
"4 1641009600000 2022-01-01 04:00:00 2022-01-01 4 0.28013"
|
219 |
]
|
220 |
},
|
221 |
+
"execution_count": 5,
|
222 |
"metadata": {},
|
223 |
"output_type": "execute_result"
|
224 |
}
|
|
|
230 |
},
|
231 |
{
|
232 |
"cell_type": "code",
|
233 |
+
"execution_count": 6,
|
234 |
"metadata": {},
|
235 |
"outputs": [
|
236 |
{
|
|
|
263 |
" </thead>\n",
|
264 |
" <tbody>\n",
|
265 |
" <tr>\n",
|
266 |
+
" <th>20464</th>\n",
|
267 |
+
" <td>1714676400000</td>\n",
|
268 |
+
" <td>2024-05-02 19:00:00</td>\n",
|
269 |
+
" <td>2024-05-02</td>\n",
|
270 |
" <td>19</td>\n",
|
271 |
+
" <td>0.31266</td>\n",
|
272 |
" </tr>\n",
|
273 |
" <tr>\n",
|
274 |
+
" <th>20465</th>\n",
|
275 |
+
" <td>1714680000000</td>\n",
|
276 |
+
" <td>2024-05-02 20:00:00</td>\n",
|
277 |
+
" <td>2024-05-02</td>\n",
|
278 |
" <td>20</td>\n",
|
279 |
+
" <td>0.31318</td>\n",
|
280 |
" </tr>\n",
|
281 |
" <tr>\n",
|
282 |
+
" <th>20466</th>\n",
|
283 |
+
" <td>1714683600000</td>\n",
|
284 |
+
" <td>2024-05-02 21:00:00</td>\n",
|
285 |
+
" <td>2024-05-02</td>\n",
|
286 |
" <td>21</td>\n",
|
287 |
+
" <td>0.31266</td>\n",
|
288 |
" </tr>\n",
|
289 |
" <tr>\n",
|
290 |
+
" <th>20467</th>\n",
|
291 |
+
" <td>1714687200000</td>\n",
|
292 |
+
" <td>2024-05-02 22:00:00</td>\n",
|
293 |
+
" <td>2024-05-02</td>\n",
|
294 |
" <td>22</td>\n",
|
295 |
+
" <td>0.28245</td>\n",
|
296 |
" </tr>\n",
|
297 |
" <tr>\n",
|
298 |
+
" <th>20468</th>\n",
|
299 |
+
" <td>1714690800000</td>\n",
|
300 |
+
" <td>2024-05-02 23:00:00</td>\n",
|
301 |
+
" <td>2024-05-02</td>\n",
|
302 |
" <td>23</td>\n",
|
303 |
+
" <td>0.25306</td>\n",
|
304 |
" </tr>\n",
|
305 |
" </tbody>\n",
|
306 |
"</table>\n",
|
|
|
308 |
],
|
309 |
"text/plain": [
|
310 |
" timestamp datetime date hour \\\n",
|
311 |
+
"20464 1714676400000 2024-05-02 19:00:00 2024-05-02 19 \n",
|
312 |
+
"20465 1714680000000 2024-05-02 20:00:00 2024-05-02 20 \n",
|
313 |
+
"20466 1714683600000 2024-05-02 21:00:00 2024-05-02 21 \n",
|
314 |
+
"20467 1714687200000 2024-05-02 22:00:00 2024-05-02 22 \n",
|
315 |
+
"20468 1714690800000 2024-05-02 23:00:00 2024-05-02 23 \n",
|
316 |
"\n",
|
317 |
" dk1_spotpricedkk_kwh \n",
|
318 |
+
"20464 0.31266 \n",
|
319 |
+
"20465 0.31318 \n",
|
320 |
+
"20466 0.31266 \n",
|
321 |
+
"20467 0.28245 \n",
|
322 |
+
"20468 0.25306 "
|
323 |
]
|
324 |
},
|
325 |
+
"execution_count": 6,
|
326 |
"metadata": {},
|
327 |
"output_type": "execute_result"
|
328 |
}
|
|
|
334 |
},
|
335 |
{
|
336 |
"cell_type": "code",
|
337 |
+
"execution_count": 7,
|
338 |
"metadata": {},
|
339 |
"outputs": [
|
340 |
{
|
|
|
342 |
"output_type": "stream",
|
343 |
"text": [
|
344 |
"<class 'pandas.core.frame.DataFrame'>\n",
|
345 |
+
"RangeIndex: 20469 entries, 0 to 20468\n",
|
346 |
"Data columns (total 5 columns):\n",
|
347 |
" # Column Non-Null Count Dtype \n",
|
348 |
"--- ------ -------------- ----- \n",
|
349 |
+
" 0 timestamp 20469 non-null int64 \n",
|
350 |
+
" 1 datetime 20469 non-null datetime64[ns]\n",
|
351 |
+
" 2 date 20469 non-null object \n",
|
352 |
+
" 3 hour 20469 non-null int64 \n",
|
353 |
+
" 4 dk1_spotpricedkk_kwh 20469 non-null float64 \n",
|
354 |
+
"dtypes: datetime64[ns](1), float64(1), int64(2), object(1)\n",
|
355 |
+
"memory usage: 799.7+ KB\n"
|
356 |
]
|
357 |
}
|
358 |
],
|
|
|
361 |
"electricity_df.info()"
|
362 |
]
|
363 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
364 |
{
|
365 |
"cell_type": "markdown",
|
366 |
"metadata": {},
|
|
|
378 |
},
|
379 |
{
|
380 |
"cell_type": "code",
|
381 |
+
"execution_count": 8,
|
382 |
"metadata": {},
|
383 |
"outputs": [],
|
384 |
"source": [
|
|
|
394 |
},
|
395 |
{
|
396 |
"cell_type": "code",
|
397 |
+
"execution_count": 9,
|
398 |
"metadata": {},
|
399 |
"outputs": [
|
400 |
{
|
|
|
541 |
"4 100.0 10.6 23.8 "
|
542 |
]
|
543 |
},
|
544 |
+
"execution_count": 9,
|
545 |
"metadata": {},
|
546 |
"output_type": "execute_result"
|
547 |
}
|
|
|
553 |
},
|
554 |
{
|
555 |
"cell_type": "code",
|
556 |
+
"execution_count": 10,
|
557 |
"metadata": {},
|
558 |
"outputs": [
|
559 |
{
|
|
|
594 |
" </thead>\n",
|
595 |
" <tbody>\n",
|
596 |
" <tr>\n",
|
597 |
+
" <th>20443</th>\n",
|
598 |
+
" <td>1714590000000</td>\n",
|
599 |
+
" <td>2024-05-01 19:00:00</td>\n",
|
600 |
+
" <td>2024-05-01</td>\n",
|
601 |
" <td>19</td>\n",
|
602 |
+
" <td>15.4</td>\n",
|
603 |
+
" <td>71.0</td>\n",
|
604 |
" <td>0.0</td>\n",
|
605 |
" <td>0.0</td>\n",
|
606 |
" <td>0.0</td>\n",
|
607 |
" <td>0.0</td>\n",
|
608 |
+
" <td>13.0</td>\n",
|
609 |
+
" <td>14.8</td>\n",
|
610 |
+
" <td>25.9</td>\n",
|
611 |
" </tr>\n",
|
612 |
" <tr>\n",
|
613 |
+
" <th>20444</th>\n",
|
614 |
+
" <td>1714593600000</td>\n",
|
615 |
+
" <td>2024-05-01 20:00:00</td>\n",
|
616 |
+
" <td>2024-05-01</td>\n",
|
617 |
" <td>20</td>\n",
|
618 |
+
" <td>14.7</td>\n",
|
619 |
+
" <td>69.0</td>\n",
|
620 |
" <td>0.0</td>\n",
|
621 |
" <td>0.0</td>\n",
|
622 |
" <td>0.0</td>\n",
|
623 |
" <td>0.0</td>\n",
|
624 |
+
" <td>7.0</td>\n",
|
625 |
+
" <td>19.2</td>\n",
|
626 |
+
" <td>33.8</td>\n",
|
627 |
" </tr>\n",
|
628 |
" <tr>\n",
|
629 |
+
" <th>20445</th>\n",
|
630 |
+
" <td>1714597200000</td>\n",
|
631 |
+
" <td>2024-05-01 21:00:00</td>\n",
|
632 |
+
" <td>2024-05-01</td>\n",
|
633 |
" <td>21</td>\n",
|
634 |
+
" <td>14.3</td>\n",
|
635 |
" <td>67.0</td>\n",
|
636 |
" <td>0.0</td>\n",
|
637 |
" <td>0.0</td>\n",
|
638 |
" <td>0.0</td>\n",
|
639 |
" <td>0.0</td>\n",
|
640 |
+
" <td>2.0</td>\n",
|
641 |
+
" <td>20.5</td>\n",
|
642 |
+
" <td>37.4</td>\n",
|
643 |
" </tr>\n",
|
644 |
" <tr>\n",
|
645 |
+
" <th>20446</th>\n",
|
646 |
+
" <td>1714600800000</td>\n",
|
647 |
+
" <td>2024-05-01 22:00:00</td>\n",
|
648 |
+
" <td>2024-05-01</td>\n",
|
649 |
" <td>22</td>\n",
|
650 |
+
" <td>13.4</td>\n",
|
651 |
+
" <td>68.0</td>\n",
|
652 |
" <td>0.0</td>\n",
|
653 |
" <td>0.0</td>\n",
|
654 |
" <td>0.0</td>\n",
|
655 |
" <td>0.0</td>\n",
|
656 |
+
" <td>7.0</td>\n",
|
657 |
+
" <td>20.2</td>\n",
|
658 |
+
" <td>37.4</td>\n",
|
659 |
" </tr>\n",
|
660 |
" <tr>\n",
|
661 |
+
" <th>20447</th>\n",
|
662 |
+
" <td>1714604400000</td>\n",
|
663 |
+
" <td>2024-05-01 23:00:00</td>\n",
|
664 |
+
" <td>2024-05-01</td>\n",
|
665 |
" <td>23</td>\n",
|
666 |
+
" <td>12.4</td>\n",
|
667 |
" <td>70.0</td>\n",
|
668 |
" <td>0.0</td>\n",
|
669 |
" <td>0.0</td>\n",
|
670 |
" <td>0.0</td>\n",
|
671 |
" <td>0.0</td>\n",
|
672 |
+
" <td>17.0</td>\n",
|
673 |
+
" <td>18.8</td>\n",
|
674 |
+
" <td>36.4</td>\n",
|
675 |
" </tr>\n",
|
676 |
" </tbody>\n",
|
677 |
"</table>\n",
|
|
|
679 |
],
|
680 |
"text/plain": [
|
681 |
" timestamp datetime date hour temperature_2m \\\n",
|
682 |
+
"20443 1714590000000 2024-05-01 19:00:00 2024-05-01 19 15.4 \n",
|
683 |
+
"20444 1714593600000 2024-05-01 20:00:00 2024-05-01 20 14.7 \n",
|
684 |
+
"20445 1714597200000 2024-05-01 21:00:00 2024-05-01 21 14.3 \n",
|
685 |
+
"20446 1714600800000 2024-05-01 22:00:00 2024-05-01 22 13.4 \n",
|
686 |
+
"20447 1714604400000 2024-05-01 23:00:00 2024-05-01 23 12.4 \n",
|
687 |
"\n",
|
688 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
689 |
+
"20443 71.0 0.0 0.0 0.0 0.0 \n",
|
690 |
+
"20444 69.0 0.0 0.0 0.0 0.0 \n",
|
691 |
+
"20445 67.0 0.0 0.0 0.0 0.0 \n",
|
692 |
+
"20446 68.0 0.0 0.0 0.0 0.0 \n",
|
693 |
+
"20447 70.0 0.0 0.0 0.0 0.0 \n",
|
694 |
"\n",
|
695 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
696 |
+
"20443 13.0 14.8 25.9 \n",
|
697 |
+
"20444 7.0 19.2 33.8 \n",
|
698 |
+
"20445 2.0 20.5 37.4 \n",
|
699 |
+
"20446 7.0 20.2 37.4 \n",
|
700 |
+
"20447 17.0 18.8 36.4 "
|
701 |
]
|
702 |
},
|
703 |
+
"execution_count": 10,
|
704 |
"metadata": {},
|
705 |
"output_type": "execute_result"
|
706 |
}
|
|
|
712 |
},
|
713 |
{
|
714 |
"cell_type": "code",
|
715 |
+
"execution_count": 11,
|
716 |
"metadata": {},
|
717 |
"outputs": [
|
718 |
{
|
|
|
720 |
"output_type": "stream",
|
721 |
"text": [
|
722 |
"<class 'pandas.core.frame.DataFrame'>\n",
|
723 |
+
"Int64Index: 20448 entries, 0 to 20447\n",
|
724 |
"Data columns (total 13 columns):\n",
|
725 |
" # Column Non-Null Count Dtype \n",
|
726 |
"--- ------ -------------- ----- \n",
|
727 |
+
" 0 timestamp 20448 non-null int64 \n",
|
728 |
+
" 1 datetime 20448 non-null datetime64[ns]\n",
|
729 |
+
" 2 date 20448 non-null object \n",
|
730 |
+
" 3 hour 20448 non-null int64 \n",
|
731 |
+
" 4 temperature_2m 20448 non-null float64 \n",
|
732 |
+
" 5 relative_humidity_2m 20448 non-null float64 \n",
|
733 |
+
" 6 precipitation 20448 non-null float64 \n",
|
734 |
+
" 7 rain 20448 non-null float64 \n",
|
735 |
+
" 8 snowfall 20448 non-null float64 \n",
|
736 |
+
" 9 weather_code 20448 non-null float64 \n",
|
737 |
+
" 10 cloud_cover 20448 non-null float64 \n",
|
738 |
+
" 11 wind_speed_10m 20448 non-null float64 \n",
|
739 |
+
" 12 wind_gusts_10m 20448 non-null float64 \n",
|
740 |
+
"dtypes: datetime64[ns](1), float64(9), int64(2), object(1)\n",
|
741 |
+
"memory usage: 2.2+ MB\n"
|
742 |
]
|
743 |
}
|
744 |
],
|
|
|
751 |
"cell_type": "markdown",
|
752 |
"metadata": {},
|
753 |
"source": [
|
754 |
+
"#### <span style=\"color:#2656a3;\"> 🌈 Forecast Weather Measures</span>\n",
|
755 |
"The weather forecast from Open Meteo is now fetched. This data is used in part 02, the feature pipeline, to bring in new real-time weather data."
|
756 |
]
|
757 |
},
|
758 |
{
|
759 |
"cell_type": "code",
|
760 |
+
"execution_count": 12,
|
761 |
"metadata": {},
|
762 |
"outputs": [],
|
763 |
"source": [
|
|
|
769 |
},
|
770 |
{
|
771 |
"cell_type": "code",
|
772 |
+
"execution_count": 13,
|
773 |
"metadata": {},
|
774 |
"outputs": [
|
775 |
{
|
|
|
811 |
" <tbody>\n",
|
812 |
" <tr>\n",
|
813 |
" <th>0</th>\n",
|
814 |
+
" <td>1714694400000</td>\n",
|
815 |
+
" <td>2024-05-03 00:00:00</td>\n",
|
816 |
+
" <td>2024-05-03</td>\n",
|
817 |
" <td>0</td>\n",
|
818 |
+
" <td>14.3</td>\n",
|
819 |
+
" <td>65.0</td>\n",
|
|
|
820 |
" <td>0.0</td>\n",
|
821 |
" <td>0.0</td>\n",
|
822 |
" <td>0.0</td>\n",
|
823 |
+
" <td>1.0</td>\n",
|
824 |
+
" <td>25.0</td>\n",
|
825 |
+
" <td>20.5</td>\n",
|
826 |
+
" <td>36.0</td>\n",
|
827 |
" </tr>\n",
|
828 |
" <tr>\n",
|
829 |
" <th>1</th>\n",
|
830 |
+
" <td>1714698000000</td>\n",
|
831 |
+
" <td>2024-05-03 01:00:00</td>\n",
|
832 |
+
" <td>2024-05-03</td>\n",
|
833 |
" <td>1</td>\n",
|
834 |
+
" <td>13.6</td>\n",
|
835 |
+
" <td>69.0</td>\n",
|
836 |
" <td>0.0</td>\n",
|
837 |
" <td>0.0</td>\n",
|
838 |
" <td>0.0</td>\n",
|
839 |
" <td>0.0</td>\n",
|
840 |
+
" <td>12.0</td>\n",
|
841 |
+
" <td>21.6</td>\n",
|
842 |
+
" <td>37.4</td>\n",
|
843 |
" </tr>\n",
|
844 |
" <tr>\n",
|
845 |
" <th>2</th>\n",
|
846 |
+
" <td>1714701600000</td>\n",
|
847 |
+
" <td>2024-05-03 02:00:00</td>\n",
|
848 |
+
" <td>2024-05-03</td>\n",
|
849 |
" <td>2</td>\n",
|
850 |
+
" <td>13.0</td>\n",
|
851 |
+
" <td>72.0</td>\n",
|
852 |
" <td>0.0</td>\n",
|
853 |
" <td>0.0</td>\n",
|
854 |
" <td>0.0</td>\n",
|
855 |
+
" <td>0.0</td>\n",
|
856 |
+
" <td>7.0</td>\n",
|
857 |
+
" <td>20.9</td>\n",
|
858 |
+
" <td>37.4</td>\n",
|
859 |
" </tr>\n",
|
860 |
" <tr>\n",
|
861 |
" <th>3</th>\n",
|
862 |
+
" <td>1714705200000</td>\n",
|
863 |
+
" <td>2024-05-03 03:00:00</td>\n",
|
864 |
+
" <td>2024-05-03</td>\n",
|
865 |
" <td>3</td>\n",
|
866 |
+
" <td>12.7</td>\n",
|
867 |
+
" <td>73.0</td>\n",
|
|
|
|
|
868 |
" <td>0.0</td>\n",
|
869 |
+
" <td>0.0</td>\n",
|
870 |
+
" <td>0.0</td>\n",
|
871 |
+
" <td>1.0</td>\n",
|
872 |
+
" <td>26.0</td>\n",
|
873 |
+
" <td>19.8</td>\n",
|
874 |
+
" <td>34.6</td>\n",
|
875 |
" </tr>\n",
|
876 |
" <tr>\n",
|
877 |
" <th>4</th>\n",
|
878 |
+
" <td>1714708800000</td>\n",
|
879 |
+
" <td>2024-05-03 04:00:00</td>\n",
|
880 |
+
" <td>2024-05-03</td>\n",
|
881 |
" <td>4</td>\n",
|
882 |
+
" <td>12.4</td>\n",
|
883 |
" <td>73.0</td>\n",
|
884 |
" <td>0.0</td>\n",
|
885 |
" <td>0.0</td>\n",
|
886 |
" <td>0.0</td>\n",
|
887 |
" <td>2.0</td>\n",
|
888 |
+
" <td>54.0</td>\n",
|
889 |
+
" <td>18.7</td>\n",
|
890 |
+
" <td>33.8</td>\n",
|
891 |
" </tr>\n",
|
892 |
" </tbody>\n",
|
893 |
"</table>\n",
|
|
|
895 |
],
|
896 |
"text/plain": [
|
897 |
" timestamp datetime date hour temperature_2m \\\n",
|
898 |
+
"0 1714694400000 2024-05-03 00:00:00 2024-05-03 0 14.3 \n",
|
899 |
+
"1 1714698000000 2024-05-03 01:00:00 2024-05-03 1 13.6 \n",
|
900 |
+
"2 1714701600000 2024-05-03 02:00:00 2024-05-03 2 13.0 \n",
|
901 |
+
"3 1714705200000 2024-05-03 03:00:00 2024-05-03 3 12.7 \n",
|
902 |
+
"4 1714708800000 2024-05-03 04:00:00 2024-05-03 4 12.4 \n",
|
903 |
"\n",
|
904 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
905 |
+
"0 65.0 0.0 0.0 0.0 1.0 \n",
|
906 |
+
"1 69.0 0.0 0.0 0.0 0.0 \n",
|
907 |
+
"2 72.0 0.0 0.0 0.0 0.0 \n",
|
908 |
+
"3 73.0 0.0 0.0 0.0 1.0 \n",
|
909 |
"4 73.0 0.0 0.0 0.0 2.0 \n",
|
910 |
"\n",
|
911 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
912 |
+
"0 25.0 20.5 36.0 \n",
|
913 |
+
"1 12.0 21.6 37.4 \n",
|
914 |
+
"2 7.0 20.9 37.4 \n",
|
915 |
+
"3 26.0 19.8 34.6 \n",
|
916 |
+
"4 54.0 18.7 33.8 "
|
917 |
]
|
918 |
},
|
919 |
+
"execution_count": 13,
|
920 |
"metadata": {},
|
921 |
"output_type": "execute_result"
|
922 |
}
|
|
|
928 |
},
|
929 |
{
|
930 |
"cell_type": "code",
|
931 |
+
"execution_count": 14,
|
932 |
"metadata": {},
|
933 |
"outputs": [
|
934 |
{
|
|
|
970 |
" <tbody>\n",
|
971 |
" <tr>\n",
|
972 |
" <th>115</th>\n",
|
973 |
+
" <td>1715108400000</td>\n",
|
974 |
+
" <td>2024-05-07 19:00:00</td>\n",
|
975 |
+
" <td>2024-05-07</td>\n",
|
976 |
" <td>19</td>\n",
|
977 |
+
" <td>12.0</td>\n",
|
978 |
+
" <td>41.0</td>\n",
|
|
|
|
|
979 |
" <td>0.0</td>\n",
|
980 |
+
" <td>0.0</td>\n",
|
981 |
+
" <td>0.0</td>\n",
|
982 |
+
" <td>0.0</td>\n",
|
983 |
+
" <td>0.0</td>\n",
|
984 |
+
" <td>4.2</td>\n",
|
985 |
+
" <td>10.8</td>\n",
|
986 |
" </tr>\n",
|
987 |
" <tr>\n",
|
988 |
" <th>116</th>\n",
|
989 |
+
" <td>1715112000000</td>\n",
|
990 |
+
" <td>2024-05-07 20:00:00</td>\n",
|
991 |
+
" <td>2024-05-07</td>\n",
|
992 |
" <td>20</td>\n",
|
993 |
+
" <td>10.7</td>\n",
|
994 |
+
" <td>49.0</td>\n",
|
|
|
|
|
995 |
" <td>0.0</td>\n",
|
996 |
+
" <td>0.0</td>\n",
|
997 |
+
" <td>0.0</td>\n",
|
998 |
+
" <td>0.0</td>\n",
|
999 |
+
" <td>0.0</td>\n",
|
1000 |
+
" <td>3.6</td>\n",
|
1001 |
+
" <td>8.3</td>\n",
|
1002 |
" </tr>\n",
|
1003 |
" <tr>\n",
|
1004 |
" <th>117</th>\n",
|
1005 |
+
" <td>1715115600000</td>\n",
|
1006 |
+
" <td>2024-05-07 21:00:00</td>\n",
|
1007 |
+
" <td>2024-05-07</td>\n",
|
1008 |
" <td>21</td>\n",
|
1009 |
+
" <td>9.6</td>\n",
|
1010 |
+
" <td>56.0</td>\n",
|
|
|
|
|
1011 |
" <td>0.0</td>\n",
|
1012 |
+
" <td>0.0</td>\n",
|
1013 |
+
" <td>0.0</td>\n",
|
1014 |
+
" <td>0.0</td>\n",
|
1015 |
+
" <td>0.0</td>\n",
|
1016 |
+
" <td>3.2</td>\n",
|
1017 |
+
" <td>5.4</td>\n",
|
1018 |
" </tr>\n",
|
1019 |
" <tr>\n",
|
1020 |
" <th>118</th>\n",
|
1021 |
+
" <td>1715119200000</td>\n",
|
1022 |
+
" <td>2024-05-07 22:00:00</td>\n",
|
1023 |
+
" <td>2024-05-07</td>\n",
|
1024 |
" <td>22</td>\n",
|
1025 |
+
" <td>8.7</td>\n",
|
1026 |
+
" <td>58.0</td>\n",
|
|
|
|
|
1027 |
" <td>0.0</td>\n",
|
1028 |
+
" <td>0.0</td>\n",
|
1029 |
+
" <td>0.0</td>\n",
|
1030 |
+
" <td>0.0</td>\n",
|
1031 |
+
" <td>0.0</td>\n",
|
1032 |
+
" <td>3.3</td>\n",
|
1033 |
+
" <td>5.8</td>\n",
|
1034 |
" </tr>\n",
|
1035 |
" <tr>\n",
|
1036 |
" <th>119</th>\n",
|
1037 |
+
" <td>1715122800000</td>\n",
|
1038 |
+
" <td>2024-05-07 23:00:00</td>\n",
|
1039 |
+
" <td>2024-05-07</td>\n",
|
1040 |
" <td>23</td>\n",
|
1041 |
+
" <td>7.9</td>\n",
|
1042 |
+
" <td>57.0</td>\n",
|
|
|
|
|
1043 |
" <td>0.0</td>\n",
|
1044 |
+
" <td>0.0</td>\n",
|
1045 |
+
" <td>0.0</td>\n",
|
1046 |
+
" <td>0.0</td>\n",
|
1047 |
+
" <td>0.0</td>\n",
|
1048 |
+
" <td>3.8</td>\n",
|
1049 |
+
" <td>6.5</td>\n",
|
1050 |
" </tr>\n",
|
1051 |
" </tbody>\n",
|
1052 |
"</table>\n",
|
|
|
1054 |
],
|
1055 |
"text/plain": [
|
1056 |
" timestamp datetime date hour temperature_2m \\\n",
|
1057 |
+
"115 1715108400000 2024-05-07 19:00:00 2024-05-07 19 12.0 \n",
|
1058 |
+
"116 1715112000000 2024-05-07 20:00:00 2024-05-07 20 10.7 \n",
|
1059 |
+
"117 1715115600000 2024-05-07 21:00:00 2024-05-07 21 9.6 \n",
|
1060 |
+
"118 1715119200000 2024-05-07 22:00:00 2024-05-07 22 8.7 \n",
|
1061 |
+
"119 1715122800000 2024-05-07 23:00:00 2024-05-07 23 7.9 \n",
|
1062 |
"\n",
|
1063 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
1064 |
+
"115 41.0 0.0 0.0 0.0 0.0 \n",
|
1065 |
+
"116 49.0 0.0 0.0 0.0 0.0 \n",
|
1066 |
+
"117 56.0 0.0 0.0 0.0 0.0 \n",
|
1067 |
+
"118 58.0 0.0 0.0 0.0 0.0 \n",
|
1068 |
+
"119 57.0 0.0 0.0 0.0 0.0 \n",
|
1069 |
"\n",
|
1070 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
1071 |
+
"115 0.0 4.2 10.8 \n",
|
1072 |
+
"116 0.0 3.6 8.3 \n",
|
1073 |
+
"117 0.0 3.2 5.4 \n",
|
1074 |
+
"118 0.0 3.3 5.8 \n",
|
1075 |
+
"119 0.0 3.8 6.5 "
|
1076 |
]
|
1077 |
},
|
1078 |
+
"execution_count": 14,
|
1079 |
"metadata": {},
|
1080 |
"output_type": "execute_result"
|
1081 |
}
|
|
|
1087 |
},
|
1088 |
{
|
1089 |
"cell_type": "code",
|
1090 |
+
"execution_count": 15,
|
1091 |
"metadata": {},
|
1092 |
"outputs": [
|
1093 |
{
|
|
|
1102 |
" 0 timestamp 120 non-null int64 \n",
|
1103 |
" 1 datetime 120 non-null datetime64[ns]\n",
|
1104 |
" 2 date 120 non-null object \n",
|
1105 |
+
" 3 hour 120 non-null int64 \n",
|
1106 |
" 4 temperature_2m 120 non-null float64 \n",
|
1107 |
" 5 relative_humidity_2m 120 non-null float64 \n",
|
1108 |
" 6 precipitation 120 non-null float64 \n",
|
|
|
1112 |
" 10 cloud_cover 120 non-null float64 \n",
|
1113 |
" 11 wind_speed_10m 120 non-null float64 \n",
|
1114 |
" 12 wind_gusts_10m 120 non-null float64 \n",
|
1115 |
+
"dtypes: datetime64[ns](1), float64(9), int64(2), object(1)\n",
|
1116 |
+
"memory usage: 12.3+ KB\n"
|
1117 |
]
|
1118 |
}
|
1119 |
],
|
|
|
1132 |
},
|
1133 |
{
|
1134 |
"cell_type": "code",
|
1135 |
+
"execution_count": 16,
|
1136 |
"metadata": {},
|
1137 |
"outputs": [],
|
1138 |
"source": [
|
1139 |
+
"calender_df = calendar.dk_calendar()"
|
1140 |
]
|
1141 |
},
|
1142 |
{
|
1143 |
"cell_type": "code",
|
1144 |
+
"execution_count": 17,
|
1145 |
"metadata": {},
|
1146 |
"outputs": [
|
1147 |
{
|
|
|
1170 |
" <th>day</th>\n",
|
1171 |
" <th>month</th>\n",
|
1172 |
" <th>year</th>\n",
|
1173 |
+
" <th>workday</th>\n",
|
1174 |
" </tr>\n",
|
1175 |
" </thead>\n",
|
1176 |
" <tbody>\n",
|
|
|
1181 |
" <td>1</td>\n",
|
1182 |
" <td>1</td>\n",
|
1183 |
" <td>2022</td>\n",
|
1184 |
+
" <td>0</td>\n",
|
1185 |
" </tr>\n",
|
1186 |
" <tr>\n",
|
1187 |
" <th>1</th>\n",
|
|
|
1190 |
" <td>2</td>\n",
|
1191 |
" <td>1</td>\n",
|
1192 |
" <td>2022</td>\n",
|
1193 |
+
" <td>0</td>\n",
|
1194 |
" </tr>\n",
|
1195 |
" <tr>\n",
|
1196 |
" <th>2</th>\n",
|
|
|
1199 |
" <td>3</td>\n",
|
1200 |
" <td>1</td>\n",
|
1201 |
" <td>2022</td>\n",
|
1202 |
+
" <td>1</td>\n",
|
1203 |
" </tr>\n",
|
1204 |
" <tr>\n",
|
1205 |
" <th>3</th>\n",
|
|
|
1208 |
" <td>4</td>\n",
|
1209 |
" <td>1</td>\n",
|
1210 |
" <td>2022</td>\n",
|
1211 |
+
" <td>1</td>\n",
|
1212 |
" </tr>\n",
|
1213 |
" <tr>\n",
|
1214 |
" <th>4</th>\n",
|
|
|
1217 |
" <td>5</td>\n",
|
1218 |
" <td>1</td>\n",
|
1219 |
" <td>2022</td>\n",
|
1220 |
+
" <td>1</td>\n",
|
1221 |
" </tr>\n",
|
1222 |
" </tbody>\n",
|
1223 |
"</table>\n",
|
1224 |
"</div>"
|
1225 |
],
|
1226 |
"text/plain": [
|
1227 |
+
" date dayofweek day month year workday\n",
|
1228 |
+
"0 2022-01-01 5 1 1 2022 0\n",
|
1229 |
+
"1 2022-01-02 6 2 1 2022 0\n",
|
1230 |
+
"2 2022-01-03 0 3 1 2022 1\n",
|
1231 |
+
"3 2022-01-04 1 4 1 2022 1\n",
|
1232 |
+
"4 2022-01-05 2 5 1 2022 1"
|
1233 |
]
|
1234 |
},
|
1235 |
+
"execution_count": 17,
|
1236 |
"metadata": {},
|
1237 |
"output_type": "execute_result"
|
1238 |
}
|
|
|
1244 |
},
|
1245 |
{
|
1246 |
"cell_type": "code",
|
1247 |
+
"execution_count": 18,
|
1248 |
"metadata": {},
|
1249 |
"outputs": [
|
1250 |
{
|
|
|
1273 |
" <th>day</th>\n",
|
1274 |
" <th>month</th>\n",
|
1275 |
" <th>year</th>\n",
|
1276 |
+
" <th>workday</th>\n",
|
1277 |
" </tr>\n",
|
1278 |
" </thead>\n",
|
1279 |
" <tbody>\n",
|
|
|
1284 |
" <td>27</td>\n",
|
1285 |
" <td>12</td>\n",
|
1286 |
" <td>2024</td>\n",
|
1287 |
+
" <td>1</td>\n",
|
1288 |
" </tr>\n",
|
1289 |
" <tr>\n",
|
1290 |
" <th>1092</th>\n",
|
|
|
1293 |
" <td>28</td>\n",
|
1294 |
" <td>12</td>\n",
|
1295 |
" <td>2024</td>\n",
|
1296 |
+
" <td>0</td>\n",
|
1297 |
" </tr>\n",
|
1298 |
" <tr>\n",
|
1299 |
" <th>1093</th>\n",
|
|
|
1302 |
" <td>29</td>\n",
|
1303 |
" <td>12</td>\n",
|
1304 |
" <td>2024</td>\n",
|
1305 |
+
" <td>0</td>\n",
|
1306 |
" </tr>\n",
|
1307 |
" <tr>\n",
|
1308 |
" <th>1094</th>\n",
|
|
|
1311 |
" <td>30</td>\n",
|
1312 |
" <td>12</td>\n",
|
1313 |
" <td>2024</td>\n",
|
1314 |
+
" <td>1</td>\n",
|
1315 |
" </tr>\n",
|
1316 |
" <tr>\n",
|
1317 |
" <th>1095</th>\n",
|
|
|
1320 |
" <td>31</td>\n",
|
1321 |
" <td>12</td>\n",
|
1322 |
" <td>2024</td>\n",
|
1323 |
+
" <td>1</td>\n",
|
1324 |
" </tr>\n",
|
1325 |
" </tbody>\n",
|
1326 |
"</table>\n",
|
1327 |
"</div>"
|
1328 |
],
|
1329 |
"text/plain": [
|
1330 |
+
" date dayofweek day month year workday\n",
|
1331 |
+
"1091 2024-12-27 4 27 12 2024 1\n",
|
1332 |
+
"1092 2024-12-28 5 28 12 2024 0\n",
|
1333 |
+
"1093 2024-12-29 6 29 12 2024 0\n",
|
1334 |
+
"1094 2024-12-30 0 30 12 2024 1\n",
|
1335 |
+
"1095 2024-12-31 1 31 12 2024 1"
|
1336 |
]
|
1337 |
},
|
1338 |
+
"execution_count": 18,
|
1339 |
"metadata": {},
|
1340 |
"output_type": "execute_result"
|
1341 |
}
|
|
|
1347 |
},
|
1348 |
{
|
1349 |
"cell_type": "code",
|
1350 |
+
"execution_count": 19,
|
1351 |
"metadata": {},
|
1352 |
"outputs": [
|
1353 |
{
|
|
|
1360 |
" # Column Non-Null Count Dtype \n",
|
1361 |
"--- ------ -------------- ----- \n",
|
1362 |
" 0 date 1096 non-null object\n",
|
1363 |
+
" 1 dayofweek 1096 non-null int64 \n",
|
1364 |
+
" 2 day 1096 non-null int64 \n",
|
1365 |
+
" 3 month 1096 non-null int64 \n",
|
1366 |
+
" 4 year 1096 non-null int64 \n",
|
1367 |
+
" 5 workday 1096 non-null int64 \n",
|
1368 |
+
"dtypes: int64(5), object(1)\n",
|
1369 |
+
"memory usage: 51.5+ KB\n"
|
1370 |
]
|
1371 |
}
|
1372 |
],
|
|
|
1377 |
},
|
1378 |
{
|
1379 |
"cell_type": "code",
|
1380 |
+
"execution_count": 20,
|
1381 |
"metadata": {},
|
1382 |
"outputs": [
|
1383 |
{
|
|
|
1390 |
" # Column Non-Null Count Dtype \n",
|
1391 |
"--- ------ -------------- ----- \n",
|
1392 |
" 0 date 1096 non-null object\n",
|
1393 |
+
" 1 dayofweek 1096 non-null int64 \n",
|
1394 |
+
" 2 day 1096 non-null int64 \n",
|
1395 |
+
" 3 month 1096 non-null int64 \n",
|
1396 |
+
" 4 year 1096 non-null int64 \n",
|
1397 |
+
" 5 workday 1096 non-null int64 \n",
|
1398 |
+
"dtypes: int64(5), object(1)\n",
|
1399 |
+
"memory usage: 51.5+ KB\n"
|
1400 |
]
|
1401 |
}
|
1402 |
],
|
|
|
1416 |
},
|
1417 |
{
|
1418 |
"cell_type": "code",
|
1419 |
+
"execution_count": 21,
|
1420 |
"metadata": {},
|
1421 |
"outputs": [
|
1422 |
{
|
1423 |
"name": "stdout",
|
1424 |
"output_type": "stream",
|
1425 |
"text": [
|
|
|
1426 |
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
1427 |
"\n",
|
1428 |
+
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/554133\n",
|
1429 |
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
1430 |
]
|
1431 |
}
|
|
|
1462 |
},
|
1463 |
{
|
1464 |
"cell_type": "code",
|
1465 |
+
"execution_count": 22,
|
1466 |
"metadata": {},
|
1467 |
"outputs": [],
|
1468 |
"source": [
|
|
|
1486 |
},
|
1487 |
{
|
1488 |
"cell_type": "code",
|
1489 |
+
"execution_count": 23,
|
1490 |
"metadata": {},
|
1491 |
"outputs": [
|
1492 |
{
|
|
|
1494 |
"output_type": "stream",
|
1495 |
"text": [
|
1496 |
"Feature Group created successfully, explore it at \n",
|
1497 |
+
"https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/778586\n"
|
1498 |
]
|
1499 |
},
|
1500 |
{
|
1501 |
+
"data": {
|
1502 |
+
"application/vnd.jupyter.widget-view+json": {
|
1503 |
+
"model_id": "8d08f0f4717746c1b7c3b16c7490cf51",
|
1504 |
+
"version_major": 2,
|
1505 |
+
"version_minor": 0
|
1506 |
+
},
|
1507 |
+
"text/plain": [
|
1508 |
+
"Uploading Dataframe: 0.00% | | Rows 0/20469 | Elapsed Time: 00:00 | Remaining Time: ?"
|
1509 |
+
]
|
1510 |
+
},
|
1511 |
+
"metadata": {},
|
1512 |
+
"output_type": "display_data"
|
1513 |
},
|
1514 |
{
|
1515 |
"name": "stdout",
|
|
|
1517 |
"text": [
|
1518 |
"Launching job: electricity_prices_1_offline_fg_materialization\n",
|
1519 |
"Job started successfully, you can follow the progress at \n",
|
1520 |
+
"https://c.app.hopsworks.ai/p/554133/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
|
1521 |
]
|
1522 |
},
|
1523 |
{
|
1524 |
"data": {
|
1525 |
"text/plain": [
|
1526 |
+
"(<hsfs.core.job.Job at 0x17f2dc950>, None)"
|
1527 |
]
|
1528 |
},
|
1529 |
+
"execution_count": 23,
|
1530 |
"metadata": {},
|
1531 |
"output_type": "execute_result"
|
1532 |
}
|
|
|
1545 |
},
|
1546 |
{
|
1547 |
"cell_type": "code",
|
1548 |
+
"execution_count": 24,
|
1549 |
"metadata": {},
|
1550 |
"outputs": [],
|
1551 |
"source": [
|
|
|
1567 |
"cell_type": "markdown",
|
1568 |
"metadata": {},
|
1569 |
"source": [
|
1570 |
+
"We replicate the process for both the `weather_fg` and `danish_holidays_fg` by establishing feature groups and inserting the dataframes into their respective feature groups."
|
1571 |
]
|
1572 |
},
|
1573 |
{
|
1574 |
"cell_type": "code",
|
1575 |
+
"execution_count": 25,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1576 |
"metadata": {},
|
1577 |
"outputs": [],
|
1578 |
"source": [
|
|
|
1589 |
},
|
1590 |
{
|
1591 |
"cell_type": "code",
|
1592 |
+
"execution_count": 26,
|
1593 |
"metadata": {},
|
1594 |
"outputs": [
|
1595 |
{
|
|
|
1597 |
"output_type": "stream",
|
1598 |
"text": [
|
1599 |
"Feature Group created successfully, explore it at \n",
|
1600 |
+
"https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/777575\n"
|
1601 |
]
|
1602 |
},
|
1603 |
{
|
1604 |
+
"data": {
|
1605 |
+
"application/vnd.jupyter.widget-view+json": {
|
1606 |
+
"model_id": "5978d081e84c49999233b2b00aa46be7",
|
1607 |
+
"version_major": 2,
|
1608 |
+
"version_minor": 0
|
1609 |
+
},
|
1610 |
+
"text/plain": [
|
1611 |
+
"Uploading Dataframe: 0.00% | | Rows 0/20448 | Elapsed Time: 00:00 | Remaining Time: ?"
|
1612 |
+
]
|
1613 |
+
},
|
1614 |
+
"metadata": {},
|
1615 |
+
"output_type": "display_data"
|
1616 |
},
|
1617 |
{
|
1618 |
"name": "stdout",
|
|
|
1620 |
"text": [
|
1621 |
"Launching job: weather_measurements_1_offline_fg_materialization\n",
|
1622 |
"Job started successfully, you can follow the progress at \n",
|
1623 |
+
"https://c.app.hopsworks.ai/p/554133/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
|
1624 |
]
|
1625 |
},
|
1626 |
{
|
1627 |
"data": {
|
1628 |
"text/plain": [
|
1629 |
+
"(<hsfs.core.job.Job at 0x308979690>, None)"
|
1630 |
]
|
1631 |
},
|
1632 |
+
"execution_count": 26,
|
1633 |
"metadata": {},
|
1634 |
"output_type": "execute_result"
|
1635 |
}
|
|
|
1641 |
},
|
1642 |
{
|
1643 |
"cell_type": "code",
|
1644 |
+
"execution_count": 27,
|
1645 |
"metadata": {},
|
1646 |
"outputs": [],
|
1647 |
"source": [
|
|
|
1669 |
},
|
1670 |
{
|
1671 |
"cell_type": "code",
|
1672 |
+
"execution_count": 28,
|
1673 |
"metadata": {},
|
1674 |
"outputs": [],
|
1675 |
"source": [
|
|
|
1685 |
},
|
1686 |
{
|
1687 |
"cell_type": "code",
|
1688 |
+
"execution_count": 29,
|
1689 |
"metadata": {},
|
1690 |
"outputs": [
|
1691 |
{
|
|
|
1693 |
"output_type": "stream",
|
1694 |
"text": [
|
1695 |
"Feature Group created successfully, explore it at \n",
|
1696 |
+
"https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/777576\n"
|
1697 |
]
|
1698 |
},
|
1699 |
{
|
1700 |
+
"data": {
|
1701 |
+
"application/vnd.jupyter.widget-view+json": {
|
1702 |
+
"model_id": "376ce1b46ca6473b830e6ad9af28276a",
|
1703 |
+
"version_major": 2,
|
1704 |
+
"version_minor": 0
|
1705 |
+
},
|
1706 |
+
"text/plain": [
|
1707 |
+
"Uploading Dataframe: 0.00% | | Rows 0/1096 | Elapsed Time: 00:00 | Remaining Time: ?"
|
1708 |
+
]
|
1709 |
+
},
|
1710 |
+
"metadata": {},
|
1711 |
+
"output_type": "display_data"
|
1712 |
},
|
1713 |
{
|
1714 |
"name": "stdout",
|
|
|
1716 |
"text": [
|
1717 |
"Launching job: dk_calendar_1_offline_fg_materialization\n",
|
1718 |
"Job started successfully, you can follow the progress at \n",
|
1719 |
+
"https://c.app.hopsworks.ai/p/554133/jobs/named/dk_calendar_1_offline_fg_materialization/executions\n"
|
1720 |
]
|
1721 |
},
|
1722 |
{
|
1723 |
"data": {
|
1724 |
"text/plain": [
|
1725 |
+
"(<hsfs.core.job.Job at 0x3088ef590>, None)"
|
1726 |
]
|
1727 |
},
|
1728 |
+
"execution_count": 29,
|
1729 |
"metadata": {},
|
1730 |
"output_type": "execute_result"
|
1731 |
}
|
|
|
1737 |
},
|
1738 |
{
|
1739 |
"cell_type": "code",
|
1740 |
+
"execution_count": 31,
|
1741 |
"metadata": {},
|
1742 |
"outputs": [],
|
1743 |
"source": [
|
|
|
1746 |
" {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
|
1747 |
" {\"name\": \"day\", \"description\": \"Day number of the week. Monday is 0 and Sunday is 6\"},\n",
|
1748 |
" {\"name\": \"month\", \"description\": \"Month number of the year\"},\n",
|
1749 |
+
" {\"name\": \"workday\", \"description\": \"Workday or not a workday\"},\n",
|
1750 |
"]\n",
|
1751 |
"\n",
|
1752 |
"# Updating feature descriptions\n",
|
notebooks/2_feature_pipeline.ipynb
CHANGED
@@ -27,15 +27,15 @@
|
|
27 |
},
|
28 |
{
|
29 |
"cell_type": "code",
|
30 |
-
"execution_count":
|
31 |
"metadata": {},
|
32 |
"outputs": [
|
33 |
{
|
34 |
"name": "stdout",
|
35 |
"output_type": "stream",
|
36 |
"text": [
|
37 |
-
"
|
38 |
-
"
|
39 |
]
|
40 |
}
|
41 |
],
|
@@ -53,7 +53,7 @@
|
|
53 |
},
|
54 |
{
|
55 |
"cell_type": "code",
|
56 |
-
"execution_count":
|
57 |
"metadata": {},
|
58 |
"outputs": [],
|
59 |
"source": [
|
@@ -88,7 +88,7 @@
|
|
88 |
},
|
89 |
{
|
90 |
"cell_type": "code",
|
91 |
-
"execution_count":
|
92 |
"metadata": {},
|
93 |
"outputs": [],
|
94 |
"source": [
|
@@ -101,7 +101,7 @@
|
|
101 |
},
|
102 |
{
|
103 |
"cell_type": "code",
|
104 |
-
"execution_count":
|
105 |
"metadata": {},
|
106 |
"outputs": [
|
107 |
{
|
@@ -135,195 +135,195 @@
|
|
135 |
" <tbody>\n",
|
136 |
" <tr>\n",
|
137 |
" <th>0</th>\n",
|
138 |
-
" <td>
|
139 |
-
" <td>2024-05-
|
140 |
-
" <td>2024-05-
|
141 |
" <td>0</td>\n",
|
142 |
-
" <td>0.
|
143 |
" </tr>\n",
|
144 |
" <tr>\n",
|
145 |
" <th>1</th>\n",
|
146 |
-
" <td>
|
147 |
-
" <td>2024-05-
|
148 |
-
" <td>2024-05-
|
149 |
" <td>1</td>\n",
|
150 |
-
" <td>0.
|
151 |
" </tr>\n",
|
152 |
" <tr>\n",
|
153 |
" <th>2</th>\n",
|
154 |
-
" <td>
|
155 |
-
" <td>2024-05-
|
156 |
-
" <td>2024-05-
|
157 |
" <td>2</td>\n",
|
158 |
-
" <td>0.
|
159 |
" </tr>\n",
|
160 |
" <tr>\n",
|
161 |
" <th>3</th>\n",
|
162 |
-
" <td>
|
163 |
-
" <td>2024-05-
|
164 |
-
" <td>2024-05-
|
165 |
" <td>3</td>\n",
|
166 |
-
" <td>0.
|
167 |
" </tr>\n",
|
168 |
" <tr>\n",
|
169 |
" <th>4</th>\n",
|
170 |
-
" <td>
|
171 |
-
" <td>2024-05-
|
172 |
-
" <td>2024-05-
|
173 |
" <td>4</td>\n",
|
174 |
-
" <td>0.
|
175 |
" </tr>\n",
|
176 |
" <tr>\n",
|
177 |
" <th>5</th>\n",
|
178 |
-
" <td>
|
179 |
-
" <td>2024-05-
|
180 |
-
" <td>2024-05-
|
181 |
" <td>5</td>\n",
|
182 |
-
" <td>0.
|
183 |
" </tr>\n",
|
184 |
" <tr>\n",
|
185 |
" <th>6</th>\n",
|
186 |
-
" <td>
|
187 |
-
" <td>2024-05-
|
188 |
-
" <td>2024-05-
|
189 |
" <td>6</td>\n",
|
190 |
-
" <td>0.
|
191 |
" </tr>\n",
|
192 |
" <tr>\n",
|
193 |
" <th>7</th>\n",
|
194 |
-
" <td>
|
195 |
-
" <td>2024-05-
|
196 |
-
" <td>2024-05-
|
197 |
" <td>7</td>\n",
|
198 |
-
" <td>0.
|
199 |
" </tr>\n",
|
200 |
" <tr>\n",
|
201 |
" <th>8</th>\n",
|
202 |
-
" <td>
|
203 |
-
" <td>2024-05-
|
204 |
-
" <td>2024-05-
|
205 |
" <td>8</td>\n",
|
206 |
-
" <td>0.
|
207 |
" </tr>\n",
|
208 |
" <tr>\n",
|
209 |
" <th>9</th>\n",
|
210 |
-
" <td>
|
211 |
-
" <td>2024-05-
|
212 |
-
" <td>2024-05-
|
213 |
" <td>9</td>\n",
|
214 |
-
" <td>0.
|
215 |
" </tr>\n",
|
216 |
" <tr>\n",
|
217 |
" <th>10</th>\n",
|
218 |
-
" <td>
|
219 |
-
" <td>2024-05-
|
220 |
-
" <td>2024-05-
|
221 |
" <td>10</td>\n",
|
222 |
-
" <td>0.
|
223 |
" </tr>\n",
|
224 |
" <tr>\n",
|
225 |
" <th>11</th>\n",
|
226 |
-
" <td>
|
227 |
-
" <td>2024-05-
|
228 |
-
" <td>2024-05-
|
229 |
" <td>11</td>\n",
|
230 |
-
" <td
|
231 |
" </tr>\n",
|
232 |
" <tr>\n",
|
233 |
" <th>12</th>\n",
|
234 |
-
" <td>
|
235 |
-
" <td>2024-05-
|
236 |
-
" <td>2024-05-
|
237 |
" <td>12</td>\n",
|
238 |
-
" <td
|
239 |
" </tr>\n",
|
240 |
" <tr>\n",
|
241 |
" <th>13</th>\n",
|
242 |
-
" <td>
|
243 |
-
" <td>2024-05-
|
244 |
-
" <td>2024-05-
|
245 |
" <td>13</td>\n",
|
246 |
-
" <td
|
247 |
" </tr>\n",
|
248 |
" <tr>\n",
|
249 |
" <th>14</th>\n",
|
250 |
-
" <td>
|
251 |
-
" <td>2024-05-
|
252 |
-
" <td>2024-05-
|
253 |
" <td>14</td>\n",
|
254 |
-
" <td
|
255 |
" </tr>\n",
|
256 |
" <tr>\n",
|
257 |
" <th>15</th>\n",
|
258 |
-
" <td>
|
259 |
-
" <td>2024-05-
|
260 |
-
" <td>2024-05-
|
261 |
" <td>15</td>\n",
|
262 |
-
" <td
|
263 |
" </tr>\n",
|
264 |
" <tr>\n",
|
265 |
" <th>16</th>\n",
|
266 |
-
" <td>
|
267 |
-
" <td>2024-05-
|
268 |
-
" <td>2024-05-
|
269 |
" <td>16</td>\n",
|
270 |
-
" <td>0.
|
271 |
" </tr>\n",
|
272 |
" <tr>\n",
|
273 |
" <th>17</th>\n",
|
274 |
-
" <td>
|
275 |
-
" <td>2024-05-
|
276 |
-
" <td>2024-05-
|
277 |
" <td>17</td>\n",
|
278 |
-
" <td>0.
|
279 |
" </tr>\n",
|
280 |
" <tr>\n",
|
281 |
" <th>18</th>\n",
|
282 |
-
" <td>
|
283 |
-
" <td>2024-05-
|
284 |
-
" <td>2024-05-
|
285 |
" <td>18</td>\n",
|
286 |
-
" <td>0.
|
287 |
" </tr>\n",
|
288 |
" <tr>\n",
|
289 |
" <th>19</th>\n",
|
290 |
-
" <td>
|
291 |
-
" <td>2024-05-
|
292 |
-
" <td>2024-05-
|
293 |
" <td>19</td>\n",
|
294 |
-
" <td>0.
|
295 |
" </tr>\n",
|
296 |
" <tr>\n",
|
297 |
" <th>20</th>\n",
|
298 |
-
" <td>
|
299 |
-
" <td>2024-05-
|
300 |
-
" <td>2024-05-
|
301 |
" <td>20</td>\n",
|
302 |
-
" <td>0.
|
303 |
" </tr>\n",
|
304 |
" <tr>\n",
|
305 |
" <th>21</th>\n",
|
306 |
-
" <td>
|
307 |
-
" <td>2024-05-
|
308 |
-
" <td>2024-05-
|
309 |
" <td>21</td>\n",
|
310 |
-
" <td>0.
|
311 |
" </tr>\n",
|
312 |
" <tr>\n",
|
313 |
" <th>22</th>\n",
|
314 |
-
" <td>
|
315 |
-
" <td>2024-05-
|
316 |
-
" <td>2024-05-
|
317 |
" <td>22</td>\n",
|
318 |
-
" <td>0.
|
319 |
" </tr>\n",
|
320 |
" <tr>\n",
|
321 |
" <th>23</th>\n",
|
322 |
-
" <td>
|
323 |
-
" <td>2024-05-
|
324 |
-
" <td>2024-05-
|
325 |
" <td>23</td>\n",
|
326 |
-
" <td>0.
|
327 |
" </tr>\n",
|
328 |
" </tbody>\n",
|
329 |
"</table>\n",
|
@@ -331,33 +331,33 @@
|
|
331 |
],
|
332 |
"text/plain": [
|
333 |
" timestamp datetime date hour dk1_spotpricedkk_kwh\n",
|
334 |
-
"0
|
335 |
-
"1
|
336 |
-
"2
|
337 |
-
"3
|
338 |
-
"4
|
339 |
-
"5
|
340 |
-
"6
|
341 |
-
"7
|
342 |
-
"8
|
343 |
-
"9
|
344 |
-
"10
|
345 |
-
"11
|
346 |
-
"12
|
347 |
-
"13
|
348 |
-
"14
|
349 |
-
"15
|
350 |
-
"16
|
351 |
-
"17
|
352 |
-
"18
|
353 |
-
"19
|
354 |
-
"20
|
355 |
-
"21
|
356 |
-
"22
|
357 |
-
"23
|
358 |
]
|
359 |
},
|
360 |
-
"execution_count":
|
361 |
"metadata": {},
|
362 |
"output_type": "execute_result"
|
363 |
}
|
@@ -367,36 +367,6 @@
|
|
367 |
"electricity_df"
|
368 |
]
|
369 |
},
|
370 |
-
{
|
371 |
-
"cell_type": "markdown",
|
372 |
-
"metadata": {},
|
373 |
-
"source": [
|
374 |
-
"### <span style=\"color:#2656a3;\">☀️💨 Forecast Renewable Energy next day from Energinet"
|
375 |
-
]
|
376 |
-
},
|
377 |
-
{
|
378 |
-
"cell_type": "code",
|
379 |
-
"execution_count": 22,
|
380 |
-
"metadata": {},
|
381 |
-
"outputs": [],
|
382 |
-
"source": [
|
383 |
-
"# # Fetching non-historical forecast of renewable energy data for area DK1\n",
|
384 |
-
"# forecast_renewable_energy_df = electricity_prices.forecast_renewable_energy(\n",
|
385 |
-
"# historical=False,\n",
|
386 |
-
"# area=[\"DK1\"]\n",
|
387 |
-
"# )"
|
388 |
-
]
|
389 |
-
},
|
390 |
-
{
|
391 |
-
"cell_type": "code",
|
392 |
-
"execution_count": 23,
|
393 |
-
"metadata": {},
|
394 |
-
"outputs": [],
|
395 |
-
"source": [
|
396 |
-
"# # Display the forecast_renewable_energy dataframe\n",
|
397 |
-
"# forecast_renewable_energy_df"
|
398 |
-
]
|
399 |
-
},
|
400 |
{
|
401 |
"cell_type": "markdown",
|
402 |
"metadata": {},
|
@@ -408,41 +378,12 @@
|
|
408 |
"cell_type": "markdown",
|
409 |
"metadata": {},
|
410 |
"source": [
|
411 |
-
"#### <span style=\"color:#2656a3;\">
|
412 |
-
]
|
413 |
-
},
|
414 |
-
{
|
415 |
-
"cell_type": "code",
|
416 |
-
"execution_count": 24,
|
417 |
-
"metadata": {},
|
418 |
-
"outputs": [],
|
419 |
-
"source": [
|
420 |
-
"# Fetching non-historical weather data for area DK1\n",
|
421 |
-
"#historical_weather_df = weather_measures.historical_weather_measures(\n",
|
422 |
-
"# historical=False\n",
|
423 |
-
"#)"
|
424 |
-
]
|
425 |
-
},
|
426 |
-
{
|
427 |
-
"cell_type": "code",
|
428 |
-
"execution_count": 25,
|
429 |
-
"metadata": {},
|
430 |
-
"outputs": [],
|
431 |
-
"source": [
|
432 |
-
"# Display the first 5 rows of the dataframe\n",
|
433 |
-
"#historical_weather_df.head()"
|
434 |
-
]
|
435 |
-
},
|
436 |
-
{
|
437 |
-
"cell_type": "markdown",
|
438 |
-
"metadata": {},
|
439 |
-
"source": [
|
440 |
-
"#### <span style=\"color:#2656a3;\"> 🌈 Weather Forecast"
|
441 |
]
|
442 |
},
|
443 |
{
|
444 |
"cell_type": "code",
|
445 |
-
"execution_count":
|
446 |
"metadata": {},
|
447 |
"outputs": [],
|
448 |
"source": [
|
@@ -454,7 +395,7 @@
|
|
454 |
},
|
455 |
{
|
456 |
"cell_type": "code",
|
457 |
-
"execution_count":
|
458 |
"metadata": {},
|
459 |
"outputs": [
|
460 |
{
|
@@ -496,83 +437,83 @@
|
|
496 |
" <tbody>\n",
|
497 |
" <tr>\n",
|
498 |
" <th>0</th>\n",
|
499 |
-
" <td>
|
500 |
-
" <td>2024-05-
|
501 |
-
" <td>2024-05-
|
502 |
" <td>0</td>\n",
|
503 |
-
" <td>14.
|
504 |
-
" <td>
|
505 |
" <td>0.0</td>\n",
|
506 |
" <td>0.0</td>\n",
|
507 |
" <td>0.0</td>\n",
|
508 |
-
" <td>
|
509 |
-
" <td>
|
510 |
-
" <td>
|
511 |
-
" <td>
|
512 |
" </tr>\n",
|
513 |
" <tr>\n",
|
514 |
" <th>1</th>\n",
|
515 |
-
" <td>
|
516 |
-
" <td>2024-05-
|
517 |
-
" <td>2024-05-
|
518 |
" <td>1</td>\n",
|
519 |
-
" <td>
|
520 |
-
" <td>
|
521 |
" <td>0.0</td>\n",
|
522 |
" <td>0.0</td>\n",
|
523 |
" <td>0.0</td>\n",
|
524 |
" <td>0.0</td>\n",
|
525 |
-
" <td>
|
526 |
-
" <td>
|
527 |
-
" <td>37.
|
528 |
" </tr>\n",
|
529 |
" <tr>\n",
|
530 |
" <th>2</th>\n",
|
531 |
-
" <td>
|
532 |
-
" <td>2024-05-
|
533 |
-
" <td>2024-05-
|
534 |
" <td>2</td>\n",
|
535 |
-
" <td>13.
|
536 |
-
" <td>
|
537 |
" <td>0.0</td>\n",
|
538 |
" <td>0.0</td>\n",
|
539 |
" <td>0.0</td>\n",
|
540 |
-
" <td>
|
541 |
-
" <td>
|
542 |
-
" <td>
|
543 |
-
" <td>
|
544 |
" </tr>\n",
|
545 |
" <tr>\n",
|
546 |
" <th>3</th>\n",
|
547 |
-
" <td>
|
548 |
-
" <td>2024-05-
|
549 |
-
" <td>2024-05-
|
550 |
" <td>3</td>\n",
|
551 |
-
" <td>
|
552 |
-
" <td>
|
553 |
-
" <td>0.
|
554 |
-
" <td>0.
|
555 |
" <td>0.0</td>\n",
|
556 |
-
" <td>
|
557 |
-
" <td>
|
558 |
-
" <td>
|
559 |
-
" <td>
|
560 |
" </tr>\n",
|
561 |
" <tr>\n",
|
562 |
" <th>4</th>\n",
|
563 |
-
" <td>
|
564 |
-
" <td>2024-05-
|
565 |
-
" <td>2024-05-
|
566 |
" <td>4</td>\n",
|
567 |
-
" <td>12.
|
568 |
" <td>73.0</td>\n",
|
569 |
" <td>0.0</td>\n",
|
570 |
" <td>0.0</td>\n",
|
571 |
" <td>0.0</td>\n",
|
572 |
" <td>2.0</td>\n",
|
573 |
-
" <td>
|
574 |
-
" <td>
|
575 |
-
" <td>
|
576 |
" </tr>\n",
|
577 |
" <tr>\n",
|
578 |
" <th>...</th>\n",
|
@@ -592,457 +533,140 @@
|
|
592 |
" </tr>\n",
|
593 |
" <tr>\n",
|
594 |
" <th>115</th>\n",
|
595 |
-
" <td>
|
596 |
-
" <td>2024-05-
|
597 |
-
" <td>2024-05-
|
598 |
" <td>19</td>\n",
|
599 |
-
" <td>
|
600 |
-
" <td>
|
601 |
-
" <td>1.4</td>\n",
|
602 |
-
" <td>1.4</td>\n",
|
603 |
" <td>0.0</td>\n",
|
604 |
-
" <td>61.0</td>\n",
|
605 |
-
" <td>100.0</td>\n",
|
606 |
-
" <td>16.6</td>\n",
|
607 |
-
" <td>32.0</td>\n",
|
608 |
-
" </tr>\n",
|
609 |
-
" <tr>\n",
|
610 |
-
" <th>116</th>\n",
|
611 |
-
" <td>1715025600000</td>\n",
|
612 |
-
" <td>2024-05-06 20:00:00</td>\n",
|
613 |
-
" <td>2024-05-06</td>\n",
|
614 |
-
" <td>20</td>\n",
|
615 |
-
" <td>10.1</td>\n",
|
616 |
-
" <td>90.0</td>\n",
|
617 |
-
" <td>1.4</td>\n",
|
618 |
-
" <td>1.4</td>\n",
|
619 |
" <td>0.0</td>\n",
|
620 |
-
" <td>61.0</td>\n",
|
621 |
-
" <td>100.0</td>\n",
|
622 |
-
" <td>19.5</td>\n",
|
623 |
-
" <td>37.1</td>\n",
|
624 |
-
" </tr>\n",
|
625 |
-
" <tr>\n",
|
626 |
-
" <th>117</th>\n",
|
627 |
-
" <td>1715029200000</td>\n",
|
628 |
-
" <td>2024-05-06 21:00:00</td>\n",
|
629 |
-
" <td>2024-05-06</td>\n",
|
630 |
-
" <td>21</td>\n",
|
631 |
-
" <td>9.5</td>\n",
|
632 |
-
" <td>88.0</td>\n",
|
633 |
-
" <td>1.4</td>\n",
|
634 |
-
" <td>1.4</td>\n",
|
635 |
" <td>0.0</td>\n",
|
636 |
-
" <td>61.0</td>\n",
|
637 |
-
" <td>100.0</td>\n",
|
638 |
-
" <td>21.6</td>\n",
|
639 |
-
" <td>42.1</td>\n",
|
640 |
-
" </tr>\n",
|
641 |
-
" <tr>\n",
|
642 |
-
" <th>118</th>\n",
|
643 |
-
" <td>1715032800000</td>\n",
|
644 |
-
" <td>2024-05-06 22:00:00</td>\n",
|
645 |
-
" <td>2024-05-06</td>\n",
|
646 |
-
" <td>22</td>\n",
|
647 |
-
" <td>9.3</td>\n",
|
648 |
-
" <td>86.0</td>\n",
|
649 |
-
" <td>0.6</td>\n",
|
650 |
-
" <td>0.6</td>\n",
|
651 |
" <td>0.0</td>\n",
|
652 |
-
" <td>3.0</td>\n",
|
653 |
-
" <td>100.0</td>\n",
|
654 |
-
" <td>22.0</td>\n",
|
655 |
-
" <td>41.0</td>\n",
|
656 |
-
" </tr>\n",
|
657 |
-
" <tr>\n",
|
658 |
-
" <th>119</th>\n",
|
659 |
-
" <td>1715036400000</td>\n",
|
660 |
-
" <td>2024-05-06 23:00:00</td>\n",
|
661 |
-
" <td>2024-05-06</td>\n",
|
662 |
-
" <td>23</td>\n",
|
663 |
-
" <td>9.1</td>\n",
|
664 |
-
" <td>84.0</td>\n",
|
665 |
-
" <td>0.6</td>\n",
|
666 |
-
" <td>0.6</td>\n",
|
667 |
" <td>0.0</td>\n",
|
668 |
-
" <td>
|
669 |
-
" <td>
|
670 |
-
" <td>21.3</td>\n",
|
671 |
-
" <td>40.3</td>\n",
|
672 |
" </tr>\n",
|
673 |
-
" </tbody>\n",
|
674 |
-
"</table>\n",
|
675 |
-
"<p>120 rows × 13 columns</p>\n",
|
676 |
-
"</div>"
|
677 |
-
],
|
678 |
-
"text/plain": [
|
679 |
-
" timestamp datetime date hour temperature_2m \\\n",
|
680 |
-
"0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
|
681 |
-
"1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
|
682 |
-
"2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
|
683 |
-
"3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
|
684 |
-
"4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
|
685 |
-
".. ... ... ... ... ... \n",
|
686 |
-
"115 1715022000000 2024-05-06 19:00:00 2024-05-06 19 10.7 \n",
|
687 |
-
"116 1715025600000 2024-05-06 20:00:00 2024-05-06 20 10.1 \n",
|
688 |
-
"117 1715029200000 2024-05-06 21:00:00 2024-05-06 21 9.5 \n",
|
689 |
-
"118 1715032800000 2024-05-06 22:00:00 2024-05-06 22 9.3 \n",
|
690 |
-
"119 1715036400000 2024-05-06 23:00:00 2024-05-06 23 9.1 \n",
|
691 |
-
"\n",
|
692 |
-
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
693 |
-
"0 66.0 0.0 0.0 0.0 0.0 \n",
|
694 |
-
"1 71.0 0.0 0.0 0.0 0.0 \n",
|
695 |
-
"2 73.0 0.0 0.0 0.0 2.0 \n",
|
696 |
-
"3 72.0 0.1 0.1 0.0 51.0 \n",
|
697 |
-
"4 73.0 0.0 0.0 0.0 2.0 \n",
|
698 |
-
".. ... ... ... ... ... \n",
|
699 |
-
"115 91.0 1.4 1.4 0.0 61.0 \n",
|
700 |
-
"116 90.0 1.4 1.4 0.0 61.0 \n",
|
701 |
-
"117 88.0 1.4 1.4 0.0 61.0 \n",
|
702 |
-
"118 86.0 0.6 0.6 0.0 3.0 \n",
|
703 |
-
"119 84.0 0.6 0.6 0.0 3.0 \n",
|
704 |
-
"\n",
|
705 |
-
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
706 |
-
"0 13.0 21.6 41.4 \n",
|
707 |
-
"1 4.0 20.5 37.1 \n",
|
708 |
-
"2 70.0 21.2 36.7 \n",
|
709 |
-
"3 51.0 22.3 39.2 \n",
|
710 |
-
"4 78.0 21.6 38.9 \n",
|
711 |
-
".. ... ... ... \n",
|
712 |
-
"115 100.0 16.6 32.0 \n",
|
713 |
-
"116 100.0 19.5 37.1 \n",
|
714 |
-
"117 100.0 21.6 42.1 \n",
|
715 |
-
"118 100.0 22.0 41.0 \n",
|
716 |
-
"119 100.0 21.3 40.3 \n",
|
717 |
-
"\n",
|
718 |
-
"[120 rows x 13 columns]"
|
719 |
-
]
|
720 |
-
},
|
721 |
-
"execution_count": 27,
|
722 |
-
"metadata": {},
|
723 |
-
"output_type": "execute_result"
|
724 |
-
}
|
725 |
-
],
|
726 |
-
"source": [
|
727 |
-
"# Display the weather_forecast_df dataframe\n",
|
728 |
-
"weather_forecast_df"
|
729 |
-
]
|
730 |
-
},
|
731 |
-
{
|
732 |
-
"cell_type": "code",
|
733 |
-
"execution_count": 28,
|
734 |
-
"metadata": {},
|
735 |
-
"outputs": [
|
736 |
-
{
|
737 |
-
"data": {
|
738 |
-
"text/html": [
|
739 |
-
"<div>\n",
|
740 |
-
"<style scoped>\n",
|
741 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
742 |
-
" vertical-align: middle;\n",
|
743 |
-
" }\n",
|
744 |
-
"\n",
|
745 |
-
" .dataframe tbody tr th {\n",
|
746 |
-
" vertical-align: top;\n",
|
747 |
-
" }\n",
|
748 |
-
"\n",
|
749 |
-
" .dataframe thead th {\n",
|
750 |
-
" text-align: right;\n",
|
751 |
-
" }\n",
|
752 |
-
"</style>\n",
|
753 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
754 |
-
" <thead>\n",
|
755 |
-
" <tr style=\"text-align: right;\">\n",
|
756 |
-
" <th></th>\n",
|
757 |
-
" <th>timestamp</th>\n",
|
758 |
-
" <th>datetime</th>\n",
|
759 |
-
" <th>date</th>\n",
|
760 |
-
" <th>hour</th>\n",
|
761 |
-
" <th>temperature_2m</th>\n",
|
762 |
-
" <th>relative_humidity_2m</th>\n",
|
763 |
-
" <th>precipitation</th>\n",
|
764 |
-
" <th>rain</th>\n",
|
765 |
-
" <th>snowfall</th>\n",
|
766 |
-
" <th>weather_code</th>\n",
|
767 |
-
" <th>cloud_cover</th>\n",
|
768 |
-
" <th>wind_speed_10m</th>\n",
|
769 |
-
" <th>wind_gusts_10m</th>\n",
|
770 |
-
" </tr>\n",
|
771 |
-
" </thead>\n",
|
772 |
-
" <tbody>\n",
|
773 |
" <tr>\n",
|
774 |
-
" <th>
|
775 |
-
" <td>
|
776 |
-
" <td>2024-05-
|
777 |
-
" <td>2024-05-
|
778 |
-
" <td>
|
779 |
-
" <td>
|
780 |
-
" <td>
|
781 |
" <td>0.0</td>\n",
|
782 |
" <td>0.0</td>\n",
|
783 |
" <td>0.0</td>\n",
|
784 |
" <td>0.0</td>\n",
|
785 |
-
" <td>
|
786 |
-
" <td>
|
787 |
-
" <td>
|
788 |
" </tr>\n",
|
789 |
" <tr>\n",
|
790 |
-
" <th>
|
791 |
-
" <td>
|
792 |
-
" <td>2024-05-
|
793 |
-
" <td>2024-05-
|
794 |
-
" <td>
|
795 |
-
" <td>
|
796 |
-
" <td>
|
797 |
-
" <td>0.0</td>\n",
|
798 |
-
" <td>0.0</td>\n",
|
799 |
" <td>0.0</td>\n",
|
800 |
" <td>0.0</td>\n",
|
801 |
-
" <td>4.0</td>\n",
|
802 |
-
" <td>20.5</td>\n",
|
803 |
-
" <td>37.1</td>\n",
|
804 |
-
" </tr>\n",
|
805 |
-
" <tr>\n",
|
806 |
-
" <th>2</th>\n",
|
807 |
-
" <td>1714615200000</td>\n",
|
808 |
-
" <td>2024-05-02 02:00:00</td>\n",
|
809 |
-
" <td>2024-05-02</td>\n",
|
810 |
-
" <td>2</td>\n",
|
811 |
-
" <td>13.4</td>\n",
|
812 |
-
" <td>73.0</td>\n",
|
813 |
" <td>0.0</td>\n",
|
814 |
" <td>0.0</td>\n",
|
815 |
" <td>0.0</td>\n",
|
816 |
-
" <td>2
|
817 |
-
" <td>
|
818 |
-
" <td>21.2</td>\n",
|
819 |
-
" <td>36.7</td>\n",
|
820 |
" </tr>\n",
|
821 |
" <tr>\n",
|
822 |
-
" <th>
|
823 |
-
" <td>
|
824 |
-
" <td>2024-05-
|
825 |
-
" <td>2024-05-
|
826 |
-
" <td>
|
827 |
-
" <td>
|
828 |
-
" <td>
|
829 |
-
" <td>0.1</td>\n",
|
830 |
-
" <td>0.1</td>\n",
|
831 |
" <td>0.0</td>\n",
|
832 |
-
" <td>51.0</td>\n",
|
833 |
-
" <td>51.0</td>\n",
|
834 |
-
" <td>22.3</td>\n",
|
835 |
-
" <td>39.2</td>\n",
|
836 |
-
" </tr>\n",
|
837 |
-
" <tr>\n",
|
838 |
-
" <th>4</th>\n",
|
839 |
-
" <td>1714622400000</td>\n",
|
840 |
-
" <td>2024-05-02 04:00:00</td>\n",
|
841 |
-
" <td>2024-05-02</td>\n",
|
842 |
-
" <td>4</td>\n",
|
843 |
-
" <td>12.7</td>\n",
|
844 |
-
" <td>73.0</td>\n",
|
845 |
" <td>0.0</td>\n",
|
846 |
" <td>0.0</td>\n",
|
847 |
" <td>0.0</td>\n",
|
848 |
-
" <td>2.0</td>\n",
|
849 |
-
" <td>78.0</td>\n",
|
850 |
-
" <td>21.6</td>\n",
|
851 |
-
" <td>38.9</td>\n",
|
852 |
-
" </tr>\n",
|
853 |
-
" </tbody>\n",
|
854 |
-
"</table>\n",
|
855 |
-
"</div>"
|
856 |
-
],
|
857 |
-
"text/plain": [
|
858 |
-
" timestamp datetime date hour temperature_2m \\\n",
|
859 |
-
"0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
|
860 |
-
"1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
|
861 |
-
"2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
|
862 |
-
"3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
|
863 |
-
"4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
|
864 |
-
"\n",
|
865 |
-
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
866 |
-
"0 66.0 0.0 0.0 0.0 0.0 \n",
|
867 |
-
"1 71.0 0.0 0.0 0.0 0.0 \n",
|
868 |
-
"2 73.0 0.0 0.0 0.0 2.0 \n",
|
869 |
-
"3 72.0 0.1 0.1 0.0 51.0 \n",
|
870 |
-
"4 73.0 0.0 0.0 0.0 2.0 \n",
|
871 |
-
"\n",
|
872 |
-
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
873 |
-
"0 13.0 21.6 41.4 \n",
|
874 |
-
"1 4.0 20.5 37.1 \n",
|
875 |
-
"2 70.0 21.2 36.7 \n",
|
876 |
-
"3 51.0 22.3 39.2 \n",
|
877 |
-
"4 78.0 21.6 38.9 "
|
878 |
-
]
|
879 |
-
},
|
880 |
-
"execution_count": 28,
|
881 |
-
"metadata": {},
|
882 |
-
"output_type": "execute_result"
|
883 |
-
}
|
884 |
-
],
|
885 |
-
"source": [
|
886 |
-
"# Display the first 5 rows of the weather_forecast dataframe\n",
|
887 |
-
"weather_forecast_df.head(5)"
|
888 |
-
]
|
889 |
-
},
|
890 |
-
{
|
891 |
-
"cell_type": "code",
|
892 |
-
"execution_count": 29,
|
893 |
-
"metadata": {},
|
894 |
-
"outputs": [
|
895 |
-
{
|
896 |
-
"data": {
|
897 |
-
"text/html": [
|
898 |
-
"<div>\n",
|
899 |
-
"<style scoped>\n",
|
900 |
-
" .dataframe tbody tr th:only-of-type {\n",
|
901 |
-
" vertical-align: middle;\n",
|
902 |
-
" }\n",
|
903 |
-
"\n",
|
904 |
-
" .dataframe tbody tr th {\n",
|
905 |
-
" vertical-align: top;\n",
|
906 |
-
" }\n",
|
907 |
-
"\n",
|
908 |
-
" .dataframe thead th {\n",
|
909 |
-
" text-align: right;\n",
|
910 |
-
" }\n",
|
911 |
-
"</style>\n",
|
912 |
-
"<table border=\"1\" class=\"dataframe\">\n",
|
913 |
-
" <thead>\n",
|
914 |
-
" <tr style=\"text-align: right;\">\n",
|
915 |
-
" <th></th>\n",
|
916 |
-
" <th>timestamp</th>\n",
|
917 |
-
" <th>datetime</th>\n",
|
918 |
-
" <th>date</th>\n",
|
919 |
-
" <th>hour</th>\n",
|
920 |
-
" <th>temperature_2m</th>\n",
|
921 |
-
" <th>relative_humidity_2m</th>\n",
|
922 |
-
" <th>precipitation</th>\n",
|
923 |
-
" <th>rain</th>\n",
|
924 |
-
" <th>snowfall</th>\n",
|
925 |
-
" <th>weather_code</th>\n",
|
926 |
-
" <th>cloud_cover</th>\n",
|
927 |
-
" <th>wind_speed_10m</th>\n",
|
928 |
-
" <th>wind_gusts_10m</th>\n",
|
929 |
-
" </tr>\n",
|
930 |
-
" </thead>\n",
|
931 |
-
" <tbody>\n",
|
932 |
-
" <tr>\n",
|
933 |
-
" <th>115</th>\n",
|
934 |
-
" <td>1715022000000</td>\n",
|
935 |
-
" <td>2024-05-06 19:00:00</td>\n",
|
936 |
-
" <td>2024-05-06</td>\n",
|
937 |
-
" <td>19</td>\n",
|
938 |
-
" <td>10.7</td>\n",
|
939 |
-
" <td>91.0</td>\n",
|
940 |
-
" <td>1.4</td>\n",
|
941 |
-
" <td>1.4</td>\n",
|
942 |
" <td>0.0</td>\n",
|
943 |
-
" <td>
|
944 |
-
" <td>
|
945 |
-
" <td>16.6</td>\n",
|
946 |
-
" <td>32.0</td>\n",
|
947 |
" </tr>\n",
|
948 |
" <tr>\n",
|
949 |
-
" <th>
|
950 |
-
" <td>
|
951 |
-
" <td>2024-05-
|
952 |
-
" <td>2024-05-
|
953 |
-
" <td>
|
954 |
-
" <td>
|
955 |
-
" <td>
|
956 |
-
" <td>1.4</td>\n",
|
957 |
-
" <td>1.4</td>\n",
|
958 |
" <td>0.0</td>\n",
|
959 |
-
" <td>61.0</td>\n",
|
960 |
-
" <td>100.0</td>\n",
|
961 |
-
" <td>19.5</td>\n",
|
962 |
-
" <td>37.1</td>\n",
|
963 |
-
" </tr>\n",
|
964 |
-
" <tr>\n",
|
965 |
-
" <th>117</th>\n",
|
966 |
-
" <td>1715029200000</td>\n",
|
967 |
-
" <td>2024-05-06 21:00:00</td>\n",
|
968 |
-
" <td>2024-05-06</td>\n",
|
969 |
-
" <td>21</td>\n",
|
970 |
-
" <td>9.5</td>\n",
|
971 |
-
" <td>88.0</td>\n",
|
972 |
-
" <td>1.4</td>\n",
|
973 |
-
" <td>1.4</td>\n",
|
974 |
" <td>0.0</td>\n",
|
975 |
-
" <td>61.0</td>\n",
|
976 |
-
" <td>100.0</td>\n",
|
977 |
-
" <td>21.6</td>\n",
|
978 |
-
" <td>42.1</td>\n",
|
979 |
-
" </tr>\n",
|
980 |
-
" <tr>\n",
|
981 |
-
" <th>118</th>\n",
|
982 |
-
" <td>1715032800000</td>\n",
|
983 |
-
" <td>2024-05-06 22:00:00</td>\n",
|
984 |
-
" <td>2024-05-06</td>\n",
|
985 |
-
" <td>22</td>\n",
|
986 |
-
" <td>9.3</td>\n",
|
987 |
-
" <td>86.0</td>\n",
|
988 |
-
" <td>0.6</td>\n",
|
989 |
-
" <td>0.6</td>\n",
|
990 |
" <td>0.0</td>\n",
|
991 |
-
" <td>3.0</td>\n",
|
992 |
-
" <td>100.0</td>\n",
|
993 |
-
" <td>22.0</td>\n",
|
994 |
-
" <td>41.0</td>\n",
|
995 |
-
" </tr>\n",
|
996 |
-
" <tr>\n",
|
997 |
-
" <th>119</th>\n",
|
998 |
-
" <td>1715036400000</td>\n",
|
999 |
-
" <td>2024-05-06 23:00:00</td>\n",
|
1000 |
-
" <td>2024-05-06</td>\n",
|
1001 |
-
" <td>23</td>\n",
|
1002 |
-
" <td>9.1</td>\n",
|
1003 |
-
" <td>84.0</td>\n",
|
1004 |
-
" <td>0.6</td>\n",
|
1005 |
-
" <td>0.6</td>\n",
|
1006 |
" <td>0.0</td>\n",
|
1007 |
-
" <td>
|
1008 |
-
" <td>
|
1009 |
-
" <td>
|
1010 |
-
" <td>40.3</td>\n",
|
1011 |
" </tr>\n",
|
1012 |
" </tbody>\n",
|
1013 |
"</table>\n",
|
|
|
1014 |
"</div>"
|
1015 |
],
|
1016 |
"text/plain": [
|
1017 |
" timestamp datetime date hour temperature_2m \\\n",
|
1018 |
-
"
|
1019 |
-
"
|
1020 |
-
"
|
1021 |
-
"
|
1022 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
1023 |
"\n",
|
1024 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
1025 |
-
"
|
1026 |
-
"
|
1027 |
-
"
|
1028 |
-
"
|
1029 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
1030 |
"\n",
|
1031 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
1032 |
-
"
|
1033 |
-
"
|
1034 |
-
"
|
1035 |
-
"
|
1036 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1037 |
]
|
1038 |
},
|
1039 |
-
"execution_count":
|
1040 |
"metadata": {},
|
1041 |
"output_type": "execute_result"
|
1042 |
}
|
1043 |
],
|
1044 |
"source": [
|
1045 |
-
"
|
|
|
1046 |
]
|
1047 |
},
|
1048 |
{
|
@@ -1056,17 +680,16 @@
|
|
1056 |
},
|
1057 |
{
|
1058 |
"cell_type": "code",
|
1059 |
-
"execution_count":
|
1060 |
"metadata": {},
|
1061 |
"outputs": [
|
1062 |
{
|
1063 |
"name": "stdout",
|
1064 |
"output_type": "stream",
|
1065 |
"text": [
|
1066 |
-
"Connection closed.\n",
|
1067 |
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
1068 |
"\n",
|
1069 |
-
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/
|
1070 |
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
1071 |
]
|
1072 |
}
|
@@ -1084,7 +707,7 @@
|
|
1084 |
},
|
1085 |
{
|
1086 |
"cell_type": "code",
|
1087 |
-
"execution_count":
|
1088 |
"metadata": {},
|
1089 |
"outputs": [],
|
1090 |
"source": [
|
@@ -1094,11 +717,6 @@
|
|
1094 |
" version=1,\n",
|
1095 |
")\n",
|
1096 |
"\n",
|
1097 |
-
"# forecast_renewable_energy_fg = fs.get_feature_group(\n",
|
1098 |
-
"# name=\"forecast_renewable_energy\",\n",
|
1099 |
-
"# version=1,\n",
|
1100 |
-
"# )\n",
|
1101 |
-
"\n",
|
1102 |
"weather_fg = fs.get_feature_group(\n",
|
1103 |
" name=\"weather_measurements\",\n",
|
1104 |
" version=1,\n",
|
@@ -1115,15 +733,22 @@
|
|
1115 |
},
|
1116 |
{
|
1117 |
"cell_type": "code",
|
1118 |
-
"execution_count":
|
1119 |
"metadata": {},
|
1120 |
"outputs": [
|
1121 |
{
|
1122 |
-
"
|
1123 |
-
|
1124 |
-
|
1125 |
-
|
1126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1127 |
},
|
1128 |
{
|
1129 |
"name": "stdout",
|
@@ -1131,16 +756,16 @@
|
|
1131 |
"text": [
|
1132 |
"Launching job: electricity_prices_1_offline_fg_materialization\n",
|
1133 |
"Job started successfully, you can follow the progress at \n",
|
1134 |
-
"https://c.app.hopsworks.ai/p/
|
1135 |
]
|
1136 |
},
|
1137 |
{
|
1138 |
"data": {
|
1139 |
"text/plain": [
|
1140 |
-
"(<hsfs.core.job.Job at
|
1141 |
]
|
1142 |
},
|
1143 |
-
"execution_count":
|
1144 |
"metadata": {},
|
1145 |
"output_type": "execute_result"
|
1146 |
}
|
@@ -1153,26 +778,22 @@
|
|
1153 |
},
|
1154 |
{
|
1155 |
"cell_type": "code",
|
1156 |
-
"execution_count":
|
1157 |
-
"metadata": {},
|
1158 |
-
"outputs": [],
|
1159 |
-
"source": [
|
1160 |
-
"# # Inserting the forecast_renewable_energy_df into the feature group named forecast_renewable_energy_fg\n",
|
1161 |
-
"# forecast_renewable_energy_fg.insert(forecast_renewable_energy_df, \n",
|
1162 |
-
"# write_options={\"wait_for_job\" : False})"
|
1163 |
-
]
|
1164 |
-
},
|
1165 |
-
{
|
1166 |
-
"cell_type": "code",
|
1167 |
-
"execution_count": 34,
|
1168 |
"metadata": {},
|
1169 |
"outputs": [
|
1170 |
{
|
1171 |
-
"
|
1172 |
-
|
1173 |
-
|
1174 |
-
|
1175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1176 |
},
|
1177 |
{
|
1178 |
"name": "stdout",
|
@@ -1180,16 +801,16 @@
|
|
1180 |
"text": [
|
1181 |
"Launching job: weather_measurements_1_offline_fg_materialization\n",
|
1182 |
"Job started successfully, you can follow the progress at \n",
|
1183 |
-
"https://c.app.hopsworks.ai/p/
|
1184 |
]
|
1185 |
},
|
1186 |
{
|
1187 |
"data": {
|
1188 |
"text/plain": [
|
1189 |
-
"(<hsfs.core.job.Job at
|
1190 |
]
|
1191 |
},
|
1192 |
-
"execution_count":
|
1193 |
"metadata": {},
|
1194 |
"output_type": "execute_result"
|
1195 |
}
|
|
|
27 |
},
|
28 |
{
|
29 |
"cell_type": "code",
|
30 |
+
"execution_count": 1,
|
31 |
"metadata": {},
|
32 |
"outputs": [
|
33 |
{
|
34 |
"name": "stdout",
|
35 |
"output_type": "stream",
|
36 |
"text": [
|
37 |
+
"/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-\n",
|
38 |
+
"/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-/notebooks\n"
|
39 |
]
|
40 |
}
|
41 |
],
|
|
|
53 |
},
|
54 |
{
|
55 |
"cell_type": "code",
|
56 |
+
"execution_count": 2,
|
57 |
"metadata": {},
|
58 |
"outputs": [],
|
59 |
"source": [
|
|
|
88 |
},
|
89 |
{
|
90 |
"cell_type": "code",
|
91 |
+
"execution_count": 3,
|
92 |
"metadata": {},
|
93 |
"outputs": [],
|
94 |
"source": [
|
|
|
101 |
},
|
102 |
{
|
103 |
"cell_type": "code",
|
104 |
+
"execution_count": 4,
|
105 |
"metadata": {},
|
106 |
"outputs": [
|
107 |
{
|
|
|
135 |
" <tbody>\n",
|
136 |
" <tr>\n",
|
137 |
" <th>0</th>\n",
|
138 |
+
" <td>1714694400000</td>\n",
|
139 |
+
" <td>2024-05-03 00:00:00</td>\n",
|
140 |
+
" <td>2024-05-03</td>\n",
|
141 |
" <td>0</td>\n",
|
142 |
+
" <td>0.22214</td>\n",
|
143 |
" </tr>\n",
|
144 |
" <tr>\n",
|
145 |
" <th>1</th>\n",
|
146 |
+
" <td>1714698000000</td>\n",
|
147 |
+
" <td>2024-05-03 01:00:00</td>\n",
|
148 |
+
" <td>2024-05-03</td>\n",
|
149 |
" <td>1</td>\n",
|
150 |
+
" <td>0.21893</td>\n",
|
151 |
" </tr>\n",
|
152 |
" <tr>\n",
|
153 |
" <th>2</th>\n",
|
154 |
+
" <td>1714701600000</td>\n",
|
155 |
+
" <td>2024-05-03 02:00:00</td>\n",
|
156 |
+
" <td>2024-05-03</td>\n",
|
157 |
" <td>2</td>\n",
|
158 |
+
" <td>0.22348</td>\n",
|
159 |
" </tr>\n",
|
160 |
" <tr>\n",
|
161 |
" <th>3</th>\n",
|
162 |
+
" <td>1714705200000</td>\n",
|
163 |
+
" <td>2024-05-03 03:00:00</td>\n",
|
164 |
+
" <td>2024-05-03</td>\n",
|
165 |
" <td>3</td>\n",
|
166 |
+
" <td>0.22385</td>\n",
|
167 |
" </tr>\n",
|
168 |
" <tr>\n",
|
169 |
" <th>4</th>\n",
|
170 |
+
" <td>1714708800000</td>\n",
|
171 |
+
" <td>2024-05-03 04:00:00</td>\n",
|
172 |
+
" <td>2024-05-03</td>\n",
|
173 |
" <td>4</td>\n",
|
174 |
+
" <td>0.22706</td>\n",
|
175 |
" </tr>\n",
|
176 |
" <tr>\n",
|
177 |
" <th>5</th>\n",
|
178 |
+
" <td>1714712400000</td>\n",
|
179 |
+
" <td>2024-05-03 05:00:00</td>\n",
|
180 |
+
" <td>2024-05-03</td>\n",
|
181 |
" <td>5</td>\n",
|
182 |
+
" <td>0.23825</td>\n",
|
183 |
" </tr>\n",
|
184 |
" <tr>\n",
|
185 |
" <th>6</th>\n",
|
186 |
+
" <td>1714716000000</td>\n",
|
187 |
+
" <td>2024-05-03 06:00:00</td>\n",
|
188 |
+
" <td>2024-05-03</td>\n",
|
189 |
" <td>6</td>\n",
|
190 |
+
" <td>0.26167</td>\n",
|
191 |
" </tr>\n",
|
192 |
" <tr>\n",
|
193 |
" <th>7</th>\n",
|
194 |
+
" <td>1714719600000</td>\n",
|
195 |
+
" <td>2024-05-03 07:00:00</td>\n",
|
196 |
+
" <td>2024-05-03</td>\n",
|
197 |
" <td>7</td>\n",
|
198 |
+
" <td>0.32045</td>\n",
|
199 |
" </tr>\n",
|
200 |
" <tr>\n",
|
201 |
" <th>8</th>\n",
|
202 |
+
" <td>1714723200000</td>\n",
|
203 |
+
" <td>2024-05-03 08:00:00</td>\n",
|
204 |
+
" <td>2024-05-03</td>\n",
|
205 |
" <td>8</td>\n",
|
206 |
+
" <td>0.31881</td>\n",
|
207 |
" </tr>\n",
|
208 |
" <tr>\n",
|
209 |
" <th>9</th>\n",
|
210 |
+
" <td>1714726800000</td>\n",
|
211 |
+
" <td>2024-05-03 09:00:00</td>\n",
|
212 |
+
" <td>2024-05-03</td>\n",
|
213 |
" <td>9</td>\n",
|
214 |
+
" <td>0.28860</td>\n",
|
215 |
" </tr>\n",
|
216 |
" <tr>\n",
|
217 |
" <th>10</th>\n",
|
218 |
+
" <td>1714730400000</td>\n",
|
219 |
+
" <td>2024-05-03 10:00:00</td>\n",
|
220 |
+
" <td>2024-05-03</td>\n",
|
221 |
" <td>10</td>\n",
|
222 |
+
" <td>0.28413</td>\n",
|
223 |
" </tr>\n",
|
224 |
" <tr>\n",
|
225 |
" <th>11</th>\n",
|
226 |
+
" <td>1714734000000</td>\n",
|
227 |
+
" <td>2024-05-03 11:00:00</td>\n",
|
228 |
+
" <td>2024-05-03</td>\n",
|
229 |
" <td>11</td>\n",
|
230 |
+
" <td>0.25339</td>\n",
|
231 |
" </tr>\n",
|
232 |
" <tr>\n",
|
233 |
" <th>12</th>\n",
|
234 |
+
" <td>1714737600000</td>\n",
|
235 |
+
" <td>2024-05-03 12:00:00</td>\n",
|
236 |
+
" <td>2024-05-03</td>\n",
|
237 |
" <td>12</td>\n",
|
238 |
+
" <td>0.25324</td>\n",
|
239 |
" </tr>\n",
|
240 |
" <tr>\n",
|
241 |
" <th>13</th>\n",
|
242 |
+
" <td>1714741200000</td>\n",
|
243 |
+
" <td>2024-05-03 13:00:00</td>\n",
|
244 |
+
" <td>2024-05-03</td>\n",
|
245 |
" <td>13</td>\n",
|
246 |
+
" <td>0.24325</td>\n",
|
247 |
" </tr>\n",
|
248 |
" <tr>\n",
|
249 |
" <th>14</th>\n",
|
250 |
+
" <td>1714744800000</td>\n",
|
251 |
+
" <td>2024-05-03 14:00:00</td>\n",
|
252 |
+
" <td>2024-05-03</td>\n",
|
253 |
" <td>14</td>\n",
|
254 |
+
" <td>0.23698</td>\n",
|
255 |
" </tr>\n",
|
256 |
" <tr>\n",
|
257 |
" <th>15</th>\n",
|
258 |
+
" <td>1714748400000</td>\n",
|
259 |
+
" <td>2024-05-03 15:00:00</td>\n",
|
260 |
+
" <td>2024-05-03</td>\n",
|
261 |
" <td>15</td>\n",
|
262 |
+
" <td>0.22751</td>\n",
|
263 |
" </tr>\n",
|
264 |
" <tr>\n",
|
265 |
" <th>16</th>\n",
|
266 |
+
" <td>1714752000000</td>\n",
|
267 |
+
" <td>2024-05-03 16:00:00</td>\n",
|
268 |
+
" <td>2024-05-03</td>\n",
|
269 |
" <td>16</td>\n",
|
270 |
+
" <td>0.22676</td>\n",
|
271 |
" </tr>\n",
|
272 |
" <tr>\n",
|
273 |
" <th>17</th>\n",
|
274 |
+
" <td>1714755600000</td>\n",
|
275 |
+
" <td>2024-05-03 17:00:00</td>\n",
|
276 |
+
" <td>2024-05-03</td>\n",
|
277 |
" <td>17</td>\n",
|
278 |
+
" <td>0.34283</td>\n",
|
279 |
" </tr>\n",
|
280 |
" <tr>\n",
|
281 |
" <th>18</th>\n",
|
282 |
+
" <td>1714759200000</td>\n",
|
283 |
+
" <td>2024-05-03 18:00:00</td>\n",
|
284 |
+
" <td>2024-05-03</td>\n",
|
285 |
" <td>18</td>\n",
|
286 |
+
" <td>0.60010</td>\n",
|
287 |
" </tr>\n",
|
288 |
" <tr>\n",
|
289 |
" <th>19</th>\n",
|
290 |
+
" <td>1714762800000</td>\n",
|
291 |
+
" <td>2024-05-03 19:00:00</td>\n",
|
292 |
+
" <td>2024-05-03</td>\n",
|
293 |
" <td>19</td>\n",
|
294 |
+
" <td>0.72356</td>\n",
|
295 |
" </tr>\n",
|
296 |
" <tr>\n",
|
297 |
" <th>20</th>\n",
|
298 |
+
" <td>1714766400000</td>\n",
|
299 |
+
" <td>2024-05-03 20:00:00</td>\n",
|
300 |
+
" <td>2024-05-03</td>\n",
|
301 |
" <td>20</td>\n",
|
302 |
+
" <td>0.82068</td>\n",
|
303 |
" </tr>\n",
|
304 |
" <tr>\n",
|
305 |
" <th>21</th>\n",
|
306 |
+
" <td>1714770000000</td>\n",
|
307 |
+
" <td>2024-05-03 21:00:00</td>\n",
|
308 |
+
" <td>2024-05-03</td>\n",
|
309 |
" <td>21</td>\n",
|
310 |
+
" <td>0.78524</td>\n",
|
311 |
" </tr>\n",
|
312 |
" <tr>\n",
|
313 |
" <th>22</th>\n",
|
314 |
+
" <td>1714773600000</td>\n",
|
315 |
+
" <td>2024-05-03 22:00:00</td>\n",
|
316 |
+
" <td>2024-05-03</td>\n",
|
317 |
" <td>22</td>\n",
|
318 |
+
" <td>0.68119</td>\n",
|
319 |
" </tr>\n",
|
320 |
" <tr>\n",
|
321 |
" <th>23</th>\n",
|
322 |
+
" <td>1714777200000</td>\n",
|
323 |
+
" <td>2024-05-03 23:00:00</td>\n",
|
324 |
+
" <td>2024-05-03</td>\n",
|
325 |
" <td>23</td>\n",
|
326 |
+
" <td>0.63822</td>\n",
|
327 |
" </tr>\n",
|
328 |
" </tbody>\n",
|
329 |
"</table>\n",
|
|
|
331 |
],
|
332 |
"text/plain": [
|
333 |
" timestamp datetime date hour dk1_spotpricedkk_kwh\n",
|
334 |
+
"0 1714694400000 2024-05-03 00:00:00 2024-05-03 0 0.22214\n",
|
335 |
+
"1 1714698000000 2024-05-03 01:00:00 2024-05-03 1 0.21893\n",
|
336 |
+
"2 1714701600000 2024-05-03 02:00:00 2024-05-03 2 0.22348\n",
|
337 |
+
"3 1714705200000 2024-05-03 03:00:00 2024-05-03 3 0.22385\n",
|
338 |
+
"4 1714708800000 2024-05-03 04:00:00 2024-05-03 4 0.22706\n",
|
339 |
+
"5 1714712400000 2024-05-03 05:00:00 2024-05-03 5 0.23825\n",
|
340 |
+
"6 1714716000000 2024-05-03 06:00:00 2024-05-03 6 0.26167\n",
|
341 |
+
"7 1714719600000 2024-05-03 07:00:00 2024-05-03 7 0.32045\n",
|
342 |
+
"8 1714723200000 2024-05-03 08:00:00 2024-05-03 8 0.31881\n",
|
343 |
+
"9 1714726800000 2024-05-03 09:00:00 2024-05-03 9 0.28860\n",
|
344 |
+
"10 1714730400000 2024-05-03 10:00:00 2024-05-03 10 0.28413\n",
|
345 |
+
"11 1714734000000 2024-05-03 11:00:00 2024-05-03 11 0.25339\n",
|
346 |
+
"12 1714737600000 2024-05-03 12:00:00 2024-05-03 12 0.25324\n",
|
347 |
+
"13 1714741200000 2024-05-03 13:00:00 2024-05-03 13 0.24325\n",
|
348 |
+
"14 1714744800000 2024-05-03 14:00:00 2024-05-03 14 0.23698\n",
|
349 |
+
"15 1714748400000 2024-05-03 15:00:00 2024-05-03 15 0.22751\n",
|
350 |
+
"16 1714752000000 2024-05-03 16:00:00 2024-05-03 16 0.22676\n",
|
351 |
+
"17 1714755600000 2024-05-03 17:00:00 2024-05-03 17 0.34283\n",
|
352 |
+
"18 1714759200000 2024-05-03 18:00:00 2024-05-03 18 0.60010\n",
|
353 |
+
"19 1714762800000 2024-05-03 19:00:00 2024-05-03 19 0.72356\n",
|
354 |
+
"20 1714766400000 2024-05-03 20:00:00 2024-05-03 20 0.82068\n",
|
355 |
+
"21 1714770000000 2024-05-03 21:00:00 2024-05-03 21 0.78524\n",
|
356 |
+
"22 1714773600000 2024-05-03 22:00:00 2024-05-03 22 0.68119\n",
|
357 |
+
"23 1714777200000 2024-05-03 23:00:00 2024-05-03 23 0.63822"
|
358 |
]
|
359 |
},
|
360 |
+
"execution_count": 4,
|
361 |
"metadata": {},
|
362 |
"output_type": "execute_result"
|
363 |
}
|
|
|
367 |
"electricity_df"
|
368 |
]
|
369 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
{
|
371 |
"cell_type": "markdown",
|
372 |
"metadata": {},
|
|
|
378 |
"cell_type": "markdown",
|
379 |
"metadata": {},
|
380 |
"source": [
|
381 |
+
"#### <span style=\"color:#2656a3;\"> 🌈 Forecast Weather Measures"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
]
|
383 |
},
|
384 |
{
|
385 |
"cell_type": "code",
|
386 |
+
"execution_count": 5,
|
387 |
"metadata": {},
|
388 |
"outputs": [],
|
389 |
"source": [
|
|
|
395 |
},
|
396 |
{
|
397 |
"cell_type": "code",
|
398 |
+
"execution_count": 6,
|
399 |
"metadata": {},
|
400 |
"outputs": [
|
401 |
{
|
|
|
437 |
" <tbody>\n",
|
438 |
" <tr>\n",
|
439 |
" <th>0</th>\n",
|
440 |
+
" <td>1714694400000</td>\n",
|
441 |
+
" <td>2024-05-03 00:00:00</td>\n",
|
442 |
+
" <td>2024-05-03</td>\n",
|
443 |
" <td>0</td>\n",
|
444 |
+
" <td>14.3</td>\n",
|
445 |
+
" <td>65.0</td>\n",
|
446 |
" <td>0.0</td>\n",
|
447 |
" <td>0.0</td>\n",
|
448 |
" <td>0.0</td>\n",
|
449 |
+
" <td>1.0</td>\n",
|
450 |
+
" <td>25.0</td>\n",
|
451 |
+
" <td>20.5</td>\n",
|
452 |
+
" <td>36.0</td>\n",
|
453 |
" </tr>\n",
|
454 |
" <tr>\n",
|
455 |
" <th>1</th>\n",
|
456 |
+
" <td>1714698000000</td>\n",
|
457 |
+
" <td>2024-05-03 01:00:00</td>\n",
|
458 |
+
" <td>2024-05-03</td>\n",
|
459 |
" <td>1</td>\n",
|
460 |
+
" <td>13.6</td>\n",
|
461 |
+
" <td>69.0</td>\n",
|
462 |
" <td>0.0</td>\n",
|
463 |
" <td>0.0</td>\n",
|
464 |
" <td>0.0</td>\n",
|
465 |
" <td>0.0</td>\n",
|
466 |
+
" <td>12.0</td>\n",
|
467 |
+
" <td>21.6</td>\n",
|
468 |
+
" <td>37.4</td>\n",
|
469 |
" </tr>\n",
|
470 |
" <tr>\n",
|
471 |
" <th>2</th>\n",
|
472 |
+
" <td>1714701600000</td>\n",
|
473 |
+
" <td>2024-05-03 02:00:00</td>\n",
|
474 |
+
" <td>2024-05-03</td>\n",
|
475 |
" <td>2</td>\n",
|
476 |
+
" <td>13.0</td>\n",
|
477 |
+
" <td>72.0</td>\n",
|
478 |
" <td>0.0</td>\n",
|
479 |
" <td>0.0</td>\n",
|
480 |
" <td>0.0</td>\n",
|
481 |
+
" <td>0.0</td>\n",
|
482 |
+
" <td>7.0</td>\n",
|
483 |
+
" <td>20.9</td>\n",
|
484 |
+
" <td>37.4</td>\n",
|
485 |
" </tr>\n",
|
486 |
" <tr>\n",
|
487 |
" <th>3</th>\n",
|
488 |
+
" <td>1714705200000</td>\n",
|
489 |
+
" <td>2024-05-03 03:00:00</td>\n",
|
490 |
+
" <td>2024-05-03</td>\n",
|
491 |
" <td>3</td>\n",
|
492 |
+
" <td>12.7</td>\n",
|
493 |
+
" <td>73.0</td>\n",
|
494 |
+
" <td>0.0</td>\n",
|
495 |
+
" <td>0.0</td>\n",
|
496 |
" <td>0.0</td>\n",
|
497 |
+
" <td>1.0</td>\n",
|
498 |
+
" <td>26.0</td>\n",
|
499 |
+
" <td>19.8</td>\n",
|
500 |
+
" <td>34.6</td>\n",
|
501 |
" </tr>\n",
|
502 |
" <tr>\n",
|
503 |
" <th>4</th>\n",
|
504 |
+
" <td>1714708800000</td>\n",
|
505 |
+
" <td>2024-05-03 04:00:00</td>\n",
|
506 |
+
" <td>2024-05-03</td>\n",
|
507 |
" <td>4</td>\n",
|
508 |
+
" <td>12.4</td>\n",
|
509 |
" <td>73.0</td>\n",
|
510 |
" <td>0.0</td>\n",
|
511 |
" <td>0.0</td>\n",
|
512 |
" <td>0.0</td>\n",
|
513 |
" <td>2.0</td>\n",
|
514 |
+
" <td>54.0</td>\n",
|
515 |
+
" <td>18.7</td>\n",
|
516 |
+
" <td>33.8</td>\n",
|
517 |
" </tr>\n",
|
518 |
" <tr>\n",
|
519 |
" <th>...</th>\n",
|
|
|
533 |
" </tr>\n",
|
534 |
" <tr>\n",
|
535 |
" <th>115</th>\n",
|
536 |
+
" <td>1715108400000</td>\n",
|
537 |
+
" <td>2024-05-07 19:00:00</td>\n",
|
538 |
+
" <td>2024-05-07</td>\n",
|
539 |
" <td>19</td>\n",
|
540 |
+
" <td>12.0</td>\n",
|
541 |
+
" <td>41.0</td>\n",
|
|
|
|
|
542 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
543 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
544 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
545 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
" <td>0.0</td>\n",
|
547 |
+
" <td>4.2</td>\n",
|
548 |
+
" <td>10.8</td>\n",
|
|
|
|
|
549 |
" </tr>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
550 |
" <tr>\n",
|
551 |
+
" <th>116</th>\n",
|
552 |
+
" <td>1715112000000</td>\n",
|
553 |
+
" <td>2024-05-07 20:00:00</td>\n",
|
554 |
+
" <td>2024-05-07</td>\n",
|
555 |
+
" <td>20</td>\n",
|
556 |
+
" <td>10.7</td>\n",
|
557 |
+
" <td>49.0</td>\n",
|
558 |
" <td>0.0</td>\n",
|
559 |
" <td>0.0</td>\n",
|
560 |
" <td>0.0</td>\n",
|
561 |
" <td>0.0</td>\n",
|
562 |
+
" <td>0.0</td>\n",
|
563 |
+
" <td>3.6</td>\n",
|
564 |
+
" <td>8.3</td>\n",
|
565 |
" </tr>\n",
|
566 |
" <tr>\n",
|
567 |
+
" <th>117</th>\n",
|
568 |
+
" <td>1715115600000</td>\n",
|
569 |
+
" <td>2024-05-07 21:00:00</td>\n",
|
570 |
+
" <td>2024-05-07</td>\n",
|
571 |
+
" <td>21</td>\n",
|
572 |
+
" <td>9.6</td>\n",
|
573 |
+
" <td>56.0</td>\n",
|
|
|
|
|
574 |
" <td>0.0</td>\n",
|
575 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
576 |
" <td>0.0</td>\n",
|
577 |
" <td>0.0</td>\n",
|
578 |
" <td>0.0</td>\n",
|
579 |
+
" <td>3.2</td>\n",
|
580 |
+
" <td>5.4</td>\n",
|
|
|
|
|
581 |
" </tr>\n",
|
582 |
" <tr>\n",
|
583 |
+
" <th>118</th>\n",
|
584 |
+
" <td>1715119200000</td>\n",
|
585 |
+
" <td>2024-05-07 22:00:00</td>\n",
|
586 |
+
" <td>2024-05-07</td>\n",
|
587 |
+
" <td>22</td>\n",
|
588 |
+
" <td>8.7</td>\n",
|
589 |
+
" <td>58.0</td>\n",
|
|
|
|
|
590 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
591 |
" <td>0.0</td>\n",
|
592 |
" <td>0.0</td>\n",
|
593 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
594 |
" <td>0.0</td>\n",
|
595 |
+
" <td>3.3</td>\n",
|
596 |
+
" <td>5.8</td>\n",
|
|
|
|
|
597 |
" </tr>\n",
|
598 |
" <tr>\n",
|
599 |
+
" <th>119</th>\n",
|
600 |
+
" <td>1715122800000</td>\n",
|
601 |
+
" <td>2024-05-07 23:00:00</td>\n",
|
602 |
+
" <td>2024-05-07</td>\n",
|
603 |
+
" <td>23</td>\n",
|
604 |
+
" <td>7.9</td>\n",
|
605 |
+
" <td>57.0</td>\n",
|
|
|
|
|
606 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
608 |
" <td>0.0</td>\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
609 |
" <td>0.0</td>\n",
|
610 |
+
" <td>0.0</td>\n",
|
611 |
+
" <td>3.8</td>\n",
|
612 |
+
" <td>6.5</td>\n",
|
|
|
613 |
" </tr>\n",
|
614 |
" </tbody>\n",
|
615 |
"</table>\n",
|
616 |
+
"<p>120 rows × 13 columns</p>\n",
|
617 |
"</div>"
|
618 |
],
|
619 |
"text/plain": [
|
620 |
" timestamp datetime date hour temperature_2m \\\n",
|
621 |
+
"0 1714694400000 2024-05-03 00:00:00 2024-05-03 0 14.3 \n",
|
622 |
+
"1 1714698000000 2024-05-03 01:00:00 2024-05-03 1 13.6 \n",
|
623 |
+
"2 1714701600000 2024-05-03 02:00:00 2024-05-03 2 13.0 \n",
|
624 |
+
"3 1714705200000 2024-05-03 03:00:00 2024-05-03 3 12.7 \n",
|
625 |
+
"4 1714708800000 2024-05-03 04:00:00 2024-05-03 4 12.4 \n",
|
626 |
+
".. ... ... ... ... ... \n",
|
627 |
+
"115 1715108400000 2024-05-07 19:00:00 2024-05-07 19 12.0 \n",
|
628 |
+
"116 1715112000000 2024-05-07 20:00:00 2024-05-07 20 10.7 \n",
|
629 |
+
"117 1715115600000 2024-05-07 21:00:00 2024-05-07 21 9.6 \n",
|
630 |
+
"118 1715119200000 2024-05-07 22:00:00 2024-05-07 22 8.7 \n",
|
631 |
+
"119 1715122800000 2024-05-07 23:00:00 2024-05-07 23 7.9 \n",
|
632 |
"\n",
|
633 |
" relative_humidity_2m precipitation rain snowfall weather_code \\\n",
|
634 |
+
"0 65.0 0.0 0.0 0.0 1.0 \n",
|
635 |
+
"1 69.0 0.0 0.0 0.0 0.0 \n",
|
636 |
+
"2 72.0 0.0 0.0 0.0 0.0 \n",
|
637 |
+
"3 73.0 0.0 0.0 0.0 1.0 \n",
|
638 |
+
"4 73.0 0.0 0.0 0.0 2.0 \n",
|
639 |
+
".. ... ... ... ... ... \n",
|
640 |
+
"115 41.0 0.0 0.0 0.0 0.0 \n",
|
641 |
+
"116 49.0 0.0 0.0 0.0 0.0 \n",
|
642 |
+
"117 56.0 0.0 0.0 0.0 0.0 \n",
|
643 |
+
"118 58.0 0.0 0.0 0.0 0.0 \n",
|
644 |
+
"119 57.0 0.0 0.0 0.0 0.0 \n",
|
645 |
"\n",
|
646 |
" cloud_cover wind_speed_10m wind_gusts_10m \n",
|
647 |
+
"0 25.0 20.5 36.0 \n",
|
648 |
+
"1 12.0 21.6 37.4 \n",
|
649 |
+
"2 7.0 20.9 37.4 \n",
|
650 |
+
"3 26.0 19.8 34.6 \n",
|
651 |
+
"4 54.0 18.7 33.8 \n",
|
652 |
+
".. ... ... ... \n",
|
653 |
+
"115 0.0 4.2 10.8 \n",
|
654 |
+
"116 0.0 3.6 8.3 \n",
|
655 |
+
"117 0.0 3.2 5.4 \n",
|
656 |
+
"118 0.0 3.3 5.8 \n",
|
657 |
+
"119 0.0 3.8 6.5 \n",
|
658 |
+
"\n",
|
659 |
+
"[120 rows x 13 columns]"
|
660 |
]
|
661 |
},
|
662 |
+
"execution_count": 6,
|
663 |
"metadata": {},
|
664 |
"output_type": "execute_result"
|
665 |
}
|
666 |
],
|
667 |
"source": [
|
668 |
+
"# Display the weather forecast dataframe\n",
|
669 |
+
"weather_forecast_df"
|
670 |
]
|
671 |
},
|
672 |
{
|
|
|
680 |
},
|
681 |
{
|
682 |
"cell_type": "code",
|
683 |
+
"execution_count": 7,
|
684 |
"metadata": {},
|
685 |
"outputs": [
|
686 |
{
|
687 |
"name": "stdout",
|
688 |
"output_type": "stream",
|
689 |
"text": [
|
|
|
690 |
"Connected. Call `.close()` to terminate connection gracefully.\n",
|
691 |
"\n",
|
692 |
+
"Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/554133\n",
|
693 |
"Connected. Call `.close()` to terminate connection gracefully.\n"
|
694 |
]
|
695 |
}
|
|
|
707 |
},
|
708 |
{
|
709 |
"cell_type": "code",
|
710 |
+
"execution_count": 8,
|
711 |
"metadata": {},
|
712 |
"outputs": [],
|
713 |
"source": [
|
|
|
717 |
" version=1,\n",
|
718 |
")\n",
|
719 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
720 |
"weather_fg = fs.get_feature_group(\n",
|
721 |
" name=\"weather_measurements\",\n",
|
722 |
" version=1,\n",
|
|
|
733 |
},
|
734 |
{
|
735 |
"cell_type": "code",
|
736 |
+
"execution_count": 9,
|
737 |
"metadata": {},
|
738 |
"outputs": [
|
739 |
{
|
740 |
+
"data": {
|
741 |
+
"application/vnd.jupyter.widget-view+json": {
|
742 |
+
"model_id": "14af0030d68542cdae43f516e0e0f7a7",
|
743 |
+
"version_major": 2,
|
744 |
+
"version_minor": 0
|
745 |
+
},
|
746 |
+
"text/plain": [
|
747 |
+
"Uploading Dataframe: 0.00% | | Rows 0/24 | Elapsed Time: 00:00 | Remaining Time: ?"
|
748 |
+
]
|
749 |
+
},
|
750 |
+
"metadata": {},
|
751 |
+
"output_type": "display_data"
|
752 |
},
|
753 |
{
|
754 |
"name": "stdout",
|
|
|
756 |
"text": [
|
757 |
"Launching job: electricity_prices_1_offline_fg_materialization\n",
|
758 |
"Job started successfully, you can follow the progress at \n",
|
759 |
+
"https://c.app.hopsworks.ai/p/554133/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
|
760 |
]
|
761 |
},
|
762 |
{
|
763 |
"data": {
|
764 |
"text/plain": [
|
765 |
+
"(<hsfs.core.job.Job at 0x3058ab890>, None)"
|
766 |
]
|
767 |
},
|
768 |
+
"execution_count": 9,
|
769 |
"metadata": {},
|
770 |
"output_type": "execute_result"
|
771 |
}
|
|
|
778 |
},
|
779 |
{
|
780 |
"cell_type": "code",
|
781 |
+
"execution_count": 10,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
782 |
"metadata": {},
|
783 |
"outputs": [
|
784 |
{
|
785 |
+
"data": {
|
786 |
+
"application/vnd.jupyter.widget-view+json": {
|
787 |
+
"model_id": "81dfae8d4c1942aaba0d1b0ff7917720",
|
788 |
+
"version_major": 2,
|
789 |
+
"version_minor": 0
|
790 |
+
},
|
791 |
+
"text/plain": [
|
792 |
+
"Uploading Dataframe: 0.00% | | Rows 0/120 | Elapsed Time: 00:00 | Remaining Time: ?"
|
793 |
+
]
|
794 |
+
},
|
795 |
+
"metadata": {},
|
796 |
+
"output_type": "display_data"
|
797 |
},
|
798 |
{
|
799 |
"name": "stdout",
|
|
|
801 |
"text": [
|
802 |
"Launching job: weather_measurements_1_offline_fg_materialization\n",
|
803 |
"Job started successfully, you can follow the progress at \n",
|
804 |
+
"https://c.app.hopsworks.ai/p/554133/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
|
805 |
]
|
806 |
},
|
807 |
{
|
808 |
"data": {
|
809 |
"text/plain": [
|
810 |
+
"(<hsfs.core.job.Job at 0x3058f5d10>, None)"
|
811 |
]
|
812 |
},
|
813 |
+
"execution_count": 10,
|
814 |
"metadata": {},
|
815 |
"output_type": "execute_result"
|
816 |
}
|
notebooks/3_training_pipeline.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/4_batch_inference.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/model/dk_electricity_model.pkl
CHANGED
Binary files a/notebooks/model/dk_electricity_model.pkl and b/notebooks/model/dk_electricity_model.pkl differ
|
|
notebooks/test.ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
scripts/run_feature_and_prediction_pipelines.sh
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
|
3 |
+
set -e
|
4 |
+
|
5 |
+
cd notebooks
|
6 |
+
|
7 |
+
# Run the feature pipeline
|
8 |
+
jupyter nbconvert --to notebook --execute 2_feature_pipeline.ipynb
|
9 |
+
|
10 |
+
# Run the batch inference pipeline
|
11 |
+
jupyter nbconvert --to notebook --execute 4_batch_inference.ipynb
|