tobiasmj97 commited on
Commit
66accd2
·
1 Parent(s): a8eee80

changes to project

Browse files
.github/workflows/features-and-predictions.yml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: electricity-feature-and-prediction-pipelines
2
+
3
+ on:
4
+
5
+ # To run this workflow manually from the Actions tab
6
+ workflow_dispatch:
7
+
8
+ # Schedule the workflow to run at 23:50 everyday
9
+ schedule:
10
+ - cron: '50 23 * * *'
11
+
12
+ jobs:
13
+ test_schedule:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - name: checkout repo content
17
+ uses: actions/checkout@v2
18
+
19
+ - name: setup python
20
+ uses: actions/setup-python@v2
21
+ with:
22
+ python-version: '3.11.5'
23
+
24
+ - name: install python packages
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install -r requirements.txt
28
+
29
+ - name: execute python workflows from bash script
30
+ env:
31
+ HOPSWORKS_API_KEY: ${{ secrets.HOPSWORKS_API_KEY }}
32
+ run: ./scripts/run_feature_and_prediction_pipelines.sh
33
+
34
+
35
+
.github/workflows/sync_to_hf.yml CHANGED
@@ -1,13 +1,17 @@
1
  name: Sync to Hugging Face hub
2
  on:
 
 
 
 
 
3
  schedule:
4
- - cron: '0 0 * * *' # At 00:00 everyday
 
 
5
  push:
6
  branches: [main]
7
 
8
- # to run this workflow manually from the Actions tab
9
- workflow_dispatch:
10
-
11
  jobs:
12
  sync-to-hub:
13
  runs-on: ubuntu-latest
 
1
  name: Sync to Hugging Face hub
2
  on:
3
+
4
+ # To run this workflow manually from the Actions tab
5
+ workflow_dispatch:
6
+
7
+ # Schedule the workflow to run at 23:59 everyday
8
  schedule:
9
+ - cron: '59 23 * * *'
10
+
11
+ # Push events to the main branch
12
  push:
13
  branches: [main]
14
 
 
 
 
15
  jobs:
16
  sync-to-hub:
17
  runs-on: ubuntu-latest
app.py CHANGED
@@ -19,8 +19,12 @@ from streamlit_folium import st_folium
19
  # This is the functions we have created to generate features for electricity prices and weather measures
20
  from features import electricity_prices, weather_measures, calendar
21
 
22
- def print_fancy_header(text, font_size=22, color="#ff5f27"):
23
- res = f'<span style="color:{color}; font-size: {font_size}px;">{text}</span>'
 
 
 
 
24
  st.markdown(res, unsafe_allow_html=True)
25
 
26
  # I want to cache this so streamlit would run much faster after restart (it restarts a lot)
@@ -47,67 +51,102 @@ def download_model(name="electricity_price_prediction_model",
47
  saved_model_dir = retrieved_model.download()
48
  return saved_model_dir
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
 
51
 
52
- def plot_price(df):
53
- # create figure with plotly express
54
- fig = px.line(df, x='date', y='dk1_spotpricedkk_kwh', color='type')
55
-
56
- # customize line colors and styles
57
- fig.update_traces(mode='lines+markers')
58
- fig.update_layout({
59
- 'plot_bgcolor': 'rgba(0, 0, 0, 0)',
60
- 'paper_bgcolor': 'rgba(0, 0, 0, 0)',
61
- 'legend_title': 'type',
62
- 'legend_font': {'size': 12},
63
- 'legend_bgcolor': 'rgba(0, 0, 0, 0)',
64
- 'xaxis': {'title': 'Date'},
65
- 'yaxis': {'title': 'dk1_spotpricedkk_kwh'},
66
- 'shapes': [{
67
- 'type': 'line',
68
- 'x0': datetime.datetime.now().strftime('%Y-%m-%d'),
69
- 'y0': 0,
70
- 'x1': datetime.datetime.now().strftime('%Y-%m-%d'),
71
- 'y1': df['dk1_spotpricedkk_kwh'].max(),
72
- 'line': {'color': 'red', 'width': 2, 'dash': 'dashdot'}
73
- }]
74
- })
75
-
76
- # show plot
77
- st.plotly_chart(fig, use_container_width=True)
78
-
79
- with open('data/calendar_incl_holiday.csv') as csv_file:
80
- target_days = csv.reader(csv_file)
81
 
82
  #########################
83
- st.title('🌫 Electricity Price Prediction 🌦')
 
 
 
 
 
 
 
 
 
 
84
 
85
  st.write(3 * "-")
86
- print_fancy_header('\n📡 Connecting to Hopsworks Feature Store...')
87
-
88
- st.write("Logging... ")
89
- # (Attention! If the app has stopped at this step,
90
- # please enter your Hopsworks API Key in the commmand prompt.)
91
- project = hopsworks.login(project = "camillah", api_key_value=os.environ['HOPSWORKS_API_KEY'])
92
- fs = project.get_feature_store()
93
- st.write("✅ Logged in successfully!")
94
-
95
- # Retrieve the model registry
96
- mr = project.get_model_registry()
97
-
98
- # Retrieving the model from the Model Registry
99
- retrieved_model = mr.get_model(
100
- name="electricity_price_prediction_model",
101
- version=1,
102
  )
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- # Downloading the saved model to a local directory
105
- saved_model_dir = retrieved_model.download()
 
 
 
 
 
106
 
107
- # Loading the saved XGB model
108
- retrieved_xgboost_model = joblib.load(saved_model_dir + "/dk_electricity_model.pkl")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- st.write("✅ Model successfully loaded!")
111
 
112
  # I am going to load data for of last 60 days (for feature engineering)
113
  today = datetime.date.today()
@@ -115,70 +154,61 @@ date_threshold = today - datetime.timedelta(days=60)
115
 
116
  st.write(3 * "-")
117
  print_fancy_header('\n☁️ Retriving batch data from Feature Store...')
118
- # Fetching weather forecast measures for the next 5 days
119
- weather_forecast_df = weather_measures.forecast_weather_measures(
120
- forecast_length=5
121
- )
122
-
123
- # Fetching danish calendar
124
- calendar_df = calendar.get_calendar()
125
 
126
- # Merging the weather forecast and calendar dataframes
127
- new_data = pd.merge(weather_forecast_df, calendar_df, how='inner', left_on='date', right_on='date')
128
 
129
- st.write("New data:")
130
- st.write(new_data.sample(5))
131
 
132
- # Drop columns 'date', 'datetime', 'timestamp' from the DataFrame 'new_data'
133
- data = new_data.drop(columns=['date', 'datetime', 'timestamp'])
 
 
 
 
134
 
135
- predictions = retrieved_xgboost_model.predict(data)
136
-
137
- predictions_data = {
138
- 'prediction': predictions,
139
- 'time': new_data["datetime"],
140
- }
141
 
142
- # Create a DataFrame from the predictions data
143
- predictions_df = pd.DataFrame(predictions_data)
144
- predictions_df = predictions_df.sort_values(by='time')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- st.write("predictions_df:")
147
- st.write(predictions_df.sample(5))
148
 
 
 
 
149
 
150
- #########################
151
- st.write(3 * '-')
152
- st.write("\n")
153
 
154
- print_fancy_header('\n📈 Predictions Table for today and 4 days ahead')
 
 
155
 
156
- # Reshape the predictions data to a Table format, where each row represents a hour and each column a day
157
- table_df = predictions_df['prediction'].values.reshape(-1, 24)
158
- table_df = pd.DataFrame(table_df, columns=[f'{i}:00' for i in range(24)], index = [f'Day {i}' for i in range(table_df.shape[0])])
159
 
160
- st.write(table_df.T.style.set_properties(**{'width': '100%', 'max-width': 'none'}))
161
 
162
- #########################
163
- st.write(3 * '-')
164
- st.write("\n")
165
-
166
- # Create a slider for selecting the number of days to display
167
- num_hours = st.slider("Select number of hours to display", min_value=1, max_value=120, value=48)
168
-
169
- # Filter the predictions dataframe based on the selected number of days
170
- filtered_predictions_df = predictions_df.head(num_hours)
171
-
172
- # Create Altair chart with line and dots
173
- chart = alt.Chart(filtered_predictions_df).mark_line(point=True).encode(
174
- x='time:T',
175
- y='prediction:Q',
176
- tooltip=[alt.Tooltip('time:T', title='Date', format='%d-%m-%Y'),
177
- alt.Tooltip('time:T', title='Time', format='%H:%M'),
178
- alt.Tooltip('prediction:Q', title='Spot Price (DKK)', format='.2f')
179
- ]
180
- )
181
 
182
- # Display the chart
183
- st.altair_chart(chart, use_container_width=True)
184
 
 
19
  # This is the functions we have created to generate features for electricity prices and weather measures
20
  from features import electricity_prices, weather_measures, calendar
21
 
22
+ def print_fancy_header(text, font_width="bold", font_size=22, color="#2656a3"):
23
+ res = f'<span style="font-width:{font_width}; color:{color}; font-size:{font_size}px;">{text}</span>'
24
+ st.markdown(res, unsafe_allow_html=True)
25
+
26
+ def print_fancy_subheader(text, font_width="bold", font_size=22, color="#333"):
27
+ res = f'<span style="font-width:{font_width}; color:{color}; font-size:{font_size}px;">{text}</span>'
28
  st.markdown(res, unsafe_allow_html=True)
29
 
30
  # I want to cache this so streamlit would run much faster after restart (it restarts a lot)
 
51
  saved_model_dir = retrieved_model.download()
52
  return saved_model_dir
53
 
54
+ # with open('data/calendar_incl_holiday.csv') as csv_file:
55
+ # target_days = csv.reader(csv_file)
56
+
57
+ # Function to load the dataset
58
+ @st.cache_data # Cache the function to enhance performance
59
+ def load_data():
60
+ # Fetching weather forecast measures for the next 5 days
61
+ weather_forecast_df = weather_measures.forecast_weather_measures(
62
+ forecast_length=5
63
+ )
64
+
65
+ # Fetching danish calendar
66
+ calendar_df = calendar.get_calendar()
67
+
68
+ # Merging the weather forecast and calendar dataframes
69
+ new_data = pd.merge(weather_forecast_df, calendar_df, how='inner', left_on='date', right_on='date')
70
+
71
+ st.write("New data:")
72
+ st.write(new_data.sample(5))
73
+
74
+ # Drop columns 'date', 'datetime', 'timestamp' from the DataFrame 'new_data'
75
+ data = new_data.drop(columns=['date', 'datetime', 'timestamp'])
76
 
77
+ predictions = retrieved_xgboost_model.predict(data)
78
 
79
+ predictions_data = {
80
+ 'prediction': predictions,
81
+ 'time': new_data["datetime"],
82
+ }
83
+
84
+ predictions_df = pd.DataFrame(predictions_data).sort_values(by='time')
85
+
86
+ return predictions_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  #########################
89
+
90
+ progress_bar = st.sidebar.header('⚙️ Working Progress')
91
+ progress_bar = st.sidebar.progress(0)
92
+
93
+ # Title for the streamlit app
94
+ st.title('Electricity Price Prediction 🌦')
95
+
96
+ # Subtitle
97
+ st.markdown("""
98
+ Welcome to the electricity price predicter for DK1.
99
+ """)
100
 
101
  st.write(3 * "-")
102
+
103
+ with st.expander("📊 **Data Engineering and Machine Learning Operations in Business**"):
104
+ st.markdown("""
105
+ LEARNING OBJECTIVES
106
+ - Using our skills for designing, implementing, and managing data pipelines and ML systems.
107
+ - Focus on practical applications within a business context.
108
+ - Cover topics such as data ingestion, preprocessing, model deployment, monitoring, and maintenance.
109
+ - Emphasize industry best practices for effective operation of ML systems.
110
+ """
 
 
 
 
 
 
 
111
  )
112
+
113
+ with st.expander("📊 **This assigment**"):
114
+ st.markdown("""
115
+ The objective of this assignment is to build a prediction system that predicts the electricity prices in Denmark (area DK1) based on weather conditions, previous prices, and the Danish holidays.
116
+ """
117
+ )
118
+
119
+ with st.sidebar:
120
+ # st.write("This code will be printed to the sidebar.")
121
+
122
+ print_fancy_header('\n📡 Connecting to Hopsworks Feature Store...')
123
 
124
+ st.write("Logging... ")
125
+ # please enter your Hopsworks API Key in the commmand prompt.)
126
+ # project = hopsworks.login(project = "camillah", api_key_value=os.environ['HOPSWORKS_API_KEY'])
127
+ project = hopsworks.login()
128
+ fs = project.get_feature_store()
129
+ progress_bar.progress(40)
130
+ st.write("✅ Logged in successfully!")
131
 
132
+ # Retrieve the model registry
133
+ mr = project.get_model_registry()
134
+
135
+ # Retrieving the model from the Model Registry
136
+ retrieved_model = mr.get_model(
137
+ name="electricity_price_prediction_model",
138
+ version=1,
139
+ )
140
+
141
+ # Downloading the saved model to a local directory
142
+ saved_model_dir = retrieved_model.download()
143
+
144
+ # Loading the saved XGB model
145
+ retrieved_xgboost_model = joblib.load(saved_model_dir + "/dk_electricity_model.pkl")
146
+
147
+ st.write("✅ Model successfully loaded!")
148
 
149
+ progress_bar.progress(80)
150
 
151
  # I am going to load data for of last 60 days (for feature engineering)
152
  today = datetime.date.today()
 
154
 
155
  st.write(3 * "-")
156
  print_fancy_header('\n☁️ Retriving batch data from Feature Store...')
 
 
 
 
 
 
 
157
 
158
+ predictions_df = load_data()
 
159
 
160
+ progress_bar.progress(100)
 
161
 
162
+ # Sidebar filter: Date range
163
+ min_value = 1
164
+ max_value = int(len(predictions_df['time'].unique()) / 24)
165
+ default = int(48 / 24)
166
+ date_range = st.sidebar.slider("Select Date Range", min_value=min_value, max_value=max_value, value=default)
167
+ filtered_predictions_df = predictions_df.head(date_range * 24)
168
 
169
+ visualization_option = st.selectbox(
170
+ "Select Visualization 🎨",
171
+ ["Matrix",
172
+ "Linechart"]
173
+ )
 
174
 
175
+ # Visualizations based on user selection
176
+ if visualization_option == "Matrix":
177
+ data = filtered_predictions_df
178
+ data['date'] = data['time'].dt.strftime('%Y-%m-%d')
179
+ data['time_of_day'] = data['time'].dt.strftime('%H:%M')
180
+ data.drop(columns=['time'], inplace=True)
181
+
182
+ # Pivot the DataFrame
183
+ pivot_df = data.pivot(index='time_of_day', columns='date', values='prediction')
184
+
185
+ st.write(pivot_df)
186
+
187
+ elif visualization_option == "Linechart":
188
+ # Create Altair chart with line and dots
189
+ chart = alt.Chart(filtered_predictions_df).mark_line(point=True).encode(
190
+ x='time:T',
191
+ y='prediction:Q',
192
+ tooltip=[alt.Tooltip('time:T', title='Date', format='%d-%m-%Y'),
193
+ alt.Tooltip('time:T', title='Time', format='%H:%M'),
194
+ alt.Tooltip('prediction:Q', title='Spot Price (DKK)', format='.2f')
195
+ ]
196
+ )
197
 
198
+ # Display the chart
199
+ st.altair_chart(chart, use_container_width=True)
200
 
201
+ # #########################
202
+ # st.write(3 * '-')
203
+ # st.write("\n")
204
 
205
+ # print_fancy_header('\n📈 Predictions Table for today and 4 days ahead')
 
 
206
 
207
+ # #########################
208
+ # st.write(3 * '-')
209
+ # st.write("\n")
210
 
 
 
 
211
 
 
212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
 
 
214
 
features/__pycache__/calendar.cpython-311.pyc CHANGED
Binary files a/features/__pycache__/calendar.cpython-311.pyc and b/features/__pycache__/calendar.cpython-311.pyc differ
 
features/__pycache__/electricity_prices.cpython-311.pyc CHANGED
Binary files a/features/__pycache__/electricity_prices.cpython-311.pyc and b/features/__pycache__/electricity_prices.cpython-311.pyc differ
 
features/__pycache__/weather_measures.cpython-311.pyc CHANGED
Binary files a/features/__pycache__/weather_measures.cpython-311.pyc and b/features/__pycache__/weather_measures.cpython-311.pyc differ
 
features/calendar.py CHANGED
@@ -3,7 +3,7 @@ import numpy as np
3
  import pandas as pd
4
 
5
 
6
- def get_calendar() -> pd.DataFrame:
7
  """
8
  Fetches calendar for Denmark.
9
 
@@ -25,7 +25,7 @@ def get_calendar() -> pd.DataFrame:
25
  df['day'] = df['date_'].dt.day
26
  df['month'] = df['date_'].dt.month
27
  df['year'] = df['date_'].dt.year
28
- df['holiday'] = np.where(df['type'] == 'Not a Workday', 1, 0)
29
 
30
  # Drop the columns 'type' and 'date_' to finalize the calender dataframe
31
  calendar = df.drop(['type','date_'], axis=1)
 
3
  import pandas as pd
4
 
5
 
6
+ def dk_calendar() -> pd.DataFrame:
7
  """
8
  Fetches calendar for Denmark.
9
 
 
25
  df['day'] = df['date_'].dt.day
26
  df['month'] = df['date_'].dt.month
27
  df['year'] = df['date_'].dt.year
28
+ df['workday'] = np.where(df['type'] == 'Not a Workday', 0, 1)
29
 
30
  # Drop the columns 'type' and 'date_' to finalize the calender dataframe
31
  calendar = df.drop(['type','date_'], axis=1)
features/plots.py DELETED
@@ -1,208 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- from datetime import date, timedelta
4
- import datetime
5
- from tqdm import tqdm
6
- import plotly.express as px
7
- import plotly.graph_objects as go
8
- import plotly.colors as pc
9
- from typing import List, Union, Optional, Tuple, Dict
10
-
11
-
12
- def plot_historical_id(ids_to_show: List[int], data: pd.DataFrame) -> go.Figure:
13
- """
14
- Plots time series data for a specified list of IDs.
15
-
16
- Parameters:
17
- - ids_to_show (list): A list of IDs for which time series data should be plotted.
18
- - data (pd.DataFrame): The DataFrame containing the data to be plotted, with columns ['date', 'id', 'price'].
19
-
20
- Returns:
21
- - Figure
22
- """
23
- # Filter the DataFrame to include only the specified IDs
24
- filtered_df = data[data['id'].isin(ids_to_show)]
25
-
26
- # Convert the 'date' column to datetime type
27
- filtered_df['date'] = pd.to_datetime(filtered_df['date'], format='%Y-%m-%d')
28
- filtered_df.sort_values('date', inplace=True)
29
-
30
- # Generate a colormap with distinct colors based on the number of unique IDs
31
- unique_ids = filtered_df['id'].unique()
32
- num_ids = len(unique_ids)
33
- colors = pc.qualitative.Set1 * (num_ids // len(pc.qualitative.Set1) + 1)
34
-
35
- # Create a dictionary to map IDs to colors
36
- color_map = dict(zip(unique_ids, colors[:num_ids]))
37
-
38
- # Create a time series plot using Plotly Express
39
- fig = px.line(
40
- filtered_df,
41
- x='date',
42
- y='price',
43
- color='id',
44
- title=f'Historical Prices for {ids_to_show} IDs',
45
- labels={'date': 'Date', 'price': 'Price'},
46
- line_group='id',
47
- color_discrete_map=color_map,
48
- )
49
-
50
- return fig
51
-
52
-
53
- def plot_prediction_test(
54
- id_to_show: int,
55
- X_train: pd.DataFrame,
56
- X_test: pd.DataFrame,
57
- y_train: Union[pd.Series, pd.DataFrame],
58
- y_test: Union[pd.Series, pd.DataFrame],
59
- train_date: pd.Series,
60
- test_date: pd.Series,
61
- predictions: Optional[pd.Series] = None
62
- ) -> go.Figure:
63
- """
64
- Plots a time series for a specific ID, showing training and test data on the same plot.
65
-
66
- Parameters:
67
- - id_to_show (int): The ID to be displayed in the plot.
68
- - X_train (pd.DataFrame): The feature data for the training set.
69
- - X_test (pd.DataFrame): The feature data for the test set.
70
- - y_train (pd.Series or pd.DataFrame): The target data for the training set.
71
- - y_test (pd.Series or pd.DataFrame): The target data for the test set.
72
- - train_date (pd.Series): The date column for the training data.
73
- - test_date (pd.Series): The date column for the test data.
74
- - predictions (pd.Series or None): Predicted values for the test data. Default is None.
75
-
76
- Returns:
77
- - Figure
78
- """
79
- # Combine features and target data for training and test sets
80
- train = pd.concat([train_date, X_train, y_train], axis=1)
81
- test = pd.concat([test_date, X_test, y_test], axis=1)
82
-
83
- # Filter and sort data for the specified ID
84
- train_sorted = train[train.id == id_to_show].sort_values('date')
85
- test_sorted = test[test.id == id_to_show].sort_values('date')
86
-
87
- # Create a Plotly figure
88
- fig = go.Figure()
89
-
90
- # Add a trace for training data (blue)
91
- fig.add_trace(go.Scatter(
92
- x=train_sorted['date'],
93
- y=train_sorted['price'],
94
- mode='lines',
95
- name='Training Data',
96
- line=dict(color='blue')
97
- ))
98
-
99
- # Add a trace for test data (red)
100
- fig.add_trace(go.Scatter(
101
- x=test_sorted['date'],
102
- y=test_sorted['price'],
103
- mode='lines',
104
- name='Test Data',
105
- line=dict(color='green')
106
- ))
107
-
108
- if predictions is not None:
109
- pred_df = pd.DataFrame()
110
- pred_df['date'] = test_sorted['date']
111
- pred_df['price'] = predictions
112
- fig.add_trace(go.Scatter(
113
- x=pred_df['date'],
114
- y=pred_df['price'],
115
- mode='lines',
116
- name='Prediction',
117
- line=dict(color='red')
118
- ))
119
-
120
-
121
- # Set X-axis range to span the entire date range from both training and test data
122
- fig.update_xaxes(range=[train_sorted['date'].min(), test_sorted['date'].max()])
123
-
124
- # Customize plot layout
125
- fig.update_layout(
126
- title=f'Time Series for the {id_to_show} ID',
127
- xaxis_title='Date',
128
- yaxis_title='Price',
129
- legend_title='Data Type'
130
- )
131
-
132
- return fig
133
-
134
-
135
- def plot_prediction(
136
- id_to_show: int,
137
- data: pd.DataFrame,
138
- week_ago: str,
139
- predictions: Optional[pd.Series] = None,
140
- ) -> go.Figure:
141
- """
142
- Display a time series plot for a specific ID, showcasing historical data, real prices, and predicted prices.
143
-
144
- Parameters:
145
- - id_to_show (int): The unique identifier for the data series to be displayed.
146
- - data (pd.DataFrame): A DataFrame containing time series data.
147
- - week_ago (str): A string representing a date one week ago (in 'YYYY-MM-DD' format).
148
- - predictions (pd.Series or None, optional): Predicted price values for the test data. Default is None.
149
-
150
- Returns:
151
- - fig (plotly.graph_objs.Figure): A Plotly figure object containing the generated time series plot.
152
- """
153
- data_sorted = data[data.id == id_to_show].sort_values('date')
154
- data_sorted['date'] = pd.to_datetime(data_sorted['date'])
155
-
156
- time_ago = (datetime.datetime.strptime(week_ago, '%Y-%m-%d') - timedelta(days=210)).strftime("%Y-%m-%d")
157
- data_historical = data_sorted.loc[
158
- (data_sorted['date'] <= datetime.datetime.strptime(week_ago, "%Y-%m-%d")) &
159
- (data_sorted['date'] >= datetime.datetime.strptime(time_ago, "%Y-%m-%d"))
160
- ]
161
- data_last_week = data_sorted[data_sorted.date > week_ago]
162
-
163
- # Create a Plotly figure
164
- fig = go.Figure()
165
-
166
- # Add a trace for training data (blue)
167
- fig.add_trace(go.Scatter(
168
- x=data_historical['date'],
169
- y=data_historical['price'],
170
- mode='lines',
171
- name='Historical Data',
172
- line=dict(color='blue')
173
- ))
174
-
175
- # Add a trace for test data (red)
176
- fig.add_trace(go.Scatter(
177
- x=data_last_week['date'],
178
- y=data_last_week['price'],
179
- mode='lines',
180
- name='Real Price',
181
- line=dict(color='green')
182
- ))
183
-
184
- if predictions is not None:
185
- pred_df = pd.DataFrame()
186
- pred_df['date'] = data_last_week['date']
187
- pred_df['price'] = predictions
188
- fig.add_trace(go.Scatter(
189
- x=pred_df['date'],
190
- y=pred_df['price'],
191
- mode='lines',
192
- name='Predicted Price',
193
- line=dict(color='red')
194
- ))
195
-
196
-
197
- # Set X-axis range to span the entire date range from both training and test data
198
- fig.update_xaxes(range=[data_historical['date'].min(), data_last_week['date'].max()])
199
-
200
- # Customize plot layout
201
- fig.update_layout(
202
- title=f'Predicted price for the {id_to_show} ID',
203
- xaxis_title='Date',
204
- yaxis_title='Price',
205
- legend_title='Data Type'
206
- )
207
-
208
- return fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/Old/1_feature_backfill_OLD.ipynb DELETED
@@ -1,1404 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# <span style=\"font-width:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-width:bold; font-size: 3rem; color:#333;\">- Part 01: Feature Backfill</span>"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "## 🗒️ This notebook is divided into the following sections:\n",
15
- "1. Load the data and process features\n",
16
- "2. Connect to the Hopsworks feature store\n",
17
- "3. Create feature groups and upload them to the feature store"
18
- ]
19
- },
20
- {
21
- "cell_type": "markdown",
22
- "metadata": {},
23
- "source": [
24
- "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages\n",
25
- "\n",
26
- "First, we'll install the Python packages required for this notebook. We'll use the --quiet command after specifying the names of the libraries to ensure a silent installation process. Then, we'll proceed to import all the necessary libraries."
27
- ]
28
- },
29
- {
30
- "cell_type": "code",
31
- "execution_count": 1,
32
- "metadata": {},
33
- "outputs": [],
34
- "source": [
35
- "# Install of the packages for hopsworks\n",
36
- "# !pip install -U hopsworks --quiet"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": 3,
42
- "metadata": {},
43
- "outputs": [],
44
- "source": [
45
- "# Importing of the packages for the needed libraries for the Jupyter notebook\n",
46
- "import pandas as pd\n",
47
- "import requests\n",
48
- "\n",
49
- "# Ignore warnings\n",
50
- "import warnings \n",
51
- "warnings.filterwarnings('ignore')"
52
- ]
53
- },
54
- {
55
- "cell_type": "markdown",
56
- "metadata": {},
57
- "source": [
58
- "## <span style=\"color:#2656a3;\"> 💽 Load the historical data\n",
59
- "\n",
60
- "The data you will use comes from three different sources:\n",
61
- "\n",
62
- "- Electricity prices in Denmark per day from [Energinet](https://www.energidataservice.dk).\n",
63
- "- Different meteorological observations from [Open meteo](https://www.open-meteo.com).\n",
64
- "- Danish Calendar with the type if the date is a national holiday or not. This files is made manually by the group and is located in the \"*data*\" folder inside this repository."
65
- ]
66
- },
67
- {
68
- "cell_type": "markdown",
69
- "metadata": {},
70
- "source": [
71
- "### <span style=\"color:#2656a3;\">💸 Electricity prices per day from Energinet\n",
72
- "*Hvis vi skal have tariffer med i modellen, anbefales det at vi sætter en faktor på 0.2 i tidsrummet 22 - 16 og en faktor på 0.6 eller 0.7 i tidsrummet 17 - 21.*\n",
73
- "\n",
74
- "This first dataset is Electricity prices per day from Energinet/Dataservice. Here we use "
75
- ]
76
- },
77
- {
78
- "cell_type": "code",
79
- "execution_count": 4,
80
- "metadata": {},
81
- "outputs": [],
82
- "source": [
83
- "# Defining the URL for the API call to the electricity price data\n",
84
- "electricity_api_url = ('https://api.energidataservice.dk/dataset/Elspotprices?offset=0&start=2022-01-01T00:00&end=2023-12-31T23:59&filter=%7B%22PriceArea%22:[%22DK1%22]%7D&sort=HourUTC%20DESC')"
85
- ]
86
- },
87
- {
88
- "cell_type": "code",
89
- "execution_count": 5,
90
- "metadata": {},
91
- "outputs": [
92
- {
93
- "name": "stdout",
94
- "output_type": "stream",
95
- "text": [
96
- "<Response [200]>\n"
97
- ]
98
- }
99
- ],
100
- "source": [
101
- "# Fetch data from the API and make the output to a pandas dataframe\n",
102
- "electricity_data_response = requests.get(electricity_api_url)\n",
103
- "electricity_data = electricity_data_response.json()\n",
104
- "electricity_df = pd.DataFrame(electricity_data['records'])\n",
105
- "\n",
106
- "# Checking the result of the API call. If the response if 200 then the API call was successfull\n",
107
- "print(electricity_data_response)"
108
- ]
109
- },
110
- {
111
- "cell_type": "code",
112
- "execution_count": 6,
113
- "metadata": {},
114
- "outputs": [
115
- {
116
- "data": {
117
- "text/html": [
118
- "<div>\n",
119
- "<style scoped>\n",
120
- " .dataframe tbody tr th:only-of-type {\n",
121
- " vertical-align: middle;\n",
122
- " }\n",
123
- "\n",
124
- " .dataframe tbody tr th {\n",
125
- " vertical-align: top;\n",
126
- " }\n",
127
- "\n",
128
- " .dataframe thead th {\n",
129
- " text-align: right;\n",
130
- " }\n",
131
- "</style>\n",
132
- "<table border=\"1\" class=\"dataframe\">\n",
133
- " <thead>\n",
134
- " <tr style=\"text-align: right;\">\n",
135
- " <th></th>\n",
136
- " <th>HourUTC</th>\n",
137
- " <th>HourDK</th>\n",
138
- " <th>PriceArea</th>\n",
139
- " <th>SpotPriceDKK</th>\n",
140
- " <th>SpotPriceEUR</th>\n",
141
- " </tr>\n",
142
- " </thead>\n",
143
- " <tbody>\n",
144
- " <tr>\n",
145
- " <th>0</th>\n",
146
- " <td>2023-12-31T22:00:00</td>\n",
147
- " <td>2023-12-31T23:00:00</td>\n",
148
- " <td>DK1</td>\n",
149
- " <td>200.309998</td>\n",
150
- " <td>26.870001</td>\n",
151
- " </tr>\n",
152
- " <tr>\n",
153
- " <th>1</th>\n",
154
- " <td>2023-12-31T21:00:00</td>\n",
155
- " <td>2023-12-31T22:00:00</td>\n",
156
- " <td>DK1</td>\n",
157
- " <td>213.729996</td>\n",
158
- " <td>28.670000</td>\n",
159
- " </tr>\n",
160
- " <tr>\n",
161
- " <th>2</th>\n",
162
- " <td>2023-12-31T20:00:00</td>\n",
163
- " <td>2023-12-31T21:00:00</td>\n",
164
- " <td>DK1</td>\n",
165
- " <td>220.660004</td>\n",
166
- " <td>29.600000</td>\n",
167
- " </tr>\n",
168
- " <tr>\n",
169
- " <th>3</th>\n",
170
- " <td>2023-12-31T19:00:00</td>\n",
171
- " <td>2023-12-31T20:00:00</td>\n",
172
- " <td>DK1</td>\n",
173
- " <td>260.100006</td>\n",
174
- " <td>34.889999</td>\n",
175
- " </tr>\n",
176
- " <tr>\n",
177
- " <th>4</th>\n",
178
- " <td>2023-12-31T18:00:00</td>\n",
179
- " <td>2023-12-31T19:00:00</td>\n",
180
- " <td>DK1</td>\n",
181
- " <td>295.510010</td>\n",
182
- " <td>39.639999</td>\n",
183
- " </tr>\n",
184
- " </tbody>\n",
185
- "</table>\n",
186
- "</div>"
187
- ],
188
- "text/plain": [
189
- " HourUTC HourDK PriceArea SpotPriceDKK \\\n",
190
- "0 2023-12-31T22:00:00 2023-12-31T23:00:00 DK1 200.309998 \n",
191
- "1 2023-12-31T21:00:00 2023-12-31T22:00:00 DK1 213.729996 \n",
192
- "2 2023-12-31T20:00:00 2023-12-31T21:00:00 DK1 220.660004 \n",
193
- "3 2023-12-31T19:00:00 2023-12-31T20:00:00 DK1 260.100006 \n",
194
- "4 2023-12-31T18:00:00 2023-12-31T19:00:00 DK1 295.510010 \n",
195
- "\n",
196
- " SpotPriceEUR \n",
197
- "0 26.870001 \n",
198
- "1 28.670000 \n",
199
- "2 29.600000 \n",
200
- "3 34.889999 \n",
201
- "4 39.639999 "
202
- ]
203
- },
204
- "execution_count": 6,
205
- "metadata": {},
206
- "output_type": "execute_result"
207
- }
208
- ],
209
- "source": [
210
- "# Display the first 5 rows of the dataframe\n",
211
- "electricity_df.head()"
212
- ]
213
- },
214
- {
215
- "cell_type": "code",
216
- "execution_count": 7,
217
- "metadata": {},
218
- "outputs": [],
219
- "source": [
220
- "# Datapreprocessing by making the spotprice per kwh instead of mwh\n",
221
- "electricity_df['SpotPriceDKK_KWH'] = electricity_df['SpotPriceDKK'] / 1000"
222
- ]
223
- },
224
- {
225
- "cell_type": "code",
226
- "execution_count": 8,
227
- "metadata": {},
228
- "outputs": [],
229
- "source": [
230
- "# Datacleaning by removing the columns that are not needed\n",
231
- "electricity_df.drop('SpotPriceDKK', axis=1, inplace=True)\n",
232
- "electricity_df.drop('SpotPriceEUR', axis=1, inplace=True)\n",
233
- "electricity_df.drop('HourUTC', axis=1, inplace=True)"
234
- ]
235
- },
236
- {
237
- "cell_type": "code",
238
- "execution_count": 9,
239
- "metadata": {},
240
- "outputs": [],
241
- "source": [
242
- "# Renaming the columns and reformating the time column\n",
243
- "electricity_df.rename(columns={'HourDK': 'time'}, inplace=True)"
244
- ]
245
- },
246
- {
247
- "cell_type": "code",
248
- "execution_count": 10,
249
- "metadata": {},
250
- "outputs": [],
251
- "source": [
252
- "# Formatting the date column\n",
253
- "electricity_df['time'] = electricity_df['time'].astype(str).str[:-3]\n",
254
- "electricity_df['date'] = electricity_df['time'].str[:10]"
255
- ]
256
- },
257
- {
258
- "cell_type": "code",
259
- "execution_count": 11,
260
- "metadata": {},
261
- "outputs": [],
262
- "source": [
263
- "# Creating a new column for the date called electricity_temporary_date_column and insert it as the first column in the dataframe\n",
264
- "electricity_temporary_date_column = electricity_df.pop('date')\n",
265
- "electricity_df.insert(0, 'date', electricity_temporary_date_column)"
266
- ]
267
- },
268
- {
269
- "cell_type": "code",
270
- "execution_count": 12,
271
- "metadata": {},
272
- "outputs": [],
273
- "source": [
274
- "# Convert string 'date' column to date type and 'time' column to datetime format\n",
275
- "electricity_df['date'] = pd.to_datetime(electricity_df['date'], format='%Y-%m-%d').dt.date\n",
276
- "electricity_df['time'] = pd.to_datetime(electricity_df['time'])"
277
- ]
278
- },
279
- {
280
- "cell_type": "code",
281
- "execution_count": 13,
282
- "metadata": {},
283
- "outputs": [
284
- {
285
- "data": {
286
- "text/html": [
287
- "<div>\n",
288
- "<style scoped>\n",
289
- " .dataframe tbody tr th:only-of-type {\n",
290
- " vertical-align: middle;\n",
291
- " }\n",
292
- "\n",
293
- " .dataframe tbody tr th {\n",
294
- " vertical-align: top;\n",
295
- " }\n",
296
- "\n",
297
- " .dataframe thead th {\n",
298
- " text-align: right;\n",
299
- " }\n",
300
- "</style>\n",
301
- "<table border=\"1\" class=\"dataframe\">\n",
302
- " <thead>\n",
303
- " <tr style=\"text-align: right;\">\n",
304
- " <th></th>\n",
305
- " <th>date</th>\n",
306
- " <th>time</th>\n",
307
- " <th>PriceArea</th>\n",
308
- " <th>SpotPriceDKK_KWH</th>\n",
309
- " </tr>\n",
310
- " </thead>\n",
311
- " <tbody>\n",
312
- " <tr>\n",
313
- " <th>0</th>\n",
314
- " <td>2023-12-31</td>\n",
315
- " <td>2023-12-31 23:00:00</td>\n",
316
- " <td>DK1</td>\n",
317
- " <td>0.20031</td>\n",
318
- " </tr>\n",
319
- " <tr>\n",
320
- " <th>1</th>\n",
321
- " <td>2023-12-31</td>\n",
322
- " <td>2023-12-31 22:00:00</td>\n",
323
- " <td>DK1</td>\n",
324
- " <td>0.21373</td>\n",
325
- " </tr>\n",
326
- " <tr>\n",
327
- " <th>2</th>\n",
328
- " <td>2023-12-31</td>\n",
329
- " <td>2023-12-31 21:00:00</td>\n",
330
- " <td>DK1</td>\n",
331
- " <td>0.22066</td>\n",
332
- " </tr>\n",
333
- " <tr>\n",
334
- " <th>3</th>\n",
335
- " <td>2023-12-31</td>\n",
336
- " <td>2023-12-31 20:00:00</td>\n",
337
- " <td>DK1</td>\n",
338
- " <td>0.26010</td>\n",
339
- " </tr>\n",
340
- " <tr>\n",
341
- " <th>4</th>\n",
342
- " <td>2023-12-31</td>\n",
343
- " <td>2023-12-31 19:00:00</td>\n",
344
- " <td>DK1</td>\n",
345
- " <td>0.29551</td>\n",
346
- " </tr>\n",
347
- " </tbody>\n",
348
- "</table>\n",
349
- "</div>"
350
- ],
351
- "text/plain": [
352
- " date time PriceArea SpotPriceDKK_KWH\n",
353
- "0 2023-12-31 2023-12-31 23:00:00 DK1 0.20031\n",
354
- "1 2023-12-31 2023-12-31 22:00:00 DK1 0.21373\n",
355
- "2 2023-12-31 2023-12-31 21:00:00 DK1 0.22066\n",
356
- "3 2023-12-31 2023-12-31 20:00:00 DK1 0.26010\n",
357
- "4 2023-12-31 2023-12-31 19:00:00 DK1 0.29551"
358
- ]
359
- },
360
- "execution_count": 13,
361
- "metadata": {},
362
- "output_type": "execute_result"
363
- }
364
- ],
365
- "source": [
366
- "# Display the first 5 rows of the dataframe\n",
367
- "electricity_df.head()"
368
- ]
369
- },
370
- {
371
- "cell_type": "code",
372
- "execution_count": 14,
373
- "metadata": {},
374
- "outputs": [
375
- {
376
- "name": "stdout",
377
- "output_type": "stream",
378
- "text": [
379
- "<class 'pandas.core.frame.DataFrame'>\n",
380
- "RangeIndex: 17520 entries, 0 to 17519\n",
381
- "Data columns (total 4 columns):\n",
382
- " # Column Non-Null Count Dtype \n",
383
- "--- ------ -------------- ----- \n",
384
- " 0 date 17520 non-null object \n",
385
- " 1 time 17520 non-null datetime64[ns]\n",
386
- " 2 PriceArea 17520 non-null object \n",
387
- " 3 SpotPriceDKK_KWH 17520 non-null float64 \n",
388
- "dtypes: datetime64[ns](1), float64(1), object(2)\n",
389
- "memory usage: 547.6+ KB\n"
390
- ]
391
- }
392
- ],
393
- "source": [
394
- "# Showing the information for the electricity dataframe\n",
395
- "electricity_df.info()"
396
- ]
397
- },
398
- {
399
- "cell_type": "code",
400
- "execution_count": 15,
401
- "metadata": {},
402
- "outputs": [
403
- {
404
- "data": {
405
- "text/html": [
406
- "<div>\n",
407
- "<style scoped>\n",
408
- " .dataframe tbody tr th:only-of-type {\n",
409
- " vertical-align: middle;\n",
410
- " }\n",
411
- "\n",
412
- " .dataframe tbody tr th {\n",
413
- " vertical-align: top;\n",
414
- " }\n",
415
- "\n",
416
- " .dataframe thead th {\n",
417
- " text-align: right;\n",
418
- " }\n",
419
- "</style>\n",
420
- "<table border=\"1\" class=\"dataframe\">\n",
421
- " <thead>\n",
422
- " <tr style=\"text-align: right;\">\n",
423
- " <th></th>\n",
424
- " <th>date</th>\n",
425
- " <th>time</th>\n",
426
- " <th>PriceArea</th>\n",
427
- " <th>SpotPriceDKK_KWH</th>\n",
428
- " </tr>\n",
429
- " </thead>\n",
430
- " <tbody>\n",
431
- " <tr>\n",
432
- " <th>0</th>\n",
433
- " <td>2023-12-31</td>\n",
434
- " <td>2023-12-31 23:00:00</td>\n",
435
- " <td>DK1</td>\n",
436
- " <td>0.20031</td>\n",
437
- " </tr>\n",
438
- " <tr>\n",
439
- " <th>2</th>\n",
440
- " <td>2023-12-31</td>\n",
441
- " <td>2023-12-31 22:00:00</td>\n",
442
- " <td>DK1</td>\n",
443
- " <td>0.21373</td>\n",
444
- " </tr>\n",
445
- " <tr>\n",
446
- " <th>4</th>\n",
447
- " <td>2023-12-31</td>\n",
448
- " <td>2023-12-31 21:00:00</td>\n",
449
- " <td>DK1</td>\n",
450
- " <td>0.22066</td>\n",
451
- " </tr>\n",
452
- " <tr>\n",
453
- " <th>6</th>\n",
454
- " <td>2023-12-31</td>\n",
455
- " <td>2023-12-31 20:00:00</td>\n",
456
- " <td>DK1</td>\n",
457
- " <td>0.26010</td>\n",
458
- " </tr>\n",
459
- " <tr>\n",
460
- " <th>8</th>\n",
461
- " <td>2023-12-31</td>\n",
462
- " <td>2023-12-31 19:00:00</td>\n",
463
- " <td>DK1</td>\n",
464
- " <td>0.29551</td>\n",
465
- " </tr>\n",
466
- " </tbody>\n",
467
- "</table>\n",
468
- "</div>"
469
- ],
470
- "text/plain": [
471
- " date time PriceArea SpotPriceDKK_KWH\n",
472
- "0 2023-12-31 2023-12-31 23:00:00 DK1 0.20031\n",
473
- "2 2023-12-31 2023-12-31 22:00:00 DK1 0.21373\n",
474
- "4 2023-12-31 2023-12-31 21:00:00 DK1 0.22066\n",
475
- "6 2023-12-31 2023-12-31 20:00:00 DK1 0.26010\n",
476
- "8 2023-12-31 2023-12-31 19:00:00 DK1 0.29551"
477
- ]
478
- },
479
- "execution_count": 15,
480
- "metadata": {},
481
- "output_type": "execute_result"
482
- }
483
- ],
484
- "source": [
485
- "# Fetching historical electricity prices data\n",
486
- "electricity = electricity_prices.fetch_electricity_prices(historical=True, start='2022-01-01', end='2023-12-31')\n",
487
- "electricity = electricity[(electricity['PriceArea'] == \"DK1\")]\n",
488
- "electricity.head()"
489
- ]
490
- },
491
- {
492
- "cell_type": "markdown",
493
- "metadata": {},
494
- "source": [
495
- "### <span style=\"color:#2656a3;\"> 🌤 Weather measurements from Open Meteo\n",
496
- "\n",
497
- "The end date should be 2023-12-31: url = (\"https://archive-api.open-meteo.com/v1/archive?latitude=57.048&longitude=9.9187&start_date=2022-01-01&end_date=2023-12-31&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m\")"
498
- ]
499
- },
500
- {
501
- "cell_type": "code",
502
- "execution_count": 16,
503
- "metadata": {},
504
- "outputs": [],
505
- "source": [
506
- "# Defining the URL for the API call to the weather data \n",
507
- "weather_api_url = (\"https://archive-api.open-meteo.com/v1/archive?latitude=57.048&longitude=9.9187&start_date=2022-01-01&end_date=2023-12-31&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m\")"
508
- ]
509
- },
510
- {
511
- "cell_type": "code",
512
- "execution_count": 17,
513
- "metadata": {},
514
- "outputs": [
515
- {
516
- "name": "stdout",
517
- "output_type": "stream",
518
- "text": [
519
- "<Response [200]>\n"
520
- ]
521
- }
522
- ],
523
- "source": [
524
- "# Fetch data from the API and make the output to a pandas dataframe\n",
525
- "weather_data_response = requests.get(weather_api_url)\n",
526
- "weather_data = weather_data_response.json()\n",
527
- "weather_df = pd.DataFrame(weather_data['hourly'])\n",
528
- "\n",
529
- "# Checking the result of the API call. If the response is 200, the API call was successful\n",
530
- "print(weather_data_response)"
531
- ]
532
- },
533
- {
534
- "cell_type": "code",
535
- "execution_count": 18,
536
- "metadata": {},
537
- "outputs": [],
538
- "source": [
539
- "# Formatting the date column\n",
540
- "weather_df['date'] = weather_df['time'].str[:10]"
541
- ]
542
- },
543
- {
544
- "cell_type": "code",
545
- "execution_count": 19,
546
- "metadata": {},
547
- "outputs": [],
548
- "source": [
549
- "# Moving the 'date' column to the front: pop it into weather_temporary_date_column and re-insert it as the first column in the dataframe\n",
550
- "weather_temporary_date_column = weather_df.pop('date')\n",
551
- "weather_df.insert(0, 'date', weather_temporary_date_column)"
552
- ]
553
- },
554
- {
555
- "cell_type": "code",
556
- "execution_count": 20,
557
- "metadata": {},
558
- "outputs": [],
559
- "source": [
560
- "# Convert string 'date' column to date type and 'time' column to datetime format\n",
561
- "weather_df['date'] = pd.to_datetime(weather_df['date'], format='%Y-%m-%d').dt.date\n",
562
- "weather_df['time'] = pd.to_datetime(weather_df['time'])"
563
- ]
564
- },
565
- {
566
- "cell_type": "code",
567
- "execution_count": 21,
568
- "metadata": {},
569
- "outputs": [
570
- {
571
- "data": {
572
- "text/html": [
573
- "<div>\n",
574
- "<style scoped>\n",
575
- " .dataframe tbody tr th:only-of-type {\n",
576
- " vertical-align: middle;\n",
577
- " }\n",
578
- "\n",
579
- " .dataframe tbody tr th {\n",
580
- " vertical-align: top;\n",
581
- " }\n",
582
- "\n",
583
- " .dataframe thead th {\n",
584
- " text-align: right;\n",
585
- " }\n",
586
- "</style>\n",
587
- "<table border=\"1\" class=\"dataframe\">\n",
588
- " <thead>\n",
589
- " <tr style=\"text-align: right;\">\n",
590
- " <th></th>\n",
591
- " <th>date</th>\n",
592
- " <th>time</th>\n",
593
- " <th>temperature_2m</th>\n",
594
- " <th>relative_humidity_2m</th>\n",
595
- " <th>precipitation</th>\n",
596
- " <th>rain</th>\n",
597
- " <th>snowfall</th>\n",
598
- " <th>weather_code</th>\n",
599
- " <th>cloud_cover</th>\n",
600
- " <th>wind_speed_10m</th>\n",
601
- " <th>wind_gusts_10m</th>\n",
602
- " </tr>\n",
603
- " </thead>\n",
604
- " <tbody>\n",
605
- " <tr>\n",
606
- " <th>0</th>\n",
607
- " <td>2022-01-01</td>\n",
608
- " <td>2022-01-01 00:00:00</td>\n",
609
- " <td>6.7</td>\n",
610
- " <td>100</td>\n",
611
- " <td>0.0</td>\n",
612
- " <td>0.0</td>\n",
613
- " <td>0.0</td>\n",
614
- " <td>3</td>\n",
615
- " <td>100</td>\n",
616
- " <td>16.2</td>\n",
617
- " <td>36.0</td>\n",
618
- " </tr>\n",
619
- " <tr>\n",
620
- " <th>1</th>\n",
621
- " <td>2022-01-01</td>\n",
622
- " <td>2022-01-01 01:00:00</td>\n",
623
- " <td>6.6</td>\n",
624
- " <td>100</td>\n",
625
- " <td>0.0</td>\n",
626
- " <td>0.0</td>\n",
627
- " <td>0.0</td>\n",
628
- " <td>3</td>\n",
629
- " <td>100</td>\n",
630
- " <td>16.2</td>\n",
631
- " <td>30.2</td>\n",
632
- " </tr>\n",
633
- " <tr>\n",
634
- " <th>2</th>\n",
635
- " <td>2022-01-01</td>\n",
636
- " <td>2022-01-01 02:00:00</td>\n",
637
- " <td>6.7</td>\n",
638
- " <td>99</td>\n",
639
- " <td>0.0</td>\n",
640
- " <td>0.0</td>\n",
641
- " <td>0.0</td>\n",
642
- " <td>3</td>\n",
643
- " <td>100</td>\n",
644
- " <td>15.5</td>\n",
645
- " <td>30.6</td>\n",
646
- " </tr>\n",
647
- " <tr>\n",
648
- " <th>3</th>\n",
649
- " <td>2022-01-01</td>\n",
650
- " <td>2022-01-01 03:00:00</td>\n",
651
- " <td>6.7</td>\n",
652
- " <td>100</td>\n",
653
- " <td>0.0</td>\n",
654
- " <td>0.0</td>\n",
655
- " <td>0.0</td>\n",
656
- " <td>3</td>\n",
657
- " <td>100</td>\n",
658
- " <td>12.7</td>\n",
659
- " <td>28.8</td>\n",
660
- " </tr>\n",
661
- " <tr>\n",
662
- " <th>4</th>\n",
663
- " <td>2022-01-01</td>\n",
664
- " <td>2022-01-01 04:00:00</td>\n",
665
- " <td>6.7</td>\n",
666
- " <td>99</td>\n",
667
- " <td>0.0</td>\n",
668
- " <td>0.0</td>\n",
669
- " <td>0.0</td>\n",
670
- " <td>3</td>\n",
671
- " <td>100</td>\n",
672
- " <td>10.6</td>\n",
673
- " <td>23.8</td>\n",
674
- " </tr>\n",
675
- " </tbody>\n",
676
- "</table>\n",
677
- "</div>"
678
- ],
679
- "text/plain": [
680
- " date time temperature_2m relative_humidity_2m \\\n",
681
- "0 2022-01-01 2022-01-01 00:00:00 6.7 100 \n",
682
- "1 2022-01-01 2022-01-01 01:00:00 6.6 100 \n",
683
- "2 2022-01-01 2022-01-01 02:00:00 6.7 99 \n",
684
- "3 2022-01-01 2022-01-01 03:00:00 6.7 100 \n",
685
- "4 2022-01-01 2022-01-01 04:00:00 6.7 99 \n",
686
- "\n",
687
- " precipitation rain snowfall weather_code cloud_cover wind_speed_10m \\\n",
688
- "0 0.0 0.0 0.0 3 100 16.2 \n",
689
- "1 0.0 0.0 0.0 3 100 16.2 \n",
690
- "2 0.0 0.0 0.0 3 100 15.5 \n",
691
- "3 0.0 0.0 0.0 3 100 12.7 \n",
692
- "4 0.0 0.0 0.0 3 100 10.6 \n",
693
- "\n",
694
- " wind_gusts_10m \n",
695
- "0 36.0 \n",
696
- "1 30.2 \n",
697
- "2 30.6 \n",
698
- "3 28.8 \n",
699
- "4 23.8 "
700
- ]
701
- },
702
- "execution_count": 21,
703
- "metadata": {},
704
- "output_type": "execute_result"
705
- }
706
- ],
707
- "source": [
708
- "# Display the first 5 rows of the dataframe\n",
709
- "weather_df.head()"
710
- ]
711
- },
712
- {
713
- "cell_type": "code",
714
- "execution_count": 22,
715
- "metadata": {},
716
- "outputs": [
717
- {
718
- "name": "stdout",
719
- "output_type": "stream",
720
- "text": [
721
- "<class 'pandas.core.frame.DataFrame'>\n",
722
- "RangeIndex: 17520 entries, 0 to 17519\n",
723
- "Data columns (total 11 columns):\n",
724
- " # Column Non-Null Count Dtype \n",
725
- "--- ------ -------------- ----- \n",
726
- " 0 date 17520 non-null object \n",
727
- " 1 time 17520 non-null datetime64[ns]\n",
728
- " 2 temperature_2m 17520 non-null float64 \n",
729
- " 3 relative_humidity_2m 17520 non-null int64 \n",
730
- " 4 precipitation 17520 non-null float64 \n",
731
- " 5 rain 17520 non-null float64 \n",
732
- " 6 snowfall 17520 non-null float64 \n",
733
- " 7 weather_code 17520 non-null int64 \n",
734
- " 8 cloud_cover 17520 non-null int64 \n",
735
- " 9 wind_speed_10m 17520 non-null float64 \n",
736
- " 10 wind_gusts_10m 17520 non-null float64 \n",
737
- "dtypes: datetime64[ns](1), float64(6), int64(3), object(1)\n",
738
- "memory usage: 1.5+ MB\n"
739
- ]
740
- }
741
- ],
742
- "source": [
743
- "# Showing the information for the weather dataframe\n",
744
- "weather_df.info()"
745
- ]
746
- },
747
- {
748
- "cell_type": "code",
749
- "execution_count": 23,
750
- "metadata": {},
751
- "outputs": [
752
- {
753
- "data": {
754
- "text/html": [
755
- "<div>\n",
756
- "<style scoped>\n",
757
- " .dataframe tbody tr th:only-of-type {\n",
758
- " vertical-align: middle;\n",
759
- " }\n",
760
- "\n",
761
- " .dataframe tbody tr th {\n",
762
- " vertical-align: top;\n",
763
- " }\n",
764
- "\n",
765
- " .dataframe thead th {\n",
766
- " text-align: right;\n",
767
- " }\n",
768
- "</style>\n",
769
- "<table border=\"1\" class=\"dataframe\">\n",
770
- " <thead>\n",
771
- " <tr style=\"text-align: right;\">\n",
772
- " <th></th>\n",
773
- " <th>date</th>\n",
774
- " <th>time</th>\n",
775
- " <th>temperature_2m</th>\n",
776
- " <th>relative_humidity_2m</th>\n",
777
- " <th>precipitation</th>\n",
778
- " <th>rain</th>\n",
779
- " <th>snowfall</th>\n",
780
- " <th>weather_code</th>\n",
781
- " <th>cloud_cover</th>\n",
782
- " <th>wind_speed_10m</th>\n",
783
- " <th>wind_gusts_10m</th>\n",
784
- " </tr>\n",
785
- " </thead>\n",
786
- " <tbody>\n",
787
- " <tr>\n",
788
- " <th>0</th>\n",
789
- " <td>2022-01-01</td>\n",
790
- " <td>2022-01-01 00:00:00</td>\n",
791
- " <td>6.7</td>\n",
792
- " <td>100</td>\n",
793
- " <td>0.0</td>\n",
794
- " <td>0.0</td>\n",
795
- " <td>0.0</td>\n",
796
- " <td>3</td>\n",
797
- " <td>100</td>\n",
798
- " <td>16.2</td>\n",
799
- " <td>36.0</td>\n",
800
- " </tr>\n",
801
- " <tr>\n",
802
- " <th>1</th>\n",
803
- " <td>2022-01-01</td>\n",
804
- " <td>2022-01-01 01:00:00</td>\n",
805
- " <td>6.6</td>\n",
806
- " <td>100</td>\n",
807
- " <td>0.0</td>\n",
808
- " <td>0.0</td>\n",
809
- " <td>0.0</td>\n",
810
- " <td>3</td>\n",
811
- " <td>100</td>\n",
812
- " <td>16.2</td>\n",
813
- " <td>30.2</td>\n",
814
- " </tr>\n",
815
- " <tr>\n",
816
- " <th>2</th>\n",
817
- " <td>2022-01-01</td>\n",
818
- " <td>2022-01-01 02:00:00</td>\n",
819
- " <td>6.7</td>\n",
820
- " <td>99</td>\n",
821
- " <td>0.0</td>\n",
822
- " <td>0.0</td>\n",
823
- " <td>0.0</td>\n",
824
- " <td>3</td>\n",
825
- " <td>100</td>\n",
826
- " <td>15.5</td>\n",
827
- " <td>30.6</td>\n",
828
- " </tr>\n",
829
- " <tr>\n",
830
- " <th>3</th>\n",
831
- " <td>2022-01-01</td>\n",
832
- " <td>2022-01-01 03:00:00</td>\n",
833
- " <td>6.7</td>\n",
834
- " <td>100</td>\n",
835
- " <td>0.0</td>\n",
836
- " <td>0.0</td>\n",
837
- " <td>0.0</td>\n",
838
- " <td>3</td>\n",
839
- " <td>100</td>\n",
840
- " <td>12.7</td>\n",
841
- " <td>28.8</td>\n",
842
- " </tr>\n",
843
- " <tr>\n",
844
- " <th>4</th>\n",
845
- " <td>2022-01-01</td>\n",
846
- " <td>2022-01-01 04:00:00</td>\n",
847
- " <td>6.7</td>\n",
848
- " <td>99</td>\n",
849
- " <td>0.0</td>\n",
850
- " <td>0.0</td>\n",
851
- " <td>0.0</td>\n",
852
- " <td>3</td>\n",
853
- " <td>100</td>\n",
854
- " <td>10.6</td>\n",
855
- " <td>23.8</td>\n",
856
- " </tr>\n",
857
- " </tbody>\n",
858
- "</table>\n",
859
- "</div>"
860
- ],
861
- "text/plain": [
862
- " date time temperature_2m relative_humidity_2m \\\n",
863
- "0 2022-01-01 2022-01-01 00:00:00 6.7 100 \n",
864
- "1 2022-01-01 2022-01-01 01:00:00 6.6 100 \n",
865
- "2 2022-01-01 2022-01-01 02:00:00 6.7 99 \n",
866
- "3 2022-01-01 2022-01-01 03:00:00 6.7 100 \n",
867
- "4 2022-01-01 2022-01-01 04:00:00 6.7 99 \n",
868
- "\n",
869
- " precipitation rain snowfall weather_code cloud_cover wind_speed_10m \\\n",
870
- "0 0.0 0.0 0.0 3 100 16.2 \n",
871
- "1 0.0 0.0 0.0 3 100 16.2 \n",
872
- "2 0.0 0.0 0.0 3 100 15.5 \n",
873
- "3 0.0 0.0 0.0 3 100 12.7 \n",
874
- "4 0.0 0.0 0.0 3 100 10.6 \n",
875
- "\n",
876
- " wind_gusts_10m \n",
877
- "0 36.0 \n",
878
- "1 30.2 \n",
879
- "2 30.6 \n",
880
- "3 28.8 \n",
881
- "4 23.8 "
882
- ]
883
- },
884
- "execution_count": 23,
885
- "metadata": {},
886
- "output_type": "execute_result"
887
- }
888
- ],
889
- "source": [
890
- "# Fetching weather measurements data\n",
891
- "weater = weater_measures.fetch_weater_measures()\n",
892
- "weater.head()"
893
- ]
894
- },
895
- {
896
- "cell_type": "markdown",
897
- "metadata": {},
898
- "source": [
899
- "### <span style=\"color:#2656a3;\"> 🗓️ Calendar of Danish workdays and holidays "
900
- ]
901
- },
902
- {
903
- "cell_type": "code",
904
- "execution_count": 24,
905
- "metadata": {},
906
- "outputs": [
907
- {
908
- "data": {
909
- "text/html": [
910
- "<div>\n",
911
- "<style scoped>\n",
912
- " .dataframe tbody tr th:only-of-type {\n",
913
- " vertical-align: middle;\n",
914
- " }\n",
915
- "\n",
916
- " .dataframe tbody tr th {\n",
917
- " vertical-align: top;\n",
918
- " }\n",
919
- "\n",
920
- " .dataframe thead th {\n",
921
- " text-align: right;\n",
922
- " }\n",
923
- "</style>\n",
924
- "<table border=\"1\" class=\"dataframe\">\n",
925
- " <thead>\n",
926
- " <tr style=\"text-align: right;\">\n",
927
- " <th></th>\n",
928
- " <th>date</th>\n",
929
- " <th>type</th>\n",
930
- " </tr>\n",
931
- " </thead>\n",
932
- " <tbody>\n",
933
- " <tr>\n",
934
- " <th>0</th>\n",
935
- " <td>01/01/2022</td>\n",
936
- " <td>Not a Workday</td>\n",
937
- " </tr>\n",
938
- " <tr>\n",
939
- " <th>1</th>\n",
940
- " <td>02/01/2022</td>\n",
941
- " <td>Not a Workday</td>\n",
942
- " </tr>\n",
943
- " <tr>\n",
944
- " <th>2</th>\n",
945
- " <td>03/01/2022</td>\n",
946
- " <td>Workday</td>\n",
947
- " </tr>\n",
948
- " <tr>\n",
949
- " <th>3</th>\n",
950
- " <td>04/01/2022</td>\n",
951
- " <td>Workday</td>\n",
952
- " </tr>\n",
953
- " <tr>\n",
954
- " <th>4</th>\n",
955
- " <td>05/01/2022</td>\n",
956
- " <td>Workday</td>\n",
957
- " </tr>\n",
958
- " <tr>\n",
959
- " <th>...</th>\n",
960
- " <td>...</td>\n",
961
- " <td>...</td>\n",
962
- " </tr>\n",
963
- " <tr>\n",
964
- " <th>1091</th>\n",
965
- " <td>27/12/2024</td>\n",
966
- " <td>Workday</td>\n",
967
- " </tr>\n",
968
- " <tr>\n",
969
- " <th>1092</th>\n",
970
- " <td>28/12/2024</td>\n",
971
- " <td>Not a Workday</td>\n",
972
- " </tr>\n",
973
- " <tr>\n",
974
- " <th>1093</th>\n",
975
- " <td>29/12/2024</td>\n",
976
- " <td>Not a Workday</td>\n",
977
- " </tr>\n",
978
- " <tr>\n",
979
- " <th>1094</th>\n",
980
- " <td>30/12/2024</td>\n",
981
- " <td>Workday</td>\n",
982
- " </tr>\n",
983
- " <tr>\n",
984
- " <th>1095</th>\n",
985
- " <td>31/12/2024</td>\n",
986
- " <td>Workday</td>\n",
987
- " </tr>\n",
988
- " </tbody>\n",
989
- "</table>\n",
990
- "<p>1096 rows × 2 columns</p>\n",
991
- "</div>"
992
- ],
993
- "text/plain": [
994
- " date type\n",
995
- "0 01/01/2022 Not a Workday\n",
996
- "1 02/01/2022 Not a Workday\n",
997
- "2 03/01/2022 Workday\n",
998
- "3 04/01/2022 Workday\n",
999
- "4 05/01/2022 Workday\n",
1000
- "... ... ...\n",
1001
- "1091 27/12/2024 Workday\n",
1002
- "1092 28/12/2024 Not a Workday\n",
1003
- "1093 29/12/2024 Not a Workday\n",
1004
- "1094 30/12/2024 Workday\n",
1005
- "1095 31/12/2024 Workday\n",
1006
- "\n",
1007
- "[1096 rows x 2 columns]"
1008
- ]
1009
- },
1010
- "execution_count": 24,
1011
- "metadata": {},
1012
- "output_type": "execute_result"
1013
- }
1014
- ],
1015
- "source": [
1016
- "# Read the CSV file containing the calendar\n",
1017
- "calender_df = pd.read_csv('https://raw.githubusercontent.com/Camillahannesbo/MLOPs-Assignment-/main/data/calendar_incl_holiday.csv', delimiter=';', usecols=['date', 'type'])\n",
1018
- " \n",
1019
- "# Display the DataFrame\n",
1020
- "calender_df"
1021
- ]
1022
- },
1023
- {
1024
- "cell_type": "code",
1025
- "execution_count": 25,
1026
- "metadata": {},
1027
- "outputs": [],
1028
- "source": [
1029
- "# Formatting the date column by replacing the / with -\n",
1030
- "calender_df['date'] = calender_df['date'].str.replace('/', '-')"
1031
- ]
1032
- },
1033
- {
1034
- "cell_type": "code",
1035
- "execution_count": 26,
1036
- "metadata": {},
1037
- "outputs": [],
1038
- "source": [
1039
- "# Defining the function for flipping the date to the left\n",
1040
- "\n",
1041
- "# Flip the date to the left\n",
1042
- "def flip_date_left(date):\n",
1043
- " parts = date.split(\"-\") # Input is \"DD-MM-YYYY\"; reversing the parts yields \"YYYY-MM-DD\"\n",
1044
- " flipped_date = \"-\".join(parts[::-1])\n",
1045
- " return flipped_date\n",
1046
- "\n",
1047
- "# Flip the date to the left for the date column\n",
1048
- "def flip_dates_left_in_column(column):\n",
1049
- " flipped_column = [flip_date_left(date) for date in column]\n",
1050
- " return flipped_column"
1051
- ]
1052
- },
1053
- {
1054
- "cell_type": "code",
1055
- "execution_count": 27,
1056
- "metadata": {},
1057
- "outputs": [],
1058
- "source": [
1059
- "# Make the new flipped_dates_column based on the function above and insert it as the 'date' column in the dataframe\n",
1060
- "flipped_dates_column = flip_dates_left_in_column(calender_df['date'])\n",
1061
- "calender_df['date'] = flipped_dates_column"
1062
- ]
1063
- },
1064
- {
1065
- "cell_type": "code",
1066
- "execution_count": 28,
1067
- "metadata": {},
1068
- "outputs": [],
1069
- "source": [
1070
- "# Convert string 'date' column to date type\n",
1071
- "calender_df['date'] = pd.to_datetime(calender_df['date'], format='%Y-%m-%d').dt.date"
1072
- ]
1073
- },
1074
- {
1075
- "cell_type": "code",
1076
- "execution_count": 29,
1077
- "metadata": {},
1078
- "outputs": [
1079
- {
1080
- "data": {
1081
- "text/html": [
1082
- "<div>\n",
1083
- "<style scoped>\n",
1084
- " .dataframe tbody tr th:only-of-type {\n",
1085
- " vertical-align: middle;\n",
1086
- " }\n",
1087
- "\n",
1088
- " .dataframe tbody tr th {\n",
1089
- " vertical-align: top;\n",
1090
- " }\n",
1091
- "\n",
1092
- " .dataframe thead th {\n",
1093
- " text-align: right;\n",
1094
- " }\n",
1095
- "</style>\n",
1096
- "<table border=\"1\" class=\"dataframe\">\n",
1097
- " <thead>\n",
1098
- " <tr style=\"text-align: right;\">\n",
1099
- " <th></th>\n",
1100
- " <th>date</th>\n",
1101
- " <th>type</th>\n",
1102
- " </tr>\n",
1103
- " </thead>\n",
1104
- " <tbody>\n",
1105
- " <tr>\n",
1106
- " <th>0</th>\n",
1107
- " <td>2022-01-01</td>\n",
1108
- " <td>Not a Workday</td>\n",
1109
- " </tr>\n",
1110
- " <tr>\n",
1111
- " <th>1</th>\n",
1112
- " <td>2022-01-02</td>\n",
1113
- " <td>Not a Workday</td>\n",
1114
- " </tr>\n",
1115
- " <tr>\n",
1116
- " <th>2</th>\n",
1117
- " <td>2022-01-03</td>\n",
1118
- " <td>Workday</td>\n",
1119
- " </tr>\n",
1120
- " <tr>\n",
1121
- " <th>3</th>\n",
1122
- " <td>2022-01-04</td>\n",
1123
- " <td>Workday</td>\n",
1124
- " </tr>\n",
1125
- " <tr>\n",
1126
- " <th>4</th>\n",
1127
- " <td>2022-01-05</td>\n",
1128
- " <td>Workday</td>\n",
1129
- " </tr>\n",
1130
- " </tbody>\n",
1131
- "</table>\n",
1132
- "</div>"
1133
- ],
1134
- "text/plain": [
1135
- " date type\n",
1136
- "0 2022-01-01 Not a Workday\n",
1137
- "1 2022-01-02 Not a Workday\n",
1138
- "2 2022-01-03 Workday\n",
1139
- "3 2022-01-04 Workday\n",
1140
- "4 2022-01-05 Workday"
1141
- ]
1142
- },
1143
- "execution_count": 29,
1144
- "metadata": {},
1145
- "output_type": "execute_result"
1146
- }
1147
- ],
1148
- "source": [
1149
- "# Display the first 5 rows of the dataframe\n",
1150
- "calender_df.head()"
1151
- ]
1152
- },
1153
- {
1154
- "cell_type": "code",
1155
- "execution_count": 30,
1156
- "metadata": {},
1157
- "outputs": [
1158
- {
1159
- "name": "stdout",
1160
- "output_type": "stream",
1161
- "text": [
1162
- "<class 'pandas.core.frame.DataFrame'>\n",
1163
- "RangeIndex: 1096 entries, 0 to 1095\n",
1164
- "Data columns (total 2 columns):\n",
1165
- " # Column Non-Null Count Dtype \n",
1166
- "--- ------ -------------- ----- \n",
1167
- " 0 date 1096 non-null object\n",
1168
- " 1 type 1096 non-null object\n",
1169
- "dtypes: object(2)\n",
1170
- "memory usage: 17.3+ KB\n"
1171
- ]
1172
- }
1173
- ],
1174
- "source": [
1175
- "# Showing the information for the calender dataframe\n",
1176
- "calender_df.info()"
1177
- ]
1178
- },
1179
- {
1180
- "cell_type": "markdown",
1181
- "metadata": {},
1182
- "source": [
1183
- "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store\n",
1184
- "\n",
1185
- "First we will connect to Hopsworks Feature Store so we can access and create Feature Groups.\n",
1186
- "Feature groups can also be used to define a namespace for features. For instance, in a real-life setting you would likely want to experiment with different window lengths. In that case, you can create feature groups with identical schema for each window length. \n",
1187
- "\n",
1188
- "Before you can create a feature group you need to connect to our feature store."
1189
- ]
1190
- },
1191
- {
1192
- "cell_type": "code",
1193
- "execution_count": null,
1194
- "metadata": {},
1195
- "outputs": [],
1196
- "source": [
1197
- "import hopsworks\n",
1198
- "\n",
1199
- "project = hopsworks.login()\n",
1200
- "\n",
1201
- "fs = project.get_feature_store()"
1202
- ]
1203
- },
1204
- {
1205
- "cell_type": "markdown",
1206
- "metadata": {},
1207
- "source": [
1208
- "### <span style=\"color:#2656a3;\"> 🪄 Creating Feature Groups\n",
1209
- "\n",
1210
- "When creating a feature group, you must name it and designate a primary key. Additionally, it's helpful to include a description of the feature group's contents and a version number; if not defined, it will default to `1`. \n",
1211
- "\n",
1212
- "We've configured `online_enabled` as `True` to enable the feature group to be read via the Online API for a Feature View."
1213
- ]
1214
- },
1215
- {
1216
- "cell_type": "code",
1217
- "execution_count": null,
1218
- "metadata": {},
1219
- "outputs": [],
1220
- "source": [
1221
- "# Creating the feature group for the weather data\n",
1222
- "weather_fg = fs.get_or_create_feature_group(\n",
1223
- " name=\"weather_measurements\",\n",
1224
- " version=1,\n",
1225
- " description=\"Weather measurements from Open Meteo API\",\n",
1226
- " primary_key=[\"date\"],\n",
1227
- " event_time=\"time\",\n",
1228
- " online_enabled=True,\n",
1229
- ")"
1230
- ]
1231
- },
1232
- {
1233
- "cell_type": "markdown",
1234
- "metadata": {},
1235
- "source": [
1236
- "By now, you've only outlined metadata for the feature group. There's no data stored, nor is there a defined schema for it. To establish persistence for the feature group, you'll need to populate it with its associated data using the `insert` function"
1237
- ]
1238
- },
1239
- {
1240
- "cell_type": "code",
1241
- "execution_count": null,
1242
- "metadata": {},
1243
- "outputs": [],
1244
- "source": [
1245
- "# Inserting the weather_df into the feature group named weather_fg\n",
1246
- "weather_fg.insert(weather_df)"
1247
- ]
1248
- },
1249
- {
1250
- "cell_type": "markdown",
1251
- "metadata": {},
1252
- "source": [
1253
- "We add a description for each feature we put into the feature group. This gives the user more information and documentation."
1254
- ]
1255
- },
1256
- {
1257
- "cell_type": "code",
1258
- "execution_count": null,
1259
- "metadata": {},
1260
- "outputs": [],
1261
- "source": [
1262
- "weather_feature_descriptions = [\n",
1263
- " {\"name\": \"date\", \"description\": \"Date of the weather measurement\"},\n",
1264
- " {\"name\": \"time\", \"description\": \"Time of the weather measurement\"},\n",
1265
- " {\"name\": \"temperature_2m\", \"description\": \"Temperature at 2m above ground\"},\n",
1266
- " {\"name\": \"relative_humidity_2m\", \"description\": \"Relative humidity at 2m above ground\"},\n",
1267
- " {\"name\": \"precipitation\", \"description\": \"Precipitation\"},\n",
1268
- " {\"name\": \"rain\", \"description\": \"Rain\"},\n",
1269
- " {\"name\": \"snowfall\", \"description\": \"Snowfall\"}, \n",
1270
- " {\"name\": \"weather_code\", \"description\": \"Weather code\"}, \n",
1271
- " {\"name\": \"cloud_cover\", \"description\": \"Cloud cover\"}, \n",
1272
- " {\"name\": \"wind_speed_10m\", \"description\": \"Wind speed at 10m above ground\"}, \n",
1273
- " {\"name\": \"wind_gusts_10m\", \"description\": \"Wind gusts at 10m above ground\"}, \n",
1274
- "]\n",
1275
- "\n",
1276
- "for desc in weather_feature_descriptions: \n",
1277
- " weather_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1278
- ]
1279
- },
1280
- {
1281
- "cell_type": "markdown",
1282
- "metadata": {},
1283
- "source": [
1284
- "We replicate the process for both the `electricity_fg` and `danish_holidays_fg` by establishing feature groups and inserting the dataframes into their respective feature groups."
1285
- ]
1286
- },
1287
- {
1288
- "cell_type": "code",
1289
- "execution_count": null,
1290
- "metadata": {},
1291
- "outputs": [],
1292
- "source": [
1293
- "# Creating the feature group for the electricity prices\n",
1294
- "electricity_fg = fs.get_or_create_feature_group(\n",
1295
- " name=\"electricity_prices\",\n",
1296
- " version=1,\n",
1297
- " description=\"Electricity prices from Energidata API\",\n",
1298
- " primary_key=[\"date\"],\n",
1299
- " online_enabled=True,\n",
1300
- " event_time=\"time\",\n",
1301
- ")"
1302
- ]
1303
- },
1304
- {
1305
- "cell_type": "code",
1306
- "execution_count": null,
1307
- "metadata": {},
1308
- "outputs": [],
1309
- "source": [
1310
- "# Inserting the electricity_df into the feature group named electricity_fg\n",
1311
- "electricity_fg.insert(electricity_df)"
1312
- ]
1313
- },
1314
- {
1315
- "cell_type": "code",
1316
- "execution_count": null,
1317
- "metadata": {},
1318
- "outputs": [],
1319
- "source": [
1320
- "electricity_feature_descriptions = [\n",
1321
- " {\"name\": \"date\", \"description\": \"Date of the electricity measurement\"},\n",
1322
- " {\"name\": \"time\", \"description\": \"Time of the electricity measurement\"},\n",
1323
- " {\"name\": \"PriceArea\", \"description\": \"Price area for the electricity measurement\"},\n",
1324
- " {\"name\": \"SpotPriceDKK_KWH\", \"description\": \"Spot price in DKK per KWH\"}, \n",
1325
- "]\n",
1326
- "\n",
1327
- "for desc in electricity_feature_descriptions: \n",
1328
- " electricity_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1329
- ]
1330
- },
1331
- {
1332
- "cell_type": "code",
1333
- "execution_count": null,
1334
- "metadata": {},
1335
- "outputs": [],
1336
- "source": [
1337
- "# Creating the feature group for the danish holidays\n",
1338
- "danish_holidays_fg = fs.get_or_create_feature_group(\n",
1339
- " name=\"danish_holidays\",\n",
1340
- " version=1,\n",
1341
- " description=\"Danish holidays calendar.\",\n",
1342
- " online_enabled=True,\n",
1343
- " primary_key=[\"date\"],\n",
1344
- ")"
1345
- ]
1346
- },
1347
- {
1348
- "cell_type": "code",
1349
- "execution_count": null,
1350
- "metadata": {},
1351
- "outputs": [],
1352
- "source": [
1353
- "# Inserting the calender_df into the feature group named danish_holidays_fg\n",
1354
- "danish_holidays_fg.insert(calender_df)"
1355
- ]
1356
- },
1357
- {
1358
- "cell_type": "code",
1359
- "execution_count": null,
1360
- "metadata": {},
1361
- "outputs": [],
1362
- "source": [
1363
- "danish_holidays_feature_descriptions = [\n",
1364
- " {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
1365
- " {\"name\": \"type\", \"description\": \"Holyday or not holyday\"},\n",
1366
- "]\n",
1367
- "\n",
1368
- "for desc in danish_holidays_feature_descriptions: \n",
1369
- " danish_holidays_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1370
- ]
1371
- },
1372
- {
1373
- "cell_type": "markdown",
1374
- "metadata": {},
1375
- "source": [
1376
- "---\n",
1377
- "## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 02: Feature Pipeline </span>\n",
1378
- "\n",
1379
- "In the next notebook, you will be generating new data for the Feature Groups."
1380
- ]
1381
- }
1382
- ],
1383
- "metadata": {
1384
- "kernelspec": {
1385
- "display_name": "bds-streamlit",
1386
- "language": "python",
1387
- "name": "python3"
1388
- },
1389
- "language_info": {
1390
- "codemirror_mode": {
1391
- "name": "ipython",
1392
- "version": 3
1393
- },
1394
- "file_extension": ".py",
1395
- "mimetype": "text/x-python",
1396
- "name": "python",
1397
- "nbconvert_exporter": "python",
1398
- "pygments_lexer": "ipython3",
1399
- "version": "3.11.8"
1400
- }
1401
- },
1402
- "nbformat": 4,
1403
- "nbformat_minor": 2
1404
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/Old/2_feature_pipeline_OLD.ipynb DELETED
@@ -1,561 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# <span style=\"font-width:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-width:bold; font-size: 3rem; color:#333;\">- Part 02: Feature Pipeline</span>"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "## 🗒️ This notebook is divided into the following sections:\n",
15
- "1. Parse new aata.\n",
16
- "2. Insert new data into the Feature Store."
17
- ]
18
- },
19
- {
20
- "cell_type": "markdown",
21
- "metadata": {},
22
- "source": [
23
- "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages"
24
- ]
25
- },
26
- {
27
- "cell_type": "code",
28
- "execution_count": 1,
29
- "metadata": {},
30
- "outputs": [],
31
- "source": [
32
- "# Importing of the packages for the needed libraries for the Jupyter notebook\n",
33
- "import pandas as pd\n",
34
- "import requests\n",
35
- "\n",
36
- "# Ignore warnings\n",
37
- "import warnings \n",
38
- "warnings.filterwarnings('ignore')"
39
- ]
40
- },
41
- {
42
- "cell_type": "markdown",
43
- "metadata": {},
44
- "source": [
45
- "## <span style='color:#2656a3'> 🪄 Parsing new data"
46
- ]
47
- },
48
- {
49
- "cell_type": "markdown",
50
- "metadata": {},
51
- "source": [
52
- "### <span style=\"color:#2656a3;\">💸 Electricity prices per day from Energinet"
53
- ]
54
- },
55
- {
56
- "cell_type": "code",
57
- "execution_count": 2,
58
- "metadata": {},
59
- "outputs": [],
60
- "source": [
61
- "# Defining the URL for the API call to the electricity price data\n",
62
- "electricity_api_url = ('https://api.energidataservice.dk/dataset/Elspotprices?offset=0&start=2024-01-01T00:00&end=2024-04-08T00:00&filter=%7B%22PriceArea%22:[%22DK1%22]%7D&sort=HourUTC%20DESC')"
63
- ]
64
- },
65
- {
66
- "cell_type": "code",
67
- "execution_count": 3,
68
- "metadata": {},
69
- "outputs": [
70
- {
71
- "name": "stdout",
72
- "output_type": "stream",
73
- "text": [
74
- "<Response [200]>\n"
75
- ]
76
- }
77
- ],
78
- "source": [
79
- "# Fetch data from the API and make the output to a pandas dataframe\n",
80
- "electricity_data_response = requests.get(electricity_api_url)\n",
81
- "electricity_data = electricity_data_response.json()\n",
82
- "electricity_df = pd.DataFrame(electricity_data['records'])\n",
83
- "\n",
84
- "# Checking the result of the API call. If the response if 200 then the API call was successfull\n",
85
- "print(electricity_data_response)"
86
- ]
87
- },
88
- {
89
- "cell_type": "code",
90
- "execution_count": 4,
91
- "metadata": {},
92
- "outputs": [],
93
- "source": [
94
- "# Datapreprocessing by making the spotprice per kwh instead of mwh\n",
95
- "electricity_df['SpotPriceDKK_KWH'] = electricity_df['SpotPriceDKK'] / 1000"
96
- ]
97
- },
98
- {
99
- "cell_type": "code",
100
- "execution_count": 5,
101
- "metadata": {},
102
- "outputs": [],
103
- "source": [
104
- "# Datacleaning by removing the columns that are not needed\n",
105
- "electricity_df.drop('SpotPriceDKK', axis=1, inplace=True)\n",
106
- "electricity_df.drop('SpotPriceEUR', axis=1, inplace=True)\n",
107
- "electricity_df.drop('HourUTC', axis=1, inplace=True)"
108
- ]
109
- },
110
- {
111
- "cell_type": "code",
112
- "execution_count": 6,
113
- "metadata": {},
114
- "outputs": [],
115
- "source": [
116
- "# Renaming the columns and reformating the time column\n",
117
- "electricity_df.rename(columns={'HourDK': 'time'}, inplace=True)"
118
- ]
119
- },
120
- {
121
- "cell_type": "code",
122
- "execution_count": 7,
123
- "metadata": {},
124
- "outputs": [],
125
- "source": [
126
- "# Formatting the date column\n",
127
- "electricity_df['time'] = electricity_df['time'].astype(str).str[:-3]\n",
128
- "electricity_df['date'] = electricity_df['time'].str[:10]"
129
- ]
130
- },
131
- {
132
- "cell_type": "code",
133
- "execution_count": 8,
134
- "metadata": {},
135
- "outputs": [],
136
- "source": [
137
- "# Creating a new column for the date called electricity_temporary_date_column and insert it as the first column in the dataframe\n",
138
- "electricity_temporary_date_column = electricity_df.pop('date')\n",
139
- "electricity_df.insert(0, 'date', electricity_temporary_date_column)"
140
- ]
141
- },
142
- {
143
- "cell_type": "code",
144
- "execution_count": 9,
145
- "metadata": {},
146
- "outputs": [],
147
- "source": [
148
- "# Convert string 'date' column to date type and 'time' column to datetime format\n",
149
- "electricity_df['date'] = pd.to_datetime(electricity_df['date'], format='%Y-%m-%d').dt.date\n",
150
- "electricity_df['time'] = pd.to_datetime(electricity_df['time'])"
151
- ]
152
- },
153
- {
154
- "cell_type": "code",
155
- "execution_count": 10,
156
- "metadata": {},
157
- "outputs": [
158
- {
159
- "data": {
160
- "text/html": [
161
- "<div>\n",
162
- "<style scoped>\n",
163
- " .dataframe tbody tr th:only-of-type {\n",
164
- " vertical-align: middle;\n",
165
- " }\n",
166
- "\n",
167
- " .dataframe tbody tr th {\n",
168
- " vertical-align: top;\n",
169
- " }\n",
170
- "\n",
171
- " .dataframe thead th {\n",
172
- " text-align: right;\n",
173
- " }\n",
174
- "</style>\n",
175
- "<table border=\"1\" class=\"dataframe\">\n",
176
- " <thead>\n",
177
- " <tr style=\"text-align: right;\">\n",
178
- " <th></th>\n",
179
- " <th>date</th>\n",
180
- " <th>time</th>\n",
181
- " <th>PriceArea</th>\n",
182
- " <th>SpotPriceDKK_KWH</th>\n",
183
- " </tr>\n",
184
- " </thead>\n",
185
- " <tbody>\n",
186
- " <tr>\n",
187
- " <th>0</th>\n",
188
- " <td>2024-04-07</td>\n",
189
- " <td>2024-04-07 23:00:00</td>\n",
190
- " <td>DK1</td>\n",
191
- " <td>0.31886</td>\n",
192
- " </tr>\n",
193
- " <tr>\n",
194
- " <th>1</th>\n",
195
- " <td>2024-04-07</td>\n",
196
- " <td>2024-04-07 22:00:00</td>\n",
197
- " <td>DK1</td>\n",
198
- " <td>0.34078</td>\n",
199
- " </tr>\n",
200
- " <tr>\n",
201
- " <th>2</th>\n",
202
- " <td>2024-04-07</td>\n",
203
- " <td>2024-04-07 21:00:00</td>\n",
204
- " <td>DK1</td>\n",
205
- " <td>0.35958</td>\n",
206
- " </tr>\n",
207
- " <tr>\n",
208
- " <th>3</th>\n",
209
- " <td>2024-04-07</td>\n",
210
- " <td>2024-04-07 20:00:00</td>\n",
211
- " <td>DK1</td>\n",
212
- " <td>0.35645</td>\n",
213
- " </tr>\n",
214
- " <tr>\n",
215
- " <th>4</th>\n",
216
- " <td>2024-04-07</td>\n",
217
- " <td>2024-04-07 19:00:00</td>\n",
218
- " <td>DK1</td>\n",
219
- " <td>0.34399</td>\n",
220
- " </tr>\n",
221
- " </tbody>\n",
222
- "</table>\n",
223
- "</div>"
224
- ],
225
- "text/plain": [
226
- " date time PriceArea SpotPriceDKK_KWH\n",
227
- "0 2024-04-07 2024-04-07 23:00:00 DK1 0.31886\n",
228
- "1 2024-04-07 2024-04-07 22:00:00 DK1 0.34078\n",
229
- "2 2024-04-07 2024-04-07 21:00:00 DK1 0.35958\n",
230
- "3 2024-04-07 2024-04-07 20:00:00 DK1 0.35645\n",
231
- "4 2024-04-07 2024-04-07 19:00:00 DK1 0.34399"
232
- ]
233
- },
234
- "execution_count": 10,
235
- "metadata": {},
236
- "output_type": "execute_result"
237
- }
238
- ],
239
- "source": [
240
- "# Display the first 5 rows of the dataframe\n",
241
- "electricity_df.head()"
242
- ]
243
- },
244
- {
245
- "cell_type": "markdown",
246
- "metadata": {},
247
- "source": [
248
- "### <span style=\"color:#2656a3;\"> 🌤 Weather measurements from Open Meteo"
249
- ]
250
- },
251
- {
252
- "cell_type": "code",
253
- "execution_count": 11,
254
- "metadata": {},
255
- "outputs": [],
256
- "source": [
257
- "# Defining the URL for the API call to the electricity price data\n",
258
- "weather_api_url = ('https://archive-api.open-meteo.com/v1/archive?latitude=57.048&longitude=9.9187&start_date=2024-01-01&end_date=2024-04-08&hourly=temperature_2m,relative_humidity_2m,precipitation,rain,snowfall,weather_code,cloud_cover,wind_speed_10m,wind_gusts_10m&timezone=auto')"
259
- ]
260
- },
261
- {
262
- "cell_type": "code",
263
- "execution_count": 12,
264
- "metadata": {},
265
- "outputs": [
266
- {
267
- "name": "stdout",
268
- "output_type": "stream",
269
- "text": [
270
- "<Response [200]>\n"
271
- ]
272
- }
273
- ],
274
- "source": [
275
- "# Fetch data from the API and make the output to a pandas dataframe\n",
276
- "weather_data_response = requests.get(weather_api_url)\n",
277
- "weather_data = weather_data_response.json()\n",
278
- "weather_df = pd.DataFrame(weather_data['hourly'])\n",
279
- "\n",
280
- "# Checking the result of the API call\n",
281
- "print(weather_data_response)\n"
282
- ]
283
- },
284
- {
285
- "cell_type": "code",
286
- "execution_count": 13,
287
- "metadata": {},
288
- "outputs": [],
289
- "source": [
290
- "# Formatting the date column\n",
291
- "weather_df['date'] = weather_df['time'].str[:10]"
292
- ]
293
- },
294
- {
295
- "cell_type": "code",
296
- "execution_count": 14,
297
- "metadata": {},
298
- "outputs": [],
299
- "source": [
300
- "# Creating a new column for the date called weather_temporary_date_column and insert it as the first column in the dataframe\n",
301
- "weather_temporary_date_column = weather_df.pop('date')\n",
302
- "weather_df.insert(0, 'date', weather_temporary_date_column)"
303
- ]
304
- },
305
- {
306
- "cell_type": "code",
307
- "execution_count": 15,
308
- "metadata": {},
309
- "outputs": [],
310
- "source": [
311
- "# Convert string 'date' column to date type\n",
312
- "weather_df['date'] = pd.to_datetime(weather_df['date'], format='%Y-%m-%d').dt.date\n",
313
- "weather_df['time'] = pd.to_datetime(weather_df['time'])"
314
- ]
315
- },
316
- {
317
- "cell_type": "code",
318
- "execution_count": 16,
319
- "metadata": {},
320
- "outputs": [
321
- {
322
- "data": {
323
- "text/html": [
324
- "<div>\n",
325
- "<style scoped>\n",
326
- " .dataframe tbody tr th:only-of-type {\n",
327
- " vertical-align: middle;\n",
328
- " }\n",
329
- "\n",
330
- " .dataframe tbody tr th {\n",
331
- " vertical-align: top;\n",
332
- " }\n",
333
- "\n",
334
- " .dataframe thead th {\n",
335
- " text-align: right;\n",
336
- " }\n",
337
- "</style>\n",
338
- "<table border=\"1\" class=\"dataframe\">\n",
339
- " <thead>\n",
340
- " <tr style=\"text-align: right;\">\n",
341
- " <th></th>\n",
342
- " <th>date</th>\n",
343
- " <th>time</th>\n",
344
- " <th>temperature_2m</th>\n",
345
- " <th>relative_humidity_2m</th>\n",
346
- " <th>precipitation</th>\n",
347
- " <th>rain</th>\n",
348
- " <th>snowfall</th>\n",
349
- " <th>weather_code</th>\n",
350
- " <th>cloud_cover</th>\n",
351
- " <th>wind_speed_10m</th>\n",
352
- " <th>wind_gusts_10m</th>\n",
353
- " </tr>\n",
354
- " </thead>\n",
355
- " <tbody>\n",
356
- " <tr>\n",
357
- " <th>0</th>\n",
358
- " <td>2024-01-01</td>\n",
359
- " <td>2024-01-01 00:00:00</td>\n",
360
- " <td>4.8</td>\n",
361
- " <td>95</td>\n",
362
- " <td>1.8</td>\n",
363
- " <td>1.8</td>\n",
364
- " <td>0.0</td>\n",
365
- " <td>61</td>\n",
366
- " <td>100</td>\n",
367
- " <td>23.6</td>\n",
368
- " <td>49.0</td>\n",
369
- " </tr>\n",
370
- " <tr>\n",
371
- " <th>1</th>\n",
372
- " <td>2024-01-01</td>\n",
373
- " <td>2024-01-01 01:00:00</td>\n",
374
- " <td>4.9</td>\n",
375
- " <td>95</td>\n",
376
- " <td>1.2</td>\n",
377
- " <td>1.2</td>\n",
378
- " <td>0.0</td>\n",
379
- " <td>55</td>\n",
380
- " <td>100</td>\n",
381
- " <td>21.6</td>\n",
382
- " <td>43.2</td>\n",
383
- " </tr>\n",
384
- " <tr>\n",
385
- " <th>2</th>\n",
386
- " <td>2024-01-01</td>\n",
387
- " <td>2024-01-01 02:00:00</td>\n",
388
- " <td>4.8</td>\n",
389
- " <td>96</td>\n",
390
- " <td>0.6</td>\n",
391
- " <td>0.6</td>\n",
392
- " <td>0.0</td>\n",
393
- " <td>53</td>\n",
394
- " <td>100</td>\n",
395
- " <td>18.4</td>\n",
396
- " <td>39.2</td>\n",
397
- " </tr>\n",
398
- " <tr>\n",
399
- " <th>3</th>\n",
400
- " <td>2024-01-01</td>\n",
401
- " <td>2024-01-01 03:00:00</td>\n",
402
- " <td>4.3</td>\n",
403
- " <td>96</td>\n",
404
- " <td>0.8</td>\n",
405
- " <td>0.8</td>\n",
406
- " <td>0.0</td>\n",
407
- " <td>53</td>\n",
408
- " <td>100</td>\n",
409
- " <td>16.7</td>\n",
410
- " <td>33.8</td>\n",
411
- " </tr>\n",
412
- " <tr>\n",
413
- " <th>4</th>\n",
414
- " <td>2024-01-01</td>\n",
415
- " <td>2024-01-01 04:00:00</td>\n",
416
- " <td>4.4</td>\n",
417
- " <td>97</td>\n",
418
- " <td>0.3</td>\n",
419
- " <td>0.3</td>\n",
420
- " <td>0.0</td>\n",
421
- " <td>51</td>\n",
422
- " <td>100</td>\n",
423
- " <td>15.4</td>\n",
424
- " <td>30.2</td>\n",
425
- " </tr>\n",
426
- " </tbody>\n",
427
- "</table>\n",
428
- "</div>"
429
- ],
430
- "text/plain": [
431
- " date time temperature_2m relative_humidity_2m \\\n",
432
- "0 2024-01-01 2024-01-01 00:00:00 4.8 95 \n",
433
- "1 2024-01-01 2024-01-01 01:00:00 4.9 95 \n",
434
- "2 2024-01-01 2024-01-01 02:00:00 4.8 96 \n",
435
- "3 2024-01-01 2024-01-01 03:00:00 4.3 96 \n",
436
- "4 2024-01-01 2024-01-01 04:00:00 4.4 97 \n",
437
- "\n",
438
- " precipitation rain snowfall weather_code cloud_cover wind_speed_10m \\\n",
439
- "0 1.8 1.8 0.0 61 100 23.6 \n",
440
- "1 1.2 1.2 0.0 55 100 21.6 \n",
441
- "2 0.6 0.6 0.0 53 100 18.4 \n",
442
- "3 0.8 0.8 0.0 53 100 16.7 \n",
443
- "4 0.3 0.3 0.0 51 100 15.4 \n",
444
- "\n",
445
- " wind_gusts_10m \n",
446
- "0 49.0 \n",
447
- "1 43.2 \n",
448
- "2 39.2 \n",
449
- "3 33.8 \n",
450
- "4 30.2 "
451
- ]
452
- },
453
- "execution_count": 16,
454
- "metadata": {},
455
- "output_type": "execute_result"
456
- }
457
- ],
458
- "source": [
459
- "# Display the first 5 rows of the dataframe\n",
460
- "weather_df.head()"
461
- ]
462
- },
463
- {
464
- "cell_type": "markdown",
465
- "metadata": {},
466
- "source": [
467
- "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
468
- ]
469
- },
470
- {
471
- "cell_type": "code",
472
- "execution_count": null,
473
- "metadata": {},
474
- "outputs": [],
475
- "source": [
476
- "import hopsworks\n",
477
- "\n",
478
- "project = hopsworks.login()\n",
479
- "\n",
480
- "fs = project.get_feature_store()"
481
- ]
482
- },
483
- {
484
- "cell_type": "code",
485
- "execution_count": null,
486
- "metadata": {},
487
- "outputs": [],
488
- "source": [
489
- "# Retrieve feature groups\n",
490
- "weather_fg = fs.get_feature_group(\n",
491
- " name=\"weather_measurements\",\n",
492
- " version=1,\n",
493
- ")\n",
494
- "\n",
495
- "electricity_fg = fs.get_feature_group(\n",
496
- " name=\"electricity_prices\",\n",
497
- " version=1,\n",
498
- ")"
499
- ]
500
- },
501
- {
502
- "cell_type": "markdown",
503
- "metadata": {},
504
- "source": [
505
- "### <span style=\"color:#2656a3;\"> ⬆️ Uploading new data to the Feature Store"
506
- ]
507
- },
508
- {
509
- "cell_type": "code",
510
- "execution_count": null,
511
- "metadata": {},
512
- "outputs": [],
513
- "source": [
514
- "# Inserting the weather_df into the feature group named weather_fg\n",
515
- "weather_fg.insert(weather_df)"
516
- ]
517
- },
518
- {
519
- "cell_type": "code",
520
- "execution_count": null,
521
- "metadata": {},
522
- "outputs": [],
523
- "source": [
524
- "# Inserting the electricity_df into the feature group named electricity_fg\n",
525
- "electricity_fg.insert(electricity_df)"
526
- ]
527
- },
528
- {
529
- "cell_type": "markdown",
530
- "metadata": {},
531
- "source": [
532
- "---\n",
533
- "## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 03: Traning </span>\n",
534
- "\n",
535
- "In the next notebook, you will be generating new data for the Feature Groups."
536
- ]
537
- }
538
- ],
539
- "metadata": {
540
- "kernelspec": {
541
- "display_name": "bds-mlops",
542
- "language": "python",
543
- "name": "python3"
544
- },
545
- "language_info": {
546
- "codemirror_mode": {
547
- "name": "ipython",
548
- "version": 3
549
- },
550
- "file_extension": ".py",
551
- "mimetype": "text/x-python",
552
- "name": "python",
553
- "nbconvert_exporter": "python",
554
- "pygments_lexer": "ipython3",
555
- "version": "3.11.8"
556
- },
557
- "orig_nbformat": 4
558
- },
559
- "nbformat": 4,
560
- "nbformat_minor": 2
561
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/Old/3_training_pipeline copy.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
hide/Old/3_training_pipeline_OLD.ipynb DELETED
@@ -1,349 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# <span style=\"font-width:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-width:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "## 🗒️ This notebook is divided into the following sections:\n",
15
- "1. Feature selection.\n",
16
- "2. Feature transformations.\n",
17
- "3. Training datasets creation.\n",
18
- "4. Loading the training data.\n",
19
- "5. Train the model.\n",
20
- "6. Register model to Hopsworks model registry."
21
- ]
22
- },
23
- {
24
- "cell_type": "markdown",
25
- "metadata": {},
26
- "source": [
27
- "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages"
28
- ]
29
- },
30
- {
31
- "cell_type": "code",
32
- "execution_count": 1,
33
- "metadata": {},
34
- "outputs": [],
35
- "source": [
36
- "!pip install tensorflow --quiet"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": 2,
42
- "metadata": {},
43
- "outputs": [
44
- {
45
- "name": "stderr",
46
- "output_type": "stream",
47
- "text": [
48
- "2024-04-16 16:06:19.917866: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
49
- "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
50
- ]
51
- }
52
- ],
53
- "source": [
54
- "import inspect \n",
55
- "import datetime\n",
56
- "\n",
57
- "import pandas as pd\n",
58
- "import numpy as np\n",
59
- "import matplotlib.pyplot as plt\n",
60
- "import tensorflow as tf\n",
61
- "\n",
62
- "#ignore warnings\n",
63
- "import warnings\n",
64
- "warnings.filterwarnings('ignore')"
65
- ]
66
- },
67
- {
68
- "cell_type": "markdown",
69
- "metadata": {},
70
- "source": [
71
- "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
72
- ]
73
- },
74
- {
75
- "cell_type": "code",
76
- "execution_count": 3,
77
- "metadata": {},
78
- "outputs": [
79
- {
80
- "name": "stdout",
81
- "output_type": "stream",
82
- "text": [
83
- "Connected. Call `.close()` to terminate connection gracefully.\n",
84
- "\n",
85
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/550040\n",
86
- "Connected. Call `.close()` to terminate connection gracefully.\n"
87
- ]
88
- }
89
- ],
90
- "source": [
91
- "import hopsworks\n",
92
- "\n",
93
- "project = hopsworks.login()\n",
94
- "\n",
95
- "fs = project.get_feature_store() "
96
- ]
97
- },
98
- {
99
- "cell_type": "code",
100
- "execution_count": 4,
101
- "metadata": {},
102
- "outputs": [],
103
- "source": [
104
- "# Retrieve feature groups\n",
105
- "electricity_fg = fs.get_feature_group(\n",
106
- " name='electricity_prices',\n",
107
- " version=1,\n",
108
- ")\n",
109
- "\n",
110
- "weather_fg = fs.get_feature_group(\n",
111
- " name='weather_measurements',\n",
112
- " version=1,\n",
113
- ")\n",
114
- "\n",
115
- "danish_holidays_fg = fs.get_feature_group(\n",
116
- " name='danish_holidays',\n",
117
- " version=1,\n",
118
- ")"
119
- ]
120
- },
121
- {
122
- "cell_type": "markdown",
123
- "metadata": {},
124
- "source": [
125
- "## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
126
- "\n",
127
- "Let's start by selecting all the features you want to include for model training/inference."
128
- ]
129
- },
130
- {
131
- "cell_type": "code",
132
- "execution_count": 5,
133
- "metadata": {},
134
- "outputs": [],
135
- "source": [
136
- "# Select features for training data\n",
137
- "selected_features = electricity_fg.select_all()\\\n",
138
- " .join(\n",
139
- " weather_fg\\\n",
140
- " .select_except([\"timestamp\"])\n",
141
- " )\\\n",
142
- " .join(\n",
143
- " danish_holidays_fg.select_all()\n",
144
- " )"
145
- ]
146
- },
147
- {
148
- "cell_type": "markdown",
149
- "metadata": {},
150
- "source": [
151
- "### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
152
- "\n",
153
- "Hopsworks Feature Store provides functionality to attach transformation functions to feature views and comes with built-in transformation functions such as `min_max_scaler`, `standard_scaler`, `robust_scaler` and `label_encoder`.\n",
154
- "\n",
155
- "You will preprocess your data using *min-max scaling* on numerical features and *label encoding* on categorical features. To do this you simply define a mapping between our features and transformation functions. This ensures that transformation functions such as *min-max scaling* are fitted only on the training data (and not the validation/test data), which ensures that there is no data leakage."
156
- ]
157
- },
158
- {
159
- "cell_type": "code",
160
- "execution_count": 6,
161
- "metadata": {},
162
- "outputs": [],
163
- "source": [
164
- "transformation_functions = {\n",
165
- " \"SpotPriceDKK_KWH\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
166
- " \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
167
- " \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
168
- " \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
169
- " \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
170
- " \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
171
- " \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
172
- " \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
173
- " \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
174
- " \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\")\n",
175
- " }"
176
- ]
177
- },
178
- {
179
- "cell_type": "markdown",
180
- "metadata": {},
181
- "source": [
182
- "`Feature Views` stands between **Feature Groups** and **Training Dataset**. Сombining **Feature Groups** we can create **Feature Views** which store a metadata of our data. Having **Feature Views** we can create **Training Dataset**.\n",
183
- "\n",
184
- "The Feature Views allows schema in form of a query with filters, define a model target feature/label and additional transformation functions.\n",
185
- "\n",
186
- "In order to create Feature View we can use `FeatureStore.get_or_create_feature_view()` method.\n",
187
- "\n",
188
- "We can specify next parameters:\n",
189
- "\n",
190
- "- `name` - name of a feature group.\n",
191
- "\n",
192
- "- `version` - version of a feature group.\n",
193
- "\n",
194
- "- `labels`- our target variable.\n",
195
- "\n",
196
- "- `transformation_functions` - functions to transform our features.\n",
197
- "\n",
198
- "- `query` - query object with data."
199
- ]
200
- },
201
- {
202
- "cell_type": "code",
203
- "execution_count": 7,
204
- "metadata": {},
205
- "outputs": [],
206
- "source": [
207
- "feature_view = fs.get_or_create_feature_view(\n",
208
- " name='electricity_feature_view',\n",
209
- " version=1,\n",
210
- " labels=[], # you will define our 'y' later manualy\n",
211
- " transformation_functions=transformation_functions,\n",
212
- " query=selected_features,\n",
213
- ")"
214
- ]
215
- },
216
- {
217
- "cell_type": "markdown",
218
- "metadata": {},
219
- "source": [
220
- "## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>"
221
- ]
222
- },
223
- {
224
- "cell_type": "markdown",
225
- "metadata": {},
226
- "source": [
227
- "### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
228
- ]
229
- },
230
- {
231
- "cell_type": "code",
232
- "execution_count": null,
233
- "metadata": {},
234
- "outputs": [],
235
- "source": [
236
- "# since you didn't specify 'labels' in feature view creation, it will return None for Y.\n",
237
- "X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
238
- " train_start=\"2021-01-01\",\n",
239
- " train_end=\"2022-02-28\",\n",
240
- " validation_start=\"2022-03-01\",\n",
241
- " validation_end=\"2022-05-31\",\n",
242
- " test_start=\"2022-06-01\",\n",
243
- " test_end=\"2022-09-09\",\n",
244
- " description='Electricity price prediction dataset',\n",
245
- ")"
246
- ]
247
- },
248
- {
249
- "cell_type": "code",
250
- "execution_count": null,
251
- "metadata": {},
252
- "outputs": [],
253
- "source": [
254
- "# Sorting the training, validation, and test datasets based on the 'time' column\n",
255
- "X_train.sort_values([\"time\"], inplace=True)\n",
256
- "X_val.sort_values([\"time\"], inplace=True)\n",
257
- "X_test.sort_values([\"time\"], inplace=True)"
258
- ]
259
- },
260
- {
261
- "cell_type": "code",
262
- "execution_count": null,
263
- "metadata": {},
264
- "outputs": [],
265
- "source": [
266
- "# Define 'y_train', 'y_val' and 'y_test'\n",
267
- "y_train = X_train[[\"SpotPriceDKK_KWH\"]]\n",
268
- "y_val = X_val[[\"SpotPriceDKK_KWH\"]]\n",
269
- "y_test = X_test[[\"SpotPriceDKK_KWH\"]]"
270
- ]
271
- },
272
- {
273
- "cell_type": "code",
274
- "execution_count": null,
275
- "metadata": {},
276
- "outputs": [],
277
- "source": [
278
- "# Dropping the 'day' and 'timestamp' columns from the training, validation, and test datasets\n",
279
- "X_train.drop([\"date\", \"time\"], axis=1, inplace=True)\n",
280
- "X_val.drop([\"date\", \"time\"], axis=1, inplace=True)\n",
281
- "X_test.drop([\"date\", \"time\"], axis=1, inplace=True)"
282
- ]
283
- },
284
- {
285
- "cell_type": "code",
286
- "execution_count": null,
287
- "metadata": {},
288
- "outputs": [],
289
- "source": [
290
- "# Displaying the first 5 rows of the test dataset (X_test)\n",
291
- "X_test.head()"
292
- ]
293
- },
294
- {
295
- "cell_type": "markdown",
296
- "metadata": {},
297
- "source": [
298
- "## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
299
- ]
300
- },
301
- {
302
- "cell_type": "markdown",
303
- "metadata": {},
304
- "source": [
305
- "## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
306
- ]
307
- },
308
- {
309
- "cell_type": "markdown",
310
- "metadata": {},
311
- "source": [
312
- "## <span style='color:#2656a3'>🗄 Model Registry</span>"
313
- ]
314
- },
315
- {
316
- "cell_type": "markdown",
317
- "metadata": {},
318
- "source": [
319
- "---\n",
320
- "\n",
321
- "## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
322
- "\n",
323
- "In the next notebook you will use your registered model to predict batch data."
324
- ]
325
- }
326
- ],
327
- "metadata": {
328
- "kernelspec": {
329
- "display_name": "bds-mlops",
330
- "language": "python",
331
- "name": "python3"
332
- },
333
- "language_info": {
334
- "codemirror_mode": {
335
- "name": "ipython",
336
- "version": 3
337
- },
338
- "file_extension": ".py",
339
- "mimetype": "text/x-python",
340
- "name": "python",
341
- "nbconvert_exporter": "python",
342
- "pygments_lexer": "ipython3",
343
- "version": "3.11.8"
344
- },
345
- "orig_nbformat": 4
346
- },
347
- "nbformat": 4,
348
- "nbformat_minor": 2
349
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/Old/4_batch_inference_OLD.ipynb DELETED
@@ -1,80 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 04: Batch Inference</span>"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "## 🗒️ This notebook is divided into the following sections:\n",
15
- "\n",
16
- "1. Load batch data.\n",
17
- "2. Predict using model from Model Registry."
18
- ]
19
- },
20
- {
21
- "cell_type": "markdown",
22
- "metadata": {},
23
- "source": [
24
- "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages\n",
25
- "\n",
26
- "First, we'll install the Python packages required for this notebook. We'll pass the `--quiet` flag after the names of the libraries to ensure a silent installation process. Then, we'll proceed to import all the necessary libraries."
27
- ]
28
- },
29
- {
30
- "cell_type": "markdown",
31
- "metadata": {},
32
- "source": [
33
- "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
34
- ]
35
- },
36
- {
37
- "cell_type": "markdown",
38
- "metadata": {},
39
- "source": [
40
- "## <span style='color:#2656a3'> ⚙️ Feature View Retrieval"
41
- ]
42
- },
43
- {
44
- "cell_type": "markdown",
45
- "metadata": {},
46
- "source": [
47
- "## <span style='color:#2656a3'> 🗄 Model Registry"
48
- ]
49
- },
50
- {
51
- "cell_type": "markdown",
52
- "metadata": {},
53
- "source": [
54
- "## <span style='color:#2656a3'> 📮 Retrieving model from Model Registry"
55
- ]
56
- },
57
- {
58
- "cell_type": "markdown",
59
- "metadata": {},
60
- "source": [
61
- "## <span style='color:#2656a3'> ✨ Load Batch Data"
62
- ]
63
- },
64
- {
65
- "cell_type": "markdown",
66
- "metadata": {},
67
- "source": [
68
- "## <span style='color:#2656a3'> 🤖 Making the predictions"
69
- ]
70
- }
71
- ],
72
- "metadata": {
73
- "language_info": {
74
- "name": "python"
75
- },
76
- "orig_nbformat": 4
77
- },
78
- "nbformat": 4,
79
- "nbformat_minor": 2
80
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/Old/predict_example.py DELETED
@@ -1,33 +0,0 @@
1
- import os
2
- import numpy as np
3
- import pandas as pd
4
- import hsfs
5
- import joblib
6
-
7
-
8
class Predict(object):
    """Serve predictions from a trained model using a Hopsworks feature view.

    On construction the object connects to the feature store, fetches
    version 1 of ``electricity_feature_view``, initializes serving on it and
    loads the pickled model from ``$ARTIFACT_FILES_PATH``.
    """

    def __init__(self):
        """Initialize the serving state and read the trained model.

        Raises:
            KeyError: If the ``ARTIFACT_FILES_PATH`` environment variable
                is not set.
        """
        # Get a feature store handle
        fs_conn = hsfs.connection()
        self.fs = fs_conn.get_feature_store()

        # Get version 1 of the feature view
        self.fv = self.fs.get_feature_view("electricity_feature_view", 1)

        # Initialize serving
        # NOTE(review): the argument is presumably the training dataset
        # version -- confirm against the hsfs documentation.
        self.fv.init_serving(1)

        # Load the trained model
        self.model = joblib.load(os.environ["ARTIFACT_FILES_PATH"] + "/dk_electricity_model.pkl")
        print("Initialization Complete")

    def predict(self, timestamp_value, date_value):
        """Serve a prediction request using the trained model.

        Args:
            timestamp_value: Indexable whose first element is the
                ``timestamp`` key of the row to look up.
            date_value: Indexable whose first element is the ``date`` key of
                the row to look up.

        Returns:
            The model's prediction for the retrieved feature vector,
            converted to a plain Python list.
        """
        # Retrieve the feature vector. The entry maps each key name to its
        # value; the previous form used a list as a dict key, which raises
        # ``TypeError: unhashable type: 'list'`` before any lookup happens.
        feature_vector = self.fv.get_feature_vector(
            entry={"timestamp": timestamp_value[0], "date": date_value[0]}
        )
        # The first element of the vector is skipped before prediction
        # (presumably a key column rather than a model input -- TODO confirm).
        return self.model.predict(np.asarray(feature_vector[1:]).reshape(1, -1)).tolist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/notebooks_dev/3_training_pipeline_dev_prophet.ipynb DELETED
@@ -1,943 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "## 🗒️ This notebook is divided into the following sections:\n",
15
- "1. Feature selection.\n",
16
- "2. Feature transformations.\n",
17
- "3. Training datasets creation.\n",
18
- "4. Loading the training data.\n",
19
- "5. Train the model.\n",
20
- "6. Register model to Hopsworks model registry."
21
- ]
22
- },
23
- {
24
- "cell_type": "markdown",
25
- "metadata": {},
26
- "source": [
27
- "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages"
28
- ]
29
- },
30
- {
31
- "cell_type": "code",
32
- "execution_count": 1,
33
- "metadata": {},
34
- "outputs": [],
35
- "source": [
36
- "!pip install tensorflow --quiet"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": 2,
42
- "metadata": {},
43
- "outputs": [],
44
- "source": [
45
- "# Importing the packages for the needed libraries for the Jupyter notebook\n",
46
- "import inspect \n",
47
- "import datetime\n",
48
- "\n",
49
- "import pandas as pd\n",
50
- "import numpy as np\n",
51
- "import matplotlib.pyplot as plt\n",
52
- "import tensorflow as tf\n",
53
- "\n",
54
- "#ignore warnings\n",
55
- "import warnings\n",
56
- "warnings.filterwarnings('ignore')"
57
- ]
58
- },
59
- {
60
- "cell_type": "markdown",
61
- "metadata": {},
62
- "source": [
63
- "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
64
- ]
65
- },
66
- {
67
- "cell_type": "code",
68
- "execution_count": 3,
69
- "metadata": {},
70
- "outputs": [
71
- {
72
- "name": "stdout",
73
- "output_type": "stream",
74
- "text": [
75
- "Connected. Call `.close()` to terminate connection gracefully.\n",
76
- "\n",
77
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
78
- "Connected. Call `.close()` to terminate connection gracefully.\n"
79
- ]
80
- }
81
- ],
82
- "source": [
83
- "# Importing the hopsworks module\n",
84
- "import hopsworks\n",
85
- "\n",
86
- "# Logging in to the Hopsworks project\n",
87
- "project = hopsworks.login()\n",
88
- "\n",
89
- "# Getting the feature store from the project\n",
90
- "fs = project.get_feature_store() "
91
- ]
92
- },
93
- {
94
- "cell_type": "code",
95
- "execution_count": 4,
96
- "metadata": {},
97
- "outputs": [],
98
- "source": [
99
- "# Retrieve the feature groups\n",
100
- "electricity_fg = fs.get_feature_group(\n",
101
- " name='electricity_prices',\n",
102
- " version=1,\n",
103
- ")\n",
104
- "\n",
105
- "weather_fg = fs.get_feature_group(\n",
106
- " name='weather_measurements',\n",
107
- " version=1,\n",
108
- ")\n",
109
- "\n",
110
- "danish_holidays_fg = fs.get_feature_group(\n",
111
- " name='danish_holidayss',\n",
112
- " version=1,\n",
113
- ")\n",
114
- "forecast_renewable_energy_fg = fs.get_feature_group(\n",
115
- " name='forecast_renewable_energy',\n",
116
- " version=1\n",
117
- ")"
118
- ]
119
- },
120
- {
121
- "cell_type": "markdown",
122
- "metadata": {},
123
- "source": [
124
- "## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
125
- "\n",
126
- "We first select the features that we want to include for model training.\n",
127
- "\n",
128
- "Since we specified `primary_key` as `date` and `event_time` as `timestamp` in part 01, we can now join them together for the `electricity_fg`, `weather_fg` and `forecast_renewable_energy_fg`."
129
- ]
130
- },
131
- {
132
- "cell_type": "markdown",
133
- "metadata": {},
134
- "source": [
135
- "hmmm skal 'time' egentlig være 'date'???"
136
- ]
137
- },
138
- {
139
- "cell_type": "code",
140
- "execution_count": 5,
141
- "metadata": {},
142
- "outputs": [],
143
- "source": [
144
- "# Select features for training data\n",
145
- "selected_features = electricity_fg.select_all()\\\n",
146
- " .join(weather_fg.select_except([\"timestamp\", \"time\"]))\\\n",
147
- " .join(forecast_renewable_energy_fg.select_except([\"timestamp\", \"time\"]))\\\n",
148
- " .join(danish_holidays_fg.select_all())"
149
- ]
150
- },
151
- {
152
- "cell_type": "code",
153
- "execution_count": 6,
154
- "metadata": {},
155
- "outputs": [],
156
- "source": [
157
- "# Uncomment this if you would like to view your selected features\n",
158
- "# selected_features.show(5)"
159
- ]
160
- },
161
- {
162
- "cell_type": "markdown",
163
- "metadata": {},
164
- "source": [
165
- "### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
166
- "\n",
167
- "We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
168
- "To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
169
- "\n",
170
- "To achieve this, we create a mapping between our features and transformation functions - ved ikke om man kan sige det her?"
171
- ]
172
- },
173
- {
174
- "cell_type": "code",
175
- "execution_count": 7,
176
- "metadata": {},
177
- "outputs": [],
178
- "source": [
179
- "# Defining transformation functions for feature scaling and encoding\n",
180
- "transformation_functions = {\n",
181
- " \"dk1_spotpricedkk_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
182
- " \"dk1_offshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
183
- " \"dk1_onshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
184
- " \"dk1_solar_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
185
- " \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
186
- " \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
187
- " \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
188
- " \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
189
- " \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
190
- " \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
191
- " \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
192
- " \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
193
- " \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
194
- " \"type\": fs.get_transformation_function(name=\"label_encoder\"),\n",
195
- " }"
196
- ]
197
- },
198
- {
199
- "cell_type": "markdown",
200
- "metadata": {},
201
- "source": [
202
- "`Feature Views` stand between **Feature Groups** and the **Training Dataset**. By combining **Feature Groups** we can create **Feature Views**, which store metadata about our data. From a **Feature View** we can then create a **Training Dataset**.\n",
203
- "\n",
204
- "A Feature View defines a schema in the form of a query with filters, the model's target feature/label, and additional transformation functions.\n",
205
- "\n",
206
- "In order to create Feature View we can use `FeatureStore.get_or_create_feature_view()` method.\n",
207
- "\n",
208
- "We can specify the following parameters:\n",
209
- "\n",
210
- "- `name` - name of the feature view.\n",
211
- "\n",
212
- "- `version` - version of the feature view.\n",
213
- "\n",
214
- "- `labels`- our target variable.\n",
215
- "\n",
216
- "- `transformation_functions` - functions to transform our features.\n",
217
- "\n",
218
- "- `query` - query object with data."
219
- ]
220
- },
221
- {
222
- "cell_type": "markdown",
223
- "metadata": {},
224
- "source": [
225
- "ved ikke om den her omformulering af botten går an?"
226
- ]
227
- },
228
- {
229
- "cell_type": "markdown",
230
- "metadata": {},
231
- "source": [
232
- "`Feature Views` serve as an intermediary between **Feature Groups** and the **Training Dataset**. By combining various **Feature Groups**, we can construct **Feature Views**, which retain metadata about our data. Utilizing **Feature Views**, we can subsequently generate a **Training Dataset**.\n",
233
- "\n",
234
- "Feature Views facilitate the definition of schema through queries with filters, identification of the model's target feature or label, and application of additional transformation functions.\n",
235
- "\n",
236
- "To create a Feature View, we employ the `FeatureStore.get_or_create_feature_view()` method, where we specify the following parameters:\n",
237
- "\n",
238
- "- `name`: The name of the feature view.\n",
239
- "\n",
240
- "- `version`: The version of the feature view.\n",
241
- "\n",
242
- "- `labels`: Our target variable.\n",
243
- "\n",
244
- "- `transformation_functions`: Functions to transform our features.\n",
245
- "\n",
246
- "- `query`: A query object containing the relevant data."
247
- ]
248
- },
249
- {
250
- "cell_type": "code",
251
- "execution_count": 8,
252
- "metadata": {},
253
- "outputs": [],
254
- "source": [
255
- "# Getting or creating a feature view named 'electricity_feature_view'\n",
256
- "version = 1 # Defining the version for the feature view\n",
257
- "feature_view = fs.get_or_create_feature_view(\n",
258
- " name='electricity_feature_view',\n",
259
- " version=version,\n",
260
- " labels=[], # Labels will be defined manually later for our 'y'\n",
261
- " transformation_functions=transformation_functions,\n",
262
- " query=selected_features,\n",
263
- ")"
264
- ]
265
- },
266
- {
267
- "cell_type": "markdown",
268
- "metadata": {},
269
- "source": [
270
- "## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>\n",
271
- "\n",
272
- "In Hopsworks training data is a query where the projection (set of features) is determined by the parent FeatureView with an optional snapshot on disk of the data returned by the query.\n",
273
- "\n",
274
- "**Training Dataset may contain splits such as:** \n",
275
- "* Training set - the subset of training data used to train a model.\n",
276
- "* Validation set - the subset of training data used to evaluate hparams when training a model\n",
277
- "* Test set - the holdout subset of training data used to evaluate a model\n",
278
- "\n",
279
- "Training dataset is created using `fs.create_training_dataset()` method.\n",
280
- "\n",
281
- "**From feature view APIs you can also create training datasets based on event time filters specifying `start_time` and `end_time`** "
282
- ]
283
- },
284
- {
285
- "cell_type": "markdown",
286
- "metadata": {},
287
- "source": [
288
- "### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
289
- ]
290
- },
291
- {
292
- "cell_type": "code",
293
- "execution_count": 9,
294
- "metadata": {},
295
- "outputs": [
296
- {
297
- "name": "stdout",
298
- "output_type": "stream",
299
- "text": [
300
- "Finished: Reading data from Hopsworks, using ArrowFlight (199.29s) \n"
301
- ]
302
- },
303
- {
304
- "name": "stderr",
305
- "output_type": "stream",
306
- "text": [
307
- "VersionWarning: Incremented version to `19`.\n"
308
- ]
309
- }
310
- ],
311
- "source": [
312
- "# Splitting the feature view data into train, validation, and test sets\n",
313
- "# We didn't specify 'labels' in feature view creation, it will therefore return 'None' for Y\n",
314
- "X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
315
- " train_start=\"2022-01-01\",\n",
316
- " train_end=\"2023-06-30\",\n",
317
- " validation_start=\"2023-07-01\",\n",
318
- " validation_end=\"2023-09-30\",\n",
319
- " test_start=\"2023-10-01\",\n",
320
- " test_end=\"2023-12-31\",\n",
321
- " description='Electricity price prediction dataset',\n",
322
- ")"
323
- ]
324
- },
325
- {
326
- "cell_type": "code",
327
- "execution_count": 10,
328
- "metadata": {},
329
- "outputs": [],
330
- "source": [
331
- "# Sorting the training, validation, and test datasets based on the 'timestamp' column\n",
332
- "X_train.sort_values([\"timestamp\"], inplace=True)\n",
333
- "X_val.sort_values([\"timestamp\"], inplace=True)\n",
334
- "X_test.sort_values([\"timestamp\"], inplace=True)"
335
- ]
336
- },
337
- {
338
- "cell_type": "code",
339
- "execution_count": 11,
340
- "metadata": {},
341
- "outputs": [],
342
- "source": [
343
- "# Extracting the target variable 'dk1_spotpricedkk_kwh' and defining 'y_train', 'y_val' and 'y_test' \n",
344
- "y_train = X_train[[\"dk1_spotpricedkk_kwh\"]]\n",
345
- "y_val = X_val[[\"dk1_spotpricedkk_kwh\"]]\n",
346
- "y_test = X_test[[\"dk1_spotpricedkk_kwh\"]]"
347
- ]
348
- },
349
- {
350
- "cell_type": "code",
351
- "execution_count": null,
352
- "metadata": {},
353
- "outputs": [],
354
- "source": [
355
- "# # Dropping the 'date', 'time' and 'timestamp' columns from the training, validation, and test datasets\n",
356
- "# X_train.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
357
- "# X_val.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
358
- "# X_test.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)"
359
- ]
360
- },
361
- {
362
- "cell_type": "code",
363
- "execution_count": null,
364
- "metadata": {},
365
- "outputs": [],
366
- "source": [
367
- "# # Dropping the dependent variable (y) column 'dk1_spotpricedkk_kwh' from the training, validation, and test datasets\n",
368
- "# X_train.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
369
- "# X_val.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
370
- "# X_test.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)"
371
- ]
372
- },
373
- {
374
- "cell_type": "code",
375
- "execution_count": 12,
376
- "metadata": {},
377
- "outputs": [
378
- {
379
- "data": {
380
- "text/html": [
381
- "<div>\n",
382
- "<style scoped>\n",
383
- " .dataframe tbody tr th:only-of-type {\n",
384
- " vertical-align: middle;\n",
385
- " }\n",
386
- "\n",
387
- " .dataframe tbody tr th {\n",
388
- " vertical-align: top;\n",
389
- " }\n",
390
- "\n",
391
- " .dataframe thead th {\n",
392
- " text-align: right;\n",
393
- " }\n",
394
- "</style>\n",
395
- "<table border=\"1\" class=\"dataframe\">\n",
396
- " <thead>\n",
397
- " <tr style=\"text-align: right;\">\n",
398
- " <th></th>\n",
399
- " <th>timestamp</th>\n",
400
- " <th>time</th>\n",
401
- " <th>date</th>\n",
402
- " <th>dk1_spotpricedkk_kwh</th>\n",
403
- " <th>temperature_2m</th>\n",
404
- " <th>relative_humidity_2m</th>\n",
405
- " <th>precipitation</th>\n",
406
- " <th>rain</th>\n",
407
- " <th>snowfall</th>\n",
408
- " <th>weather_code</th>\n",
409
- " <th>cloud_cover</th>\n",
410
- " <th>wind_speed_10m</th>\n",
411
- " <th>wind_gusts_10m</th>\n",
412
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
413
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
414
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
415
- " <th>type</th>\n",
416
- " </tr>\n",
417
- " </thead>\n",
418
- " <tbody>\n",
419
- " <tr>\n",
420
- " <th>5905751</th>\n",
421
- " <td>1640995200000</td>\n",
422
- " <td>2022-01-01 00:00:00+00:00</td>\n",
423
- " <td>2022-01-01</td>\n",
424
- " <td>0.179988</td>\n",
425
- " <td>0.435268</td>\n",
426
- " <td>0.986667</td>\n",
427
- " <td>0.011364</td>\n",
428
- " <td>0.011364</td>\n",
429
- " <td>0.0</td>\n",
430
- " <td>0.68</td>\n",
431
- " <td>1.0</td>\n",
432
- " <td>0.315152</td>\n",
433
- " <td>0.272633</td>\n",
434
- " <td>0.945277</td>\n",
435
- " <td>0.481878</td>\n",
436
- " <td>0.000000</td>\n",
437
- " <td>1</td>\n",
438
- " </tr>\n",
439
- " <tr>\n",
440
- " <th>19398</th>\n",
441
- " <td>1640995200000</td>\n",
442
- " <td>2022-01-01 00:00:00+00:00</td>\n",
443
- " <td>2022-01-01</td>\n",
444
- " <td>0.179988</td>\n",
445
- " <td>0.435268</td>\n",
446
- " <td>0.986667</td>\n",
447
- " <td>0.011364</td>\n",
448
- " <td>0.011364</td>\n",
449
- " <td>0.0</td>\n",
450
- " <td>0.68</td>\n",
451
- " <td>1.0</td>\n",
452
- " <td>0.315152</td>\n",
453
- " <td>0.272633</td>\n",
454
- " <td>0.934795</td>\n",
455
- " <td>0.446702</td>\n",
456
- " <td>0.000008</td>\n",
457
- " <td>1</td>\n",
458
- " </tr>\n",
459
- " <tr>\n",
460
- " <th>5919627</th>\n",
461
- " <td>1640995200000</td>\n",
462
- " <td>2022-01-01 00:00:00+00:00</td>\n",
463
- " <td>2022-01-01</td>\n",
464
- " <td>0.179988</td>\n",
465
- " <td>0.417411</td>\n",
466
- " <td>0.933333</td>\n",
467
- " <td>0.000000</td>\n",
468
- " <td>0.000000</td>\n",
469
- " <td>0.0</td>\n",
470
- " <td>0.04</td>\n",
471
- " <td>1.0</td>\n",
472
- " <td>0.082828</td>\n",
473
- " <td>0.074922</td>\n",
474
- " <td>0.773045</td>\n",
475
- " <td>0.264375</td>\n",
476
- " <td>0.000018</td>\n",
477
- " <td>1</td>\n",
478
- " </tr>\n",
479
- " <tr>\n",
480
- " <th>4719247</th>\n",
481
- " <td>1640995200000</td>\n",
482
- " <td>2022-01-01 00:00:00+00:00</td>\n",
483
- " <td>2022-01-01</td>\n",
484
- " <td>0.179988</td>\n",
485
- " <td>0.426339</td>\n",
486
- " <td>0.933333</td>\n",
487
- " <td>0.000000</td>\n",
488
- " <td>0.000000</td>\n",
489
- " <td>0.0</td>\n",
490
- " <td>0.04</td>\n",
491
- " <td>1.0</td>\n",
492
- " <td>0.195960</td>\n",
493
- " <td>0.187305</td>\n",
494
- " <td>0.913059</td>\n",
495
- " <td>0.358547</td>\n",
496
- " <td>0.000012</td>\n",
497
- " <td>1</td>\n",
498
- " </tr>\n",
499
- " <tr>\n",
500
- " <th>4743896</th>\n",
501
- " <td>1640995200000</td>\n",
502
- " <td>2022-01-01 00:00:00+00:00</td>\n",
503
- " <td>2022-01-01</td>\n",
504
- " <td>0.179988</td>\n",
505
- " <td>0.417411</td>\n",
506
- " <td>0.933333</td>\n",
507
- " <td>0.000000</td>\n",
508
- " <td>0.000000</td>\n",
509
- " <td>0.0</td>\n",
510
- " <td>0.04</td>\n",
511
- " <td>1.0</td>\n",
512
- " <td>0.082828</td>\n",
513
- " <td>0.074922</td>\n",
514
- " <td>0.493641</td>\n",
515
- " <td>0.133456</td>\n",
516
- " <td>0.005406</td>\n",
517
- " <td>1</td>\n",
518
- " </tr>\n",
519
- " </tbody>\n",
520
- "</table>\n",
521
- "</div>"
522
- ],
523
- "text/plain": [
524
- " timestamp time date \\\n",
525
- "5905751 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
526
- "19398 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
527
- "5919627 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
528
- "4719247 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
529
- "4743896 1640995200000 2022-01-01 00:00:00+00:00 2022-01-01 \n",
530
- "\n",
531
- " dk1_spotpricedkk_kwh temperature_2m relative_humidity_2m \\\n",
532
- "5905751 0.179988 0.435268 0.986667 \n",
533
- "19398 0.179988 0.435268 0.986667 \n",
534
- "5919627 0.179988 0.417411 0.933333 \n",
535
- "4719247 0.179988 0.426339 0.933333 \n",
536
- "4743896 0.179988 0.417411 0.933333 \n",
537
- "\n",
538
- " precipitation rain snowfall weather_code cloud_cover \\\n",
539
- "5905751 0.011364 0.011364 0.0 0.68 1.0 \n",
540
- "19398 0.011364 0.011364 0.0 0.68 1.0 \n",
541
- "5919627 0.000000 0.000000 0.0 0.04 1.0 \n",
542
- "4719247 0.000000 0.000000 0.0 0.04 1.0 \n",
543
- "4743896 0.000000 0.000000 0.0 0.04 1.0 \n",
544
- "\n",
545
- " wind_speed_10m wind_gusts_10m \\\n",
546
- "5905751 0.315152 0.272633 \n",
547
- "19398 0.315152 0.272633 \n",
548
- "5919627 0.082828 0.074922 \n",
549
- "4719247 0.195960 0.187305 \n",
550
- "4743896 0.082828 0.074922 \n",
551
- "\n",
552
- " dk1_offshore_wind_forecastintraday_kwh \\\n",
553
- "5905751 0.945277 \n",
554
- "19398 0.934795 \n",
555
- "5919627 0.773045 \n",
556
- "4719247 0.913059 \n",
557
- "4743896 0.493641 \n",
558
- "\n",
559
- " dk1_onshore_wind_forecastintraday_kwh \\\n",
560
- "5905751 0.481878 \n",
561
- "19398 0.446702 \n",
562
- "5919627 0.264375 \n",
563
- "4719247 0.358547 \n",
564
- "4743896 0.133456 \n",
565
- "\n",
566
- " dk1_solar_forecastintraday_kwh type \n",
567
- "5905751 0.000000 1 \n",
568
- "19398 0.000008 1 \n",
569
- "5919627 0.000018 1 \n",
570
- "4719247 0.000012 1 \n",
571
- "4743896 0.005406 1 "
572
- ]
573
- },
574
- "execution_count": 12,
575
- "metadata": {},
576
- "output_type": "execute_result"
577
- }
578
- ],
579
- "source": [
580
- "# Displaying the first 5 rows of the train dataset (X_train)\n",
581
- "X_train.head()"
582
- ]
583
- },
584
- {
585
- "cell_type": "code",
586
- "execution_count": 14,
587
- "metadata": {},
588
- "outputs": [
589
- {
590
- "data": {
591
- "text/html": [
592
- "<div>\n",
593
- "<style scoped>\n",
594
- " .dataframe tbody tr th:only-of-type {\n",
595
- " vertical-align: middle;\n",
596
- " }\n",
597
- "\n",
598
- " .dataframe tbody tr th {\n",
599
- " vertical-align: top;\n",
600
- " }\n",
601
- "\n",
602
- " .dataframe thead th {\n",
603
- " text-align: right;\n",
604
- " }\n",
605
- "</style>\n",
606
- "<table border=\"1\" class=\"dataframe\">\n",
607
- " <thead>\n",
608
- " <tr style=\"text-align: right;\">\n",
609
- " <th></th>\n",
610
- " <th>date</th>\n",
611
- " <th>dk1_spotpricedkk_kwh</th>\n",
612
- " </tr>\n",
613
- " </thead>\n",
614
- " <tbody>\n",
615
- " <tr>\n",
616
- " <th>5905751</th>\n",
617
- " <td>2022-01-01</td>\n",
618
- " <td>0.179988</td>\n",
619
- " </tr>\n",
620
- " <tr>\n",
621
- " <th>19398</th>\n",
622
- " <td>2022-01-01</td>\n",
623
- " <td>0.179988</td>\n",
624
- " </tr>\n",
625
- " <tr>\n",
626
- " <th>5919627</th>\n",
627
- " <td>2022-01-01</td>\n",
628
- " <td>0.179988</td>\n",
629
- " </tr>\n",
630
- " <tr>\n",
631
- " <th>4719247</th>\n",
632
- " <td>2022-01-01</td>\n",
633
- " <td>0.179988</td>\n",
634
- " </tr>\n",
635
- " <tr>\n",
636
- " <th>4743896</th>\n",
637
- " <td>2022-01-01</td>\n",
638
- " <td>0.179988</td>\n",
639
- " </tr>\n",
640
- " </tbody>\n",
641
- "</table>\n",
642
- "</div>"
643
- ],
644
- "text/plain": [
645
- " date dk1_spotpricedkk_kwh\n",
646
- "5905751 2022-01-01 0.179988\n",
647
- "19398 2022-01-01 0.179988\n",
648
- "5919627 2022-01-01 0.179988\n",
649
- "4719247 2022-01-01 0.179988\n",
650
- "4743896 2022-01-01 0.179988"
651
- ]
652
- },
653
- "execution_count": 14,
654
- "metadata": {},
655
- "output_type": "execute_result"
656
- }
657
- ],
658
- "source": [
659
- "df = X_train[[\"date\", \"dk1_spotpricedkk_kwh\"]]"
660
- ]
661
- },
662
- {
663
- "cell_type": "code",
664
- "execution_count": 25,
665
- "metadata": {},
666
- "outputs": [
667
- {
668
- "data": {
669
- "text/html": [
670
- "<div>\n",
671
- "<style scoped>\n",
672
- " .dataframe tbody tr th:only-of-type {\n",
673
- " vertical-align: middle;\n",
674
- " }\n",
675
- "\n",
676
- " .dataframe tbody tr th {\n",
677
- " vertical-align: top;\n",
678
- " }\n",
679
- "\n",
680
- " .dataframe thead th {\n",
681
- " text-align: right;\n",
682
- " }\n",
683
- "</style>\n",
684
- "<table border=\"1\" class=\"dataframe\">\n",
685
- " <thead>\n",
686
- " <tr style=\"text-align: right;\">\n",
687
- " <th></th>\n",
688
- " <th>ds</th>\n",
689
- " <th>y</th>\n",
690
- " </tr>\n",
691
- " </thead>\n",
692
- " <tbody>\n",
693
- " <tr>\n",
694
- " <th>5905751</th>\n",
695
- " <td>2022-01-01</td>\n",
696
- " <td>0.179988</td>\n",
697
- " </tr>\n",
698
- " <tr>\n",
699
- " <th>19398</th>\n",
700
- " <td>2022-01-01</td>\n",
701
- " <td>0.179988</td>\n",
702
- " </tr>\n",
703
- " <tr>\n",
704
- " <th>5919627</th>\n",
705
- " <td>2022-01-01</td>\n",
706
- " <td>0.179988</td>\n",
707
- " </tr>\n",
708
- " <tr>\n",
709
- " <th>4719247</th>\n",
710
- " <td>2022-01-01</td>\n",
711
- " <td>0.179988</td>\n",
712
- " </tr>\n",
713
- " <tr>\n",
714
- " <th>4743896</th>\n",
715
- " <td>2022-01-01</td>\n",
716
- " <td>0.179988</td>\n",
717
- " </tr>\n",
718
- " </tbody>\n",
719
- "</table>\n",
720
- "</div>"
721
- ],
722
- "text/plain": [
723
- " ds y\n",
724
- "5905751 2022-01-01 0.179988\n",
725
- "19398 2022-01-01 0.179988\n",
726
- "5919627 2022-01-01 0.179988\n",
727
- "4719247 2022-01-01 0.179988\n",
728
- "4743896 2022-01-01 0.179988"
729
- ]
730
- },
731
- "execution_count": 25,
732
- "metadata": {},
733
- "output_type": "execute_result"
734
- }
735
- ],
736
- "source": [
737
- "df.columns = [\"ds\", \"y\"]\n",
738
- "df.head()"
739
- ]
740
- },
741
- {
742
- "cell_type": "markdown",
743
- "metadata": {},
744
- "source": [
745
- "## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
746
- ]
747
- },
748
- {
749
- "cell_type": "markdown",
750
- "metadata": {},
751
- "source": [
752
- "## <span style=\"color:#2656a3;\">🧬 Modeling Testing</span>"
753
- ]
754
- },
755
- {
756
- "cell_type": "code",
757
- "execution_count": 22,
758
- "metadata": {},
759
- "outputs": [],
760
- "source": [
761
- "from prophet import Prophet"
762
- ]
763
- },
764
- {
765
- "cell_type": "code",
766
- "execution_count": 26,
767
- "metadata": {},
768
- "outputs": [
769
- {
770
- "name": "stderr",
771
- "output_type": "stream",
772
- "text": [
773
- "14:24:30 - cmdstanpy - INFO - Chain [1] start processing\n"
774
- ]
775
- }
776
- ],
777
- "source": [
778
- "m = Prophet(interval_width=0.95, daily_seasonality=True)\n",
779
- "model = m.fit(df)"
780
- ]
781
- },
782
- {
783
- "cell_type": "code",
784
- "execution_count": null,
785
- "metadata": {},
786
- "outputs": [],
787
- "source": [
788
- "future = m.make_future_dataframe(periods=100,freq='D')\n",
789
- "forecast = m.predict(future)\n",
790
- "forecast.head()"
791
- ]
792
- },
793
- {
794
- "cell_type": "code",
795
- "execution_count": null,
796
- "metadata": {},
797
- "outputs": [],
798
- "source": [
799
- "plot1 = m.plot(forecast)\n"
800
- ]
801
- },
802
- {
803
- "cell_type": "markdown",
804
- "metadata": {},
805
- "source": [
806
- "## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
807
- ]
808
- },
809
- {
810
- "cell_type": "code",
811
- "execution_count": null,
812
- "metadata": {},
813
- "outputs": [],
814
- "source": [
815
- "# import pandas as pd\n",
816
- "# import numpy as np\n",
817
- "# import xgboost as xgb\n",
818
- "# from sklearn.metrics import mean_squared_error\n",
819
- "# import os"
820
- ]
821
- },
822
- {
823
- "cell_type": "code",
824
- "execution_count": null,
825
- "metadata": {},
826
- "outputs": [],
827
- "source": [
828
- "# # Initialize the XGBoost regressor\n",
829
- "# model = xgb.XGBRegressor()\n",
830
- "# model_val = xgb.XGBRegressor()"
831
- ]
832
- },
833
- {
834
- "cell_type": "code",
835
- "execution_count": null,
836
- "metadata": {},
837
- "outputs": [],
838
- "source": [
839
- "# # Train the model on the training data\n",
840
- "# model.fit(X_train, y_train)"
841
- ]
842
- },
843
- {
844
- "cell_type": "code",
845
- "execution_count": null,
846
- "metadata": {},
847
- "outputs": [],
848
- "source": [
849
- "# # Make predictions on the test set\n",
850
- "# y_test_pred = model.predict(X_test)"
851
- ]
852
- },
853
- {
854
- "cell_type": "code",
855
- "execution_count": null,
856
- "metadata": {},
857
- "outputs": [],
858
- "source": [
859
- "# # Calculate RMSE on the test set\n",
860
- "# mse = mean_squared_error(y_test, y_test_pred, squared=False)\n",
861
- "# print(f\"Mean Squared Error (MSE): {mse}\")"
862
- ]
863
- },
864
- {
865
- "cell_type": "markdown",
866
- "metadata": {},
867
- "source": [
868
- "## <span style='color:#2656a3'>🗄 Model Registry</span>"
869
- ]
870
- },
871
- {
872
- "cell_type": "code",
873
- "execution_count": null,
874
- "metadata": {},
875
- "outputs": [],
876
- "source": [
877
- "# Exporting the trained model to a directory\n",
878
- "model_dir = \"electricity_price_model\"\n",
879
- "print('Exporting trained model to: {}'.format(model_dir))\n",
880
- "\n",
881
- "# Saving the model using TensorFlow's saved_model.save function\n",
882
- "tf.saved_model.save(model, model_dir)"
883
- ]
884
- },
885
- {
886
- "cell_type": "code",
887
- "execution_count": null,
888
- "metadata": {},
889
- "outputs": [],
890
- "source": [
891
- "# Retrieving the Model Registry\n",
892
- "mr = project.get_model_registry()\n",
893
- "\n",
894
- "# Extracting loss value from the training history\n",
895
- "metrics = {'loss': history_dict['val_loss'][0]} \n",
896
- "\n",
897
- "# Creating a TensorFlow model in the Model Registry\n",
898
- "tf_model = mr.tensorflow.create_model(\n",
899
- " name=\"DK_electricity_price_prediction_model\",\n",
900
- " metrics=metrics,\n",
901
- " description=\"Hourly electricity price prediction model.\",\n",
902
- " input_example=n_step_window.example[0].numpy(),\n",
903
- ")\n",
904
- "\n",
905
- "# Saving the model to the specified directory\n",
906
- "tf_model.save(model_dir)"
907
- ]
908
- },
909
- {
910
- "cell_type": "markdown",
911
- "metadata": {},
912
- "source": [
913
- "---\n",
914
- "\n",
915
- "## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
916
- "\n",
917
- "In the next notebook you will use your registered model to predict batch data."
918
- ]
919
- }
920
- ],
921
- "metadata": {
922
- "kernelspec": {
923
- "display_name": "bds-mlops",
924
- "language": "python",
925
- "name": "python3"
926
- },
927
- "language_info": {
928
- "codemirror_mode": {
929
- "name": "ipython",
930
- "version": 3
931
- },
932
- "file_extension": ".py",
933
- "mimetype": "text/x-python",
934
- "name": "python",
935
- "nbconvert_exporter": "python",
936
- "pygments_lexer": "ipython3",
937
- "version": "3.11.9"
938
- },
939
- "orig_nbformat": 4
940
- },
941
- "nbformat": 4,
942
- "nbformat_minor": 2
943
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/notebooks_dev/3_training_pipeline_dev_pytorch.ipynb DELETED
@@ -1,874 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "## 🗒️ This notebook is divided into the following sections:\n",
15
- "1. Feature selection.\n",
16
- "2. Feature transformations.\n",
17
- "3. Training datasets creation.\n",
18
- "4. Loading the training data.\n",
19
- "5. Train the model.\n",
20
- "6. Register model to Hopsworks model registry."
21
- ]
22
- },
23
- {
24
- "cell_type": "markdown",
25
- "metadata": {},
26
- "source": [
27
- "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages</span>"
28
- ]
29
- },
30
- {
31
- "cell_type": "code",
32
- "execution_count": 1,
33
- "metadata": {},
34
- "outputs": [],
35
- "source": [
36
- "!pip install tensorflow --quiet"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": 22,
42
- "metadata": {},
43
- "outputs": [],
44
- "source": [
45
- "# Importing the packages for the needed libraries for the Jupyter notebook\n",
46
- "import inspect \n",
47
- "import datetime\n",
48
- "\n",
49
- "import pandas as pd\n",
50
- "import numpy as np\n",
51
- "import matplotlib.pyplot as plt\n",
52
- "import torch\n",
53
- "import torch.nn as nn\n",
54
- "\n",
55
- "#ignore warnings\n",
56
- "import warnings\n",
57
- "warnings.filterwarnings('ignore')"
58
- ]
59
- },
60
- {
61
- "cell_type": "code",
62
- "execution_count": 2,
63
- "metadata": {},
64
- "outputs": [
65
- {
66
- "data": {
67
- "text/plain": [
68
- "'cuda:0'"
69
- ]
70
- },
71
- "execution_count": 2,
72
- "metadata": {},
73
- "output_type": "execute_result"
74
- }
75
- ],
76
- "source": [
77
- "device = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
78
- "device"
79
- ]
80
- },
81
- {
82
- "cell_type": "markdown",
83
- "metadata": {},
84
- "source": [
85
- "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store</span>"
86
- ]
87
- },
88
- {
89
- "cell_type": "code",
90
- "execution_count": 3,
91
- "metadata": {},
92
- "outputs": [
93
- {
94
- "name": "stdout",
95
- "output_type": "stream",
96
- "text": [
97
- "Connected. Call `.close()` to terminate connection gracefully.\n",
98
- "\n",
99
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
100
- "Connected. Call `.close()` to terminate connection gracefully.\n"
101
- ]
102
- }
103
- ],
104
- "source": [
105
- "# Importing the hopsworks module\n",
106
- "import hopsworks\n",
107
- "\n",
108
- "# Logging in to the Hopsworks project\n",
109
- "project = hopsworks.login()\n",
110
- "\n",
111
- "# Getting the feature store from the project\n",
112
- "fs = project.get_feature_store() "
113
- ]
114
- },
115
- {
116
- "cell_type": "code",
117
- "execution_count": 4,
118
- "metadata": {},
119
- "outputs": [],
120
- "source": [
121
- "# Retrieve the feature groups\n",
122
- "electricity_fg = fs.get_feature_group(\n",
123
- " name='electricity_prices',\n",
124
- " version=1,\n",
125
- ")\n",
126
- "\n",
127
- "weather_fg = fs.get_feature_group(\n",
128
- " name='weather_measurements',\n",
129
- " version=1,\n",
130
- ")\n",
131
- "\n",
132
- "danish_holidays_fg = fs.get_feature_group(\n",
133
- " name='danish_holidayss',\n",
134
- " version=1,\n",
135
- ")\n",
136
- "forecast_renewable_energy_fg = fs.get_feature_group(\n",
137
- " name='forecast_renewable_energy',\n",
138
- " version=1\n",
139
- ")"
140
- ]
141
- },
142
- {
143
- "cell_type": "markdown",
144
- "metadata": {},
145
- "source": [
146
- "## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
147
- "\n",
148
- "We first select the features that we want to include for model training.\n",
149
- "\n",
150
- "Since we specified `primary_key` as `date` and `event_time` as `timestamp` in part 01, we can now join them together for the `electricity_fg`, `weather_fg` and `forecast_renewable_energy_fg`."
151
- ]
152
- },
153
- {
154
- "cell_type": "markdown",
155
- "metadata": {},
156
- "source": [
157
- "hmmm skal 'time' egentlig være 'date'???"
158
- ]
159
- },
160
- {
161
- "cell_type": "code",
162
- "execution_count": 5,
163
- "metadata": {},
164
- "outputs": [],
165
- "source": [
166
- "# Select features for training data\n",
167
- "selected_features = electricity_fg.select_all()\\\n",
168
- " .join(weather_fg.select_except([\"timestamp\", \"time\"]))\\\n",
169
- " .join(forecast_renewable_energy_fg.select_except([\"timestamp\", \"time\"]))\\\n",
170
- " .join(danish_holidays_fg.select_all())"
171
- ]
172
- },
173
- {
174
- "cell_type": "code",
175
- "execution_count": 7,
176
- "metadata": {},
177
- "outputs": [],
178
- "source": [
179
- "# Uncomment this if you would like to view your selected features\n",
180
- "# selected_features.show(5)"
181
- ]
182
- },
183
- {
184
- "cell_type": "markdown",
185
- "metadata": {},
186
- "source": [
187
- "### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
188
- "\n",
189
- "We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
190
- "To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
191
- "\n",
192
- "To achieve this, we create a mapping between our features and transformation functions - ved ikke om man kan sige det her?"
193
- ]
194
- },
195
- {
196
- "cell_type": "code",
197
- "execution_count": 6,
198
- "metadata": {},
199
- "outputs": [],
200
- "source": [
201
- "# Defining transformation functions for feature scaling and encoding\n",
202
- "transformation_functions = {\n",
203
- " \"dk1_spotpricedkk_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
204
- " \"dk1_offshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
205
- " \"dk1_onshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
206
- " \"dk1_solar_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
207
- " \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
208
- " \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
209
- " \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
210
- " \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
211
- " \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
212
- " \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
213
- " \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
214
- " \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
215
- " \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
216
- " \"type\": fs.get_transformation_function(name=\"label_encoder\"),\n",
217
- " }"
218
- ]
219
- },
220
- {
221
- "cell_type": "markdown",
222
- "metadata": {},
223
- "source": [
224
- "`Feature Views` stand between **Feature Groups** and **Training Dataset**. Combining **Feature Groups** we can create **Feature Views** which store metadata of our data. Having **Feature Views** we can create **Training Dataset**.\n",
225
- "\n",
226
- "Feature Views allow us to define a schema in the form of a query with filters, a model target feature/label, and additional transformation functions.\n",
227
- "\n",
228
- "In order to create Feature View we can use `FeatureStore.get_or_create_feature_view()` method.\n",
229
- "\n",
230
- "We can specify the following parameters:\n",
231
- "\n",
232
- "- `name` - name of the feature view.\n",
233
- "\n",
234
- "- `version` - version of the feature view.\n",
235
- "\n",
236
- "- `labels`- our target variable.\n",
237
- "\n",
238
- "- `transformation_functions` - functions to transform our features.\n",
239
- "\n",
240
- "- `query` - query object with data."
241
- ]
242
- },
243
- {
244
- "cell_type": "markdown",
245
- "metadata": {},
246
- "source": [
247
- "ved ikke om den her omformulering af botten går an?"
248
- ]
249
- },
250
- {
251
- "cell_type": "markdown",
252
- "metadata": {},
253
- "source": [
254
- "`Feature Views` serve as an intermediary between **Feature Groups** and the **Training Dataset**. By combining various **Feature Groups**, we can construct **Feature Views**, which retain metadata about our data. Utilizing **Feature Views**, we can subsequently generate a **Training Dataset**.\n",
255
- "\n",
256
- "Feature Views facilitate the definition of schema through queries with filters, identification of the model's target feature or label, and application of additional transformation functions.\n",
257
- "\n",
258
- "To create a Feature View, we employ the `FeatureStore.get_or_create_feature_view()` method, where we specify the following parameters:\n",
259
- "\n",
260
- "- `name`: The name of the feature view.\n",
261
- "\n",
262
- "- `version`: The version of the feature view.\n",
263
- "\n",
264
- "- `labels`: Our target variable.\n",
265
- "\n",
266
- "- `transformation_functions`: Functions to transform our features.\n",
267
- "\n",
268
- "- `query`: A query object containing the relevant data."
269
- ]
270
- },
271
- {
272
- "cell_type": "code",
273
- "execution_count": 7,
274
- "metadata": {},
275
- "outputs": [],
276
- "source": [
277
- "# Getting or creating a feature view named 'electricity_feature_view'\n",
278
- "version = 1 # Defining the version for the feature view\n",
279
- "feature_view = fs.get_or_create_feature_view(\n",
280
- " name='electricity_feature_view',\n",
281
- " version=version,\n",
282
- " labels=[], # Labels will be defined manually later for our 'y'\n",
283
- " transformation_functions=transformation_functions,\n",
284
- " query=selected_features,\n",
285
- ")"
286
- ]
287
- },
288
- {
289
- "cell_type": "markdown",
290
- "metadata": {},
291
- "source": [
292
- "## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>\n",
293
- "\n",
294
- "In Hopsworks training data is a query where the projection (set of features) is determined by the parent FeatureView with an optional snapshot on disk of the data returned by the query.\n",
295
- "\n",
296
- "**Training Dataset may contain splits such as:** \n",
297
- "* Training set - the subset of training data used to train a model.\n",
298
- "* Validation set - the subset of training data used to evaluate hyperparameters when training a model\n",
299
- "* Test set - the holdout subset of training data used to evaluate a model\n",
300
- "\n",
301
- "Training dataset is created using `fs.create_training_dataset()` method.\n",
302
- "\n",
303
- "**From feature view APIs you can also create training datasets based on event time filters specifying `start_time` and `end_time`** "
304
- ]
305
- },
306
- {
307
- "cell_type": "markdown",
308
- "metadata": {},
309
- "source": [
310
- "### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
311
- ]
312
- },
313
- {
314
- "cell_type": "code",
315
- "execution_count": 8,
316
- "metadata": {},
317
- "outputs": [
318
- {
319
- "name": "stdout",
320
- "output_type": "stream",
321
- "text": [
322
- "Finished: Reading data from Hopsworks, using ArrowFlight (211.16s) \n"
323
- ]
324
- },
325
- {
326
- "name": "stderr",
327
- "output_type": "stream",
328
- "text": [
329
- "VersionWarning: Incremented version to `21`.\n"
330
- ]
331
- }
332
- ],
333
- "source": [
334
- "# Splitting the feature view data into train, validation, and test sets\n",
335
- "# We didn't specify 'labels' in feature view creation, it will therefore return 'None' for Y\n",
336
- "X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
337
- " train_start=\"2022-01-01\",\n",
338
- " train_end=\"2023-06-30\",\n",
339
- " validation_start=\"2023-07-01\",\n",
340
- " validation_end=\"2023-09-30\",\n",
341
- " test_start=\"2023-10-01\",\n",
342
- " test_end=\"2023-12-31\",\n",
343
- " description='Electricity price prediction dataset',\n",
344
- ")"
345
- ]
346
- },
347
- {
348
- "cell_type": "code",
349
- "execution_count": 9,
350
- "metadata": {},
351
- "outputs": [],
352
- "source": [
353
- "# Sorting the training, validation, and test datasets based on the 'timestamp' column\n",
354
- "X_train.sort_values([\"timestamp\"], inplace=True)\n",
355
- "X_val.sort_values([\"timestamp\"], inplace=True)\n",
356
- "X_test.sort_values([\"timestamp\"], inplace=True)"
357
- ]
358
- },
359
- {
360
- "cell_type": "code",
361
- "execution_count": 10,
362
- "metadata": {},
363
- "outputs": [],
364
- "source": [
365
- "# Extracting the target variable 'dk1_spotpricedkk_kwh' and defining 'y_train', 'y_val' and 'y_test' \n",
366
- "y_train = X_train[[\"dk1_spotpricedkk_kwh\"]]\n",
367
- "y_val = X_val[[\"dk1_spotpricedkk_kwh\"]]\n",
368
- "y_test = X_test[[\"dk1_spotpricedkk_kwh\"]]"
369
- ]
370
- },
371
- {
372
- "cell_type": "code",
373
- "execution_count": 11,
374
- "metadata": {},
375
- "outputs": [],
376
- "source": [
377
- "# Dropping the 'date', 'time' and 'timestamp' columns from the training, validation, and test datasets\n",
378
- "X_train.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
379
- "X_val.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
380
- "X_test.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)"
381
- ]
382
- },
383
- {
384
- "cell_type": "code",
385
- "execution_count": 12,
386
- "metadata": {},
387
- "outputs": [],
388
- "source": [
389
- "# Dropping the dependent variable (y) column 'dk1_spotpricedkk_kwh' from the training, validation, and test datasets\n",
390
- "X_train.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
391
- "X_val.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
392
- "X_test.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)"
393
- ]
394
- },
395
- {
396
- "cell_type": "code",
397
- "execution_count": 13,
398
- "metadata": {},
399
- "outputs": [
400
- {
401
- "data": {
402
- "text/html": [
403
- "<div>\n",
404
- "<style scoped>\n",
405
- " .dataframe tbody tr th:only-of-type {\n",
406
- " vertical-align: middle;\n",
407
- " }\n",
408
- "\n",
409
- " .dataframe tbody tr th {\n",
410
- " vertical-align: top;\n",
411
- " }\n",
412
- "\n",
413
- " .dataframe thead th {\n",
414
- " text-align: right;\n",
415
- " }\n",
416
- "</style>\n",
417
- "<table border=\"1\" class=\"dataframe\">\n",
418
- " <thead>\n",
419
- " <tr style=\"text-align: right;\">\n",
420
- " <th></th>\n",
421
- " <th>temperature_2m</th>\n",
422
- " <th>relative_humidity_2m</th>\n",
423
- " <th>precipitation</th>\n",
424
- " <th>rain</th>\n",
425
- " <th>snowfall</th>\n",
426
- " <th>weather_code</th>\n",
427
- " <th>cloud_cover</th>\n",
428
- " <th>wind_speed_10m</th>\n",
429
- " <th>wind_gusts_10m</th>\n",
430
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
431
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
432
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
433
- " <th>type</th>\n",
434
- " </tr>\n",
435
- " </thead>\n",
436
- " <tbody>\n",
437
- " <tr>\n",
438
- " <th>5905751</th>\n",
439
- " <td>0.435268</td>\n",
440
- " <td>0.986667</td>\n",
441
- " <td>0.011364</td>\n",
442
- " <td>0.011364</td>\n",
443
- " <td>0.0</td>\n",
444
- " <td>0.68</td>\n",
445
- " <td>1.0</td>\n",
446
- " <td>0.315152</td>\n",
447
- " <td>0.272633</td>\n",
448
- " <td>0.945277</td>\n",
449
- " <td>0.481878</td>\n",
450
- " <td>0.000000</td>\n",
451
- " <td>1</td>\n",
452
- " </tr>\n",
453
- " <tr>\n",
454
- " <th>19398</th>\n",
455
- " <td>0.435268</td>\n",
456
- " <td>0.986667</td>\n",
457
- " <td>0.011364</td>\n",
458
- " <td>0.011364</td>\n",
459
- " <td>0.0</td>\n",
460
- " <td>0.68</td>\n",
461
- " <td>1.0</td>\n",
462
- " <td>0.315152</td>\n",
463
- " <td>0.272633</td>\n",
464
- " <td>0.934795</td>\n",
465
- " <td>0.446702</td>\n",
466
- " <td>0.000008</td>\n",
467
- " <td>1</td>\n",
468
- " </tr>\n",
469
- " <tr>\n",
470
- " <th>5919627</th>\n",
471
- " <td>0.417411</td>\n",
472
- " <td>0.933333</td>\n",
473
- " <td>0.000000</td>\n",
474
- " <td>0.000000</td>\n",
475
- " <td>0.0</td>\n",
476
- " <td>0.04</td>\n",
477
- " <td>1.0</td>\n",
478
- " <td>0.082828</td>\n",
479
- " <td>0.074922</td>\n",
480
- " <td>0.773045</td>\n",
481
- " <td>0.264375</td>\n",
482
- " <td>0.000018</td>\n",
483
- " <td>1</td>\n",
484
- " </tr>\n",
485
- " <tr>\n",
486
- " <th>4719247</th>\n",
487
- " <td>0.426339</td>\n",
488
- " <td>0.933333</td>\n",
489
- " <td>0.000000</td>\n",
490
- " <td>0.000000</td>\n",
491
- " <td>0.0</td>\n",
492
- " <td>0.04</td>\n",
493
- " <td>1.0</td>\n",
494
- " <td>0.195960</td>\n",
495
- " <td>0.187305</td>\n",
496
- " <td>0.913059</td>\n",
497
- " <td>0.358547</td>\n",
498
- " <td>0.000012</td>\n",
499
- " <td>1</td>\n",
500
- " </tr>\n",
501
- " <tr>\n",
502
- " <th>4743896</th>\n",
503
- " <td>0.417411</td>\n",
504
- " <td>0.933333</td>\n",
505
- " <td>0.000000</td>\n",
506
- " <td>0.000000</td>\n",
507
- " <td>0.0</td>\n",
508
- " <td>0.04</td>\n",
509
- " <td>1.0</td>\n",
510
- " <td>0.082828</td>\n",
511
- " <td>0.074922</td>\n",
512
- " <td>0.493641</td>\n",
513
- " <td>0.133456</td>\n",
514
- " <td>0.005406</td>\n",
515
- " <td>1</td>\n",
516
- " </tr>\n",
517
- " </tbody>\n",
518
- "</table>\n",
519
- "</div>"
520
- ],
521
- "text/plain": [
522
- " temperature_2m relative_humidity_2m precipitation rain \\\n",
523
- "5905751 0.435268 0.986667 0.011364 0.011364 \n",
524
- "19398 0.435268 0.986667 0.011364 0.011364 \n",
525
- "5919627 0.417411 0.933333 0.000000 0.000000 \n",
526
- "4719247 0.426339 0.933333 0.000000 0.000000 \n",
527
- "4743896 0.417411 0.933333 0.000000 0.000000 \n",
528
- "\n",
529
- " snowfall weather_code cloud_cover wind_speed_10m wind_gusts_10m \\\n",
530
- "5905751 0.0 0.68 1.0 0.315152 0.272633 \n",
531
- "19398 0.0 0.68 1.0 0.315152 0.272633 \n",
532
- "5919627 0.0 0.04 1.0 0.082828 0.074922 \n",
533
- "4719247 0.0 0.04 1.0 0.195960 0.187305 \n",
534
- "4743896 0.0 0.04 1.0 0.082828 0.074922 \n",
535
- "\n",
536
- " dk1_offshore_wind_forecastintraday_kwh \\\n",
537
- "5905751 0.945277 \n",
538
- "19398 0.934795 \n",
539
- "5919627 0.773045 \n",
540
- "4719247 0.913059 \n",
541
- "4743896 0.493641 \n",
542
- "\n",
543
- " dk1_onshore_wind_forecastintraday_kwh \\\n",
544
- "5905751 0.481878 \n",
545
- "19398 0.446702 \n",
546
- "5919627 0.264375 \n",
547
- "4719247 0.358547 \n",
548
- "4743896 0.133456 \n",
549
- "\n",
550
- " dk1_solar_forecastintraday_kwh type \n",
551
- "5905751 0.000000 1 \n",
552
- "19398 0.000008 1 \n",
553
- "5919627 0.000018 1 \n",
554
- "4719247 0.000012 1 \n",
555
- "4743896 0.005406 1 "
556
- ]
557
- },
558
- "execution_count": 13,
559
- "metadata": {},
560
- "output_type": "execute_result"
561
- }
562
- ],
563
- "source": [
564
- "# Displaying the first 5 rows of the train dataset (X_train)\n",
565
- "X_train.head()"
566
- ]
567
- },
568
- {
569
- "cell_type": "markdown",
570
- "metadata": {},
571
- "source": [
572
- "## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
573
- ]
574
- },
575
- {
576
- "cell_type": "code",
577
- "execution_count": 21,
578
- "metadata": {},
579
- "outputs": [
580
- {
581
- "data": {
582
- "text/plain": [
583
- "(5012736, 13)"
584
- ]
585
- },
586
- "execution_count": 21,
587
- "metadata": {},
588
- "output_type": "execute_result"
589
- }
590
- ],
591
- "source": [
592
- "X_train.shape"
593
- ]
594
- },
595
- {
596
- "cell_type": "markdown",
597
- "metadata": {},
598
- "source": [
599
- "## <span style=\"color:#2656a3;\">🧬 Modeling Testing</span>"
600
- ]
601
- },
602
- {
603
- "cell_type": "code",
604
- "execution_count": 25,
605
- "metadata": {},
606
- "outputs": [
607
- {
608
- "ename": "AttributeError",
609
- "evalue": "'DataFrame' object has no attribute 'reshape'",
610
- "output_type": "error",
611
- "traceback": [
612
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
613
- "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
614
- "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_15496\\1411499862.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mX_train\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m13\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
615
- "\u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\cudatest\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6200\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6201\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6202\u001b[0m ):\n\u001b[0;32m 6203\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 6204\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
616
- "\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'reshape'"
617
- ]
618
- }
619
- ],
620
- "source": [
621
- "X_train = X_train.reshape((-1, 1, 13))\n"
622
- ]
623
- },
624
- {
625
- "cell_type": "code",
626
- "execution_count": 23,
627
- "metadata": {},
628
- "outputs": [
629
- {
630
- "ename": "AttributeError",
631
- "evalue": "'DataFrame' object has no attribute 'reshape'",
632
- "output_type": "error",
633
- "traceback": [
634
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
635
- "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
636
- "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_15496\\1311144430.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mX_train\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m13\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mX_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m13\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0my_train\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m-\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
637
- "\u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\cudatest\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, name)\u001b[0m\n\u001b[0;32m 6200\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mname\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_accessors\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6201\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_info_axis\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_can_hold_identifiers_and_holds_name\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6202\u001b[0m ):\n\u001b[0;32m 6203\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 6204\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mobject\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__getattribute__\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
638
- "\u001b[1;31mAttributeError\u001b[0m: 'DataFrame' object has no attribute 'reshape'"
639
- ]
640
- }
641
- ],
642
- "source": [
643
- "X_train = X_train.reshape((-1, 13, 1))\n",
644
- "X_test = X_test.reshape((-1, 13, 1))\n",
645
- "\n",
646
- "y_train = y_train.reshape((-1, 1))\n",
647
- "y_test = y_test.reshape((-1, 1))\n",
648
- "\n",
649
- "X_train.shape, X_test.shape, y_train.shape, y_test.shape"
650
- ]
651
- },
652
- {
653
- "cell_type": "code",
654
- "execution_count": 17,
655
- "metadata": {},
656
- "outputs": [
657
- {
658
- "ename": "ValueError",
659
- "evalue": "could not determine the shape of object type 'DataFrame'",
660
- "output_type": "error",
661
- "traceback": [
662
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
663
- "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
664
- "Cell \u001b[1;32mIn[17], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m X_train \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtensor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mfloat\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mfloat()\n\u001b[0;32m 2\u001b[0m y_train \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(y_train)\u001b[38;5;241m.\u001b[39mfloat()\n\u001b[0;32m 3\u001b[0m X_test \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(X_test\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m))\u001b[38;5;241m.\u001b[39mfloat()\n",
665
- "\u001b[1;31mValueError\u001b[0m: could not determine the shape of object type 'DataFrame'"
666
- ]
667
- }
668
- ],
669
- "source": [
670
- "X_train = torch.tensor(X_train.astype(float)).float()\n",
671
- "y_train = torch.tensor(y_train).float()\n",
672
- "X_test = torch.tensor(X_test.astype(float)).float()\n",
673
- "y_test = torch.tensor(y_test).float()\n",
674
- "\n",
675
- "X_train.shape, X_test.shape, y_train.shape, y_test.shape"
676
- ]
677
- },
678
- {
679
- "cell_type": "markdown",
680
- "metadata": {},
681
- "source": [
682
- "## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
683
- ]
684
- },
685
- {
686
- "cell_type": "code",
687
- "execution_count": null,
688
- "metadata": {},
689
- "outputs": [],
690
- "source": [
691
- "# import pandas as pd\n",
692
- "# import numpy as np\n",
693
- "# import xgboost as xgb\n",
694
- "# from sklearn.metrics import mean_squared_error\n",
695
- "# import os"
696
- ]
697
- },
698
- {
699
- "cell_type": "code",
700
- "execution_count": null,
701
- "metadata": {},
702
- "outputs": [],
703
- "source": [
704
- "# # Initialize the XGBoost regressor\n",
705
- "# model = xgb.XGBRegressor()\n",
706
- "# model_val = xgb.XGBRegressor()"
707
- ]
708
- },
709
- {
710
- "cell_type": "code",
711
- "execution_count": null,
712
- "metadata": {},
713
- "outputs": [],
714
- "source": [
715
- "# # Train the model on the training data\n",
716
- "# model.fit(X_train, y_train)"
717
- ]
718
- },
719
- {
720
- "cell_type": "code",
721
- "execution_count": null,
722
- "metadata": {},
723
- "outputs": [],
724
- "source": [
725
- "# # Make predictions on the validation set\n",
726
- "# y_test_pred = model.predict(X_test)"
727
- ]
728
- },
729
- {
730
- "cell_type": "code",
731
- "execution_count": null,
732
- "metadata": {},
733
- "outputs": [],
734
- "source": [
735
- "# # Calculate RMSE on the validation set\n",
736
- "# mse = mean_squared_error(y_test, y_test_pred, squared=False)\n",
737
- "# print(f\"Mean Squared Error (MSE): {mse}\")"
738
- ]
739
- },
740
- {
741
- "cell_type": "markdown",
742
- "metadata": {},
743
- "source": [
744
- "## <span style='color:#2656a3'>🗄 Model Registry</span>"
745
- ]
746
- },
747
- {
748
- "cell_type": "code",
749
- "execution_count": 39,
750
- "metadata": {},
751
- "outputs": [
752
- {
753
- "name": "stdout",
754
- "output_type": "stream",
755
- "text": [
756
- "Exporting trained model to: electricity_price_model\n",
757
- "INFO:tensorflow:Assets written to: electricity_price_model\\assets\n"
758
- ]
759
- }
760
- ],
761
- "source": [
762
- "# Exporting the trained model to a directory\n",
763
- "model_dir = \"electricity_price_model\"\n",
764
- "print('Exporting trained model to: {}'.format(model_dir))\n",
765
- "\n",
766
- "# Saving the model using TensorFlow's saved_model.save function\n",
767
- "tf.saved_model.save(model, model_dir)"
768
- ]
769
- },
770
- {
771
- "cell_type": "code",
772
- "execution_count": 44,
773
- "metadata": {},
774
- "outputs": [
775
- {
776
- "name": "stdout",
777
- "output_type": "stream",
778
- "text": [
779
- "Connected. Call `.close()` to terminate connection gracefully.\n"
780
- ]
781
- },
782
- {
783
- "name": "stderr",
784
- "output_type": "stream",
785
- "text": [
786
- "Uploading: 100.000%|██████████| 59/59 elapsed<00:01 remaining<00:001<00:01, 3.38it/s]\n",
787
- "Uploading: 100.000%|██████████| 397272/397272 elapsed<00:02 remaining<00:00 3.38it/s]\n",
788
- "Uploading: 0.000%| | 0/112411 elapsed<00:01 remaining<?0:04<00:01, 3.38it/s]\n",
789
- "Uploading model files (2 dirs, 2 files): 17%|█▋ | 1/6 [00:07<00:35, 7.08s/it]\n"
790
- ]
791
- },
792
- {
793
- "ename": "RestAPIError",
794
- "evalue": "Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''",
795
- "output_type": "error",
796
- "traceback": [
797
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
798
- "\u001b[1;31mRestAPIError\u001b[0m Traceback (most recent call last)",
799
- "Cell \u001b[1;32mIn[44], line 16\u001b[0m\n\u001b[0;32m 8\u001b[0m tf_model \u001b[38;5;241m=\u001b[39m mr\u001b[38;5;241m.\u001b[39mtensorflow\u001b[38;5;241m.\u001b[39mcreate_model(\n\u001b[0;32m 9\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDK_electricity_price_prediction_model\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 10\u001b[0m metrics\u001b[38;5;241m=\u001b[39mmetrics,\n\u001b[0;32m 11\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHourly electricity price prediction model.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 12\u001b[0m input_example\u001b[38;5;241m=\u001b[39mn_step_window\u001b[38;5;241m.\u001b[39mexample[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mnumpy(),\n\u001b[0;32m 13\u001b[0m )\n\u001b[0;32m 15\u001b[0m \u001b[38;5;66;03m# Saving the model to the specified directory\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m \u001b[43mtf_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_dir\u001b[49m\u001b[43m)\u001b[49m\n",
800
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\model.py:101\u001b[0m, in \u001b[0;36mModel.save\u001b[1;34m(self, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave\u001b[39m(\u001b[38;5;28mself\u001b[39m, model_path, await_registration\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m480\u001b[39m, keep_original_files\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m 91\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Persist this model including model files and metadata to the model registry.\u001b[39;00m\n\u001b[0;32m 92\u001b[0m \n\u001b[0;32m 93\u001b[0m \u001b[38;5;124;03m # Arguments\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 99\u001b[0m \u001b[38;5;124;03m `Model`: The model metadata object.\u001b[39;00m\n\u001b[0;32m 100\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 102\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 103\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 104\u001b[0m \u001b[43m \u001b[49m\u001b[43mawait_registration\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mawait_registration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 105\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 106\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
801
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:421\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 419\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m be:\n\u001b[0;32m 420\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mrm(model_instance\u001b[38;5;241m.\u001b[39mversion_path)\n\u001b[1;32m--> 421\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m be\n\u001b[0;32m 423\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel created, explore it at \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m model_instance\u001b[38;5;241m.\u001b[39mget_url())\n\u001b[0;32m 425\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_instance\n",
802
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:385\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 381\u001b[0m \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[0;32m 382\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\n\u001b[0;32m 383\u001b[0m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(os\u001b[38;5;241m.\u001b[39mgetcwd(), model_path)\n\u001b[0;32m 384\u001b[0m ): \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[1;32m--> 385\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_model_from_local_or_hopsfs_mount\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetcwd\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 390\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 391\u001b[0m \u001b[38;5;66;03m# check project relative\u001b[39;00m\n\u001b[0;32m 392\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mpath_exists(\n\u001b[0;32m 393\u001b[0m model_path\n\u001b[0;32m 394\u001b[0m ): \u001b[38;5;66;03m# check hdfs relative and absolute\u001b[39;00m\n",
803
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:249\u001b[0m, in \u001b[0;36mModelEngine._save_model_from_local_or_hopsfs_mount\u001b[1;34m(self, model_instance, model_path, keep_original_files, update_upload_progress)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_or_move_hopsfs_model(\n\u001b[0;32m 241\u001b[0m from_hdfs_model_path\u001b[38;5;241m=\u001b[39mmodel_path\u001b[38;5;241m.\u001b[39mreplace(\n\u001b[0;32m 242\u001b[0m constants\u001b[38;5;241m.\u001b[39mMODEL_REGISTRY\u001b[38;5;241m.\u001b[39mHOPSFS_MOUNT_PREFIX, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 246\u001b[0m update_upload_progress\u001b[38;5;241m=\u001b[39mupdate_upload_progress,\n\u001b[0;32m 247\u001b[0m )\n\u001b[0;32m 248\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 249\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_local_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mfrom_local_model_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_model_version_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mversion_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
804
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:225\u001b[0m, in \u001b[0;36mModelEngine._upload_local_model\u001b[1;34m(self, from_local_model_path, to_model_version_path, update_upload_progress)\u001b[0m\n\u001b[0;32m 223\u001b[0m update_upload_progress(n_dirs, n_files)\n\u001b[0;32m 224\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m f_name \u001b[38;5;129;01min\u001b[39;00m files:\n\u001b[1;32m--> 225\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mf_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_base_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 226\u001b[0m n_files \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 227\u001b[0m update_upload_progress(n_dirs, n_files)\n",
805
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\local_engine.py:38\u001b[0m, in \u001b[0;36mLocalEngine.upload\u001b[1;34m(self, local_path, remote_path)\u001b[0m\n\u001b[0;32m 36\u001b[0m local_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_abs_path(local_path)\n\u001b[0;32m 37\u001b[0m remote_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepend_project_path(remote_path)\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dataset_api\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_path\u001b[49m\u001b[43m)\u001b[49m\n",
806
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:152\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 151\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m--> 152\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[0;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 155\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n",
807
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfuture\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
808
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m [\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m future \u001b[38;5;129;01min\u001b[39;00m futures]\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
809
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:449\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n\u001b[0;32m 453\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n",
810
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:401\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 399\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception:\n\u001b[0;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 401\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[0;32m 402\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 403\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[0;32m 404\u001b[0m \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
811
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\thread.py:58\u001b[0m, in \u001b[0;36m_WorkItem.run\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 59\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfuture\u001b[38;5;241m.\u001b[39mset_exception(exc)\n",
812
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:189\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 184\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 185\u001b[0m re\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;129;01min\u001b[39;00m DatasetApi\u001b[38;5;241m.\u001b[39mFLOW_PERMANENT_ERRORS\n\u001b[0;32m 186\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m chunk\u001b[38;5;241m.\u001b[39mretries \u001b[38;5;241m>\u001b[39m max_chunk_retries\n\u001b[0;32m 187\u001b[0m ):\n\u001b[0;32m 188\u001b[0m chunk\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfailed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 189\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m re\n\u001b[0;32m 190\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(chunk_retry_interval)\n\u001b[0;32m 191\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n",
813
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:178\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 177\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 178\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 179\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mupload_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontent\u001b[49m\n\u001b[0;32m 180\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m 182\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RestAPIError \u001b[38;5;28;01mas\u001b[39;00m re:\n",
814
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:214\u001b[0m, in \u001b[0;36mDatasetApi._upload_request\u001b[1;34m(self, params, path, file_name, chunk)\u001b[0m\n\u001b[0;32m 211\u001b[0m path_params \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproject\u001b[39m\u001b[38;5;124m\"\u001b[39m, _client\u001b[38;5;241m.\u001b[39m_project_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataset\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupload\u001b[39m\u001b[38;5;124m\"\u001b[39m, path]\n\u001b[0;32m 213\u001b[0m \u001b[38;5;66;03m# Flow configuration params are sent as form data\u001b[39;00m\n\u001b[1;32m--> 214\u001b[0m \u001b[43m_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\n\u001b[0;32m 216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
815
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\decorators.py:35\u001b[0m, in \u001b[0;36mconnected.<locals>.if_connected\u001b[1;34m(inst, *args, **kwargs)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m inst\u001b[38;5;241m.\u001b[39m_connected:\n\u001b[0;32m 34\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m NoHopsworksConnectionError\n\u001b[1;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
816
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\client\\base.py:108\u001b[0m, in \u001b[0;36mClient._send_request\u001b[1;34m(self, method, path_params, query_params, headers, data, stream, files)\u001b[0m\n\u001b[0;32m 105\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_session\u001b[38;5;241m.\u001b[39msend(prepped, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_verify, stream\u001b[38;5;241m=\u001b[39mstream)\n\u001b[0;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m--> 108\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mRestAPIError(url, response)\n\u001b[0;32m 110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream:\n\u001b[0;32m 111\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
817
- "\u001b[1;31mRestAPIError\u001b[0m: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''"
818
- ]
819
- }
820
- ],
821
- "source": [
822
- "# Retrieving the Model Registry\n",
823
- "mr = project.get_model_registry()\n",
824
- "\n",
825
- "# Extracting loss value from the training history\n",
826
- "metrics = {'loss': history_dict['val_loss'][0]} \n",
827
- "\n",
828
- "# Creating a TensorFlow model in the Model Registry\n",
829
- "tf_model = mr.tensorflow.create_model(\n",
830
- " name=\"DK_electricity_price_prediction_model\",\n",
831
- " metrics=metrics,\n",
832
- " description=\"Hourly electricity price prediction model.\",\n",
833
- " input_example=n_step_window.example[0].numpy(),\n",
834
- ")\n",
835
- "\n",
836
- "# Saving the model to the specified directory\n",
837
- "tf_model.save(model_dir)"
838
- ]
839
- },
840
- {
841
- "cell_type": "markdown",
842
- "metadata": {},
843
- "source": [
844
- "---\n",
845
- "\n",
846
- "## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
847
- "\n",
848
- "In the next notebook you will use your registered model to predict batch data."
849
- ]
850
- }
851
- ],
852
- "metadata": {
853
- "kernelspec": {
854
- "display_name": "bds-mlops",
855
- "language": "python",
856
- "name": "python3"
857
- },
858
- "language_info": {
859
- "codemirror_mode": {
860
- "name": "ipython",
861
- "version": 3
862
- },
863
- "file_extension": ".py",
864
- "mimetype": "text/x-python",
865
- "name": "python",
866
- "nbconvert_exporter": "python",
867
- "pygments_lexer": "ipython3",
868
- "version": "3.7.16"
869
- },
870
- "orig_nbformat": 4
871
- },
872
- "nbformat": 4,
873
- "nbformat_minor": 2
874
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/notebooks_dev/3_training_pipeline_dev_tensorflow.ipynb DELETED
@@ -1,818 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 03: Training Pipeline</span>"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "## 🗒️ This notebook is divided into the following sections:\n",
15
- "1. Feature selection.\n",
16
- "2. Feature transformations.\n",
17
- "3. Training datasets creation.\n",
18
- "4. Loading the training data.\n",
19
- "5. Train the model.\n",
20
- "6. Register model to Hopsworks model registry."
21
- ]
22
- },
23
- {
24
- "cell_type": "markdown",
25
- "metadata": {},
26
- "source": [
27
- "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages</span>"
28
- ]
29
- },
30
- {
31
- "cell_type": "code",
32
- "execution_count": 1,
33
- "metadata": {},
34
- "outputs": [],
35
- "source": [
36
- "!pip install tensorflow --quiet"
37
- ]
38
- },
39
- {
40
- "cell_type": "code",
41
- "execution_count": 11,
42
- "metadata": {},
43
- "outputs": [],
44
- "source": [
45
- "# Importing the packages for the needed libraries for the Jupyter notebook\n",
46
- "import inspect \n",
47
- "import datetime\n",
48
- "\n",
49
- "import pandas as pd\n",
50
- "import numpy as np\n",
51
- "import matplotlib.pyplot as plt\n",
52
- "import tensorflow as tf\n",
53
- "\n",
54
- "#ignore warnings\n",
55
- "import warnings\n",
56
- "warnings.filterwarnings('ignore')"
57
- ]
58
- },
59
- {
60
- "cell_type": "markdown",
61
- "metadata": {},
62
- "source": [
63
- "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store</span>"
64
- ]
65
- },
66
- {
67
- "cell_type": "code",
68
- "execution_count": 1,
69
- "metadata": {},
70
- "outputs": [
71
- {
72
- "name": "stderr",
73
- "output_type": "stream",
74
- "text": [
75
- "c:\\Users\\Benj3\\anaconda3\\envs\\tensor\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
76
- " from .autonotebook import tqdm as notebook_tqdm\n"
77
- ]
78
- },
79
- {
80
- "name": "stdout",
81
- "output_type": "stream",
82
- "text": [
83
- "Connected. Call `.close()` to terminate connection gracefully.\n",
84
- "\n",
85
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
86
- "Connected. Call `.close()` to terminate connection gracefully.\n"
87
- ]
88
- }
89
- ],
90
- "source": [
91
- "# Importing the hopsworks module\n",
92
- "import hopsworks\n",
93
- "\n",
94
- "# Logging in to the Hopsworks project\n",
95
- "project = hopsworks.login()\n",
96
- "\n",
97
- "# Getting the feature store from the project\n",
98
- "fs = project.get_feature_store() "
99
- ]
100
- },
101
- {
102
- "cell_type": "code",
103
- "execution_count": 3,
104
- "metadata": {},
105
- "outputs": [],
106
- "source": [
107
- "# Retrieve the feature groups\n",
108
- "electricity_fg = fs.get_feature_group(\n",
109
- " name='electricity_prices',\n",
110
- " version=1,\n",
111
- ")\n",
112
- "\n",
113
- "weather_fg = fs.get_feature_group(\n",
114
- " name='weather_measurements',\n",
115
- " version=1,\n",
116
- ")\n",
117
- "\n",
118
- "danish_holidays_fg = fs.get_feature_group(\n",
119
- " name='danish_holidayss',\n",
120
- " version=1,\n",
121
- ")\n",
122
- "forecast_renewable_energy_fg = fs.get_feature_group(\n",
123
- " name='forecast_renewable_energy',\n",
124
- " version=1\n",
125
- ")"
126
- ]
127
- },
128
- {
129
- "cell_type": "markdown",
130
- "metadata": {},
131
- "source": [
132
- "## <span style=\"color:#2656a3;\"> 🖍 Feature View Creation and Retrieving </span>\n",
133
- "\n",
134
- "We first select the features that we want to include for model training.\n",
135
- "\n",
136
- "Since we specified `primary_key` as `date` and `event_time` as `timestamp` in part 01, we can now join the `electricity_fg`, `weather_fg` and `forecast_renewable_energy_fg` feature groups together."
137
- ]
138
- },
139
- {
140
- "cell_type": "markdown",
141
- "metadata": {},
142
- "source": [
143
- "hmmm skal 'time' egentlig være 'date'???"
144
- ]
145
- },
146
- {
147
- "cell_type": "code",
148
- "execution_count": 4,
149
- "metadata": {},
150
- "outputs": [],
151
- "source": [
152
- "# Select features for training data\n",
153
- "selected_features = electricity_fg.select_all()\\\n",
154
- " .join(weather_fg.select_except([\"timestamp\", \"time\"]))\\\n",
155
- " .join(forecast_renewable_energy_fg.select_except([\"timestamp\", \"time\"]))\\\n",
156
- " .join(danish_holidays_fg.select_all())"
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "execution_count": 7,
162
- "metadata": {},
163
- "outputs": [],
164
- "source": [
165
- "# Uncomment this if you would like to view your selected features\n",
166
- "# selected_features.show(5)"
167
- ]
168
- },
169
- {
170
- "cell_type": "markdown",
171
- "metadata": {},
172
- "source": [
173
- "### <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
174
- "\n",
175
- "We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
176
- "To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
177
- "\n",
178
- "To achieve this, we create a mapping between our features and transformation functions - ved ikke om man kan sige det her?"
179
- ]
180
- },
181
- {
182
- "cell_type": "code",
183
- "execution_count": 5,
184
- "metadata": {},
185
- "outputs": [],
186
- "source": [
187
- "# Defining transformation functions for feature scaling and encoding\n",
188
- "transformation_functions = {\n",
189
- " \"dk1_spotpricedkk_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
190
- " \"dk1_offshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
191
- " \"dk1_onshore_wind_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
192
- " \"dk1_solar_forecastintraday_kwh\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
193
- " \"temperature_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
194
- " \"relative_humidity_2m\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
195
- " \"precipitation\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
196
- " \"rain\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
197
- " \"snowfall\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
198
- " \"weather_code\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
199
- " \"cloud_cover\": fs.get_transformation_function(name=\"min_max_scaler\"), \n",
200
- " \"wind_speed_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
201
- " \"wind_gusts_10m\": fs.get_transformation_function(name=\"min_max_scaler\"),\n",
202
- " \"type\": fs.get_transformation_function(name=\"label_encoder\"),\n",
203
- " }"
204
- ]
205
- },
206
- {
207
- "cell_type": "markdown",
208
- "metadata": {},
209
- "source": [
210
- "`Feature Views` stand between **Feature Groups** and the **Training Dataset**. By combining **Feature Groups** we can create **Feature Views**, which store metadata about our data. Having **Feature Views**, we can create a **Training Dataset**.\n",
211
- "\n",
212
- "Feature Views let us define a schema in the form of a query with filters, a model target feature/label, and additional transformation functions.\n",
213
- "\n",
214
- "In order to create Feature View we can use `FeatureStore.get_or_create_feature_view()` method.\n",
215
- "\n",
216
- "We can specify the following parameters:\n",
217
- "\n",
218
- "- `name` - name of the feature view.\n",
219
- "\n",
220
- "- `version` - version of the feature view.\n",
221
- "\n",
222
- "- `labels`- our target variable.\n",
223
- "\n",
224
- "- `transformation_functions` - functions to transform our features.\n",
225
- "\n",
226
- "- `query` - query object with data."
227
- ]
228
- },
229
- {
230
- "cell_type": "markdown",
231
- "metadata": {},
232
- "source": [
233
- "ved ikke om den her omformulering af botten går an?"
234
- ]
235
- },
236
- {
237
- "cell_type": "markdown",
238
- "metadata": {},
239
- "source": [
240
- "`Feature Views` serve as an intermediary between **Feature Groups** and the **Training Dataset**. By combining various **Feature Groups**, we can construct **Feature Views**, which retain metadata about our data. Utilizing **Feature Views**, we can subsequently generate a **Training Dataset**.\n",
241
- "\n",
242
- "Feature Views facilitate the definition of schema through queries with filters, identification of the model's target feature or label, and application of additional transformation functions.\n",
243
- "\n",
244
- "To create a Feature View, we employ the `FeatureStore.get_or_create_feature_view()` method, where we specify the following parameters:\n",
245
- "\n",
246
- "- `name`: The name of the feature view.\n",
247
- "\n",
248
- "- `version`: The version of the feature view.\n",
249
- "\n",
250
- "- `labels`: Our target variable.\n",
251
- "\n",
252
- "- `transformation_functions`: Functions to transform our features.\n",
253
- "\n",
254
- "- `query`: A query object containing the relevant data."
255
- ]
256
- },
257
- {
258
- "cell_type": "code",
259
- "execution_count": 6,
260
- "metadata": {},
261
- "outputs": [],
262
- "source": [
263
- "# Getting or creating a feature view named 'electricity_feature_view'\n",
264
- "version = 1 # Defining the version for the feature view\n",
265
- "feature_view = fs.get_or_create_feature_view(\n",
266
- " name='electricity_feature_view',\n",
267
- " version=version,\n",
268
- " labels=[], # Labels will be defined manually later for our 'y'\n",
269
- " transformation_functions=transformation_functions,\n",
270
- " query=selected_features,\n",
271
- ")"
272
- ]
273
- },
274
- {
275
- "cell_type": "markdown",
276
- "metadata": {},
277
- "source": [
278
- "## <span style=\"color:#2656a3;\"> 🏋️ Training Dataset Creation</span>\n",
279
- "\n",
280
- "In Hopsworks training data is a query where the projection (set of features) is determined by the parent FeatureView with an optional snapshot on disk of the data returned by the query.\n",
281
- "\n",
282
- "**Training Dataset may contain splits such as:** \n",
283
- "* Training set - the subset of training data used to train a model.\n",
284
- "* Validation set - the subset of training data used to evaluate hyperparameters when training a model.\n",
285
- "* Test set - the holdout subset of training data used to evaluate a model.\n",
286
- "\n",
287
- "Training dataset is created using `fs.create_training_dataset()` method.\n",
288
- "\n",
289
- "**From the feature view APIs you can also create training datasets based on event time filters by specifying `start_time` and `end_time`** "
290
- ]
291
- },
292
- {
293
- "cell_type": "markdown",
294
- "metadata": {},
295
- "source": [
296
- "### <span style=\"color:#2656a3;\"> ⛳️ Dataset with train, test and validation splits</span>"
297
- ]
298
- },
299
- {
300
- "cell_type": "code",
301
- "execution_count": 7,
302
- "metadata": {},
303
- "outputs": [
304
- {
305
- "name": "stdout",
306
- "output_type": "stream",
307
- "text": [
308
- "Finished: Reading data from Hopsworks, using ArrowFlight (198.29s) \n"
309
- ]
310
- },
311
- {
312
- "name": "stderr",
313
- "output_type": "stream",
314
- "text": [
315
- "VersionWarning: Incremented version to `15`.\n"
316
- ]
317
- }
318
- ],
319
- "source": [
320
- "# Splitting the feature view data into train, validation, and test sets\n",
321
- "# We didn't specify 'labels' in feature view creation, it will therefore return 'None' for Y\n",
322
- "X_train, X_val, X_test, _, _, _ = feature_view.train_validation_test_split(\n",
323
- " train_start=\"2022-01-01\",\n",
324
- " train_end=\"2023-06-30\",\n",
325
- " validation_start=\"2023-07-01\",\n",
326
- " validation_end=\"2023-09-30\",\n",
327
- " test_start=\"2023-10-01\",\n",
328
- " test_end=\"2023-12-31\",\n",
329
- " description='Electricity price prediction dataset',\n",
330
- ")"
331
- ]
332
- },
333
- {
334
- "cell_type": "code",
335
- "execution_count": 13,
336
- "metadata": {},
337
- "outputs": [],
338
- "source": [
339
- "# Sorting the training, validation, and test datasets based on the 'timestamp' column\n",
340
- "X_train.sort_values([\"timestamp\"], inplace=True)\n",
341
- "X_val.sort_values([\"timestamp\"], inplace=True)\n",
342
- "X_test.sort_values([\"timestamp\"], inplace=True)"
343
- ]
344
- },
345
- {
346
- "cell_type": "code",
347
- "execution_count": 14,
348
- "metadata": {},
349
- "outputs": [],
350
- "source": [
351
- "# Extracting the target variable 'dk1_spotpricedkk_kwh' and defining 'y_train', 'y_val' and 'y_test' \n",
352
- "y_train = X_train[[\"dk1_spotpricedkk_kwh\"]]\n",
353
- "y_val = X_val[[\"dk1_spotpricedkk_kwh\"]]\n",
354
- "y_test = X_test[[\"dk1_spotpricedkk_kwh\"]]"
355
- ]
356
- },
357
- {
358
- "cell_type": "code",
359
- "execution_count": 15,
360
- "metadata": {},
361
- "outputs": [],
362
- "source": [
363
- "# Dropping the 'date', 'time' and 'timestamp' columns from the training, validation, and test datasets\n",
364
- "X_train.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
365
- "X_val.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)\n",
366
- "X_test.drop([\"date\", \"time\", \"timestamp\"], axis=1, inplace=True)"
367
- ]
368
- },
369
- {
370
- "cell_type": "code",
371
- "execution_count": 16,
372
- "metadata": {},
373
- "outputs": [],
374
- "source": [
375
- "# Dropping the dependent variable (y) column 'dk1_spotpricedkk_kwh' from the training, validation, and test datasets\n",
376
- "X_train.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
377
- "X_val.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)\n",
378
- "X_test.drop([\"dk1_spotpricedkk_kwh\"], axis=1, inplace=True)"
379
- ]
380
- },
381
- {
382
- "cell_type": "code",
383
- "execution_count": 17,
384
- "metadata": {},
385
- "outputs": [
386
- {
387
- "data": {
388
- "text/html": [
389
- "<div>\n",
390
- "<style scoped>\n",
391
- " .dataframe tbody tr th:only-of-type {\n",
392
- " vertical-align: middle;\n",
393
- " }\n",
394
- "\n",
395
- " .dataframe tbody tr th {\n",
396
- " vertical-align: top;\n",
397
- " }\n",
398
- "\n",
399
- " .dataframe thead th {\n",
400
- " text-align: right;\n",
401
- " }\n",
402
- "</style>\n",
403
- "<table border=\"1\" class=\"dataframe\">\n",
404
- " <thead>\n",
405
- " <tr style=\"text-align: right;\">\n",
406
- " <th></th>\n",
407
- " <th>temperature_2m</th>\n",
408
- " <th>relative_humidity_2m</th>\n",
409
- " <th>precipitation</th>\n",
410
- " <th>rain</th>\n",
411
- " <th>snowfall</th>\n",
412
- " <th>weather_code</th>\n",
413
- " <th>cloud_cover</th>\n",
414
- " <th>wind_speed_10m</th>\n",
415
- " <th>wind_gusts_10m</th>\n",
416
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
417
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
418
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
419
- " <th>type</th>\n",
420
- " </tr>\n",
421
- " </thead>\n",
422
- " <tbody>\n",
423
- " <tr>\n",
424
- " <th>5905751</th>\n",
425
- " <td>0.435268</td>\n",
426
- " <td>0.986667</td>\n",
427
- " <td>0.011364</td>\n",
428
- " <td>0.011364</td>\n",
429
- " <td>0.0</td>\n",
430
- " <td>0.68</td>\n",
431
- " <td>1.0</td>\n",
432
- " <td>0.315152</td>\n",
433
- " <td>0.272633</td>\n",
434
- " <td>0.945277</td>\n",
435
- " <td>0.481878</td>\n",
436
- " <td>0.000000</td>\n",
437
- " <td>1</td>\n",
438
- " </tr>\n",
439
- " <tr>\n",
440
- " <th>19398</th>\n",
441
- " <td>0.435268</td>\n",
442
- " <td>0.986667</td>\n",
443
- " <td>0.011364</td>\n",
444
- " <td>0.011364</td>\n",
445
- " <td>0.0</td>\n",
446
- " <td>0.68</td>\n",
447
- " <td>1.0</td>\n",
448
- " <td>0.315152</td>\n",
449
- " <td>0.272633</td>\n",
450
- " <td>0.934795</td>\n",
451
- " <td>0.446702</td>\n",
452
- " <td>0.000008</td>\n",
453
- " <td>1</td>\n",
454
- " </tr>\n",
455
- " <tr>\n",
456
- " <th>5919627</th>\n",
457
- " <td>0.417411</td>\n",
458
- " <td>0.933333</td>\n",
459
- " <td>0.000000</td>\n",
460
- " <td>0.000000</td>\n",
461
- " <td>0.0</td>\n",
462
- " <td>0.04</td>\n",
463
- " <td>1.0</td>\n",
464
- " <td>0.082828</td>\n",
465
- " <td>0.074922</td>\n",
466
- " <td>0.773045</td>\n",
467
- " <td>0.264375</td>\n",
468
- " <td>0.000018</td>\n",
469
- " <td>1</td>\n",
470
- " </tr>\n",
471
- " <tr>\n",
472
- " <th>4719247</th>\n",
473
- " <td>0.426339</td>\n",
474
- " <td>0.933333</td>\n",
475
- " <td>0.000000</td>\n",
476
- " <td>0.000000</td>\n",
477
- " <td>0.0</td>\n",
478
- " <td>0.04</td>\n",
479
- " <td>1.0</td>\n",
480
- " <td>0.195960</td>\n",
481
- " <td>0.187305</td>\n",
482
- " <td>0.913059</td>\n",
483
- " <td>0.358547</td>\n",
484
- " <td>0.000012</td>\n",
485
- " <td>1</td>\n",
486
- " </tr>\n",
487
- " <tr>\n",
488
- " <th>4743896</th>\n",
489
- " <td>0.417411</td>\n",
490
- " <td>0.933333</td>\n",
491
- " <td>0.000000</td>\n",
492
- " <td>0.000000</td>\n",
493
- " <td>0.0</td>\n",
494
- " <td>0.04</td>\n",
495
- " <td>1.0</td>\n",
496
- " <td>0.082828</td>\n",
497
- " <td>0.074922</td>\n",
498
- " <td>0.493641</td>\n",
499
- " <td>0.133456</td>\n",
500
- " <td>0.005406</td>\n",
501
- " <td>1</td>\n",
502
- " </tr>\n",
503
- " </tbody>\n",
504
- "</table>\n",
505
- "</div>"
506
- ],
507
- "text/plain": [
508
- " temperature_2m relative_humidity_2m precipitation rain \\\n",
509
- "5905751 0.435268 0.986667 0.011364 0.011364 \n",
510
- "19398 0.435268 0.986667 0.011364 0.011364 \n",
511
- "5919627 0.417411 0.933333 0.000000 0.000000 \n",
512
- "4719247 0.426339 0.933333 0.000000 0.000000 \n",
513
- "4743896 0.417411 0.933333 0.000000 0.000000 \n",
514
- "\n",
515
- " snowfall weather_code cloud_cover wind_speed_10m wind_gusts_10m \\\n",
516
- "5905751 0.0 0.68 1.0 0.315152 0.272633 \n",
517
- "19398 0.0 0.68 1.0 0.315152 0.272633 \n",
518
- "5919627 0.0 0.04 1.0 0.082828 0.074922 \n",
519
- "4719247 0.0 0.04 1.0 0.195960 0.187305 \n",
520
- "4743896 0.0 0.04 1.0 0.082828 0.074922 \n",
521
- "\n",
522
- " dk1_offshore_wind_forecastintraday_kwh \\\n",
523
- "5905751 0.945277 \n",
524
- "19398 0.934795 \n",
525
- "5919627 0.773045 \n",
526
- "4719247 0.913059 \n",
527
- "4743896 0.493641 \n",
528
- "\n",
529
- " dk1_onshore_wind_forecastintraday_kwh \\\n",
530
- "5905751 0.481878 \n",
531
- "19398 0.446702 \n",
532
- "5919627 0.264375 \n",
533
- "4719247 0.358547 \n",
534
- "4743896 0.133456 \n",
535
- "\n",
536
- " dk1_solar_forecastintraday_kwh type \n",
537
- "5905751 0.000000 1 \n",
538
- "19398 0.000008 1 \n",
539
- "5919627 0.000018 1 \n",
540
- "4719247 0.000012 1 \n",
541
- "4743896 0.005406 1 "
542
- ]
543
- },
544
- "execution_count": 17,
545
- "metadata": {},
546
- "output_type": "execute_result"
547
- }
548
- ],
549
- "source": [
550
- "# Displaying the first 5 rows of the train dataset (X_train)\n",
551
- "X_train.head()"
552
- ]
553
- },
554
- {
555
- "cell_type": "code",
556
- "execution_count": 18,
557
- "metadata": {},
558
- "outputs": [
559
- {
560
- "data": {
561
- "text/html": [
562
- "<div>\n",
563
- "<style scoped>\n",
564
- " .dataframe tbody tr th:only-of-type {\n",
565
- " vertical-align: middle;\n",
566
- " }\n",
567
- "\n",
568
- " .dataframe tbody tr th {\n",
569
- " vertical-align: top;\n",
570
- " }\n",
571
- "\n",
572
- " .dataframe thead th {\n",
573
- " text-align: right;\n",
574
- " }\n",
575
- "</style>\n",
576
- "<table border=\"1\" class=\"dataframe\">\n",
577
- " <thead>\n",
578
- " <tr style=\"text-align: right;\">\n",
579
- " <th></th>\n",
580
- " <th>dk1_spotpricedkk_kwh</th>\n",
581
- " </tr>\n",
582
- " </thead>\n",
583
- " <tbody>\n",
584
- " <tr>\n",
585
- " <th>5905751</th>\n",
586
- " <td>0.179988</td>\n",
587
- " </tr>\n",
588
- " <tr>\n",
589
- " <th>19398</th>\n",
590
- " <td>0.179988</td>\n",
591
- " </tr>\n",
592
- " <tr>\n",
593
- " <th>5919627</th>\n",
594
- " <td>0.179988</td>\n",
595
- " </tr>\n",
596
- " <tr>\n",
597
- " <th>4719247</th>\n",
598
- " <td>0.179988</td>\n",
599
- " </tr>\n",
600
- " <tr>\n",
601
- " <th>4743896</th>\n",
602
- " <td>0.179988</td>\n",
603
- " </tr>\n",
604
- " </tbody>\n",
605
- "</table>\n",
606
- "</div>"
607
- ],
608
- "text/plain": [
609
- " dk1_spotpricedkk_kwh\n",
610
- "5905751 0.179988\n",
611
- "19398 0.179988\n",
612
- "5919627 0.179988\n",
613
- "4719247 0.179988\n",
614
- "4743896 0.179988"
615
- ]
616
- },
617
- "execution_count": 18,
618
- "metadata": {},
619
- "output_type": "execute_result"
620
- }
621
- ],
622
- "source": [
623
- "\n",
624
- "# Displaying the first 5 rows of the train dataset (y_train)\n",
625
- "y_train.head()"
626
- ]
627
- },
628
- {
629
- "cell_type": "markdown",
630
- "metadata": {},
631
- "source": [
632
- "## <span style=\"color:#2656a3;\">🗃 Window timeseries dataset </span>"
633
- ]
634
- },
635
- {
636
- "cell_type": "markdown",
637
- "metadata": {},
638
- "source": [
639
- "## <span style=\"color:#2656a3;\">🧬 Modeling</span>"
640
- ]
641
- },
642
- {
643
- "cell_type": "code",
644
- "execution_count": 43,
645
- "metadata": {},
646
- "outputs": [],
647
- "source": [
648
- "from keras.preprocessing.sequence import TimeseriesGenerator\n"
649
- ]
650
- },
651
- {
652
- "cell_type": "code",
653
- "execution_count": 55,
654
- "metadata": {},
655
- "outputs": [],
656
- "source": [
657
- "# define generator\n",
658
- "n_input = 12\n",
659
- "n_features = 13\n",
660
- "generator = TimeseriesGenerator(X_train, X_train, length=n_input, batch_size=32)"
661
- ]
662
- },
663
- {
664
- "cell_type": "code",
665
- "execution_count": null,
666
- "metadata": {},
667
- "outputs": [
668
- {
669
- "ename": "",
670
- "evalue": "",
671
- "output_type": "error",
672
- "traceback": [
673
- "\u001b[1;31mThe kernel failed to start as 'TypeAliasType' could not be imported from 'c:\\Users\\Benj3\\anaconda3\\envs\\tensor\\Lib\\site-packages\\typing_extensions.py'.\n",
674
- "\u001b[1;31mClick <a href='https://aka.ms/kernelFailuresModuleImportErrFromFile'>here</a> for more info."
675
- ]
676
- }
677
- ],
678
- "source": [
679
- "X, y = generator[1]\n",
680
- "print(f'Given the Array: \\n{X.flatten()}')\n",
681
- "print(f'Predict this y: \\n {y}')"
682
- ]
683
- },
684
- {
685
- "cell_type": "markdown",
686
- "metadata": {},
687
- "source": [
688
- "## <span style='color:#2656a3'>🗄 Model Registry</span>"
689
- ]
690
- },
691
- {
692
- "cell_type": "code",
693
- "execution_count": 39,
694
- "metadata": {},
695
- "outputs": [
696
- {
697
- "name": "stdout",
698
- "output_type": "stream",
699
- "text": [
700
- "Exporting trained model to: electricity_price_model\n",
701
- "INFO:tensorflow:Assets written to: electricity_price_model\\assets\n"
702
- ]
703
- }
704
- ],
705
- "source": [
706
- "# Exporting the trained model to a directory\n",
707
- "model_dir = \"electricity_price_model\"\n",
708
- "print('Exporting trained model to: {}'.format(model_dir))\n",
709
- "\n",
710
- "# Saving the model using TensorFlow's saved_model.save function\n",
711
- "tf.saved_model.save(model, model_dir)"
712
- ]
713
- },
714
- {
715
- "cell_type": "code",
716
- "execution_count": 44,
717
- "metadata": {},
718
- "outputs": [
719
- {
720
- "name": "stdout",
721
- "output_type": "stream",
722
- "text": [
723
- "Connected. Call `.close()` to terminate connection gracefully.\n"
724
- ]
725
- },
726
- {
727
- "name": "stderr",
728
- "output_type": "stream",
729
- "text": [
730
- "Uploading: 100.000%|██████████| 59/59 elapsed<00:01 remaining<00:001<00:01, 3.38it/s]\n",
731
- "Uploading: 100.000%|██████████| 397272/397272 elapsed<00:02 remaining<00:00 3.38it/s]\n",
732
- "Uploading: 0.000%| | 0/112411 elapsed<00:01 remaining<?0:04<00:01, 3.38it/s]\n",
733
- "Uploading model files (2 dirs, 2 files): 17%|█▋ | 1/6 [00:07<00:35, 7.08s/it]\n"
734
- ]
735
- },
736
- {
737
- "ename": "RestAPIError",
738
- "evalue": "Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''",
739
- "output_type": "error",
740
- "traceback": [
741
- "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
742
- "\u001b[1;31mRestAPIError\u001b[0m Traceback (most recent call last)",
743
- "Cell \u001b[1;32mIn[44], line 16\u001b[0m\n\u001b[0;32m 8\u001b[0m tf_model \u001b[38;5;241m=\u001b[39m mr\u001b[38;5;241m.\u001b[39mtensorflow\u001b[38;5;241m.\u001b[39mcreate_model(\n\u001b[0;32m 9\u001b[0m name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDK_electricity_price_prediction_model\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 10\u001b[0m metrics\u001b[38;5;241m=\u001b[39mmetrics,\n\u001b[0;32m 11\u001b[0m description\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mHourly electricity price prediction model.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 12\u001b[0m input_example\u001b[38;5;241m=\u001b[39mn_step_window\u001b[38;5;241m.\u001b[39mexample[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mnumpy(),\n\u001b[0;32m 13\u001b[0m )\n\u001b[0;32m 15\u001b[0m \u001b[38;5;66;03m# Saving the model to the specified directory\u001b[39;00m\n\u001b[1;32m---> 16\u001b[0m \u001b[43mtf_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_dir\u001b[49m\u001b[43m)\u001b[49m\n",
744
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\model.py:101\u001b[0m, in \u001b[0;36mModel.save\u001b[1;34m(self, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 90\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msave\u001b[39m(\u001b[38;5;28mself\u001b[39m, model_path, await_registration\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m480\u001b[39m, keep_original_files\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[0;32m 91\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Persist this model including model files and metadata to the model registry.\u001b[39;00m\n\u001b[0;32m 92\u001b[0m \n\u001b[0;32m 93\u001b[0m \u001b[38;5;124;03m # Arguments\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 99\u001b[0m \u001b[38;5;124;03m `Model`: The model metadata object.\u001b[39;00m\n\u001b[0;32m 100\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_model_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 102\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 103\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 104\u001b[0m \u001b[43m \u001b[49m\u001b[43mawait_registration\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mawait_registration\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 105\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 106\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
745
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:421\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 419\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m be:\n\u001b[0;32m 420\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mrm(model_instance\u001b[38;5;241m.\u001b[39mversion_path)\n\u001b[1;32m--> 421\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m be\n\u001b[0;32m 423\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel created, explore it at \u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m+\u001b[39m model_instance\u001b[38;5;241m.\u001b[39mget_url())\n\u001b[0;32m 425\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_instance\n",
746
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:385\u001b[0m, in \u001b[0;36mModelEngine.save\u001b[1;34m(self, model_instance, model_path, await_registration, keep_original_files)\u001b[0m\n\u001b[0;32m 381\u001b[0m \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[0;32m 382\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexists(\n\u001b[0;32m 383\u001b[0m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(os\u001b[38;5;241m.\u001b[39mgetcwd(), model_path)\n\u001b[0;32m 384\u001b[0m ): \u001b[38;5;66;03m# check local relative\u001b[39;00m\n\u001b[1;32m--> 385\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_model_from_local_or_hopsfs_mount\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgetcwd\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeep_original_files\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkeep_original_files\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 389\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 390\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 391\u001b[0m \u001b[38;5;66;03m# check project relative\u001b[39;00m\n\u001b[0;32m 392\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dataset_api\u001b[38;5;241m.\u001b[39mpath_exists(\n\u001b[0;32m 393\u001b[0m model_path\n\u001b[0;32m 394\u001b[0m ): \u001b[38;5;66;03m# check hdfs relative and absolute\u001b[39;00m\n",
747
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:249\u001b[0m, in \u001b[0;36mModelEngine._save_model_from_local_or_hopsfs_mount\u001b[1;34m(self, model_instance, model_path, keep_original_files, update_upload_progress)\u001b[0m\n\u001b[0;32m 240\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_or_move_hopsfs_model(\n\u001b[0;32m 241\u001b[0m from_hdfs_model_path\u001b[38;5;241m=\u001b[39mmodel_path\u001b[38;5;241m.\u001b[39mreplace(\n\u001b[0;32m 242\u001b[0m constants\u001b[38;5;241m.\u001b[39mMODEL_REGISTRY\u001b[38;5;241m.\u001b[39mHOPSFS_MOUNT_PREFIX, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 246\u001b[0m update_upload_progress\u001b[38;5;241m=\u001b[39mupdate_upload_progress,\n\u001b[0;32m 247\u001b[0m )\n\u001b[0;32m 248\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 249\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_local_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 250\u001b[0m \u001b[43m \u001b[49m\u001b[43mfrom_local_model_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 251\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_model_version_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_instance\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mversion_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 252\u001b[0m \u001b[43m \u001b[49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mupdate_upload_progress\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 253\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
748
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\model_engine.py:225\u001b[0m, in \u001b[0;36mModelEngine._upload_local_model\u001b[1;34m(self, from_local_model_path, to_model_version_path, update_upload_progress)\u001b[0m\n\u001b[0;32m 223\u001b[0m update_upload_progress(n_dirs, n_files)\n\u001b[0;32m 224\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m f_name \u001b[38;5;129;01min\u001b[39;00m files:\n\u001b[1;32m--> 225\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mf_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_base_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 226\u001b[0m n_files \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 227\u001b[0m update_upload_progress(n_dirs, n_files)\n",
749
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\engine\\local_engine.py:38\u001b[0m, in \u001b[0;36mLocalEngine.upload\u001b[1;34m(self, local_path, remote_path)\u001b[0m\n\u001b[0;32m 36\u001b[0m local_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_abs_path(local_path)\n\u001b[0;32m 37\u001b[0m remote_path \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepend_project_path(remote_path)\n\u001b[1;32m---> 38\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dataset_api\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mupload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mremote_path\u001b[49m\u001b[43m)\u001b[49m\n",
750
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:152\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 151\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n\u001b[1;32m--> 152\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[0;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 155\u001b[0m pbar\u001b[38;5;241m.\u001b[39mclose()\n",
751
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36mDatasetApi.upload\u001b[1;34m(self, local_path, upload_path, overwrite, chunk_size, simultaneous_uploads, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfuture\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m]\u001b[49m\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
752
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:148\u001b[0m, in \u001b[0;36m<listcomp>\u001b[1;34m(.0)\u001b[0m\n\u001b[0;32m 146\u001b[0m _, _ \u001b[38;5;241m=\u001b[39m wait(futures)\n\u001b[0;32m 147\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 148\u001b[0m _ \u001b[38;5;241m=\u001b[39m [\u001b[43mfuture\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m future \u001b[38;5;129;01min\u001b[39;00m futures]\n\u001b[0;32m 149\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pbar \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
753
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:449\u001b[0m, in \u001b[0;36mFuture.result\u001b[1;34m(self, timeout)\u001b[0m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[1;32m--> 449\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__get_result\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 451\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_condition\u001b[38;5;241m.\u001b[39mwait(timeout)\n\u001b[0;32m 453\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n",
754
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\_base.py:401\u001b[0m, in \u001b[0;36mFuture.__get_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 399\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception:\n\u001b[0;32m 400\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 401\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception\n\u001b[0;32m 402\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 403\u001b[0m \u001b[38;5;66;03m# Break a reference cycle with the exception in self._exception\u001b[39;00m\n\u001b[0;32m 404\u001b[0m \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
755
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\concurrent\\futures\\thread.py:58\u001b[0m, in \u001b[0;36m_WorkItem.run\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 55\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m 57\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m---> 58\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 59\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[0;32m 60\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfuture\u001b[38;5;241m.\u001b[39mset_exception(exc)\n",
756
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:189\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 184\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 185\u001b[0m re\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;129;01min\u001b[39;00m DatasetApi\u001b[38;5;241m.\u001b[39mFLOW_PERMANENT_ERRORS\n\u001b[0;32m 186\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m chunk\u001b[38;5;241m.\u001b[39mretries \u001b[38;5;241m>\u001b[39m max_chunk_retries\n\u001b[0;32m 187\u001b[0m ):\n\u001b[0;32m 188\u001b[0m chunk\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfailed\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m--> 189\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m re\n\u001b[0;32m 190\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(chunk_retry_interval)\n\u001b[0;32m 191\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n",
757
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:178\u001b[0m, in \u001b[0;36mDatasetApi._upload_chunk\u001b[1;34m(self, base_params, upload_path, file_name, chunk, pbar, max_chunk_retries, chunk_retry_interval)\u001b[0m\n\u001b[0;32m 176\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m 177\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 178\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_upload_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 179\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mupload_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontent\u001b[49m\n\u001b[0;32m 180\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[0;32m 182\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RestAPIError \u001b[38;5;28;01mas\u001b[39;00m re:\n",
758
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\core\\dataset_api.py:214\u001b[0m, in \u001b[0;36mDatasetApi._upload_request\u001b[1;34m(self, params, path, file_name, chunk)\u001b[0m\n\u001b[0;32m 211\u001b[0m path_params \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mproject\u001b[39m\u001b[38;5;124m\"\u001b[39m, _client\u001b[38;5;241m.\u001b[39m_project_id, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdataset\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mupload\u001b[39m\u001b[38;5;124m\"\u001b[39m, path]\n\u001b[0;32m 213\u001b[0m \u001b[38;5;66;03m# Flow configuration params are sent as form data\u001b[39;00m\n\u001b[1;32m--> 214\u001b[0m \u001b[43m_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPOST\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpath_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfiles\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\u001b[43m}\u001b[49m\n\u001b[0;32m 216\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
759
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\decorators.py:35\u001b[0m, in \u001b[0;36mconnected.<locals>.if_connected\u001b[1;34m(inst, *args, **kwargs)\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m inst\u001b[38;5;241m.\u001b[39m_connected:\n\u001b[0;32m 34\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m NoHopsworksConnectionError\n\u001b[1;32m---> 35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43minst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
760
- "File \u001b[1;32mc:\\Users\\Benj3\\anaconda3\\envs\\energy\\Lib\\site-packages\\hsml\\client\\base.py:108\u001b[0m, in \u001b[0;36mClient._send_request\u001b[1;34m(self, method, path_params, query_params, headers, data, stream, files)\u001b[0m\n\u001b[0;32m 105\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_session\u001b[38;5;241m.\u001b[39msend(prepped, verify\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_verify, stream\u001b[38;5;241m=\u001b[39mstream)\n\u001b[0;32m 107\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m100\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m--> 108\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mRestAPIError(url, response)\n\u001b[0;32m 110\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream:\n\u001b[0;32m 111\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
761
- "\u001b[1;31mRestAPIError\u001b[0m: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/556180/dataset/upload/%2FProjects%2Fbenjami3%2FModels%2FDK_electricity_price_prediction_model%2F1%5Cvariables). Server response: \nHTTP code: 400, HTTP reason: Invalid URI, body: b''"
762
- ]
763
- }
764
- ],
765
- "source": [
766
- "# Retrieving the Model Registry\n",
767
- "mr = project.get_model_registry()\n",
768
- "\n",
769
- "# Extracting loss value from the training history\n",
770
- "metrics = {'loss': history_dict['val_loss'][0]} \n",
771
- "\n",
772
- "# Creating a TensorFlow model in the Model Registry\n",
773
- "tf_model = mr.tensorflow.create_model(\n",
774
- " name=\"DK_electricity_price_prediction_model\",\n",
775
- " metrics=metrics,\n",
776
- " description=\"Hourly electricity price prediction model.\",\n",
777
- " input_example=n_step_window.example[0].numpy(),\n",
778
- ")\n",
779
- "\n",
780
- "# Saving the model to the specified directory\n",
781
- "tf_model.save(model_dir)"
782
- ]
783
- },
784
- {
785
- "cell_type": "markdown",
786
- "metadata": {},
787
- "source": [
788
- "---\n",
789
- "\n",
790
- "## <span style=\"color:#2656a3;\">⏭️ **Next:** Part 04: Batch Inference </span>\n",
791
- "\n",
792
- "In the next notebook you will use your registered model to predict batch data."
793
- ]
794
- }
795
- ],
796
- "metadata": {
797
- "kernelspec": {
798
- "display_name": "bds-mlops",
799
- "language": "python",
800
- "name": "python3"
801
- },
802
- "language_info": {
803
- "codemirror_mode": {
804
- "name": "ipython",
805
- "version": 3
806
- },
807
- "file_extension": ".py",
808
- "mimetype": "text/x-python",
809
- "name": "python",
810
- "nbconvert_exporter": "python",
811
- "pygments_lexer": "ipython3",
812
- "version": "3.11.9"
813
- },
814
- "orig_nbformat": 4
815
- },
816
- "nbformat": 4,
817
- "nbformat_minor": 2
818
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hide/notebooks_dev/3_training_pipeline_dev_windowtensor.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/1_feature_backfill.ipynb CHANGED
@@ -28,7 +28,7 @@
28
  },
29
  {
30
  "cell_type": "code",
31
- "execution_count": 38,
32
  "metadata": {},
33
  "outputs": [],
34
  "source": [
@@ -38,15 +38,15 @@
38
  },
39
  {
40
  "cell_type": "code",
41
- "execution_count": 39,
42
  "metadata": {},
43
  "outputs": [
44
  {
45
  "name": "stdout",
46
  "output_type": "stream",
47
  "text": [
48
- "c:\\Users\\Benj3\\OneDrive\\Dokumenter\\VSCode\\MLOPs-Assignment-\n",
49
- "c:\\Users\\Benj3\\OneDrive\\Dokumenter\\VSCode\\MLOPs-Assignment-\\notebooks\n"
50
  ]
51
  }
52
  ],
@@ -64,7 +64,7 @@
64
  },
65
  {
66
  "cell_type": "code",
67
- "execution_count": 40,
68
  "metadata": {},
69
  "outputs": [],
70
  "source": [
@@ -80,6 +80,18 @@
80
  "warnings.filterwarnings('ignore', category=DeprecationWarning)"
81
  ]
82
  },
 
 
 
 
 
 
 
 
 
 
 
 
83
  {
84
  "cell_type": "markdown",
85
  "metadata": {},
@@ -91,7 +103,6 @@
91
  "- Electricity prices in Denmark on hourly basis per day from [Energinet](https://www.energidataservice.dk). Loacated in the *featuresfolder* under electricity_prices.\n",
92
  "- Different meteorological observations based on Aalborg Denmark from [Open Meteo](https://www.open-meteo.com). Loacated in the *featuresfolder* under weather_measures.\n",
93
  "- Danish calendar that categorizes dates into types based on whether it is a weekday or not. This files is made manually by the group and is located in the *datafolder* inside this repository.\n",
94
- "- Forecast Renewable Energy next day from [Energinet](https://www.energidataservice.dk). Loacated in the *featuresfolder* under electricity_prices.\n",
95
  "- Weather Forecast based on Aalborg Denmark from [Open Meteo](https://www.open-meteo.com). Loacated in the *featuresfolder* under weather_measures. (This data is used later to parse in new real-time weather data)\n"
96
  ]
97
  },
@@ -105,7 +116,7 @@
105
  },
106
  {
107
  "cell_type": "code",
108
- "execution_count": 41,
109
  "metadata": {},
110
  "outputs": [],
111
  "source": [
@@ -122,7 +133,7 @@
122
  },
123
  {
124
  "cell_type": "code",
125
- "execution_count": 42,
126
  "metadata": {},
127
  "outputs": [
128
  {
@@ -207,7 +218,7 @@
207
  "4 1641009600000 2022-01-01 04:00:00 2022-01-01 4 0.28013"
208
  ]
209
  },
210
- "execution_count": 42,
211
  "metadata": {},
212
  "output_type": "execute_result"
213
  }
@@ -219,7 +230,7 @@
219
  },
220
  {
221
  "cell_type": "code",
222
- "execution_count": 43,
223
  "metadata": {},
224
  "outputs": [
225
  {
@@ -252,44 +263,44 @@
252
  " </thead>\n",
253
  " <tbody>\n",
254
  " <tr>\n",
255
- " <th>20440</th>\n",
256
- " <td>1714590000000</td>\n",
257
- " <td>2024-05-01 19:00:00</td>\n",
258
- " <td>2024-05-01</td>\n",
259
  " <td>19</td>\n",
260
- " <td>0.37590</td>\n",
261
  " </tr>\n",
262
  " <tr>\n",
263
- " <th>20441</th>\n",
264
- " <td>1714593600000</td>\n",
265
- " <td>2024-05-01 20:00:00</td>\n",
266
- " <td>2024-05-01</td>\n",
267
  " <td>20</td>\n",
268
- " <td>0.37292</td>\n",
269
  " </tr>\n",
270
  " <tr>\n",
271
- " <th>20442</th>\n",
272
- " <td>1714597200000</td>\n",
273
- " <td>2024-05-01 21:00:00</td>\n",
274
- " <td>2024-05-01</td>\n",
275
  " <td>21</td>\n",
276
- " <td>0.25366</td>\n",
277
  " </tr>\n",
278
  " <tr>\n",
279
- " <th>20443</th>\n",
280
- " <td>1714600800000</td>\n",
281
- " <td>2024-05-01 22:00:00</td>\n",
282
- " <td>2024-05-01</td>\n",
283
  " <td>22</td>\n",
284
- " <td>0.22315</td>\n",
285
  " </tr>\n",
286
  " <tr>\n",
287
- " <th>20444</th>\n",
288
- " <td>1714604400000</td>\n",
289
- " <td>2024-05-01 23:00:00</td>\n",
290
- " <td>2024-05-01</td>\n",
291
  " <td>23</td>\n",
292
- " <td>0.16408</td>\n",
293
  " </tr>\n",
294
  " </tbody>\n",
295
  "</table>\n",
@@ -297,21 +308,21 @@
297
  ],
298
  "text/plain": [
299
  " timestamp datetime date hour \\\n",
300
- "20440 1714590000000 2024-05-01 19:00:00 2024-05-01 19 \n",
301
- "20441 1714593600000 2024-05-01 20:00:00 2024-05-01 20 \n",
302
- "20442 1714597200000 2024-05-01 21:00:00 2024-05-01 21 \n",
303
- "20443 1714600800000 2024-05-01 22:00:00 2024-05-01 22 \n",
304
- "20444 1714604400000 2024-05-01 23:00:00 2024-05-01 23 \n",
305
  "\n",
306
  " dk1_spotpricedkk_kwh \n",
307
- "20440 0.37590 \n",
308
- "20441 0.37292 \n",
309
- "20442 0.25366 \n",
310
- "20443 0.22315 \n",
311
- "20444 0.16408 "
312
  ]
313
  },
314
- "execution_count": 43,
315
  "metadata": {},
316
  "output_type": "execute_result"
317
  }
@@ -323,7 +334,7 @@
323
  },
324
  {
325
  "cell_type": "code",
326
- "execution_count": 44,
327
  "metadata": {},
328
  "outputs": [
329
  {
@@ -331,17 +342,17 @@
331
  "output_type": "stream",
332
  "text": [
333
  "<class 'pandas.core.frame.DataFrame'>\n",
334
- "RangeIndex: 20445 entries, 0 to 20444\n",
335
  "Data columns (total 5 columns):\n",
336
  " # Column Non-Null Count Dtype \n",
337
  "--- ------ -------------- ----- \n",
338
- " 0 timestamp 20445 non-null int64 \n",
339
- " 1 datetime 20445 non-null datetime64[ns]\n",
340
- " 2 date 20445 non-null object \n",
341
- " 3 hour 20445 non-null int32 \n",
342
- " 4 dk1_spotpricedkk_kwh 20445 non-null float64 \n",
343
- "dtypes: datetime64[ns](1), float64(1), int32(1), int64(1), object(1)\n",
344
- "memory usage: 718.9+ KB\n"
345
  ]
346
  }
347
  ],
@@ -350,308 +361,6 @@
350
  "electricity_df.info()"
351
  ]
352
  },
353
- {
354
- "cell_type": "markdown",
355
- "metadata": {},
356
- "source": [
357
- "### <span style=\"color:#2656a3;\">☀️💨 Forecast Renewable Energy next day from Energinet\n",
358
- "Second dataset is Forecast Renewable Energy next day also on hourly basis from Energinet"
359
- ]
360
- },
361
- {
362
- "cell_type": "code",
363
- "execution_count": 45,
364
- "metadata": {},
365
- "outputs": [],
366
- "source": [
367
- "# Fetching historical forecast of renewable energy data for area DK1 from January 1, 2022\n",
368
- "# Note: The end date is currently left out to retrieve data up to the present date of yesterday \n",
369
- "# Today is not included in the data as it is not historical data\n",
370
- "forecast_renewable_energy_df = electricity_prices.forecast_renewable_energy(\n",
371
- " historical=True, \n",
372
- " area = [\"DK1\"],\n",
373
- " start= '2022-01-01', \n",
374
- " #end='2023-12-31'\n",
375
- ")"
376
- ]
377
- },
378
- {
379
- "cell_type": "code",
380
- "execution_count": 46,
381
- "metadata": {},
382
- "outputs": [
383
- {
384
- "data": {
385
- "text/html": [
386
- "<div>\n",
387
- "<style scoped>\n",
388
- " .dataframe tbody tr th:only-of-type {\n",
389
- " vertical-align: middle;\n",
390
- " }\n",
391
- "\n",
392
- " .dataframe tbody tr th {\n",
393
- " vertical-align: top;\n",
394
- " }\n",
395
- "\n",
396
- " .dataframe thead th {\n",
397
- " text-align: right;\n",
398
- " }\n",
399
- "</style>\n",
400
- "<table border=\"1\" class=\"dataframe\">\n",
401
- " <thead>\n",
402
- " <tr style=\"text-align: right;\">\n",
403
- " <th></th>\n",
404
- " <th>timestamp</th>\n",
405
- " <th>datetime</th>\n",
406
- " <th>date</th>\n",
407
- " <th>hour</th>\n",
408
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
409
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
410
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
411
- " </tr>\n",
412
- " </thead>\n",
413
- " <tbody>\n",
414
- " <tr>\n",
415
- " <th>0</th>\n",
416
- " <td>1641024000000</td>\n",
417
- " <td>2022-01-01 08:00:00</td>\n",
418
- " <td>2022-01-01</td>\n",
419
- " <td>8</td>\n",
420
- " <td>611708.313</td>\n",
421
- " <td>236791.672</td>\n",
422
- " <td>49.583</td>\n",
423
- " </tr>\n",
424
- " <tr>\n",
425
- " <th>1</th>\n",
426
- " <td>1641027600000</td>\n",
427
- " <td>2022-01-01 09:00:00</td>\n",
428
- " <td>2022-01-01</td>\n",
429
- " <td>9</td>\n",
430
- " <td>459708.344</td>\n",
431
- " <td>196666.672</td>\n",
432
- " <td>4841.250</td>\n",
433
- " </tr>\n",
434
- " <tr>\n",
435
- " <th>2</th>\n",
436
- " <td>1641031200000</td>\n",
437
- " <td>2022-01-01 10:00:00</td>\n",
438
- " <td>2022-01-01</td>\n",
439
- " <td>10</td>\n",
440
- " <td>310375.000</td>\n",
441
- " <td>178500.000</td>\n",
442
- " <td>20352.501</td>\n",
443
- " </tr>\n",
444
- " <tr>\n",
445
- " <th>3</th>\n",
446
- " <td>1641034800000</td>\n",
447
- " <td>2022-01-01 11:00:00</td>\n",
448
- " <td>2022-01-01</td>\n",
449
- " <td>11</td>\n",
450
- " <td>320750.000</td>\n",
451
- " <td>201125.000</td>\n",
452
- " <td>35718.750</td>\n",
453
- " </tr>\n",
454
- " <tr>\n",
455
- " <th>4</th>\n",
456
- " <td>1641038400000</td>\n",
457
- " <td>2022-01-01 12:00:00</td>\n",
458
- " <td>2022-01-01</td>\n",
459
- " <td>12</td>\n",
460
- " <td>355666.656</td>\n",
461
- " <td>277666.656</td>\n",
462
- " <td>38026.669</td>\n",
463
- " </tr>\n",
464
- " </tbody>\n",
465
- "</table>\n",
466
- "</div>"
467
- ],
468
- "text/plain": [
469
- " timestamp datetime date hour \\\n",
470
- "0 1641024000000 2022-01-01 08:00:00 2022-01-01 8 \n",
471
- "1 1641027600000 2022-01-01 09:00:00 2022-01-01 9 \n",
472
- "2 1641031200000 2022-01-01 10:00:00 2022-01-01 10 \n",
473
- "3 1641034800000 2022-01-01 11:00:00 2022-01-01 11 \n",
474
- "4 1641038400000 2022-01-01 12:00:00 2022-01-01 12 \n",
475
- "\n",
476
- " dk1_offshore_wind_forecastintraday_kwh \\\n",
477
- "0 611708.313 \n",
478
- "1 459708.344 \n",
479
- "2 310375.000 \n",
480
- "3 320750.000 \n",
481
- "4 355666.656 \n",
482
- "\n",
483
- " dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \n",
484
- "0 236791.672 49.583 \n",
485
- "1 196666.672 4841.250 \n",
486
- "2 178500.000 20352.501 \n",
487
- "3 201125.000 35718.750 \n",
488
- "4 277666.656 38026.669 "
489
- ]
490
- },
491
- "execution_count": 46,
492
- "metadata": {},
493
- "output_type": "execute_result"
494
- }
495
- ],
496
- "source": [
497
- "# Display the first 5 rows of the forecast_renewable_energy dataframe\n",
498
- "forecast_renewable_energy_df.head(5)"
499
- ]
500
- },
501
- {
502
- "cell_type": "code",
503
- "execution_count": 47,
504
- "metadata": {},
505
- "outputs": [
506
- {
507
- "data": {
508
- "text/html": [
509
- "<div>\n",
510
- "<style scoped>\n",
511
- " .dataframe tbody tr th:only-of-type {\n",
512
- " vertical-align: middle;\n",
513
- " }\n",
514
- "\n",
515
- " .dataframe tbody tr th {\n",
516
- " vertical-align: top;\n",
517
- " }\n",
518
- "\n",
519
- " .dataframe thead th {\n",
520
- " text-align: right;\n",
521
- " }\n",
522
- "</style>\n",
523
- "<table border=\"1\" class=\"dataframe\">\n",
524
- " <thead>\n",
525
- " <tr style=\"text-align: right;\">\n",
526
- " <th></th>\n",
527
- " <th>timestamp</th>\n",
528
- " <th>datetime</th>\n",
529
- " <th>date</th>\n",
530
- " <th>hour</th>\n",
531
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
532
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
533
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
534
- " </tr>\n",
535
- " </thead>\n",
536
- " <tbody>\n",
537
- " <tr>\n",
538
- " <th>14426</th>\n",
539
- " <td>1714590000000</td>\n",
540
- " <td>2024-05-01 19:00:00</td>\n",
541
- " <td>2024-05-01</td>\n",
542
- " <td>19</td>\n",
543
- " <td>816250.000</td>\n",
544
- " <td>1382208.374</td>\n",
545
- " <td>272910.828</td>\n",
546
- " </tr>\n",
547
- " <tr>\n",
548
- " <th>14427</th>\n",
549
- " <td>1714593600000</td>\n",
550
- " <td>2024-05-01 20:00:00</td>\n",
551
- " <td>2024-05-01</td>\n",
552
- " <td>20</td>\n",
553
- " <td>848500.000</td>\n",
554
- " <td>1388583.374</td>\n",
555
- " <td>46086.666</td>\n",
556
- " </tr>\n",
557
- " <tr>\n",
558
- " <th>14428</th>\n",
559
- " <td>1714597200000</td>\n",
560
- " <td>2024-05-01 21:00:00</td>\n",
561
- " <td>2024-05-01</td>\n",
562
- " <td>21</td>\n",
563
- " <td>886041.687</td>\n",
564
- " <td>1554791.626</td>\n",
565
- " <td>1338.750</td>\n",
566
- " </tr>\n",
567
- " <tr>\n",
568
- " <th>14429</th>\n",
569
- " <td>1714600800000</td>\n",
570
- " <td>2024-05-01 22:00:00</td>\n",
571
- " <td>2024-05-01</td>\n",
572
- " <td>22</td>\n",
573
- " <td>919416.687</td>\n",
574
- " <td>1698875.000</td>\n",
575
- " <td>0.000</td>\n",
576
- " </tr>\n",
577
- " <tr>\n",
578
- " <th>14430</th>\n",
579
- " <td>1714604400000</td>\n",
580
- " <td>2024-05-01 23:00:00</td>\n",
581
- " <td>2024-05-01</td>\n",
582
- " <td>23</td>\n",
583
- " <td>934708.313</td>\n",
584
- " <td>1739375.000</td>\n",
585
- " <td>0.000</td>\n",
586
- " </tr>\n",
587
- " </tbody>\n",
588
- "</table>\n",
589
- "</div>"
590
- ],
591
- "text/plain": [
592
- " timestamp datetime date hour \\\n",
593
- "14426 1714590000000 2024-05-01 19:00:00 2024-05-01 19 \n",
594
- "14427 1714593600000 2024-05-01 20:00:00 2024-05-01 20 \n",
595
- "14428 1714597200000 2024-05-01 21:00:00 2024-05-01 21 \n",
596
- "14429 1714600800000 2024-05-01 22:00:00 2024-05-01 22 \n",
597
- "14430 1714604400000 2024-05-01 23:00:00 2024-05-01 23 \n",
598
- "\n",
599
- " dk1_offshore_wind_forecastintraday_kwh \\\n",
600
- "14426 816250.000 \n",
601
- "14427 848500.000 \n",
602
- "14428 886041.687 \n",
603
- "14429 919416.687 \n",
604
- "14430 934708.313 \n",
605
- "\n",
606
- " dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \n",
607
- "14426 1382208.374 272910.828 \n",
608
- "14427 1388583.374 46086.666 \n",
609
- "14428 1554791.626 1338.750 \n",
610
- "14429 1698875.000 0.000 \n",
611
- "14430 1739375.000 0.000 "
612
- ]
613
- },
614
- "execution_count": 47,
615
- "metadata": {},
616
- "output_type": "execute_result"
617
- }
618
- ],
619
- "source": [
620
- "# Display the last 5 rows of the forecast_renewable_energy dataframe\n",
621
- "forecast_renewable_energy_df.tail(5)"
622
- ]
623
- },
624
- {
625
- "cell_type": "code",
626
- "execution_count": 48,
627
- "metadata": {},
628
- "outputs": [
629
- {
630
- "name": "stdout",
631
- "output_type": "stream",
632
- "text": [
633
- "<class 'pandas.core.frame.DataFrame'>\n",
634
- "RangeIndex: 14431 entries, 0 to 14430\n",
635
- "Data columns (total 7 columns):\n",
636
- " # Column Non-Null Count Dtype \n",
637
- "--- ------ -------------- ----- \n",
638
- " 0 timestamp 14431 non-null int64 \n",
639
- " 1 datetime 14431 non-null datetime64[ns]\n",
640
- " 2 date 14431 non-null object \n",
641
- " 3 hour 14431 non-null int32 \n",
642
- " 4 dk1_offshore_wind_forecastintraday_kwh 14415 non-null float64 \n",
643
- " 5 dk1_onshore_wind_forecastintraday_kwh 14415 non-null float64 \n",
644
- " 6 dk1_solar_forecastintraday_kwh 14415 non-null float64 \n",
645
- "dtypes: datetime64[ns](1), float64(3), int32(1), int64(1), object(1)\n",
646
- "memory usage: 733.0+ KB\n"
647
- ]
648
- }
649
- ],
650
- "source": [
651
- "# Showing the information for the forecast_renewable_energy dataframe\n",
652
- "forecast_renewable_energy_df.info()"
653
- ]
654
- },
655
  {
656
  "cell_type": "markdown",
657
  "metadata": {},
@@ -669,7 +378,7 @@
669
  },
670
  {
671
  "cell_type": "code",
672
- "execution_count": 49,
673
  "metadata": {},
674
  "outputs": [],
675
  "source": [
@@ -685,7 +394,7 @@
685
  },
686
  {
687
  "cell_type": "code",
688
- "execution_count": 50,
689
  "metadata": {},
690
  "outputs": [
691
  {
@@ -832,7 +541,7 @@
832
  "4 100.0 10.6 23.8 "
833
  ]
834
  },
835
- "execution_count": 50,
836
  "metadata": {},
837
  "output_type": "execute_result"
838
  }
@@ -844,7 +553,7 @@
844
  },
845
  {
846
  "cell_type": "code",
847
- "execution_count": 51,
848
  "metadata": {},
849
  "outputs": [
850
  {
@@ -885,84 +594,84 @@
885
  " </thead>\n",
886
  " <tbody>\n",
887
  " <tr>\n",
888
- " <th>20419</th>\n",
889
- " <td>1714503600000</td>\n",
890
- " <td>2024-04-30 19:00:00</td>\n",
891
- " <td>2024-04-30</td>\n",
892
  " <td>19</td>\n",
893
- " <td>13.8</td>\n",
894
- " <td>64.0</td>\n",
895
  " <td>0.0</td>\n",
896
  " <td>0.0</td>\n",
897
  " <td>0.0</td>\n",
898
  " <td>0.0</td>\n",
899
- " <td>6.0</td>\n",
900
- " <td>15.3</td>\n",
901
- " <td>26.3</td>\n",
902
  " </tr>\n",
903
  " <tr>\n",
904
- " <th>20420</th>\n",
905
- " <td>1714507200000</td>\n",
906
- " <td>2024-04-30 20:00:00</td>\n",
907
- " <td>2024-04-30</td>\n",
908
  " <td>20</td>\n",
909
- " <td>13.5</td>\n",
910
- " <td>66.0</td>\n",
911
  " <td>0.0</td>\n",
912
  " <td>0.0</td>\n",
913
  " <td>0.0</td>\n",
914
  " <td>0.0</td>\n",
915
- " <td>2.0</td>\n",
916
- " <td>18.7</td>\n",
917
- " <td>32.8</td>\n",
918
  " </tr>\n",
919
  " <tr>\n",
920
- " <th>20421</th>\n",
921
- " <td>1714510800000</td>\n",
922
- " <td>2024-04-30 21:00:00</td>\n",
923
- " <td>2024-04-30</td>\n",
924
  " <td>21</td>\n",
925
- " <td>13.4</td>\n",
926
  " <td>67.0</td>\n",
927
  " <td>0.0</td>\n",
928
  " <td>0.0</td>\n",
929
  " <td>0.0</td>\n",
930
  " <td>0.0</td>\n",
931
- " <td>13.0</td>\n",
932
- " <td>21.1</td>\n",
933
- " <td>38.2</td>\n",
934
  " </tr>\n",
935
  " <tr>\n",
936
- " <th>20422</th>\n",
937
- " <td>1714514400000</td>\n",
938
- " <td>2024-04-30 22:00:00</td>\n",
939
- " <td>2024-04-30</td>\n",
940
  " <td>22</td>\n",
941
- " <td>12.8</td>\n",
942
- " <td>67.0</td>\n",
943
  " <td>0.0</td>\n",
944
  " <td>0.0</td>\n",
945
  " <td>0.0</td>\n",
946
  " <td>0.0</td>\n",
947
- " <td>9.0</td>\n",
948
- " <td>21.0</td>\n",
949
- " <td>38.5</td>\n",
950
  " </tr>\n",
951
  " <tr>\n",
952
- " <th>20423</th>\n",
953
- " <td>1714518000000</td>\n",
954
- " <td>2024-04-30 23:00:00</td>\n",
955
- " <td>2024-04-30</td>\n",
956
  " <td>23</td>\n",
957
- " <td>12.0</td>\n",
958
  " <td>70.0</td>\n",
959
  " <td>0.0</td>\n",
960
  " <td>0.0</td>\n",
961
  " <td>0.0</td>\n",
962
  " <td>0.0</td>\n",
963
- " <td>18.0</td>\n",
964
- " <td>20.7</td>\n",
965
- " <td>38.5</td>\n",
966
  " </tr>\n",
967
  " </tbody>\n",
968
  "</table>\n",
@@ -970,28 +679,28 @@
970
  ],
971
  "text/plain": [
972
  " timestamp datetime date hour temperature_2m \\\n",
973
- "20419 1714503600000 2024-04-30 19:00:00 2024-04-30 19 13.8 \n",
974
- "20420 1714507200000 2024-04-30 20:00:00 2024-04-30 20 13.5 \n",
975
- "20421 1714510800000 2024-04-30 21:00:00 2024-04-30 21 13.4 \n",
976
- "20422 1714514400000 2024-04-30 22:00:00 2024-04-30 22 12.8 \n",
977
- "20423 1714518000000 2024-04-30 23:00:00 2024-04-30 23 12.0 \n",
978
  "\n",
979
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
980
- "20419 64.0 0.0 0.0 0.0 0.0 \n",
981
- "20420 66.0 0.0 0.0 0.0 0.0 \n",
982
- "20421 67.0 0.0 0.0 0.0 0.0 \n",
983
- "20422 67.0 0.0 0.0 0.0 0.0 \n",
984
- "20423 70.0 0.0 0.0 0.0 0.0 \n",
985
  "\n",
986
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
987
- "20419 6.0 15.3 26.3 \n",
988
- "20420 2.0 18.7 32.8 \n",
989
- "20421 13.0 21.1 38.2 \n",
990
- "20422 9.0 21.0 38.5 \n",
991
- "20423 18.0 20.7 38.5 "
992
  ]
993
  },
994
- "execution_count": 51,
995
  "metadata": {},
996
  "output_type": "execute_result"
997
  }
@@ -1003,7 +712,7 @@
1003
  },
1004
  {
1005
  "cell_type": "code",
1006
- "execution_count": 52,
1007
  "metadata": {},
1008
  "outputs": [
1009
  {
@@ -1011,25 +720,25 @@
1011
  "output_type": "stream",
1012
  "text": [
1013
  "<class 'pandas.core.frame.DataFrame'>\n",
1014
- "Index: 20424 entries, 0 to 20423\n",
1015
  "Data columns (total 13 columns):\n",
1016
  " # Column Non-Null Count Dtype \n",
1017
  "--- ------ -------------- ----- \n",
1018
- " 0 timestamp 20424 non-null int64 \n",
1019
- " 1 datetime 20424 non-null datetime64[ns]\n",
1020
- " 2 date 20424 non-null object \n",
1021
- " 3 hour 20424 non-null int32 \n",
1022
- " 4 temperature_2m 20424 non-null float64 \n",
1023
- " 5 relative_humidity_2m 20424 non-null float64 \n",
1024
- " 6 precipitation 20424 non-null float64 \n",
1025
- " 7 rain 20424 non-null float64 \n",
1026
- " 8 snowfall 20424 non-null float64 \n",
1027
- " 9 weather_code 20424 non-null float64 \n",
1028
- " 10 cloud_cover 20424 non-null float64 \n",
1029
- " 11 wind_speed_10m 20424 non-null float64 \n",
1030
- " 12 wind_gusts_10m 20424 non-null float64 \n",
1031
- "dtypes: datetime64[ns](1), float64(9), int32(1), int64(1), object(1)\n",
1032
- "memory usage: 2.1+ MB\n"
1033
  ]
1034
  }
1035
  ],
@@ -1042,13 +751,13 @@
1042
  "cell_type": "markdown",
1043
  "metadata": {},
1044
  "source": [
1045
- "#### <span style=\"color:#2656a3;\"> 🌈 Weather Forecast\n",
1046
  "Weather Forecast from Open Meteo is now being fetched. This data is used in part 02, the feature_pipeline, to ingest new real-time weather data."
1047
  ]
1048
  },
1049
  {
1050
  "cell_type": "code",
1051
- "execution_count": 61,
1052
  "metadata": {},
1053
  "outputs": [],
1054
  "source": [
@@ -1060,7 +769,7 @@
1060
  },
1061
  {
1062
  "cell_type": "code",
1063
- "execution_count": 54,
1064
  "metadata": {},
1065
  "outputs": [
1066
  {
@@ -1102,83 +811,83 @@
1102
  " <tbody>\n",
1103
  " <tr>\n",
1104
  " <th>0</th>\n",
1105
- " <td>1714608000000</td>\n",
1106
- " <td>2024-05-02 00:00:00</td>\n",
1107
- " <td>2024-05-02</td>\n",
1108
  " <td>0</td>\n",
1109
- " <td>14.9</td>\n",
1110
- " <td>66.0</td>\n",
1111
- " <td>0.0</td>\n",
1112
  " <td>0.0</td>\n",
1113
  " <td>0.0</td>\n",
1114
  " <td>0.0</td>\n",
1115
- " <td>13.0</td>\n",
1116
- " <td>21.6</td>\n",
1117
- " <td>41.4</td>\n",
 
1118
  " </tr>\n",
1119
  " <tr>\n",
1120
  " <th>1</th>\n",
1121
- " <td>1714611600000</td>\n",
1122
- " <td>2024-05-02 01:00:00</td>\n",
1123
- " <td>2024-05-02</td>\n",
1124
  " <td>1</td>\n",
1125
- " <td>14.2</td>\n",
1126
- " <td>71.0</td>\n",
1127
  " <td>0.0</td>\n",
1128
  " <td>0.0</td>\n",
1129
  " <td>0.0</td>\n",
1130
  " <td>0.0</td>\n",
1131
- " <td>4.0</td>\n",
1132
- " <td>20.5</td>\n",
1133
- " <td>37.1</td>\n",
1134
  " </tr>\n",
1135
  " <tr>\n",
1136
  " <th>2</th>\n",
1137
- " <td>1714615200000</td>\n",
1138
- " <td>2024-05-02 02:00:00</td>\n",
1139
- " <td>2024-05-02</td>\n",
1140
  " <td>2</td>\n",
1141
- " <td>13.4</td>\n",
1142
- " <td>73.0</td>\n",
1143
  " <td>0.0</td>\n",
1144
  " <td>0.0</td>\n",
1145
  " <td>0.0</td>\n",
1146
- " <td>2.0</td>\n",
1147
- " <td>70.0</td>\n",
1148
- " <td>21.2</td>\n",
1149
- " <td>36.7</td>\n",
1150
  " </tr>\n",
1151
  " <tr>\n",
1152
  " <th>3</th>\n",
1153
- " <td>1714618800000</td>\n",
1154
- " <td>2024-05-02 03:00:00</td>\n",
1155
- " <td>2024-05-02</td>\n",
1156
  " <td>3</td>\n",
1157
- " <td>13.2</td>\n",
1158
- " <td>72.0</td>\n",
1159
- " <td>0.1</td>\n",
1160
- " <td>0.1</td>\n",
1161
  " <td>0.0</td>\n",
1162
- " <td>51.0</td>\n",
1163
- " <td>51.0</td>\n",
1164
- " <td>22.3</td>\n",
1165
- " <td>39.2</td>\n",
 
 
1166
  " </tr>\n",
1167
  " <tr>\n",
1168
  " <th>4</th>\n",
1169
- " <td>1714622400000</td>\n",
1170
- " <td>2024-05-02 04:00:00</td>\n",
1171
- " <td>2024-05-02</td>\n",
1172
  " <td>4</td>\n",
1173
- " <td>12.7</td>\n",
1174
  " <td>73.0</td>\n",
1175
  " <td>0.0</td>\n",
1176
  " <td>0.0</td>\n",
1177
  " <td>0.0</td>\n",
1178
  " <td>2.0</td>\n",
1179
- " <td>78.0</td>\n",
1180
- " <td>21.6</td>\n",
1181
- " <td>38.9</td>\n",
1182
  " </tr>\n",
1183
  " </tbody>\n",
1184
  "</table>\n",
@@ -1186,28 +895,28 @@
1186
  ],
1187
  "text/plain": [
1188
  " timestamp datetime date hour temperature_2m \\\n",
1189
- "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
1190
- "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
1191
- "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
1192
- "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
1193
- "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
1194
  "\n",
1195
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1196
- "0 66.0 0.0 0.0 0.0 0.0 \n",
1197
- "1 71.0 0.0 0.0 0.0 0.0 \n",
1198
- "2 73.0 0.0 0.0 0.0 2.0 \n",
1199
- "3 72.0 0.1 0.1 0.0 51.0 \n",
1200
  "4 73.0 0.0 0.0 0.0 2.0 \n",
1201
  "\n",
1202
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1203
- "0 13.0 21.6 41.4 \n",
1204
- "1 4.0 20.5 37.1 \n",
1205
- "2 70.0 21.2 36.7 \n",
1206
- "3 51.0 22.3 39.2 \n",
1207
- "4 78.0 21.6 38.9 "
1208
  ]
1209
  },
1210
- "execution_count": 54,
1211
  "metadata": {},
1212
  "output_type": "execute_result"
1213
  }
@@ -1219,7 +928,7 @@
1219
  },
1220
  {
1221
  "cell_type": "code",
1222
- "execution_count": 55,
1223
  "metadata": {},
1224
  "outputs": [
1225
  {
@@ -1261,83 +970,83 @@
1261
  " <tbody>\n",
1262
  " <tr>\n",
1263
  " <th>115</th>\n",
1264
- " <td>1715022000000</td>\n",
1265
- " <td>2024-05-06 19:00:00</td>\n",
1266
- " <td>2024-05-06</td>\n",
1267
  " <td>19</td>\n",
1268
- " <td>10.7</td>\n",
1269
- " <td>91.0</td>\n",
1270
- " <td>1.4</td>\n",
1271
- " <td>1.4</td>\n",
1272
  " <td>0.0</td>\n",
1273
- " <td>61.0</td>\n",
1274
- " <td>100.0</td>\n",
1275
- " <td>16.6</td>\n",
1276
- " <td>32.0</td>\n",
 
 
1277
  " </tr>\n",
1278
  " <tr>\n",
1279
  " <th>116</th>\n",
1280
- " <td>1715025600000</td>\n",
1281
- " <td>2024-05-06 20:00:00</td>\n",
1282
- " <td>2024-05-06</td>\n",
1283
  " <td>20</td>\n",
1284
- " <td>10.1</td>\n",
1285
- " <td>90.0</td>\n",
1286
- " <td>1.4</td>\n",
1287
- " <td>1.4</td>\n",
1288
  " <td>0.0</td>\n",
1289
- " <td>61.0</td>\n",
1290
- " <td>100.0</td>\n",
1291
- " <td>19.5</td>\n",
1292
- " <td>37.1</td>\n",
 
 
1293
  " </tr>\n",
1294
  " <tr>\n",
1295
  " <th>117</th>\n",
1296
- " <td>1715029200000</td>\n",
1297
- " <td>2024-05-06 21:00:00</td>\n",
1298
- " <td>2024-05-06</td>\n",
1299
  " <td>21</td>\n",
1300
- " <td>9.5</td>\n",
1301
- " <td>88.0</td>\n",
1302
- " <td>1.4</td>\n",
1303
- " <td>1.4</td>\n",
1304
  " <td>0.0</td>\n",
1305
- " <td>61.0</td>\n",
1306
- " <td>100.0</td>\n",
1307
- " <td>21.6</td>\n",
1308
- " <td>42.1</td>\n",
 
 
1309
  " </tr>\n",
1310
  " <tr>\n",
1311
  " <th>118</th>\n",
1312
- " <td>1715032800000</td>\n",
1313
- " <td>2024-05-06 22:00:00</td>\n",
1314
- " <td>2024-05-06</td>\n",
1315
  " <td>22</td>\n",
1316
- " <td>9.3</td>\n",
1317
- " <td>86.0</td>\n",
1318
- " <td>0.6</td>\n",
1319
- " <td>0.6</td>\n",
1320
  " <td>0.0</td>\n",
1321
- " <td>3.0</td>\n",
1322
- " <td>100.0</td>\n",
1323
- " <td>22.0</td>\n",
1324
- " <td>41.0</td>\n",
 
 
1325
  " </tr>\n",
1326
  " <tr>\n",
1327
  " <th>119</th>\n",
1328
- " <td>1715036400000</td>\n",
1329
- " <td>2024-05-06 23:00:00</td>\n",
1330
- " <td>2024-05-06</td>\n",
1331
  " <td>23</td>\n",
1332
- " <td>9.1</td>\n",
1333
- " <td>84.0</td>\n",
1334
- " <td>0.6</td>\n",
1335
- " <td>0.6</td>\n",
1336
  " <td>0.0</td>\n",
1337
- " <td>3.0</td>\n",
1338
- " <td>100.0</td>\n",
1339
- " <td>21.3</td>\n",
1340
- " <td>40.3</td>\n",
 
 
1341
  " </tr>\n",
1342
  " </tbody>\n",
1343
  "</table>\n",
@@ -1345,28 +1054,28 @@
1345
  ],
1346
  "text/plain": [
1347
  " timestamp datetime date hour temperature_2m \\\n",
1348
- "115 1715022000000 2024-05-06 19:00:00 2024-05-06 19 10.7 \n",
1349
- "116 1715025600000 2024-05-06 20:00:00 2024-05-06 20 10.1 \n",
1350
- "117 1715029200000 2024-05-06 21:00:00 2024-05-06 21 9.5 \n",
1351
- "118 1715032800000 2024-05-06 22:00:00 2024-05-06 22 9.3 \n",
1352
- "119 1715036400000 2024-05-06 23:00:00 2024-05-06 23 9.1 \n",
1353
  "\n",
1354
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1355
- "115 91.0 1.4 1.4 0.0 61.0 \n",
1356
- "116 90.0 1.4 1.4 0.0 61.0 \n",
1357
- "117 88.0 1.4 1.4 0.0 61.0 \n",
1358
- "118 86.0 0.6 0.6 0.0 3.0 \n",
1359
- "119 84.0 0.6 0.6 0.0 3.0 \n",
1360
  "\n",
1361
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1362
- "115 100.0 16.6 32.0 \n",
1363
- "116 100.0 19.5 37.1 \n",
1364
- "117 100.0 21.6 42.1 \n",
1365
- "118 100.0 22.0 41.0 \n",
1366
- "119 100.0 21.3 40.3 "
1367
  ]
1368
  },
1369
- "execution_count": 55,
1370
  "metadata": {},
1371
  "output_type": "execute_result"
1372
  }
@@ -1378,7 +1087,7 @@
1378
  },
1379
  {
1380
  "cell_type": "code",
1381
- "execution_count": 56,
1382
  "metadata": {},
1383
  "outputs": [
1384
  {
@@ -1393,7 +1102,7 @@
1393
  " 0 timestamp 120 non-null int64 \n",
1394
  " 1 datetime 120 non-null datetime64[ns]\n",
1395
  " 2 date 120 non-null object \n",
1396
- " 3 hour 120 non-null int32 \n",
1397
  " 4 temperature_2m 120 non-null float64 \n",
1398
  " 5 relative_humidity_2m 120 non-null float64 \n",
1399
  " 6 precipitation 120 non-null float64 \n",
@@ -1403,8 +1112,8 @@
1403
  " 10 cloud_cover 120 non-null float64 \n",
1404
  " 11 wind_speed_10m 120 non-null float64 \n",
1405
  " 12 wind_gusts_10m 120 non-null float64 \n",
1406
- "dtypes: datetime64[ns](1), float64(9), int32(1), int64(1), object(1)\n",
1407
- "memory usage: 11.8+ KB\n"
1408
  ]
1409
  }
1410
  ],
@@ -1423,16 +1132,16 @@
1423
  },
1424
  {
1425
  "cell_type": "code",
1426
- "execution_count": 69,
1427
  "metadata": {},
1428
  "outputs": [],
1429
  "source": [
1430
- "calender_df = calendar.get_calendar()"
1431
  ]
1432
  },
1433
  {
1434
  "cell_type": "code",
1435
- "execution_count": 66,
1436
  "metadata": {},
1437
  "outputs": [
1438
  {
@@ -1461,7 +1170,7 @@
1461
  " <th>day</th>\n",
1462
  " <th>month</th>\n",
1463
  " <th>year</th>\n",
1464
- " <th>holiday</th>\n",
1465
  " </tr>\n",
1466
  " </thead>\n",
1467
  " <tbody>\n",
@@ -1472,7 +1181,7 @@
1472
  " <td>1</td>\n",
1473
  " <td>1</td>\n",
1474
  " <td>2022</td>\n",
1475
- " <td>1</td>\n",
1476
  " </tr>\n",
1477
  " <tr>\n",
1478
  " <th>1</th>\n",
@@ -1481,7 +1190,7 @@
1481
  " <td>2</td>\n",
1482
  " <td>1</td>\n",
1483
  " <td>2022</td>\n",
1484
- " <td>1</td>\n",
1485
  " </tr>\n",
1486
  " <tr>\n",
1487
  " <th>2</th>\n",
@@ -1490,7 +1199,7 @@
1490
  " <td>3</td>\n",
1491
  " <td>1</td>\n",
1492
  " <td>2022</td>\n",
1493
- " <td>0</td>\n",
1494
  " </tr>\n",
1495
  " <tr>\n",
1496
  " <th>3</th>\n",
@@ -1499,7 +1208,7 @@
1499
  " <td>4</td>\n",
1500
  " <td>1</td>\n",
1501
  " <td>2022</td>\n",
1502
- " <td>0</td>\n",
1503
  " </tr>\n",
1504
  " <tr>\n",
1505
  " <th>4</th>\n",
@@ -1508,22 +1217,22 @@
1508
  " <td>5</td>\n",
1509
  " <td>1</td>\n",
1510
  " <td>2022</td>\n",
1511
- " <td>0</td>\n",
1512
  " </tr>\n",
1513
  " </tbody>\n",
1514
  "</table>\n",
1515
  "</div>"
1516
  ],
1517
  "text/plain": [
1518
- " date dayofweek day month year holiday\n",
1519
- "0 2022-01-01 5 1 1 2022 1\n",
1520
- "1 2022-01-02 6 2 1 2022 1\n",
1521
- "2 2022-01-03 0 3 1 2022 0\n",
1522
- "3 2022-01-04 1 4 1 2022 0\n",
1523
- "4 2022-01-05 2 5 1 2022 0"
1524
  ]
1525
  },
1526
- "execution_count": 66,
1527
  "metadata": {},
1528
  "output_type": "execute_result"
1529
  }
@@ -1535,7 +1244,7 @@
1535
  },
1536
  {
1537
  "cell_type": "code",
1538
- "execution_count": 70,
1539
  "metadata": {},
1540
  "outputs": [
1541
  {
@@ -1564,7 +1273,7 @@
1564
  " <th>day</th>\n",
1565
  " <th>month</th>\n",
1566
  " <th>year</th>\n",
1567
- " <th>holiday</th>\n",
1568
  " </tr>\n",
1569
  " </thead>\n",
1570
  " <tbody>\n",
@@ -1575,7 +1284,7 @@
1575
  " <td>27</td>\n",
1576
  " <td>12</td>\n",
1577
  " <td>2024</td>\n",
1578
- " <td>0</td>\n",
1579
  " </tr>\n",
1580
  " <tr>\n",
1581
  " <th>1092</th>\n",
@@ -1584,7 +1293,7 @@
1584
  " <td>28</td>\n",
1585
  " <td>12</td>\n",
1586
  " <td>2024</td>\n",
1587
- " <td>1</td>\n",
1588
  " </tr>\n",
1589
  " <tr>\n",
1590
  " <th>1093</th>\n",
@@ -1593,7 +1302,7 @@
1593
  " <td>29</td>\n",
1594
  " <td>12</td>\n",
1595
  " <td>2024</td>\n",
1596
- " <td>1</td>\n",
1597
  " </tr>\n",
1598
  " <tr>\n",
1599
  " <th>1094</th>\n",
@@ -1602,7 +1311,7 @@
1602
  " <td>30</td>\n",
1603
  " <td>12</td>\n",
1604
  " <td>2024</td>\n",
1605
- " <td>0</td>\n",
1606
  " </tr>\n",
1607
  " <tr>\n",
1608
  " <th>1095</th>\n",
@@ -1611,22 +1320,22 @@
1611
  " <td>31</td>\n",
1612
  " <td>12</td>\n",
1613
  " <td>2024</td>\n",
1614
- " <td>0</td>\n",
1615
  " </tr>\n",
1616
  " </tbody>\n",
1617
  "</table>\n",
1618
  "</div>"
1619
  ],
1620
  "text/plain": [
1621
- " date dayofweek day month year holiday\n",
1622
- "1091 2024-12-27 4 27 12 2024 0\n",
1623
- "1092 2024-12-28 5 28 12 2024 1\n",
1624
- "1093 2024-12-29 6 29 12 2024 1\n",
1625
- "1094 2024-12-30 0 30 12 2024 0\n",
1626
- "1095 2024-12-31 1 31 12 2024 0"
1627
  ]
1628
  },
1629
- "execution_count": 70,
1630
  "metadata": {},
1631
  "output_type": "execute_result"
1632
  }
@@ -1638,7 +1347,7 @@
1638
  },
1639
  {
1640
  "cell_type": "code",
1641
- "execution_count": 71,
1642
  "metadata": {},
1643
  "outputs": [
1644
  {
@@ -1651,13 +1360,13 @@
1651
  " # Column Non-Null Count Dtype \n",
1652
  "--- ------ -------------- ----- \n",
1653
  " 0 date 1096 non-null object\n",
1654
- " 1 dayofweek 1096 non-null int32 \n",
1655
- " 2 day 1096 non-null int32 \n",
1656
- " 3 month 1096 non-null int32 \n",
1657
- " 4 year 1096 non-null int32 \n",
1658
- " 5 holiday 1096 non-null int32 \n",
1659
- "dtypes: int32(5), object(1)\n",
1660
- "memory usage: 30.1+ KB\n"
1661
  ]
1662
  }
1663
  ],
@@ -1668,7 +1377,7 @@
1668
  },
1669
  {
1670
  "cell_type": "code",
1671
- "execution_count": 72,
1672
  "metadata": {},
1673
  "outputs": [
1674
  {
@@ -1681,13 +1390,13 @@
1681
  " # Column Non-Null Count Dtype \n",
1682
  "--- ------ -------------- ----- \n",
1683
  " 0 date 1096 non-null object\n",
1684
- " 1 dayofweek 1096 non-null int32 \n",
1685
- " 2 day 1096 non-null int32 \n",
1686
- " 3 month 1096 non-null int32 \n",
1687
- " 4 year 1096 non-null int32 \n",
1688
- " 5 holiday 1096 non-null int32 \n",
1689
- "dtypes: int32(5), object(1)\n",
1690
- "memory usage: 30.1+ KB\n"
1691
  ]
1692
  }
1693
  ],
@@ -1707,17 +1416,16 @@
1707
  },
1708
  {
1709
  "cell_type": "code",
1710
- "execution_count": 73,
1711
  "metadata": {},
1712
  "outputs": [
1713
  {
1714
  "name": "stdout",
1715
  "output_type": "stream",
1716
  "text": [
1717
- "Connection closed.\n",
1718
  "Connected. Call `.close()` to terminate connection gracefully.\n",
1719
  "\n",
1720
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
1721
  "Connected. Call `.close()` to terminate connection gracefully.\n"
1722
  ]
1723
  }
@@ -1754,7 +1462,7 @@
1754
  },
1755
  {
1756
  "cell_type": "code",
1757
- "execution_count": 74,
1758
  "metadata": {},
1759
  "outputs": [],
1760
  "source": [
@@ -1778,7 +1486,7 @@
1778
  },
1779
  {
1780
  "cell_type": "code",
1781
- "execution_count": 75,
1782
  "metadata": {},
1783
  "outputs": [
1784
  {
@@ -1786,15 +1494,22 @@
1786
  "output_type": "stream",
1787
  "text": [
1788
  "Feature Group created successfully, explore it at \n",
1789
- "https://c.app.hopsworks.ai:443/p/556180/fs/552003/fg/775531\n"
1790
  ]
1791
  },
1792
  {
1793
- "name": "stderr",
1794
- "output_type": "stream",
1795
- "text": [
1796
- "Uploading Dataframe: 100.00% |██████████| Rows 20445/20445 | Elapsed Time: 00:06 | Remaining Time: 00:00\n"
1797
- ]
 
 
 
 
 
 
 
1798
  },
1799
  {
1800
  "name": "stdout",
@@ -1802,16 +1517,16 @@
1802
  "text": [
1803
  "Launching job: electricity_prices_1_offline_fg_materialization\n",
1804
  "Job started successfully, you can follow the progress at \n",
1805
- "https://c.app.hopsworks.ai/p/556180/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
1806
  ]
1807
  },
1808
  {
1809
  "data": {
1810
  "text/plain": [
1811
- "(<hsfs.core.job.Job at 0x1ce32519310>, None)"
1812
  ]
1813
  },
1814
- "execution_count": 75,
1815
  "metadata": {},
1816
  "output_type": "execute_result"
1817
  }
@@ -1830,7 +1545,7 @@
1830
  },
1831
  {
1832
  "cell_type": "code",
1833
- "execution_count": 76,
1834
  "metadata": {},
1835
  "outputs": [],
1836
  "source": [
@@ -1852,59 +1567,12 @@
1852
  "cell_type": "markdown",
1853
  "metadata": {},
1854
  "source": [
1855
- "We replicate the process for both the `forecast_renewable_energy_fg`, `weather_fg` and `danish_holidays_fg` by establishing feature groups and inserting the dataframes into their respective feature groups."
1856
  ]
1857
  },
1858
  {
1859
  "cell_type": "code",
1860
- "execution_count": null,
1861
- "metadata": {},
1862
- "outputs": [],
1863
- "source": [
1864
- "# # Creating the feature group for the electricity prices\n",
1865
- "# forecast_renewable_energy_fg = fs.get_or_create_feature_group(\n",
1866
- "# name=\"forecast_renewable_energy\",\n",
1867
- "# version=1,\n",
1868
- "# description=\"Forecast on Renewable Energy on ForecastType from Energidata API\",\n",
1869
- "# primary_key=[\"date\",\"timestamp\"], \n",
1870
- "# online_enabled=True,\n",
1871
- "# event_time=\"timestamp\",\n",
1872
- "# )"
1873
- ]
1874
- },
1875
- {
1876
- "cell_type": "code",
1877
- "execution_count": null,
1878
- "metadata": {},
1879
- "outputs": [],
1880
- "source": [
1881
- "# # Inserting the electricity_df into the feature group named electricity_fg\n",
1882
- "# forecast_renewable_energy_fg.insert(forecast_renewable_energy_df)"
1883
- ]
1884
- },
1885
- {
1886
- "cell_type": "code",
1887
- "execution_count": null,
1888
- "metadata": {},
1889
- "outputs": [],
1890
- "source": [
1891
- "# # List of descriptions for forecast_renewable_energy features\n",
1892
- "# forecast_renewable_energy_feature_descriptions = [\n",
1893
- "# {\"name\": \"timestamp\", \"description\": \"Timestamp for the event_time\"},\n",
1894
- "# {\"name\": \"date\", \"description\": \"Date of the forecast\"},\n",
1895
- "# {\"name\": \"datetime\", \"description\": \"Date and time for the forecast\"},\n",
1896
- "# {\"name\": \"hour\", \"description\": \"Hour of day\"},\n",
1897
- "# {\"name\": \"dk1_offshore_wind_forecastintraday_kwh\", \"description\": \"The forecast for the coming day at 6am Danish time zone\"},\n",
1898
- "# ]\n",
1899
- "\n",
1900
- "# # Updating feature descriptions\n",
1901
- "# for desc in forecast_renewable_energy_feature_descriptions: \n",
1902
- "# forecast_renewable_energy_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1903
- ]
1904
- },
1905
- {
1906
- "cell_type": "code",
1907
- "execution_count": 77,
1908
  "metadata": {},
1909
  "outputs": [],
1910
  "source": [
@@ -1921,7 +1589,7 @@
1921
  },
1922
  {
1923
  "cell_type": "code",
1924
- "execution_count": 78,
1925
  "metadata": {},
1926
  "outputs": [
1927
  {
@@ -1929,15 +1597,22 @@
1929
  "output_type": "stream",
1930
  "text": [
1931
  "Feature Group created successfully, explore it at \n",
1932
- "https://c.app.hopsworks.ai:443/p/556180/fs/552003/fg/774518\n"
1933
  ]
1934
  },
1935
  {
1936
- "name": "stderr",
1937
- "output_type": "stream",
1938
- "text": [
1939
- "Uploading Dataframe: 100.00% |██████████| Rows 20424/20424 | Elapsed Time: 00:08 | Remaining Time: 00:00\n"
1940
- ]
 
 
 
 
 
 
 
1941
  },
1942
  {
1943
  "name": "stdout",
@@ -1945,16 +1620,16 @@
1945
  "text": [
1946
  "Launching job: weather_measurements_1_offline_fg_materialization\n",
1947
  "Job started successfully, you can follow the progress at \n",
1948
- "https://c.app.hopsworks.ai/p/556180/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
1949
  ]
1950
  },
1951
  {
1952
  "data": {
1953
  "text/plain": [
1954
- "(<hsfs.core.job.Job at 0x1ce326d4a50>, None)"
1955
  ]
1956
  },
1957
- "execution_count": 78,
1958
  "metadata": {},
1959
  "output_type": "execute_result"
1960
  }
@@ -1966,7 +1641,7 @@
1966
  },
1967
  {
1968
  "cell_type": "code",
1969
- "execution_count": 79,
1970
  "metadata": {},
1971
  "outputs": [],
1972
  "source": [
@@ -1994,7 +1669,7 @@
1994
  },
1995
  {
1996
  "cell_type": "code",
1997
- "execution_count": 80,
1998
  "metadata": {},
1999
  "outputs": [],
2000
  "source": [
@@ -2010,7 +1685,7 @@
2010
  },
2011
  {
2012
  "cell_type": "code",
2013
- "execution_count": 81,
2014
  "metadata": {},
2015
  "outputs": [
2016
  {
@@ -2018,15 +1693,22 @@
2018
  "output_type": "stream",
2019
  "text": [
2020
  "Feature Group created successfully, explore it at \n",
2021
- "https://c.app.hopsworks.ai:443/p/556180/fs/552003/fg/776524\n"
2022
  ]
2023
  },
2024
  {
2025
- "name": "stderr",
2026
- "output_type": "stream",
2027
- "text": [
2028
- "Uploading Dataframe: 100.00% |██████████| Rows 1096/1096 | Elapsed Time: 00:06 | Remaining Time: 00:00\n"
2029
- ]
 
 
 
 
 
 
 
2030
  },
2031
  {
2032
  "name": "stdout",
@@ -2034,16 +1716,16 @@
2034
  "text": [
2035
  "Launching job: dk_calendar_1_offline_fg_materialization\n",
2036
  "Job started successfully, you can follow the progress at \n",
2037
- "https://c.app.hopsworks.ai/p/556180/jobs/named/dk_calendar_1_offline_fg_materialization/executions\n"
2038
  ]
2039
  },
2040
  {
2041
  "data": {
2042
  "text/plain": [
2043
- "(<hsfs.core.job.Job at 0x1ce326c2010>, None)"
2044
  ]
2045
  },
2046
- "execution_count": 81,
2047
  "metadata": {},
2048
  "output_type": "execute_result"
2049
  }
@@ -2055,7 +1737,7 @@
2055
  },
2056
  {
2057
  "cell_type": "code",
2058
- "execution_count": 82,
2059
  "metadata": {},
2060
  "outputs": [],
2061
  "source": [
@@ -2064,7 +1746,7 @@
2064
  " {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
2065
  " {\"name\": \"day\", \"description\": \"Day number of the week. Monday is 0 and Sunday is 6\"},\n",
2066
  " {\"name\": \"month\", \"description\": \"Month number of the year\"},\n",
2067
- " {\"name\": \"holiday\", \"description\": \"Holiday or not holiday\"},\n",
2068
  "]\n",
2069
  "\n",
2070
  "# Updating feature descriptions\n",
 
28
  },
29
  {
30
  "cell_type": "code",
31
+ "execution_count": 1,
32
  "metadata": {},
33
  "outputs": [],
34
  "source": [
 
38
  },
39
  {
40
  "cell_type": "code",
41
+ "execution_count": 2,
42
  "metadata": {},
43
  "outputs": [
44
  {
45
  "name": "stdout",
46
  "output_type": "stream",
47
  "text": [
48
+ "/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-\n",
49
+ "/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-/notebooks\n"
50
  ]
51
  }
52
  ],
 
64
  },
65
  {
66
  "cell_type": "code",
67
+ "execution_count": 3,
68
  "metadata": {},
69
  "outputs": [],
70
  "source": [
 
80
  "warnings.filterwarnings('ignore', category=DeprecationWarning)"
81
  ]
82
  },
83
+ {
84
+ "cell_type": "markdown",
85
+ "metadata": {},
86
+ "source": [
87
+ "# <span style=\"color:#2656a3;\"> 🤖 Transformation Functions</span>\n",
88
+ "\n",
89
+ "We preprocess our data using *min-max scaling* on the numerical features and *label encoding* on the one categorical feature we have.\n",
90
+ "To achieve this, we create a mapping between our features and transformation functions. This ensures that transformation functions like min-max scaling are applied exclusively on the training data, preventing any data leakage into the validation or test sets.\n",
91
+ "\n",
92
+ "To achieve this, we create a mapping between our features and transformation functions. <!-- TODO: confirm that this statement is accurate -->"
93
+ ]
94
+ },
95
  {
96
  "cell_type": "markdown",
97
  "metadata": {},
 
103
  "- Electricity prices in Denmark on an hourly basis per day from [Energinet](https://www.energidataservice.dk). Located in the *features* folder under electricity_prices.\n",
104
  "- Different meteorological observations based on Aalborg, Denmark from [Open Meteo](https://www.open-meteo.com). Located in the *features* folder under weather_measures.\n",
105
  "- Danish calendar that categorizes dates into types based on whether it is a weekday or not. This file was made manually by the group and is located in the *data* folder inside this repository.\n",
 
106
  "- Weather Forecast based on Aalborg, Denmark from [Open Meteo](https://www.open-meteo.com). Located in the *features* folder under weather_measures. (This data is used later to ingest new real-time weather data)\n"
107
  ]
108
  },
 
116
  },
117
  {
118
  "cell_type": "code",
119
+ "execution_count": 4,
120
  "metadata": {},
121
  "outputs": [],
122
  "source": [
 
133
  },
134
  {
135
  "cell_type": "code",
136
+ "execution_count": 5,
137
  "metadata": {},
138
  "outputs": [
139
  {
 
218
  "4 1641009600000 2022-01-01 04:00:00 2022-01-01 4 0.28013"
219
  ]
220
  },
221
+ "execution_count": 5,
222
  "metadata": {},
223
  "output_type": "execute_result"
224
  }
 
230
  },
231
  {
232
  "cell_type": "code",
233
+ "execution_count": 6,
234
  "metadata": {},
235
  "outputs": [
236
  {
 
263
  " </thead>\n",
264
  " <tbody>\n",
265
  " <tr>\n",
266
+ " <th>20464</th>\n",
267
+ " <td>1714676400000</td>\n",
268
+ " <td>2024-05-02 19:00:00</td>\n",
269
+ " <td>2024-05-02</td>\n",
270
  " <td>19</td>\n",
271
+ " <td>0.31266</td>\n",
272
  " </tr>\n",
273
  " <tr>\n",
274
+ " <th>20465</th>\n",
275
+ " <td>1714680000000</td>\n",
276
+ " <td>2024-05-02 20:00:00</td>\n",
277
+ " <td>2024-05-02</td>\n",
278
  " <td>20</td>\n",
279
+ " <td>0.31318</td>\n",
280
  " </tr>\n",
281
  " <tr>\n",
282
+ " <th>20466</th>\n",
283
+ " <td>1714683600000</td>\n",
284
+ " <td>2024-05-02 21:00:00</td>\n",
285
+ " <td>2024-05-02</td>\n",
286
  " <td>21</td>\n",
287
+ " <td>0.31266</td>\n",
288
  " </tr>\n",
289
  " <tr>\n",
290
+ " <th>20467</th>\n",
291
+ " <td>1714687200000</td>\n",
292
+ " <td>2024-05-02 22:00:00</td>\n",
293
+ " <td>2024-05-02</td>\n",
294
  " <td>22</td>\n",
295
+ " <td>0.28245</td>\n",
296
  " </tr>\n",
297
  " <tr>\n",
298
+ " <th>20468</th>\n",
299
+ " <td>1714690800000</td>\n",
300
+ " <td>2024-05-02 23:00:00</td>\n",
301
+ " <td>2024-05-02</td>\n",
302
  " <td>23</td>\n",
303
+ " <td>0.25306</td>\n",
304
  " </tr>\n",
305
  " </tbody>\n",
306
  "</table>\n",
 
308
  ],
309
  "text/plain": [
310
  " timestamp datetime date hour \\\n",
311
+ "20464 1714676400000 2024-05-02 19:00:00 2024-05-02 19 \n",
312
+ "20465 1714680000000 2024-05-02 20:00:00 2024-05-02 20 \n",
313
+ "20466 1714683600000 2024-05-02 21:00:00 2024-05-02 21 \n",
314
+ "20467 1714687200000 2024-05-02 22:00:00 2024-05-02 22 \n",
315
+ "20468 1714690800000 2024-05-02 23:00:00 2024-05-02 23 \n",
316
  "\n",
317
  " dk1_spotpricedkk_kwh \n",
318
+ "20464 0.31266 \n",
319
+ "20465 0.31318 \n",
320
+ "20466 0.31266 \n",
321
+ "20467 0.28245 \n",
322
+ "20468 0.25306 "
323
  ]
324
  },
325
+ "execution_count": 6,
326
  "metadata": {},
327
  "output_type": "execute_result"
328
  }
 
334
  },
335
  {
336
  "cell_type": "code",
337
+ "execution_count": 7,
338
  "metadata": {},
339
  "outputs": [
340
  {
 
342
  "output_type": "stream",
343
  "text": [
344
  "<class 'pandas.core.frame.DataFrame'>\n",
345
+ "RangeIndex: 20469 entries, 0 to 20468\n",
346
  "Data columns (total 5 columns):\n",
347
  " # Column Non-Null Count Dtype \n",
348
  "--- ------ -------------- ----- \n",
349
+ " 0 timestamp 20469 non-null int64 \n",
350
+ " 1 datetime 20469 non-null datetime64[ns]\n",
351
+ " 2 date 20469 non-null object \n",
352
+ " 3 hour 20469 non-null int64 \n",
353
+ " 4 dk1_spotpricedkk_kwh 20469 non-null float64 \n",
354
+ "dtypes: datetime64[ns](1), float64(1), int64(2), object(1)\n",
355
+ "memory usage: 799.7+ KB\n"
356
  ]
357
  }
358
  ],
 
361
  "electricity_df.info()"
362
  ]
363
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  {
365
  "cell_type": "markdown",
366
  "metadata": {},
 
378
  },
379
  {
380
  "cell_type": "code",
381
+ "execution_count": 8,
382
  "metadata": {},
383
  "outputs": [],
384
  "source": [
 
394
  },
395
  {
396
  "cell_type": "code",
397
+ "execution_count": 9,
398
  "metadata": {},
399
  "outputs": [
400
  {
 
541
  "4 100.0 10.6 23.8 "
542
  ]
543
  },
544
+ "execution_count": 9,
545
  "metadata": {},
546
  "output_type": "execute_result"
547
  }
 
553
  },
554
  {
555
  "cell_type": "code",
556
+ "execution_count": 10,
557
  "metadata": {},
558
  "outputs": [
559
  {
 
594
  " </thead>\n",
595
  " <tbody>\n",
596
  " <tr>\n",
597
+ " <th>20443</th>\n",
598
+ " <td>1714590000000</td>\n",
599
+ " <td>2024-05-01 19:00:00</td>\n",
600
+ " <td>2024-05-01</td>\n",
601
  " <td>19</td>\n",
602
+ " <td>15.4</td>\n",
603
+ " <td>71.0</td>\n",
604
  " <td>0.0</td>\n",
605
  " <td>0.0</td>\n",
606
  " <td>0.0</td>\n",
607
  " <td>0.0</td>\n",
608
+ " <td>13.0</td>\n",
609
+ " <td>14.8</td>\n",
610
+ " <td>25.9</td>\n",
611
  " </tr>\n",
612
  " <tr>\n",
613
+ " <th>20444</th>\n",
614
+ " <td>1714593600000</td>\n",
615
+ " <td>2024-05-01 20:00:00</td>\n",
616
+ " <td>2024-05-01</td>\n",
617
  " <td>20</td>\n",
618
+ " <td>14.7</td>\n",
619
+ " <td>69.0</td>\n",
620
  " <td>0.0</td>\n",
621
  " <td>0.0</td>\n",
622
  " <td>0.0</td>\n",
623
  " <td>0.0</td>\n",
624
+ " <td>7.0</td>\n",
625
+ " <td>19.2</td>\n",
626
+ " <td>33.8</td>\n",
627
  " </tr>\n",
628
  " <tr>\n",
629
+ " <th>20445</th>\n",
630
+ " <td>1714597200000</td>\n",
631
+ " <td>2024-05-01 21:00:00</td>\n",
632
+ " <td>2024-05-01</td>\n",
633
  " <td>21</td>\n",
634
+ " <td>14.3</td>\n",
635
  " <td>67.0</td>\n",
636
  " <td>0.0</td>\n",
637
  " <td>0.0</td>\n",
638
  " <td>0.0</td>\n",
639
  " <td>0.0</td>\n",
640
+ " <td>2.0</td>\n",
641
+ " <td>20.5</td>\n",
642
+ " <td>37.4</td>\n",
643
  " </tr>\n",
644
  " <tr>\n",
645
+ " <th>20446</th>\n",
646
+ " <td>1714600800000</td>\n",
647
+ " <td>2024-05-01 22:00:00</td>\n",
648
+ " <td>2024-05-01</td>\n",
649
  " <td>22</td>\n",
650
+ " <td>13.4</td>\n",
651
+ " <td>68.0</td>\n",
652
  " <td>0.0</td>\n",
653
  " <td>0.0</td>\n",
654
  " <td>0.0</td>\n",
655
  " <td>0.0</td>\n",
656
+ " <td>7.0</td>\n",
657
+ " <td>20.2</td>\n",
658
+ " <td>37.4</td>\n",
659
  " </tr>\n",
660
  " <tr>\n",
661
+ " <th>20447</th>\n",
662
+ " <td>1714604400000</td>\n",
663
+ " <td>2024-05-01 23:00:00</td>\n",
664
+ " <td>2024-05-01</td>\n",
665
  " <td>23</td>\n",
666
+ " <td>12.4</td>\n",
667
  " <td>70.0</td>\n",
668
  " <td>0.0</td>\n",
669
  " <td>0.0</td>\n",
670
  " <td>0.0</td>\n",
671
  " <td>0.0</td>\n",
672
+ " <td>17.0</td>\n",
673
+ " <td>18.8</td>\n",
674
+ " <td>36.4</td>\n",
675
  " </tr>\n",
676
  " </tbody>\n",
677
  "</table>\n",
 
679
  ],
680
  "text/plain": [
681
  " timestamp datetime date hour temperature_2m \\\n",
682
+ "20443 1714590000000 2024-05-01 19:00:00 2024-05-01 19 15.4 \n",
683
+ "20444 1714593600000 2024-05-01 20:00:00 2024-05-01 20 14.7 \n",
684
+ "20445 1714597200000 2024-05-01 21:00:00 2024-05-01 21 14.3 \n",
685
+ "20446 1714600800000 2024-05-01 22:00:00 2024-05-01 22 13.4 \n",
686
+ "20447 1714604400000 2024-05-01 23:00:00 2024-05-01 23 12.4 \n",
687
  "\n",
688
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
689
+ "20443 71.0 0.0 0.0 0.0 0.0 \n",
690
+ "20444 69.0 0.0 0.0 0.0 0.0 \n",
691
+ "20445 67.0 0.0 0.0 0.0 0.0 \n",
692
+ "20446 68.0 0.0 0.0 0.0 0.0 \n",
693
+ "20447 70.0 0.0 0.0 0.0 0.0 \n",
694
  "\n",
695
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
696
+ "20443 13.0 14.8 25.9 \n",
697
+ "20444 7.0 19.2 33.8 \n",
698
+ "20445 2.0 20.5 37.4 \n",
699
+ "20446 7.0 20.2 37.4 \n",
700
+ "20447 17.0 18.8 36.4 "
701
  ]
702
  },
703
+ "execution_count": 10,
704
  "metadata": {},
705
  "output_type": "execute_result"
706
  }
 
712
  },
713
  {
714
  "cell_type": "code",
715
+ "execution_count": 11,
716
  "metadata": {},
717
  "outputs": [
718
  {
 
720
  "output_type": "stream",
721
  "text": [
722
  "<class 'pandas.core.frame.DataFrame'>\n",
723
+ "Int64Index: 20448 entries, 0 to 20447\n",
724
  "Data columns (total 13 columns):\n",
725
  " # Column Non-Null Count Dtype \n",
726
  "--- ------ -------------- ----- \n",
727
+ " 0 timestamp 20448 non-null int64 \n",
728
+ " 1 datetime 20448 non-null datetime64[ns]\n",
729
+ " 2 date 20448 non-null object \n",
730
+ " 3 hour 20448 non-null int64 \n",
731
+ " 4 temperature_2m 20448 non-null float64 \n",
732
+ " 5 relative_humidity_2m 20448 non-null float64 \n",
733
+ " 6 precipitation 20448 non-null float64 \n",
734
+ " 7 rain 20448 non-null float64 \n",
735
+ " 8 snowfall 20448 non-null float64 \n",
736
+ " 9 weather_code 20448 non-null float64 \n",
737
+ " 10 cloud_cover 20448 non-null float64 \n",
738
+ " 11 wind_speed_10m 20448 non-null float64 \n",
739
+ " 12 wind_gusts_10m 20448 non-null float64 \n",
740
+ "dtypes: datetime64[ns](1), float64(9), int64(2), object(1)\n",
741
+ "memory usage: 2.2+ MB\n"
742
  ]
743
  }
744
  ],
 
751
  "cell_type": "markdown",
752
  "metadata": {},
753
  "source": [
754
+ "#### <span style=\"color:#2656a3;\"> 🌈 Forecast Weather Measures\n",
755
  "Weather Forecast from Open Meteo is now being fetched. This data is used in part 02 the feature_pipeline to parse in new real-time weather data."
756
  ]
757
  },
758
  {
759
  "cell_type": "code",
760
+ "execution_count": 12,
761
  "metadata": {},
762
  "outputs": [],
763
  "source": [
 
769
  },
770
  {
771
  "cell_type": "code",
772
+ "execution_count": 13,
773
  "metadata": {},
774
  "outputs": [
775
  {
 
811
  " <tbody>\n",
812
  " <tr>\n",
813
  " <th>0</th>\n",
814
+ " <td>1714694400000</td>\n",
815
+ " <td>2024-05-03 00:00:00</td>\n",
816
+ " <td>2024-05-03</td>\n",
817
  " <td>0</td>\n",
818
+ " <td>14.3</td>\n",
819
+ " <td>65.0</td>\n",
 
820
  " <td>0.0</td>\n",
821
  " <td>0.0</td>\n",
822
  " <td>0.0</td>\n",
823
+ " <td>1.0</td>\n",
824
+ " <td>25.0</td>\n",
825
+ " <td>20.5</td>\n",
826
+ " <td>36.0</td>\n",
827
  " </tr>\n",
828
  " <tr>\n",
829
  " <th>1</th>\n",
830
+ " <td>1714698000000</td>\n",
831
+ " <td>2024-05-03 01:00:00</td>\n",
832
+ " <td>2024-05-03</td>\n",
833
  " <td>1</td>\n",
834
+ " <td>13.6</td>\n",
835
+ " <td>69.0</td>\n",
836
  " <td>0.0</td>\n",
837
  " <td>0.0</td>\n",
838
  " <td>0.0</td>\n",
839
  " <td>0.0</td>\n",
840
+ " <td>12.0</td>\n",
841
+ " <td>21.6</td>\n",
842
+ " <td>37.4</td>\n",
843
  " </tr>\n",
844
  " <tr>\n",
845
  " <th>2</th>\n",
846
+ " <td>1714701600000</td>\n",
847
+ " <td>2024-05-03 02:00:00</td>\n",
848
+ " <td>2024-05-03</td>\n",
849
  " <td>2</td>\n",
850
+ " <td>13.0</td>\n",
851
+ " <td>72.0</td>\n",
852
  " <td>0.0</td>\n",
853
  " <td>0.0</td>\n",
854
  " <td>0.0</td>\n",
855
+ " <td>0.0</td>\n",
856
+ " <td>7.0</td>\n",
857
+ " <td>20.9</td>\n",
858
+ " <td>37.4</td>\n",
859
  " </tr>\n",
860
  " <tr>\n",
861
  " <th>3</th>\n",
862
+ " <td>1714705200000</td>\n",
863
+ " <td>2024-05-03 03:00:00</td>\n",
864
+ " <td>2024-05-03</td>\n",
865
  " <td>3</td>\n",
866
+ " <td>12.7</td>\n",
867
+ " <td>73.0</td>\n",
 
 
868
  " <td>0.0</td>\n",
869
+ " <td>0.0</td>\n",
870
+ " <td>0.0</td>\n",
871
+ " <td>1.0</td>\n",
872
+ " <td>26.0</td>\n",
873
+ " <td>19.8</td>\n",
874
+ " <td>34.6</td>\n",
875
  " </tr>\n",
876
  " <tr>\n",
877
  " <th>4</th>\n",
878
+ " <td>1714708800000</td>\n",
879
+ " <td>2024-05-03 04:00:00</td>\n",
880
+ " <td>2024-05-03</td>\n",
881
  " <td>4</td>\n",
882
+ " <td>12.4</td>\n",
883
  " <td>73.0</td>\n",
884
  " <td>0.0</td>\n",
885
  " <td>0.0</td>\n",
886
  " <td>0.0</td>\n",
887
  " <td>2.0</td>\n",
888
+ " <td>54.0</td>\n",
889
+ " <td>18.7</td>\n",
890
+ " <td>33.8</td>\n",
891
  " </tr>\n",
892
  " </tbody>\n",
893
  "</table>\n",
 
895
  ],
896
  "text/plain": [
897
  " timestamp datetime date hour temperature_2m \\\n",
898
+ "0 1714694400000 2024-05-03 00:00:00 2024-05-03 0 14.3 \n",
899
+ "1 1714698000000 2024-05-03 01:00:00 2024-05-03 1 13.6 \n",
900
+ "2 1714701600000 2024-05-03 02:00:00 2024-05-03 2 13.0 \n",
901
+ "3 1714705200000 2024-05-03 03:00:00 2024-05-03 3 12.7 \n",
902
+ "4 1714708800000 2024-05-03 04:00:00 2024-05-03 4 12.4 \n",
903
  "\n",
904
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
905
+ "0 65.0 0.0 0.0 0.0 1.0 \n",
906
+ "1 69.0 0.0 0.0 0.0 0.0 \n",
907
+ "2 72.0 0.0 0.0 0.0 0.0 \n",
908
+ "3 73.0 0.0 0.0 0.0 1.0 \n",
909
  "4 73.0 0.0 0.0 0.0 2.0 \n",
910
  "\n",
911
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
912
+ "0 25.0 20.5 36.0 \n",
913
+ "1 12.0 21.6 37.4 \n",
914
+ "2 7.0 20.9 37.4 \n",
915
+ "3 26.0 19.8 34.6 \n",
916
+ "4 54.0 18.7 33.8 "
917
  ]
918
  },
919
+ "execution_count": 13,
920
  "metadata": {},
921
  "output_type": "execute_result"
922
  }
 
928
  },
929
  {
930
  "cell_type": "code",
931
+ "execution_count": 14,
932
  "metadata": {},
933
  "outputs": [
934
  {
 
970
  " <tbody>\n",
971
  " <tr>\n",
972
  " <th>115</th>\n",
973
+ " <td>1715108400000</td>\n",
974
+ " <td>2024-05-07 19:00:00</td>\n",
975
+ " <td>2024-05-07</td>\n",
976
  " <td>19</td>\n",
977
+ " <td>12.0</td>\n",
978
+ " <td>41.0</td>\n",
 
 
979
  " <td>0.0</td>\n",
980
+ " <td>0.0</td>\n",
981
+ " <td>0.0</td>\n",
982
+ " <td>0.0</td>\n",
983
+ " <td>0.0</td>\n",
984
+ " <td>4.2</td>\n",
985
+ " <td>10.8</td>\n",
986
  " </tr>\n",
987
  " <tr>\n",
988
  " <th>116</th>\n",
989
+ " <td>1715112000000</td>\n",
990
+ " <td>2024-05-07 20:00:00</td>\n",
991
+ " <td>2024-05-07</td>\n",
992
  " <td>20</td>\n",
993
+ " <td>10.7</td>\n",
994
+ " <td>49.0</td>\n",
 
 
995
  " <td>0.0</td>\n",
996
+ " <td>0.0</td>\n",
997
+ " <td>0.0</td>\n",
998
+ " <td>0.0</td>\n",
999
+ " <td>0.0</td>\n",
1000
+ " <td>3.6</td>\n",
1001
+ " <td>8.3</td>\n",
1002
  " </tr>\n",
1003
  " <tr>\n",
1004
  " <th>117</th>\n",
1005
+ " <td>1715115600000</td>\n",
1006
+ " <td>2024-05-07 21:00:00</td>\n",
1007
+ " <td>2024-05-07</td>\n",
1008
  " <td>21</td>\n",
1009
+ " <td>9.6</td>\n",
1010
+ " <td>56.0</td>\n",
 
 
1011
  " <td>0.0</td>\n",
1012
+ " <td>0.0</td>\n",
1013
+ " <td>0.0</td>\n",
1014
+ " <td>0.0</td>\n",
1015
+ " <td>0.0</td>\n",
1016
+ " <td>3.2</td>\n",
1017
+ " <td>5.4</td>\n",
1018
  " </tr>\n",
1019
  " <tr>\n",
1020
  " <th>118</th>\n",
1021
+ " <td>1715119200000</td>\n",
1022
+ " <td>2024-05-07 22:00:00</td>\n",
1023
+ " <td>2024-05-07</td>\n",
1024
  " <td>22</td>\n",
1025
+ " <td>8.7</td>\n",
1026
+ " <td>58.0</td>\n",
 
 
1027
  " <td>0.0</td>\n",
1028
+ " <td>0.0</td>\n",
1029
+ " <td>0.0</td>\n",
1030
+ " <td>0.0</td>\n",
1031
+ " <td>0.0</td>\n",
1032
+ " <td>3.3</td>\n",
1033
+ " <td>5.8</td>\n",
1034
  " </tr>\n",
1035
  " <tr>\n",
1036
  " <th>119</th>\n",
1037
+ " <td>1715122800000</td>\n",
1038
+ " <td>2024-05-07 23:00:00</td>\n",
1039
+ " <td>2024-05-07</td>\n",
1040
  " <td>23</td>\n",
1041
+ " <td>7.9</td>\n",
1042
+ " <td>57.0</td>\n",
 
 
1043
  " <td>0.0</td>\n",
1044
+ " <td>0.0</td>\n",
1045
+ " <td>0.0</td>\n",
1046
+ " <td>0.0</td>\n",
1047
+ " <td>0.0</td>\n",
1048
+ " <td>3.8</td>\n",
1049
+ " <td>6.5</td>\n",
1050
  " </tr>\n",
1051
  " </tbody>\n",
1052
  "</table>\n",
 
1054
  ],
1055
  "text/plain": [
1056
  " timestamp datetime date hour temperature_2m \\\n",
1057
+ "115 1715108400000 2024-05-07 19:00:00 2024-05-07 19 12.0 \n",
1058
+ "116 1715112000000 2024-05-07 20:00:00 2024-05-07 20 10.7 \n",
1059
+ "117 1715115600000 2024-05-07 21:00:00 2024-05-07 21 9.6 \n",
1060
+ "118 1715119200000 2024-05-07 22:00:00 2024-05-07 22 8.7 \n",
1061
+ "119 1715122800000 2024-05-07 23:00:00 2024-05-07 23 7.9 \n",
1062
  "\n",
1063
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1064
+ "115 41.0 0.0 0.0 0.0 0.0 \n",
1065
+ "116 49.0 0.0 0.0 0.0 0.0 \n",
1066
+ "117 56.0 0.0 0.0 0.0 0.0 \n",
1067
+ "118 58.0 0.0 0.0 0.0 0.0 \n",
1068
+ "119 57.0 0.0 0.0 0.0 0.0 \n",
1069
  "\n",
1070
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1071
+ "115 0.0 4.2 10.8 \n",
1072
+ "116 0.0 3.6 8.3 \n",
1073
+ "117 0.0 3.2 5.4 \n",
1074
+ "118 0.0 3.3 5.8 \n",
1075
+ "119 0.0 3.8 6.5 "
1076
  ]
1077
  },
1078
+ "execution_count": 14,
1079
  "metadata": {},
1080
  "output_type": "execute_result"
1081
  }
 
1087
  },
1088
  {
1089
  "cell_type": "code",
1090
+ "execution_count": 15,
1091
  "metadata": {},
1092
  "outputs": [
1093
  {
 
1102
  " 0 timestamp 120 non-null int64 \n",
1103
  " 1 datetime 120 non-null datetime64[ns]\n",
1104
  " 2 date 120 non-null object \n",
1105
+ " 3 hour 120 non-null int64 \n",
1106
  " 4 temperature_2m 120 non-null float64 \n",
1107
  " 5 relative_humidity_2m 120 non-null float64 \n",
1108
  " 6 precipitation 120 non-null float64 \n",
 
1112
  " 10 cloud_cover 120 non-null float64 \n",
1113
  " 11 wind_speed_10m 120 non-null float64 \n",
1114
  " 12 wind_gusts_10m 120 non-null float64 \n",
1115
+ "dtypes: datetime64[ns](1), float64(9), int64(2), object(1)\n",
1116
+ "memory usage: 12.3+ KB\n"
1117
  ]
1118
  }
1119
  ],
 
1132
  },
1133
  {
1134
  "cell_type": "code",
1135
+ "execution_count": 16,
1136
  "metadata": {},
1137
  "outputs": [],
1138
  "source": [
1139
+ "calender_df = calendar.dk_calendar()"
1140
  ]
1141
  },
1142
  {
1143
  "cell_type": "code",
1144
+ "execution_count": 17,
1145
  "metadata": {},
1146
  "outputs": [
1147
  {
 
1170
  " <th>day</th>\n",
1171
  " <th>month</th>\n",
1172
  " <th>year</th>\n",
1173
+ " <th>workday</th>\n",
1174
  " </tr>\n",
1175
  " </thead>\n",
1176
  " <tbody>\n",
 
1181
  " <td>1</td>\n",
1182
  " <td>1</td>\n",
1183
  " <td>2022</td>\n",
1184
+ " <td>0</td>\n",
1185
  " </tr>\n",
1186
  " <tr>\n",
1187
  " <th>1</th>\n",
 
1190
  " <td>2</td>\n",
1191
  " <td>1</td>\n",
1192
  " <td>2022</td>\n",
1193
+ " <td>0</td>\n",
1194
  " </tr>\n",
1195
  " <tr>\n",
1196
  " <th>2</th>\n",
 
1199
  " <td>3</td>\n",
1200
  " <td>1</td>\n",
1201
  " <td>2022</td>\n",
1202
+ " <td>1</td>\n",
1203
  " </tr>\n",
1204
  " <tr>\n",
1205
  " <th>3</th>\n",
 
1208
  " <td>4</td>\n",
1209
  " <td>1</td>\n",
1210
  " <td>2022</td>\n",
1211
+ " <td>1</td>\n",
1212
  " </tr>\n",
1213
  " <tr>\n",
1214
  " <th>4</th>\n",
 
1217
  " <td>5</td>\n",
1218
  " <td>1</td>\n",
1219
  " <td>2022</td>\n",
1220
+ " <td>1</td>\n",
1221
  " </tr>\n",
1222
  " </tbody>\n",
1223
  "</table>\n",
1224
  "</div>"
1225
  ],
1226
  "text/plain": [
1227
+ " date dayofweek day month year workday\n",
1228
+ "0 2022-01-01 5 1 1 2022 0\n",
1229
+ "1 2022-01-02 6 2 1 2022 0\n",
1230
+ "2 2022-01-03 0 3 1 2022 1\n",
1231
+ "3 2022-01-04 1 4 1 2022 1\n",
1232
+ "4 2022-01-05 2 5 1 2022 1"
1233
  ]
1234
  },
1235
+ "execution_count": 17,
1236
  "metadata": {},
1237
  "output_type": "execute_result"
1238
  }
 
1244
  },
1245
  {
1246
  "cell_type": "code",
1247
+ "execution_count": 18,
1248
  "metadata": {},
1249
  "outputs": [
1250
  {
 
1273
  " <th>day</th>\n",
1274
  " <th>month</th>\n",
1275
  " <th>year</th>\n",
1276
+ " <th>workday</th>\n",
1277
  " </tr>\n",
1278
  " </thead>\n",
1279
  " <tbody>\n",
 
1284
  " <td>27</td>\n",
1285
  " <td>12</td>\n",
1286
  " <td>2024</td>\n",
1287
+ " <td>1</td>\n",
1288
  " </tr>\n",
1289
  " <tr>\n",
1290
  " <th>1092</th>\n",
 
1293
  " <td>28</td>\n",
1294
  " <td>12</td>\n",
1295
  " <td>2024</td>\n",
1296
+ " <td>0</td>\n",
1297
  " </tr>\n",
1298
  " <tr>\n",
1299
  " <th>1093</th>\n",
 
1302
  " <td>29</td>\n",
1303
  " <td>12</td>\n",
1304
  " <td>2024</td>\n",
1305
+ " <td>0</td>\n",
1306
  " </tr>\n",
1307
  " <tr>\n",
1308
  " <th>1094</th>\n",
 
1311
  " <td>30</td>\n",
1312
  " <td>12</td>\n",
1313
  " <td>2024</td>\n",
1314
+ " <td>1</td>\n",
1315
  " </tr>\n",
1316
  " <tr>\n",
1317
  " <th>1095</th>\n",
 
1320
  " <td>31</td>\n",
1321
  " <td>12</td>\n",
1322
  " <td>2024</td>\n",
1323
+ " <td>1</td>\n",
1324
  " </tr>\n",
1325
  " </tbody>\n",
1326
  "</table>\n",
1327
  "</div>"
1328
  ],
1329
  "text/plain": [
1330
+ " date dayofweek day month year workday\n",
1331
+ "1091 2024-12-27 4 27 12 2024 1\n",
1332
+ "1092 2024-12-28 5 28 12 2024 0\n",
1333
+ "1093 2024-12-29 6 29 12 2024 0\n",
1334
+ "1094 2024-12-30 0 30 12 2024 1\n",
1335
+ "1095 2024-12-31 1 31 12 2024 1"
1336
  ]
1337
  },
1338
+ "execution_count": 18,
1339
  "metadata": {},
1340
  "output_type": "execute_result"
1341
  }
 
1347
  },
1348
  {
1349
  "cell_type": "code",
1350
+ "execution_count": 19,
1351
  "metadata": {},
1352
  "outputs": [
1353
  {
 
1360
  " # Column Non-Null Count Dtype \n",
1361
  "--- ------ -------------- ----- \n",
1362
  " 0 date 1096 non-null object\n",
1363
+ " 1 dayofweek 1096 non-null int64 \n",
1364
+ " 2 day 1096 non-null int64 \n",
1365
+ " 3 month 1096 non-null int64 \n",
1366
+ " 4 year 1096 non-null int64 \n",
1367
+ " 5 workday 1096 non-null int64 \n",
1368
+ "dtypes: int64(5), object(1)\n",
1369
+ "memory usage: 51.5+ KB\n"
1370
  ]
1371
  }
1372
  ],
 
1377
  },
1378
  {
1379
  "cell_type": "code",
1380
+ "execution_count": 20,
1381
  "metadata": {},
1382
  "outputs": [
1383
  {
 
1390
  " # Column Non-Null Count Dtype \n",
1391
  "--- ------ -------------- ----- \n",
1392
  " 0 date 1096 non-null object\n",
1393
+ " 1 dayofweek 1096 non-null int64 \n",
1394
+ " 2 day 1096 non-null int64 \n",
1395
+ " 3 month 1096 non-null int64 \n",
1396
+ " 4 year 1096 non-null int64 \n",
1397
+ " 5 workday 1096 non-null int64 \n",
1398
+ "dtypes: int64(5), object(1)\n",
1399
+ "memory usage: 51.5+ KB\n"
1400
  ]
1401
  }
1402
  ],
 
1416
  },
1417
  {
1418
  "cell_type": "code",
1419
+ "execution_count": 21,
1420
  "metadata": {},
1421
  "outputs": [
1422
  {
1423
  "name": "stdout",
1424
  "output_type": "stream",
1425
  "text": [
 
1426
  "Connected. Call `.close()` to terminate connection gracefully.\n",
1427
  "\n",
1428
+ "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/554133\n",
1429
  "Connected. Call `.close()` to terminate connection gracefully.\n"
1430
  ]
1431
  }
 
1462
  },
1463
  {
1464
  "cell_type": "code",
1465
+ "execution_count": 22,
1466
  "metadata": {},
1467
  "outputs": [],
1468
  "source": [
 
1486
  },
1487
  {
1488
  "cell_type": "code",
1489
+ "execution_count": 23,
1490
  "metadata": {},
1491
  "outputs": [
1492
  {
 
1494
  "output_type": "stream",
1495
  "text": [
1496
  "Feature Group created successfully, explore it at \n",
1497
+ "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/778586\n"
1498
  ]
1499
  },
1500
  {
1501
+ "data": {
1502
+ "application/vnd.jupyter.widget-view+json": {
1503
+ "model_id": "8d08f0f4717746c1b7c3b16c7490cf51",
1504
+ "version_major": 2,
1505
+ "version_minor": 0
1506
+ },
1507
+ "text/plain": [
1508
+ "Uploading Dataframe: 0.00% | | Rows 0/20469 | Elapsed Time: 00:00 | Remaining Time: ?"
1509
+ ]
1510
+ },
1511
+ "metadata": {},
1512
+ "output_type": "display_data"
1513
  },
1514
  {
1515
  "name": "stdout",
 
1517
  "text": [
1518
  "Launching job: electricity_prices_1_offline_fg_materialization\n",
1519
  "Job started successfully, you can follow the progress at \n",
1520
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
1521
  ]
1522
  },
1523
  {
1524
  "data": {
1525
  "text/plain": [
1526
+ "(<hsfs.core.job.Job at 0x17f2dc950>, None)"
1527
  ]
1528
  },
1529
+ "execution_count": 23,
1530
  "metadata": {},
1531
  "output_type": "execute_result"
1532
  }
 
1545
  },
1546
  {
1547
  "cell_type": "code",
1548
+ "execution_count": 24,
1549
  "metadata": {},
1550
  "outputs": [],
1551
  "source": [
 
1567
  "cell_type": "markdown",
1568
  "metadata": {},
1569
  "source": [
1570
+ "We replicate the process for both the `weather_fg` and `danish_holidays_fg` by establishing feature groups and inserting the dataframes into their respective feature groups."
1571
  ]
1572
  },
1573
  {
1574
  "cell_type": "code",
1575
+ "execution_count": 25,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1576
  "metadata": {},
1577
  "outputs": [],
1578
  "source": [
 
1589
  },
1590
  {
1591
  "cell_type": "code",
1592
+ "execution_count": 26,
1593
  "metadata": {},
1594
  "outputs": [
1595
  {
 
1597
  "output_type": "stream",
1598
  "text": [
1599
  "Feature Group created successfully, explore it at \n",
1600
+ "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/777575\n"
1601
  ]
1602
  },
1603
  {
1604
+ "data": {
1605
+ "application/vnd.jupyter.widget-view+json": {
1606
+ "model_id": "5978d081e84c49999233b2b00aa46be7",
1607
+ "version_major": 2,
1608
+ "version_minor": 0
1609
+ },
1610
+ "text/plain": [
1611
+ "Uploading Dataframe: 0.00% | | Rows 0/20448 | Elapsed Time: 00:00 | Remaining Time: ?"
1612
+ ]
1613
+ },
1614
+ "metadata": {},
1615
+ "output_type": "display_data"
1616
  },
1617
  {
1618
  "name": "stdout",
 
1620
  "text": [
1621
  "Launching job: weather_measurements_1_offline_fg_materialization\n",
1622
  "Job started successfully, you can follow the progress at \n",
1623
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
1624
  ]
1625
  },
1626
  {
1627
  "data": {
1628
  "text/plain": [
1629
+ "(<hsfs.core.job.Job at 0x308979690>, None)"
1630
  ]
1631
  },
1632
+ "execution_count": 26,
1633
  "metadata": {},
1634
  "output_type": "execute_result"
1635
  }
 
1641
  },
1642
  {
1643
  "cell_type": "code",
1644
+ "execution_count": 27,
1645
  "metadata": {},
1646
  "outputs": [],
1647
  "source": [
 
1669
  },
1670
  {
1671
  "cell_type": "code",
1672
+ "execution_count": 28,
1673
  "metadata": {},
1674
  "outputs": [],
1675
  "source": [
 
1685
  },
1686
  {
1687
  "cell_type": "code",
1688
+ "execution_count": 29,
1689
  "metadata": {},
1690
  "outputs": [
1691
  {
 
1693
  "output_type": "stream",
1694
  "text": [
1695
  "Feature Group created successfully, explore it at \n",
1696
+ "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/777576\n"
1697
  ]
1698
  },
1699
  {
1700
+ "data": {
1701
+ "application/vnd.jupyter.widget-view+json": {
1702
+ "model_id": "376ce1b46ca6473b830e6ad9af28276a",
1703
+ "version_major": 2,
1704
+ "version_minor": 0
1705
+ },
1706
+ "text/plain": [
1707
+ "Uploading Dataframe: 0.00% | | Rows 0/1096 | Elapsed Time: 00:00 | Remaining Time: ?"
1708
+ ]
1709
+ },
1710
+ "metadata": {},
1711
+ "output_type": "display_data"
1712
  },
1713
  {
1714
  "name": "stdout",
 
1716
  "text": [
1717
  "Launching job: dk_calendar_1_offline_fg_materialization\n",
1718
  "Job started successfully, you can follow the progress at \n",
1719
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/dk_calendar_1_offline_fg_materialization/executions\n"
1720
  ]
1721
  },
1722
  {
1723
  "data": {
1724
  "text/plain": [
1725
+ "(<hsfs.core.job.Job at 0x3088ef590>, None)"
1726
  ]
1727
  },
1728
+ "execution_count": 29,
1729
  "metadata": {},
1730
  "output_type": "execute_result"
1731
  }
 
1737
  },
1738
  {
1739
  "cell_type": "code",
1740
+ "execution_count": 31,
1741
  "metadata": {},
1742
  "outputs": [],
1743
  "source": [
 
1746
  " {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
1747
  " {\"name\": \"day\", \"description\": \"Day number of the week. Monday is 0 and Sunday is 6\"},\n",
1748
  " {\"name\": \"month\", \"description\": \"Month number of the year\"},\n",
1749
+ " {\"name\": \"workday\", \"description\": \"Workday or not a workday\"},\n",
1750
  "]\n",
1751
  "\n",
1752
  "# Updating feature descriptions\n",
notebooks/2_feature_pipeline.ipynb CHANGED
@@ -27,15 +27,15 @@
27
  },
28
  {
29
  "cell_type": "code",
30
- "execution_count": 18,
31
  "metadata": {},
32
  "outputs": [
33
  {
34
  "name": "stdout",
35
  "output_type": "stream",
36
  "text": [
37
- "c:\\Users\\Benj3\\OneDrive\\Dokumenter\\VSCode\\MLOPs-Assignment-\n",
38
- "c:\\Users\\Benj3\\OneDrive\\Dokumenter\\VSCode\\MLOPs-Assignment-\\notebooks\n"
39
  ]
40
  }
41
  ],
@@ -53,7 +53,7 @@
53
  },
54
  {
55
  "cell_type": "code",
56
- "execution_count": 19,
57
  "metadata": {},
58
  "outputs": [],
59
  "source": [
@@ -88,7 +88,7 @@
88
  },
89
  {
90
  "cell_type": "code",
91
- "execution_count": 20,
92
  "metadata": {},
93
  "outputs": [],
94
  "source": [
@@ -101,7 +101,7 @@
101
  },
102
  {
103
  "cell_type": "code",
104
- "execution_count": 21,
105
  "metadata": {},
106
  "outputs": [
107
  {
@@ -135,195 +135,195 @@
135
  " <tbody>\n",
136
  " <tr>\n",
137
  " <th>0</th>\n",
138
- " <td>1714608000000</td>\n",
139
- " <td>2024-05-02 00:00:00</td>\n",
140
- " <td>2024-05-02</td>\n",
141
  " <td>0</td>\n",
142
- " <td>0.10859</td>\n",
143
  " </tr>\n",
144
  " <tr>\n",
145
  " <th>1</th>\n",
146
- " <td>1714611600000</td>\n",
147
- " <td>2024-05-02 01:00:00</td>\n",
148
- " <td>2024-05-02</td>\n",
149
  " <td>1</td>\n",
150
- " <td>0.08160</td>\n",
151
  " </tr>\n",
152
  " <tr>\n",
153
  " <th>2</th>\n",
154
- " <td>1714615200000</td>\n",
155
- " <td>2024-05-02 02:00:00</td>\n",
156
- " <td>2024-05-02</td>\n",
157
  " <td>2</td>\n",
158
- " <td>0.07458</td>\n",
159
  " </tr>\n",
160
  " <tr>\n",
161
  " <th>3</th>\n",
162
- " <td>1714618800000</td>\n",
163
- " <td>2024-05-02 03:00:00</td>\n",
164
- " <td>2024-05-02</td>\n",
165
  " <td>3</td>\n",
166
- " <td>0.05818</td>\n",
167
  " </tr>\n",
168
  " <tr>\n",
169
  " <th>4</th>\n",
170
- " <td>1714622400000</td>\n",
171
- " <td>2024-05-02 04:00:00</td>\n",
172
- " <td>2024-05-02</td>\n",
173
  " <td>4</td>\n",
174
- " <td>0.07928</td>\n",
175
  " </tr>\n",
176
  " <tr>\n",
177
  " <th>5</th>\n",
178
- " <td>1714626000000</td>\n",
179
- " <td>2024-05-02 05:00:00</td>\n",
180
- " <td>2024-05-02</td>\n",
181
  " <td>5</td>\n",
182
- " <td>0.22920</td>\n",
183
  " </tr>\n",
184
  " <tr>\n",
185
  " <th>6</th>\n",
186
- " <td>1714629600000</td>\n",
187
- " <td>2024-05-02 06:00:00</td>\n",
188
- " <td>2024-05-02</td>\n",
189
  " <td>6</td>\n",
190
- " <td>0.29699</td>\n",
191
  " </tr>\n",
192
  " <tr>\n",
193
  " <th>7</th>\n",
194
- " <td>1714633200000</td>\n",
195
- " <td>2024-05-02 07:00:00</td>\n",
196
- " <td>2024-05-02</td>\n",
197
  " <td>7</td>\n",
198
- " <td>0.38605</td>\n",
199
  " </tr>\n",
200
  " <tr>\n",
201
  " <th>8</th>\n",
202
- " <td>1714636800000</td>\n",
203
- " <td>2024-05-02 08:00:00</td>\n",
204
- " <td>2024-05-02</td>\n",
205
  " <td>8</td>\n",
206
- " <td>0.43729</td>\n",
207
  " </tr>\n",
208
  " <tr>\n",
209
  " <th>9</th>\n",
210
- " <td>1714640400000</td>\n",
211
- " <td>2024-05-02 09:00:00</td>\n",
212
- " <td>2024-05-02</td>\n",
213
  " <td>9</td>\n",
214
- " <td>0.23457</td>\n",
215
  " </tr>\n",
216
  " <tr>\n",
217
  " <th>10</th>\n",
218
- " <td>1714644000000</td>\n",
219
- " <td>2024-05-02 10:00:00</td>\n",
220
- " <td>2024-05-02</td>\n",
221
  " <td>10</td>\n",
222
- " <td>0.03804</td>\n",
223
  " </tr>\n",
224
  " <tr>\n",
225
  " <th>11</th>\n",
226
- " <td>1714647600000</td>\n",
227
- " <td>2024-05-02 11:00:00</td>\n",
228
- " <td>2024-05-02</td>\n",
229
  " <td>11</td>\n",
230
- " <td>-0.00060</td>\n",
231
  " </tr>\n",
232
  " <tr>\n",
233
  " <th>12</th>\n",
234
- " <td>1714651200000</td>\n",
235
- " <td>2024-05-02 12:00:00</td>\n",
236
- " <td>2024-05-02</td>\n",
237
  " <td>12</td>\n",
238
- " <td>-0.01290</td>\n",
239
  " </tr>\n",
240
  " <tr>\n",
241
  " <th>13</th>\n",
242
- " <td>1714654800000</td>\n",
243
- " <td>2024-05-02 13:00:00</td>\n",
244
- " <td>2024-05-02</td>\n",
245
  " <td>13</td>\n",
246
- " <td>-0.02014</td>\n",
247
  " </tr>\n",
248
  " <tr>\n",
249
  " <th>14</th>\n",
250
- " <td>1714658400000</td>\n",
251
- " <td>2024-05-02 14:00:00</td>\n",
252
- " <td>2024-05-02</td>\n",
253
  " <td>14</td>\n",
254
- " <td>-0.00037</td>\n",
255
  " </tr>\n",
256
  " <tr>\n",
257
  " <th>15</th>\n",
258
- " <td>1714662000000</td>\n",
259
- " <td>2024-05-02 15:00:00</td>\n",
260
- " <td>2024-05-02</td>\n",
261
  " <td>15</td>\n",
262
- " <td>-0.01037</td>\n",
263
  " </tr>\n",
264
  " <tr>\n",
265
  " <th>16</th>\n",
266
- " <td>1714665600000</td>\n",
267
- " <td>2024-05-02 16:00:00</td>\n",
268
- " <td>2024-05-02</td>\n",
269
  " <td>16</td>\n",
270
- " <td>0.03013</td>\n",
271
  " </tr>\n",
272
  " <tr>\n",
273
  " <th>17</th>\n",
274
- " <td>1714669200000</td>\n",
275
- " <td>2024-05-02 17:00:00</td>\n",
276
- " <td>2024-05-02</td>\n",
277
  " <td>17</td>\n",
278
- " <td>0.26045</td>\n",
279
  " </tr>\n",
280
  " <tr>\n",
281
  " <th>18</th>\n",
282
- " <td>1714672800000</td>\n",
283
- " <td>2024-05-02 18:00:00</td>\n",
284
- " <td>2024-05-02</td>\n",
285
  " <td>18</td>\n",
286
- " <td>0.29125</td>\n",
287
  " </tr>\n",
288
  " <tr>\n",
289
  " <th>19</th>\n",
290
- " <td>1714676400000</td>\n",
291
- " <td>2024-05-02 19:00:00</td>\n",
292
- " <td>2024-05-02</td>\n",
293
  " <td>19</td>\n",
294
- " <td>0.31266</td>\n",
295
  " </tr>\n",
296
  " <tr>\n",
297
  " <th>20</th>\n",
298
- " <td>1714680000000</td>\n",
299
- " <td>2024-05-02 20:00:00</td>\n",
300
- " <td>2024-05-02</td>\n",
301
  " <td>20</td>\n",
302
- " <td>0.31318</td>\n",
303
  " </tr>\n",
304
  " <tr>\n",
305
  " <th>21</th>\n",
306
- " <td>1714683600000</td>\n",
307
- " <td>2024-05-02 21:00:00</td>\n",
308
- " <td>2024-05-02</td>\n",
309
  " <td>21</td>\n",
310
- " <td>0.31266</td>\n",
311
  " </tr>\n",
312
  " <tr>\n",
313
  " <th>22</th>\n",
314
- " <td>1714687200000</td>\n",
315
- " <td>2024-05-02 22:00:00</td>\n",
316
- " <td>2024-05-02</td>\n",
317
  " <td>22</td>\n",
318
- " <td>0.28245</td>\n",
319
  " </tr>\n",
320
  " <tr>\n",
321
  " <th>23</th>\n",
322
- " <td>1714690800000</td>\n",
323
- " <td>2024-05-02 23:00:00</td>\n",
324
- " <td>2024-05-02</td>\n",
325
  " <td>23</td>\n",
326
- " <td>0.25306</td>\n",
327
  " </tr>\n",
328
  " </tbody>\n",
329
  "</table>\n",
@@ -331,33 +331,33 @@
331
  ],
332
  "text/plain": [
333
  " timestamp datetime date hour dk1_spotpricedkk_kwh\n",
334
- "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 0.10859\n",
335
- "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 0.08160\n",
336
- "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 0.07458\n",
337
- "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 0.05818\n",
338
- "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 0.07928\n",
339
- "5 1714626000000 2024-05-02 05:00:00 2024-05-02 5 0.22920\n",
340
- "6 1714629600000 2024-05-02 06:00:00 2024-05-02 6 0.29699\n",
341
- "7 1714633200000 2024-05-02 07:00:00 2024-05-02 7 0.38605\n",
342
- "8 1714636800000 2024-05-02 08:00:00 2024-05-02 8 0.43729\n",
343
- "9 1714640400000 2024-05-02 09:00:00 2024-05-02 9 0.23457\n",
344
- "10 1714644000000 2024-05-02 10:00:00 2024-05-02 10 0.03804\n",
345
- "11 1714647600000 2024-05-02 11:00:00 2024-05-02 11 -0.00060\n",
346
- "12 1714651200000 2024-05-02 12:00:00 2024-05-02 12 -0.01290\n",
347
- "13 1714654800000 2024-05-02 13:00:00 2024-05-02 13 -0.02014\n",
348
- "14 1714658400000 2024-05-02 14:00:00 2024-05-02 14 -0.00037\n",
349
- "15 1714662000000 2024-05-02 15:00:00 2024-05-02 15 -0.01037\n",
350
- "16 1714665600000 2024-05-02 16:00:00 2024-05-02 16 0.03013\n",
351
- "17 1714669200000 2024-05-02 17:00:00 2024-05-02 17 0.26045\n",
352
- "18 1714672800000 2024-05-02 18:00:00 2024-05-02 18 0.29125\n",
353
- "19 1714676400000 2024-05-02 19:00:00 2024-05-02 19 0.31266\n",
354
- "20 1714680000000 2024-05-02 20:00:00 2024-05-02 20 0.31318\n",
355
- "21 1714683600000 2024-05-02 21:00:00 2024-05-02 21 0.31266\n",
356
- "22 1714687200000 2024-05-02 22:00:00 2024-05-02 22 0.28245\n",
357
- "23 1714690800000 2024-05-02 23:00:00 2024-05-02 23 0.25306"
358
  ]
359
  },
360
- "execution_count": 21,
361
  "metadata": {},
362
  "output_type": "execute_result"
363
  }
@@ -367,36 +367,6 @@
367
  "electricity_df"
368
  ]
369
  },
370
- {
371
- "cell_type": "markdown",
372
- "metadata": {},
373
- "source": [
374
- "### <span style=\"color:#2656a3;\">☀️💨 Forecast Renewable Energy next day from Energinet"
375
- ]
376
- },
377
- {
378
- "cell_type": "code",
379
- "execution_count": 22,
380
- "metadata": {},
381
- "outputs": [],
382
- "source": [
383
- "# # Fetching non-historical forecast of renewable energy data for area DK1\n",
384
- "# forecast_renewable_energy_df = electricity_prices.forecast_renewable_energy(\n",
385
- "# historical=False,\n",
386
- "# area=[\"DK1\"]\n",
387
- "# )"
388
- ]
389
- },
390
- {
391
- "cell_type": "code",
392
- "execution_count": 23,
393
- "metadata": {},
394
- "outputs": [],
395
- "source": [
396
- "# # Display the forecast_renewable_energy dataframe\n",
397
- "# forecast_renewable_energy_df"
398
- ]
399
- },
400
  {
401
  "cell_type": "markdown",
402
  "metadata": {},
@@ -408,41 +378,12 @@
408
  "cell_type": "markdown",
409
  "metadata": {},
410
  "source": [
411
- "#### <span style=\"color:#2656a3;\"> 🕰️ Historical Weather Measures"
412
- ]
413
- },
414
- {
415
- "cell_type": "code",
416
- "execution_count": 24,
417
- "metadata": {},
418
- "outputs": [],
419
- "source": [
420
- "# Fetching non-historical weather data for area DK1\n",
421
- "#historical_weather_df = weather_measures.historical_weather_measures(\n",
422
- "# historical=False\n",
423
- "#)"
424
- ]
425
- },
426
- {
427
- "cell_type": "code",
428
- "execution_count": 25,
429
- "metadata": {},
430
- "outputs": [],
431
- "source": [
432
- "# Display the first 5 rows of the dataframe\n",
433
- "#historical_weather_df.head()"
434
- ]
435
- },
436
- {
437
- "cell_type": "markdown",
438
- "metadata": {},
439
- "source": [
440
- "#### <span style=\"color:#2656a3;\"> 🌈 Weather Forecast"
441
  ]
442
  },
443
  {
444
  "cell_type": "code",
445
- "execution_count": 26,
446
  "metadata": {},
447
  "outputs": [],
448
  "source": [
@@ -454,7 +395,7 @@
454
  },
455
  {
456
  "cell_type": "code",
457
- "execution_count": 27,
458
  "metadata": {},
459
  "outputs": [
460
  {
@@ -496,83 +437,83 @@
496
  " <tbody>\n",
497
  " <tr>\n",
498
  " <th>0</th>\n",
499
- " <td>1714608000000</td>\n",
500
- " <td>2024-05-02 00:00:00</td>\n",
501
- " <td>2024-05-02</td>\n",
502
  " <td>0</td>\n",
503
- " <td>14.9</td>\n",
504
- " <td>66.0</td>\n",
505
  " <td>0.0</td>\n",
506
  " <td>0.0</td>\n",
507
  " <td>0.0</td>\n",
508
- " <td>0.0</td>\n",
509
- " <td>13.0</td>\n",
510
- " <td>21.6</td>\n",
511
- " <td>41.4</td>\n",
512
  " </tr>\n",
513
  " <tr>\n",
514
  " <th>1</th>\n",
515
- " <td>1714611600000</td>\n",
516
- " <td>2024-05-02 01:00:00</td>\n",
517
- " <td>2024-05-02</td>\n",
518
  " <td>1</td>\n",
519
- " <td>14.2</td>\n",
520
- " <td>71.0</td>\n",
521
  " <td>0.0</td>\n",
522
  " <td>0.0</td>\n",
523
  " <td>0.0</td>\n",
524
  " <td>0.0</td>\n",
525
- " <td>4.0</td>\n",
526
- " <td>20.5</td>\n",
527
- " <td>37.1</td>\n",
528
  " </tr>\n",
529
  " <tr>\n",
530
  " <th>2</th>\n",
531
- " <td>1714615200000</td>\n",
532
- " <td>2024-05-02 02:00:00</td>\n",
533
- " <td>2024-05-02</td>\n",
534
  " <td>2</td>\n",
535
- " <td>13.4</td>\n",
536
- " <td>73.0</td>\n",
537
  " <td>0.0</td>\n",
538
  " <td>0.0</td>\n",
539
  " <td>0.0</td>\n",
540
- " <td>2.0</td>\n",
541
- " <td>70.0</td>\n",
542
- " <td>21.2</td>\n",
543
- " <td>36.7</td>\n",
544
  " </tr>\n",
545
  " <tr>\n",
546
  " <th>3</th>\n",
547
- " <td>1714618800000</td>\n",
548
- " <td>2024-05-02 03:00:00</td>\n",
549
- " <td>2024-05-02</td>\n",
550
  " <td>3</td>\n",
551
- " <td>13.2</td>\n",
552
- " <td>72.0</td>\n",
553
- " <td>0.1</td>\n",
554
- " <td>0.1</td>\n",
555
  " <td>0.0</td>\n",
556
- " <td>51.0</td>\n",
557
- " <td>51.0</td>\n",
558
- " <td>22.3</td>\n",
559
- " <td>39.2</td>\n",
560
  " </tr>\n",
561
  " <tr>\n",
562
  " <th>4</th>\n",
563
- " <td>1714622400000</td>\n",
564
- " <td>2024-05-02 04:00:00</td>\n",
565
- " <td>2024-05-02</td>\n",
566
  " <td>4</td>\n",
567
- " <td>12.7</td>\n",
568
  " <td>73.0</td>\n",
569
  " <td>0.0</td>\n",
570
  " <td>0.0</td>\n",
571
  " <td>0.0</td>\n",
572
  " <td>2.0</td>\n",
573
- " <td>78.0</td>\n",
574
- " <td>21.6</td>\n",
575
- " <td>38.9</td>\n",
576
  " </tr>\n",
577
  " <tr>\n",
578
  " <th>...</th>\n",
@@ -592,457 +533,140 @@
592
  " </tr>\n",
593
  " <tr>\n",
594
  " <th>115</th>\n",
595
- " <td>1715022000000</td>\n",
596
- " <td>2024-05-06 19:00:00</td>\n",
597
- " <td>2024-05-06</td>\n",
598
  " <td>19</td>\n",
599
- " <td>10.7</td>\n",
600
- " <td>91.0</td>\n",
601
- " <td>1.4</td>\n",
602
- " <td>1.4</td>\n",
603
  " <td>0.0</td>\n",
604
- " <td>61.0</td>\n",
605
- " <td>100.0</td>\n",
606
- " <td>16.6</td>\n",
607
- " <td>32.0</td>\n",
608
- " </tr>\n",
609
- " <tr>\n",
610
- " <th>116</th>\n",
611
- " <td>1715025600000</td>\n",
612
- " <td>2024-05-06 20:00:00</td>\n",
613
- " <td>2024-05-06</td>\n",
614
- " <td>20</td>\n",
615
- " <td>10.1</td>\n",
616
- " <td>90.0</td>\n",
617
- " <td>1.4</td>\n",
618
- " <td>1.4</td>\n",
619
  " <td>0.0</td>\n",
620
- " <td>61.0</td>\n",
621
- " <td>100.0</td>\n",
622
- " <td>19.5</td>\n",
623
- " <td>37.1</td>\n",
624
- " </tr>\n",
625
- " <tr>\n",
626
- " <th>117</th>\n",
627
- " <td>1715029200000</td>\n",
628
- " <td>2024-05-06 21:00:00</td>\n",
629
- " <td>2024-05-06</td>\n",
630
- " <td>21</td>\n",
631
- " <td>9.5</td>\n",
632
- " <td>88.0</td>\n",
633
- " <td>1.4</td>\n",
634
- " <td>1.4</td>\n",
635
  " <td>0.0</td>\n",
636
- " <td>61.0</td>\n",
637
- " <td>100.0</td>\n",
638
- " <td>21.6</td>\n",
639
- " <td>42.1</td>\n",
640
- " </tr>\n",
641
- " <tr>\n",
642
- " <th>118</th>\n",
643
- " <td>1715032800000</td>\n",
644
- " <td>2024-05-06 22:00:00</td>\n",
645
- " <td>2024-05-06</td>\n",
646
- " <td>22</td>\n",
647
- " <td>9.3</td>\n",
648
- " <td>86.0</td>\n",
649
- " <td>0.6</td>\n",
650
- " <td>0.6</td>\n",
651
  " <td>0.0</td>\n",
652
- " <td>3.0</td>\n",
653
- " <td>100.0</td>\n",
654
- " <td>22.0</td>\n",
655
- " <td>41.0</td>\n",
656
- " </tr>\n",
657
- " <tr>\n",
658
- " <th>119</th>\n",
659
- " <td>1715036400000</td>\n",
660
- " <td>2024-05-06 23:00:00</td>\n",
661
- " <td>2024-05-06</td>\n",
662
- " <td>23</td>\n",
663
- " <td>9.1</td>\n",
664
- " <td>84.0</td>\n",
665
- " <td>0.6</td>\n",
666
- " <td>0.6</td>\n",
667
  " <td>0.0</td>\n",
668
- " <td>3.0</td>\n",
669
- " <td>100.0</td>\n",
670
- " <td>21.3</td>\n",
671
- " <td>40.3</td>\n",
672
  " </tr>\n",
673
- " </tbody>\n",
674
- "</table>\n",
675
- "<p>120 rows × 13 columns</p>\n",
676
- "</div>"
677
- ],
678
- "text/plain": [
679
- " timestamp datetime date hour temperature_2m \\\n",
680
- "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
681
- "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
682
- "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
683
- "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
684
- "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
685
- ".. ... ... ... ... ... \n",
686
- "115 1715022000000 2024-05-06 19:00:00 2024-05-06 19 10.7 \n",
687
- "116 1715025600000 2024-05-06 20:00:00 2024-05-06 20 10.1 \n",
688
- "117 1715029200000 2024-05-06 21:00:00 2024-05-06 21 9.5 \n",
689
- "118 1715032800000 2024-05-06 22:00:00 2024-05-06 22 9.3 \n",
690
- "119 1715036400000 2024-05-06 23:00:00 2024-05-06 23 9.1 \n",
691
- "\n",
692
- " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
693
- "0 66.0 0.0 0.0 0.0 0.0 \n",
694
- "1 71.0 0.0 0.0 0.0 0.0 \n",
695
- "2 73.0 0.0 0.0 0.0 2.0 \n",
696
- "3 72.0 0.1 0.1 0.0 51.0 \n",
697
- "4 73.0 0.0 0.0 0.0 2.0 \n",
698
- ".. ... ... ... ... ... \n",
699
- "115 91.0 1.4 1.4 0.0 61.0 \n",
700
- "116 90.0 1.4 1.4 0.0 61.0 \n",
701
- "117 88.0 1.4 1.4 0.0 61.0 \n",
702
- "118 86.0 0.6 0.6 0.0 3.0 \n",
703
- "119 84.0 0.6 0.6 0.0 3.0 \n",
704
- "\n",
705
- " cloud_cover wind_speed_10m wind_gusts_10m \n",
706
- "0 13.0 21.6 41.4 \n",
707
- "1 4.0 20.5 37.1 \n",
708
- "2 70.0 21.2 36.7 \n",
709
- "3 51.0 22.3 39.2 \n",
710
- "4 78.0 21.6 38.9 \n",
711
- ".. ... ... ... \n",
712
- "115 100.0 16.6 32.0 \n",
713
- "116 100.0 19.5 37.1 \n",
714
- "117 100.0 21.6 42.1 \n",
715
- "118 100.0 22.0 41.0 \n",
716
- "119 100.0 21.3 40.3 \n",
717
- "\n",
718
- "[120 rows x 13 columns]"
719
- ]
720
- },
721
- "execution_count": 27,
722
- "metadata": {},
723
- "output_type": "execute_result"
724
- }
725
- ],
726
- "source": [
727
- "# Display the weather_forecast_df dataframe\n",
728
- "weather_forecast_df"
729
- ]
730
- },
731
- {
732
- "cell_type": "code",
733
- "execution_count": 28,
734
- "metadata": {},
735
- "outputs": [
736
- {
737
- "data": {
738
- "text/html": [
739
- "<div>\n",
740
- "<style scoped>\n",
741
- " .dataframe tbody tr th:only-of-type {\n",
742
- " vertical-align: middle;\n",
743
- " }\n",
744
- "\n",
745
- " .dataframe tbody tr th {\n",
746
- " vertical-align: top;\n",
747
- " }\n",
748
- "\n",
749
- " .dataframe thead th {\n",
750
- " text-align: right;\n",
751
- " }\n",
752
- "</style>\n",
753
- "<table border=\"1\" class=\"dataframe\">\n",
754
- " <thead>\n",
755
- " <tr style=\"text-align: right;\">\n",
756
- " <th></th>\n",
757
- " <th>timestamp</th>\n",
758
- " <th>datetime</th>\n",
759
- " <th>date</th>\n",
760
- " <th>hour</th>\n",
761
- " <th>temperature_2m</th>\n",
762
- " <th>relative_humidity_2m</th>\n",
763
- " <th>precipitation</th>\n",
764
- " <th>rain</th>\n",
765
- " <th>snowfall</th>\n",
766
- " <th>weather_code</th>\n",
767
- " <th>cloud_cover</th>\n",
768
- " <th>wind_speed_10m</th>\n",
769
- " <th>wind_gusts_10m</th>\n",
770
- " </tr>\n",
771
- " </thead>\n",
772
- " <tbody>\n",
773
  " <tr>\n",
774
- " <th>0</th>\n",
775
- " <td>1714608000000</td>\n",
776
- " <td>2024-05-02 00:00:00</td>\n",
777
- " <td>2024-05-02</td>\n",
778
- " <td>0</td>\n",
779
- " <td>14.9</td>\n",
780
- " <td>66.0</td>\n",
781
  " <td>0.0</td>\n",
782
  " <td>0.0</td>\n",
783
  " <td>0.0</td>\n",
784
  " <td>0.0</td>\n",
785
- " <td>13.0</td>\n",
786
- " <td>21.6</td>\n",
787
- " <td>41.4</td>\n",
788
  " </tr>\n",
789
  " <tr>\n",
790
- " <th>1</th>\n",
791
- " <td>1714611600000</td>\n",
792
- " <td>2024-05-02 01:00:00</td>\n",
793
- " <td>2024-05-02</td>\n",
794
- " <td>1</td>\n",
795
- " <td>14.2</td>\n",
796
- " <td>71.0</td>\n",
797
- " <td>0.0</td>\n",
798
- " <td>0.0</td>\n",
799
  " <td>0.0</td>\n",
800
  " <td>0.0</td>\n",
801
- " <td>4.0</td>\n",
802
- " <td>20.5</td>\n",
803
- " <td>37.1</td>\n",
804
- " </tr>\n",
805
- " <tr>\n",
806
- " <th>2</th>\n",
807
- " <td>1714615200000</td>\n",
808
- " <td>2024-05-02 02:00:00</td>\n",
809
- " <td>2024-05-02</td>\n",
810
- " <td>2</td>\n",
811
- " <td>13.4</td>\n",
812
- " <td>73.0</td>\n",
813
  " <td>0.0</td>\n",
814
  " <td>0.0</td>\n",
815
  " <td>0.0</td>\n",
816
- " <td>2.0</td>\n",
817
- " <td>70.0</td>\n",
818
- " <td>21.2</td>\n",
819
- " <td>36.7</td>\n",
820
  " </tr>\n",
821
  " <tr>\n",
822
- " <th>3</th>\n",
823
- " <td>1714618800000</td>\n",
824
- " <td>2024-05-02 03:00:00</td>\n",
825
- " <td>2024-05-02</td>\n",
826
- " <td>3</td>\n",
827
- " <td>13.2</td>\n",
828
- " <td>72.0</td>\n",
829
- " <td>0.1</td>\n",
830
- " <td>0.1</td>\n",
831
  " <td>0.0</td>\n",
832
- " <td>51.0</td>\n",
833
- " <td>51.0</td>\n",
834
- " <td>22.3</td>\n",
835
- " <td>39.2</td>\n",
836
- " </tr>\n",
837
- " <tr>\n",
838
- " <th>4</th>\n",
839
- " <td>1714622400000</td>\n",
840
- " <td>2024-05-02 04:00:00</td>\n",
841
- " <td>2024-05-02</td>\n",
842
- " <td>4</td>\n",
843
- " <td>12.7</td>\n",
844
- " <td>73.0</td>\n",
845
  " <td>0.0</td>\n",
846
  " <td>0.0</td>\n",
847
  " <td>0.0</td>\n",
848
- " <td>2.0</td>\n",
849
- " <td>78.0</td>\n",
850
- " <td>21.6</td>\n",
851
- " <td>38.9</td>\n",
852
- " </tr>\n",
853
- " </tbody>\n",
854
- "</table>\n",
855
- "</div>"
856
- ],
857
- "text/plain": [
858
- " timestamp datetime date hour temperature_2m \\\n",
859
- "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
860
- "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
861
- "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
862
- "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
863
- "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
864
- "\n",
865
- " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
866
- "0 66.0 0.0 0.0 0.0 0.0 \n",
867
- "1 71.0 0.0 0.0 0.0 0.0 \n",
868
- "2 73.0 0.0 0.0 0.0 2.0 \n",
869
- "3 72.0 0.1 0.1 0.0 51.0 \n",
870
- "4 73.0 0.0 0.0 0.0 2.0 \n",
871
- "\n",
872
- " cloud_cover wind_speed_10m wind_gusts_10m \n",
873
- "0 13.0 21.6 41.4 \n",
874
- "1 4.0 20.5 37.1 \n",
875
- "2 70.0 21.2 36.7 \n",
876
- "3 51.0 22.3 39.2 \n",
877
- "4 78.0 21.6 38.9 "
878
- ]
879
- },
880
- "execution_count": 28,
881
- "metadata": {},
882
- "output_type": "execute_result"
883
- }
884
- ],
885
- "source": [
886
- "# Display the first 5 rows of the weather_forecast dataframe\n",
887
- "weather_forecast_df.head(5)"
888
- ]
889
- },
890
- {
891
- "cell_type": "code",
892
- "execution_count": 29,
893
- "metadata": {},
894
- "outputs": [
895
- {
896
- "data": {
897
- "text/html": [
898
- "<div>\n",
899
- "<style scoped>\n",
900
- " .dataframe tbody tr th:only-of-type {\n",
901
- " vertical-align: middle;\n",
902
- " }\n",
903
- "\n",
904
- " .dataframe tbody tr th {\n",
905
- " vertical-align: top;\n",
906
- " }\n",
907
- "\n",
908
- " .dataframe thead th {\n",
909
- " text-align: right;\n",
910
- " }\n",
911
- "</style>\n",
912
- "<table border=\"1\" class=\"dataframe\">\n",
913
- " <thead>\n",
914
- " <tr style=\"text-align: right;\">\n",
915
- " <th></th>\n",
916
- " <th>timestamp</th>\n",
917
- " <th>datetime</th>\n",
918
- " <th>date</th>\n",
919
- " <th>hour</th>\n",
920
- " <th>temperature_2m</th>\n",
921
- " <th>relative_humidity_2m</th>\n",
922
- " <th>precipitation</th>\n",
923
- " <th>rain</th>\n",
924
- " <th>snowfall</th>\n",
925
- " <th>weather_code</th>\n",
926
- " <th>cloud_cover</th>\n",
927
- " <th>wind_speed_10m</th>\n",
928
- " <th>wind_gusts_10m</th>\n",
929
- " </tr>\n",
930
- " </thead>\n",
931
- " <tbody>\n",
932
- " <tr>\n",
933
- " <th>115</th>\n",
934
- " <td>1715022000000</td>\n",
935
- " <td>2024-05-06 19:00:00</td>\n",
936
- " <td>2024-05-06</td>\n",
937
- " <td>19</td>\n",
938
- " <td>10.7</td>\n",
939
- " <td>91.0</td>\n",
940
- " <td>1.4</td>\n",
941
- " <td>1.4</td>\n",
942
  " <td>0.0</td>\n",
943
- " <td>61.0</td>\n",
944
- " <td>100.0</td>\n",
945
- " <td>16.6</td>\n",
946
- " <td>32.0</td>\n",
947
  " </tr>\n",
948
  " <tr>\n",
949
- " <th>116</th>\n",
950
- " <td>1715025600000</td>\n",
951
- " <td>2024-05-06 20:00:00</td>\n",
952
- " <td>2024-05-06</td>\n",
953
- " <td>20</td>\n",
954
- " <td>10.1</td>\n",
955
- " <td>90.0</td>\n",
956
- " <td>1.4</td>\n",
957
- " <td>1.4</td>\n",
958
  " <td>0.0</td>\n",
959
- " <td>61.0</td>\n",
960
- " <td>100.0</td>\n",
961
- " <td>19.5</td>\n",
962
- " <td>37.1</td>\n",
963
- " </tr>\n",
964
- " <tr>\n",
965
- " <th>117</th>\n",
966
- " <td>1715029200000</td>\n",
967
- " <td>2024-05-06 21:00:00</td>\n",
968
- " <td>2024-05-06</td>\n",
969
- " <td>21</td>\n",
970
- " <td>9.5</td>\n",
971
- " <td>88.0</td>\n",
972
- " <td>1.4</td>\n",
973
- " <td>1.4</td>\n",
974
  " <td>0.0</td>\n",
975
- " <td>61.0</td>\n",
976
- " <td>100.0</td>\n",
977
- " <td>21.6</td>\n",
978
- " <td>42.1</td>\n",
979
- " </tr>\n",
980
- " <tr>\n",
981
- " <th>118</th>\n",
982
- " <td>1715032800000</td>\n",
983
- " <td>2024-05-06 22:00:00</td>\n",
984
- " <td>2024-05-06</td>\n",
985
- " <td>22</td>\n",
986
- " <td>9.3</td>\n",
987
- " <td>86.0</td>\n",
988
- " <td>0.6</td>\n",
989
- " <td>0.6</td>\n",
990
  " <td>0.0</td>\n",
991
- " <td>3.0</td>\n",
992
- " <td>100.0</td>\n",
993
- " <td>22.0</td>\n",
994
- " <td>41.0</td>\n",
995
- " </tr>\n",
996
- " <tr>\n",
997
- " <th>119</th>\n",
998
- " <td>1715036400000</td>\n",
999
- " <td>2024-05-06 23:00:00</td>\n",
1000
- " <td>2024-05-06</td>\n",
1001
- " <td>23</td>\n",
1002
- " <td>9.1</td>\n",
1003
- " <td>84.0</td>\n",
1004
- " <td>0.6</td>\n",
1005
- " <td>0.6</td>\n",
1006
  " <td>0.0</td>\n",
1007
- " <td>3.0</td>\n",
1008
- " <td>100.0</td>\n",
1009
- " <td>21.3</td>\n",
1010
- " <td>40.3</td>\n",
1011
  " </tr>\n",
1012
  " </tbody>\n",
1013
  "</table>\n",
 
1014
  "</div>"
1015
  ],
1016
  "text/plain": [
1017
  " timestamp datetime date hour temperature_2m \\\n",
1018
- "115 1715022000000 2024-05-06 19:00:00 2024-05-06 19 10.7 \n",
1019
- "116 1715025600000 2024-05-06 20:00:00 2024-05-06 20 10.1 \n",
1020
- "117 1715029200000 2024-05-06 21:00:00 2024-05-06 21 9.5 \n",
1021
- "118 1715032800000 2024-05-06 22:00:00 2024-05-06 22 9.3 \n",
1022
- "119 1715036400000 2024-05-06 23:00:00 2024-05-06 23 9.1 \n",
 
 
 
 
 
 
1023
  "\n",
1024
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1025
- "115 91.0 1.4 1.4 0.0 61.0 \n",
1026
- "116 90.0 1.4 1.4 0.0 61.0 \n",
1027
- "117 88.0 1.4 1.4 0.0 61.0 \n",
1028
- "118 86.0 0.6 0.6 0.0 3.0 \n",
1029
- "119 84.0 0.6 0.6 0.0 3.0 \n",
 
 
 
 
 
 
1030
  "\n",
1031
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1032
- "115 100.0 16.6 32.0 \n",
1033
- "116 100.0 19.5 37.1 \n",
1034
- "117 100.0 21.6 42.1 \n",
1035
- "118 100.0 22.0 41.0 \n",
1036
- "119 100.0 21.3 40.3 "
 
 
 
 
 
 
 
 
1037
  ]
1038
  },
1039
- "execution_count": 29,
1040
  "metadata": {},
1041
  "output_type": "execute_result"
1042
  }
1043
  ],
1044
  "source": [
1045
- "weather_forecast_df.tail(5)"
 
1046
  ]
1047
  },
1048
  {
@@ -1056,17 +680,16 @@
1056
  },
1057
  {
1058
  "cell_type": "code",
1059
- "execution_count": 30,
1060
  "metadata": {},
1061
  "outputs": [
1062
  {
1063
  "name": "stdout",
1064
  "output_type": "stream",
1065
  "text": [
1066
- "Connection closed.\n",
1067
  "Connected. Call `.close()` to terminate connection gracefully.\n",
1068
  "\n",
1069
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/556180\n",
1070
  "Connected. Call `.close()` to terminate connection gracefully.\n"
1071
  ]
1072
  }
@@ -1084,7 +707,7 @@
1084
  },
1085
  {
1086
  "cell_type": "code",
1087
- "execution_count": 31,
1088
  "metadata": {},
1089
  "outputs": [],
1090
  "source": [
@@ -1094,11 +717,6 @@
1094
  " version=1,\n",
1095
  ")\n",
1096
  "\n",
1097
- "# forecast_renewable_energy_fg = fs.get_feature_group(\n",
1098
- "# name=\"forecast_renewable_energy\",\n",
1099
- "# version=1,\n",
1100
- "# )\n",
1101
- "\n",
1102
  "weather_fg = fs.get_feature_group(\n",
1103
  " name=\"weather_measurements\",\n",
1104
  " version=1,\n",
@@ -1115,15 +733,22 @@
1115
  },
1116
  {
1117
  "cell_type": "code",
1118
- "execution_count": 32,
1119
  "metadata": {},
1120
  "outputs": [
1121
  {
1122
- "name": "stderr",
1123
- "output_type": "stream",
1124
- "text": [
1125
- "Uploading Dataframe: 100.00% |███��██████| Rows 24/24 | Elapsed Time: 00:06 | Remaining Time: 00:00\n"
1126
- ]
 
 
 
 
 
 
 
1127
  },
1128
  {
1129
  "name": "stdout",
@@ -1131,16 +756,16 @@
1131
  "text": [
1132
  "Launching job: electricity_prices_1_offline_fg_materialization\n",
1133
  "Job started successfully, you can follow the progress at \n",
1134
- "https://c.app.hopsworks.ai/p/556180/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
1135
  ]
1136
  },
1137
  {
1138
  "data": {
1139
  "text/plain": [
1140
- "(<hsfs.core.job.Job at 0x29ebfbd7190>, None)"
1141
  ]
1142
  },
1143
- "execution_count": 32,
1144
  "metadata": {},
1145
  "output_type": "execute_result"
1146
  }
@@ -1153,26 +778,22 @@
1153
  },
1154
  {
1155
  "cell_type": "code",
1156
- "execution_count": 33,
1157
- "metadata": {},
1158
- "outputs": [],
1159
- "source": [
1160
- "# # Inserting the forecast_renewable_energy_df into the feature group named forecast_renewable_energy_fg\n",
1161
- "# forecast_renewable_energy_fg.insert(forecast_renewable_energy_df, \n",
1162
- "# write_options={\"wait_for_job\" : False})"
1163
- ]
1164
- },
1165
- {
1166
- "cell_type": "code",
1167
- "execution_count": 34,
1168
  "metadata": {},
1169
  "outputs": [
1170
  {
1171
- "name": "stderr",
1172
- "output_type": "stream",
1173
- "text": [
1174
- "Uploading Dataframe: 100.00% |██████████| Rows 120/120 | Elapsed Time: 00:06 | Remaining Time: 00:00\n"
1175
- ]
 
 
 
 
 
 
 
1176
  },
1177
  {
1178
  "name": "stdout",
@@ -1180,16 +801,16 @@
1180
  "text": [
1181
  "Launching job: weather_measurements_1_offline_fg_materialization\n",
1182
  "Job started successfully, you can follow the progress at \n",
1183
- "https://c.app.hopsworks.ai/p/556180/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
1184
  ]
1185
  },
1186
  {
1187
  "data": {
1188
  "text/plain": [
1189
- "(<hsfs.core.job.Job at 0x29ebfad6550>, None)"
1190
  ]
1191
  },
1192
- "execution_count": 34,
1193
  "metadata": {},
1194
  "output_type": "execute_result"
1195
  }
 
27
  },
28
  {
29
  "cell_type": "code",
30
+ "execution_count": 1,
31
  "metadata": {},
32
  "outputs": [
33
  {
34
  "name": "stdout",
35
  "output_type": "stream",
36
  "text": [
37
+ "/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-\n",
38
+ "/Users/tobiasmjensen/Documents/aau_bds/m5_data-engineering-and-mlops/exam_assigment/MLOPs-Assignment-/notebooks\n"
39
  ]
40
  }
41
  ],
 
53
  },
54
  {
55
  "cell_type": "code",
56
+ "execution_count": 2,
57
  "metadata": {},
58
  "outputs": [],
59
  "source": [
 
88
  },
89
  {
90
  "cell_type": "code",
91
+ "execution_count": 3,
92
  "metadata": {},
93
  "outputs": [],
94
  "source": [
 
101
  },
102
  {
103
  "cell_type": "code",
104
+ "execution_count": 4,
105
  "metadata": {},
106
  "outputs": [
107
  {
 
135
  " <tbody>\n",
136
  " <tr>\n",
137
  " <th>0</th>\n",
138
+ " <td>1714694400000</td>\n",
139
+ " <td>2024-05-03 00:00:00</td>\n",
140
+ " <td>2024-05-03</td>\n",
141
  " <td>0</td>\n",
142
+ " <td>0.22214</td>\n",
143
  " </tr>\n",
144
  " <tr>\n",
145
  " <th>1</th>\n",
146
+ " <td>1714698000000</td>\n",
147
+ " <td>2024-05-03 01:00:00</td>\n",
148
+ " <td>2024-05-03</td>\n",
149
  " <td>1</td>\n",
150
+ " <td>0.21893</td>\n",
151
  " </tr>\n",
152
  " <tr>\n",
153
  " <th>2</th>\n",
154
+ " <td>1714701600000</td>\n",
155
+ " <td>2024-05-03 02:00:00</td>\n",
156
+ " <td>2024-05-03</td>\n",
157
  " <td>2</td>\n",
158
+ " <td>0.22348</td>\n",
159
  " </tr>\n",
160
  " <tr>\n",
161
  " <th>3</th>\n",
162
+ " <td>1714705200000</td>\n",
163
+ " <td>2024-05-03 03:00:00</td>\n",
164
+ " <td>2024-05-03</td>\n",
165
  " <td>3</td>\n",
166
+ " <td>0.22385</td>\n",
167
  " </tr>\n",
168
  " <tr>\n",
169
  " <th>4</th>\n",
170
+ " <td>1714708800000</td>\n",
171
+ " <td>2024-05-03 04:00:00</td>\n",
172
+ " <td>2024-05-03</td>\n",
173
  " <td>4</td>\n",
174
+ " <td>0.22706</td>\n",
175
  " </tr>\n",
176
  " <tr>\n",
177
  " <th>5</th>\n",
178
+ " <td>1714712400000</td>\n",
179
+ " <td>2024-05-03 05:00:00</td>\n",
180
+ " <td>2024-05-03</td>\n",
181
  " <td>5</td>\n",
182
+ " <td>0.23825</td>\n",
183
  " </tr>\n",
184
  " <tr>\n",
185
  " <th>6</th>\n",
186
+ " <td>1714716000000</td>\n",
187
+ " <td>2024-05-03 06:00:00</td>\n",
188
+ " <td>2024-05-03</td>\n",
189
  " <td>6</td>\n",
190
+ " <td>0.26167</td>\n",
191
  " </tr>\n",
192
  " <tr>\n",
193
  " <th>7</th>\n",
194
+ " <td>1714719600000</td>\n",
195
+ " <td>2024-05-03 07:00:00</td>\n",
196
+ " <td>2024-05-03</td>\n",
197
  " <td>7</td>\n",
198
+ " <td>0.32045</td>\n",
199
  " </tr>\n",
200
  " <tr>\n",
201
  " <th>8</th>\n",
202
+ " <td>1714723200000</td>\n",
203
+ " <td>2024-05-03 08:00:00</td>\n",
204
+ " <td>2024-05-03</td>\n",
205
  " <td>8</td>\n",
206
+ " <td>0.31881</td>\n",
207
  " </tr>\n",
208
  " <tr>\n",
209
  " <th>9</th>\n",
210
+ " <td>1714726800000</td>\n",
211
+ " <td>2024-05-03 09:00:00</td>\n",
212
+ " <td>2024-05-03</td>\n",
213
  " <td>9</td>\n",
214
+ " <td>0.28860</td>\n",
215
  " </tr>\n",
216
  " <tr>\n",
217
  " <th>10</th>\n",
218
+ " <td>1714730400000</td>\n",
219
+ " <td>2024-05-03 10:00:00</td>\n",
220
+ " <td>2024-05-03</td>\n",
221
  " <td>10</td>\n",
222
+ " <td>0.28413</td>\n",
223
  " </tr>\n",
224
  " <tr>\n",
225
  " <th>11</th>\n",
226
+ " <td>1714734000000</td>\n",
227
+ " <td>2024-05-03 11:00:00</td>\n",
228
+ " <td>2024-05-03</td>\n",
229
  " <td>11</td>\n",
230
+ " <td>0.25339</td>\n",
231
  " </tr>\n",
232
  " <tr>\n",
233
  " <th>12</th>\n",
234
+ " <td>1714737600000</td>\n",
235
+ " <td>2024-05-03 12:00:00</td>\n",
236
+ " <td>2024-05-03</td>\n",
237
  " <td>12</td>\n",
238
+ " <td>0.25324</td>\n",
239
  " </tr>\n",
240
  " <tr>\n",
241
  " <th>13</th>\n",
242
+ " <td>1714741200000</td>\n",
243
+ " <td>2024-05-03 13:00:00</td>\n",
244
+ " <td>2024-05-03</td>\n",
245
  " <td>13</td>\n",
246
+ " <td>0.24325</td>\n",
247
  " </tr>\n",
248
  " <tr>\n",
249
  " <th>14</th>\n",
250
+ " <td>1714744800000</td>\n",
251
+ " <td>2024-05-03 14:00:00</td>\n",
252
+ " <td>2024-05-03</td>\n",
253
  " <td>14</td>\n",
254
+ " <td>0.23698</td>\n",
255
  " </tr>\n",
256
  " <tr>\n",
257
  " <th>15</th>\n",
258
+ " <td>1714748400000</td>\n",
259
+ " <td>2024-05-03 15:00:00</td>\n",
260
+ " <td>2024-05-03</td>\n",
261
  " <td>15</td>\n",
262
+ " <td>0.22751</td>\n",
263
  " </tr>\n",
264
  " <tr>\n",
265
  " <th>16</th>\n",
266
+ " <td>1714752000000</td>\n",
267
+ " <td>2024-05-03 16:00:00</td>\n",
268
+ " <td>2024-05-03</td>\n",
269
  " <td>16</td>\n",
270
+ " <td>0.22676</td>\n",
271
  " </tr>\n",
272
  " <tr>\n",
273
  " <th>17</th>\n",
274
+ " <td>1714755600000</td>\n",
275
+ " <td>2024-05-03 17:00:00</td>\n",
276
+ " <td>2024-05-03</td>\n",
277
  " <td>17</td>\n",
278
+ " <td>0.34283</td>\n",
279
  " </tr>\n",
280
  " <tr>\n",
281
  " <th>18</th>\n",
282
+ " <td>1714759200000</td>\n",
283
+ " <td>2024-05-03 18:00:00</td>\n",
284
+ " <td>2024-05-03</td>\n",
285
  " <td>18</td>\n",
286
+ " <td>0.60010</td>\n",
287
  " </tr>\n",
288
  " <tr>\n",
289
  " <th>19</th>\n",
290
+ " <td>1714762800000</td>\n",
291
+ " <td>2024-05-03 19:00:00</td>\n",
292
+ " <td>2024-05-03</td>\n",
293
  " <td>19</td>\n",
294
+ " <td>0.72356</td>\n",
295
  " </tr>\n",
296
  " <tr>\n",
297
  " <th>20</th>\n",
298
+ " <td>1714766400000</td>\n",
299
+ " <td>2024-05-03 20:00:00</td>\n",
300
+ " <td>2024-05-03</td>\n",
301
  " <td>20</td>\n",
302
+ " <td>0.82068</td>\n",
303
  " </tr>\n",
304
  " <tr>\n",
305
  " <th>21</th>\n",
306
+ " <td>1714770000000</td>\n",
307
+ " <td>2024-05-03 21:00:00</td>\n",
308
+ " <td>2024-05-03</td>\n",
309
  " <td>21</td>\n",
310
+ " <td>0.78524</td>\n",
311
  " </tr>\n",
312
  " <tr>\n",
313
  " <th>22</th>\n",
314
+ " <td>1714773600000</td>\n",
315
+ " <td>2024-05-03 22:00:00</td>\n",
316
+ " <td>2024-05-03</td>\n",
317
  " <td>22</td>\n",
318
+ " <td>0.68119</td>\n",
319
  " </tr>\n",
320
  " <tr>\n",
321
  " <th>23</th>\n",
322
+ " <td>1714777200000</td>\n",
323
+ " <td>2024-05-03 23:00:00</td>\n",
324
+ " <td>2024-05-03</td>\n",
325
  " <td>23</td>\n",
326
+ " <td>0.63822</td>\n",
327
  " </tr>\n",
328
  " </tbody>\n",
329
  "</table>\n",
 
331
  ],
332
  "text/plain": [
333
  " timestamp datetime date hour dk1_spotpricedkk_kwh\n",
334
+ "0 1714694400000 2024-05-03 00:00:00 2024-05-03 0 0.22214\n",
335
+ "1 1714698000000 2024-05-03 01:00:00 2024-05-03 1 0.21893\n",
336
+ "2 1714701600000 2024-05-03 02:00:00 2024-05-03 2 0.22348\n",
337
+ "3 1714705200000 2024-05-03 03:00:00 2024-05-03 3 0.22385\n",
338
+ "4 1714708800000 2024-05-03 04:00:00 2024-05-03 4 0.22706\n",
339
+ "5 1714712400000 2024-05-03 05:00:00 2024-05-03 5 0.23825\n",
340
+ "6 1714716000000 2024-05-03 06:00:00 2024-05-03 6 0.26167\n",
341
+ "7 1714719600000 2024-05-03 07:00:00 2024-05-03 7 0.32045\n",
342
+ "8 1714723200000 2024-05-03 08:00:00 2024-05-03 8 0.31881\n",
343
+ "9 1714726800000 2024-05-03 09:00:00 2024-05-03 9 0.28860\n",
344
+ "10 1714730400000 2024-05-03 10:00:00 2024-05-03 10 0.28413\n",
345
+ "11 1714734000000 2024-05-03 11:00:00 2024-05-03 11 0.25339\n",
346
+ "12 1714737600000 2024-05-03 12:00:00 2024-05-03 12 0.25324\n",
347
+ "13 1714741200000 2024-05-03 13:00:00 2024-05-03 13 0.24325\n",
348
+ "14 1714744800000 2024-05-03 14:00:00 2024-05-03 14 0.23698\n",
349
+ "15 1714748400000 2024-05-03 15:00:00 2024-05-03 15 0.22751\n",
350
+ "16 1714752000000 2024-05-03 16:00:00 2024-05-03 16 0.22676\n",
351
+ "17 1714755600000 2024-05-03 17:00:00 2024-05-03 17 0.34283\n",
352
+ "18 1714759200000 2024-05-03 18:00:00 2024-05-03 18 0.60010\n",
353
+ "19 1714762800000 2024-05-03 19:00:00 2024-05-03 19 0.72356\n",
354
+ "20 1714766400000 2024-05-03 20:00:00 2024-05-03 20 0.82068\n",
355
+ "21 1714770000000 2024-05-03 21:00:00 2024-05-03 21 0.78524\n",
356
+ "22 1714773600000 2024-05-03 22:00:00 2024-05-03 22 0.68119\n",
357
+ "23 1714777200000 2024-05-03 23:00:00 2024-05-03 23 0.63822"
358
  ]
359
  },
360
+ "execution_count": 4,
361
  "metadata": {},
362
  "output_type": "execute_result"
363
  }
 
367
  "electricity_df"
368
  ]
369
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  {
371
  "cell_type": "markdown",
372
  "metadata": {},
 
378
  "cell_type": "markdown",
379
  "metadata": {},
380
  "source": [
381
+ "#### <span style=\"color:#2656a3;\"> 🌈 Forecast Weather Measures"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  ]
383
  },
384
  {
385
  "cell_type": "code",
386
+ "execution_count": 5,
387
  "metadata": {},
388
  "outputs": [],
389
  "source": [
 
395
  },
396
  {
397
  "cell_type": "code",
398
+ "execution_count": 6,
399
  "metadata": {},
400
  "outputs": [
401
  {
 
437
  " <tbody>\n",
438
  " <tr>\n",
439
  " <th>0</th>\n",
440
+ " <td>1714694400000</td>\n",
441
+ " <td>2024-05-03 00:00:00</td>\n",
442
+ " <td>2024-05-03</td>\n",
443
  " <td>0</td>\n",
444
+ " <td>14.3</td>\n",
445
+ " <td>65.0</td>\n",
446
  " <td>0.0</td>\n",
447
  " <td>0.0</td>\n",
448
  " <td>0.0</td>\n",
449
+ " <td>1.0</td>\n",
450
+ " <td>25.0</td>\n",
451
+ " <td>20.5</td>\n",
452
+ " <td>36.0</td>\n",
453
  " </tr>\n",
454
  " <tr>\n",
455
  " <th>1</th>\n",
456
+ " <td>1714698000000</td>\n",
457
+ " <td>2024-05-03 01:00:00</td>\n",
458
+ " <td>2024-05-03</td>\n",
459
  " <td>1</td>\n",
460
+ " <td>13.6</td>\n",
461
+ " <td>69.0</td>\n",
462
  " <td>0.0</td>\n",
463
  " <td>0.0</td>\n",
464
  " <td>0.0</td>\n",
465
  " <td>0.0</td>\n",
466
+ " <td>12.0</td>\n",
467
+ " <td>21.6</td>\n",
468
+ " <td>37.4</td>\n",
469
  " </tr>\n",
470
  " <tr>\n",
471
  " <th>2</th>\n",
472
+ " <td>1714701600000</td>\n",
473
+ " <td>2024-05-03 02:00:00</td>\n",
474
+ " <td>2024-05-03</td>\n",
475
  " <td>2</td>\n",
476
+ " <td>13.0</td>\n",
477
+ " <td>72.0</td>\n",
478
  " <td>0.0</td>\n",
479
  " <td>0.0</td>\n",
480
  " <td>0.0</td>\n",
481
+ " <td>0.0</td>\n",
482
+ " <td>7.0</td>\n",
483
+ " <td>20.9</td>\n",
484
+ " <td>37.4</td>\n",
485
  " </tr>\n",
486
  " <tr>\n",
487
  " <th>3</th>\n",
488
+ " <td>1714705200000</td>\n",
489
+ " <td>2024-05-03 03:00:00</td>\n",
490
+ " <td>2024-05-03</td>\n",
491
  " <td>3</td>\n",
492
+ " <td>12.7</td>\n",
493
+ " <td>73.0</td>\n",
494
+ " <td>0.0</td>\n",
495
+ " <td>0.0</td>\n",
496
  " <td>0.0</td>\n",
497
+ " <td>1.0</td>\n",
498
+ " <td>26.0</td>\n",
499
+ " <td>19.8</td>\n",
500
+ " <td>34.6</td>\n",
501
  " </tr>\n",
502
  " <tr>\n",
503
  " <th>4</th>\n",
504
+ " <td>1714708800000</td>\n",
505
+ " <td>2024-05-03 04:00:00</td>\n",
506
+ " <td>2024-05-03</td>\n",
507
  " <td>4</td>\n",
508
+ " <td>12.4</td>\n",
509
  " <td>73.0</td>\n",
510
  " <td>0.0</td>\n",
511
  " <td>0.0</td>\n",
512
  " <td>0.0</td>\n",
513
  " <td>2.0</td>\n",
514
+ " <td>54.0</td>\n",
515
+ " <td>18.7</td>\n",
516
+ " <td>33.8</td>\n",
517
  " </tr>\n",
518
  " <tr>\n",
519
  " <th>...</th>\n",
 
533
  " </tr>\n",
534
  " <tr>\n",
535
  " <th>115</th>\n",
536
+ " <td>1715108400000</td>\n",
537
+ " <td>2024-05-07 19:00:00</td>\n",
538
+ " <td>2024-05-07</td>\n",
539
  " <td>19</td>\n",
540
+ " <td>12.0</td>\n",
541
+ " <td>41.0</td>\n",
 
 
542
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
544
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
546
  " <td>0.0</td>\n",
547
+ " <td>4.2</td>\n",
548
+ " <td>10.8</td>\n",
 
 
549
  " </tr>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  " <tr>\n",
551
+ " <th>116</th>\n",
552
+ " <td>1715112000000</td>\n",
553
+ " <td>2024-05-07 20:00:00</td>\n",
554
+ " <td>2024-05-07</td>\n",
555
+ " <td>20</td>\n",
556
+ " <td>10.7</td>\n",
557
+ " <td>49.0</td>\n",
558
  " <td>0.0</td>\n",
559
  " <td>0.0</td>\n",
560
  " <td>0.0</td>\n",
561
  " <td>0.0</td>\n",
562
+ " <td>0.0</td>\n",
563
+ " <td>3.6</td>\n",
564
+ " <td>8.3</td>\n",
565
  " </tr>\n",
566
  " <tr>\n",
567
+ " <th>117</th>\n",
568
+ " <td>1715115600000</td>\n",
569
+ " <td>2024-05-07 21:00:00</td>\n",
570
+ " <td>2024-05-07</td>\n",
571
+ " <td>21</td>\n",
572
+ " <td>9.6</td>\n",
573
+ " <td>56.0</td>\n",
 
 
574
  " <td>0.0</td>\n",
575
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
576
  " <td>0.0</td>\n",
577
  " <td>0.0</td>\n",
578
  " <td>0.0</td>\n",
579
+ " <td>3.2</td>\n",
580
+ " <td>5.4</td>\n",
 
 
581
  " </tr>\n",
582
  " <tr>\n",
583
+ " <th>118</th>\n",
584
+ " <td>1715119200000</td>\n",
585
+ " <td>2024-05-07 22:00:00</td>\n",
586
+ " <td>2024-05-07</td>\n",
587
+ " <td>22</td>\n",
588
+ " <td>8.7</td>\n",
589
+ " <td>58.0</td>\n",
 
 
590
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
591
  " <td>0.0</td>\n",
592
  " <td>0.0</td>\n",
593
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
594
  " <td>0.0</td>\n",
595
+ " <td>3.3</td>\n",
596
+ " <td>5.8</td>\n",
 
 
597
  " </tr>\n",
598
  " <tr>\n",
599
+ " <th>119</th>\n",
600
+ " <td>1715122800000</td>\n",
601
+ " <td>2024-05-07 23:00:00</td>\n",
602
+ " <td>2024-05-07</td>\n",
603
+ " <td>23</td>\n",
604
+ " <td>7.9</td>\n",
605
+ " <td>57.0</td>\n",
 
 
606
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608
  " <td>0.0</td>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  " <td>0.0</td>\n",
610
+ " <td>0.0</td>\n",
611
+ " <td>3.8</td>\n",
612
+ " <td>6.5</td>\n",
 
613
  " </tr>\n",
614
  " </tbody>\n",
615
  "</table>\n",
616
+ "<p>120 rows × 13 columns</p>\n",
617
  "</div>"
618
  ],
619
  "text/plain": [
620
  " timestamp datetime date hour temperature_2m \\\n",
621
+ "0 1714694400000 2024-05-03 00:00:00 2024-05-03 0 14.3 \n",
622
+ "1 1714698000000 2024-05-03 01:00:00 2024-05-03 1 13.6 \n",
623
+ "2 1714701600000 2024-05-03 02:00:00 2024-05-03 2 13.0 \n",
624
+ "3 1714705200000 2024-05-03 03:00:00 2024-05-03 3 12.7 \n",
625
+ "4 1714708800000 2024-05-03 04:00:00 2024-05-03 4 12.4 \n",
626
+ ".. ... ... ... ... ... \n",
627
+ "115 1715108400000 2024-05-07 19:00:00 2024-05-07 19 12.0 \n",
628
+ "116 1715112000000 2024-05-07 20:00:00 2024-05-07 20 10.7 \n",
629
+ "117 1715115600000 2024-05-07 21:00:00 2024-05-07 21 9.6 \n",
630
+ "118 1715119200000 2024-05-07 22:00:00 2024-05-07 22 8.7 \n",
631
+ "119 1715122800000 2024-05-07 23:00:00 2024-05-07 23 7.9 \n",
632
  "\n",
633
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
634
+ "0 65.0 0.0 0.0 0.0 1.0 \n",
635
+ "1 69.0 0.0 0.0 0.0 0.0 \n",
636
+ "2 72.0 0.0 0.0 0.0 0.0 \n",
637
+ "3 73.0 0.0 0.0 0.0 1.0 \n",
638
+ "4 73.0 0.0 0.0 0.0 2.0 \n",
639
+ ".. ... ... ... ... ... \n",
640
+ "115 41.0 0.0 0.0 0.0 0.0 \n",
641
+ "116 49.0 0.0 0.0 0.0 0.0 \n",
642
+ "117 56.0 0.0 0.0 0.0 0.0 \n",
643
+ "118 58.0 0.0 0.0 0.0 0.0 \n",
644
+ "119 57.0 0.0 0.0 0.0 0.0 \n",
645
  "\n",
646
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
647
+ "0 25.0 20.5 36.0 \n",
648
+ "1 12.0 21.6 37.4 \n",
649
+ "2 7.0 20.9 37.4 \n",
650
+ "3 26.0 19.8 34.6 \n",
651
+ "4 54.0 18.7 33.8 \n",
652
+ ".. ... ... ... \n",
653
+ "115 0.0 4.2 10.8 \n",
654
+ "116 0.0 3.6 8.3 \n",
655
+ "117 0.0 3.2 5.4 \n",
656
+ "118 0.0 3.3 5.8 \n",
657
+ "119 0.0 3.8 6.5 \n",
658
+ "\n",
659
+ "[120 rows x 13 columns]"
660
  ]
661
  },
662
+ "execution_count": 6,
663
  "metadata": {},
664
  "output_type": "execute_result"
665
  }
666
  ],
667
  "source": [
668
+ "# Display the weather forecast dataframe\n",
669
+ "weather_forecast_df"
670
  ]
671
  },
672
  {
 
680
  },
681
  {
682
  "cell_type": "code",
683
+ "execution_count": 7,
684
  "metadata": {},
685
  "outputs": [
686
  {
687
  "name": "stdout",
688
  "output_type": "stream",
689
  "text": [
 
690
  "Connected. Call `.close()` to terminate connection gracefully.\n",
691
  "\n",
692
+ "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/554133\n",
693
  "Connected. Call `.close()` to terminate connection gracefully.\n"
694
  ]
695
  }
 
707
  },
708
  {
709
  "cell_type": "code",
710
+ "execution_count": 8,
711
  "metadata": {},
712
  "outputs": [],
713
  "source": [
 
717
  " version=1,\n",
718
  ")\n",
719
  "\n",
 
 
 
 
 
720
  "weather_fg = fs.get_feature_group(\n",
721
  " name=\"weather_measurements\",\n",
722
  " version=1,\n",
 
733
  },
734
  {
735
  "cell_type": "code",
736
+ "execution_count": 9,
737
  "metadata": {},
738
  "outputs": [
739
  {
740
+ "data": {
741
+ "application/vnd.jupyter.widget-view+json": {
742
+ "model_id": "14af0030d68542cdae43f516e0e0f7a7",
743
+ "version_major": 2,
744
+ "version_minor": 0
745
+ },
746
+ "text/plain": [
747
+ "Uploading Dataframe: 0.00% | | Rows 0/24 | Elapsed Time: 00:00 | Remaining Time: ?"
748
+ ]
749
+ },
750
+ "metadata": {},
751
+ "output_type": "display_data"
752
  },
753
  {
754
  "name": "stdout",
 
756
  "text": [
757
  "Launching job: electricity_prices_1_offline_fg_materialization\n",
758
  "Job started successfully, you can follow the progress at \n",
759
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
760
  ]
761
  },
762
  {
763
  "data": {
764
  "text/plain": [
765
+ "(<hsfs.core.job.Job at 0x3058ab890>, None)"
766
  ]
767
  },
768
+ "execution_count": 9,
769
  "metadata": {},
770
  "output_type": "execute_result"
771
  }
 
778
  },
779
  {
780
  "cell_type": "code",
781
+ "execution_count": 10,
 
 
 
 
 
 
 
 
 
 
 
782
  "metadata": {},
783
  "outputs": [
784
  {
785
+ "data": {
786
+ "application/vnd.jupyter.widget-view+json": {
787
+ "model_id": "81dfae8d4c1942aaba0d1b0ff7917720",
788
+ "version_major": 2,
789
+ "version_minor": 0
790
+ },
791
+ "text/plain": [
792
+ "Uploading Dataframe: 0.00% | | Rows 0/120 | Elapsed Time: 00:00 | Remaining Time: ?"
793
+ ]
794
+ },
795
+ "metadata": {},
796
+ "output_type": "display_data"
797
  },
798
  {
799
  "name": "stdout",
 
801
  "text": [
802
  "Launching job: weather_measurements_1_offline_fg_materialization\n",
803
  "Job started successfully, you can follow the progress at \n",
804
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
805
  ]
806
  },
807
  {
808
  "data": {
809
  "text/plain": [
810
+ "(<hsfs.core.job.Job at 0x3058f5d10>, None)"
811
  ]
812
  },
813
+ "execution_count": 10,
814
  "metadata": {},
815
  "output_type": "execute_result"
816
  }
notebooks/3_training_pipeline.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/4_batch_inference.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/model/dk_electricity_model.pkl CHANGED
Binary files a/notebooks/model/dk_electricity_model.pkl and b/notebooks/model/dk_electricity_model.pkl differ
 
notebooks/test.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
scripts/run_feature_and_prediction_pipelines.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ set -e
4
+
5
+ cd notebooks
6
+
7
+ # Run the feature pipeline
8
+ jupyter nbconvert --to notebook --execute 2_feature_pipeline.ipynb
9
+
10
+ # Run the batch inference pipeline
11
+ jupyter nbconvert --to notebook --execute 4_batch_inference.ipynb