tobiasmj97 commited on
Commit
1f6629c
·
1 Parent(s): a860811

training and test changes

Browse files
app/app2.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import joblib
3
+ from math import radians
4
+
5
+ import pandas as pd
6
+ import numpy as np
7
+ import plotly.express as px
8
+ from matplotlib import pyplot
9
+ import warnings
10
+
11
+ import hopsworks
12
+
13
+ import streamlit as st
14
+
15
+ import folium
16
+ from streamlit_folium import st_folium
17
+ import json
18
+
19
# Time window of transactions scored by the app: the 200 hours up to now.
# Both bounds are derived from a single clock reading.
end_date = datetime.datetime.now()
start_date = end_date - datetime.timedelta(hours=200)

# NOTE: this script previously called `synthetic_data.set_random_seed(12345)` and
# built a `credit_cards` list from
# `synthetic_data.generate_list_credit_card_numbers()` at this point, but
# `synthetic_data` is never imported in this file, so start-up failed with a
# NameError. Nothing else in the file reads `credit_cards`, so both statements
# were removed.
lat = 0
long = 0

# Silence all Python warnings for the lifetime of the app.
warnings.filterwarnings("ignore")

# Log in to Hopsworks and get a handle to the project's feature store.
project = hopsworks.login()
fs = project.get_feature_store()
31
+
32
@st.cache(allow_output_mutation=True, suppress_st_warning=True)
def retrieve_dataset(fv, start_date, end_date):
    """Fetch the batch of feature data for the given time window.

    Writes a separator line and a section header to the Streamlit page,
    reads batch data from ``fv`` between ``start_date`` and ``end_date``,
    and removes the ``tid``, ``cc_num`` and ``datetime`` columns in place
    before returning the result.
    """
    separator = 36 * "-"
    st.write(separator)
    print_fancy_header('\n💾 Dataset Retrieving...')

    dropped_columns = ["tid", "cc_num", "datetime"]
    frame = fv.get_batch_data(start_time=start_date, end_time=end_date)
    frame.drop(columns=dropped_columns, inplace=True)
    return frame
39
+
40
+
41
@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def get_feature_view():
    """Return version 1 of the ``cc_trans_fraud`` feature view from the module-level feature store ``fs``."""
    return fs.get_feature_view("cc_trans_fraud", 1)
45
+
46
+
47
@st.cache(allow_output_mutation=True,suppress_st_warning=True)
def get_model(project = project):
    """Download version 1 of the ``cc_fraud`` model and load it with joblib.

    The model is looked up in the model registry of ``project``, downloaded,
    and ``cc_fraud_model.pkl`` inside the download directory is
    deserialized and returned.
    """
    registry = project.get_model_registry()
    registered_model = registry.get_model("cc_fraud", version=1)
    download_dir = registered_model.download()
    model_path = download_dir + "/cc_fraud_model.pkl"
    return joblib.load(model_path)
53
+
54
def explore_data(batch_data):
    """Render a pie chart of the predicted fraud / non-fraud split.

    Parameters:
    - batch_data: DataFrame with a ``fraud`` column holding the prediction
      labels (``'Fraud'`` / ``'Not Fraud'``, as produced by
      ``transform_preds``).

    The chart is written to the Streamlit page; nothing is returned.
    """
    st.write(36 * "-")
    print_fancy_header('\n👁 Data Exploration...')

    # Display name for each prediction label.
    display_names = {
        "Fraud": "Suspected of Fraud",
        "Not Fraud": "Not Suspected of Fraud",
    }

    # np.unique returns only the classes that occur in the batch. Labels are
    # derived from those classes instead of a fixed two-item list, so a batch
    # containing a single class (e.g. no fraud at all) is not mislabelled.
    classes, counts = np.unique(batch_data.fraud.values, return_counts=True)
    labels = [display_names.get(cls, str(cls)) for cls in classes]
    values = counts.tolist()

    fig = px.pie(values=values, names=labels, title='Distribution of predicted fraud transactions')
    st.plotly_chart(fig)
67
+
68
+
69
def print_fancy_header(text, font_size=24):
    """Write ``text`` to the page as a coloured (#ff5f27) HTML span of ``font_size`` pixels."""
    st.markdown(
        f'<span style="color:#ff5f27; font-size: {font_size}px;">{text}</span>',
        unsafe_allow_html=True,
    )
72
+
73
def transform_preds(predictions):
    """Map each prediction to ``'Fraud'`` when it equals 1, otherwise ``'Not Fraud'``."""
    labels = []
    for pred in predictions:
        if pred == 1:
            labels.append('Fraud')
        else:
            labels.append('Not Fraud')
    return labels
75
+
76
# Horizontal rule written between the sections of the page.
SEPARATOR = 36 * "-"

# Sidebar progress indicator and page title.
st.sidebar.header('⚙️ Working Progress')
progress_bar = st.sidebar.progress(0)
st.title('🆘 Fraud transactions detection 🆘')

st.write(SEPARATOR)
print_fancy_header('\n📡 Connecting to Hopsworks Feature Store...')

# Load the trained model from the model registry and show it on the page.
st.write(SEPARATOR)
print_fancy_header('\n🤖 Connecting to Model Registry on Hopsworks...')
model = get_model(project)
st.write(model)
st.write("✅ Connected!")

progress_bar.progress(40)

st.write(SEPARATOR)
print_fancy_header('\n✨ Fetch batch data and predict')
fv = get_feature_view()


# On demand: fetch the batch, score it and chart the predicted class split.
if st.button('📊 Make a prediction'):
    batch_data = retrieve_dataset(fv, start_date, end_date)
    st.write("✅ Retrieved!")
    progress_bar.progress(55)
    predicted_labels = transform_preds(model.predict(batch_data))
    labelled_batch = batch_data.copy()
    labelled_batch['fraud'] = predicted_labels
    explore_data(labelled_batch)

st.button("Re-run")
features/__pycache__/calendar.cpython-311.pyc ADDED
Binary file (1.94 kB). View file
 
features/__pycache__/electricity_prices.cpython-311.pyc CHANGED
Binary files a/features/__pycache__/electricity_prices.cpython-311.pyc and b/features/__pycache__/electricity_prices.cpython-311.pyc differ
 
features/__pycache__/weater_measures.cpython-311.pyc DELETED
Binary file (4.55 kB)
 
features/__pycache__/weather_measures.cpython-311.pyc CHANGED
Binary files a/features/__pycache__/weather_measures.cpython-311.pyc and b/features/__pycache__/weather_measures.cpython-311.pyc differ
 
features/calendar.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, date
2
+ import numpy as np
3
+ import pandas as pd
4
+
5
+
6
def get_calendar(source=None) -> pd.DataFrame:
    """
    Fetches the calendar for Denmark and derives date features from it.

    Parameters:
    - source: Optional path, URL or file-like object of a ';'-delimited CSV
      with a 'date' column in 'DD/MM/YYYY' format and a 'type' column.
      Defaults to the project's calendar CSV on GitHub.

    Returns:
    - pd.DataFrame: DataFrame with the Danish calendar. 'date' is a
      'YYYY-MM-DD' string, followed by 'dayofweek', 'day', 'month', 'year'
      and 'holiday' (1 where 'type' is 'Not a Workday', otherwise 0).
    """
    if source is None:
        source = 'https://raw.githubusercontent.com/Camillahannesbo/MLOPs-Assignment-/main/data/calendar_incl_holiday.csv'

    df = pd.read_csv(source, delimiter=';', usecols=['date', 'type'])

    # Parse the 'DD/MM/YYYY' strings once, vectorized, and reuse the result
    # for both the reformatted date column and the derived features.
    parsed = pd.to_datetime(df['date'], format='%d/%m/%Y')

    # Formatting the date column to 'YYYY-MM-DD' dateformat
    df['date'] = parsed.dt.strftime('%Y-%m-%d')

    # Add features to the calendar dataframe
    df['dayofweek'] = parsed.dt.dayofweek
    df['day'] = parsed.dt.day
    df['month'] = parsed.dt.month
    df['year'] = parsed.dt.year
    df['holiday'] = np.where(df['type'] == 'Not a Workday', 1, 0)

    # Drop the 'type' column to finalize the calendar dataframe
    calendar = df.drop(columns=['type'])

    # Return the DataFrame with calendar data
    return calendar
features/electricity_prices.py CHANGED
@@ -31,7 +31,10 @@ def electricity_prices(historical: bool = False, area: list = None, start: str =
31
 
32
  # Format date and time
33
  df["date"] = df["HourDK"].map(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S').strftime("%Y-%m-%d"))
34
- df['time'] = pd.to_datetime(df['HourDK'])
 
 
 
35
 
36
  # Divide the price by 1000 to get the price per KWH
37
  df['SpotPriceDKK_KWH'] = df['SpotPriceDKK'] / 1000
@@ -53,16 +56,16 @@ def electricity_prices(historical: bool = False, area: list = None, start: str =
53
  filtered_df = filtered_df[filtered_df.date == today]
54
 
55
  # Convert datetime to timestamp in milliseconds and add it as a new column
56
- filtered_df["timestamp"] = filtered_df["time"].apply(lambda x: int(x.timestamp() * 1000))
57
 
58
  # Reset the index to avoid duplicate entries
59
  filtered_df.reset_index(drop=True, inplace=True)
60
 
61
  # Select relevant columns for electricity price data and reorder them
62
- reordered_df = filtered_df[['timestamp', 'date', 'time', 'PriceArea', 'SpotPriceDKK_KWH']]
63
 
64
  # Unpivot DataFrame
65
- reordered_df = reordered_df.melt(id_vars=["timestamp", "time", "date", "PriceArea"], var_name="attribute", value_name="value")
66
 
67
  # Combine columns into a single "heading" column
68
  reordered_df["heading"] = reordered_df["PriceArea"] + "_" + reordered_df["attribute"]
@@ -72,7 +75,7 @@ def electricity_prices(historical: bool = False, area: list = None, start: str =
72
  reordered_df.drop(columns=["attribute"], inplace=True)
73
 
74
  # Pivot DataFrame
75
- electricity_prices = reordered_df.pivot_table(index=["timestamp", "time", "date"], columns="heading", values="value").reset_index()
76
 
77
  # Converting column names to lowercase for consistency
78
  electricity_prices.columns = list(map(str.lower, electricity_prices.columns))
@@ -109,7 +112,9 @@ def forecast_renewable_energy(historical: bool = False, area: str = None, start:
109
 
110
  # Format date and time
111
  df["date"] = df["HourDK"].map(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S').strftime("%Y-%m-%d"))
112
- df['time'] = pd.to_datetime(df['HourDK'])
 
 
113
 
114
  # Drop unnecessary columns
115
  df.drop('Forecast5Hour', axis=1, inplace=True)
@@ -135,7 +140,7 @@ def forecast_renewable_energy(historical: bool = False, area: str = None, start:
135
  filtered_df = filtered_df[df.date == today]
136
 
137
  # Convert datetime to timestamp in milliseconds and add it as a new column
138
- filtered_df["timestamp"] = filtered_df["time"].apply(lambda x: int(x.timestamp() * 1000))
139
 
140
  # Divide specified columns by 1000
141
  filtered_df["ForecastIntraday_KWH"] = filtered_df["ForecastIntraday"] / 1000
@@ -147,10 +152,10 @@ def forecast_renewable_energy(historical: bool = False, area: str = None, start:
147
  filtered_df.reset_index(drop=True, inplace=True)
148
 
149
  # Select relevant columns for renewable energy forecast data and reorder them
150
- reordered_df = filtered_df[['timestamp', 'date', 'time', 'PriceArea', 'ForecastType', 'ForecastIntraday_KWH']]
151
 
152
  # Unpivot DataFrame
153
- reordered_df = reordered_df.melt(id_vars=["timestamp", "time", "date", "PriceArea", "ForecastType"], var_name="attribute", value_name="value")
154
 
155
  # Combine columns into a single "heading" column
156
  reordered_df["heading"] = reordered_df["PriceArea"] + "_" + reordered_df["ForecastType"] + "_" + reordered_df["attribute"]
@@ -161,7 +166,7 @@ def forecast_renewable_energy(historical: bool = False, area: str = None, start:
161
  reordered_df.drop(columns=["attribute"], inplace=True)
162
 
163
  # Pivot DataFrame
164
- forecast_renewable_energy = reordered_df.pivot_table(index=["timestamp", "time", "date"], columns="heading", values="value").reset_index()
165
 
166
  # Converting column names to lowercase for consistency
167
  forecast_renewable_energy.columns = list(map(str.lower, forecast_renewable_energy.columns))
 
31
 
32
  # Format date and time
33
  df["date"] = df["HourDK"].map(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S').strftime("%Y-%m-%d"))
34
+ df['datetime'] = pd.to_datetime(df['HourDK'])
35
+ # df['time'] = pd.to_datetime(df['datetime']).dt.time
36
+ df['hour'] = pd.to_datetime(df['datetime']).dt.hour
37
+
38
 
39
  # Divide the price by 1000 to get the price per KWH
40
  df['SpotPriceDKK_KWH'] = df['SpotPriceDKK'] / 1000
 
56
  filtered_df = filtered_df[filtered_df.date == today]
57
 
58
  # Convert datetime to timestamp in milliseconds and add it as a new column
59
+ filtered_df["timestamp"] = filtered_df["datetime"].apply(lambda x: int(x.timestamp() * 1000))
60
 
61
  # Reset the index to avoid duplicate entries
62
  filtered_df.reset_index(drop=True, inplace=True)
63
 
64
  # Select relevant columns for electricity price data and reorder them
65
+ reordered_df = filtered_df[['timestamp', 'datetime', 'date', 'hour', 'PriceArea', 'SpotPriceDKK_KWH']]
66
 
67
  # Unpivot DataFrame
68
+ reordered_df = reordered_df.melt(id_vars=['timestamp', 'datetime', 'date', 'hour', "PriceArea"], var_name="attribute", value_name="value")
69
 
70
  # Combine columns into a single "heading" column
71
  reordered_df["heading"] = reordered_df["PriceArea"] + "_" + reordered_df["attribute"]
 
75
  reordered_df.drop(columns=["attribute"], inplace=True)
76
 
77
  # Pivot DataFrame
78
+ electricity_prices = reordered_df.pivot_table(index=['timestamp', 'datetime', 'date', 'hour'], columns="heading", values="value").reset_index()
79
 
80
  # Converting column names to lowercase for consistency
81
  electricity_prices.columns = list(map(str.lower, electricity_prices.columns))
 
112
 
113
  # Format date and time
114
  df["date"] = df["HourDK"].map(lambda x: datetime.strptime(x, '%Y-%m-%dT%H:%M:%S').strftime("%Y-%m-%d"))
115
+ df['datetime'] = pd.to_datetime(df['HourDK'])
116
+ # df['time'] = pd.to_datetime(df['datetime']).dt.time
117
+ df['hour'] = pd.to_datetime(df['datetime']).dt.hour
118
 
119
  # Drop unnecessary columns
120
  df.drop('Forecast5Hour', axis=1, inplace=True)
 
140
  filtered_df = filtered_df[df.date == today]
141
 
142
  # Convert datetime to timestamp in milliseconds and add it as a new column
143
+ filtered_df["timestamp"] = filtered_df["datetime"].apply(lambda x: int(x.timestamp() * 1000))
144
 
145
  # Divide specified columns by 1000
146
  filtered_df["ForecastIntraday_KWH"] = filtered_df["ForecastIntraday"] / 1000
 
152
  filtered_df.reset_index(drop=True, inplace=True)
153
 
154
  # Select relevant columns for renewable energy forecast data and reorder them
155
+ reordered_df = filtered_df[['timestamp', 'datetime', 'date', 'hour', 'PriceArea', 'ForecastType', 'ForecastIntraday_KWH']]
156
 
157
  # Unpivot DataFrame
158
+ reordered_df = reordered_df.melt(id_vars=["timestamp", 'datetime', "date", "hour", "PriceArea", "ForecastType"], var_name="attribute", value_name="value")
159
 
160
  # Combine columns into a single "heading" column
161
  reordered_df["heading"] = reordered_df["PriceArea"] + "_" + reordered_df["ForecastType"] + "_" + reordered_df["attribute"]
 
166
  reordered_df.drop(columns=["attribute"], inplace=True)
167
 
168
  # Pivot DataFrame
169
+ forecast_renewable_energy = reordered_df.pivot_table(index=["timestamp", "datetime", "date", "hour"], columns="heading", values="value").reset_index()
170
 
171
  # Converting column names to lowercase for consistency
172
  forecast_renewable_energy.columns = list(map(str.lower, forecast_renewable_energy.columns))
features/weather_measures.py CHANGED
@@ -32,8 +32,9 @@ def historical_weather_measures(historical: bool = False, lat: float = 57.048, l
32
 
33
  # Extract date from the 'time' column and convert it to datetime format
34
  df["date"] = df['time'].str[:10]
35
- df['time'] = pd.to_datetime(df['time'])
36
-
 
37
 
38
  # Filter the DataFrame based on whether historical data is requested or not
39
  today = (date.today()).strftime("%Y-%m-%d")
@@ -43,10 +44,10 @@ def historical_weather_measures(historical: bool = False, lat: float = 57.048, l
43
  df = df[df.date == today]
44
 
45
  # Convert datetime to timestamp in milliseconds and add it as a new column
46
- df["timestamp"] = df["time"].apply(lambda x: int(x.timestamp() * 1000))
47
 
48
  # Select relevant columns for weather data and reorder them
49
- weather = df[['timestamp', 'date', 'time', 'temperature_2m', 'relative_humidity_2m', 'precipitation', 'rain', 'snowfall', 'weather_code', 'cloud_cover', 'wind_speed_10m', 'wind_gusts_10m']]
50
 
51
  # Deleting rows with missing values
52
  weather = weather.dropna()
@@ -79,13 +80,20 @@ def forecast_weather_measures(lat: float = 57.048, lon: float = 9.9187, forecast
79
 
80
  # Extract date from the 'time' column and convert it to datetime format
81
  df["date"] = df['time'].str[:10]
82
- df['time'] = pd.to_datetime(df['time'])
 
 
83
 
84
  # Convert datetime to timestamp in milliseconds and add it as a new column
85
- df["timestamp"] = df["time"].apply(lambda x: int(x.timestamp() * 1000))
86
 
87
  # Select relevant columns for forecast weather data and reorder them
88
- forecast_weather = df[['timestamp', 'date', 'time', 'temperature_2m', 'relative_humidity_2m', 'precipitation', 'rain', 'snowfall', 'weather_code', 'cloud_cover', 'wind_speed_10m', 'wind_gusts_10m']]
 
 
 
 
 
89
 
90
  # Deleting rows with missing values
91
  forecast_weather = forecast_weather.dropna()
 
32
 
33
  # Extract date from the 'time' column and convert it to datetime format
34
  df["date"] = df['time'].str[:10]
35
+ df['datetime'] = pd.to_datetime(df['time'])
36
+ # df['time'] = pd.to_datetime(df['datetime']).dt.time
37
+ df['hour'] = pd.to_datetime(df['datetime']).dt.hour
38
 
39
  # Filter the DataFrame based on whether historical data is requested or not
40
  today = (date.today()).strftime("%Y-%m-%d")
 
44
  df = df[df.date == today]
45
 
46
  # Convert datetime to timestamp in milliseconds and add it as a new column
47
+ df["timestamp"] = df["datetime"].apply(lambda x: int(x.timestamp() * 1000))
48
 
49
  # Select relevant columns for weather data and reorder them
50
+ weather = df[['timestamp', 'datetime', 'date', 'hour', 'temperature_2m', 'relative_humidity_2m', 'precipitation', 'rain', 'snowfall', 'weather_code', 'cloud_cover', 'wind_speed_10m', 'wind_gusts_10m']]
51
 
52
  # Deleting rows with missing values
53
  weather = weather.dropna()
 
80
 
81
  # Extract date from the 'time' column and convert it to datetime format
82
  df["date"] = df['time'].str[:10]
83
+ df['datetime'] = pd.to_datetime(df['time'])
84
+ # df['time'] = pd.to_datetime(df['datetime']).dt.time
85
+ df['hour'] = pd.to_datetime(df['datetime']).dt.hour
86
 
87
  # Convert datetime to timestamp in milliseconds and add it as a new column
88
+ df["timestamp"] = df["datetime"].apply(lambda x: int(x.timestamp() * 1000))
89
 
90
  # Select relevant columns for forecast weather data and reorder them
91
+ forecast_weather = df[['timestamp', 'datetime', 'date', 'hour', 'temperature_2m', 'relative_humidity_2m', 'precipitation', 'rain', 'snowfall', 'weather_code', 'cloud_cover', 'wind_speed_10m', 'wind_gusts_10m']]
92
+
93
+ # Convert columns to float
94
+ forecast_weather['relative_humidity_2m'] = forecast_weather['relative_humidity_2m'].astype(float)
95
+ forecast_weather['weather_code'] = forecast_weather['weather_code'].astype(float)
96
+ forecast_weather['cloud_cover'] = forecast_weather['cloud_cover'].astype(float)
97
 
98
  # Deleting rows with missing values
99
  forecast_weather = forecast_weather.dropna()
{Old → hide/Old}/1_feature_backfill_OLD.ipynb RENAMED
File without changes
{Old → hide/Old}/2_feature_pipeline_OLD.ipynb RENAMED
File without changes
hide/Old/3_training_pipeline copy.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
{Old → hide/Old}/3_training_pipeline_OLD.ipynb RENAMED
File without changes
{Old → hide/Old}/4_batch_inference_OLD.ipynb RENAMED
File without changes
{notebooks → hide/Old}/predict_example.py RENAMED
File without changes
{notebooks_dev → hide/notebooks_dev}/3_training_pipeline_dev_prophet.ipynb RENAMED
File without changes
{notebooks_dev → hide/notebooks_dev}/3_training_pipeline_dev_pytorch.ipynb RENAMED
File without changes
{notebooks_dev → hide/notebooks_dev}/3_training_pipeline_dev_tensorflow.ipynb RENAMED
File without changes
{notebooks_dev → hide/notebooks_dev}/3_training_pipeline_dev_windowtensor.ipynb RENAMED
File without changes
notebooks/1_feature_backfill.ipynb CHANGED
@@ -56,7 +56,7 @@
56
  "\n",
57
  "# Now we import the functions from the features folder\n",
58
  "# This is the functions we have created to generate features for electricity prices and weather measures\n",
59
- "from features import electricity_prices, weather_measures \n",
60
  "\n",
61
  "# We go back into the notebooks folder\n",
62
  "%cd notebooks"
@@ -70,6 +70,7 @@
70
  "source": [
71
  "# Importing the packages for the needed libraries for the Jupyter notebook\n",
72
  "import pandas as pd\n",
 
73
  "import requests\n",
74
  "from datetime import datetime, timedelta\n",
75
  "\n",
@@ -146,8 +147,9 @@
146
  " <tr style=\"text-align: right;\">\n",
147
  " <th></th>\n",
148
  " <th>timestamp</th>\n",
149
- " <th>time</th>\n",
150
  " <th>date</th>\n",
 
151
  " <th>dk1_spotpricedkk_kwh</th>\n",
152
  " </tr>\n",
153
  " </thead>\n",
@@ -157,6 +159,7 @@
157
  " <td>1640995200000</td>\n",
158
  " <td>2022-01-01 00:00:00</td>\n",
159
  " <td>2022-01-01</td>\n",
 
160
  " <td>0.37220</td>\n",
161
  " </tr>\n",
162
  " <tr>\n",
@@ -164,6 +167,7 @@
164
  " <td>1640998800000</td>\n",
165
  " <td>2022-01-01 01:00:00</td>\n",
166
  " <td>2022-01-01</td>\n",
 
167
  " <td>0.30735</td>\n",
168
  " </tr>\n",
169
  " <tr>\n",
@@ -171,6 +175,7 @@
171
  " <td>1641002400000</td>\n",
172
  " <td>2022-01-01 02:00:00</td>\n",
173
  " <td>2022-01-01</td>\n",
 
174
  " <td>0.32141</td>\n",
175
  " </tr>\n",
176
  " <tr>\n",
@@ -178,6 +183,7 @@
178
  " <td>1641006000000</td>\n",
179
  " <td>2022-01-01 03:00:00</td>\n",
180
  " <td>2022-01-01</td>\n",
 
181
  " <td>0.33806</td>\n",
182
  " </tr>\n",
183
  " <tr>\n",
@@ -185,6 +191,7 @@
185
  " <td>1641009600000</td>\n",
186
  " <td>2022-01-01 04:00:00</td>\n",
187
  " <td>2022-01-01</td>\n",
 
188
  " <td>0.28013</td>\n",
189
  " </tr>\n",
190
  " </tbody>\n",
@@ -192,12 +199,12 @@
192
  "</div>"
193
  ],
194
  "text/plain": [
195
- " timestamp time date dk1_spotpricedkk_kwh\n",
196
- "0 1640995200000 2022-01-01 00:00:00 2022-01-01 0.37220\n",
197
- "1 1640998800000 2022-01-01 01:00:00 2022-01-01 0.30735\n",
198
- "2 1641002400000 2022-01-01 02:00:00 2022-01-01 0.32141\n",
199
- "3 1641006000000 2022-01-01 03:00:00 2022-01-01 0.33806\n",
200
- "4 1641009600000 2022-01-01 04:00:00 2022-01-01 0.28013"
201
  ]
202
  },
203
  "execution_count": 5,
@@ -237,58 +244,71 @@
237
  " <tr style=\"text-align: right;\">\n",
238
  " <th></th>\n",
239
  " <th>timestamp</th>\n",
240
- " <th>time</th>\n",
241
  " <th>date</th>\n",
 
242
  " <th>dk1_spotpricedkk_kwh</th>\n",
243
  " </tr>\n",
244
  " </thead>\n",
245
  " <tbody>\n",
246
  " <tr>\n",
247
- " <th>20416</th>\n",
248
- " <td>1714503600000</td>\n",
249
- " <td>2024-04-30 19:00:00</td>\n",
250
- " <td>2024-04-30</td>\n",
251
- " <td>0.48640</td>\n",
 
252
  " </tr>\n",
253
  " <tr>\n",
254
- " <th>20417</th>\n",
255
- " <td>1714507200000</td>\n",
256
- " <td>2024-04-30 20:00:00</td>\n",
257
- " <td>2024-04-30</td>\n",
258
- " <td>0.48275</td>\n",
 
259
  " </tr>\n",
260
  " <tr>\n",
261
- " <th>20418</th>\n",
262
- " <td>1714510800000</td>\n",
263
- " <td>2024-04-30 21:00:00</td>\n",
264
- " <td>2024-04-30</td>\n",
265
- " <td>0.49259</td>\n",
 
266
  " </tr>\n",
267
  " <tr>\n",
268
- " <th>20419</th>\n",
269
- " <td>1714514400000</td>\n",
270
- " <td>2024-04-30 22:00:00</td>\n",
271
- " <td>2024-04-30</td>\n",
272
- " <td>0.40340</td>\n",
 
273
  " </tr>\n",
274
  " <tr>\n",
275
- " <th>20420</th>\n",
276
- " <td>1714518000000</td>\n",
277
- " <td>2024-04-30 23:00:00</td>\n",
278
- " <td>2024-04-30</td>\n",
279
- " <td>0.38438</td>\n",
 
280
  " </tr>\n",
281
  " </tbody>\n",
282
  "</table>\n",
283
  "</div>"
284
  ],
285
  "text/plain": [
286
- " timestamp time date dk1_spotpricedkk_kwh\n",
287
- "20416 1714503600000 2024-04-30 19:00:00 2024-04-30 0.48640\n",
288
- "20417 1714507200000 2024-04-30 20:00:00 2024-04-30 0.48275\n",
289
- "20418 1714510800000 2024-04-30 21:00:00 2024-04-30 0.49259\n",
290
- "20419 1714514400000 2024-04-30 22:00:00 2024-04-30 0.40340\n",
291
- "20420 1714518000000 2024-04-30 23:00:00 2024-04-30 0.38438"
 
 
 
 
 
 
 
292
  ]
293
  },
294
  "execution_count": 6,
@@ -311,16 +331,17 @@
311
  "output_type": "stream",
312
  "text": [
313
  "<class 'pandas.core.frame.DataFrame'>\n",
314
- "RangeIndex: 20421 entries, 0 to 20420\n",
315
- "Data columns (total 4 columns):\n",
316
  " # Column Non-Null Count Dtype \n",
317
  "--- ------ -------------- ----- \n",
318
- " 0 timestamp 20421 non-null int64 \n",
319
- " 1 time 20421 non-null datetime64[ns]\n",
320
- " 2 date 20421 non-null object \n",
321
- " 3 dk1_spotpricedkk_kwh 20421 non-null float64 \n",
322
- "dtypes: datetime64[ns](1), float64(1), int64(1), object(1)\n",
323
- "memory usage: 638.3+ KB\n"
 
324
  ]
325
  }
326
  ],
@@ -381,8 +402,9 @@
381
  " <tr style=\"text-align: right;\">\n",
382
  " <th></th>\n",
383
  " <th>timestamp</th>\n",
384
- " <th>time</th>\n",
385
  " <th>date</th>\n",
 
386
  " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
387
  " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
388
  " <th>dk1_solar_forecastintraday_kwh</th>\n",
@@ -394,6 +416,7 @@
394
  " <td>1641024000000</td>\n",
395
  " <td>2022-01-01 08:00:00</td>\n",
396
  " <td>2022-01-01</td>\n",
 
397
  " <td>0.611708</td>\n",
398
  " <td>0.236792</td>\n",
399
  " <td>0.000050</td>\n",
@@ -403,6 +426,7 @@
403
  " <td>1641027600000</td>\n",
404
  " <td>2022-01-01 09:00:00</td>\n",
405
  " <td>2022-01-01</td>\n",
 
406
  " <td>0.459708</td>\n",
407
  " <td>0.196667</td>\n",
408
  " <td>0.004841</td>\n",
@@ -412,6 +436,7 @@
412
  " <td>1641031200000</td>\n",
413
  " <td>2022-01-01 10:00:00</td>\n",
414
  " <td>2022-01-01</td>\n",
 
415
  " <td>0.310375</td>\n",
416
  " <td>0.178500</td>\n",
417
  " <td>0.020353</td>\n",
@@ -421,6 +446,7 @@
421
  " <td>1641034800000</td>\n",
422
  " <td>2022-01-01 11:00:00</td>\n",
423
  " <td>2022-01-01</td>\n",
 
424
  " <td>0.320750</td>\n",
425
  " <td>0.201125</td>\n",
426
  " <td>0.035719</td>\n",
@@ -430,6 +456,7 @@
430
  " <td>1641038400000</td>\n",
431
  " <td>2022-01-01 12:00:00</td>\n",
432
  " <td>2022-01-01</td>\n",
 
433
  " <td>0.355667</td>\n",
434
  " <td>0.277667</td>\n",
435
  " <td>0.038027</td>\n",
@@ -439,12 +466,12 @@
439
  "</div>"
440
  ],
441
  "text/plain": [
442
- " timestamp time date \\\n",
443
- "0 1641024000000 2022-01-01 08:00:00 2022-01-01 \n",
444
- "1 1641027600000 2022-01-01 09:00:00 2022-01-01 \n",
445
- "2 1641031200000 2022-01-01 10:00:00 2022-01-01 \n",
446
- "3 1641034800000 2022-01-01 11:00:00 2022-01-01 \n",
447
- "4 1641038400000 2022-01-01 12:00:00 2022-01-01 \n",
448
  "\n",
449
  " dk1_offshore_wind_forecastintraday_kwh \\\n",
450
  "0 0.611708 \n",
@@ -498,8 +525,9 @@
498
  " <tr style=\"text-align: right;\">\n",
499
  " <th></th>\n",
500
  " <th>timestamp</th>\n",
501
- " <th>time</th>\n",
502
  " <th>date</th>\n",
 
503
  " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
504
  " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
505
  " <th>dk1_solar_forecastintraday_kwh</th>\n",
@@ -507,48 +535,53 @@
507
  " </thead>\n",
508
  " <tbody>\n",
509
  " <tr>\n",
510
- " <th>14402</th>\n",
511
- " <td>1714503600000</td>\n",
512
- " <td>2024-04-30 19:00:00</td>\n",
513
- " <td>2024-04-30</td>\n",
514
- " <td>0.655292</td>\n",
515
- " <td>0.998375</td>\n",
516
- " <td>0.163898</td>\n",
 
517
  " </tr>\n",
518
  " <tr>\n",
519
- " <th>14403</th>\n",
520
- " <td>1714507200000</td>\n",
521
- " <td>2024-04-30 20:00:00</td>\n",
522
- " <td>2024-04-30</td>\n",
523
- " <td>0.674583</td>\n",
524
- " <td>1.042500</td>\n",
525
- " <td>0.028304</td>\n",
 
526
  " </tr>\n",
527
  " <tr>\n",
528
- " <th>14404</th>\n",
529
- " <td>1714510800000</td>\n",
530
- " <td>2024-04-30 21:00:00</td>\n",
531
- " <td>2024-04-30</td>\n",
532
- " <td>0.707333</td>\n",
533
- " <td>1.188708</td>\n",
534
- " <td>0.001001</td>\n",
 
535
  " </tr>\n",
536
  " <tr>\n",
537
- " <th>14405</th>\n",
538
- " <td>1714514400000</td>\n",
539
- " <td>2024-04-30 22:00:00</td>\n",
540
- " <td>2024-04-30</td>\n",
541
- " <td>0.728000</td>\n",
542
- " <td>1.326250</td>\n",
 
543
  " <td>0.000000</td>\n",
544
  " </tr>\n",
545
  " <tr>\n",
546
- " <th>14406</th>\n",
547
- " <td>1714518000000</td>\n",
548
- " <td>2024-04-30 23:00:00</td>\n",
549
- " <td>2024-04-30</td>\n",
550
- " <td>0.727333</td>\n",
551
- " <td>1.366417</td>\n",
 
552
  " <td>0.000000</td>\n",
553
  " </tr>\n",
554
  " </tbody>\n",
@@ -556,26 +589,26 @@
556
  "</div>"
557
  ],
558
  "text/plain": [
559
- " timestamp time date \\\n",
560
- "14402 1714503600000 2024-04-30 19:00:00 2024-04-30 \n",
561
- "14403 1714507200000 2024-04-30 20:00:00 2024-04-30 \n",
562
- "14404 1714510800000 2024-04-30 21:00:00 2024-04-30 \n",
563
- "14405 1714514400000 2024-04-30 22:00:00 2024-04-30 \n",
564
- "14406 1714518000000 2024-04-30 23:00:00 2024-04-30 \n",
565
  "\n",
566
  " dk1_offshore_wind_forecastintraday_kwh \\\n",
567
- "14402 0.655292 \n",
568
- "14403 0.674583 \n",
569
- "14404 0.707333 \n",
570
- "14405 0.728000 \n",
571
- "14406 0.727333 \n",
572
  "\n",
573
  " dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \n",
574
- "14402 0.998375 0.163898 \n",
575
- "14403 1.042500 0.028304 \n",
576
- "14404 1.188708 0.001001 \n",
577
- "14405 1.326250 0.000000 \n",
578
- "14406 1.366417 0.000000 "
579
  ]
580
  },
581
  "execution_count": 10,
@@ -598,18 +631,19 @@
598
  "output_type": "stream",
599
  "text": [
600
  "<class 'pandas.core.frame.DataFrame'>\n",
601
- "RangeIndex: 14407 entries, 0 to 14406\n",
602
- "Data columns (total 6 columns):\n",
603
  " # Column Non-Null Count Dtype \n",
604
  "--- ------ -------------- ----- \n",
605
- " 0 timestamp 14407 non-null int64 \n",
606
- " 1 time 14407 non-null datetime64[ns]\n",
607
- " 2 date 14407 non-null object \n",
608
- " 3 dk1_offshore_wind_forecastintraday_kwh 14391 non-null float64 \n",
609
- " 4 dk1_onshore_wind_forecastintraday_kwh 14391 non-null float64 \n",
610
- " 5 dk1_solar_forecastintraday_kwh 14391 non-null float64 \n",
611
- "dtypes: datetime64[ns](1), float64(3), int64(1), object(1)\n",
612
- "memory usage: 675.5+ KB\n"
 
613
  ]
614
  }
615
  ],
@@ -676,8 +710,9 @@
676
  " <tr style=\"text-align: right;\">\n",
677
  " <th></th>\n",
678
  " <th>timestamp</th>\n",
 
679
  " <th>date</th>\n",
680
- " <th>time</th>\n",
681
  " <th>temperature_2m</th>\n",
682
  " <th>relative_humidity_2m</th>\n",
683
  " <th>precipitation</th>\n",
@@ -693,8 +728,9 @@
693
  " <tr>\n",
694
  " <th>0</th>\n",
695
  " <td>1640995200000</td>\n",
696
- " <td>2022-01-01</td>\n",
697
  " <td>2022-01-01 00:00:00</td>\n",
 
 
698
  " <td>6.7</td>\n",
699
  " <td>100.0</td>\n",
700
  " <td>0.0</td>\n",
@@ -708,8 +744,9 @@
708
  " <tr>\n",
709
  " <th>1</th>\n",
710
  " <td>1640998800000</td>\n",
711
- " <td>2022-01-01</td>\n",
712
  " <td>2022-01-01 01:00:00</td>\n",
 
 
713
  " <td>6.6</td>\n",
714
  " <td>100.0</td>\n",
715
  " <td>0.0</td>\n",
@@ -723,8 +760,9 @@
723
  " <tr>\n",
724
  " <th>2</th>\n",
725
  " <td>1641002400000</td>\n",
726
- " <td>2022-01-01</td>\n",
727
  " <td>2022-01-01 02:00:00</td>\n",
 
 
728
  " <td>6.7</td>\n",
729
  " <td>99.0</td>\n",
730
  " <td>0.0</td>\n",
@@ -738,8 +776,9 @@
738
  " <tr>\n",
739
  " <th>3</th>\n",
740
  " <td>1641006000000</td>\n",
741
- " <td>2022-01-01</td>\n",
742
  " <td>2022-01-01 03:00:00</td>\n",
 
 
743
  " <td>6.7</td>\n",
744
  " <td>100.0</td>\n",
745
  " <td>0.0</td>\n",
@@ -753,8 +792,9 @@
753
  " <tr>\n",
754
  " <th>4</th>\n",
755
  " <td>1641009600000</td>\n",
756
- " <td>2022-01-01</td>\n",
757
  " <td>2022-01-01 04:00:00</td>\n",
 
 
758
  " <td>6.7</td>\n",
759
  " <td>99.0</td>\n",
760
  " <td>0.0</td>\n",
@@ -770,12 +810,12 @@
770
  "</div>"
771
  ],
772
  "text/plain": [
773
- " timestamp date time temperature_2m \\\n",
774
- "0 1640995200000 2022-01-01 2022-01-01 00:00:00 6.7 \n",
775
- "1 1640998800000 2022-01-01 2022-01-01 01:00:00 6.6 \n",
776
- "2 1641002400000 2022-01-01 2022-01-01 02:00:00 6.7 \n",
777
- "3 1641006000000 2022-01-01 2022-01-01 03:00:00 6.7 \n",
778
- "4 1641009600000 2022-01-01 2022-01-01 04:00:00 6.7 \n",
779
  "\n",
780
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
781
  "0 100.0 0.0 0.0 0.0 3.0 \n",
@@ -829,8 +869,9 @@
829
  " <tr style=\"text-align: right;\">\n",
830
  " <th></th>\n",
831
  " <th>timestamp</th>\n",
 
832
  " <th>date</th>\n",
833
- " <th>time</th>\n",
834
  " <th>temperature_2m</th>\n",
835
  " <th>relative_humidity_2m</th>\n",
836
  " <th>precipitation</th>\n",
@@ -844,105 +885,110 @@
844
  " </thead>\n",
845
  " <tbody>\n",
846
  " <tr>\n",
847
- " <th>20395</th>\n",
848
- " <td>1714417200000</td>\n",
849
- " <td>2024-04-29</td>\n",
850
- " <td>2024-04-29 19:00:00</td>\n",
851
- " <td>10.6</td>\n",
852
- " <td>76.0</td>\n",
 
853
  " <td>0.0</td>\n",
854
  " <td>0.0</td>\n",
855
  " <td>0.0</td>\n",
856
  " <td>0.0</td>\n",
857
- " <td>1.0</td>\n",
858
- " <td>12.9</td>\n",
859
- " <td>21.2</td>\n",
860
  " </tr>\n",
861
  " <tr>\n",
862
- " <th>20396</th>\n",
863
- " <td>1714420800000</td>\n",
864
- " <td>2024-04-29</td>\n",
865
- " <td>2024-04-29 20:00:00</td>\n",
866
- " <td>9.7</td>\n",
867
- " <td>80.0</td>\n",
868
- " <td>0.0</td>\n",
869
  " <td>0.0</td>\n",
870
  " <td>0.0</td>\n",
871
  " <td>0.0</td>\n",
872
  " <td>0.0</td>\n",
873
- " <td>13.4</td>\n",
874
- " <td>23.8</td>\n",
 
875
  " </tr>\n",
876
  " <tr>\n",
877
- " <th>20397</th>\n",
878
- " <td>1714424400000</td>\n",
879
- " <td>2024-04-29</td>\n",
880
- " <td>2024-04-29 21:00:00</td>\n",
881
- " <td>9.0</td>\n",
882
- " <td>82.0</td>\n",
883
- " <td>0.0</td>\n",
884
  " <td>0.0</td>\n",
885
  " <td>0.0</td>\n",
886
  " <td>0.0</td>\n",
887
  " <td>0.0</td>\n",
888
- " <td>11.3</td>\n",
889
- " <td>22.7</td>\n",
 
890
  " </tr>\n",
891
  " <tr>\n",
892
- " <th>20398</th>\n",
893
- " <td>1714428000000</td>\n",
894
- " <td>2024-04-29</td>\n",
895
- " <td>2024-04-29 22:00:00</td>\n",
896
- " <td>8.6</td>\n",
897
- " <td>83.0</td>\n",
898
- " <td>0.0</td>\n",
899
  " <td>0.0</td>\n",
900
  " <td>0.0</td>\n",
901
  " <td>0.0</td>\n",
902
  " <td>0.0</td>\n",
903
- " <td>11.2</td>\n",
904
- " <td>19.1</td>\n",
 
905
  " </tr>\n",
906
  " <tr>\n",
907
- " <th>20399</th>\n",
908
- " <td>1714431600000</td>\n",
909
- " <td>2024-04-29</td>\n",
910
- " <td>2024-04-29 23:00:00</td>\n",
911
- " <td>8.1</td>\n",
912
- " <td>86.0</td>\n",
913
- " <td>0.0</td>\n",
914
  " <td>0.0</td>\n",
915
  " <td>0.0</td>\n",
916
  " <td>0.0</td>\n",
917
  " <td>0.0</td>\n",
918
- " <td>12.0</td>\n",
919
- " <td>20.2</td>\n",
 
920
  " </tr>\n",
921
  " </tbody>\n",
922
  "</table>\n",
923
  "</div>"
924
  ],
925
  "text/plain": [
926
- " timestamp date time temperature_2m \\\n",
927
- "20395 1714417200000 2024-04-29 2024-04-29 19:00:00 10.6 \n",
928
- "20396 1714420800000 2024-04-29 2024-04-29 20:00:00 9.7 \n",
929
- "20397 1714424400000 2024-04-29 2024-04-29 21:00:00 9.0 \n",
930
- "20398 1714428000000 2024-04-29 2024-04-29 22:00:00 8.6 \n",
931
- "20399 1714431600000 2024-04-29 2024-04-29 23:00:00 8.1 \n",
932
  "\n",
933
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
934
- "20395 76.0 0.0 0.0 0.0 0.0 \n",
935
- "20396 80.0 0.0 0.0 0.0 0.0 \n",
936
- "20397 82.0 0.0 0.0 0.0 0.0 \n",
937
- "20398 83.0 0.0 0.0 0.0 0.0 \n",
938
- "20399 86.0 0.0 0.0 0.0 0.0 \n",
939
  "\n",
940
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
941
- "20395 1.0 12.9 21.2 \n",
942
- "20396 0.0 13.4 23.8 \n",
943
- "20397 0.0 11.3 22.7 \n",
944
- "20398 0.0 11.2 19.1 \n",
945
- "20399 0.0 12.0 20.2 "
946
  ]
947
  },
948
  "execution_count": 14,
@@ -965,24 +1011,25 @@
965
  "output_type": "stream",
966
  "text": [
967
  "<class 'pandas.core.frame.DataFrame'>\n",
968
- "Int64Index: 20400 entries, 0 to 20399\n",
969
- "Data columns (total 12 columns):\n",
970
  " # Column Non-Null Count Dtype \n",
971
  "--- ------ -------------- ----- \n",
972
- " 0 timestamp 20400 non-null int64 \n",
973
- " 1 date 20400 non-null object \n",
974
- " 2 time 20400 non-null datetime64[ns]\n",
975
- " 3 temperature_2m 20400 non-null float64 \n",
976
- " 4 relative_humidity_2m 20400 non-null float64 \n",
977
- " 5 precipitation 20400 non-null float64 \n",
978
- " 6 rain 20400 non-null float64 \n",
979
- " 7 snowfall 20400 non-null float64 \n",
980
- " 8 weather_code 20400 non-null float64 \n",
981
- " 9 cloud_cover 20400 non-null float64 \n",
982
- " 10 wind_speed_10m 20400 non-null float64 \n",
983
- " 11 wind_gusts_10m 20400 non-null float64 \n",
984
- "dtypes: datetime64[ns](1), float64(9), int64(1), object(1)\n",
985
- "memory usage: 2.0+ MB\n"
 
986
  ]
987
  }
988
  ],
@@ -1038,8 +1085,9 @@
1038
  " <tr style=\"text-align: right;\">\n",
1039
  " <th></th>\n",
1040
  " <th>timestamp</th>\n",
 
1041
  " <th>date</th>\n",
1042
- " <th>time</th>\n",
1043
  " <th>temperature_2m</th>\n",
1044
  " <th>relative_humidity_2m</th>\n",
1045
  " <th>precipitation</th>\n",
@@ -1054,104 +1102,109 @@
1054
  " <tbody>\n",
1055
  " <tr>\n",
1056
  " <th>0</th>\n",
1057
- " <td>1714521600000</td>\n",
1058
- " <td>2024-05-01</td>\n",
1059
- " <td>2024-05-01 00:00:00</td>\n",
1060
- " <td>13.4</td>\n",
1061
- " <td>70</td>\n",
 
1062
  " <td>0.0</td>\n",
1063
  " <td>0.0</td>\n",
1064
  " <td>0.0</td>\n",
1065
- " <td>1</td>\n",
1066
- " <td>46</td>\n",
1067
- " <td>20.9</td>\n",
1068
- " <td>36.4</td>\n",
1069
  " </tr>\n",
1070
  " <tr>\n",
1071
  " <th>1</th>\n",
1072
- " <td>1714525200000</td>\n",
1073
- " <td>2024-05-01</td>\n",
1074
- " <td>2024-05-01 01:00:00</td>\n",
1075
- " <td>12.6</td>\n",
1076
- " <td>73</td>\n",
 
1077
  " <td>0.0</td>\n",
1078
  " <td>0.0</td>\n",
1079
  " <td>0.0</td>\n",
1080
- " <td>0</td>\n",
1081
- " <td>18</td>\n",
1082
- " <td>18.0</td>\n",
1083
- " <td>35.6</td>\n",
1084
  " </tr>\n",
1085
  " <tr>\n",
1086
  " <th>2</th>\n",
1087
- " <td>1714528800000</td>\n",
1088
- " <td>2024-05-01</td>\n",
1089
- " <td>2024-05-01 02:00:00</td>\n",
1090
- " <td>12.0</td>\n",
1091
- " <td>75</td>\n",
 
1092
  " <td>0.0</td>\n",
1093
  " <td>0.0</td>\n",
1094
  " <td>0.0</td>\n",
1095
- " <td>2</td>\n",
1096
- " <td>54</td>\n",
1097
- " <td>18.0</td>\n",
1098
- " <td>31.0</td>\n",
1099
  " </tr>\n",
1100
  " <tr>\n",
1101
  " <th>3</th>\n",
1102
- " <td>1714532400000</td>\n",
1103
- " <td>2024-05-01</td>\n",
1104
- " <td>2024-05-01 03:00:00</td>\n",
1105
- " <td>11.5</td>\n",
1106
- " <td>76</td>\n",
1107
- " <td>0.0</td>\n",
1108
- " <td>0.0</td>\n",
1109
- " <td>0.0</td>\n",
1110
  " <td>3</td>\n",
1111
- " <td>97</td>\n",
1112
- " <td>19.4</td>\n",
1113
- " <td>33.1</td>\n",
 
 
 
 
 
 
1114
  " </tr>\n",
1115
  " <tr>\n",
1116
  " <th>4</th>\n",
1117
- " <td>1714536000000</td>\n",
1118
- " <td>2024-05-01</td>\n",
1119
- " <td>2024-05-01 04:00:00</td>\n",
1120
- " <td>11.2</td>\n",
1121
- " <td>78</td>\n",
 
1122
  " <td>0.0</td>\n",
1123
  " <td>0.0</td>\n",
1124
  " <td>0.0</td>\n",
1125
- " <td>3</td>\n",
1126
- " <td>96</td>\n",
1127
- " <td>18.0</td>\n",
1128
- " <td>33.5</td>\n",
1129
  " </tr>\n",
1130
  " </tbody>\n",
1131
  "</table>\n",
1132
  "</div>"
1133
  ],
1134
  "text/plain": [
1135
- " timestamp date time temperature_2m \\\n",
1136
- "0 1714521600000 2024-05-01 2024-05-01 00:00:00 13.4 \n",
1137
- "1 1714525200000 2024-05-01 2024-05-01 01:00:00 12.6 \n",
1138
- "2 1714528800000 2024-05-01 2024-05-01 02:00:00 12.0 \n",
1139
- "3 1714532400000 2024-05-01 2024-05-01 03:00:00 11.5 \n",
1140
- "4 1714536000000 2024-05-01 2024-05-01 04:00:00 11.2 \n",
1141
  "\n",
1142
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1143
- "0 70 0.0 0.0 0.0 1 \n",
1144
- "1 73 0.0 0.0 0.0 0 \n",
1145
- "2 75 0.0 0.0 0.0 2 \n",
1146
- "3 76 0.0 0.0 0.0 3 \n",
1147
- "4 78 0.0 0.0 0.0 3 \n",
1148
  "\n",
1149
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1150
- "0 46 20.9 36.4 \n",
1151
- "1 18 18.0 35.6 \n",
1152
- "2 54 18.0 31.0 \n",
1153
- "3 97 19.4 33.1 \n",
1154
- "4 96 18.0 33.5 "
1155
  ]
1156
  },
1157
  "execution_count": 17,
@@ -1191,8 +1244,9 @@
1191
  " <tr style=\"text-align: right;\">\n",
1192
  " <th></th>\n",
1193
  " <th>timestamp</th>\n",
 
1194
  " <th>date</th>\n",
1195
- " <th>time</th>\n",
1196
  " <th>temperature_2m</th>\n",
1197
  " <th>relative_humidity_2m</th>\n",
1198
  " <th>precipitation</th>\n",
@@ -1207,104 +1261,109 @@
1207
  " <tbody>\n",
1208
  " <tr>\n",
1209
  " <th>115</th>\n",
1210
- " <td>1714935600000</td>\n",
1211
- " <td>2024-05-05</td>\n",
1212
- " <td>2024-05-05 19:00:00</td>\n",
1213
- " <td>12.6</td>\n",
1214
- " <td>79</td>\n",
1215
- " <td>0.0</td>\n",
1216
- " <td>0.0</td>\n",
1217
- " <td>0.0</td>\n",
1218
- " <td>3</td>\n",
1219
- " <td>83</td>\n",
1220
- " <td>11.3</td>\n",
1221
- " <td>25.9</td>\n",
 
1222
  " </tr>\n",
1223
  " <tr>\n",
1224
  " <th>116</th>\n",
1225
- " <td>1714939200000</td>\n",
1226
- " <td>2024-05-05</td>\n",
1227
- " <td>2024-05-05 20:00:00</td>\n",
1228
- " <td>11.7</td>\n",
1229
- " <td>83</td>\n",
1230
- " <td>0.0</td>\n",
1231
- " <td>0.0</td>\n",
1232
- " <td>0.0</td>\n",
1233
- " <td>3</td>\n",
1234
- " <td>91</td>\n",
1235
  " <td>10.1</td>\n",
1236
- " <td>23.0</td>\n",
 
 
 
 
 
 
 
1237
  " </tr>\n",
1238
  " <tr>\n",
1239
  " <th>117</th>\n",
1240
- " <td>1714942800000</td>\n",
1241
- " <td>2024-05-05</td>\n",
1242
- " <td>2024-05-05 21:00:00</td>\n",
1243
- " <td>11.1</td>\n",
1244
- " <td>86</td>\n",
1245
- " <td>0.0</td>\n",
1246
- " <td>0.0</td>\n",
1247
- " <td>0.0</td>\n",
1248
- " <td>3</td>\n",
1249
- " <td>98</td>\n",
1250
  " <td>9.5</td>\n",
1251
- " <td>20.5</td>\n",
 
 
 
 
 
 
 
1252
  " </tr>\n",
1253
  " <tr>\n",
1254
  " <th>118</th>\n",
1255
- " <td>1714946400000</td>\n",
1256
- " <td>2024-05-05</td>\n",
1257
- " <td>2024-05-05 22:00:00</td>\n",
1258
- " <td>10.9</td>\n",
1259
- " <td>87</td>\n",
1260
- " <td>0.0</td>\n",
1261
- " <td>0.0</td>\n",
 
1262
  " <td>0.0</td>\n",
1263
- " <td>3</td>\n",
1264
- " <td>98</td>\n",
1265
- " <td>10.2</td>\n",
1266
- " <td>22.3</td>\n",
1267
  " </tr>\n",
1268
  " <tr>\n",
1269
  " <th>119</th>\n",
1270
- " <td>1714950000000</td>\n",
1271
- " <td>2024-05-05</td>\n",
1272
- " <td>2024-05-05 23:00:00</td>\n",
1273
- " <td>11.0</td>\n",
1274
- " <td>88</td>\n",
 
 
 
1275
  " <td>0.0</td>\n",
1276
- " <td>0.0</td>\n",
1277
- " <td>0.0</td>\n",
1278
- " <td>3</td>\n",
1279
- " <td>97</td>\n",
1280
- " <td>11.9</td>\n",
1281
- " <td>24.1</td>\n",
1282
  " </tr>\n",
1283
  " </tbody>\n",
1284
  "</table>\n",
1285
  "</div>"
1286
  ],
1287
  "text/plain": [
1288
- " timestamp date time temperature_2m \\\n",
1289
- "115 1714935600000 2024-05-05 2024-05-05 19:00:00 12.6 \n",
1290
- "116 1714939200000 2024-05-05 2024-05-05 20:00:00 11.7 \n",
1291
- "117 1714942800000 2024-05-05 2024-05-05 21:00:00 11.1 \n",
1292
- "118 1714946400000 2024-05-05 2024-05-05 22:00:00 10.9 \n",
1293
- "119 1714950000000 2024-05-05 2024-05-05 23:00:00 11.0 \n",
1294
  "\n",
1295
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1296
- "115 79 0.0 0.0 0.0 3 \n",
1297
- "116 83 0.0 0.0 0.0 3 \n",
1298
- "117 86 0.0 0.0 0.0 3 \n",
1299
- "118 87 0.0 0.0 0.0 3 \n",
1300
- "119 88 0.0 0.0 0.0 3 \n",
1301
  "\n",
1302
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1303
- "115 83 11.3 25.9 \n",
1304
- "116 91 10.1 23.0 \n",
1305
- "117 98 9.5 20.5 \n",
1306
- "118 98 10.2 22.3 \n",
1307
- "119 97 11.9 24.1 "
1308
  ]
1309
  },
1310
  "execution_count": 18,
@@ -1328,23 +1387,24 @@
1328
  "text": [
1329
  "<class 'pandas.core.frame.DataFrame'>\n",
1330
  "RangeIndex: 120 entries, 0 to 119\n",
1331
- "Data columns (total 12 columns):\n",
1332
  " # Column Non-Null Count Dtype \n",
1333
  "--- ------ -------------- ----- \n",
1334
  " 0 timestamp 120 non-null int64 \n",
1335
- " 1 date 120 non-null object \n",
1336
- " 2 time 120 non-null datetime64[ns]\n",
1337
- " 3 temperature_2m 120 non-null float64 \n",
1338
- " 4 relative_humidity_2m 120 non-null int64 \n",
1339
- " 5 precipitation 120 non-null float64 \n",
1340
- " 6 rain 120 non-null float64 \n",
1341
- " 7 snowfall 120 non-null float64 \n",
1342
- " 8 weather_code 120 non-null int64 \n",
1343
- " 9 cloud_cover 120 non-null int64 \n",
1344
- " 10 wind_speed_10m 120 non-null float64 \n",
1345
- " 11 wind_gusts_10m 120 non-null float64 \n",
1346
- "dtypes: datetime64[ns](1), float64(6), int64(4), object(1)\n",
1347
- "memory usage: 11.4+ KB\n"
 
1348
  ]
1349
  }
1350
  ],
@@ -1365,97 +1425,14 @@
1365
  "cell_type": "code",
1366
  "execution_count": 20,
1367
  "metadata": {},
1368
- "outputs": [
1369
- {
1370
- "data": {
1371
- "text/html": [
1372
- "<div>\n",
1373
- "<style scoped>\n",
1374
- " .dataframe tbody tr th:only-of-type {\n",
1375
- " vertical-align: middle;\n",
1376
- " }\n",
1377
- "\n",
1378
- " .dataframe tbody tr th {\n",
1379
- " vertical-align: top;\n",
1380
- " }\n",
1381
- "\n",
1382
- " .dataframe thead th {\n",
1383
- " text-align: right;\n",
1384
- " }\n",
1385
- "</style>\n",
1386
- "<table border=\"1\" class=\"dataframe\">\n",
1387
- " <thead>\n",
1388
- " <tr style=\"text-align: right;\">\n",
1389
- " <th></th>\n",
1390
- " <th>date</th>\n",
1391
- " <th>type</th>\n",
1392
- " </tr>\n",
1393
- " </thead>\n",
1394
- " <tbody>\n",
1395
- " <tr>\n",
1396
- " <th>0</th>\n",
1397
- " <td>01/01/2022</td>\n",
1398
- " <td>Not a Workday</td>\n",
1399
- " </tr>\n",
1400
- " <tr>\n",
1401
- " <th>1</th>\n",
1402
- " <td>02/01/2022</td>\n",
1403
- " <td>Not a Workday</td>\n",
1404
- " </tr>\n",
1405
- " <tr>\n",
1406
- " <th>2</th>\n",
1407
- " <td>03/01/2022</td>\n",
1408
- " <td>Workday</td>\n",
1409
- " </tr>\n",
1410
- " <tr>\n",
1411
- " <th>3</th>\n",
1412
- " <td>04/01/2022</td>\n",
1413
- " <td>Workday</td>\n",
1414
- " </tr>\n",
1415
- " <tr>\n",
1416
- " <th>4</th>\n",
1417
- " <td>05/01/2022</td>\n",
1418
- " <td>Workday</td>\n",
1419
- " </tr>\n",
1420
- " </tbody>\n",
1421
- "</table>\n",
1422
- "</div>"
1423
- ],
1424
- "text/plain": [
1425
- " date type\n",
1426
- "0 01/01/2022 Not a Workday\n",
1427
- "1 02/01/2022 Not a Workday\n",
1428
- "2 03/01/2022 Workday\n",
1429
- "3 04/01/2022 Workday\n",
1430
- "4 05/01/2022 Workday"
1431
- ]
1432
- },
1433
- "execution_count": 20,
1434
- "metadata": {},
1435
- "output_type": "execute_result"
1436
- }
1437
- ],
1438
- "source": [
1439
- "# Read csv file with calender\n",
1440
- "calender_df = pd.read_csv('https://raw.githubusercontent.com/Camillahannesbo/MLOPs-Assignment-/main/data/calendar_incl_holiday.csv', delimiter=';', usecols=['date', 'type'])\n",
1441
- " \n",
1442
- "# Display the DataFrame\n",
1443
- "calender_df.head()"
1444
- ]
1445
- },
1446
- {
1447
- "cell_type": "code",
1448
- "execution_count": 21,
1449
- "metadata": {},
1450
  "outputs": [],
1451
  "source": [
1452
- "# Formatting the date column to 'YYYY-MM-DD' dateformat\n",
1453
- "calender_df[\"date\"] = calender_df[\"date\"].map(lambda x: datetime.strptime(x, '%d/%m/%Y').strftime(\"%Y-%m-%d\"))"
1454
  ]
1455
  },
1456
  {
1457
  "cell_type": "code",
1458
- "execution_count": 22,
1459
  "metadata": {},
1460
  "outputs": [
1461
  {
@@ -1480,49 +1457,61 @@
1480
  " <tr style=\"text-align: right;\">\n",
1481
  " <th></th>\n",
1482
  " <th>date</th>\n",
1483
- " <th>type</th>\n",
 
 
1484
  " </tr>\n",
1485
  " </thead>\n",
1486
  " <tbody>\n",
1487
  " <tr>\n",
1488
  " <th>0</th>\n",
1489
  " <td>2022-01-01</td>\n",
1490
- " <td>Not a Workday</td>\n",
 
 
1491
  " </tr>\n",
1492
  " <tr>\n",
1493
  " <th>1</th>\n",
1494
  " <td>2022-01-02</td>\n",
1495
- " <td>Not a Workday</td>\n",
 
 
1496
  " </tr>\n",
1497
  " <tr>\n",
1498
  " <th>2</th>\n",
1499
  " <td>2022-01-03</td>\n",
1500
- " <td>Workday</td>\n",
 
 
1501
  " </tr>\n",
1502
  " <tr>\n",
1503
  " <th>3</th>\n",
1504
  " <td>2022-01-04</td>\n",
1505
- " <td>Workday</td>\n",
 
 
1506
  " </tr>\n",
1507
  " <tr>\n",
1508
  " <th>4</th>\n",
1509
  " <td>2022-01-05</td>\n",
1510
- " <td>Workday</td>\n",
 
 
1511
  " </tr>\n",
1512
  " </tbody>\n",
1513
  "</table>\n",
1514
  "</div>"
1515
  ],
1516
  "text/plain": [
1517
- " date type\n",
1518
- "0 2022-01-01 Not a Workday\n",
1519
- "1 2022-01-02 Not a Workday\n",
1520
- "2 2022-01-03 Workday\n",
1521
- "3 2022-01-04 Workday\n",
1522
- "4 2022-01-05 Workday"
1523
  ]
1524
  },
1525
- "execution_count": 22,
1526
  "metadata": {},
1527
  "output_type": "execute_result"
1528
  }
@@ -1534,7 +1523,7 @@
1534
  },
1535
  {
1536
  "cell_type": "code",
1537
- "execution_count": 23,
1538
  "metadata": {},
1539
  "outputs": [
1540
  {
@@ -1559,49 +1548,61 @@
1559
  " <tr style=\"text-align: right;\">\n",
1560
  " <th></th>\n",
1561
  " <th>date</th>\n",
1562
- " <th>type</th>\n",
 
 
1563
  " </tr>\n",
1564
  " </thead>\n",
1565
  " <tbody>\n",
1566
  " <tr>\n",
1567
  " <th>1091</th>\n",
1568
  " <td>2024-12-27</td>\n",
1569
- " <td>Workday</td>\n",
 
 
1570
  " </tr>\n",
1571
  " <tr>\n",
1572
  " <th>1092</th>\n",
1573
  " <td>2024-12-28</td>\n",
1574
- " <td>Not a Workday</td>\n",
 
 
1575
  " </tr>\n",
1576
  " <tr>\n",
1577
  " <th>1093</th>\n",
1578
  " <td>2024-12-29</td>\n",
1579
- " <td>Not a Workday</td>\n",
 
 
1580
  " </tr>\n",
1581
  " <tr>\n",
1582
  " <th>1094</th>\n",
1583
  " <td>2024-12-30</td>\n",
1584
- " <td>Workday</td>\n",
 
 
1585
  " </tr>\n",
1586
  " <tr>\n",
1587
  " <th>1095</th>\n",
1588
  " <td>2024-12-31</td>\n",
1589
- " <td>Workday</td>\n",
 
 
1590
  " </tr>\n",
1591
  " </tbody>\n",
1592
  "</table>\n",
1593
  "</div>"
1594
  ],
1595
  "text/plain": [
1596
- " date type\n",
1597
- "1091 2024-12-27 Workday\n",
1598
- "1092 2024-12-28 Not a Workday\n",
1599
- "1093 2024-12-29 Not a Workday\n",
1600
- "1094 2024-12-30 Workday\n",
1601
- "1095 2024-12-31 Workday"
1602
  ]
1603
  },
1604
- "execution_count": 23,
1605
  "metadata": {},
1606
  "output_type": "execute_result"
1607
  }
@@ -1611,6 +1612,34 @@
1611
  "calender_df.tail(5)"
1612
  ]
1613
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1614
  {
1615
  "cell_type": "code",
1616
  "execution_count": 24,
@@ -1622,13 +1651,15 @@
1622
  "text": [
1623
  "<class 'pandas.core.frame.DataFrame'>\n",
1624
  "RangeIndex: 1096 entries, 0 to 1095\n",
1625
- "Data columns (total 2 columns):\n",
1626
- " # Column Non-Null Count Dtype \n",
1627
- "--- ------ -------------- ----- \n",
1628
- " 0 date 1096 non-null object\n",
1629
- " 1 type 1096 non-null object\n",
1630
- "dtypes: object(2)\n",
1631
- "memory usage: 17.3+ KB\n"
 
 
1632
  ]
1633
  }
1634
  ],
@@ -1680,9 +1711,9 @@
1680
  "### <span style=\"color:#2656a3;\"> 🪄 Creating Feature Groups\n",
1681
  "A feature group can be seen as a collection of conceptually related features. In this case we create feature groups for the \n",
1682
  "- eletricity price data,\n",
 
1683
  "- weather data,\n",
1684
- "- calender data, and\n",
1685
- "- forecast_renewable_energy.\n",
1686
  "\n",
1687
  "We specify a `primary_key` as `date`, so we are able to join them when we create a dataset for training later in part 03 the training_pipeline.\n",
1688
  "We define a name and a short describtion of the feature group's contents and a version number. \n",
@@ -1698,14 +1729,14 @@
1698
  "metadata": {},
1699
  "outputs": [],
1700
  "source": [
1701
- "# Creating the feature group for the weather data\n",
1702
- "weather_fg = fs.get_or_create_feature_group(\n",
1703
- " name=\"weather_measurements\",\n",
1704
  " version=1,\n",
1705
- " description=\"Weather measurements from Open Meteo API\",\n",
1706
- " primary_key=[\"date\",\"timestamp\"], # ,\"temperature_2m\",\"relative_humidity_2m\",\"precipitation\",\"rain\",\"snowfall\",\"weather_code\",\"cloud_cover\",\"wind_speed_10m\",\"wind_gusts_10m\"\n",
1707
- " event_time=\"timestamp\",\n",
1708
  " online_enabled=True,\n",
 
1709
  ")"
1710
  ]
1711
  },
@@ -1726,18 +1757,18 @@
1726
  "output_type": "stream",
1727
  "text": [
1728
  "Feature Group created successfully, explore it at \n",
1729
- "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/769394\n"
1730
  ]
1731
  },
1732
  {
1733
  "data": {
1734
  "application/vnd.jupyter.widget-view+json": {
1735
- "model_id": "4f5ca05f9abd42d7b45ef34c1a6cf382",
1736
  "version_major": 2,
1737
  "version_minor": 0
1738
  },
1739
  "text/plain": [
1740
- "Uploading Dataframe: 0.00% | | Rows 0/20400 | Elapsed Time: 00:00 | Remaining Time: ?"
1741
  ]
1742
  },
1743
  "metadata": {},
@@ -1747,15 +1778,15 @@
1747
  "name": "stdout",
1748
  "output_type": "stream",
1749
  "text": [
1750
- "Launching job: weather_measurements_1_offline_fg_materialization\n",
1751
  "Job started successfully, you can follow the progress at \n",
1752
- "https://c.app.hopsworks.ai/p/554133/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
1753
  ]
1754
  },
1755
  {
1756
  "data": {
1757
  "text/plain": [
1758
- "(<hsfs.core.job.Job at 0x17ef77890>, None)"
1759
  ]
1760
  },
1761
  "execution_count": 27,
@@ -1764,8 +1795,8 @@
1764
  }
1765
  ],
1766
  "source": [
1767
- "# Inserting the weather_df into the feature group named weather_fg\n",
1768
- "weather_fg.insert(historical_weather_df)"
1769
  ]
1770
  },
1771
  {
@@ -1781,32 +1812,25 @@
1781
  "metadata": {},
1782
  "outputs": [],
1783
  "source": [
1784
- "# List of descriptions for weather features\n",
1785
- "weather_feature_descriptions = [\n",
1786
  " {\"name\": \"timestamp\", \"description\": \"Timestamp for the event_time\"},\n",
1787
- " {\"name\": \"date\", \"description\": \"Date of the weather measurement\"},\n",
1788
- " {\"name\": \"time\", \"description\": \"Time of the weather measurement\"},\n",
1789
- " {\"name\": \"temperature_2m\", \"description\": \"Temperature at 2m above ground\"},\n",
1790
- " {\"name\": \"relative_humidity_2m\", \"description\": \"Relative humidity at 2m above ground\"},\n",
1791
- " {\"name\": \"precipitation\", \"description\": \"Precipitation\"},\n",
1792
- " {\"name\": \"rain\", \"description\": \"Rain\"},\n",
1793
- " {\"name\": \"snowfall\", \"description\": \"Snowfall\"}, \n",
1794
- " {\"name\": \"weather_code\", \"description\": \"Weather code\"}, \n",
1795
- " {\"name\": \"cloud_cover\", \"description\": \"Cloud cover\"}, \n",
1796
- " {\"name\": \"wind_speed_10m\", \"description\": \"Wind speed at 10m above ground\"}, \n",
1797
- " {\"name\": \"wind_gusts_10m\", \"description\": \"Wind gusts at 10m above ground\"}, \n",
1798
  "]\n",
1799
  "\n",
1800
  "# Updating feature descriptions\n",
1801
- "for desc in weather_feature_descriptions: \n",
1802
- " weather_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1803
  ]
1804
  },
1805
  {
1806
  "cell_type": "markdown",
1807
  "metadata": {},
1808
  "source": [
1809
- "We replicate the process for both the `electricity_fg`, `forecast_renewable_energy_fg` and `danish_holidays_fg` by establishing feature groups and inserting the dataframes into their respective feature groups."
1810
  ]
1811
  },
1812
  {
@@ -1815,67 +1839,25 @@
1815
  "metadata": {},
1816
  "outputs": [],
1817
  "source": [
1818
- "# Creating the feature group for the electricity prices\n",
1819
- "electricity_fg = fs.get_or_create_feature_group(\n",
1820
- " name=\"electricity_prices\",\n",
1821
- " version=1,\n",
1822
- " description=\"Electricity prices from Energidata API\",\n",
1823
- " primary_key=[\"date\",\"timestamp\"], # \"dk1_spotpricedkk_kwh\"\n",
1824
- " online_enabled=True,\n",
1825
- " event_time=\"timestamp\",\n",
1826
- ")"
1827
  ]
1828
  },
1829
  {
1830
  "cell_type": "code",
1831
  "execution_count": 30,
1832
  "metadata": {},
1833
- "outputs": [
1834
- {
1835
- "name": "stdout",
1836
- "output_type": "stream",
1837
- "text": [
1838
- "Feature Group created successfully, explore it at \n",
1839
- "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/768394\n"
1840
- ]
1841
- },
1842
- {
1843
- "data": {
1844
- "application/vnd.jupyter.widget-view+json": {
1845
- "model_id": "a07f970e652943e58fd587eacb087548",
1846
- "version_major": 2,
1847
- "version_minor": 0
1848
- },
1849
- "text/plain": [
1850
- "Uploading Dataframe: 0.00% | | Rows 0/20421 | Elapsed Time: 00:00 | Remaining Time: ?"
1851
- ]
1852
- },
1853
- "metadata": {},
1854
- "output_type": "display_data"
1855
- },
1856
- {
1857
- "name": "stdout",
1858
- "output_type": "stream",
1859
- "text": [
1860
- "Launching job: electricity_prices_1_offline_fg_materialization\n",
1861
- "Job started successfully, you can follow the progress at \n",
1862
- "https://c.app.hopsworks.ai/p/554133/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
1863
- ]
1864
- },
1865
- {
1866
- "data": {
1867
- "text/plain": [
1868
- "(<hsfs.core.job.Job at 0x17ef31850>, None)"
1869
- ]
1870
- },
1871
- "execution_count": 30,
1872
- "metadata": {},
1873
- "output_type": "execute_result"
1874
- }
1875
- ],
1876
  "source": [
1877
- "# Inserting the electricity_df into the feature group named electricity_fg\n",
1878
- "electricity_fg.insert(electricity_df)"
1879
  ]
1880
  },
1881
  {
@@ -1884,17 +1866,18 @@
1884
  "metadata": {},
1885
  "outputs": [],
1886
  "source": [
1887
- "# List of descriptions for electricity features\n",
1888
- "electricity_feature_descriptions = [\n",
1889
- " {\"name\": \"timestamp\", \"description\": \"Timestamp for the event_time\"},\n",
1890
- " {\"name\": \"date\", \"description\": \"Date of the electricity measurement\"},\n",
1891
- " {\"name\": \"time\", \"description\": \"Time of the electricity measurement\"},\n",
1892
- " {\"name\": \"dk1_spotpricedkk_kwh\", \"description\": \"Spot price in DKK per KWH\"}, \n",
1893
- "]\n",
 
1894
  "\n",
1895
- "# Updating feature descriptions\n",
1896
- "for desc in electricity_feature_descriptions: \n",
1897
- " electricity_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1898
  ]
1899
  },
1900
  {
@@ -1903,14 +1886,14 @@
1903
  "metadata": {},
1904
  "outputs": [],
1905
  "source": [
1906
- "# Creating the feature group for the electricity prices\n",
1907
- "forecast_renewable_energy_fg = fs.get_or_create_feature_group(\n",
1908
- " name=\"forecast_renewable_energy\",\n",
1909
  " version=1,\n",
1910
- " description=\"Forecast on Renewable Energy on ForecastType from Energidata API\",\n",
1911
- " primary_key=[\"date\",\"timestamp\"], # ,\"dk1_offshore_wind_forecastintraday_kwh\",\"dk1_onshore_wind_forecastintraday_kwh\",\"dk1_solar_power_forecastintraday_kwh\"\n",
1912
- " online_enabled=True,\n",
1913
  " event_time=\"timestamp\",\n",
 
1914
  ")"
1915
  ]
1916
  },
@@ -1924,18 +1907,18 @@
1924
  "output_type": "stream",
1925
  "text": [
1926
  "Feature Group created successfully, explore it at \n",
1927
- "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/768395\n"
1928
  ]
1929
  },
1930
  {
1931
  "data": {
1932
  "application/vnd.jupyter.widget-view+json": {
1933
- "model_id": "01f05ec67a224ab182df4309c9f67293",
1934
  "version_major": 2,
1935
  "version_minor": 0
1936
  },
1937
  "text/plain": [
1938
- "Uploading Dataframe: 0.00% | | Rows 0/14407 | Elapsed Time: 00:00 | Remaining Time: ?"
1939
  ]
1940
  },
1941
  "metadata": {},
@@ -1945,15 +1928,15 @@
1945
  "name": "stdout",
1946
  "output_type": "stream",
1947
  "text": [
1948
- "Launching job: forecast_renewable_energy_1_offline_fg_materialization\n",
1949
  "Job started successfully, you can follow the progress at \n",
1950
- "https://c.app.hopsworks.ai/p/554133/jobs/named/forecast_renewable_energy_1_offline_fg_materialization/executions\n"
1951
  ]
1952
  },
1953
  {
1954
  "data": {
1955
  "text/plain": [
1956
- "(<hsfs.core.job.Job at 0x17dba60d0>, None)"
1957
  ]
1958
  },
1959
  "execution_count": 33,
@@ -1962,8 +1945,8 @@
1962
  }
1963
  ],
1964
  "source": [
1965
- "# Inserting the electricity_df into the feature group named electricity_fg\n",
1966
- "forecast_renewable_energy_fg.insert(forecast_renewable_energy_df)"
1967
  ]
1968
  },
1969
  {
@@ -1972,30 +1955,39 @@
1972
  "metadata": {},
1973
  "outputs": [],
1974
  "source": [
1975
- "# List of descriptions for forecast_renewable_energy features\n",
1976
- "forecast_renewable_energy_feature_descriptions = [\n",
1977
- " {\"name\": \"timestamp\", \"description\": \"Timestamp for the event_time\"},\n",
1978
- " {\"name\": \"date\", \"description\": \"Date\"},\n",
1979
- " {\"name\": \"time\", \"description\": \"Time for the event_time\"},\n",
1980
- " {\"name\": \"dk1_offshore_wind_forecastintraday_kwh\", \"description\": \"The forecast for the coming day at 6am Danish time zone\"},\n",
 
 
 
 
 
 
 
 
 
1981
  "]\n",
1982
  "\n",
1983
  "# Updating feature descriptions\n",
1984
- "for desc in forecast_renewable_energy_feature_descriptions: \n",
1985
- " forecast_renewable_energy_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1986
  ]
1987
  },
1988
  {
1989
  "cell_type": "code",
1990
- "execution_count": 35,
1991
  "metadata": {},
1992
  "outputs": [],
1993
  "source": [
1994
- "# Creating the feature group for the danish holidays\n",
1995
- "danish_holidays_fg = fs.get_or_create_feature_group(\n",
1996
- " name=\"danish_holidayss\",\n",
1997
- " version=1,\n",
1998
- " description=\"Danish holidays calendar.\",\n",
1999
  " online_enabled=True,\n",
2000
  " primary_key=[\"date\"],\n",
2001
  ")"
@@ -2003,7 +1995,7 @@
2003
  },
2004
  {
2005
  "cell_type": "code",
2006
- "execution_count": 36,
2007
  "metadata": {},
2008
  "outputs": [
2009
  {
@@ -2011,13 +2003,13 @@
2011
  "output_type": "stream",
2012
  "text": [
2013
  "Feature Group created successfully, explore it at \n",
2014
- "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/768396\n"
2015
  ]
2016
  },
2017
  {
2018
  "data": {
2019
  "application/vnd.jupyter.widget-view+json": {
2020
- "model_id": "3b74b96c4b1642699979968395c6ea22",
2021
  "version_major": 2,
2022
  "version_minor": 0
2023
  },
@@ -2032,42 +2024,44 @@
2032
  "name": "stdout",
2033
  "output_type": "stream",
2034
  "text": [
2035
- "Launching job: danish_holidayss_1_offline_fg_materialization\n",
2036
  "Job started successfully, you can follow the progress at \n",
2037
- "https://c.app.hopsworks.ai/p/554133/jobs/named/danish_holidayss_1_offline_fg_materialization/executions\n"
2038
  ]
2039
  },
2040
  {
2041
  "data": {
2042
  "text/plain": [
2043
- "(<hsfs.core.job.Job at 0x17ef334d0>, None)"
2044
  ]
2045
  },
2046
- "execution_count": 36,
2047
  "metadata": {},
2048
  "output_type": "execute_result"
2049
  }
2050
  ],
2051
  "source": [
2052
- "# Inserting the calender_df into the feature group named danish_holidays_fg\n",
2053
- "danish_holidays_fg.insert(calender_df)"
2054
  ]
2055
  },
2056
  {
2057
  "cell_type": "code",
2058
- "execution_count": 37,
2059
  "metadata": {},
2060
  "outputs": [],
2061
  "source": [
2062
- "# List of descriptions for danish_holidays features\n",
2063
- "danish_holidays_feature_descriptions = [\n",
2064
  " {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
2065
- " {\"name\": \"type\", \"description\": \"Holyday or not holyday\"},\n",
 
 
2066
  "]\n",
2067
  "\n",
2068
  "# Updating feature descriptions\n",
2069
- "for desc in danish_holidays_feature_descriptions: \n",
2070
- " danish_holidays_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
2071
  ]
2072
  },
2073
  {
 
56
  "\n",
57
  "# Now we import the functions from the features folder\n",
58
  "# These are the functions we have created to generate features for electricity prices and weather measures\n",
59
+ "from features import electricity_prices, weather_measures, calendar\n",
60
  "\n",
61
  "# We go back into the notebooks folder\n",
62
  "%cd notebooks"
 
70
  "source": [
71
  "# Importing the packages for the needed libraries for the Jupyter notebook\n",
72
  "import pandas as pd\n",
73
+ "import numpy as np\n",
74
  "import requests\n",
75
  "from datetime import datetime, timedelta\n",
76
  "\n",
 
147
  " <tr style=\"text-align: right;\">\n",
148
  " <th></th>\n",
149
  " <th>timestamp</th>\n",
150
+ " <th>datetime</th>\n",
151
  " <th>date</th>\n",
152
+ " <th>hour</th>\n",
153
  " <th>dk1_spotpricedkk_kwh</th>\n",
154
  " </tr>\n",
155
  " </thead>\n",
 
159
  " <td>1640995200000</td>\n",
160
  " <td>2022-01-01 00:00:00</td>\n",
161
  " <td>2022-01-01</td>\n",
162
+ " <td>0</td>\n",
163
  " <td>0.37220</td>\n",
164
  " </tr>\n",
165
  " <tr>\n",
 
167
  " <td>1640998800000</td>\n",
168
  " <td>2022-01-01 01:00:00</td>\n",
169
  " <td>2022-01-01</td>\n",
170
+ " <td>1</td>\n",
171
  " <td>0.30735</td>\n",
172
  " </tr>\n",
173
  " <tr>\n",
 
175
  " <td>1641002400000</td>\n",
176
  " <td>2022-01-01 02:00:00</td>\n",
177
  " <td>2022-01-01</td>\n",
178
+ " <td>2</td>\n",
179
  " <td>0.32141</td>\n",
180
  " </tr>\n",
181
  " <tr>\n",
 
183
  " <td>1641006000000</td>\n",
184
  " <td>2022-01-01 03:00:00</td>\n",
185
  " <td>2022-01-01</td>\n",
186
+ " <td>3</td>\n",
187
  " <td>0.33806</td>\n",
188
  " </tr>\n",
189
  " <tr>\n",
 
191
  " <td>1641009600000</td>\n",
192
  " <td>2022-01-01 04:00:00</td>\n",
193
  " <td>2022-01-01</td>\n",
194
+ " <td>4</td>\n",
195
  " <td>0.28013</td>\n",
196
  " </tr>\n",
197
  " </tbody>\n",
 
199
  "</div>"
200
  ],
201
  "text/plain": [
202
+ " timestamp datetime date hour dk1_spotpricedkk_kwh\n",
203
+ "0 1640995200000 2022-01-01 00:00:00 2022-01-01 0 0.37220\n",
204
+ "1 1640998800000 2022-01-01 01:00:00 2022-01-01 1 0.30735\n",
205
+ "2 1641002400000 2022-01-01 02:00:00 2022-01-01 2 0.32141\n",
206
+ "3 1641006000000 2022-01-01 03:00:00 2022-01-01 3 0.33806\n",
207
+ "4 1641009600000 2022-01-01 04:00:00 2022-01-01 4 0.28013"
208
  ]
209
  },
210
  "execution_count": 5,
 
244
  " <tr style=\"text-align: right;\">\n",
245
  " <th></th>\n",
246
  " <th>timestamp</th>\n",
247
+ " <th>datetime</th>\n",
248
  " <th>date</th>\n",
249
+ " <th>hour</th>\n",
250
  " <th>dk1_spotpricedkk_kwh</th>\n",
251
  " </tr>\n",
252
  " </thead>\n",
253
  " <tbody>\n",
254
  " <tr>\n",
255
+ " <th>20440</th>\n",
256
+ " <td>1714590000000</td>\n",
257
+ " <td>2024-05-01 19:00:00</td>\n",
258
+ " <td>2024-05-01</td>\n",
259
+ " <td>19</td>\n",
260
+ " <td>0.37590</td>\n",
261
  " </tr>\n",
262
  " <tr>\n",
263
+ " <th>20441</th>\n",
264
+ " <td>1714593600000</td>\n",
265
+ " <td>2024-05-01 20:00:00</td>\n",
266
+ " <td>2024-05-01</td>\n",
267
+ " <td>20</td>\n",
268
+ " <td>0.37292</td>\n",
269
  " </tr>\n",
270
  " <tr>\n",
271
+ " <th>20442</th>\n",
272
+ " <td>1714597200000</td>\n",
273
+ " <td>2024-05-01 21:00:00</td>\n",
274
+ " <td>2024-05-01</td>\n",
275
+ " <td>21</td>\n",
276
+ " <td>0.25366</td>\n",
277
  " </tr>\n",
278
  " <tr>\n",
279
+ " <th>20443</th>\n",
280
+ " <td>1714600800000</td>\n",
281
+ " <td>2024-05-01 22:00:00</td>\n",
282
+ " <td>2024-05-01</td>\n",
283
+ " <td>22</td>\n",
284
+ " <td>0.22315</td>\n",
285
  " </tr>\n",
286
  " <tr>\n",
287
+ " <th>20444</th>\n",
288
+ " <td>1714604400000</td>\n",
289
+ " <td>2024-05-01 23:00:00</td>\n",
290
+ " <td>2024-05-01</td>\n",
291
+ " <td>23</td>\n",
292
+ " <td>0.16408</td>\n",
293
  " </tr>\n",
294
  " </tbody>\n",
295
  "</table>\n",
296
  "</div>"
297
  ],
298
  "text/plain": [
299
+ " timestamp datetime date hour \\\n",
300
+ "20440 1714590000000 2024-05-01 19:00:00 2024-05-01 19 \n",
301
+ "20441 1714593600000 2024-05-01 20:00:00 2024-05-01 20 \n",
302
+ "20442 1714597200000 2024-05-01 21:00:00 2024-05-01 21 \n",
303
+ "20443 1714600800000 2024-05-01 22:00:00 2024-05-01 22 \n",
304
+ "20444 1714604400000 2024-05-01 23:00:00 2024-05-01 23 \n",
305
+ "\n",
306
+ " dk1_spotpricedkk_kwh \n",
307
+ "20440 0.37590 \n",
308
+ "20441 0.37292 \n",
309
+ "20442 0.25366 \n",
310
+ "20443 0.22315 \n",
311
+ "20444 0.16408 "
312
  ]
313
  },
314
  "execution_count": 6,
 
331
  "output_type": "stream",
332
  "text": [
333
  "<class 'pandas.core.frame.DataFrame'>\n",
334
+ "RangeIndex: 20445 entries, 0 to 20444\n",
335
+ "Data columns (total 5 columns):\n",
336
  " # Column Non-Null Count Dtype \n",
337
  "--- ------ -------------- ----- \n",
338
+ " 0 timestamp 20445 non-null int64 \n",
339
+ " 1 datetime 20445 non-null datetime64[ns]\n",
340
+ " 2 date 20445 non-null object \n",
341
+ " 3 hour 20445 non-null int64 \n",
342
+ " 4 dk1_spotpricedkk_kwh 20445 non-null float64 \n",
343
+ "dtypes: datetime64[ns](1), float64(1), int64(2), object(1)\n",
344
+ "memory usage: 798.8+ KB\n"
345
  ]
346
  }
347
  ],
 
402
  " <tr style=\"text-align: right;\">\n",
403
  " <th></th>\n",
404
  " <th>timestamp</th>\n",
405
+ " <th>datetime</th>\n",
406
  " <th>date</th>\n",
407
+ " <th>hour</th>\n",
408
  " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
409
  " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
410
  " <th>dk1_solar_forecastintraday_kwh</th>\n",
 
416
  " <td>1641024000000</td>\n",
417
  " <td>2022-01-01 08:00:00</td>\n",
418
  " <td>2022-01-01</td>\n",
419
+ " <td>8</td>\n",
420
  " <td>0.611708</td>\n",
421
  " <td>0.236792</td>\n",
422
  " <td>0.000050</td>\n",
 
426
  " <td>1641027600000</td>\n",
427
  " <td>2022-01-01 09:00:00</td>\n",
428
  " <td>2022-01-01</td>\n",
429
+ " <td>9</td>\n",
430
  " <td>0.459708</td>\n",
431
  " <td>0.196667</td>\n",
432
  " <td>0.004841</td>\n",
 
436
  " <td>1641031200000</td>\n",
437
  " <td>2022-01-01 10:00:00</td>\n",
438
  " <td>2022-01-01</td>\n",
439
+ " <td>10</td>\n",
440
  " <td>0.310375</td>\n",
441
  " <td>0.178500</td>\n",
442
  " <td>0.020353</td>\n",
 
446
  " <td>1641034800000</td>\n",
447
  " <td>2022-01-01 11:00:00</td>\n",
448
  " <td>2022-01-01</td>\n",
449
+ " <td>11</td>\n",
450
  " <td>0.320750</td>\n",
451
  " <td>0.201125</td>\n",
452
  " <td>0.035719</td>\n",
 
456
  " <td>1641038400000</td>\n",
457
  " <td>2022-01-01 12:00:00</td>\n",
458
  " <td>2022-01-01</td>\n",
459
+ " <td>12</td>\n",
460
  " <td>0.355667</td>\n",
461
  " <td>0.277667</td>\n",
462
  " <td>0.038027</td>\n",
 
466
  "</div>"
467
  ],
468
  "text/plain": [
469
+ " timestamp datetime date hour \\\n",
470
+ "0 1641024000000 2022-01-01 08:00:00 2022-01-01 8 \n",
471
+ "1 1641027600000 2022-01-01 09:00:00 2022-01-01 9 \n",
472
+ "2 1641031200000 2022-01-01 10:00:00 2022-01-01 10 \n",
473
+ "3 1641034800000 2022-01-01 11:00:00 2022-01-01 11 \n",
474
+ "4 1641038400000 2022-01-01 12:00:00 2022-01-01 12 \n",
475
  "\n",
476
  " dk1_offshore_wind_forecastintraday_kwh \\\n",
477
  "0 0.611708 \n",
 
525
  " <tr style=\"text-align: right;\">\n",
526
  " <th></th>\n",
527
  " <th>timestamp</th>\n",
528
+ " <th>datetime</th>\n",
529
  " <th>date</th>\n",
530
+ " <th>hour</th>\n",
531
  " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
532
  " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
533
  " <th>dk1_solar_forecastintraday_kwh</th>\n",
 
535
  " </thead>\n",
536
  " <tbody>\n",
537
  " <tr>\n",
538
+ " <th>14426</th>\n",
539
+ " <td>1714590000000</td>\n",
540
+ " <td>2024-05-01 19:00:00</td>\n",
541
+ " <td>2024-05-01</td>\n",
542
+ " <td>19</td>\n",
543
+ " <td>0.816250</td>\n",
544
+ " <td>1.382208</td>\n",
545
+ " <td>0.272911</td>\n",
546
  " </tr>\n",
547
  " <tr>\n",
548
+ " <th>14427</th>\n",
549
+ " <td>1714593600000</td>\n",
550
+ " <td>2024-05-01 20:00:00</td>\n",
551
+ " <td>2024-05-01</td>\n",
552
+ " <td>20</td>\n",
553
+ " <td>0.848500</td>\n",
554
+ " <td>1.388583</td>\n",
555
+ " <td>0.046087</td>\n",
556
  " </tr>\n",
557
  " <tr>\n",
558
+ " <th>14428</th>\n",
559
+ " <td>1714597200000</td>\n",
560
+ " <td>2024-05-01 21:00:00</td>\n",
561
+ " <td>2024-05-01</td>\n",
562
+ " <td>21</td>\n",
563
+ " <td>0.886042</td>\n",
564
+ " <td>1.554792</td>\n",
565
+ " <td>0.001339</td>\n",
566
  " </tr>\n",
567
  " <tr>\n",
568
+ " <th>14429</th>\n",
569
+ " <td>1714600800000</td>\n",
570
+ " <td>2024-05-01 22:00:00</td>\n",
571
+ " <td>2024-05-01</td>\n",
572
+ " <td>22</td>\n",
573
+ " <td>0.919417</td>\n",
574
+ " <td>1.698875</td>\n",
575
  " <td>0.000000</td>\n",
576
  " </tr>\n",
577
  " <tr>\n",
578
+ " <th>14430</th>\n",
579
+ " <td>1714604400000</td>\n",
580
+ " <td>2024-05-01 23:00:00</td>\n",
581
+ " <td>2024-05-01</td>\n",
582
+ " <td>23</td>\n",
583
+ " <td>0.934708</td>\n",
584
+ " <td>1.739375</td>\n",
585
  " <td>0.000000</td>\n",
586
  " </tr>\n",
587
  " </tbody>\n",
 
589
  "</div>"
590
  ],
591
  "text/plain": [
592
+ " timestamp datetime date hour \\\n",
593
+ "14426 1714590000000 2024-05-01 19:00:00 2024-05-01 19 \n",
594
+ "14427 1714593600000 2024-05-01 20:00:00 2024-05-01 20 \n",
595
+ "14428 1714597200000 2024-05-01 21:00:00 2024-05-01 21 \n",
596
+ "14429 1714600800000 2024-05-01 22:00:00 2024-05-01 22 \n",
597
+ "14430 1714604400000 2024-05-01 23:00:00 2024-05-01 23 \n",
598
  "\n",
599
  " dk1_offshore_wind_forecastintraday_kwh \\\n",
600
+ "14426 0.816250 \n",
601
+ "14427 0.848500 \n",
602
+ "14428 0.886042 \n",
603
+ "14429 0.919417 \n",
604
+ "14430 0.934708 \n",
605
  "\n",
606
  " dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \n",
607
+ "14426 1.382208 0.272911 \n",
608
+ "14427 1.388583 0.046087 \n",
609
+ "14428 1.554792 0.001339 \n",
610
+ "14429 1.698875 0.000000 \n",
611
+ "14430 1.739375 0.000000 "
612
  ]
613
  },
614
  "execution_count": 10,
 
631
  "output_type": "stream",
632
  "text": [
633
  "<class 'pandas.core.frame.DataFrame'>\n",
634
+ "RangeIndex: 14431 entries, 0 to 14430\n",
635
+ "Data columns (total 7 columns):\n",
636
  " # Column Non-Null Count Dtype \n",
637
  "--- ------ -------------- ----- \n",
638
+ " 0 timestamp 14431 non-null int64 \n",
639
+ " 1 datetime 14431 non-null datetime64[ns]\n",
640
+ " 2 date 14431 non-null object \n",
641
+ " 3 hour 14431 non-null int64 \n",
642
+ " 4 dk1_offshore_wind_forecastintraday_kwh 14415 non-null float64 \n",
643
+ " 5 dk1_onshore_wind_forecastintraday_kwh 14415 non-null float64 \n",
644
+ " 6 dk1_solar_forecastintraday_kwh 14415 non-null float64 \n",
645
+ "dtypes: datetime64[ns](1), float64(3), int64(2), object(1)\n",
646
+ "memory usage: 789.3+ KB\n"
647
  ]
648
  }
649
  ],
 
710
  " <tr style=\"text-align: right;\">\n",
711
  " <th></th>\n",
712
  " <th>timestamp</th>\n",
713
+ " <th>datetime</th>\n",
714
  " <th>date</th>\n",
715
+ " <th>hour</th>\n",
716
  " <th>temperature_2m</th>\n",
717
  " <th>relative_humidity_2m</th>\n",
718
  " <th>precipitation</th>\n",
 
728
  " <tr>\n",
729
  " <th>0</th>\n",
730
  " <td>1640995200000</td>\n",
 
731
  " <td>2022-01-01 00:00:00</td>\n",
732
+ " <td>2022-01-01</td>\n",
733
+ " <td>0</td>\n",
734
  " <td>6.7</td>\n",
735
  " <td>100.0</td>\n",
736
  " <td>0.0</td>\n",
 
744
  " <tr>\n",
745
  " <th>1</th>\n",
746
  " <td>1640998800000</td>\n",
 
747
  " <td>2022-01-01 01:00:00</td>\n",
748
+ " <td>2022-01-01</td>\n",
749
+ " <td>1</td>\n",
750
  " <td>6.6</td>\n",
751
  " <td>100.0</td>\n",
752
  " <td>0.0</td>\n",
 
760
  " <tr>\n",
761
  " <th>2</th>\n",
762
  " <td>1641002400000</td>\n",
 
763
  " <td>2022-01-01 02:00:00</td>\n",
764
+ " <td>2022-01-01</td>\n",
765
+ " <td>2</td>\n",
766
  " <td>6.7</td>\n",
767
  " <td>99.0</td>\n",
768
  " <td>0.0</td>\n",
 
776
  " <tr>\n",
777
  " <th>3</th>\n",
778
  " <td>1641006000000</td>\n",
 
779
  " <td>2022-01-01 03:00:00</td>\n",
780
+ " <td>2022-01-01</td>\n",
781
+ " <td>3</td>\n",
782
  " <td>6.7</td>\n",
783
  " <td>100.0</td>\n",
784
  " <td>0.0</td>\n",
 
792
  " <tr>\n",
793
  " <th>4</th>\n",
794
  " <td>1641009600000</td>\n",
 
795
  " <td>2022-01-01 04:00:00</td>\n",
796
+ " <td>2022-01-01</td>\n",
797
+ " <td>4</td>\n",
798
  " <td>6.7</td>\n",
799
  " <td>99.0</td>\n",
800
  " <td>0.0</td>\n",
 
810
  "</div>"
811
  ],
812
  "text/plain": [
813
+ " timestamp datetime date hour temperature_2m \\\n",
814
+ "0 1640995200000 2022-01-01 00:00:00 2022-01-01 0 6.7 \n",
815
+ "1 1640998800000 2022-01-01 01:00:00 2022-01-01 1 6.6 \n",
816
+ "2 1641002400000 2022-01-01 02:00:00 2022-01-01 2 6.7 \n",
817
+ "3 1641006000000 2022-01-01 03:00:00 2022-01-01 3 6.7 \n",
818
+ "4 1641009600000 2022-01-01 04:00:00 2022-01-01 4 6.7 \n",
819
  "\n",
820
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
821
  "0 100.0 0.0 0.0 0.0 3.0 \n",
 
869
  " <tr style=\"text-align: right;\">\n",
870
  " <th></th>\n",
871
  " <th>timestamp</th>\n",
872
+ " <th>datetime</th>\n",
873
  " <th>date</th>\n",
874
+ " <th>hour</th>\n",
875
  " <th>temperature_2m</th>\n",
876
  " <th>relative_humidity_2m</th>\n",
877
  " <th>precipitation</th>\n",
 
885
  " </thead>\n",
886
  " <tbody>\n",
887
  " <tr>\n",
888
+ " <th>20419</th>\n",
889
+ " <td>1714503600000</td>\n",
890
+ " <td>2024-04-30 19:00:00</td>\n",
891
+ " <td>2024-04-30</td>\n",
892
+ " <td>19</td>\n",
893
+ " <td>13.8</td>\n",
894
+ " <td>64.0</td>\n",
895
  " <td>0.0</td>\n",
896
  " <td>0.0</td>\n",
897
  " <td>0.0</td>\n",
898
  " <td>0.0</td>\n",
899
+ " <td>6.0</td>\n",
900
+ " <td>15.3</td>\n",
901
+ " <td>26.3</td>\n",
902
  " </tr>\n",
903
  " <tr>\n",
904
+ " <th>20420</th>\n",
905
+ " <td>1714507200000</td>\n",
906
+ " <td>2024-04-30 20:00:00</td>\n",
907
+ " <td>2024-04-30</td>\n",
908
+ " <td>20</td>\n",
909
+ " <td>13.5</td>\n",
910
+ " <td>66.0</td>\n",
911
  " <td>0.0</td>\n",
912
  " <td>0.0</td>\n",
913
  " <td>0.0</td>\n",
914
  " <td>0.0</td>\n",
915
+ " <td>2.0</td>\n",
916
+ " <td>18.7</td>\n",
917
+ " <td>32.8</td>\n",
918
  " </tr>\n",
919
  " <tr>\n",
920
+ " <th>20421</th>\n",
921
+ " <td>1714510800000</td>\n",
922
+ " <td>2024-04-30 21:00:00</td>\n",
923
+ " <td>2024-04-30</td>\n",
924
+ " <td>21</td>\n",
925
+ " <td>13.4</td>\n",
926
+ " <td>67.0</td>\n",
927
  " <td>0.0</td>\n",
928
  " <td>0.0</td>\n",
929
  " <td>0.0</td>\n",
930
  " <td>0.0</td>\n",
931
+ " <td>13.0</td>\n",
932
+ " <td>21.1</td>\n",
933
+ " <td>38.2</td>\n",
934
  " </tr>\n",
935
  " <tr>\n",
936
+ " <th>20422</th>\n",
937
+ " <td>1714514400000</td>\n",
938
+ " <td>2024-04-30 22:00:00</td>\n",
939
+ " <td>2024-04-30</td>\n",
940
+ " <td>22</td>\n",
941
+ " <td>12.8</td>\n",
942
+ " <td>67.0</td>\n",
943
  " <td>0.0</td>\n",
944
  " <td>0.0</td>\n",
945
  " <td>0.0</td>\n",
946
  " <td>0.0</td>\n",
947
+ " <td>9.0</td>\n",
948
+ " <td>21.0</td>\n",
949
+ " <td>38.5</td>\n",
950
  " </tr>\n",
951
  " <tr>\n",
952
+ " <th>20423</th>\n",
953
+ " <td>1714518000000</td>\n",
954
+ " <td>2024-04-30 23:00:00</td>\n",
955
+ " <td>2024-04-30</td>\n",
956
+ " <td>23</td>\n",
957
+ " <td>12.0</td>\n",
958
+ " <td>70.0</td>\n",
959
  " <td>0.0</td>\n",
960
  " <td>0.0</td>\n",
961
  " <td>0.0</td>\n",
962
  " <td>0.0</td>\n",
963
+ " <td>18.0</td>\n",
964
+ " <td>20.7</td>\n",
965
+ " <td>38.5</td>\n",
966
  " </tr>\n",
967
  " </tbody>\n",
968
  "</table>\n",
969
  "</div>"
970
  ],
971
  "text/plain": [
972
+ " timestamp datetime date hour temperature_2m \\\n",
973
+ "20419 1714503600000 2024-04-30 19:00:00 2024-04-30 19 13.8 \n",
974
+ "20420 1714507200000 2024-04-30 20:00:00 2024-04-30 20 13.5 \n",
975
+ "20421 1714510800000 2024-04-30 21:00:00 2024-04-30 21 13.4 \n",
976
+ "20422 1714514400000 2024-04-30 22:00:00 2024-04-30 22 12.8 \n",
977
+ "20423 1714518000000 2024-04-30 23:00:00 2024-04-30 23 12.0 \n",
978
  "\n",
979
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
980
+ "20419 64.0 0.0 0.0 0.0 0.0 \n",
981
+ "20420 66.0 0.0 0.0 0.0 0.0 \n",
982
+ "20421 67.0 0.0 0.0 0.0 0.0 \n",
983
+ "20422 67.0 0.0 0.0 0.0 0.0 \n",
984
+ "20423 70.0 0.0 0.0 0.0 0.0 \n",
985
  "\n",
986
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
987
+ "20419 6.0 15.3 26.3 \n",
988
+ "20420 2.0 18.7 32.8 \n",
989
+ "20421 13.0 21.1 38.2 \n",
990
+ "20422 9.0 21.0 38.5 \n",
991
+ "20423 18.0 20.7 38.5 "
992
  ]
993
  },
994
  "execution_count": 14,
 
1011
  "output_type": "stream",
1012
  "text": [
1013
  "<class 'pandas.core.frame.DataFrame'>\n",
1014
+ "Int64Index: 20424 entries, 0 to 20423\n",
1015
+ "Data columns (total 13 columns):\n",
1016
  " # Column Non-Null Count Dtype \n",
1017
  "--- ------ -------------- ----- \n",
1018
+ " 0 timestamp 20424 non-null int64 \n",
1019
+ " 1 datetime 20424 non-null datetime64[ns]\n",
1020
+ " 2 date 20424 non-null object \n",
1021
+ " 3 hour 20424 non-null int64 \n",
1022
+ " 4 temperature_2m 20424 non-null float64 \n",
1023
+ " 5 relative_humidity_2m 20424 non-null float64 \n",
1024
+ " 6 precipitation 20424 non-null float64 \n",
1025
+ " 7 rain 20424 non-null float64 \n",
1026
+ " 8 snowfall 20424 non-null float64 \n",
1027
+ " 9 weather_code 20424 non-null float64 \n",
1028
+ " 10 cloud_cover 20424 non-null float64 \n",
1029
+ " 11 wind_speed_10m 20424 non-null float64 \n",
1030
+ " 12 wind_gusts_10m 20424 non-null float64 \n",
1031
+ "dtypes: datetime64[ns](1), float64(9), int64(2), object(1)\n",
1032
+ "memory usage: 2.2+ MB\n"
1033
  ]
1034
  }
1035
  ],
 
1085
  " <tr style=\"text-align: right;\">\n",
1086
  " <th></th>\n",
1087
  " <th>timestamp</th>\n",
1088
+ " <th>datetime</th>\n",
1089
  " <th>date</th>\n",
1090
+ " <th>hour</th>\n",
1091
  " <th>temperature_2m</th>\n",
1092
  " <th>relative_humidity_2m</th>\n",
1093
  " <th>precipitation</th>\n",
 
1102
  " <tbody>\n",
1103
  " <tr>\n",
1104
  " <th>0</th>\n",
1105
+ " <td>1714608000000</td>\n",
1106
+ " <td>2024-05-02 00:00:00</td>\n",
1107
+ " <td>2024-05-02</td>\n",
1108
+ " <td>0</td>\n",
1109
+ " <td>14.9</td>\n",
1110
+ " <td>66.0</td>\n",
1111
  " <td>0.0</td>\n",
1112
  " <td>0.0</td>\n",
1113
  " <td>0.0</td>\n",
1114
+ " <td>0.0</td>\n",
1115
+ " <td>13.0</td>\n",
1116
+ " <td>21.6</td>\n",
1117
+ " <td>41.4</td>\n",
1118
  " </tr>\n",
1119
  " <tr>\n",
1120
  " <th>1</th>\n",
1121
+ " <td>1714611600000</td>\n",
1122
+ " <td>2024-05-02 01:00:00</td>\n",
1123
+ " <td>2024-05-02</td>\n",
1124
+ " <td>1</td>\n",
1125
+ " <td>14.2</td>\n",
1126
+ " <td>71.0</td>\n",
1127
  " <td>0.0</td>\n",
1128
  " <td>0.0</td>\n",
1129
  " <td>0.0</td>\n",
1130
+ " <td>0.0</td>\n",
1131
+ " <td>4.0</td>\n",
1132
+ " <td>20.5</td>\n",
1133
+ " <td>37.1</td>\n",
1134
  " </tr>\n",
1135
  " <tr>\n",
1136
  " <th>2</th>\n",
1137
+ " <td>1714615200000</td>\n",
1138
+ " <td>2024-05-02 02:00:00</td>\n",
1139
+ " <td>2024-05-02</td>\n",
1140
+ " <td>2</td>\n",
1141
+ " <td>13.4</td>\n",
1142
+ " <td>73.0</td>\n",
1143
  " <td>0.0</td>\n",
1144
  " <td>0.0</td>\n",
1145
  " <td>0.0</td>\n",
1146
+ " <td>2.0</td>\n",
1147
+ " <td>70.0</td>\n",
1148
+ " <td>21.2</td>\n",
1149
+ " <td>36.7</td>\n",
1150
  " </tr>\n",
1151
  " <tr>\n",
1152
  " <th>3</th>\n",
1153
+ " <td>1714618800000</td>\n",
1154
+ " <td>2024-05-02 03:00:00</td>\n",
1155
+ " <td>2024-05-02</td>\n",
 
 
 
 
 
1156
  " <td>3</td>\n",
1157
+ " <td>13.2</td>\n",
1158
+ " <td>72.0</td>\n",
1159
+ " <td>0.1</td>\n",
1160
+ " <td>0.1</td>\n",
1161
+ " <td>0.0</td>\n",
1162
+ " <td>51.0</td>\n",
1163
+ " <td>51.0</td>\n",
1164
+ " <td>22.3</td>\n",
1165
+ " <td>39.2</td>\n",
1166
  " </tr>\n",
1167
  " <tr>\n",
1168
  " <th>4</th>\n",
1169
+ " <td>1714622400000</td>\n",
1170
+ " <td>2024-05-02 04:00:00</td>\n",
1171
+ " <td>2024-05-02</td>\n",
1172
+ " <td>4</td>\n",
1173
+ " <td>12.7</td>\n",
1174
+ " <td>73.0</td>\n",
1175
  " <td>0.0</td>\n",
1176
  " <td>0.0</td>\n",
1177
  " <td>0.0</td>\n",
1178
+ " <td>2.0</td>\n",
1179
+ " <td>78.0</td>\n",
1180
+ " <td>21.6</td>\n",
1181
+ " <td>38.9</td>\n",
1182
  " </tr>\n",
1183
  " </tbody>\n",
1184
  "</table>\n",
1185
  "</div>"
1186
  ],
1187
  "text/plain": [
1188
+ " timestamp datetime date hour temperature_2m \\\n",
1189
+ "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
1190
+ "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
1191
+ "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
1192
+ "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
1193
+ "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
1194
  "\n",
1195
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1196
+ "0 66.0 0.0 0.0 0.0 0.0 \n",
1197
+ "1 71.0 0.0 0.0 0.0 0.0 \n",
1198
+ "2 73.0 0.0 0.0 0.0 2.0 \n",
1199
+ "3 72.0 0.1 0.1 0.0 51.0 \n",
1200
+ "4 73.0 0.0 0.0 0.0 2.0 \n",
1201
  "\n",
1202
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1203
+ "0 13.0 21.6 41.4 \n",
1204
+ "1 4.0 20.5 37.1 \n",
1205
+ "2 70.0 21.2 36.7 \n",
1206
+ "3 51.0 22.3 39.2 \n",
1207
+ "4 78.0 21.6 38.9 "
1208
  ]
1209
  },
1210
  "execution_count": 17,
 
1244
  " <tr style=\"text-align: right;\">\n",
1245
  " <th></th>\n",
1246
  " <th>timestamp</th>\n",
1247
+ " <th>datetime</th>\n",
1248
  " <th>date</th>\n",
1249
+ " <th>hour</th>\n",
1250
  " <th>temperature_2m</th>\n",
1251
  " <th>relative_humidity_2m</th>\n",
1252
  " <th>precipitation</th>\n",
 
1261
  " <tbody>\n",
1262
  " <tr>\n",
1263
  " <th>115</th>\n",
1264
+ " <td>1715022000000</td>\n",
1265
+ " <td>2024-05-06 19:00:00</td>\n",
1266
+ " <td>2024-05-06</td>\n",
1267
+ " <td>19</td>\n",
1268
+ " <td>10.7</td>\n",
1269
+ " <td>91.0</td>\n",
1270
+ " <td>1.4</td>\n",
1271
+ " <td>1.4</td>\n",
1272
+ " <td>0.0</td>\n",
1273
+ " <td>61.0</td>\n",
1274
+ " <td>100.0</td>\n",
1275
+ " <td>16.6</td>\n",
1276
+ " <td>32.0</td>\n",
1277
  " </tr>\n",
1278
  " <tr>\n",
1279
  " <th>116</th>\n",
1280
+ " <td>1715025600000</td>\n",
1281
+ " <td>2024-05-06 20:00:00</td>\n",
1282
+ " <td>2024-05-06</td>\n",
1283
+ " <td>20</td>\n",
 
 
 
 
 
 
1284
  " <td>10.1</td>\n",
1285
+ " <td>90.0</td>\n",
1286
+ " <td>1.4</td>\n",
1287
+ " <td>1.4</td>\n",
1288
+ " <td>0.0</td>\n",
1289
+ " <td>61.0</td>\n",
1290
+ " <td>100.0</td>\n",
1291
+ " <td>19.5</td>\n",
1292
+ " <td>37.1</td>\n",
1293
  " </tr>\n",
1294
  " <tr>\n",
1295
  " <th>117</th>\n",
1296
+ " <td>1715029200000</td>\n",
1297
+ " <td>2024-05-06 21:00:00</td>\n",
1298
+ " <td>2024-05-06</td>\n",
1299
+ " <td>21</td>\n",
 
 
 
 
 
 
1300
  " <td>9.5</td>\n",
1301
+ " <td>88.0</td>\n",
1302
+ " <td>1.4</td>\n",
1303
+ " <td>1.4</td>\n",
1304
+ " <td>0.0</td>\n",
1305
+ " <td>61.0</td>\n",
1306
+ " <td>100.0</td>\n",
1307
+ " <td>21.6</td>\n",
1308
+ " <td>42.1</td>\n",
1309
  " </tr>\n",
1310
  " <tr>\n",
1311
  " <th>118</th>\n",
1312
+ " <td>1715032800000</td>\n",
1313
+ " <td>2024-05-06 22:00:00</td>\n",
1314
+ " <td>2024-05-06</td>\n",
1315
+ " <td>22</td>\n",
1316
+ " <td>9.3</td>\n",
1317
+ " <td>86.0</td>\n",
1318
+ " <td>0.6</td>\n",
1319
+ " <td>0.6</td>\n",
1320
  " <td>0.0</td>\n",
1321
+ " <td>3.0</td>\n",
1322
+ " <td>100.0</td>\n",
1323
+ " <td>22.0</td>\n",
1324
+ " <td>41.0</td>\n",
1325
  " </tr>\n",
1326
  " <tr>\n",
1327
  " <th>119</th>\n",
1328
+ " <td>1715036400000</td>\n",
1329
+ " <td>2024-05-06 23:00:00</td>\n",
1330
+ " <td>2024-05-06</td>\n",
1331
+ " <td>23</td>\n",
1332
+ " <td>9.1</td>\n",
1333
+ " <td>84.0</td>\n",
1334
+ " <td>0.6</td>\n",
1335
+ " <td>0.6</td>\n",
1336
  " <td>0.0</td>\n",
1337
+ " <td>3.0</td>\n",
1338
+ " <td>100.0</td>\n",
1339
+ " <td>21.3</td>\n",
1340
+ " <td>40.3</td>\n",
 
 
1341
  " </tr>\n",
1342
  " </tbody>\n",
1343
  "</table>\n",
1344
  "</div>"
1345
  ],
1346
  "text/plain": [
1347
+ " timestamp datetime date hour temperature_2m \\\n",
1348
+ "115 1715022000000 2024-05-06 19:00:00 2024-05-06 19 10.7 \n",
1349
+ "116 1715025600000 2024-05-06 20:00:00 2024-05-06 20 10.1 \n",
1350
+ "117 1715029200000 2024-05-06 21:00:00 2024-05-06 21 9.5 \n",
1351
+ "118 1715032800000 2024-05-06 22:00:00 2024-05-06 22 9.3 \n",
1352
+ "119 1715036400000 2024-05-06 23:00:00 2024-05-06 23 9.1 \n",
1353
  "\n",
1354
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1355
+ "115 91.0 1.4 1.4 0.0 61.0 \n",
1356
+ "116 90.0 1.4 1.4 0.0 61.0 \n",
1357
+ "117 88.0 1.4 1.4 0.0 61.0 \n",
1358
+ "118 86.0 0.6 0.6 0.0 3.0 \n",
1359
+ "119 84.0 0.6 0.6 0.0 3.0 \n",
1360
  "\n",
1361
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1362
+ "115 100.0 16.6 32.0 \n",
1363
+ "116 100.0 19.5 37.1 \n",
1364
+ "117 100.0 21.6 42.1 \n",
1365
+ "118 100.0 22.0 41.0 \n",
1366
+ "119 100.0 21.3 40.3 "
1367
  ]
1368
  },
1369
  "execution_count": 18,
 
1387
  "text": [
1388
  "<class 'pandas.core.frame.DataFrame'>\n",
1389
  "RangeIndex: 120 entries, 0 to 119\n",
1390
+ "Data columns (total 13 columns):\n",
1391
  " # Column Non-Null Count Dtype \n",
1392
  "--- ------ -------------- ----- \n",
1393
  " 0 timestamp 120 non-null int64 \n",
1394
+ " 1 datetime 120 non-null datetime64[ns]\n",
1395
+ " 2 date 120 non-null object \n",
1396
+ " 3 hour 120 non-null int64 \n",
1397
+ " 4 temperature_2m 120 non-null float64 \n",
1398
+ " 5 relative_humidity_2m 120 non-null float64 \n",
1399
+ " 6 precipitation 120 non-null float64 \n",
1400
+ " 7 rain 120 non-null float64 \n",
1401
+ " 8 snowfall 120 non-null float64 \n",
1402
+ " 9 weather_code 120 non-null float64 \n",
1403
+ " 10 cloud_cover 120 non-null float64 \n",
1404
+ " 11 wind_speed_10m 120 non-null float64 \n",
1405
+ " 12 wind_gusts_10m 120 non-null float64 \n",
1406
+ "dtypes: datetime64[ns](1), float64(9), int64(2), object(1)\n",
1407
+ "memory usage: 12.3+ KB\n"
1408
  ]
1409
  }
1410
  ],
 
1425
  "cell_type": "code",
1426
  "execution_count": 20,
1427
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1428
  "outputs": [],
1429
  "source": [
1430
+ "calender_df = calendar.get_calendar()"
 
1431
  ]
1432
  },
1433
  {
1434
  "cell_type": "code",
1435
+ "execution_count": 21,
1436
  "metadata": {},
1437
  "outputs": [
1438
  {
 
1457
  " <tr style=\"text-align: right;\">\n",
1458
  " <th></th>\n",
1459
  " <th>date</th>\n",
1460
+ " <th>day</th>\n",
1461
+ " <th>month</th>\n",
1462
+ " <th>holiday</th>\n",
1463
  " </tr>\n",
1464
  " </thead>\n",
1465
  " <tbody>\n",
1466
  " <tr>\n",
1467
  " <th>0</th>\n",
1468
  " <td>2022-01-01</td>\n",
1469
+ " <td>5</td>\n",
1470
+ " <td>1</td>\n",
1471
+ " <td>1</td>\n",
1472
  " </tr>\n",
1473
  " <tr>\n",
1474
  " <th>1</th>\n",
1475
  " <td>2022-01-02</td>\n",
1476
+ " <td>6</td>\n",
1477
+ " <td>1</td>\n",
1478
+ " <td>1</td>\n",
1479
  " </tr>\n",
1480
  " <tr>\n",
1481
  " <th>2</th>\n",
1482
  " <td>2022-01-03</td>\n",
1483
+ " <td>0</td>\n",
1484
+ " <td>1</td>\n",
1485
+ " <td>0</td>\n",
1486
  " </tr>\n",
1487
  " <tr>\n",
1488
  " <th>3</th>\n",
1489
  " <td>2022-01-04</td>\n",
1490
+ " <td>1</td>\n",
1491
+ " <td>1</td>\n",
1492
+ " <td>0</td>\n",
1493
  " </tr>\n",
1494
  " <tr>\n",
1495
  " <th>4</th>\n",
1496
  " <td>2022-01-05</td>\n",
1497
+ " <td>2</td>\n",
1498
+ " <td>1</td>\n",
1499
+ " <td>0</td>\n",
1500
  " </tr>\n",
1501
  " </tbody>\n",
1502
  "</table>\n",
1503
  "</div>"
1504
  ],
1505
  "text/plain": [
1506
+ " date day month holiday\n",
1507
+ "0 2022-01-01 5 1 1\n",
1508
+ "1 2022-01-02 6 1 1\n",
1509
+ "2 2022-01-03 0 1 0\n",
1510
+ "3 2022-01-04 1 1 0\n",
1511
+ "4 2022-01-05 2 1 0"
1512
  ]
1513
  },
1514
+ "execution_count": 21,
1515
  "metadata": {},
1516
  "output_type": "execute_result"
1517
  }
 
1523
  },
1524
  {
1525
  "cell_type": "code",
1526
+ "execution_count": 22,
1527
  "metadata": {},
1528
  "outputs": [
1529
  {
 
1548
  " <tr style=\"text-align: right;\">\n",
1549
  " <th></th>\n",
1550
  " <th>date</th>\n",
1551
+ " <th>day</th>\n",
1552
+ " <th>month</th>\n",
1553
+ " <th>holiday</th>\n",
1554
  " </tr>\n",
1555
  " </thead>\n",
1556
  " <tbody>\n",
1557
  " <tr>\n",
1558
  " <th>1091</th>\n",
1559
  " <td>2024-12-27</td>\n",
1560
+ " <td>4</td>\n",
1561
+ " <td>12</td>\n",
1562
+ " <td>0</td>\n",
1563
  " </tr>\n",
1564
  " <tr>\n",
1565
  " <th>1092</th>\n",
1566
  " <td>2024-12-28</td>\n",
1567
+ " <td>5</td>\n",
1568
+ " <td>12</td>\n",
1569
+ " <td>1</td>\n",
1570
  " </tr>\n",
1571
  " <tr>\n",
1572
  " <th>1093</th>\n",
1573
  " <td>2024-12-29</td>\n",
1574
+ " <td>6</td>\n",
1575
+ " <td>12</td>\n",
1576
+ " <td>1</td>\n",
1577
  " </tr>\n",
1578
  " <tr>\n",
1579
  " <th>1094</th>\n",
1580
  " <td>2024-12-30</td>\n",
1581
+ " <td>0</td>\n",
1582
+ " <td>12</td>\n",
1583
+ " <td>0</td>\n",
1584
  " </tr>\n",
1585
  " <tr>\n",
1586
  " <th>1095</th>\n",
1587
  " <td>2024-12-31</td>\n",
1588
+ " <td>1</td>\n",
1589
+ " <td>12</td>\n",
1590
+ " <td>0</td>\n",
1591
  " </tr>\n",
1592
  " </tbody>\n",
1593
  "</table>\n",
1594
  "</div>"
1595
  ],
1596
  "text/plain": [
1597
+ " date day month holiday\n",
1598
+ "1091 2024-12-27 4 12 0\n",
1599
+ "1092 2024-12-28 5 12 1\n",
1600
+ "1093 2024-12-29 6 12 1\n",
1601
+ "1094 2024-12-30 0 12 0\n",
1602
+ "1095 2024-12-31 1 12 0"
1603
  ]
1604
  },
1605
+ "execution_count": 22,
1606
  "metadata": {},
1607
  "output_type": "execute_result"
1608
  }
 
1612
  "calender_df.tail(5)"
1613
  ]
1614
  },
1615
+ {
1616
+ "cell_type": "code",
1617
+ "execution_count": 23,
1618
+ "metadata": {},
1619
+ "outputs": [
1620
+ {
1621
+ "name": "stdout",
1622
+ "output_type": "stream",
1623
+ "text": [
1624
+ "<class 'pandas.core.frame.DataFrame'>\n",
1625
+ "RangeIndex: 1096 entries, 0 to 1095\n",
1626
+ "Data columns (total 4 columns):\n",
1627
+ " # Column Non-Null Count Dtype \n",
1628
+ "--- ------ -------------- ----- \n",
1629
+ " 0 date 1096 non-null object\n",
1630
+ " 1 day 1096 non-null int64 \n",
1631
+ " 2 month 1096 non-null int64 \n",
1632
+ " 3 holiday 1096 non-null int64 \n",
1633
+ "dtypes: int64(3), object(1)\n",
1634
+ "memory usage: 34.4+ KB\n"
1635
+ ]
1636
+ }
1637
+ ],
1638
+ "source": [
1639
+ "# Showing the information for the calender dataframe\n",
1640
+ "calender_df.info()"
1641
+ ]
1642
+ },
1643
  {
1644
  "cell_type": "code",
1645
  "execution_count": 24,
 
1651
  "text": [
1652
  "<class 'pandas.core.frame.DataFrame'>\n",
1653
  "RangeIndex: 1096 entries, 0 to 1095\n",
1654
+ "Data columns (total 4 columns):\n",
1655
+ " # Column Non-Null Count Dtype \n",
1656
+ "--- ------ -------------- ----- \n",
1657
+ " 0 date 1096 non-null object\n",
1658
+ " 1 day 1096 non-null int64 \n",
1659
+ " 2 month 1096 non-null int64 \n",
1660
+ " 3 holiday 1096 non-null int64 \n",
1661
+ "dtypes: int64(3), object(1)\n",
1662
+ "memory usage: 34.4+ KB\n"
1663
  ]
1664
  }
1665
  ],
 
1711
  "### <span style=\"color:#2656a3;\"> 🪄 Creating Feature Groups\n",
1712
  "A feature group can be seen as a collection of conceptually related features. In this case we create feature groups for the \n",
1713
  "- electricity price data,\n",
1714
+ "- forecast_renewable_energy,\n",
1715
  "- weather data,\n",
1716
+ "- calendar data.\n",
 
1717
  "\n",
1718
  "We specify a `primary_key` as `date`, so we are able to join them when we create a dataset for training later in part 03 the training_pipeline.\n",
1719
  "We define a name and a short description of the feature group's contents and a version number. \n",
 
1729
  "metadata": {},
1730
  "outputs": [],
1731
  "source": [
1732
+ "# Creating the feature group for the electricity prices\n",
1733
+ "electricity_fg = fs.get_or_create_feature_group(\n",
1734
+ " name=\"electricity_prices\",\n",
1735
  " version=1,\n",
1736
+ " description=\"Electricity prices from Energidata API\",\n",
1737
+ " primary_key=[\"date\",\"timestamp\"], \n",
 
1738
  " online_enabled=True,\n",
1739
+ " event_time=\"timestamp\",\n",
1740
  ")"
1741
  ]
1742
  },
 
1757
  "output_type": "stream",
1758
  "text": [
1759
  "Feature Group created successfully, explore it at \n",
1760
+ "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/774496\n"
1761
  ]
1762
  },
1763
  {
1764
  "data": {
1765
  "application/vnd.jupyter.widget-view+json": {
1766
+ "model_id": "a746b736b329402299dc55c4ebd07d53",
1767
  "version_major": 2,
1768
  "version_minor": 0
1769
  },
1770
  "text/plain": [
1771
+ "Uploading Dataframe: 0.00% | | Rows 0/20445 | Elapsed Time: 00:00 | Remaining Time: ?"
1772
  ]
1773
  },
1774
  "metadata": {},
 
1778
  "name": "stdout",
1779
  "output_type": "stream",
1780
  "text": [
1781
+ "Launching job: electricity_prices_1_offline_fg_materialization\n",
1782
  "Job started successfully, you can follow the progress at \n",
1783
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/electricity_prices_1_offline_fg_materialization/executions\n"
1784
  ]
1785
  },
1786
  {
1787
  "data": {
1788
  "text/plain": [
1789
+ "(<hsfs.core.job.Job at 0x16ff23950>, None)"
1790
  ]
1791
  },
1792
  "execution_count": 27,
 
1795
  }
1796
  ],
1797
  "source": [
1798
+ "# Inserting the electricity_df into the feature group named electricity_fg\n",
1799
+ "electricity_fg.insert(electricity_df)"
1800
  ]
1801
  },
1802
  {
 
1812
  "metadata": {},
1813
  "outputs": [],
1814
  "source": [
1815
+ "# List of descriptions for electricity features\n",
1816
+ "electricity_feature_descriptions = [\n",
1817
  " {\"name\": \"timestamp\", \"description\": \"Timestamp for the event_time\"},\n",
1818
+ " {\"name\": \"date\", \"description\": \"Date of the electricity measurement\"},\n",
1819
+ " {\"name\": \"datetime\", \"description\": \"Date and time of the electricity measurement\"},\n",
1820
+ " {\"name\": \"hour\", \"description\": \"Hour of day\"},\n",
1821
+ " {\"name\": \"dk1_spotpricedkk_kwh\", \"description\": \"Spot price in DKK per KWH\"}, \n",
 
 
 
 
 
 
 
1822
  "]\n",
1823
  "\n",
1824
  "# Updating feature descriptions\n",
1825
+ "for desc in electricity_feature_descriptions: \n",
1826
+ " electricity_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1827
  ]
1828
  },
1829
  {
1830
  "cell_type": "markdown",
1831
  "metadata": {},
1832
  "source": [
1833
+ "We replicate the process for the `forecast_renewable_energy_fg`, `weather_fg` and `danish_calendar_fg` by establishing feature groups and inserting the dataframes into their respective feature groups."
1834
  ]
1835
  },
1836
  {
 
1839
  "metadata": {},
1840
  "outputs": [],
1841
  "source": [
1842
+ "# # Creating the feature group for the forecast of renewable energy\n",
1843
+ "# forecast_renewable_energy_fg = fs.get_or_create_feature_group(\n",
1844
+ "# name=\"forecast_renewable_energy\",\n",
1845
+ "# version=1,\n",
1846
+ "# description=\"Forecast on Renewable Energy on ForecastType from Energidata API\",\n",
1847
+ "# primary_key=[\"date\",\"timestamp\"], \n",
1848
+ "# online_enabled=True,\n",
1849
+ "# event_time=\"timestamp\",\n",
1850
+ "# )"
1851
  ]
1852
  },
1853
  {
1854
  "cell_type": "code",
1855
  "execution_count": 30,
1856
  "metadata": {},
1857
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1858
  "source": [
1859
+ "# # Inserting the forecast_renewable_energy_df into the feature group named forecast_renewable_energy_fg\n",
1860
+ "# forecast_renewable_energy_fg.insert(forecast_renewable_energy_df)"
1861
  ]
1862
  },
1863
  {
 
1866
  "metadata": {},
1867
  "outputs": [],
1868
  "source": [
1869
+ "# # List of descriptions for forecast_renewable_energy features\n",
1870
+ "# forecast_renewable_energy_feature_descriptions = [\n",
1871
+ "# {\"name\": \"timestamp\", \"description\": \"Timestamp for the event_time\"},\n",
1872
+ "# {\"name\": \"date\", \"description\": \"Date of the forecast\"},\n",
1873
+ "# {\"name\": \"datetime\", \"description\": \"Date and time for the forecast\"},\n",
1874
+ "# {\"name\": \"hour\", \"description\": \"Hour of day\"},\n",
1875
+ "# {\"name\": \"dk1_offshore_wind_forecastintraday_kwh\", \"description\": \"The forecast for the coming day at 6am Danish time zone\"},\n",
1876
+ "# ]\n",
1877
  "\n",
1878
+ "# # Updating feature descriptions\n",
1879
+ "# for desc in forecast_renewable_energy_feature_descriptions: \n",
1880
+ "# forecast_renewable_energy_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1881
  ]
1882
  },
1883
  {
 
1886
  "metadata": {},
1887
  "outputs": [],
1888
  "source": [
1889
+ "# Creating the feature group for the weather data\n",
1890
+ "weather_fg = fs.get_or_create_feature_group(\n",
1891
+ " name=\"weather_measurements\",\n",
1892
  " version=1,\n",
1893
+ " description=\"Weather measurements from Open Meteo API\",\n",
1894
+ " primary_key=[\"date\", \"timestamp\"], \n",
 
1895
  " event_time=\"timestamp\",\n",
1896
+ " online_enabled=True,\n",
1897
  ")"
1898
  ]
1899
  },
 
1907
  "output_type": "stream",
1908
  "text": [
1909
  "Feature Group created successfully, explore it at \n",
1910
+ "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/775512\n"
1911
  ]
1912
  },
1913
  {
1914
  "data": {
1915
  "application/vnd.jupyter.widget-view+json": {
1916
+ "model_id": "a172ef7a47d14b8ab9643d864c8189d1",
1917
  "version_major": 2,
1918
  "version_minor": 0
1919
  },
1920
  "text/plain": [
1921
+ "Uploading Dataframe: 0.00% | | Rows 0/20424 | Elapsed Time: 00:00 | Remaining Time: ?"
1922
  ]
1923
  },
1924
  "metadata": {},
 
1928
  "name": "stdout",
1929
  "output_type": "stream",
1930
  "text": [
1931
+ "Launching job: weather_measurements_1_offline_fg_materialization\n",
1932
  "Job started successfully, you can follow the progress at \n",
1933
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/weather_measurements_1_offline_fg_materialization/executions\n"
1934
  ]
1935
  },
1936
  {
1937
  "data": {
1938
  "text/plain": [
1939
+ "(<hsfs.core.job.Job at 0x16ffda610>, None)"
1940
  ]
1941
  },
1942
  "execution_count": 33,
 
1945
  }
1946
  ],
1947
  "source": [
1948
+ "# Inserting the historical_weather_df into the feature group named weather_fg\n",
1949
+ "weather_fg.insert(historical_weather_df)"
1950
  ]
1951
  },
1952
  {
 
1955
  "metadata": {},
1956
  "outputs": [],
1957
  "source": [
1958
+ "# List of descriptions for weather features\n",
1959
+ "weather_feature_descriptions = [\n",
1960
+ " {\"name\": \"timestamp\", \"description\": \"Timestamp for the weather measurement\"},\n",
1961
+ " {\"name\": \"date\", \"description\": \"Date of the weather measurement\"},\n",
1962
+ " {\"name\": \"datetime\", \"description\": \"Date and time of the weather measurement\"},\n",
1963
+ " {\"name\": \"hour\", \"description\": \"Hour of day\"},\n",
1964
+ " {\"name\": \"temperature_2m\", \"description\": \"Temperature at 2m above ground\"},\n",
1965
+ " {\"name\": \"relative_humidity_2m\", \"description\": \"Relative humidity at 2m above ground\"},\n",
1966
+ " {\"name\": \"precipitation\", \"description\": \"Precipitation\"},\n",
1967
+ " {\"name\": \"rain\", \"description\": \"Rain\"},\n",
1968
+ " {\"name\": \"snowfall\", \"description\": \"Snowfall\"}, \n",
1969
+ " {\"name\": \"weather_code\", \"description\": \"Weather code\"}, \n",
1970
+ " {\"name\": \"cloud_cover\", \"description\": \"Cloud cover\"}, \n",
1971
+ " {\"name\": \"wind_speed_10m\", \"description\": \"Wind speed at 10m above ground\"}, \n",
1972
+ " {\"name\": \"wind_gusts_10m\", \"description\": \"Wind gusts at 10m above ground\"}, \n",
1973
  "]\n",
1974
  "\n",
1975
  "# Updating feature descriptions\n",
1976
+ "for desc in weather_feature_descriptions: \n",
1977
+ " weather_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
1978
  ]
1979
  },
1980
  {
1981
  "cell_type": "code",
1982
+ "execution_count": 47,
1983
  "metadata": {},
1984
  "outputs": [],
1985
  "source": [
1986
+ "# Creating the feature group for the danish calendar\n",
1987
+ "danish_calendar_fg = fs.get_or_create_feature_group(\n",
1988
+ " name=\"danish_calendar\",\n",
1989
+ " version=2,\n",
1990
+ " description=\"Danish calendar.\",\n",
1991
  " online_enabled=True,\n",
1992
  " primary_key=[\"date\"],\n",
1993
  ")"
 
1995
  },
1996
  {
1997
  "cell_type": "code",
1998
+ "execution_count": 48,
1999
  "metadata": {},
2000
  "outputs": [
2001
  {
 
2003
  "output_type": "stream",
2004
  "text": [
2005
  "Feature Group created successfully, explore it at \n",
2006
+ "https://c.app.hopsworks.ai:443/p/554133/fs/549956/fg/774498\n"
2007
  ]
2008
  },
2009
  {
2010
  "data": {
2011
  "application/vnd.jupyter.widget-view+json": {
2012
+ "model_id": "3b61548e0184416180285726772cd490",
2013
  "version_major": 2,
2014
  "version_minor": 0
2015
  },
 
2024
  "name": "stdout",
2025
  "output_type": "stream",
2026
  "text": [
2027
+ "Launching job: danish_calendar_2_offline_fg_materialization\n",
2028
  "Job started successfully, you can follow the progress at \n",
2029
+ "https://c.app.hopsworks.ai/p/554133/jobs/named/danish_calendar_2_offline_fg_materialization/executions\n"
2030
  ]
2031
  },
2032
  {
2033
  "data": {
2034
  "text/plain": [
2035
+ "(<hsfs.core.job.Job at 0x16ff71050>, None)"
2036
  ]
2037
  },
2038
+ "execution_count": 48,
2039
  "metadata": {},
2040
  "output_type": "execute_result"
2041
  }
2042
  ],
2043
  "source": [
2044
+ "# Inserting the calender_df into the feature group named danish_calendar_fg\n",
2045
+ "danish_calendar_fg.insert(calender_df)"
2046
  ]
2047
  },
2048
  {
2049
  "cell_type": "code",
2050
+ "execution_count": 49,
2051
  "metadata": {},
2052
  "outputs": [],
2053
  "source": [
2054
+ "# List of descriptions for danish_calendar features\n",
2055
+ "danish_calendar_feature_descriptions = [\n",
2056
  " {\"name\": \"date\", \"description\": \"Date in the calendar\"},\n",
2057
+ " {\"name\": \"day\", \"description\": \"Day number of the week. Monday is 0 and Sunday is 6\"},\n",
2058
+ " {\"name\": \"month\", \"description\": \"Month number of the year\"},\n",
2059
+ " {\"name\": \"holiday\", \"description\": \"Holiday or not holiday\"},\n",
2060
  "]\n",
2061
  "\n",
2062
  "# Updating feature descriptions\n",
2063
+ "for desc in danish_calendar_feature_descriptions: \n",
2064
+ " danish_calendar_fg.update_feature_description(desc[\"name\"], desc[\"description\"])"
2065
  ]
2066
  },
2067
  {
notebooks/2_feature_pipeline.ipynb CHANGED
@@ -45,7 +45,7 @@
45
  "\n",
46
  "# Now we import the functions from the features folder\n",
47
  "# These are the functions we have created to generate features for electricity prices and weather measures\n",
48
- "from features import electricity_prices, weather_measures \n",
49
  "\n",
50
  "# We go back into the notebooks folder\n",
51
  "%cd notebooks"
@@ -126,210 +126,235 @@
126
  " <tr style=\"text-align: right;\">\n",
127
  " <th></th>\n",
128
  " <th>timestamp</th>\n",
129
- " <th>time</th>\n",
130
  " <th>date</th>\n",
 
131
  " <th>dk1_spotpricedkk_kwh</th>\n",
132
  " </tr>\n",
133
  " </thead>\n",
134
  " <tbody>\n",
135
  " <tr>\n",
136
  " <th>0</th>\n",
137
- " <td>1714521600000</td>\n",
138
- " <td>2024-05-01 00:00:00</td>\n",
139
- " <td>2024-05-01</td>\n",
140
- " <td>0.36285</td>\n",
 
141
  " </tr>\n",
142
  " <tr>\n",
143
  " <th>1</th>\n",
144
- " <td>1714525200000</td>\n",
145
- " <td>2024-05-01 01:00:00</td>\n",
146
- " <td>2024-05-01</td>\n",
147
- " <td>0.36315</td>\n",
 
148
  " </tr>\n",
149
  " <tr>\n",
150
  " <th>2</th>\n",
151
- " <td>1714528800000</td>\n",
152
- " <td>2024-05-01 02:00:00</td>\n",
153
- " <td>2024-05-01</td>\n",
154
- " <td>0.36330</td>\n",
 
155
  " </tr>\n",
156
  " <tr>\n",
157
  " <th>3</th>\n",
158
- " <td>1714532400000</td>\n",
159
- " <td>2024-05-01 03:00:00</td>\n",
160
- " <td>2024-05-01</td>\n",
161
- " <td>0.32809</td>\n",
 
162
  " </tr>\n",
163
  " <tr>\n",
164
  " <th>4</th>\n",
165
- " <td>1714536000000</td>\n",
166
- " <td>2024-05-01 04:00:00</td>\n",
167
- " <td>2024-05-01</td>\n",
168
- " <td>0.35659</td>\n",
 
169
  " </tr>\n",
170
  " <tr>\n",
171
  " <th>5</th>\n",
172
- " <td>1714539600000</td>\n",
173
- " <td>2024-05-01 05:00:00</td>\n",
174
- " <td>2024-05-01</td>\n",
175
- " <td>0.33332</td>\n",
 
176
  " </tr>\n",
177
  " <tr>\n",
178
  " <th>6</th>\n",
179
- " <td>1714543200000</td>\n",
180
- " <td>2024-05-01 06:00:00</td>\n",
181
- " <td>2024-05-01</td>\n",
182
- " <td>0.31683</td>\n",
 
183
  " </tr>\n",
184
  " <tr>\n",
185
  " <th>7</th>\n",
186
- " <td>1714546800000</td>\n",
187
- " <td>2024-05-01 07:00:00</td>\n",
188
- " <td>2024-05-01</td>\n",
189
- " <td>0.20943</td>\n",
 
190
  " </tr>\n",
191
  " <tr>\n",
192
  " <th>8</th>\n",
193
- " <td>1714550400000</td>\n",
194
- " <td>2024-05-01 08:00:00</td>\n",
195
- " <td>2024-05-01</td>\n",
196
- " <td>0.06302</td>\n",
 
197
  " </tr>\n",
198
  " <tr>\n",
199
  " <th>9</th>\n",
200
- " <td>1714554000000</td>\n",
201
- " <td>2024-05-01 09:00:00</td>\n",
202
- " <td>2024-05-01</td>\n",
203
- " <td>0.00007</td>\n",
 
204
  " </tr>\n",
205
  " <tr>\n",
206
  " <th>10</th>\n",
207
- " <td>1714557600000</td>\n",
208
- " <td>2024-05-01 10:00:00</td>\n",
209
- " <td>2024-05-01</td>\n",
210
- " <td>-0.00723</td>\n",
 
211
  " </tr>\n",
212
  " <tr>\n",
213
  " <th>11</th>\n",
214
- " <td>1714561200000</td>\n",
215
- " <td>2024-05-01 11:00:00</td>\n",
216
- " <td>2024-05-01</td>\n",
217
- " <td>-0.01805</td>\n",
 
218
  " </tr>\n",
219
  " <tr>\n",
220
  " <th>12</th>\n",
221
- " <td>1714564800000</td>\n",
222
- " <td>2024-05-01 12:00:00</td>\n",
223
- " <td>2024-05-01</td>\n",
224
- " <td>-0.01827</td>\n",
 
225
  " </tr>\n",
226
  " <tr>\n",
227
  " <th>13</th>\n",
228
- " <td>1714568400000</td>\n",
229
- " <td>2024-05-01 13:00:00</td>\n",
230
- " <td>2024-05-01</td>\n",
231
- " <td>-0.01559</td>\n",
 
232
  " </tr>\n",
233
  " <tr>\n",
234
  " <th>14</th>\n",
235
- " <td>1714572000000</td>\n",
236
- " <td>2024-05-01 14:00:00</td>\n",
237
- " <td>2024-05-01</td>\n",
238
- " <td>-0.01536</td>\n",
 
239
  " </tr>\n",
240
  " <tr>\n",
241
  " <th>15</th>\n",
242
- " <td>1714575600000</td>\n",
243
- " <td>2024-05-01 15:00:00</td>\n",
244
- " <td>2024-05-01</td>\n",
245
- " <td>-0.01827</td>\n",
 
246
  " </tr>\n",
247
  " <tr>\n",
248
  " <th>16</th>\n",
249
- " <td>1714579200000</td>\n",
250
- " <td>2024-05-01 16:00:00</td>\n",
251
- " <td>2024-05-01</td>\n",
252
- " <td>-0.00731</td>\n",
 
253
  " </tr>\n",
254
  " <tr>\n",
255
  " <th>17</th>\n",
256
- " <td>1714582800000</td>\n",
257
- " <td>2024-05-01 17:00:00</td>\n",
258
- " <td>2024-05-01</td>\n",
259
- " <td>-0.00075</td>\n",
 
260
  " </tr>\n",
261
  " <tr>\n",
262
  " <th>18</th>\n",
263
- " <td>1714586400000</td>\n",
264
- " <td>2024-05-01 18:00:00</td>\n",
265
- " <td>2024-05-01</td>\n",
266
- " <td>0.11426</td>\n",
 
267
  " </tr>\n",
268
  " <tr>\n",
269
  " <th>19</th>\n",
270
- " <td>1714590000000</td>\n",
271
- " <td>2024-05-01 19:00:00</td>\n",
272
- " <td>2024-05-01</td>\n",
273
- " <td>0.37590</td>\n",
 
274
  " </tr>\n",
275
  " <tr>\n",
276
  " <th>20</th>\n",
277
- " <td>1714593600000</td>\n",
278
- " <td>2024-05-01 20:00:00</td>\n",
279
- " <td>2024-05-01</td>\n",
280
- " <td>0.37292</td>\n",
 
281
  " </tr>\n",
282
  " <tr>\n",
283
  " <th>21</th>\n",
284
- " <td>1714597200000</td>\n",
285
- " <td>2024-05-01 21:00:00</td>\n",
286
- " <td>2024-05-01</td>\n",
287
- " <td>0.25366</td>\n",
 
288
  " </tr>\n",
289
  " <tr>\n",
290
  " <th>22</th>\n",
291
- " <td>1714600800000</td>\n",
292
- " <td>2024-05-01 22:00:00</td>\n",
293
- " <td>2024-05-01</td>\n",
294
- " <td>0.22315</td>\n",
 
295
  " </tr>\n",
296
  " <tr>\n",
297
  " <th>23</th>\n",
298
- " <td>1714604400000</td>\n",
299
- " <td>2024-05-01 23:00:00</td>\n",
300
- " <td>2024-05-01</td>\n",
301
- " <td>0.16408</td>\n",
 
302
  " </tr>\n",
303
  " </tbody>\n",
304
  "</table>\n",
305
  "</div>"
306
  ],
307
  "text/plain": [
308
- " timestamp time date dk1_spotpricedkk_kwh\n",
309
- "0 1714521600000 2024-05-01 00:00:00 2024-05-01 0.36285\n",
310
- "1 1714525200000 2024-05-01 01:00:00 2024-05-01 0.36315\n",
311
- "2 1714528800000 2024-05-01 02:00:00 2024-05-01 0.36330\n",
312
- "3 1714532400000 2024-05-01 03:00:00 2024-05-01 0.32809\n",
313
- "4 1714536000000 2024-05-01 04:00:00 2024-05-01 0.35659\n",
314
- "5 1714539600000 2024-05-01 05:00:00 2024-05-01 0.33332\n",
315
- "6 1714543200000 2024-05-01 06:00:00 2024-05-01 0.31683\n",
316
- "7 1714546800000 2024-05-01 07:00:00 2024-05-01 0.20943\n",
317
- "8 1714550400000 2024-05-01 08:00:00 2024-05-01 0.06302\n",
318
- "9 1714554000000 2024-05-01 09:00:00 2024-05-01 0.00007\n",
319
- "10 1714557600000 2024-05-01 10:00:00 2024-05-01 -0.00723\n",
320
- "11 1714561200000 2024-05-01 11:00:00 2024-05-01 -0.01805\n",
321
- "12 1714564800000 2024-05-01 12:00:00 2024-05-01 -0.01827\n",
322
- "13 1714568400000 2024-05-01 13:00:00 2024-05-01 -0.01559\n",
323
- "14 1714572000000 2024-05-01 14:00:00 2024-05-01 -0.01536\n",
324
- "15 1714575600000 2024-05-01 15:00:00 2024-05-01 -0.01827\n",
325
- "16 1714579200000 2024-05-01 16:00:00 2024-05-01 -0.00731\n",
326
- "17 1714582800000 2024-05-01 17:00:00 2024-05-01 -0.00075\n",
327
- "18 1714586400000 2024-05-01 18:00:00 2024-05-01 0.11426\n",
328
- "19 1714590000000 2024-05-01 19:00:00 2024-05-01 0.37590\n",
329
- "20 1714593600000 2024-05-01 20:00:00 2024-05-01 0.37292\n",
330
- "21 1714597200000 2024-05-01 21:00:00 2024-05-01 0.25366\n",
331
- "22 1714600800000 2024-05-01 22:00:00 2024-05-01 0.22315\n",
332
- "23 1714604400000 2024-05-01 23:00:00 2024-05-01 0.16408"
333
  ]
334
  },
335
  "execution_count": 4,
@@ -355,356 +380,21 @@
355
  "metadata": {},
356
  "outputs": [],
357
  "source": [
358
- "# Fetching non-historical forecast of renewable energy data for area DK1\n",
359
- "forecast_renewable_energy_df = electricity_prices.forecast_renewable_energy(\n",
360
- " historical=False,\n",
361
- " area=[\"DK1\"]\n",
362
- ")"
363
  ]
364
  },
365
  {
366
  "cell_type": "code",
367
  "execution_count": 6,
368
  "metadata": {},
369
- "outputs": [
370
- {
371
- "data": {
372
- "text/html": [
373
- "<div>\n",
374
- "<style scoped>\n",
375
- " .dataframe tbody tr th:only-of-type {\n",
376
- " vertical-align: middle;\n",
377
- " }\n",
378
- "\n",
379
- " .dataframe tbody tr th {\n",
380
- " vertical-align: top;\n",
381
- " }\n",
382
- "\n",
383
- " .dataframe thead th {\n",
384
- " text-align: right;\n",
385
- " }\n",
386
- "</style>\n",
387
- "<table border=\"1\" class=\"dataframe\">\n",
388
- " <thead>\n",
389
- " <tr style=\"text-align: right;\">\n",
390
- " <th></th>\n",
391
- " <th>timestamp</th>\n",
392
- " <th>time</th>\n",
393
- " <th>date</th>\n",
394
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
395
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
396
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
397
- " </tr>\n",
398
- " </thead>\n",
399
- " <tbody>\n",
400
- " <tr>\n",
401
- " <th>0</th>\n",
402
- " <td>1714521600000</td>\n",
403
- " <td>2024-05-01 00:00:00</td>\n",
404
- " <td>2024-05-01</td>\n",
405
- " <td>0.794000</td>\n",
406
- " <td>1.710417</td>\n",
407
- " <td>0.000170</td>\n",
408
- " </tr>\n",
409
- " <tr>\n",
410
- " <th>1</th>\n",
411
- " <td>1714525200000</td>\n",
412
- " <td>2024-05-01 01:00:00</td>\n",
413
- " <td>2024-05-01</td>\n",
414
- " <td>0.759667</td>\n",
415
- " <td>1.738500</td>\n",
416
- " <td>0.000169</td>\n",
417
- " </tr>\n",
418
- " <tr>\n",
419
- " <th>2</th>\n",
420
- " <td>1714528800000</td>\n",
421
- " <td>2024-05-01 02:00:00</td>\n",
422
- " <td>2024-05-01</td>\n",
423
- " <td>0.688625</td>\n",
424
- " <td>1.527958</td>\n",
425
- " <td>0.000175</td>\n",
426
- " </tr>\n",
427
- " <tr>\n",
428
- " <th>3</th>\n",
429
- " <td>1714532400000</td>\n",
430
- " <td>2024-05-01 03:00:00</td>\n",
431
- " <td>2024-05-01</td>\n",
432
- " <td>0.577083</td>\n",
433
- " <td>1.347667</td>\n",
434
- " <td>0.000189</td>\n",
435
- " </tr>\n",
436
- " <tr>\n",
437
- " <th>4</th>\n",
438
- " <td>1714536000000</td>\n",
439
- " <td>2024-05-01 04:00:00</td>\n",
440
- " <td>2024-05-01</td>\n",
441
- " <td>0.605792</td>\n",
442
- " <td>1.227542</td>\n",
443
- " <td>0.000218</td>\n",
444
- " </tr>\n",
445
- " <tr>\n",
446
- " <th>5</th>\n",
447
- " <td>1714539600000</td>\n",
448
- " <td>2024-05-01 05:00:00</td>\n",
449
- " <td>2024-05-01</td>\n",
450
- " <td>0.590542</td>\n",
451
- " <td>1.316083</td>\n",
452
- " <td>0.004533</td>\n",
453
- " </tr>\n",
454
- " <tr>\n",
455
- " <th>6</th>\n",
456
- " <td>1714543200000</td>\n",
457
- " <td>2024-05-01 06:00:00</td>\n",
458
- " <td>2024-05-01</td>\n",
459
- " <td>0.627333</td>\n",
460
- " <td>1.346875</td>\n",
461
- " <td>0.144934</td>\n",
462
- " </tr>\n",
463
- " <tr>\n",
464
- " <th>7</th>\n",
465
- " <td>1714546800000</td>\n",
466
- " <td>2024-05-01 07:00:00</td>\n",
467
- " <td>2024-05-01</td>\n",
468
- " <td>0.482625</td>\n",
469
- " <td>1.287417</td>\n",
470
- " <td>0.366274</td>\n",
471
- " </tr>\n",
472
- " <tr>\n",
473
- " <th>8</th>\n",
474
- " <td>1714550400000</td>\n",
475
- " <td>2024-05-01 08:00:00</td>\n",
476
- " <td>2024-05-01</td>\n",
477
- " <td>0.474042</td>\n",
478
- " <td>1.279458</td>\n",
479
- " <td>0.786889</td>\n",
480
- " </tr>\n",
481
- " <tr>\n",
482
- " <th>9</th>\n",
483
- " <td>1714554000000</td>\n",
484
- " <td>2024-05-01 09:00:00</td>\n",
485
- " <td>2024-05-01</td>\n",
486
- " <td>0.493583</td>\n",
487
- " <td>1.367208</td>\n",
488
- " <td>1.208868</td>\n",
489
- " </tr>\n",
490
- " <tr>\n",
491
- " <th>10</th>\n",
492
- " <td>1714557600000</td>\n",
493
- " <td>2024-05-01 10:00:00</td>\n",
494
- " <td>2024-05-01</td>\n",
495
- " <td>0.522458</td>\n",
496
- " <td>1.342625</td>\n",
497
- " <td>1.197411</td>\n",
498
- " </tr>\n",
499
- " <tr>\n",
500
- " <th>11</th>\n",
501
- " <td>1714561200000</td>\n",
502
- " <td>2024-05-01 11:00:00</td>\n",
503
- " <td>2024-05-01</td>\n",
504
- " <td>0.566417</td>\n",
505
- " <td>1.223500</td>\n",
506
- " <td>1.282508</td>\n",
507
- " </tr>\n",
508
- " <tr>\n",
509
- " <th>12</th>\n",
510
- " <td>1714564800000</td>\n",
511
- " <td>2024-05-01 12:00:00</td>\n",
512
- " <td>2024-05-01</td>\n",
513
- " <td>0.609958</td>\n",
514
- " <td>1.231542</td>\n",
515
- " <td>1.362772</td>\n",
516
- " </tr>\n",
517
- " <tr>\n",
518
- " <th>13</th>\n",
519
- " <td>1714568400000</td>\n",
520
- " <td>2024-05-01 13:00:00</td>\n",
521
- " <td>2024-05-01</td>\n",
522
- " <td>0.637292</td>\n",
523
- " <td>1.213167</td>\n",
524
- " <td>1.375432</td>\n",
525
- " </tr>\n",
526
- " <tr>\n",
527
- " <th>14</th>\n",
528
- " <td>1714572000000</td>\n",
529
- " <td>2024-05-01 14:00:00</td>\n",
530
- " <td>2024-05-01</td>\n",
531
- " <td>0.662375</td>\n",
532
- " <td>1.187667</td>\n",
533
- " <td>1.347302</td>\n",
534
- " </tr>\n",
535
- " <tr>\n",
536
- " <th>15</th>\n",
537
- " <td>1714575600000</td>\n",
538
- " <td>2024-05-01 15:00:00</td>\n",
539
- " <td>2024-05-01</td>\n",
540
- " <td>0.704667</td>\n",
541
- " <td>1.169292</td>\n",
542
- " <td>1.269182</td>\n",
543
- " </tr>\n",
544
- " <tr>\n",
545
- " <th>16</th>\n",
546
- " <td>1714579200000</td>\n",
547
- " <td>2024-05-01 16:00:00</td>\n",
548
- " <td>2024-05-01</td>\n",
549
- " <td>0.728542</td>\n",
550
- " <td>1.286833</td>\n",
551
- " <td>1.147561</td>\n",
552
- " </tr>\n",
553
- " <tr>\n",
554
- " <th>17</th>\n",
555
- " <td>1714582800000</td>\n",
556
- " <td>2024-05-01 17:00:00</td>\n",
557
- " <td>2024-05-01</td>\n",
558
- " <td>0.744292</td>\n",
559
- " <td>1.393458</td>\n",
560
- " <td>1.062930</td>\n",
561
- " </tr>\n",
562
- " <tr>\n",
563
- " <th>18</th>\n",
564
- " <td>1714586400000</td>\n",
565
- " <td>2024-05-01 18:00:00</td>\n",
566
- " <td>2024-05-01</td>\n",
567
- " <td>0.771792</td>\n",
568
- " <td>1.502625</td>\n",
569
- " <td>0.664276</td>\n",
570
- " </tr>\n",
571
- " <tr>\n",
572
- " <th>19</th>\n",
573
- " <td>1714590000000</td>\n",
574
- " <td>2024-05-01 19:00:00</td>\n",
575
- " <td>2024-05-01</td>\n",
576
- " <td>0.816250</td>\n",
577
- " <td>1.382208</td>\n",
578
- " <td>0.272911</td>\n",
579
- " </tr>\n",
580
- " <tr>\n",
581
- " <th>20</th>\n",
582
- " <td>1714593600000</td>\n",
583
- " <td>2024-05-01 20:00:00</td>\n",
584
- " <td>2024-05-01</td>\n",
585
- " <td>0.848500</td>\n",
586
- " <td>1.388583</td>\n",
587
- " <td>0.046087</td>\n",
588
- " </tr>\n",
589
- " <tr>\n",
590
- " <th>21</th>\n",
591
- " <td>1714597200000</td>\n",
592
- " <td>2024-05-01 21:00:00</td>\n",
593
- " <td>2024-05-01</td>\n",
594
- " <td>0.886042</td>\n",
595
- " <td>1.554792</td>\n",
596
- " <td>0.001339</td>\n",
597
- " </tr>\n",
598
- " <tr>\n",
599
- " <th>22</th>\n",
600
- " <td>1714600800000</td>\n",
601
- " <td>2024-05-01 22:00:00</td>\n",
602
- " <td>2024-05-01</td>\n",
603
- " <td>0.919417</td>\n",
604
- " <td>1.698875</td>\n",
605
- " <td>0.000000</td>\n",
606
- " </tr>\n",
607
- " <tr>\n",
608
- " <th>23</th>\n",
609
- " <td>1714604400000</td>\n",
610
- " <td>2024-05-01 23:00:00</td>\n",
611
- " <td>2024-05-01</td>\n",
612
- " <td>0.934708</td>\n",
613
- " <td>1.739375</td>\n",
614
- " <td>0.000000</td>\n",
615
- " </tr>\n",
616
- " </tbody>\n",
617
- "</table>\n",
618
- "</div>"
619
- ],
620
- "text/plain": [
621
- " timestamp time date \\\n",
622
- "0 1714521600000 2024-05-01 00:00:00 2024-05-01 \n",
623
- "1 1714525200000 2024-05-01 01:00:00 2024-05-01 \n",
624
- "2 1714528800000 2024-05-01 02:00:00 2024-05-01 \n",
625
- "3 1714532400000 2024-05-01 03:00:00 2024-05-01 \n",
626
- "4 1714536000000 2024-05-01 04:00:00 2024-05-01 \n",
627
- "5 1714539600000 2024-05-01 05:00:00 2024-05-01 \n",
628
- "6 1714543200000 2024-05-01 06:00:00 2024-05-01 \n",
629
- "7 1714546800000 2024-05-01 07:00:00 2024-05-01 \n",
630
- "8 1714550400000 2024-05-01 08:00:00 2024-05-01 \n",
631
- "9 1714554000000 2024-05-01 09:00:00 2024-05-01 \n",
632
- "10 1714557600000 2024-05-01 10:00:00 2024-05-01 \n",
633
- "11 1714561200000 2024-05-01 11:00:00 2024-05-01 \n",
634
- "12 1714564800000 2024-05-01 12:00:00 2024-05-01 \n",
635
- "13 1714568400000 2024-05-01 13:00:00 2024-05-01 \n",
636
- "14 1714572000000 2024-05-01 14:00:00 2024-05-01 \n",
637
- "15 1714575600000 2024-05-01 15:00:00 2024-05-01 \n",
638
- "16 1714579200000 2024-05-01 16:00:00 2024-05-01 \n",
639
- "17 1714582800000 2024-05-01 17:00:00 2024-05-01 \n",
640
- "18 1714586400000 2024-05-01 18:00:00 2024-05-01 \n",
641
- "19 1714590000000 2024-05-01 19:00:00 2024-05-01 \n",
642
- "20 1714593600000 2024-05-01 20:00:00 2024-05-01 \n",
643
- "21 1714597200000 2024-05-01 21:00:00 2024-05-01 \n",
644
- "22 1714600800000 2024-05-01 22:00:00 2024-05-01 \n",
645
- "23 1714604400000 2024-05-01 23:00:00 2024-05-01 \n",
646
- "\n",
647
- " dk1_offshore_wind_forecastintraday_kwh \\\n",
648
- "0 0.794000 \n",
649
- "1 0.759667 \n",
650
- "2 0.688625 \n",
651
- "3 0.577083 \n",
652
- "4 0.605792 \n",
653
- "5 0.590542 \n",
654
- "6 0.627333 \n",
655
- "7 0.482625 \n",
656
- "8 0.474042 \n",
657
- "9 0.493583 \n",
658
- "10 0.522458 \n",
659
- "11 0.566417 \n",
660
- "12 0.609958 \n",
661
- "13 0.637292 \n",
662
- "14 0.662375 \n",
663
- "15 0.704667 \n",
664
- "16 0.728542 \n",
665
- "17 0.744292 \n",
666
- "18 0.771792 \n",
667
- "19 0.816250 \n",
668
- "20 0.848500 \n",
669
- "21 0.886042 \n",
670
- "22 0.919417 \n",
671
- "23 0.934708 \n",
672
- "\n",
673
- " dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \n",
674
- "0 1.710417 0.000170 \n",
675
- "1 1.738500 0.000169 \n",
676
- "2 1.527958 0.000175 \n",
677
- "3 1.347667 0.000189 \n",
678
- "4 1.227542 0.000218 \n",
679
- "5 1.316083 0.004533 \n",
680
- "6 1.346875 0.144934 \n",
681
- "7 1.287417 0.366274 \n",
682
- "8 1.279458 0.786889 \n",
683
- "9 1.367208 1.208868 \n",
684
- "10 1.342625 1.197411 \n",
685
- "11 1.223500 1.282508 \n",
686
- "12 1.231542 1.362772 \n",
687
- "13 1.213167 1.375432 \n",
688
- "14 1.187667 1.347302 \n",
689
- "15 1.169292 1.269182 \n",
690
- "16 1.286833 1.147561 \n",
691
- "17 1.393458 1.062930 \n",
692
- "18 1.502625 0.664276 \n",
693
- "19 1.382208 0.272911 \n",
694
- "20 1.388583 0.046087 \n",
695
- "21 1.554792 0.001339 \n",
696
- "22 1.698875 0.000000 \n",
697
- "23 1.739375 0.000000 "
698
- ]
699
- },
700
- "execution_count": 6,
701
- "metadata": {},
702
- "output_type": "execute_result"
703
- }
704
- ],
705
  "source": [
706
- "# Display the forecast_renewable_energy dataframe\n",
707
- "forecast_renewable_energy_df"
708
  ]
709
  },
710
  {
@@ -789,8 +479,9 @@
789
  " <tr style=\"text-align: right;\">\n",
790
  " <th></th>\n",
791
  " <th>timestamp</th>\n",
 
792
  " <th>date</th>\n",
793
- " <th>time</th>\n",
794
  " <th>temperature_2m</th>\n",
795
  " <th>relative_humidity_2m</th>\n",
796
  " <th>precipitation</th>\n",
@@ -805,78 +496,83 @@
805
  " <tbody>\n",
806
  " <tr>\n",
807
  " <th>0</th>\n",
808
- " <td>1714521600000</td>\n",
809
- " <td>2024-05-01</td>\n",
810
- " <td>2024-05-01 00:00:00</td>\n",
811
- " <td>13.4</td>\n",
812
- " <td>70</td>\n",
 
813
  " <td>0.0</td>\n",
814
  " <td>0.0</td>\n",
815
  " <td>0.0</td>\n",
816
- " <td>1</td>\n",
817
- " <td>46</td>\n",
818
- " <td>20.9</td>\n",
819
- " <td>36.4</td>\n",
820
  " </tr>\n",
821
  " <tr>\n",
822
  " <th>1</th>\n",
823
- " <td>1714525200000</td>\n",
824
- " <td>2024-05-01</td>\n",
825
- " <td>2024-05-01 01:00:00</td>\n",
826
- " <td>12.6</td>\n",
827
- " <td>73</td>\n",
 
828
  " <td>0.0</td>\n",
829
  " <td>0.0</td>\n",
830
  " <td>0.0</td>\n",
831
- " <td>0</td>\n",
832
- " <td>18</td>\n",
833
- " <td>18.0</td>\n",
834
- " <td>35.6</td>\n",
835
  " </tr>\n",
836
  " <tr>\n",
837
  " <th>2</th>\n",
838
- " <td>1714528800000</td>\n",
839
- " <td>2024-05-01</td>\n",
840
- " <td>2024-05-01 02:00:00</td>\n",
841
- " <td>12.0</td>\n",
842
- " <td>75</td>\n",
 
843
  " <td>0.0</td>\n",
844
  " <td>0.0</td>\n",
845
  " <td>0.0</td>\n",
846
- " <td>2</td>\n",
847
- " <td>54</td>\n",
848
- " <td>18.0</td>\n",
849
- " <td>31.0</td>\n",
850
  " </tr>\n",
851
  " <tr>\n",
852
  " <th>3</th>\n",
853
- " <td>1714532400000</td>\n",
854
- " <td>2024-05-01</td>\n",
855
- " <td>2024-05-01 03:00:00</td>\n",
856
- " <td>11.5</td>\n",
857
- " <td>76</td>\n",
858
- " <td>0.0</td>\n",
859
- " <td>0.0</td>\n",
860
- " <td>0.0</td>\n",
861
  " <td>3</td>\n",
862
- " <td>97</td>\n",
863
- " <td>19.4</td>\n",
864
- " <td>33.1</td>\n",
 
 
 
 
 
 
865
  " </tr>\n",
866
  " <tr>\n",
867
  " <th>4</th>\n",
868
- " <td>1714536000000</td>\n",
869
- " <td>2024-05-01</td>\n",
870
- " <td>2024-05-01 04:00:00</td>\n",
871
- " <td>11.2</td>\n",
872
- " <td>78</td>\n",
 
873
  " <td>0.0</td>\n",
874
  " <td>0.0</td>\n",
875
  " <td>0.0</td>\n",
876
- " <td>3</td>\n",
877
- " <td>96</td>\n",
878
- " <td>18.0</td>\n",
879
- " <td>33.5</td>\n",
880
  " </tr>\n",
881
  " <tr>\n",
882
  " <th>...</th>\n",
@@ -892,128 +588,134 @@
892
  " <td>...</td>\n",
893
  " <td>...</td>\n",
894
  " <td>...</td>\n",
 
895
  " </tr>\n",
896
  " <tr>\n",
897
  " <th>115</th>\n",
898
- " <td>1714935600000</td>\n",
899
- " <td>2024-05-05</td>\n",
900
- " <td>2024-05-05 19:00:00</td>\n",
901
- " <td>12.6</td>\n",
902
- " <td>79</td>\n",
903
- " <td>0.0</td>\n",
 
 
904
  " <td>0.0</td>\n",
905
- " <td>0.0</td>\n",
906
- " <td>3</td>\n",
907
- " <td>83</td>\n",
908
- " <td>11.3</td>\n",
909
- " <td>25.9</td>\n",
910
  " </tr>\n",
911
  " <tr>\n",
912
  " <th>116</th>\n",
913
- " <td>1714939200000</td>\n",
914
- " <td>2024-05-05</td>\n",
915
- " <td>2024-05-05 20:00:00</td>\n",
916
- " <td>11.7</td>\n",
917
- " <td>83</td>\n",
918
- " <td>0.0</td>\n",
919
- " <td>0.0</td>\n",
920
- " <td>0.0</td>\n",
921
- " <td>3</td>\n",
922
- " <td>91</td>\n",
923
  " <td>10.1</td>\n",
924
- " <td>23.0</td>\n",
 
 
 
 
 
 
 
925
  " </tr>\n",
926
  " <tr>\n",
927
  " <th>117</th>\n",
928
- " <td>1714942800000</td>\n",
929
- " <td>2024-05-05</td>\n",
930
- " <td>2024-05-05 21:00:00</td>\n",
931
- " <td>11.1</td>\n",
932
- " <td>86</td>\n",
933
- " <td>0.0</td>\n",
934
- " <td>0.0</td>\n",
935
- " <td>0.0</td>\n",
936
- " <td>3</td>\n",
937
- " <td>98</td>\n",
938
  " <td>9.5</td>\n",
939
- " <td>20.5</td>\n",
 
 
 
 
 
 
 
940
  " </tr>\n",
941
  " <tr>\n",
942
  " <th>118</th>\n",
943
- " <td>1714946400000</td>\n",
944
- " <td>2024-05-05</td>\n",
945
- " <td>2024-05-05 22:00:00</td>\n",
946
- " <td>10.9</td>\n",
947
- " <td>87</td>\n",
948
- " <td>0.0</td>\n",
 
 
949
  " <td>0.0</td>\n",
950
- " <td>0.0</td>\n",
951
- " <td>3</td>\n",
952
- " <td>98</td>\n",
953
- " <td>10.2</td>\n",
954
- " <td>22.3</td>\n",
955
  " </tr>\n",
956
  " <tr>\n",
957
  " <th>119</th>\n",
958
- " <td>1714950000000</td>\n",
959
- " <td>2024-05-05</td>\n",
960
- " <td>2024-05-05 23:00:00</td>\n",
961
- " <td>11.0</td>\n",
962
- " <td>88</td>\n",
963
- " <td>0.0</td>\n",
 
 
964
  " <td>0.0</td>\n",
965
- " <td>0.0</td>\n",
966
- " <td>3</td>\n",
967
- " <td>97</td>\n",
968
- " <td>11.9</td>\n",
969
- " <td>24.1</td>\n",
970
  " </tr>\n",
971
  " </tbody>\n",
972
  "</table>\n",
973
- "<p>120 rows × 12 columns</p>\n",
974
  "</div>"
975
  ],
976
  "text/plain": [
977
- " timestamp date time temperature_2m \\\n",
978
- "0 1714521600000 2024-05-01 2024-05-01 00:00:00 13.4 \n",
979
- "1 1714525200000 2024-05-01 2024-05-01 01:00:00 12.6 \n",
980
- "2 1714528800000 2024-05-01 2024-05-01 02:00:00 12.0 \n",
981
- "3 1714532400000 2024-05-01 2024-05-01 03:00:00 11.5 \n",
982
- "4 1714536000000 2024-05-01 2024-05-01 04:00:00 11.2 \n",
983
- ".. ... ... ... ... \n",
984
- "115 1714935600000 2024-05-05 2024-05-05 19:00:00 12.6 \n",
985
- "116 1714939200000 2024-05-05 2024-05-05 20:00:00 11.7 \n",
986
- "117 1714942800000 2024-05-05 2024-05-05 21:00:00 11.1 \n",
987
- "118 1714946400000 2024-05-05 2024-05-05 22:00:00 10.9 \n",
988
- "119 1714950000000 2024-05-05 2024-05-05 23:00:00 11.0 \n",
989
  "\n",
990
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
991
- "0 70 0.0 0.0 0.0 1 \n",
992
- "1 73 0.0 0.0 0.0 0 \n",
993
- "2 75 0.0 0.0 0.0 2 \n",
994
- "3 76 0.0 0.0 0.0 3 \n",
995
- "4 78 0.0 0.0 0.0 3 \n",
996
  ".. ... ... ... ... ... \n",
997
- "115 79 0.0 0.0 0.0 3 \n",
998
- "116 83 0.0 0.0 0.0 3 \n",
999
- "117 86 0.0 0.0 0.0 3 \n",
1000
- "118 87 0.0 0.0 0.0 3 \n",
1001
- "119 88 0.0 0.0 0.0 3 \n",
1002
  "\n",
1003
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1004
- "0 46 20.9 36.4 \n",
1005
- "1 18 18.0 35.6 \n",
1006
- "2 54 18.0 31.0 \n",
1007
- "3 97 19.4 33.1 \n",
1008
- "4 96 18.0 33.5 \n",
1009
  ".. ... ... ... \n",
1010
- "115 83 11.3 25.9 \n",
1011
- "116 91 10.1 23.0 \n",
1012
- "117 98 9.5 20.5 \n",
1013
- "118 98 10.2 22.3 \n",
1014
- "119 97 11.9 24.1 \n",
1015
  "\n",
1016
- "[120 rows x 12 columns]"
1017
  ]
1018
  },
1019
  "execution_count": 10,
@@ -1030,20 +732,6 @@
1030
  "cell_type": "code",
1031
  "execution_count": 11,
1032
  "metadata": {},
1033
- "outputs": [],
1034
- "source": [
1035
- "# Converting to float type to align with Hopworks Feature Group as it converts the data to float automatically\n",
1036
- "\n",
1037
- "# Converting 'relative_humidity_2m', 'weather_code' and 'cloud_cover' columns to float type\n",
1038
- "weather_forecast_df['relative_humidity_2m'] = weather_forecast_df['relative_humidity_2m'].astype(float)\n",
1039
- "weather_forecast_df['weather_code'] = weather_forecast_df['weather_code'].astype(float)\n",
1040
- "weather_forecast_df['cloud_cover'] = weather_forecast_df['cloud_cover'].astype(float)"
1041
- ]
1042
- },
1043
- {
1044
- "cell_type": "code",
1045
- "execution_count": 12,
1046
- "metadata": {},
1047
  "outputs": [
1048
  {
1049
  "data": {
@@ -1067,8 +755,9 @@
1067
  " <tr style=\"text-align: right;\">\n",
1068
  " <th></th>\n",
1069
  " <th>timestamp</th>\n",
 
1070
  " <th>date</th>\n",
1071
- " <th>time</th>\n",
1072
  " <th>temperature_2m</th>\n",
1073
  " <th>relative_humidity_2m</th>\n",
1074
  " <th>precipitation</th>\n",
@@ -1083,107 +772,112 @@
1083
  " <tbody>\n",
1084
  " <tr>\n",
1085
  " <th>0</th>\n",
1086
- " <td>1714521600000</td>\n",
1087
- " <td>2024-05-01</td>\n",
1088
- " <td>2024-05-01 00:00:00</td>\n",
1089
- " <td>13.4</td>\n",
1090
- " <td>70.0</td>\n",
 
1091
  " <td>0.0</td>\n",
1092
  " <td>0.0</td>\n",
1093
  " <td>0.0</td>\n",
1094
- " <td>1.0</td>\n",
1095
- " <td>46.0</td>\n",
1096
- " <td>20.9</td>\n",
1097
- " <td>36.4</td>\n",
1098
  " </tr>\n",
1099
  " <tr>\n",
1100
  " <th>1</th>\n",
1101
- " <td>1714525200000</td>\n",
1102
- " <td>2024-05-01</td>\n",
1103
- " <td>2024-05-01 01:00:00</td>\n",
1104
- " <td>12.6</td>\n",
1105
- " <td>73.0</td>\n",
 
1106
  " <td>0.0</td>\n",
1107
  " <td>0.0</td>\n",
1108
  " <td>0.0</td>\n",
1109
  " <td>0.0</td>\n",
1110
- " <td>18.0</td>\n",
1111
- " <td>18.0</td>\n",
1112
- " <td>35.6</td>\n",
1113
  " </tr>\n",
1114
  " <tr>\n",
1115
  " <th>2</th>\n",
1116
- " <td>1714528800000</td>\n",
1117
- " <td>2024-05-01</td>\n",
1118
- " <td>2024-05-01 02:00:00</td>\n",
1119
- " <td>12.0</td>\n",
1120
- " <td>75.0</td>\n",
 
1121
  " <td>0.0</td>\n",
1122
  " <td>0.0</td>\n",
1123
  " <td>0.0</td>\n",
1124
  " <td>2.0</td>\n",
1125
- " <td>54.0</td>\n",
1126
- " <td>18.0</td>\n",
1127
- " <td>31.0</td>\n",
1128
  " </tr>\n",
1129
  " <tr>\n",
1130
  " <th>3</th>\n",
1131
- " <td>1714532400000</td>\n",
1132
- " <td>2024-05-01</td>\n",
1133
- " <td>2024-05-01 03:00:00</td>\n",
1134
- " <td>11.5</td>\n",
1135
- " <td>76.0</td>\n",
1136
- " <td>0.0</td>\n",
1137
- " <td>0.0</td>\n",
 
1138
  " <td>0.0</td>\n",
1139
- " <td>3.0</td>\n",
1140
- " <td>97.0</td>\n",
1141
- " <td>19.4</td>\n",
1142
- " <td>33.1</td>\n",
1143
  " </tr>\n",
1144
  " <tr>\n",
1145
  " <th>4</th>\n",
1146
- " <td>1714536000000</td>\n",
1147
- " <td>2024-05-01</td>\n",
1148
- " <td>2024-05-01 04:00:00</td>\n",
1149
- " <td>11.2</td>\n",
1150
- " <td>78.0</td>\n",
 
1151
  " <td>0.0</td>\n",
1152
  " <td>0.0</td>\n",
1153
  " <td>0.0</td>\n",
1154
- " <td>3.0</td>\n",
1155
- " <td>96.0</td>\n",
1156
- " <td>18.0</td>\n",
1157
- " <td>33.5</td>\n",
1158
  " </tr>\n",
1159
  " </tbody>\n",
1160
  "</table>\n",
1161
  "</div>"
1162
  ],
1163
  "text/plain": [
1164
- " timestamp date time temperature_2m \\\n",
1165
- "0 1714521600000 2024-05-01 2024-05-01 00:00:00 13.4 \n",
1166
- "1 1714525200000 2024-05-01 2024-05-01 01:00:00 12.6 \n",
1167
- "2 1714528800000 2024-05-01 2024-05-01 02:00:00 12.0 \n",
1168
- "3 1714532400000 2024-05-01 2024-05-01 03:00:00 11.5 \n",
1169
- "4 1714536000000 2024-05-01 2024-05-01 04:00:00 11.2 \n",
1170
  "\n",
1171
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1172
- "0 70.0 0.0 0.0 0.0 1.0 \n",
1173
- "1 73.0 0.0 0.0 0.0 0.0 \n",
1174
- "2 75.0 0.0 0.0 0.0 2.0 \n",
1175
- "3 76.0 0.0 0.0 0.0 3.0 \n",
1176
- "4 78.0 0.0 0.0 0.0 3.0 \n",
1177
  "\n",
1178
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
1179
- "0 46.0 20.9 36.4 \n",
1180
- "1 18.0 18.0 35.6 \n",
1181
- "2 54.0 18.0 31.0 \n",
1182
- "3 97.0 19.4 33.1 \n",
1183
- "4 96.0 18.0 33.5 "
1184
  ]
1185
  },
1186
- "execution_count": 12,
1187
  "metadata": {},
1188
  "output_type": "execute_result"
1189
  }
@@ -1193,6 +887,164 @@
1193
  "weather_forecast_df.head(5)"
1194
  ]
1195
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1196
  {
1197
  "cell_type": "markdown",
1198
  "metadata": {},
@@ -1241,10 +1093,10 @@
1241
  " version=1,\n",
1242
  ")\n",
1243
  "\n",
1244
- "forecast_renewable_energy_fg = fs.get_feature_group(\n",
1245
- " name=\"forecast_renewable_energy\",\n",
1246
- " version=1,\n",
1247
- ")\n",
1248
  "\n",
1249
  "weather_fg = fs.get_feature_group(\n",
1250
  " name=\"weather_measurements\",\n",
@@ -1268,7 +1120,7 @@
1268
  {
1269
  "data": {
1270
  "application/vnd.jupyter.widget-view+json": {
1271
- "model_id": "f5aa8974bf724e60bf0f29c7a58e83fa",
1272
  "version_major": 2,
1273
  "version_minor": 0
1274
  },
@@ -1291,7 +1143,7 @@
1291
  {
1292
  "data": {
1293
  "text/plain": [
1294
- "(<hsfs.core.job.Job at 0x15f0bcd90>, None)"
1295
  ]
1296
  },
1297
  "execution_count": 15,
@@ -1307,58 +1159,24 @@
1307
  },
1308
  {
1309
  "cell_type": "code",
1310
- "execution_count": 18,
1311
  "metadata": {},
1312
- "outputs": [
1313
- {
1314
- "data": {
1315
- "application/vnd.jupyter.widget-view+json": {
1316
- "model_id": "b2c640cab44b4d66add7c785eb9fd16b",
1317
- "version_major": 2,
1318
- "version_minor": 0
1319
- },
1320
- "text/plain": [
1321
- "Uploading Dataframe: 0.00% | | Rows 0/24 | Elapsed Time: 00:00 | Remaining Time: ?"
1322
- ]
1323
- },
1324
- "metadata": {},
1325
- "output_type": "display_data"
1326
- },
1327
- {
1328
- "name": "stdout",
1329
- "output_type": "stream",
1330
- "text": [
1331
- "Launching job: forecast_renewable_energy_1_offline_fg_materialization\n",
1332
- "Job started successfully, you can follow the progress at \n",
1333
- "https://c.app.hopsworks.ai/p/554133/jobs/named/forecast_renewable_energy_1_offline_fg_materialization/executions\n"
1334
- ]
1335
- },
1336
- {
1337
- "data": {
1338
- "text/plain": [
1339
- "(<hsfs.core.job.Job at 0x15f0fa910>, None)"
1340
- ]
1341
- },
1342
- "execution_count": 18,
1343
- "metadata": {},
1344
- "output_type": "execute_result"
1345
- }
1346
- ],
1347
  "source": [
1348
- "# Inserting the forecast_renewable_energy_df into the feature group named forecast_renewable_energy_fg\n",
1349
- "forecast_renewable_energy_fg.insert(forecast_renewable_energy_df, \n",
1350
- " write_options={\"wait_for_job\" : False})"
1351
  ]
1352
  },
1353
  {
1354
  "cell_type": "code",
1355
- "execution_count": 19,
1356
  "metadata": {},
1357
  "outputs": [
1358
  {
1359
  "data": {
1360
  "application/vnd.jupyter.widget-view+json": {
1361
- "model_id": "61148203d2ed42ce9dcb93d9b33b852b",
1362
  "version_major": 2,
1363
  "version_minor": 0
1364
  },
@@ -1381,10 +1199,10 @@
1381
  {
1382
  "data": {
1383
  "text/plain": [
1384
- "(<hsfs.core.job.Job at 0x30d030190>, None)"
1385
  ]
1386
  },
1387
- "execution_count": 19,
1388
  "metadata": {},
1389
  "output_type": "execute_result"
1390
  }
 
45
  "\n",
46
  "# Now we import the functions from the features folder\n",
47
  "# This is the functions we have created to generate features for electricity prices and weather measures\n",
48
+ "from features import electricity_prices, weather_measures, calendar\n",
49
  "\n",
50
  "# We go back into the notebooks folder\n",
51
  "%cd notebooks"
 
126
  " <tr style=\"text-align: right;\">\n",
127
  " <th></th>\n",
128
  " <th>timestamp</th>\n",
129
+ " <th>datetime</th>\n",
130
  " <th>date</th>\n",
131
+ " <th>hour</th>\n",
132
  " <th>dk1_spotpricedkk_kwh</th>\n",
133
  " </tr>\n",
134
  " </thead>\n",
135
  " <tbody>\n",
136
  " <tr>\n",
137
  " <th>0</th>\n",
138
+ " <td>1714608000000</td>\n",
139
+ " <td>2024-05-02 00:00:00</td>\n",
140
+ " <td>2024-05-02</td>\n",
141
+ " <td>0</td>\n",
142
+ " <td>0.10859</td>\n",
143
  " </tr>\n",
144
  " <tr>\n",
145
  " <th>1</th>\n",
146
+ " <td>1714611600000</td>\n",
147
+ " <td>2024-05-02 01:00:00</td>\n",
148
+ " <td>2024-05-02</td>\n",
149
+ " <td>1</td>\n",
150
+ " <td>0.08160</td>\n",
151
  " </tr>\n",
152
  " <tr>\n",
153
  " <th>2</th>\n",
154
+ " <td>1714615200000</td>\n",
155
+ " <td>2024-05-02 02:00:00</td>\n",
156
+ " <td>2024-05-02</td>\n",
157
+ " <td>2</td>\n",
158
+ " <td>0.07458</td>\n",
159
  " </tr>\n",
160
  " <tr>\n",
161
  " <th>3</th>\n",
162
+ " <td>1714618800000</td>\n",
163
+ " <td>2024-05-02 03:00:00</td>\n",
164
+ " <td>2024-05-02</td>\n",
165
+ " <td>3</td>\n",
166
+ " <td>0.05818</td>\n",
167
  " </tr>\n",
168
  " <tr>\n",
169
  " <th>4</th>\n",
170
+ " <td>1714622400000</td>\n",
171
+ " <td>2024-05-02 04:00:00</td>\n",
172
+ " <td>2024-05-02</td>\n",
173
+ " <td>4</td>\n",
174
+ " <td>0.07928</td>\n",
175
  " </tr>\n",
176
  " <tr>\n",
177
  " <th>5</th>\n",
178
+ " <td>1714626000000</td>\n",
179
+ " <td>2024-05-02 05:00:00</td>\n",
180
+ " <td>2024-05-02</td>\n",
181
+ " <td>5</td>\n",
182
+ " <td>0.22920</td>\n",
183
  " </tr>\n",
184
  " <tr>\n",
185
  " <th>6</th>\n",
186
+ " <td>1714629600000</td>\n",
187
+ " <td>2024-05-02 06:00:00</td>\n",
188
+ " <td>2024-05-02</td>\n",
189
+ " <td>6</td>\n",
190
+ " <td>0.29699</td>\n",
191
  " </tr>\n",
192
  " <tr>\n",
193
  " <th>7</th>\n",
194
+ " <td>1714633200000</td>\n",
195
+ " <td>2024-05-02 07:00:00</td>\n",
196
+ " <td>2024-05-02</td>\n",
197
+ " <td>7</td>\n",
198
+ " <td>0.38605</td>\n",
199
  " </tr>\n",
200
  " <tr>\n",
201
  " <th>8</th>\n",
202
+ " <td>1714636800000</td>\n",
203
+ " <td>2024-05-02 08:00:00</td>\n",
204
+ " <td>2024-05-02</td>\n",
205
+ " <td>8</td>\n",
206
+ " <td>0.43729</td>\n",
207
  " </tr>\n",
208
  " <tr>\n",
209
  " <th>9</th>\n",
210
+ " <td>1714640400000</td>\n",
211
+ " <td>2024-05-02 09:00:00</td>\n",
212
+ " <td>2024-05-02</td>\n",
213
+ " <td>9</td>\n",
214
+ " <td>0.23457</td>\n",
215
  " </tr>\n",
216
  " <tr>\n",
217
  " <th>10</th>\n",
218
+ " <td>1714644000000</td>\n",
219
+ " <td>2024-05-02 10:00:00</td>\n",
220
+ " <td>2024-05-02</td>\n",
221
+ " <td>10</td>\n",
222
+ " <td>0.03804</td>\n",
223
  " </tr>\n",
224
  " <tr>\n",
225
  " <th>11</th>\n",
226
+ " <td>1714647600000</td>\n",
227
+ " <td>2024-05-02 11:00:00</td>\n",
228
+ " <td>2024-05-02</td>\n",
229
+ " <td>11</td>\n",
230
+ " <td>-0.00060</td>\n",
231
  " </tr>\n",
232
  " <tr>\n",
233
  " <th>12</th>\n",
234
+ " <td>1714651200000</td>\n",
235
+ " <td>2024-05-02 12:00:00</td>\n",
236
+ " <td>2024-05-02</td>\n",
237
+ " <td>12</td>\n",
238
+ " <td>-0.01290</td>\n",
239
  " </tr>\n",
240
  " <tr>\n",
241
  " <th>13</th>\n",
242
+ " <td>1714654800000</td>\n",
243
+ " <td>2024-05-02 13:00:00</td>\n",
244
+ " <td>2024-05-02</td>\n",
245
+ " <td>13</td>\n",
246
+ " <td>-0.02014</td>\n",
247
  " </tr>\n",
248
  " <tr>\n",
249
  " <th>14</th>\n",
250
+ " <td>1714658400000</td>\n",
251
+ " <td>2024-05-02 14:00:00</td>\n",
252
+ " <td>2024-05-02</td>\n",
253
+ " <td>14</td>\n",
254
+ " <td>-0.00037</td>\n",
255
  " </tr>\n",
256
  " <tr>\n",
257
  " <th>15</th>\n",
258
+ " <td>1714662000000</td>\n",
259
+ " <td>2024-05-02 15:00:00</td>\n",
260
+ " <td>2024-05-02</td>\n",
261
+ " <td>15</td>\n",
262
+ " <td>-0.01037</td>\n",
263
  " </tr>\n",
264
  " <tr>\n",
265
  " <th>16</th>\n",
266
+ " <td>1714665600000</td>\n",
267
+ " <td>2024-05-02 16:00:00</td>\n",
268
+ " <td>2024-05-02</td>\n",
269
+ " <td>16</td>\n",
270
+ " <td>0.03013</td>\n",
271
  " </tr>\n",
272
  " <tr>\n",
273
  " <th>17</th>\n",
274
+ " <td>1714669200000</td>\n",
275
+ " <td>2024-05-02 17:00:00</td>\n",
276
+ " <td>2024-05-02</td>\n",
277
+ " <td>17</td>\n",
278
+ " <td>0.26045</td>\n",
279
  " </tr>\n",
280
  " <tr>\n",
281
  " <th>18</th>\n",
282
+ " <td>1714672800000</td>\n",
283
+ " <td>2024-05-02 18:00:00</td>\n",
284
+ " <td>2024-05-02</td>\n",
285
+ " <td>18</td>\n",
286
+ " <td>0.29125</td>\n",
287
  " </tr>\n",
288
  " <tr>\n",
289
  " <th>19</th>\n",
290
+ " <td>1714676400000</td>\n",
291
+ " <td>2024-05-02 19:00:00</td>\n",
292
+ " <td>2024-05-02</td>\n",
293
+ " <td>19</td>\n",
294
+ " <td>0.31266</td>\n",
295
  " </tr>\n",
296
  " <tr>\n",
297
  " <th>20</th>\n",
298
+ " <td>1714680000000</td>\n",
299
+ " <td>2024-05-02 20:00:00</td>\n",
300
+ " <td>2024-05-02</td>\n",
301
+ " <td>20</td>\n",
302
+ " <td>0.31318</td>\n",
303
  " </tr>\n",
304
  " <tr>\n",
305
  " <th>21</th>\n",
306
+ " <td>1714683600000</td>\n",
307
+ " <td>2024-05-02 21:00:00</td>\n",
308
+ " <td>2024-05-02</td>\n",
309
+ " <td>21</td>\n",
310
+ " <td>0.31266</td>\n",
311
  " </tr>\n",
312
  " <tr>\n",
313
  " <th>22</th>\n",
314
+ " <td>1714687200000</td>\n",
315
+ " <td>2024-05-02 22:00:00</td>\n",
316
+ " <td>2024-05-02</td>\n",
317
+ " <td>22</td>\n",
318
+ " <td>0.28245</td>\n",
319
  " </tr>\n",
320
  " <tr>\n",
321
  " <th>23</th>\n",
322
+ " <td>1714690800000</td>\n",
323
+ " <td>2024-05-02 23:00:00</td>\n",
324
+ " <td>2024-05-02</td>\n",
325
+ " <td>23</td>\n",
326
+ " <td>0.25306</td>\n",
327
  " </tr>\n",
328
  " </tbody>\n",
329
  "</table>\n",
330
  "</div>"
331
  ],
332
  "text/plain": [
333
+ " timestamp datetime date hour dk1_spotpricedkk_kwh\n",
334
+ "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 0.10859\n",
335
+ "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 0.08160\n",
336
+ "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 0.07458\n",
337
+ "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 0.05818\n",
338
+ "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 0.07928\n",
339
+ "5 1714626000000 2024-05-02 05:00:00 2024-05-02 5 0.22920\n",
340
+ "6 1714629600000 2024-05-02 06:00:00 2024-05-02 6 0.29699\n",
341
+ "7 1714633200000 2024-05-02 07:00:00 2024-05-02 7 0.38605\n",
342
+ "8 1714636800000 2024-05-02 08:00:00 2024-05-02 8 0.43729\n",
343
+ "9 1714640400000 2024-05-02 09:00:00 2024-05-02 9 0.23457\n",
344
+ "10 1714644000000 2024-05-02 10:00:00 2024-05-02 10 0.03804\n",
345
+ "11 1714647600000 2024-05-02 11:00:00 2024-05-02 11 -0.00060\n",
346
+ "12 1714651200000 2024-05-02 12:00:00 2024-05-02 12 -0.01290\n",
347
+ "13 1714654800000 2024-05-02 13:00:00 2024-05-02 13 -0.02014\n",
348
+ "14 1714658400000 2024-05-02 14:00:00 2024-05-02 14 -0.00037\n",
349
+ "15 1714662000000 2024-05-02 15:00:00 2024-05-02 15 -0.01037\n",
350
+ "16 1714665600000 2024-05-02 16:00:00 2024-05-02 16 0.03013\n",
351
+ "17 1714669200000 2024-05-02 17:00:00 2024-05-02 17 0.26045\n",
352
+ "18 1714672800000 2024-05-02 18:00:00 2024-05-02 18 0.29125\n",
353
+ "19 1714676400000 2024-05-02 19:00:00 2024-05-02 19 0.31266\n",
354
+ "20 1714680000000 2024-05-02 20:00:00 2024-05-02 20 0.31318\n",
355
+ "21 1714683600000 2024-05-02 21:00:00 2024-05-02 21 0.31266\n",
356
+ "22 1714687200000 2024-05-02 22:00:00 2024-05-02 22 0.28245\n",
357
+ "23 1714690800000 2024-05-02 23:00:00 2024-05-02 23 0.25306"
358
  ]
359
  },
360
  "execution_count": 4,
 
380
  "metadata": {},
381
  "outputs": [],
382
  "source": [
383
+ "# # Fetching non-historical forecast of renewable energy data for area DK1\n",
384
+ "# forecast_renewable_energy_df = electricity_prices.forecast_renewable_energy(\n",
385
+ "# historical=False,\n",
386
+ "# area=[\"DK1\"]\n",
387
+ "# )"
388
  ]
389
  },
390
  {
391
  "cell_type": "code",
392
  "execution_count": 6,
393
  "metadata": {},
394
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  "source": [
396
+ "# # Display the forecast_renewable_energy dataframe\n",
397
+ "# forecast_renewable_energy_df"
398
  ]
399
  },
400
  {
 
479
  " <tr style=\"text-align: right;\">\n",
480
  " <th></th>\n",
481
  " <th>timestamp</th>\n",
482
+ " <th>datetime</th>\n",
483
  " <th>date</th>\n",
484
+ " <th>hour</th>\n",
485
  " <th>temperature_2m</th>\n",
486
  " <th>relative_humidity_2m</th>\n",
487
  " <th>precipitation</th>\n",
 
496
  " <tbody>\n",
497
  " <tr>\n",
498
  " <th>0</th>\n",
499
+ " <td>1714608000000</td>\n",
500
+ " <td>2024-05-02 00:00:00</td>\n",
501
+ " <td>2024-05-02</td>\n",
502
+ " <td>0</td>\n",
503
+ " <td>14.9</td>\n",
504
+ " <td>66.0</td>\n",
505
  " <td>0.0</td>\n",
506
  " <td>0.0</td>\n",
507
  " <td>0.0</td>\n",
508
+ " <td>0.0</td>\n",
509
+ " <td>13.0</td>\n",
510
+ " <td>21.6</td>\n",
511
+ " <td>41.4</td>\n",
512
  " </tr>\n",
513
  " <tr>\n",
514
  " <th>1</th>\n",
515
+ " <td>1714611600000</td>\n",
516
+ " <td>2024-05-02 01:00:00</td>\n",
517
+ " <td>2024-05-02</td>\n",
518
+ " <td>1</td>\n",
519
+ " <td>14.2</td>\n",
520
+ " <td>71.0</td>\n",
521
  " <td>0.0</td>\n",
522
  " <td>0.0</td>\n",
523
  " <td>0.0</td>\n",
524
+ " <td>0.0</td>\n",
525
+ " <td>4.0</td>\n",
526
+ " <td>20.5</td>\n",
527
+ " <td>37.1</td>\n",
528
  " </tr>\n",
529
  " <tr>\n",
530
  " <th>2</th>\n",
531
+ " <td>1714615200000</td>\n",
532
+ " <td>2024-05-02 02:00:00</td>\n",
533
+ " <td>2024-05-02</td>\n",
534
+ " <td>2</td>\n",
535
+ " <td>13.4</td>\n",
536
+ " <td>73.0</td>\n",
537
  " <td>0.0</td>\n",
538
  " <td>0.0</td>\n",
539
  " <td>0.0</td>\n",
540
+ " <td>2.0</td>\n",
541
+ " <td>70.0</td>\n",
542
+ " <td>21.2</td>\n",
543
+ " <td>36.7</td>\n",
544
  " </tr>\n",
545
  " <tr>\n",
546
  " <th>3</th>\n",
547
+ " <td>1714618800000</td>\n",
548
+ " <td>2024-05-02 03:00:00</td>\n",
549
+ " <td>2024-05-02</td>\n",
 
 
 
 
 
550
  " <td>3</td>\n",
551
+ " <td>13.2</td>\n",
552
+ " <td>72.0</td>\n",
553
+ " <td>0.1</td>\n",
554
+ " <td>0.1</td>\n",
555
+ " <td>0.0</td>\n",
556
+ " <td>51.0</td>\n",
557
+ " <td>51.0</td>\n",
558
+ " <td>22.3</td>\n",
559
+ " <td>39.2</td>\n",
560
  " </tr>\n",
561
  " <tr>\n",
562
  " <th>4</th>\n",
563
+ " <td>1714622400000</td>\n",
564
+ " <td>2024-05-02 04:00:00</td>\n",
565
+ " <td>2024-05-02</td>\n",
566
+ " <td>4</td>\n",
567
+ " <td>12.7</td>\n",
568
+ " <td>73.0</td>\n",
569
  " <td>0.0</td>\n",
570
  " <td>0.0</td>\n",
571
  " <td>0.0</td>\n",
572
+ " <td>2.0</td>\n",
573
+ " <td>78.0</td>\n",
574
+ " <td>21.6</td>\n",
575
+ " <td>38.9</td>\n",
576
  " </tr>\n",
577
  " <tr>\n",
578
  " <th>...</th>\n",
 
588
  " <td>...</td>\n",
589
  " <td>...</td>\n",
590
  " <td>...</td>\n",
591
+ " <td>...</td>\n",
592
  " </tr>\n",
593
  " <tr>\n",
594
  " <th>115</th>\n",
595
+ " <td>1715022000000</td>\n",
596
+ " <td>2024-05-06 19:00:00</td>\n",
597
+ " <td>2024-05-06</td>\n",
598
+ " <td>19</td>\n",
599
+ " <td>10.7</td>\n",
600
+ " <td>91.0</td>\n",
601
+ " <td>1.4</td>\n",
602
+ " <td>1.4</td>\n",
603
  " <td>0.0</td>\n",
604
+ " <td>61.0</td>\n",
605
+ " <td>100.0</td>\n",
606
+ " <td>16.6</td>\n",
607
+ " <td>32.0</td>\n",
 
608
  " </tr>\n",
609
  " <tr>\n",
610
  " <th>116</th>\n",
611
+ " <td>1715025600000</td>\n",
612
+ " <td>2024-05-06 20:00:00</td>\n",
613
+ " <td>2024-05-06</td>\n",
614
+ " <td>20</td>\n",
 
 
 
 
 
 
615
  " <td>10.1</td>\n",
616
+ " <td>90.0</td>\n",
617
+ " <td>1.4</td>\n",
618
+ " <td>1.4</td>\n",
619
+ " <td>0.0</td>\n",
620
+ " <td>61.0</td>\n",
621
+ " <td>100.0</td>\n",
622
+ " <td>19.5</td>\n",
623
+ " <td>37.1</td>\n",
624
  " </tr>\n",
625
  " <tr>\n",
626
  " <th>117</th>\n",
627
+ " <td>1715029200000</td>\n",
628
+ " <td>2024-05-06 21:00:00</td>\n",
629
+ " <td>2024-05-06</td>\n",
630
+ " <td>21</td>\n",
 
 
 
 
 
 
631
  " <td>9.5</td>\n",
632
+ " <td>88.0</td>\n",
633
+ " <td>1.4</td>\n",
634
+ " <td>1.4</td>\n",
635
+ " <td>0.0</td>\n",
636
+ " <td>61.0</td>\n",
637
+ " <td>100.0</td>\n",
638
+ " <td>21.6</td>\n",
639
+ " <td>42.1</td>\n",
640
  " </tr>\n",
641
  " <tr>\n",
642
  " <th>118</th>\n",
643
+ " <td>1715032800000</td>\n",
644
+ " <td>2024-05-06 22:00:00</td>\n",
645
+ " <td>2024-05-06</td>\n",
646
+ " <td>22</td>\n",
647
+ " <td>9.3</td>\n",
648
+ " <td>86.0</td>\n",
649
+ " <td>0.6</td>\n",
650
+ " <td>0.6</td>\n",
651
  " <td>0.0</td>\n",
652
+ " <td>3.0</td>\n",
653
+ " <td>100.0</td>\n",
654
+ " <td>22.0</td>\n",
655
+ " <td>41.0</td>\n",
 
656
  " </tr>\n",
657
  " <tr>\n",
658
  " <th>119</th>\n",
659
+ " <td>1715036400000</td>\n",
660
+ " <td>2024-05-06 23:00:00</td>\n",
661
+ " <td>2024-05-06</td>\n",
662
+ " <td>23</td>\n",
663
+ " <td>9.1</td>\n",
664
+ " <td>84.0</td>\n",
665
+ " <td>0.6</td>\n",
666
+ " <td>0.6</td>\n",
667
  " <td>0.0</td>\n",
668
+ " <td>3.0</td>\n",
669
+ " <td>100.0</td>\n",
670
+ " <td>21.3</td>\n",
671
+ " <td>40.3</td>\n",
 
672
  " </tr>\n",
673
  " </tbody>\n",
674
  "</table>\n",
675
+ "<p>120 rows × 13 columns</p>\n",
676
  "</div>"
677
  ],
678
  "text/plain": [
679
+ " timestamp datetime date hour temperature_2m \\\n",
680
+ "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
681
+ "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
682
+ "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
683
+ "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
684
+ "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
685
+ ".. ... ... ... ... ... \n",
686
+ "115 1715022000000 2024-05-06 19:00:00 2024-05-06 19 10.7 \n",
687
+ "116 1715025600000 2024-05-06 20:00:00 2024-05-06 20 10.1 \n",
688
+ "117 1715029200000 2024-05-06 21:00:00 2024-05-06 21 9.5 \n",
689
+ "118 1715032800000 2024-05-06 22:00:00 2024-05-06 22 9.3 \n",
690
+ "119 1715036400000 2024-05-06 23:00:00 2024-05-06 23 9.1 \n",
691
  "\n",
692
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
693
+ "0 66.0 0.0 0.0 0.0 0.0 \n",
694
+ "1 71.0 0.0 0.0 0.0 0.0 \n",
695
+ "2 73.0 0.0 0.0 0.0 2.0 \n",
696
+ "3 72.0 0.1 0.1 0.0 51.0 \n",
697
+ "4 73.0 0.0 0.0 0.0 2.0 \n",
698
  ".. ... ... ... ... ... \n",
699
+ "115 91.0 1.4 1.4 0.0 61.0 \n",
700
+ "116 90.0 1.4 1.4 0.0 61.0 \n",
701
+ "117 88.0 1.4 1.4 0.0 61.0 \n",
702
+ "118 86.0 0.6 0.6 0.0 3.0 \n",
703
+ "119 84.0 0.6 0.6 0.0 3.0 \n",
704
  "\n",
705
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
706
+ "0 13.0 21.6 41.4 \n",
707
+ "1 4.0 20.5 37.1 \n",
708
+ "2 70.0 21.2 36.7 \n",
709
+ "3 51.0 22.3 39.2 \n",
710
+ "4 78.0 21.6 38.9 \n",
711
  ".. ... ... ... \n",
712
+ "115 100.0 16.6 32.0 \n",
713
+ "116 100.0 19.5 37.1 \n",
714
+ "117 100.0 21.6 42.1 \n",
715
+ "118 100.0 22.0 41.0 \n",
716
+ "119 100.0 21.3 40.3 \n",
717
  "\n",
718
+ "[120 rows x 13 columns]"
719
  ]
720
  },
721
  "execution_count": 10,
 
732
  "cell_type": "code",
733
  "execution_count": 11,
734
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
735
  "outputs": [
736
  {
737
  "data": {
 
755
  " <tr style=\"text-align: right;\">\n",
756
  " <th></th>\n",
757
  " <th>timestamp</th>\n",
758
+ " <th>datetime</th>\n",
759
  " <th>date</th>\n",
760
+ " <th>hour</th>\n",
761
  " <th>temperature_2m</th>\n",
762
  " <th>relative_humidity_2m</th>\n",
763
  " <th>precipitation</th>\n",
 
772
  " <tbody>\n",
773
  " <tr>\n",
774
  " <th>0</th>\n",
775
+ " <td>1714608000000</td>\n",
776
+ " <td>2024-05-02 00:00:00</td>\n",
777
+ " <td>2024-05-02</td>\n",
778
+ " <td>0</td>\n",
779
+ " <td>14.9</td>\n",
780
+ " <td>66.0</td>\n",
781
  " <td>0.0</td>\n",
782
  " <td>0.0</td>\n",
783
  " <td>0.0</td>\n",
784
+ " <td>0.0</td>\n",
785
+ " <td>13.0</td>\n",
786
+ " <td>21.6</td>\n",
787
+ " <td>41.4</td>\n",
788
  " </tr>\n",
789
  " <tr>\n",
790
  " <th>1</th>\n",
791
+ " <td>1714611600000</td>\n",
792
+ " <td>2024-05-02 01:00:00</td>\n",
793
+ " <td>2024-05-02</td>\n",
794
+ " <td>1</td>\n",
795
+ " <td>14.2</td>\n",
796
+ " <td>71.0</td>\n",
797
  " <td>0.0</td>\n",
798
  " <td>0.0</td>\n",
799
  " <td>0.0</td>\n",
800
  " <td>0.0</td>\n",
801
+ " <td>4.0</td>\n",
802
+ " <td>20.5</td>\n",
803
+ " <td>37.1</td>\n",
804
  " </tr>\n",
805
  " <tr>\n",
806
  " <th>2</th>\n",
807
+ " <td>1714615200000</td>\n",
808
+ " <td>2024-05-02 02:00:00</td>\n",
809
+ " <td>2024-05-02</td>\n",
810
+ " <td>2</td>\n",
811
+ " <td>13.4</td>\n",
812
+ " <td>73.0</td>\n",
813
  " <td>0.0</td>\n",
814
  " <td>0.0</td>\n",
815
  " <td>0.0</td>\n",
816
  " <td>2.0</td>\n",
817
+ " <td>70.0</td>\n",
818
+ " <td>21.2</td>\n",
819
+ " <td>36.7</td>\n",
820
  " </tr>\n",
821
  " <tr>\n",
822
  " <th>3</th>\n",
823
+ " <td>1714618800000</td>\n",
824
+ " <td>2024-05-02 03:00:00</td>\n",
825
+ " <td>2024-05-02</td>\n",
826
+ " <td>3</td>\n",
827
+ " <td>13.2</td>\n",
828
+ " <td>72.0</td>\n",
829
+ " <td>0.1</td>\n",
830
+ " <td>0.1</td>\n",
831
  " <td>0.0</td>\n",
832
+ " <td>51.0</td>\n",
833
+ " <td>51.0</td>\n",
834
+ " <td>22.3</td>\n",
835
+ " <td>39.2</td>\n",
836
  " </tr>\n",
837
  " <tr>\n",
838
  " <th>4</th>\n",
839
+ " <td>1714622400000</td>\n",
840
+ " <td>2024-05-02 04:00:00</td>\n",
841
+ " <td>2024-05-02</td>\n",
842
+ " <td>4</td>\n",
843
+ " <td>12.7</td>\n",
844
+ " <td>73.0</td>\n",
845
  " <td>0.0</td>\n",
846
  " <td>0.0</td>\n",
847
  " <td>0.0</td>\n",
848
+ " <td>2.0</td>\n",
849
+ " <td>78.0</td>\n",
850
+ " <td>21.6</td>\n",
851
+ " <td>38.9</td>\n",
852
  " </tr>\n",
853
  " </tbody>\n",
854
  "</table>\n",
855
  "</div>"
856
  ],
857
  "text/plain": [
858
+ " timestamp datetime date hour temperature_2m \\\n",
859
+ "0 1714608000000 2024-05-02 00:00:00 2024-05-02 0 14.9 \n",
860
+ "1 1714611600000 2024-05-02 01:00:00 2024-05-02 1 14.2 \n",
861
+ "2 1714615200000 2024-05-02 02:00:00 2024-05-02 2 13.4 \n",
862
+ "3 1714618800000 2024-05-02 03:00:00 2024-05-02 3 13.2 \n",
863
+ "4 1714622400000 2024-05-02 04:00:00 2024-05-02 4 12.7 \n",
864
  "\n",
865
  " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
866
+ "0 66.0 0.0 0.0 0.0 0.0 \n",
867
+ "1 71.0 0.0 0.0 0.0 0.0 \n",
868
+ "2 73.0 0.0 0.0 0.0 2.0 \n",
869
+ "3 72.0 0.1 0.1 0.0 51.0 \n",
870
+ "4 73.0 0.0 0.0 0.0 2.0 \n",
871
  "\n",
872
  " cloud_cover wind_speed_10m wind_gusts_10m \n",
873
+ "0 13.0 21.6 41.4 \n",
874
+ "1 4.0 20.5 37.1 \n",
875
+ "2 70.0 21.2 36.7 \n",
876
+ "3 51.0 22.3 39.2 \n",
877
+ "4 78.0 21.6 38.9 "
878
  ]
879
  },
880
+ "execution_count": 11,
881
  "metadata": {},
882
  "output_type": "execute_result"
883
  }
 
887
  "weather_forecast_df.head(5)"
888
  ]
889
  },
890
+ {
891
+ "cell_type": "code",
892
+ "execution_count": 12,
893
+ "metadata": {},
894
+ "outputs": [
895
+ {
896
+ "data": {
897
+ "text/html": [
898
+ "<div>\n",
899
+ "<style scoped>\n",
900
+ " .dataframe tbody tr th:only-of-type {\n",
901
+ " vertical-align: middle;\n",
902
+ " }\n",
903
+ "\n",
904
+ " .dataframe tbody tr th {\n",
905
+ " vertical-align: top;\n",
906
+ " }\n",
907
+ "\n",
908
+ " .dataframe thead th {\n",
909
+ " text-align: right;\n",
910
+ " }\n",
911
+ "</style>\n",
912
+ "<table border=\"1\" class=\"dataframe\">\n",
913
+ " <thead>\n",
914
+ " <tr style=\"text-align: right;\">\n",
915
+ " <th></th>\n",
916
+ " <th>timestamp</th>\n",
917
+ " <th>datetime</th>\n",
918
+ " <th>date</th>\n",
919
+ " <th>hour</th>\n",
920
+ " <th>temperature_2m</th>\n",
921
+ " <th>relative_humidity_2m</th>\n",
922
+ " <th>precipitation</th>\n",
923
+ " <th>rain</th>\n",
924
+ " <th>snowfall</th>\n",
925
+ " <th>weather_code</th>\n",
926
+ " <th>cloud_cover</th>\n",
927
+ " <th>wind_speed_10m</th>\n",
928
+ " <th>wind_gusts_10m</th>\n",
929
+ " </tr>\n",
930
+ " </thead>\n",
931
+ " <tbody>\n",
932
+ " <tr>\n",
933
+ " <th>115</th>\n",
934
+ " <td>1715022000000</td>\n",
935
+ " <td>2024-05-06 19:00:00</td>\n",
936
+ " <td>2024-05-06</td>\n",
937
+ " <td>19</td>\n",
938
+ " <td>10.7</td>\n",
939
+ " <td>91.0</td>\n",
940
+ " <td>1.4</td>\n",
941
+ " <td>1.4</td>\n",
942
+ " <td>0.0</td>\n",
943
+ " <td>61.0</td>\n",
944
+ " <td>100.0</td>\n",
945
+ " <td>16.6</td>\n",
946
+ " <td>32.0</td>\n",
947
+ " </tr>\n",
948
+ " <tr>\n",
949
+ " <th>116</th>\n",
950
+ " <td>1715025600000</td>\n",
951
+ " <td>2024-05-06 20:00:00</td>\n",
952
+ " <td>2024-05-06</td>\n",
953
+ " <td>20</td>\n",
954
+ " <td>10.1</td>\n",
955
+ " <td>90.0</td>\n",
956
+ " <td>1.4</td>\n",
957
+ " <td>1.4</td>\n",
958
+ " <td>0.0</td>\n",
959
+ " <td>61.0</td>\n",
960
+ " <td>100.0</td>\n",
961
+ " <td>19.5</td>\n",
962
+ " <td>37.1</td>\n",
963
+ " </tr>\n",
964
+ " <tr>\n",
965
+ " <th>117</th>\n",
966
+ " <td>1715029200000</td>\n",
967
+ " <td>2024-05-06 21:00:00</td>\n",
968
+ " <td>2024-05-06</td>\n",
969
+ " <td>21</td>\n",
970
+ " <td>9.5</td>\n",
971
+ " <td>88.0</td>\n",
972
+ " <td>1.4</td>\n",
973
+ " <td>1.4</td>\n",
974
+ " <td>0.0</td>\n",
975
+ " <td>61.0</td>\n",
976
+ " <td>100.0</td>\n",
977
+ " <td>21.6</td>\n",
978
+ " <td>42.1</td>\n",
979
+ " </tr>\n",
980
+ " <tr>\n",
981
+ " <th>118</th>\n",
982
+ " <td>1715032800000</td>\n",
983
+ " <td>2024-05-06 22:00:00</td>\n",
984
+ " <td>2024-05-06</td>\n",
985
+ " <td>22</td>\n",
986
+ " <td>9.3</td>\n",
987
+ " <td>86.0</td>\n",
988
+ " <td>0.6</td>\n",
989
+ " <td>0.6</td>\n",
990
+ " <td>0.0</td>\n",
991
+ " <td>3.0</td>\n",
992
+ " <td>100.0</td>\n",
993
+ " <td>22.0</td>\n",
994
+ " <td>41.0</td>\n",
995
+ " </tr>\n",
996
+ " <tr>\n",
997
+ " <th>119</th>\n",
998
+ " <td>1715036400000</td>\n",
999
+ " <td>2024-05-06 23:00:00</td>\n",
1000
+ " <td>2024-05-06</td>\n",
1001
+ " <td>23</td>\n",
1002
+ " <td>9.1</td>\n",
1003
+ " <td>84.0</td>\n",
1004
+ " <td>0.6</td>\n",
1005
+ " <td>0.6</td>\n",
1006
+ " <td>0.0</td>\n",
1007
+ " <td>3.0</td>\n",
1008
+ " <td>100.0</td>\n",
1009
+ " <td>21.3</td>\n",
1010
+ " <td>40.3</td>\n",
1011
+ " </tr>\n",
1012
+ " </tbody>\n",
1013
+ "</table>\n",
1014
+ "</div>"
1015
+ ],
1016
+ "text/plain": [
1017
+ " timestamp datetime date hour temperature_2m \\\n",
1018
+ "115 1715022000000 2024-05-06 19:00:00 2024-05-06 19 10.7 \n",
1019
+ "116 1715025600000 2024-05-06 20:00:00 2024-05-06 20 10.1 \n",
1020
+ "117 1715029200000 2024-05-06 21:00:00 2024-05-06 21 9.5 \n",
1021
+ "118 1715032800000 2024-05-06 22:00:00 2024-05-06 22 9.3 \n",
1022
+ "119 1715036400000 2024-05-06 23:00:00 2024-05-06 23 9.1 \n",
1023
+ "\n",
1024
+ " relative_humidity_2m precipitation rain snowfall weather_code \\\n",
1025
+ "115 91.0 1.4 1.4 0.0 61.0 \n",
1026
+ "116 90.0 1.4 1.4 0.0 61.0 \n",
1027
+ "117 88.0 1.4 1.4 0.0 61.0 \n",
1028
+ "118 86.0 0.6 0.6 0.0 3.0 \n",
1029
+ "119 84.0 0.6 0.6 0.0 3.0 \n",
1030
+ "\n",
1031
+ " cloud_cover wind_speed_10m wind_gusts_10m \n",
1032
+ "115 100.0 16.6 32.0 \n",
1033
+ "116 100.0 19.5 37.1 \n",
1034
+ "117 100.0 21.6 42.1 \n",
1035
+ "118 100.0 22.0 41.0 \n",
1036
+ "119 100.0 21.3 40.3 "
1037
+ ]
1038
+ },
1039
+ "execution_count": 12,
1040
+ "metadata": {},
1041
+ "output_type": "execute_result"
1042
+ }
1043
+ ],
1044
+ "source": [
1045
+ "weather_forecast_df.tail(5)"
1046
+ ]
1047
+ },
1048
  {
1049
  "cell_type": "markdown",
1050
  "metadata": {},
 
1093
  " version=1,\n",
1094
  ")\n",
1095
  "\n",
1096
+ "# forecast_renewable_energy_fg = fs.get_feature_group(\n",
1097
+ "# name=\"forecast_renewable_energy\",\n",
1098
+ "# version=1,\n",
1099
+ "# )\n",
1100
  "\n",
1101
  "weather_fg = fs.get_feature_group(\n",
1102
  " name=\"weather_measurements\",\n",
 
1120
  {
1121
  "data": {
1122
  "application/vnd.jupyter.widget-view+json": {
1123
+ "model_id": "0620fe280bda4631b98959a633d0c782",
1124
  "version_major": 2,
1125
  "version_minor": 0
1126
  },
 
1143
  {
1144
  "data": {
1145
  "text/plain": [
1146
+ "(<hsfs.core.job.Job at 0x1782b1810>, None)"
1147
  ]
1148
  },
1149
  "execution_count": 15,
 
1159
  },
1160
  {
1161
  "cell_type": "code",
1162
+ "execution_count": 16,
1163
  "metadata": {},
1164
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
  "source": [
1166
+ "# # Inserting the forecast_renewable_energy_df into the feature group named forecast_renewable_energy_fg\n",
1167
+ "# forecast_renewable_energy_fg.insert(forecast_renewable_energy_df, \n",
1168
+ "# write_options={\"wait_for_job\" : False})"
1169
  ]
1170
  },
1171
  {
1172
  "cell_type": "code",
1173
+ "execution_count": 17,
1174
  "metadata": {},
1175
  "outputs": [
1176
  {
1177
  "data": {
1178
  "application/vnd.jupyter.widget-view+json": {
1179
+ "model_id": "e39a33b92b7e48fab45022e590e78157",
1180
  "version_major": 2,
1181
  "version_minor": 0
1182
  },
 
1199
  {
1200
  "data": {
1201
  "text/plain": [
1202
+ "(<hsfs.core.job.Job at 0x1782e7ed0>, None)"
1203
  ]
1204
  },
1205
+ "execution_count": 17,
1206
  "metadata": {},
1207
  "output_type": "execute_result"
1208
  }
notebooks/3_training_pipeline.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/4_batch_inference copy.ipynb ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# <span style=\"font-weight:bold; font-size: 3rem; color:#2656a3;\">**Data Engineering and Machine Learning Operations in Business** </span> <span style=\"font-weight:bold; font-size: 3rem; color:#333;\">- Part 04: Batch Inference</span>"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "markdown",
12
+ "metadata": {},
13
+ "source": [
14
+ "## <span style='color:#2656a3'> 🗒️ This notebook is divided into the following sections:\n",
15
+ "\n",
16
+ "1. Load batch data.\n",
17
+ "2. Predict using model from Model Registry."
18
+ ]
19
+ },
20
+ {
21
+ "cell_type": "markdown",
22
+ "metadata": {},
23
+ "source": [
24
+ "## <span style='color:#2656a3'> ⚙️ Import of libraries and packages\n",
25
+ "\n",
26
+ "First, we'll install the Python packages required for this notebook. We'll use the --quiet command after specifying the names of the libraries to ensure a silent installation process. Then, we'll proceed to import all the necessary libraries."
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": null,
32
+ "metadata": {},
33
+ "outputs": [],
34
+ "source": [
35
+ "# Importing the packages for the needed libraries for the Jupyter notebook\n",
36
+ "import joblib\n",
37
+ "import inspect \n",
38
+ "import pandas as pd\n",
39
+ "import matplotlib.pyplot as plt\n",
40
+ "from matplotlib.ticker import FuncFormatter\n",
41
+ "import os\n",
42
+ "\n",
43
+ "#%config InlineBackend.figure_format='retina'\n",
44
+ "#%matplotlib inline"
45
+ ]
46
+ },
47
+ {
48
+ "cell_type": "markdown",
49
+ "metadata": {},
50
+ "source": [
51
+ "## <span style=\"color:#2656a3;\"> 📡 Connecting to Hopsworks Feature Store"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": null,
57
+ "metadata": {},
58
+ "outputs": [],
59
+ "source": [
60
+ "# Importing the hopsworks module\n",
61
+ "import hopsworks\n",
62
+ "\n",
63
+ "# Logging in to the Hopsworks project\n",
64
+ "project = hopsworks.login()\n",
65
+ "\n",
66
+ "# Getting the feature store from the project\n",
67
+ "fs = project.get_feature_store() "
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "markdown",
72
+ "metadata": {},
73
+ "source": [
74
+ "### <span style='color:#2656a3'> ⚙️ Feature View Retrieval"
75
+ ]
76
+ },
77
+ {
78
+ "cell_type": "code",
79
+ "execution_count": null,
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "# Retrieve the 'electricity_feature_view' feature view\n",
84
+ "feature_view = fs.get_feature_view(\n",
85
+ " name='electricity_feature_view',\n",
86
+ " version=1,\n",
87
+ ")"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "markdown",
92
+ "metadata": {},
93
+ "source": [
94
+ "### <span style='color:#2656a3'> 🗄 Model Registry"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": null,
100
+ "metadata": {},
101
+ "outputs": [],
102
+ "source": [
103
+ "# Retrieve the model registry\n",
104
+ "mr = project.get_model_registry()"
105
+ ]
106
+ },
107
+ {
108
+ "cell_type": "markdown",
109
+ "metadata": {},
110
+ "source": [
111
+ "## <span style='color:#2656a3'> 📮 Retrieving model from Model Registry"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": null,
117
+ "metadata": {},
118
+ "outputs": [],
119
+ "source": [
120
+ "# Retrieving the model from the Model Registry\n",
121
+ "retrieved_model = mr.get_model(\n",
122
+ " name=\"electricity_price_prediction_model\", \n",
123
+ " version=1,\n",
124
+ ")\n",
125
+ "\n",
126
+ "# Downloading the saved model to a local directory\n",
127
+ "saved_model_dir = retrieved_model.download()\n",
128
+ "\n",
129
+ "# Loading the saved XGB model\n",
130
+ "retrieved_xgboost_model = joblib.load(saved_model_dir + \"/dk_electricity_model.pkl\")"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "metadata": {},
137
+ "outputs": [],
138
+ "source": [
139
+ "# Display the retrieved XGBoost regressor model\n",
140
+ "retrieved_xgboost_model"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "markdown",
145
+ "metadata": {},
146
+ "source": [
147
+ "## <span style='color:#2656a3'> ✨ Load Batch Data"
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "code",
152
+ "execution_count": null,
153
+ "metadata": {},
154
+ "outputs": [],
155
+ "source": [
156
+ "import datetime\n",
157
+ "\n",
158
+ "# Calculating the start date as 5 days ago from the current date\n",
159
+ "start_date = datetime.datetime.now() - datetime.timedelta(days=5)\n",
160
+ "\n",
161
+ "# Converting the start date to a timestamp in milliseconds\n",
162
+ "start_time = int(start_date.timestamp()) * 1000\n",
163
+ "\n",
164
+ "# Displaying the start date in timestamp format\n",
165
+ "start_time"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": null,
171
+ "metadata": {},
172
+ "outputs": [],
173
+ "source": [
174
+ "# Initializing batch scoring\n",
175
+ "feature_view.init_batch_scoring(1)\n",
176
+ "\n",
177
+ "# Retrieving batch data from the feature view starting from the specified start time\n",
178
+ "batch_data = feature_view.get_batch_data(\n",
179
+ " start_time=start_time,\n",
180
+ ")\n",
181
+ "\n",
182
+ "batch_data"
183
+ ]
184
+ },
185
+ {
186
+ "cell_type": "code",
187
+ "execution_count": null,
188
+ "metadata": {},
189
+ "outputs": [],
190
+ "source": [
191
+ "# First we go one back in our directory to access the folder with our functions\n",
192
+ "%cd ..\n",
193
+ "\n",
194
+ "# Now we import the functions from the features folder\n",
195
+ "# These are the functions we have created to generate features for electricity prices and weather measures\n",
196
+ "from features import electricity_prices, weather_measures \n",
197
+ "\n",
198
+ "# We go back into the notebooks folder\n",
199
+ "%cd notebooks"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": null,
205
+ "metadata": {},
206
+ "outputs": [],
207
+ "source": [
208
+ "# Fetching weather forecast measures for the next 5 days\n",
209
+ "weather_forecast_df = weather_measures.forecast_weather_measures(\n",
210
+ " forecast_length=5\n",
211
+ ")"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": null,
217
+ "metadata": {},
218
+ "outputs": [],
219
+ "source": [
220
+ "# Read the CSV file with the calendar\n",
221
+ "calender_df = pd.read_csv('https://raw.githubusercontent.com/Camillahannesbo/MLOPs-Assignment-/main/data/calendar_incl_holiday.csv', delimiter=';', usecols=['date', 'type'])\n",
222
+ "\n",
223
+ "calender_df"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": null,
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": [
232
+ "from datetime import datetime, timedelta\n",
233
+ "\n",
234
+ "# Formatting the date column to 'YYYY-MM-DD' dateformat\n",
235
+ "calender_df[\"date\"] = calender_df[\"date\"].map(lambda x: datetime.strptime(x, '%d/%m/%Y').strftime(\"%Y-%m-%d\"))"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": null,
241
+ "metadata": {},
242
+ "outputs": [],
243
+ "source": [
244
+ "# Add features to the calendar dataframe\n",
245
+ "calender_df['date_'] = pd.to_datetime(calender_df['date'])\n",
246
+ "calender_df['day'] = calender_df['date_'].dt.dayofweek\n",
247
+ "calender_df['month'] = calender_df['date_'].dt.month\n",
248
+ "calender_df['holiday'] = np.where(calender_df['type'] == 'Not a Workday', 1, 0)\n",
249
+ "\n",
250
+ "# Drop the columns 'type' and 'date_' to finalize the calendar dataframe\n",
251
+ "calender_df = calender_df.drop(['type','date_'], axis=1)\n",
252
+ "\n",
253
+ "merged_df = pd.merge(weather_forecast_df, calender_df, how='inner', left_on='date', right_on='date')"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": null,
259
+ "metadata": {},
260
+ "outputs": [],
261
+ "source": [
262
+ "import numpy as np"
263
+ ]
264
+ },
265
+ {
266
+ "cell_type": "code",
267
+ "execution_count": null,
268
+ "metadata": {},
269
+ "outputs": [],
270
+ "source": [
271
+ "# Use the merged weather forecast and calendar data as the batch data and display its last 5 rows\n",
272
+ "batch_data = merged_df\n",
273
+ "\n",
274
+ "batch_data.tail()"
275
+ ]
276
+ },
277
+ {
278
+ "cell_type": "markdown",
279
+ "metadata": {},
280
+ "source": [
281
+ "### <span style=\"color:#ff5f27;\">🤖 Making the predictions</span>"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": null,
287
+ "metadata": {},
288
+ "outputs": [],
289
+ "source": [
290
+ "# from sklearn.preprocessing import LabelEncoder\n",
291
+ "\n",
292
+ "# # Create a LabelEncoder object\n",
293
+ "# label_encoder = LabelEncoder()\n",
294
+ "\n",
295
+ "# # Fit the encoder to the data in the 'type' column\n",
296
+ "# label_encoder.fit(batch_data[['type']])\n",
297
+ "\n",
298
+ "# # Transform the 'type' column data using the fitted encoder\n",
299
+ "# encoded = label_encoder.transform(batch_data[['type']])"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": null,
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "batch_data"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": null,
314
+ "metadata": {},
315
+ "outputs": [],
316
+ "source": []
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": null,
321
+ "metadata": {},
322
+ "outputs": [],
323
+ "source": []
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": null,
328
+ "metadata": {},
329
+ "outputs": [],
330
+ "source": [
331
+ "# # Convert the output of the label encoding to a dense array and concatenate with the original data\n",
332
+ "# X_batch = pd.concat([batch_data, pd.DataFrame(encoded)], axis=1)\n",
333
+ "\n",
334
+ "X_batch = batch_data\n",
335
+ "\n",
336
+ "# Drop the columns 'date', 'time' and 'timestamp' from the DataFrame 'X_batch'\n",
337
+ "X_batch = X_batch.drop(columns=['date', 'time', 'timestamp'])\n",
338
+ "\n",
339
+ "# # Rename the newly added column with the label-encoded values to 'type_encoded'\n",
340
+ "# X_batch = X_batch.rename(columns={0: \"type_encoded\"})\n",
341
+ "\n",
342
+ "# Displaying the first 5 rows of the modified DataFrame\n",
343
+ "X_batch.head()"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": null,
349
+ "metadata": {},
350
+ "outputs": [],
351
+ "source": [
352
+ "# Extract the target variable 'dk1_spotpricedkk_kwh' from the batch data\n",
353
+ "y_batch = X_batch.pop('dk1_spotpricedkk_kwh')\n",
354
+ "\n",
355
+ "# Displaying the first 5 values of the extracted target variable\n",
356
+ "y_batch.head()"
357
+ ]
358
+ },
359
+ {
360
+ "cell_type": "code",
361
+ "execution_count": null,
362
+ "metadata": {},
363
+ "outputs": [],
364
+ "source": [
365
+ "X_batch"
366
+ ]
367
+ },
368
+ {
369
+ "cell_type": "code",
370
+ "execution_count": null,
371
+ "metadata": {},
372
+ "outputs": [],
373
+ "source": [
374
+ "# Make predictions on the batch data using the retrieved XGBoost regressor model\n",
375
+ "predictions = retrieved_xgboost_model.predict(X_batch)\n",
376
+ "\n",
377
+ "# Display the first 5 predictions\n",
378
+ "predictions[:5]"
379
+ ]
380
+ },
381
+ {
382
+ "cell_type": "code",
383
+ "execution_count": null,
384
+ "metadata": {},
385
+ "outputs": [],
386
+ "source": [
387
+ "label = batch_data[\"time\"]\n",
388
+ "y_pred = retrieved_xgboost_model.predict(X_batch)\n",
389
+ "\n",
390
+ "data = {\n",
391
+ " 'prediction': [y_pred],\n",
392
+ " 'time': [label],\n",
393
+ "}\n",
394
+ "\n",
395
+ "monitor_df = pd.DataFrame(data)\n",
396
+ "monitor_df"
397
+ ]
398
+ },
399
+ {
400
+ "cell_type": "code",
401
+ "execution_count": null,
402
+ "metadata": {},
403
+ "outputs": [],
404
+ "source": [
405
+ "label = batch_data[\"time\"]\n",
406
+ "y_pred = retrieved_xgboost_model.predict(X_batch)\n",
407
+ "\n",
408
+ "data = {\n",
409
+ " 'prediction': y_pred,\n",
410
+ " 'time': label,\n",
411
+ "}\n",
412
+ "\n",
413
+ "monitor_df = pd.DataFrame(data)\n",
414
+ "monitor_df"
415
+ ]
416
+ },
417
+ {
418
+ "cell_type": "markdown",
419
+ "metadata": {},
420
+ "source": [
421
+ "---\n",
422
+ "## <span style=\"color:#ff5f27;\">👾 Next is creating our Streamlit App?</span>"
423
+ ]
424
+ }
425
+ ],
426
+ "metadata": {
427
+ "kernelspec": {
428
+ "display_name": "bds-mlops",
429
+ "language": "python",
430
+ "name": "python3"
431
+ },
432
+ "language_info": {
433
+ "codemirror_mode": {
434
+ "name": "ipython",
435
+ "version": 3
436
+ },
437
+ "file_extension": ".py",
438
+ "mimetype": "text/x-python",
439
+ "name": "python",
440
+ "nbconvert_exporter": "python",
441
+ "pygments_lexer": "ipython3",
442
+ "version": "3.11.8"
443
+ },
444
+ "orig_nbformat": 4
445
+ },
446
+ "nbformat": 4,
447
+ "nbformat_minor": 2
448
+ }
notebooks/4_batch_inference.ipynb CHANGED
@@ -28,7 +28,7 @@
28
  },
29
  {
30
  "cell_type": "code",
31
- "execution_count": 1,
32
  "metadata": {},
33
  "outputs": [],
34
  "source": [
@@ -53,20 +53,9 @@
53
  },
54
  {
55
  "cell_type": "code",
56
- "execution_count": 2,
57
  "metadata": {},
58
- "outputs": [
59
- {
60
- "name": "stdout",
61
- "output_type": "stream",
62
- "text": [
63
- "Connected. Call `.close()` to terminate connection gracefully.\n",
64
- "\n",
65
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/554133\n",
66
- "Connected. Call `.close()` to terminate connection gracefully.\n"
67
- ]
68
- }
69
- ],
70
  "source": [
71
  "# Importing the hopsworks module\n",
72
  "import hopsworks\n",
@@ -87,7 +76,7 @@
87
  },
88
  {
89
  "cell_type": "code",
90
- "execution_count": 3,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
@@ -107,17 +96,9 @@
107
  },
108
  {
109
  "cell_type": "code",
110
- "execution_count": 4,
111
  "metadata": {},
112
- "outputs": [
113
- {
114
- "name": "stdout",
115
- "output_type": "stream",
116
- "text": [
117
- "Connected. Call `.close()` to terminate connection gracefully.\n"
118
- ]
119
- }
120
- ],
121
  "source": [
122
  "# Retrieve the model registry\n",
123
  "mr = project.get_model_registry()"
@@ -132,17 +113,9 @@
132
  },
133
  {
134
  "cell_type": "code",
135
- "execution_count": 5,
136
  "metadata": {},
137
- "outputs": [
138
- {
139
- "name": "stdout",
140
- "output_type": "stream",
141
- "text": [
142
- "Downloading model artifact (0 dirs, 3 files)... DONE\r"
143
- ]
144
- }
145
- ],
146
  "source": [
147
  "# Retrieving the model from the Model Registry\n",
148
  "retrieved_model = mr.get_model(\n",
@@ -159,457 +132,9 @@
159
  },
160
  {
161
  "cell_type": "code",
162
- "execution_count": 6,
163
  "metadata": {},
164
- "outputs": [
165
- {
166
- "data": {
167
- "text/html": [
168
- "<style>#sk-container-id-1 {\n",
169
- " /* Definition of color scheme common for light and dark mode */\n",
170
- " --sklearn-color-text: black;\n",
171
- " --sklearn-color-line: gray;\n",
172
- " /* Definition of color scheme for unfitted estimators */\n",
173
- " --sklearn-color-unfitted-level-0: #fff5e6;\n",
174
- " --sklearn-color-unfitted-level-1: #f6e4d2;\n",
175
- " --sklearn-color-unfitted-level-2: #ffe0b3;\n",
176
- " --sklearn-color-unfitted-level-3: chocolate;\n",
177
- " /* Definition of color scheme for fitted estimators */\n",
178
- " --sklearn-color-fitted-level-0: #f0f8ff;\n",
179
- " --sklearn-color-fitted-level-1: #d4ebff;\n",
180
- " --sklearn-color-fitted-level-2: #b3dbfd;\n",
181
- " --sklearn-color-fitted-level-3: cornflowerblue;\n",
182
- "\n",
183
- " /* Specific color for light theme */\n",
184
- " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
185
- " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
186
- " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
187
- " --sklearn-color-icon: #696969;\n",
188
- "\n",
189
- " @media (prefers-color-scheme: dark) {\n",
190
- " /* Redefinition of color scheme for dark theme */\n",
191
- " --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
192
- " --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
193
- " --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
194
- " --sklearn-color-icon: #878787;\n",
195
- " }\n",
196
- "}\n",
197
- "\n",
198
- "#sk-container-id-1 {\n",
199
- " color: var(--sklearn-color-text);\n",
200
- "}\n",
201
- "\n",
202
- "#sk-container-id-1 pre {\n",
203
- " padding: 0;\n",
204
- "}\n",
205
- "\n",
206
- "#sk-container-id-1 input.sk-hidden--visually {\n",
207
- " border: 0;\n",
208
- " clip: rect(1px 1px 1px 1px);\n",
209
- " clip: rect(1px, 1px, 1px, 1px);\n",
210
- " height: 1px;\n",
211
- " margin: -1px;\n",
212
- " overflow: hidden;\n",
213
- " padding: 0;\n",
214
- " position: absolute;\n",
215
- " width: 1px;\n",
216
- "}\n",
217
- "\n",
218
- "#sk-container-id-1 div.sk-dashed-wrapped {\n",
219
- " border: 1px dashed var(--sklearn-color-line);\n",
220
- " margin: 0 0.4em 0.5em 0.4em;\n",
221
- " box-sizing: border-box;\n",
222
- " padding-bottom: 0.4em;\n",
223
- " background-color: var(--sklearn-color-background);\n",
224
- "}\n",
225
- "\n",
226
- "#sk-container-id-1 div.sk-container {\n",
227
- " /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
228
- " but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
229
- " so we also need the `!important` here to be able to override the\n",
230
- " default hidden behavior on the sphinx rendered scikit-learn.org.\n",
231
- " See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
232
- " display: inline-block !important;\n",
233
- " position: relative;\n",
234
- "}\n",
235
- "\n",
236
- "#sk-container-id-1 div.sk-text-repr-fallback {\n",
237
- " display: none;\n",
238
- "}\n",
239
- "\n",
240
- "div.sk-parallel-item,\n",
241
- "div.sk-serial,\n",
242
- "div.sk-item {\n",
243
- " /* draw centered vertical line to link estimators */\n",
244
- " background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
245
- " background-size: 2px 100%;\n",
246
- " background-repeat: no-repeat;\n",
247
- " background-position: center center;\n",
248
- "}\n",
249
- "\n",
250
- "/* Parallel-specific style estimator block */\n",
251
- "\n",
252
- "#sk-container-id-1 div.sk-parallel-item::after {\n",
253
- " content: \"\";\n",
254
- " width: 100%;\n",
255
- " border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
256
- " flex-grow: 1;\n",
257
- "}\n",
258
- "\n",
259
- "#sk-container-id-1 div.sk-parallel {\n",
260
- " display: flex;\n",
261
- " align-items: stretch;\n",
262
- " justify-content: center;\n",
263
- " background-color: var(--sklearn-color-background);\n",
264
- " position: relative;\n",
265
- "}\n",
266
- "\n",
267
- "#sk-container-id-1 div.sk-parallel-item {\n",
268
- " display: flex;\n",
269
- " flex-direction: column;\n",
270
- "}\n",
271
- "\n",
272
- "#sk-container-id-1 div.sk-parallel-item:first-child::after {\n",
273
- " align-self: flex-end;\n",
274
- " width: 50%;\n",
275
- "}\n",
276
- "\n",
277
- "#sk-container-id-1 div.sk-parallel-item:last-child::after {\n",
278
- " align-self: flex-start;\n",
279
- " width: 50%;\n",
280
- "}\n",
281
- "\n",
282
- "#sk-container-id-1 div.sk-parallel-item:only-child::after {\n",
283
- " width: 0;\n",
284
- "}\n",
285
- "\n",
286
- "/* Serial-specific style estimator block */\n",
287
- "\n",
288
- "#sk-container-id-1 div.sk-serial {\n",
289
- " display: flex;\n",
290
- " flex-direction: column;\n",
291
- " align-items: center;\n",
292
- " background-color: var(--sklearn-color-background);\n",
293
- " padding-right: 1em;\n",
294
- " padding-left: 1em;\n",
295
- "}\n",
296
- "\n",
297
- "\n",
298
- "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
299
- "clickable and can be expanded/collapsed.\n",
300
- "- Pipeline and ColumnTransformer use this feature and define the default style\n",
301
- "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
302
- "*/\n",
303
- "\n",
304
- "/* Pipeline and ColumnTransformer style (default) */\n",
305
- "\n",
306
- "#sk-container-id-1 div.sk-toggleable {\n",
307
- " /* Default theme specific background. It is overwritten whether we have a\n",
308
- " specific estimator or a Pipeline/ColumnTransformer */\n",
309
- " background-color: var(--sklearn-color-background);\n",
310
- "}\n",
311
- "\n",
312
- "/* Toggleable label */\n",
313
- "#sk-container-id-1 label.sk-toggleable__label {\n",
314
- " cursor: pointer;\n",
315
- " display: block;\n",
316
- " width: 100%;\n",
317
- " margin-bottom: 0;\n",
318
- " padding: 0.5em;\n",
319
- " box-sizing: border-box;\n",
320
- " text-align: center;\n",
321
- "}\n",
322
- "\n",
323
- "#sk-container-id-1 label.sk-toggleable__label-arrow:before {\n",
324
- " /* Arrow on the left of the label */\n",
325
- " content: \"▸\";\n",
326
- " float: left;\n",
327
- " margin-right: 0.25em;\n",
328
- " color: var(--sklearn-color-icon);\n",
329
- "}\n",
330
- "\n",
331
- "#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {\n",
332
- " color: var(--sklearn-color-text);\n",
333
- "}\n",
334
- "\n",
335
- "/* Toggleable content - dropdown */\n",
336
- "\n",
337
- "#sk-container-id-1 div.sk-toggleable__content {\n",
338
- " max-height: 0;\n",
339
- " max-width: 0;\n",
340
- " overflow: hidden;\n",
341
- " text-align: left;\n",
342
- " /* unfitted */\n",
343
- " background-color: var(--sklearn-color-unfitted-level-0);\n",
344
- "}\n",
345
- "\n",
346
- "#sk-container-id-1 div.sk-toggleable__content.fitted {\n",
347
- " /* fitted */\n",
348
- " background-color: var(--sklearn-color-fitted-level-0);\n",
349
- "}\n",
350
- "\n",
351
- "#sk-container-id-1 div.sk-toggleable__content pre {\n",
352
- " margin: 0.2em;\n",
353
- " border-radius: 0.25em;\n",
354
- " color: var(--sklearn-color-text);\n",
355
- " /* unfitted */\n",
356
- " background-color: var(--sklearn-color-unfitted-level-0);\n",
357
- "}\n",
358
- "\n",
359
- "#sk-container-id-1 div.sk-toggleable__content.fitted pre {\n",
360
- " /* unfitted */\n",
361
- " background-color: var(--sklearn-color-fitted-level-0);\n",
362
- "}\n",
363
- "\n",
364
- "#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
365
- " /* Expand drop-down */\n",
366
- " max-height: 200px;\n",
367
- " max-width: 100%;\n",
368
- " overflow: auto;\n",
369
- "}\n",
370
- "\n",
371
- "#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
372
- " content: \"▾\";\n",
373
- "}\n",
374
- "\n",
375
- "/* Pipeline/ColumnTransformer-specific style */\n",
376
- "\n",
377
- "#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
378
- " color: var(--sklearn-color-text);\n",
379
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
380
- "}\n",
381
- "\n",
382
- "#sk-container-id-1 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
383
- " background-color: var(--sklearn-color-fitted-level-2);\n",
384
- "}\n",
385
- "\n",
386
- "/* Estimator-specific style */\n",
387
- "\n",
388
- "/* Colorize estimator box */\n",
389
- "#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
390
- " /* unfitted */\n",
391
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
392
- "}\n",
393
- "\n",
394
- "#sk-container-id-1 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
395
- " /* fitted */\n",
396
- " background-color: var(--sklearn-color-fitted-level-2);\n",
397
- "}\n",
398
- "\n",
399
- "#sk-container-id-1 div.sk-label label.sk-toggleable__label,\n",
400
- "#sk-container-id-1 div.sk-label label {\n",
401
- " /* The background is the default theme color */\n",
402
- " color: var(--sklearn-color-text-on-default-background);\n",
403
- "}\n",
404
- "\n",
405
- "/* On hover, darken the color of the background */\n",
406
- "#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {\n",
407
- " color: var(--sklearn-color-text);\n",
408
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
409
- "}\n",
410
- "\n",
411
- "/* Label box, darken color on hover, fitted */\n",
412
- "#sk-container-id-1 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
413
- " color: var(--sklearn-color-text);\n",
414
- " background-color: var(--sklearn-color-fitted-level-2);\n",
415
- "}\n",
416
- "\n",
417
- "/* Estimator label */\n",
418
- "\n",
419
- "#sk-container-id-1 div.sk-label label {\n",
420
- " font-family: monospace;\n",
421
- " font-weight: bold;\n",
422
- " display: inline-block;\n",
423
- " line-height: 1.2em;\n",
424
- "}\n",
425
- "\n",
426
- "#sk-container-id-1 div.sk-label-container {\n",
427
- " text-align: center;\n",
428
- "}\n",
429
- "\n",
430
- "/* Estimator-specific */\n",
431
- "#sk-container-id-1 div.sk-estimator {\n",
432
- " font-family: monospace;\n",
433
- " border: 1px dotted var(--sklearn-color-border-box);\n",
434
- " border-radius: 0.25em;\n",
435
- " box-sizing: border-box;\n",
436
- " margin-bottom: 0.5em;\n",
437
- " /* unfitted */\n",
438
- " background-color: var(--sklearn-color-unfitted-level-0);\n",
439
- "}\n",
440
- "\n",
441
- "#sk-container-id-1 div.sk-estimator.fitted {\n",
442
- " /* fitted */\n",
443
- " background-color: var(--sklearn-color-fitted-level-0);\n",
444
- "}\n",
445
- "\n",
446
- "/* on hover */\n",
447
- "#sk-container-id-1 div.sk-estimator:hover {\n",
448
- " /* unfitted */\n",
449
- " background-color: var(--sklearn-color-unfitted-level-2);\n",
450
- "}\n",
451
- "\n",
452
- "#sk-container-id-1 div.sk-estimator.fitted:hover {\n",
453
- " /* fitted */\n",
454
- " background-color: var(--sklearn-color-fitted-level-2);\n",
455
- "}\n",
456
- "\n",
457
- "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
458
- "\n",
459
- "/* Common style for \"i\" and \"?\" */\n",
460
- "\n",
461
- ".sk-estimator-doc-link,\n",
462
- "a:link.sk-estimator-doc-link,\n",
463
- "a:visited.sk-estimator-doc-link {\n",
464
- " float: right;\n",
465
- " font-size: smaller;\n",
466
- " line-height: 1em;\n",
467
- " font-family: monospace;\n",
468
- " background-color: var(--sklearn-color-background);\n",
469
- " border-radius: 1em;\n",
470
- " height: 1em;\n",
471
- " width: 1em;\n",
472
- " text-decoration: none !important;\n",
473
- " margin-left: 1ex;\n",
474
- " /* unfitted */\n",
475
- " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
476
- " color: var(--sklearn-color-unfitted-level-1);\n",
477
- "}\n",
478
- "\n",
479
- ".sk-estimator-doc-link.fitted,\n",
480
- "a:link.sk-estimator-doc-link.fitted,\n",
481
- "a:visited.sk-estimator-doc-link.fitted {\n",
482
- " /* fitted */\n",
483
- " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
484
- " color: var(--sklearn-color-fitted-level-1);\n",
485
- "}\n",
486
- "\n",
487
- "/* On hover */\n",
488
- "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
489
- ".sk-estimator-doc-link:hover,\n",
490
- "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
491
- ".sk-estimator-doc-link:hover {\n",
492
- " /* unfitted */\n",
493
- " background-color: var(--sklearn-color-unfitted-level-3);\n",
494
- " color: var(--sklearn-color-background);\n",
495
- " text-decoration: none;\n",
496
- "}\n",
497
- "\n",
498
- "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
499
- ".sk-estimator-doc-link.fitted:hover,\n",
500
- "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
501
- ".sk-estimator-doc-link.fitted:hover {\n",
502
- " /* fitted */\n",
503
- " background-color: var(--sklearn-color-fitted-level-3);\n",
504
- " color: var(--sklearn-color-background);\n",
505
- " text-decoration: none;\n",
506
- "}\n",
507
- "\n",
508
- "/* Span, style for the box shown on hovering the info icon */\n",
509
- ".sk-estimator-doc-link span {\n",
510
- " display: none;\n",
511
- " z-index: 9999;\n",
512
- " position: relative;\n",
513
- " font-weight: normal;\n",
514
- " right: .2ex;\n",
515
- " padding: .5ex;\n",
516
- " margin: .5ex;\n",
517
- " width: min-content;\n",
518
- " min-width: 20ex;\n",
519
- " max-width: 50ex;\n",
520
- " color: var(--sklearn-color-text);\n",
521
- " box-shadow: 2pt 2pt 4pt #999;\n",
522
- " /* unfitted */\n",
523
- " background: var(--sklearn-color-unfitted-level-0);\n",
524
- " border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
525
- "}\n",
526
- "\n",
527
- ".sk-estimator-doc-link.fitted span {\n",
528
- " /* fitted */\n",
529
- " background: var(--sklearn-color-fitted-level-0);\n",
530
- " border: var(--sklearn-color-fitted-level-3);\n",
531
- "}\n",
532
- "\n",
533
- ".sk-estimator-doc-link:hover span {\n",
534
- " display: block;\n",
535
- "}\n",
536
- "\n",
537
- "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
538
- "\n",
539
- "#sk-container-id-1 a.estimator_doc_link {\n",
540
- " float: right;\n",
541
- " font-size: 1rem;\n",
542
- " line-height: 1em;\n",
543
- " font-family: monospace;\n",
544
- " background-color: var(--sklearn-color-background);\n",
545
- " border-radius: 1rem;\n",
546
- " height: 1rem;\n",
547
- " width: 1rem;\n",
548
- " text-decoration: none;\n",
549
- " /* unfitted */\n",
550
- " color: var(--sklearn-color-unfitted-level-1);\n",
551
- " border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
552
- "}\n",
553
- "\n",
554
- "#sk-container-id-1 a.estimator_doc_link.fitted {\n",
555
- " /* fitted */\n",
556
- " border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
557
- " color: var(--sklearn-color-fitted-level-1);\n",
558
- "}\n",
559
- "\n",
560
- "/* On hover */\n",
561
- "#sk-container-id-1 a.estimator_doc_link:hover {\n",
562
- " /* unfitted */\n",
563
- " background-color: var(--sklearn-color-unfitted-level-3);\n",
564
- " color: var(--sklearn-color-background);\n",
565
- " text-decoration: none;\n",
566
- "}\n",
567
- "\n",
568
- "#sk-container-id-1 a.estimator_doc_link.fitted:hover {\n",
569
- " /* fitted */\n",
570
- " background-color: var(--sklearn-color-fitted-level-3);\n",
571
- "}\n",
572
- "</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
573
- " colsample_bylevel=None, colsample_bynode=None,\n",
574
- " colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
575
- " enable_categorical=False, eval_metric=None, feature_types=None,\n",
576
- " gamma=None, grow_policy=None, importance_type=None,\n",
577
- " interaction_constraints=None, learning_rate=None, max_bin=None,\n",
578
- " max_cat_threshold=None, max_cat_to_onehot=None,\n",
579
- " max_delta_step=None, max_depth=None, max_leaves=None,\n",
580
- " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
581
- " multi_strategy=None, n_estimators=None, n_jobs=None,\n",
582
- " num_parallel_tree=None, random_state=None, ...)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;XGBRegressor<span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
583
- " colsample_bylevel=None, colsample_bynode=None,\n",
584
- " colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
585
- " enable_categorical=False, eval_metric=None, feature_types=None,\n",
586
- " gamma=None, grow_policy=None, importance_type=None,\n",
587
- " interaction_constraints=None, learning_rate=None, max_bin=None,\n",
588
- " max_cat_threshold=None, max_cat_to_onehot=None,\n",
589
- " max_delta_step=None, max_depth=None, max_leaves=None,\n",
590
- " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
591
- " multi_strategy=None, n_estimators=None, n_jobs=None,\n",
592
- " num_parallel_tree=None, random_state=None, ...)</pre></div> </div></div></div></div>"
593
- ],
594
- "text/plain": [
595
- "XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
596
- " colsample_bylevel=None, colsample_bynode=None,\n",
597
- " colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
598
- " enable_categorical=False, eval_metric=None, feature_types=None,\n",
599
- " gamma=None, grow_policy=None, importance_type=None,\n",
600
- " interaction_constraints=None, learning_rate=None, max_bin=None,\n",
601
- " max_cat_threshold=None, max_cat_to_onehot=None,\n",
602
- " max_delta_step=None, max_depth=None, max_leaves=None,\n",
603
- " min_child_weight=None, missing=nan, monotone_constraints=None,\n",
604
- " multi_strategy=None, n_estimators=None, n_jobs=None,\n",
605
- " num_parallel_tree=None, random_state=None, ...)"
606
- ]
607
- },
608
- "execution_count": 6,
609
- "metadata": {},
610
- "output_type": "execute_result"
611
- }
612
- ],
613
  "source": [
614
  "# Display the retrieved XGBoost regressor model\n",
615
  "retrieved_xgboost_model"
@@ -624,20 +149,9 @@
624
  },
625
  {
626
  "cell_type": "code",
627
- "execution_count": 7,
628
  "metadata": {},
629
- "outputs": [
630
- {
631
- "data": {
632
- "text/plain": [
633
- "1714138328000"
634
- ]
635
- },
636
- "execution_count": 7,
637
- "metadata": {},
638
- "output_type": "execute_result"
639
- }
640
- ],
641
  "source": [
642
  "import datetime\n",
643
  "\n",
@@ -653,17 +167,9 @@
653
  },
654
  {
655
  "cell_type": "code",
656
- "execution_count": 10,
657
  "metadata": {},
658
- "outputs": [
659
- {
660
- "name": "stdout",
661
- "output_type": "stream",
662
- "text": [
663
- "Finished: Reading data from Hopsworks, using ArrowFlight (2.85s) \n"
664
- ]
665
- }
666
- ],
667
  "source": [
668
  "# Initializing batch scoring\n",
669
  "feature_view.init_batch_scoring(training_dataset_version=1)\n",
@@ -676,199 +182,12 @@
676
  },
677
  {
678
  "cell_type": "code",
679
- "execution_count": 11,
680
  "metadata": {},
681
- "outputs": [
682
- {
683
- "data": {
684
- "text/html": [
685
- "<div>\n",
686
- "<style scoped>\n",
687
- " .dataframe tbody tr th:only-of-type {\n",
688
- " vertical-align: middle;\n",
689
- " }\n",
690
- "\n",
691
- " .dataframe tbody tr th {\n",
692
- " vertical-align: top;\n",
693
- " }\n",
694
- "\n",
695
- " .dataframe thead th {\n",
696
- " text-align: right;\n",
697
- " }\n",
698
- "</style>\n",
699
- "<table border=\"1\" class=\"dataframe\">\n",
700
- " <thead>\n",
701
- " <tr style=\"text-align: right;\">\n",
702
- " <th></th>\n",
703
- " <th>timestamp</th>\n",
704
- " <th>time</th>\n",
705
- " <th>date</th>\n",
706
- " <th>dk1_spotpricedkk_kwh</th>\n",
707
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
708
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
709
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
710
- " <th>temperature_2m</th>\n",
711
- " <th>relative_humidity_2m</th>\n",
712
- " <th>precipitation</th>\n",
713
- " <th>rain</th>\n",
714
- " <th>snowfall</th>\n",
715
- " <th>weather_code</th>\n",
716
- " <th>cloud_cover</th>\n",
717
- " <th>wind_speed_10m</th>\n",
718
- " <th>wind_gusts_10m</th>\n",
719
- " <th>type</th>\n",
720
- " </tr>\n",
721
- " </thead>\n",
722
- " <tbody>\n",
723
- " <tr>\n",
724
- " <th>0</th>\n",
725
- " <td>1714287600000</td>\n",
726
- " <td>2024-04-28 07:00:00+00:00</td>\n",
727
- " <td>2024-04-28</td>\n",
728
- " <td>0.00186</td>\n",
729
- " <td>0.959167</td>\n",
730
- " <td>0.771750</td>\n",
731
- " <td>0.184346</td>\n",
732
- " <td>8.5</td>\n",
733
- " <td>91.0</td>\n",
734
- " <td>0.0</td>\n",
735
- " <td>0.0</td>\n",
736
- " <td>0.0</td>\n",
737
- " <td>2.0</td>\n",
738
- " <td>62.0</td>\n",
739
- " <td>12.8</td>\n",
740
- " <td>22.3</td>\n",
741
- " <td>Not a Workday</td>\n",
742
- " </tr>\n",
743
- " <tr>\n",
744
- " <th>1</th>\n",
745
- " <td>1714392000000</td>\n",
746
- " <td>2024-04-29 12:00:00+00:00</td>\n",
747
- " <td>2024-04-29</td>\n",
748
- " <td>0.26984</td>\n",
749
- " <td>0.649292</td>\n",
750
- " <td>1.123000</td>\n",
751
- " <td>1.615064</td>\n",
752
- " <td>14.1</td>\n",
753
- " <td>48.0</td>\n",
754
- " <td>0.0</td>\n",
755
- " <td>0.0</td>\n",
756
- " <td>0.0</td>\n",
757
- " <td>1.0</td>\n",
758
- " <td>32.0</td>\n",
759
- " <td>17.8</td>\n",
760
- " <td>39.2</td>\n",
761
- " <td>Workday</td>\n",
762
- " </tr>\n",
763
- " <tr>\n",
764
- " <th>2</th>\n",
765
- " <td>1714536000000</td>\n",
766
- " <td>2024-05-01 04:00:00+00:00</td>\n",
767
- " <td>2024-05-01</td>\n",
768
- " <td>0.35659</td>\n",
769
- " <td>0.605792</td>\n",
770
- " <td>1.227542</td>\n",
771
- " <td>0.000218</td>\n",
772
- " <td>11.2</td>\n",
773
- " <td>78.0</td>\n",
774
- " <td>0.0</td>\n",
775
- " <td>0.0</td>\n",
776
- " <td>0.0</td>\n",
777
- " <td>3.0</td>\n",
778
- " <td>96.0</td>\n",
779
- " <td>18.0</td>\n",
780
- " <td>33.5</td>\n",
781
- " <td>Workday</td>\n",
782
- " </tr>\n",
783
- " <tr>\n",
784
- " <th>3</th>\n",
785
- " <td>1714172400000</td>\n",
786
- " <td>2024-04-26 23:00:00+00:00</td>\n",
787
- " <td>2024-04-26</td>\n",
788
- " <td>0.65829</td>\n",
789
- " <td>0.178042</td>\n",
790
- " <td>0.244625</td>\n",
791
- " <td>0.000000</td>\n",
792
- " <td>3.9</td>\n",
793
- " <td>96.0</td>\n",
794
- " <td>0.0</td>\n",
795
- " <td>0.0</td>\n",
796
- " <td>0.0</td>\n",
797
- " <td>1.0</td>\n",
798
- " <td>36.0</td>\n",
799
- " <td>3.9</td>\n",
800
- " <td>8.6</td>\n",
801
- " <td>Workday</td>\n",
802
- " </tr>\n",
803
- " <tr>\n",
804
- " <th>4</th>\n",
805
- " <td>1714258800000</td>\n",
806
- " <td>2024-04-27 23:00:00+00:00</td>\n",
807
- " <td>2024-04-27</td>\n",
808
- " <td>0.48644</td>\n",
809
- " <td>0.657625</td>\n",
810
- " <td>0.999583</td>\n",
811
- " <td>0.000000</td>\n",
812
- " <td>6.9</td>\n",
813
- " <td>93.0</td>\n",
814
- " <td>0.0</td>\n",
815
- " <td>0.0</td>\n",
816
- " <td>0.0</td>\n",
817
- " <td>1.0</td>\n",
818
- " <td>40.0</td>\n",
819
- " <td>16.2</td>\n",
820
- " <td>29.9</td>\n",
821
- " <td>Not a Workday</td>\n",
822
- " </tr>\n",
823
- " </tbody>\n",
824
- "</table>\n",
825
- "</div>"
826
- ],
827
- "text/plain": [
828
- " timestamp time date dk1_spotpricedkk_kwh \\\n",
829
- "0 1714287600000 2024-04-28 07:00:00+00:00 2024-04-28 0.00186 \n",
830
- "1 1714392000000 2024-04-29 12:00:00+00:00 2024-04-29 0.26984 \n",
831
- "2 1714536000000 2024-05-01 04:00:00+00:00 2024-05-01 0.35659 \n",
832
- "3 1714172400000 2024-04-26 23:00:00+00:00 2024-04-26 0.65829 \n",
833
- "4 1714258800000 2024-04-27 23:00:00+00:00 2024-04-27 0.48644 \n",
834
- "\n",
835
- " dk1_offshore_wind_forecastintraday_kwh \\\n",
836
- "0 0.959167 \n",
837
- "1 0.649292 \n",
838
- "2 0.605792 \n",
839
- "3 0.178042 \n",
840
- "4 0.657625 \n",
841
- "\n",
842
- " dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \\\n",
843
- "0 0.771750 0.184346 \n",
844
- "1 1.123000 1.615064 \n",
845
- "2 1.227542 0.000218 \n",
846
- "3 0.244625 0.000000 \n",
847
- "4 0.999583 0.000000 \n",
848
- "\n",
849
- " temperature_2m relative_humidity_2m precipitation rain snowfall \\\n",
850
- "0 8.5 91.0 0.0 0.0 0.0 \n",
851
- "1 14.1 48.0 0.0 0.0 0.0 \n",
852
- "2 11.2 78.0 0.0 0.0 0.0 \n",
853
- "3 3.9 96.0 0.0 0.0 0.0 \n",
854
- "4 6.9 93.0 0.0 0.0 0.0 \n",
855
- "\n",
856
- " weather_code cloud_cover wind_speed_10m wind_gusts_10m type \n",
857
- "0 2.0 62.0 12.8 22.3 Not a Workday \n",
858
- "1 1.0 32.0 17.8 39.2 Workday \n",
859
- "2 3.0 96.0 18.0 33.5 Workday \n",
860
- "3 1.0 36.0 3.9 8.6 Workday \n",
861
- "4 1.0 40.0 16.2 29.9 Not a Workday "
862
- ]
863
- },
864
- "execution_count": 11,
865
- "metadata": {},
866
- "output_type": "execute_result"
867
- }
868
- ],
869
  "source": [
870
  "# Display the first 5 rows of the batch data\n",
871
- "batch_data.head(5)"
872
  ]
873
  },
874
  {
@@ -880,215 +199,61 @@
880
  },
881
  {
882
  "cell_type": "code",
883
- "execution_count": 16,
884
  "metadata": {},
885
- "outputs": [
886
- {
887
- "name": "stderr",
888
- "output_type": "stream",
889
- "text": [
890
- "DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
891
- "DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n"
892
- ]
893
- }
894
- ],
895
  "source": [
896
- "from sklearn.preprocessing import LabelEncoder\n",
897
  "\n",
898
- "# Create a LabelEncoder object\n",
899
- "label_encoder = LabelEncoder()\n",
900
  "\n",
901
- "# Fit the encoder to the data in the 'city_name' column\n",
902
- "label_encoder.fit(batch_data[['type']])\n",
903
  "\n",
904
- "# Transform the 'city_name' column data using the fitted encoder\n",
905
- "encoded = label_encoder.transform(batch_data[['type']])"
906
  ]
907
  },
908
  {
909
  "cell_type": "code",
910
- "execution_count": 17,
911
  "metadata": {},
912
- "outputs": [
913
- {
914
- "name": "stderr",
915
- "output_type": "stream",
916
- "text": [
917
- "DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
918
- "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n"
919
- ]
920
- },
921
- {
922
- "data": {
923
- "text/html": [
924
- "<div>\n",
925
- "<style scoped>\n",
926
- " .dataframe tbody tr th:only-of-type {\n",
927
- " vertical-align: middle;\n",
928
- " }\n",
929
- "\n",
930
- " .dataframe tbody tr th {\n",
931
- " vertical-align: top;\n",
932
- " }\n",
933
- "\n",
934
- " .dataframe thead th {\n",
935
- " text-align: right;\n",
936
- " }\n",
937
- "</style>\n",
938
- "<table border=\"1\" class=\"dataframe\">\n",
939
- " <thead>\n",
940
- " <tr style=\"text-align: right;\">\n",
941
- " <th></th>\n",
942
- " <th>dk1_spotpricedkk_kwh</th>\n",
943
- " <th>dk1_offshore_wind_forecastintraday_kwh</th>\n",
944
- " <th>dk1_onshore_wind_forecastintraday_kwh</th>\n",
945
- " <th>dk1_solar_forecastintraday_kwh</th>\n",
946
- " <th>temperature_2m</th>\n",
947
- " <th>relative_humidity_2m</th>\n",
948
- " <th>precipitation</th>\n",
949
- " <th>rain</th>\n",
950
- " <th>snowfall</th>\n",
951
- " <th>weather_code</th>\n",
952
- " <th>cloud_cover</th>\n",
953
- " <th>wind_speed_10m</th>\n",
954
- " <th>wind_gusts_10m</th>\n",
955
- " <th>type_encoded</th>\n",
956
- " </tr>\n",
957
- " </thead>\n",
958
- " <tbody>\n",
959
- " <tr>\n",
960
- " <th>48</th>\n",
961
- " <td>0.48757</td>\n",
962
- " <td>0.428250</td>\n",
963
- " <td>1.065542</td>\n",
964
- " <td>0.712989</td>\n",
965
- " <td>7.9</td>\n",
966
- " <td>67.0</td>\n",
967
- " <td>0.1</td>\n",
968
- " <td>0.1</td>\n",
969
- " <td>0.0</td>\n",
970
- " <td>51.0</td>\n",
971
- " <td>100.0</td>\n",
972
- " <td>24.5</td>\n",
973
- " <td>49.3</td>\n",
974
- " <td>0</td>\n",
975
- " </tr>\n",
976
- " <tr>\n",
977
- " <th>38</th>\n",
978
- " <td>0.52150</td>\n",
979
- " <td>0.374083</td>\n",
980
- " <td>0.968125</td>\n",
981
- " <td>0.740813</td>\n",
982
- " <td>7.5</td>\n",
983
- " <td>67.0</td>\n",
984
- " <td>0.1</td>\n",
985
- " <td>0.1</td>\n",
986
- " <td>0.0</td>\n",
987
- " <td>51.0</td>\n",
988
- " <td>100.0</td>\n",
989
- " <td>23.1</td>\n",
990
- " <td>47.5</td>\n",
991
- " <td>0</td>\n",
992
- " </tr>\n",
993
- " <tr>\n",
994
- " <th>33</th>\n",
995
- " <td>0.53478</td>\n",
996
- " <td>0.322542</td>\n",
997
- " <td>0.848917</td>\n",
998
- " <td>0.666078</td>\n",
999
- " <td>6.8</td>\n",
1000
- " <td>73.0</td>\n",
1001
- " <td>0.1</td>\n",
1002
- " <td>0.1</td>\n",
1003
- " <td>0.0</td>\n",
1004
- " <td>51.0</td>\n",
1005
- " <td>100.0</td>\n",
1006
- " <td>21.2</td>\n",
1007
- " <td>43.9</td>\n",
1008
- " <td>0</td>\n",
1009
- " </tr>\n",
1010
- " <tr>\n",
1011
- " <th>23</th>\n",
1012
- " <td>0.60012</td>\n",
1013
- " <td>0.297750</td>\n",
1014
- " <td>0.743667</td>\n",
1015
- " <td>0.498373</td>\n",
1016
- " <td>6.4</td>\n",
1017
- " <td>76.0</td>\n",
1018
- " <td>0.2</td>\n",
1019
- " <td>0.2</td>\n",
1020
- " <td>0.0</td>\n",
1021
- " <td>51.0</td>\n",
1022
- " <td>81.0</td>\n",
1023
- " <td>19.6</td>\n",
1024
- " <td>40.0</td>\n",
1025
- " <td>0</td>\n",
1026
- " </tr>\n",
1027
- " <tr>\n",
1028
- " <th>18</th>\n",
1029
- " <td>0.70021</td>\n",
1030
- " <td>0.281875</td>\n",
1031
- " <td>0.633917</td>\n",
1032
- " <td>0.315199</td>\n",
1033
- " <td>5.8</td>\n",
1034
- " <td>81.0</td>\n",
1035
- " <td>0.1</td>\n",
1036
- " <td>0.1</td>\n",
1037
- " <td>0.0</td>\n",
1038
- " <td>51.0</td>\n",
1039
- " <td>61.0</td>\n",
1040
- " <td>15.3</td>\n",
1041
- " <td>37.1</td>\n",
1042
- " <td>0</td>\n",
1043
- " </tr>\n",
1044
- " </tbody>\n",
1045
- "</table>\n",
1046
- "</div>"
1047
- ],
1048
- "text/plain": [
1049
- " dk1_spotpricedkk_kwh dk1_offshore_wind_forecastintraday_kwh \\\n",
1050
- "48 0.48757 0.428250 \n",
1051
- "38 0.52150 0.374083 \n",
1052
- "33 0.53478 0.322542 \n",
1053
- "23 0.60012 0.297750 \n",
1054
- "18 0.70021 0.281875 \n",
1055
- "\n",
1056
- " dk1_onshore_wind_forecastintraday_kwh dk1_solar_forecastintraday_kwh \\\n",
1057
- "48 1.065542 0.712989 \n",
1058
- "38 0.968125 0.740813 \n",
1059
- "33 0.848917 0.666078 \n",
1060
- "23 0.743667 0.498373 \n",
1061
- "18 0.633917 0.315199 \n",
1062
- "\n",
1063
- " temperature_2m relative_humidity_2m precipitation rain snowfall \\\n",
1064
- "48 7.9 67.0 0.1 0.1 0.0 \n",
1065
- "38 7.5 67.0 0.1 0.1 0.0 \n",
1066
- "33 6.8 73.0 0.1 0.1 0.0 \n",
1067
- "23 6.4 76.0 0.2 0.2 0.0 \n",
1068
- "18 5.8 81.0 0.1 0.1 0.0 \n",
1069
- "\n",
1070
- " weather_code cloud_cover wind_speed_10m wind_gusts_10m type_encoded \n",
1071
- "48 51.0 100.0 24.5 49.3 0 \n",
1072
- "38 51.0 100.0 23.1 47.5 0 \n",
1073
- "33 51.0 100.0 21.2 43.9 0 \n",
1074
- "23 51.0 81.0 19.6 40.0 0 \n",
1075
- "18 51.0 61.0 15.3 37.1 0 "
1076
- ]
1077
- },
1078
- "execution_count": 17,
1079
- "metadata": {},
1080
- "output_type": "execute_result"
1081
- }
1082
- ],
1083
  "source": [
1084
- "# Convert the output of the label encoding to a dense array and concatenate with the original data\n",
1085
- "X_batch = pd.concat([batch_data, pd.DataFrame(encoded)], axis=1)\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1086
  "\n",
1087
  "# Drop columns 'date', 'city_name', 'unix_time' from the DataFrame 'X'\n",
1088
- "X_batch = X_batch.drop(columns=['date', 'time', 'timestamp', 'type'])\n",
1089
  "\n",
1090
- "# Rename the newly added column with label-encoded city names to 'city_name_encoded'\n",
1091
- "X_batch = X_batch.rename(columns={0: \"type_encoded\"})\n",
1092
  "\n",
1093
  "# Displaying the first 5 rows of the modified DataFrame\n",
1094
  "X_batch.head()"
@@ -1096,25 +261,9 @@
1096
  },
1097
  {
1098
  "cell_type": "code",
1099
- "execution_count": 18,
1100
  "metadata": {},
1101
- "outputs": [
1102
- {
1103
- "data": {
1104
- "text/plain": [
1105
- "48 0.48757\n",
1106
- "38 0.52150\n",
1107
- "33 0.53478\n",
1108
- "23 0.60012\n",
1109
- "18 0.70021\n",
1110
- "Name: dk1_spotpricedkk_kwh, dtype: float64"
1111
- ]
1112
- },
1113
- "execution_count": 18,
1114
- "metadata": {},
1115
- "output_type": "execute_result"
1116
- }
1117
- ],
1118
  "source": [
1119
  "# Extract the target variable 'dk1_spotpricedkk_kwh' from the batch data\n",
1120
  "y_batch = X_batch.pop('dk1_spotpricedkk_kwh')\n",
@@ -1125,67 +274,51 @@
1125
  },
1126
  {
1127
  "cell_type": "code",
1128
- "execution_count": 19,
1129
  "metadata": {},
1130
- "outputs": [
1131
- {
1132
- "data": {
1133
- "text/plain": [
1134
- "48 0.48757\n",
1135
- "38 0.52150\n",
1136
- "33 0.53478\n",
1137
- "23 0.60012\n",
1138
- "18 0.70021\n",
1139
- " ... \n",
1140
- "32 0.37590\n",
1141
- "47 0.37292\n",
1142
- "27 0.25366\n",
1143
- "64 0.22315\n",
1144
- "96 0.16408\n",
1145
- "Name: dk1_spotpricedkk_kwh, Length: 106, dtype: float64"
1146
- ]
1147
- },
1148
- "execution_count": 19,
1149
- "metadata": {},
1150
- "output_type": "execute_result"
1151
- }
1152
- ],
1153
  "source": [
1154
- "# Display the target variable\n",
1155
- "y_batch"
 
 
 
1156
  ]
1157
  },
1158
  {
1159
  "cell_type": "code",
1160
- "execution_count": 20,
1161
  "metadata": {},
1162
- "outputs": [
1163
- {
1164
- "name": "stderr",
1165
- "output_type": "stream",
1166
- "text": [
1167
- "DeprecationWarning: np.find_common_type is deprecated. Please use `np.result_type` or `np.promote_types`.\n",
1168
- "See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information. (Deprecated NumPy 1.25)\n"
1169
- ]
1170
- },
1171
- {
1172
- "data": {
1173
- "text/plain": [
1174
- "array([0.25547686, 0.37913612, 0.33905983, 0.3961694 , 0.5968245 ],\n",
1175
- " dtype=float32)"
1176
- ]
1177
- },
1178
- "execution_count": 20,
1179
- "metadata": {},
1180
- "output_type": "execute_result"
1181
- }
1182
- ],
1183
  "source": [
1184
- "# Make predictions on the batch data using the retrieved XGBoost regressor model\n",
1185
- "predictions = retrieved_xgboost_model.predict(X_batch)\n",
1186
  "\n",
1187
- "# Display the first 5 predictions\n",
1188
- "predictions[:5]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1189
  ]
1190
  },
1191
  {
 
28
  },
29
  {
30
  "cell_type": "code",
31
+ "execution_count": null,
32
  "metadata": {},
33
  "outputs": [],
34
  "source": [
 
53
  },
54
  {
55
  "cell_type": "code",
56
+ "execution_count": null,
57
  "metadata": {},
58
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
59
  "source": [
60
  "# Importing the hopsworks module\n",
61
  "import hopsworks\n",
 
76
  },
77
  {
78
  "cell_type": "code",
79
+ "execution_count": null,
80
  "metadata": {},
81
  "outputs": [],
82
  "source": [
 
96
  },
97
  {
98
  "cell_type": "code",
99
+ "execution_count": null,
100
  "metadata": {},
101
+ "outputs": [],
 
 
 
 
 
 
 
 
102
  "source": [
103
  "# Retrieve the model registry\n",
104
  "mr = project.get_model_registry()"
 
113
  },
114
  {
115
  "cell_type": "code",
116
+ "execution_count": null,
117
  "metadata": {},
118
+ "outputs": [],
 
 
 
 
 
 
 
 
119
  "source": [
120
  "# Retrieving the model from the Model Registry\n",
121
  "retrieved_model = mr.get_model(\n",
 
132
  },
133
  {
134
  "cell_type": "code",
135
+ "execution_count": null,
136
  "metadata": {},
137
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  "source": [
139
  "# Display the retrieved XGBoost regressor model\n",
140
  "retrieved_xgboost_model"
 
149
  },
150
  {
151
  "cell_type": "code",
152
+ "execution_count": null,
153
  "metadata": {},
154
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
155
  "source": [
156
  "import datetime\n",
157
  "\n",
 
167
  },
168
  {
169
  "cell_type": "code",
170
+ "execution_count": null,
171
  "metadata": {},
172
+ "outputs": [],
 
 
 
 
 
 
 
 
173
  "source": [
174
  "# Initializing batch scoring\n",
175
  "feature_view.init_batch_scoring(training_dataset_version=1)\n",
 
182
  },
183
  {
184
  "cell_type": "code",
185
+ "execution_count": null,
186
  "metadata": {},
187
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  "source": [
189
  "# Display the first 5 rows of the batch data\n",
190
+ "batch_data"
191
  ]
192
  },
193
  {
 
199
  },
200
  {
201
  "cell_type": "code",
202
+ "execution_count": null,
203
  "metadata": {},
204
+ "outputs": [],
 
 
 
 
 
 
 
 
 
205
  "source": [
206
+ "# from sklearn.preprocessing import LabelEncoder\n",
207
  "\n",
208
+ "# # Create a LabelEncoder object\n",
209
+ "# label_encoder = LabelEncoder()\n",
210
  "\n",
211
+ "# # Fit the encoder to the data in the 'city_name' column\n",
212
+ "# label_encoder.fit(batch_data[['type']])\n",
213
  "\n",
214
+ "# # Transform the 'city_name' column data using the fitted encoder\n",
215
+ "# encoded = label_encoder.transform(batch_data[['type']])"
216
  ]
217
  },
218
  {
219
  "cell_type": "code",
220
+ "execution_count": null,
221
  "metadata": {},
222
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
  "source": [
224
+ "batch_data"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": null,
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": []
233
+ },
234
+ {
235
+ "cell_type": "code",
236
+ "execution_count": null,
237
+ "metadata": {},
238
+ "outputs": [],
239
+ "source": []
240
+ },
241
+ {
242
+ "cell_type": "code",
243
+ "execution_count": null,
244
+ "metadata": {},
245
+ "outputs": [],
246
+ "source": [
247
+ "# # Convert the output of the label encoding to a dense array and concatenate with the original data\n",
248
+ "# X_batch = pd.concat([batch_data, pd.DataFrame(encoded)], axis=1)\n",
249
+ "\n",
250
+ "X_batch = batch_data\n",
251
  "\n",
252
  "# Drop columns 'date', 'city_name', 'unix_time' from the DataFrame 'X'\n",
253
+ "X_batch = X_batch.drop(columns=['date', 'time', 'timestamp'])\n",
254
  "\n",
255
+ "# # Rename the newly added column with label-encoded city names to 'city_name_encoded'\n",
256
+ "# X_batch = X_batch.rename(columns={0: \"type_encoded\"})\n",
257
  "\n",
258
  "# Displaying the first 5 rows of the modified DataFrame\n",
259
  "X_batch.head()"
 
261
  },
262
  {
263
  "cell_type": "code",
264
+ "execution_count": null,
265
  "metadata": {},
266
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
  "source": [
268
  "# Extract the target variable 'dk1_spotpricedkk_kwh' from the batch data\n",
269
  "y_batch = X_batch.pop('dk1_spotpricedkk_kwh')\n",
 
274
  },
275
  {
276
  "cell_type": "code",
277
+ "execution_count": null,
278
  "metadata": {},
279
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  "source": [
281
+ "# Make predictions on the batch data using the retrieved XGBoost regressor model\n",
282
+ "predictions = retrieved_xgboost_model.predict(X_batch)\n",
283
+ "\n",
284
+ "# Display the first 5 predictions\n",
285
+ "predictions[:5]"
286
  ]
287
  },
288
  {
289
  "cell_type": "code",
290
+ "execution_count": null,
291
  "metadata": {},
292
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  "source": [
294
+ "label = batch_data[\"dk1_spotpricedkk_kwh\"]\n",
295
+ "y_pred = retrieved_xgboost_model.predict(X_batch)\n",
296
  "\n",
297
+ "data = {\n",
298
+ " 'prediction': [y_pred],\n",
299
+ " 'label': [label],\n",
300
+ "}\n",
301
+ "\n",
302
+ "monitor_df = pd.DataFrame(data)\n",
303
+ "monitor_df"
304
+ ]
305
+ },
306
+ {
307
+ "cell_type": "code",
308
+ "execution_count": null,
309
+ "metadata": {},
310
+ "outputs": [],
311
+ "source": [
312
+ "label = batch_data[\"dk1_spotpricedkk_kwh\"]\n",
313
+ "y_pred = retrieved_xgboost_model.predict(X_batch)\n",
314
+ "\n",
315
+ "data = {\n",
316
+ " 'prediction': y_pred,\n",
317
+ " 'label': label,\n",
318
+ "}\n",
319
+ "\n",
320
+ "monitor_df = pd.DataFrame(data)\n",
321
+ "monitor_df"
322
  ]
323
  },
324
  {
notebooks/model/dk_electricity_model.pkl CHANGED
Binary files a/notebooks/model/dk_electricity_model.pkl and b/notebooks/model/dk_electricity_model.pkl differ
 
notebooks/test.ipynb CHANGED
The diff for this file is too large to render. See raw diff