mtzeve commited on
Commit
b6d3c53
·
1 Parent(s): 282d804
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
__pycache__/feature_view_freddie.cpython-311.pyc ADDED
Binary file (3.19 kB). View file
 
feature_view_freddie.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 16,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -23,9 +23,73 @@
23
  },
24
  {
25
  "cell_type": "code",
26
- "execution_count": 17,
27
  "metadata": {},
28
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  "source": [
30
  "from feature_pipeline import tesla_fg\n",
31
  "from feature_pipeline import news_sentiment_fg"
@@ -33,7 +97,7 @@
33
  },
34
  {
35
  "cell_type": "code",
36
- "execution_count": 18,
37
  "metadata": {},
38
  "outputs": [
39
  {
@@ -42,7 +106,7 @@
42
  "True"
43
  ]
44
  },
45
- "execution_count": 18,
46
  "metadata": {},
47
  "output_type": "execute_result"
48
  }
@@ -56,7 +120,7 @@
56
  },
57
  {
58
  "cell_type": "code",
59
- "execution_count": 19,
60
  "metadata": {},
61
  "outputs": [
62
  {
@@ -79,7 +143,7 @@
79
  },
80
  {
81
  "cell_type": "code",
82
- "execution_count": 22,
83
  "metadata": {},
84
  "outputs": [],
85
  "source": [
@@ -91,7 +155,7 @@
91
  "\n",
92
  " # Define the query\n",
93
  " ds_query = tesla_fg.select(['date', 'open', 'ticker'])\\\n",
94
- " .join(news_sentiment_fg.select(['date', 'sentiment']))\n",
95
  "\n",
96
  " # Create the feature view\n",
97
  " feature_view = fs.create_feature_view(\n",
@@ -105,7 +169,7 @@
105
  },
106
  {
107
  "cell_type": "code",
108
- "execution_count": 23,
109
  "metadata": {},
110
  "outputs": [
111
  {
@@ -125,6 +189,26 @@
125
  " feature_view, tesla_fg = create_stocks_feature_view(fs, 1)"
126
  ]
127
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  {
129
  "cell_type": "code",
130
  "execution_count": null,
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
23
  },
24
  {
25
  "cell_type": "code",
26
+ "execution_count": 2,
27
  "metadata": {},
28
+ "outputs": [
29
+ {
30
+ "name": "stdout",
31
+ "output_type": "stream",
32
+ "text": [
33
+ " date 1. open 2. high 3. low 4. close 5. volume ticker\n",
34
+ "0 2024-05-03 182.10 184.78 178.4200 181.19 75491539.0 TSLA\n",
35
+ "1 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0 TSLA\n",
36
+ "2 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0 TSLA\n",
37
+ "3 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0 TSLA\n",
38
+ "4 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0 TSLA\n",
39
+ "Connected. Call `.close()` to terminate connection gracefully.\n",
40
+ "\n",
41
+ "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549016\n",
42
+ "Connected. Call `.close()` to terminate connection gracefully.\n",
43
+ "Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n"
44
+ ]
45
+ },
46
+ {
47
+ "data": {
48
+ "application/vnd.jupyter.widget-view+json": {
49
+ "model_id": "a5df97af0c324a9695e2196efb69fdea",
50
+ "version_major": 2,
51
+ "version_minor": 0
52
+ },
53
+ "text/plain": [
54
+ "Uploading Dataframe: 0.00% | | Rows 0/3486 | Elapsed Time: 00:00 | Remaining Time: ?"
55
+ ]
56
+ },
57
+ "metadata": {},
58
+ "output_type": "display_data"
59
+ },
60
+ {
61
+ "name": "stdout",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "Launching job: tesla_stock_2_offline_fg_materialization\n",
65
+ "Job started successfully, you can follow the progress at \n",
66
+ "https://c.app.hopsworks.ai/p/549016/jobs/named/tesla_stock_2_offline_fg_materialization/executions\n"
67
+ ]
68
+ },
69
+ {
70
+ "data": {
71
+ "application/vnd.jupyter.widget-view+json": {
72
+ "model_id": "c5616378dd994bacad48f3d71a04f891",
73
+ "version_major": 2,
74
+ "version_minor": 0
75
+ },
76
+ "text/plain": [
77
+ "Uploading Dataframe: 0.00% | | Rows 0/66 | Elapsed Time: 00:00 | Remaining Time: ?"
78
+ ]
79
+ },
80
+ "metadata": {},
81
+ "output_type": "display_data"
82
+ },
83
+ {
84
+ "name": "stdout",
85
+ "output_type": "stream",
86
+ "text": [
87
+ "Launching job: news_sentiment_updated_2_offline_fg_materialization\n",
88
+ "Job started successfully, you can follow the progress at \n",
89
+ "https://c.app.hopsworks.ai/p/549016/jobs/named/news_sentiment_updated_2_offline_fg_materialization/executions\n"
90
+ ]
91
+ }
92
+ ],
93
  "source": [
94
  "from feature_pipeline import tesla_fg\n",
95
  "from feature_pipeline import news_sentiment_fg"
 
97
  },
98
  {
99
  "cell_type": "code",
100
+ "execution_count": 3,
101
  "metadata": {},
102
  "outputs": [
103
  {
 
106
  "True"
107
  ]
108
  },
109
+ "execution_count": 3,
110
  "metadata": {},
111
  "output_type": "execute_result"
112
  }
 
120
  },
121
  {
122
  "cell_type": "code",
123
+ "execution_count": 4,
124
  "metadata": {},
125
  "outputs": [
126
  {
 
143
  },
144
  {
145
  "cell_type": "code",
146
+ "execution_count": 5,
147
  "metadata": {},
148
  "outputs": [],
149
  "source": [
 
155
  "\n",
156
  " # Define the query\n",
157
  " ds_query = tesla_fg.select(['date', 'open', 'ticker'])\\\n",
158
+ " .join(news_sentiment_fg.select(['sentiment']))\n",
159
  "\n",
160
  " # Create the feature view\n",
161
  " feature_view = fs.create_feature_view(\n",
 
169
  },
170
  {
171
  "cell_type": "code",
172
+ "execution_count": 6,
173
  "metadata": {},
174
  "outputs": [
175
  {
 
189
  " feature_view, tesla_fg = create_stocks_feature_view(fs, 1)"
190
  ]
191
  },
192
+ {
193
+ "cell_type": "code",
194
+ "execution_count": 8,
195
+ "metadata": {},
196
+ "outputs": [],
197
+ "source": [
198
+ "def fix_data_from_feature_view(df,start_date,end_date):\n",
199
+ " df = df.sort_values(\"date\")\n",
200
+ " df = df.reset_index()\n",
201
+ " df = df.drop(columns=[\"index\"])\n",
202
+ "\n",
203
+ " # Create a boolean mask for rows that fall within the date range\n",
204
+ " mask = (pd.to_datetime(df['date']) >= pd.to_datetime(start_date)) & (pd.to_datetime(df['date']) <= pd.to_datetime(end_date))\n",
205
+ " len_df = np.shape(df)\n",
206
+ " df = df[mask] # Use the boolean mask to filter the DataFrame\n",
207
+ " print('From shape {} to {} after cropping to given date range: {} to {}'.format(len_df,np.shape(df),start_date,end_date))\n",
208
+ "\n",
209
+ " return df"
210
+ ]
211
+ },
212
  {
213
  "cell_type": "code",
214
  "execution_count": null,
feature_view_freddie.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # %%
2
+ # Import necessary libraries
3
+ import pandas as pd # For data manipulation using DataFrames
4
+ import numpy as np # For numerical operations
5
+ import matplotlib.pyplot as plt # For data visualization
6
+ import os # For operating system-related tasks
7
+ import joblib # For saving and loading models
8
+ import hopsworks # For getting access to hopsworks
9
+
10
+
11
+
12
+ # Import specific modules from scikit-learn
13
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder # For data preprocessing
14
+ from sklearn.metrics import accuracy_score # For evaluating model accuracy
15
+
16
+ # %%
17
+ from feature_pipeline import tesla_fg
18
+ from feature_pipeline import news_sentiment_fg
19
+
20
+ # %%
21
+ from dotenv import load_dotenv
22
+ import os
23
+
24
+ load_dotenv()
25
+
26
+ # %%
27
+ api_key = os.environ.get('hopsworks_api')
28
+ project = hopsworks.login(api_key_value=api_key)
29
+ fs = project.get_feature_store()
30
+
31
+ # %%
32
+ def create_stocks_feature_view(fs, version):
33
+
34
+ # Loading in the feature groups
35
+ tesla_fg = fs.get_feature_group('tesla_stock', version=1)
36
+ news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=1)
37
+
38
+ # Define the query
39
+ ds_query = tesla_fg.select(['date', 'open', 'ticker'])\
40
+ .join(news_sentiment_fg.select(['date','sentiment']))
41
+
42
+ # Create the feature view
43
+ feature_view = fs.create_feature_view(
44
+ name='tesla_stocks_fv',
45
+ query=ds_query,
46
+ labels=['ticker']
47
+ )
48
+
49
+ return feature_view, tesla_fg
50
+
51
+ # %%
52
+ try:
53
+ feature_view = fs.get_feature_view("tesla_stocks_fv", version=1)
54
+ tesla_fg = fs.get_feature_group('tesla_stock', version=1)
55
+ except:
56
+ feature_view, tesla_fg = create_stocks_feature_view(fs, 1)
57
+
58
+ # %%
59
+ def fix_data_from_feature_view(df,start_date,end_date):
60
+ df = df.sort_values("date")
61
+ df = df.reset_index()
62
+ df = df.drop(columns=["index"])
63
+
64
+ # Create a boolean mask for rows that fall within the date range
65
+ mask = (pd.to_datetime(df['date']) >= pd.to_datetime(start_date)) & (pd.to_datetime(df['date']) <= pd.to_datetime(end_date))
66
+ len_df = np.shape(df)
67
+ df = df[mask] # Use the boolean mask to filter the DataFrame
68
+ print('From shape {} to {} after cropping to given date range: {} to {}'.format(len_df,np.shape(df),start_date,end_date))
69
+
70
+ return df
71
+
72
+ # %%
73
+ #def create_stocks_feature_view(fs, version):
74
+
75
+ #Loading in the feature groups
76
+ # tesla_fg = fs.get_feature_group('tesla_stock', version = 3)
77
+ # news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version = 2)
78
+
79
+ # ds_query = tesla_fg.select(['date','open', 'ticker'])\
80
+ # .join(news_sentiment_fg.select_except(['ticker','time', 'amp_url', 'image_url']))
81
+
82
+ # return (fs.create_tesla_feature_view(
83
+ # name = 'tsla_stocks_fv',
84
+ # query = ds_query,
85
+ # labels=['ticker']
86
+ # ), tesla_fg)
87
+
88
+ # %%
89
+ #try:
90
+ # feature_view = fs.get_feature_view("tsla_stocks_fv", version=1)
91
+ # tesla_fg = fs.get_feature_group('tesla_stock', version=3)
92
+ #except:
93
+ # feature_view, tesla_fg = create_stocks_feature_view(fs, 1)
94
+
95
+
training_pipeline.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 3,
6
  "metadata": {},
7
  "outputs": [
8
  {
@@ -11,7 +11,7 @@
11
  "True"
12
  ]
13
  },
14
- "execution_count": 3,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
@@ -26,197 +26,119 @@
26
  },
27
  {
28
  "cell_type": "code",
29
- "execution_count": null,
30
- "metadata": {},
31
- "outputs": [],
32
- "source": []
33
- },
34
- {
35
- "cell_type": "code",
36
- "execution_count": 6,
37
  "metadata": {},
38
  "outputs": [
39
  {
40
  "name": "stdout",
41
  "output_type": "stream",
42
  "text": [
43
- "Fetching feature view from hopsworks...\n",
44
- "Connection closed.\n",
45
- "Connected. Call `.close()` to terminate connection gracefully.\n"
46
- ]
47
- },
48
- {
49
- "name": "stdout",
50
- "output_type": "stream",
51
- "text": [
52
  "\n",
53
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549016\n",
54
- "Connected. Call `.close()` to terminate connection gracefully.\n"
55
- ]
56
- },
57
- {
58
- "name": "stderr",
59
- "output_type": "stream",
60
- "text": [
61
- "../src/arrow/status.cc:137: DoAction result was not fully consumed: Cancelled: Flight cancelled call, with message: CANCELLED. Detail: Cancelled\n"
62
- ]
63
- },
64
- {
65
- "name": "stdout",
66
- "output_type": "stream",
67
- "text": [
68
- "2024-05-06 12:27:55,071 WARNING: DeprecationWarning: ssl.PROTOCOL_TLS is deprecated\n",
69
  "\n",
70
- "Error: Reading data from Hopsworks, using Hive \n"
71
- ]
72
- },
73
- {
74
- "ename": "DatabaseError",
75
- "evalue": "Execution failed on sql: WITH right_fg0 AS (SELECT *\nFROM (SELECT `fg1`.`date` `date`, `fg1`.`open` `open`, `fg1`.`ticker` `ticker`, `fg1`.`ticker` `join_pk_ticker`, `fg1`.`date` `join_evt_date`, `fg0`.`date` `date`, `fg0`.`sentiment` `sentiment`, RANK() OVER (PARTITION BY `fg1`.`ticker`, `fg1`.`date` ORDER BY `fg0`.`date` DESC) pit_rank_hopsworks\nFROM `mtzeve_featurestore`.`tesla_stock_1` `fg1`\nINNER JOIN `mtzeve_featurestore`.`news_sentiment_updated_1` `fg0` ON `fg1`.`ticker` = `fg0`.`ticker` AND `fg1`.`date` >= `fg0`.`date`\nWHERE `fg1`.`date` >= TIMESTAMP '1970-01-01 00:16:40.000' AND `fg1`.`date` < TIMESTAMP '2024-05-06 10:27:51.000') NA\nWHERE `pit_rank_hopsworks` = 1) (SELECT `right_fg0`.`date` `date`, `right_fg0`.`open` `open`, `right_fg0`.`ticker` `ticker`, `right_fg0`.`date` `date`, `right_fg0`.`sentiment` `sentiment`\nFROM right_fg0)\nTExecuteStatementResp(status=TStatus(statusCode=3, infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while compiling statement: FAILED: SemanticException [Error 10007]: Ambiguous column reference date in na:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:343', 'org.apache.hive.service.cli.operation.SQLOperation:prepare:SQLOperation.java:203', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:266', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:255', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:541', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:516', 'sun.reflect.GeneratedMethodAccessor216:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:498', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:422', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1821', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy53:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:281', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:712', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1557', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1542', 'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:286', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1149', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:624', 'java.lang.Thread:run:Thread.java:750', '*org.apache.hadoop.hive.ql.parse.SemanticException:Ambiguous column reference date in na:44:17', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:rewriteRRForSubQ:SemanticAnalyzer.java:11359', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11338', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11188', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11215', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11188', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11215', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11201', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genOPTree:SemanticAnalyzer.java:11987', 'org.apache.hadoop.hive.ql.parse.CalcitePlanner:genOPTree:CalcitePlanner.java:597', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:analyzeInternal:SemanticAnalyzer.java:12066', 'org.apache.hadoop.hive.ql.parse.CalcitePlanner:analyzeInternal:CalcitePlanner.java:334', 'org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer:analyze:BaseSemanticAnalyzer.java:285', 'org.apache.hadoop.hive.ql.Driver:compile:Driver.java:643', 'org.apache.hadoop.hive.ql.Driver:compileInternal:Driver.java:1683', 'org.apache.hadoop.hive.ql.Driver:compileAndRespond:Driver.java:1630', 'org.apache.hadoop.hive.ql.Driver:compileAndRespond:Driver.java:1625', 'org.apache.hadoop.hive.ql.reexec.ReExecDriver:compileAndRespond:ReExecDriver.java:126', 'org.apache.hive.service.cli.operation.SQLOperation:prepare:SQLOperation.java:201'], sqlState='42000', errorCode=10007, errorMessage='Error while compiling statement: FAILED: SemanticException [Error 10007]: Ambiguous column reference date in na'), operationHandle=None)\nunable to rollback",
76
- "output_type": "error",
77
- "traceback": [
78
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
79
- "\u001b[0;31mOperationalError\u001b[0m Traceback (most recent call last)",
80
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/io/sql.py:2018\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2017\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 2018\u001b[0m cur\u001b[39m.\u001b[39mexecute(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 2019\u001b[0m \u001b[39mreturn\u001b[39;00m cur\n",
81
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pyhive/hive.py:408\u001b[0m, in \u001b[0;36mCursor.execute\u001b[0;34m(self, operation, parameters, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connection\u001b[39m.\u001b[39mclient\u001b[39m.\u001b[39mExecuteStatement(req)\n\u001b[0;32m--> 408\u001b[0m _check_status(response)\n\u001b[1;32m 409\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_operationHandle \u001b[39m=\u001b[39m response\u001b[39m.\u001b[39moperationHandle\n",
82
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pyhive/hive.py:538\u001b[0m, in \u001b[0;36m_check_status\u001b[0;34m(response)\u001b[0m\n\u001b[1;32m 537\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mstatus\u001b[39m.\u001b[39mstatusCode \u001b[39m!=\u001b[39m ttypes\u001b[39m.\u001b[39mTStatusCode\u001b[39m.\u001b[39mSUCCESS_STATUS:\n\u001b[0;32m--> 538\u001b[0m \u001b[39mraise\u001b[39;00m OperationalError(response)\n",
83
- "\u001b[0;31mOperationalError\u001b[0m: TExecuteStatementResp(status=TStatus(statusCode=3, infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while compiling statement: FAILED: SemanticException [Error 10007]: Ambiguous column reference date in na:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:343', 'org.apache.hive.service.cli.operation.SQLOperation:prepare:SQLOperation.java:203', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:266', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:255', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:541', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:516', 'sun.reflect.GeneratedMethodAccessor216:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:498', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:422', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1821', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy53:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:281', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:712', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1557', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1542', 'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:286', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1149', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:624', 'java.lang.Thread:run:Thread.java:750', '*org.apache.hadoop.hive.ql.parse.SemanticException:Ambiguous column reference date in na:44:17', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:rewriteRRForSubQ:SemanticAnalyzer.java:11359', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11338', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11188', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11215', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11188', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11215', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11201', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genOPTree:SemanticAnalyzer.java:11987', 'org.apache.hadoop.hive.ql.parse.CalcitePlanner:genOPTree:CalcitePlanner.java:597', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:analyzeInternal:SemanticAnalyzer.java:12066', 'org.apache.hadoop.hive.ql.parse.CalcitePlanner:analyzeInternal:CalcitePlanner.java:334', 'org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer:analyze:BaseSemanticAnalyzer.java:285', 'org.apache.hadoop.hive.ql.Driver:compile:Driver.java:643', 'org.apache.hadoop.hive.ql.Driver:compileInternal:Driver.java:1683', 'org.apache.hadoop.hive.ql.Driver:compileAndRespond:Driver.java:1630', 'org.apache.hadoop.hive.ql.Driver:compileAndRespond:Driver.java:1625', 'org.apache.hadoop.hive.ql.reexec.ReExecDriver:compileAndRespond:ReExecDriver.java:126', 'org.apache.hive.service.cli.operation.SQLOperation:prepare:SQLOperation.java:201'], sqlState='42000', errorCode=10007, errorMessage='Error while compiling statement: FAILED: SemanticException [Error 10007]: Ambiguous column reference date in na'), operationHandle=None)",
84
- "\nDuring handling of the above exception, another exception occurred:\n",
85
- "\u001b[0;31mNotSupportedError\u001b[0m Traceback (most recent call last)",
86
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/io/sql.py:2022\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2021\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 2022\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcon\u001b[39m.\u001b[39mrollback()\n\u001b[1;32m 2023\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m inner_exc: \u001b[39m# pragma: no cover\u001b[39;00m\n",
87
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pyhive/hive.py:285\u001b[0m, in \u001b[0;36mConnection.rollback\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 284\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrollback\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 285\u001b[0m \u001b[39mraise\u001b[39;00m NotSupportedError(\u001b[39m\"\u001b[39m\u001b[39mHive does not have transactions\u001b[39m\u001b[39m\"\u001b[39m)\n",
88
- "\u001b[0;31mNotSupportedError\u001b[0m: Hive does not have transactions",
89
- "\nThe above exception was the direct cause of the following exception:\n",
90
- "\u001b[0;31mDatabaseError\u001b[0m Traceback (most recent call last)",
91
- "\u001b[1;32m/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb Cell 3\u001b[0m line \u001b[0;36m1\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W4sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m fv \u001b[39m=\u001b[39m fs\u001b[39m.\u001b[39mget_feature_view(\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W4sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m name \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mtesla_stocks_fv\u001b[39m\u001b[39m'\u001b[39m,\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W4sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m version \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W4sZmlsZQ%3D%3D?line=9'>10</a>\u001b[0m )\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W4sZmlsZQ%3D%3D?line=10'>11</a>\u001b[0m \u001b[39m# Get dataframe of training data from feature view\u001b[39;00m\n\u001b[0;32m---> <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W4sZmlsZQ%3D%3D?line=11'>12</a>\u001b[0m df, _ \u001b[39m=\u001b[39m fv\u001b[39m.\u001b[39mtraining_data(read_options\u001b[39m=\u001b[39m{\u001b[39m\"\u001b[39m\u001b[39muse_hive\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mTrue\u001b[39;00m})\n",
92
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/usage.py:212\u001b[0m, in \u001b[0;36mmethod_logger.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 211\u001b[0m exception \u001b[39m=\u001b[39m e\n\u001b[0;32m--> 212\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 213\u001b[0m \u001b[39mfinally\u001b[39;00m:\n\u001b[1;32m 214\u001b[0m \u001b[39mtry\u001b[39;00m:\n",
93
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/usage.py:208\u001b[0m, in \u001b[0;36mmethod_logger.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m exception \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 206\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 207\u001b[0m \u001b[39m# Call the original method\u001b[39;00m\n\u001b[0;32m--> 208\u001b[0m result \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 209\u001b[0m \u001b[39mreturn\u001b[39;00m result\n\u001b[1;32m 210\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n",
94
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/feature_view.py:2222\u001b[0m, in \u001b[0;36mFeatureView.training_data\u001b[0;34m(self, start_time, end_time, description, extra_filter, statistics_config, read_options, spine, primary_keys, event_time, training_helper_columns)\u001b[0m\n\u001b[1;32m 2109\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 2110\u001b[0m \u001b[39mCreate the metadata for a training dataset and get the corresponding training data from the offline feature store.\u001b[39;00m\n\u001b[1;32m 2111\u001b[0m \u001b[39mThis returns the training data in memory and does not materialise data in storage.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2205\u001b[0m \u001b[39m (X, y): Tuple of dataframe of features and labels. If there are no labels, y returns `None`.\u001b[39;00m\n\u001b[1;32m 2206\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 2207\u001b[0m td \u001b[39m=\u001b[39m training_dataset\u001b[39m.\u001b[39mTrainingDataset(\n\u001b[1;32m 2208\u001b[0m name\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mname,\n\u001b[1;32m 2209\u001b[0m version\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2220\u001b[0m extra_filter\u001b[39m=\u001b[39mextra_filter,\n\u001b[1;32m 2221\u001b[0m )\n\u001b[0;32m-> 2222\u001b[0m td, df \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_feature_view_engine\u001b[39m.\u001b[39mget_training_data(\n\u001b[1;32m 2223\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 2224\u001b[0m read_options,\n\u001b[1;32m 2225\u001b[0m training_dataset_obj\u001b[39m=\u001b[39mtd,\n\u001b[1;32m 2226\u001b[0m spine\u001b[39m=\u001b[39mspine,\n\u001b[1;32m 2227\u001b[0m primary_keys\u001b[39m=\u001b[39mprimary_keys,\n\u001b[1;32m 2228\u001b[0m event_time\u001b[39m=\u001b[39mevent_time,\n\u001b[1;32m 2229\u001b[0m training_helper_columns\u001b[39m=\u001b[39mtraining_helper_columns,\n\u001b[1;32m 2230\u001b[0m )\n\u001b[1;32m 2231\u001b[0m warnings\u001b[39m.\u001b[39mwarn(\n\u001b[1;32m 2232\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mIncremented version to `\u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m`.\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(td\u001b[39m.\u001b[39mversion),\n\u001b[1;32m 2233\u001b[0m util\u001b[39m.\u001b[39mVersionWarning,\n\u001b[1;32m 2234\u001b[0m )\n\u001b[1;32m 2235\u001b[0m \u001b[39mreturn\u001b[39;00m df\n",
95
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/core/feature_view_engine.py:346\u001b[0m, in \u001b[0;36mFeatureViewEngine.get_training_data\u001b[0;34m(self, feature_view_obj, read_options, splits, training_dataset_obj, training_dataset_version, spine, primary_keys, event_time, training_helper_columns)\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_check_feature_group_accessibility(feature_view_obj)\n\u001b[1;32m 334\u001b[0m query \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_batch_query(\n\u001b[1;32m 335\u001b[0m feature_view_obj,\n\u001b[1;32m 336\u001b[0m training_dataset_version\u001b[39m=\u001b[39mtd_updated\u001b[39m.\u001b[39mversion,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 344\u001b[0m spine\u001b[39m=\u001b[39mspine,\n\u001b[1;32m 345\u001b[0m )\n\u001b[0;32m--> 346\u001b[0m split_df \u001b[39m=\u001b[39m engine\u001b[39m.\u001b[39mget_instance()\u001b[39m.\u001b[39mget_training_data(\n\u001b[1;32m 347\u001b[0m td_updated, feature_view_obj, query, read_options\n\u001b[1;32m 348\u001b[0m )\n\u001b[1;32m 349\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcompute_training_dataset_statistics(\n\u001b[1;32m 350\u001b[0m feature_view_obj, td_updated, split_df\n\u001b[1;32m 351\u001b[0m )\n\u001b[1;32m 353\u001b[0m \u001b[39m# split df into features and labels df\u001b[39;00m\n",
96
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/engine/python.py:648\u001b[0m, in \u001b[0;36mEngine.get_training_data\u001b[0;34m(self, training_dataset_obj, feature_view_obj, query_obj, read_options)\u001b[0m\n\u001b[1;32m 644\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_prepare_transform_split_df(\n\u001b[1;32m 645\u001b[0m query_obj, training_dataset_obj, feature_view_obj, read_options\n\u001b[1;32m 646\u001b[0m )\n\u001b[1;32m 647\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m--> 648\u001b[0m df \u001b[39m=\u001b[39m query_obj\u001b[39m.\u001b[39mread(read_options\u001b[39m=\u001b[39mread_options)\n\u001b[1;32m 649\u001b[0m transformation_function_engine\u001b[39m.\u001b[39mTransformationFunctionEngine\u001b[39m.\u001b[39mpopulate_builtin_transformation_functions(\n\u001b[1;32m 650\u001b[0m training_dataset_obj, feature_view_obj, df\n\u001b[1;32m 651\u001b[0m )\n\u001b[1;32m 652\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_apply_transformation_function(\n\u001b[1;32m 653\u001b[0m training_dataset_obj\u001b[39m.\u001b[39mtransformation_functions, df\n\u001b[1;32m 654\u001b[0m )\n",
97
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/constructor/query.py:173\u001b[0m, in \u001b[0;36mQuery.read\u001b[0;34m(self, online, dataframe_type, read_options)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mjoins) \u001b[39m>\u001b[39m \u001b[39m0\u001b[39m \u001b[39mor\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39min\u001b[39;00m [f\u001b[39m.\u001b[39mtype \u001b[39mfor\u001b[39;00m f \u001b[39min\u001b[39;00m schema]:\n\u001b[1;32m 169\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 170\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mPandas types casting only supported for feature_group.read()/query.select_all()\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 171\u001b[0m )\n\u001b[0;32m--> 173\u001b[0m \u001b[39mreturn\u001b[39;00m engine\u001b[39m.\u001b[39mget_instance()\u001b[39m.\u001b[39msql(\n\u001b[1;32m 174\u001b[0m sql_query,\n\u001b[1;32m 175\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_feature_store_name,\n\u001b[1;32m 176\u001b[0m online_conn,\n\u001b[1;32m 177\u001b[0m dataframe_type,\n\u001b[1;32m 178\u001b[0m read_options,\n\u001b[1;32m 179\u001b[0m schema,\n\u001b[1;32m 180\u001b[0m )\n",
98
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/engine/python.py:139\u001b[0m, in \u001b[0;36mEngine.sql\u001b[0;34m(self, sql_query, feature_store, online_conn, dataframe_type, read_options, schema)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39msql\u001b[39m(\n\u001b[1;32m 130\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 131\u001b[0m sql_query,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 136\u001b[0m schema\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 137\u001b[0m ):\n\u001b[1;32m 138\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m online_conn:\n\u001b[0;32m--> 139\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sql_offline(\n\u001b[1;32m 140\u001b[0m sql_query,\n\u001b[1;32m 141\u001b[0m feature_store,\n\u001b[1;32m 142\u001b[0m dataframe_type,\n\u001b[1;32m 143\u001b[0m schema,\n\u001b[1;32m 144\u001b[0m hive_config\u001b[39m=\u001b[39mread_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mhive_config\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mif\u001b[39;00m read_options \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 145\u001b[0m arrow_flight_config\u001b[39m=\u001b[39mread_options\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39marrow_flight_config\u001b[39m\u001b[39m\"\u001b[39m, {})\n\u001b[1;32m 146\u001b[0m \u001b[39mif\u001b[39;00m read_options\n\u001b[1;32m 147\u001b[0m \u001b[39melse\u001b[39;00m {},\n\u001b[1;32m 148\u001b[0m )\n\u001b[1;32m 149\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 150\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_jdbc(\n\u001b[1;32m 151\u001b[0m sql_query, online_conn, dataframe_type, read_options, schema\n\u001b[1;32m 152\u001b[0m )\n",
99
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/engine/python.py:180\u001b[0m, in \u001b[0;36mEngine._sql_offline\u001b[0;34m(self, sql_query, feature_store, dataframe_type, schema, hive_config, arrow_flight_config)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[39mwith\u001b[39;00m warnings\u001b[39m.\u001b[39mcatch_warnings():\n\u001b[1;32m 179\u001b[0m warnings\u001b[39m.\u001b[39msimplefilter(\u001b[39m\"\u001b[39m\u001b[39mignore\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mUserWarning\u001b[39;00m)\n\u001b[0;32m--> 180\u001b[0m result_df \u001b[39m=\u001b[39m util\u001b[39m.\u001b[39mrun_with_loading_animation(\n\u001b[1;32m 181\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mReading data from Hopsworks, using Hive\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 182\u001b[0m pd\u001b[39m.\u001b[39mread_sql,\n\u001b[1;32m 183\u001b[0m sql_query,\n\u001b[1;32m 184\u001b[0m hive_conn,\n\u001b[1;32m 185\u001b[0m )\n\u001b[1;32m 187\u001b[0m \u001b[39mif\u001b[39;00m schema:\n\u001b[1;32m 188\u001b[0m result_df \u001b[39m=\u001b[39m Engine\u001b[39m.\u001b[39mcast_columns(result_df, schema)\n",
100
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/util.py:427\u001b[0m, in \u001b[0;36mrun_with_loading_animation\u001b[0;34m(message, func, *args, **kwargs)\u001b[0m\n\u001b[1;32m 424\u001b[0m end \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n\u001b[1;32m 426\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 427\u001b[0m result \u001b[39m=\u001b[39m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 428\u001b[0m end \u001b[39m=\u001b[39m time\u001b[39m.\u001b[39mtime()\n\u001b[1;32m 429\u001b[0m \u001b[39mreturn\u001b[39;00m result\n",
101
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/io/sql.py:564\u001b[0m, in \u001b[0;36mread_sql\u001b[0;34m(sql, con, index_col, coerce_float, params, parse_dates, columns, chunksize)\u001b[0m\n\u001b[1;32m 561\u001b[0m pandas_sql \u001b[39m=\u001b[39m pandasSQL_builder(con)\n\u001b[1;32m 563\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(pandas_sql, SQLiteDatabase):\n\u001b[0;32m--> 564\u001b[0m \u001b[39mreturn\u001b[39;00m pandas_sql\u001b[39m.\u001b[39mread_query(\n\u001b[1;32m 565\u001b[0m sql,\n\u001b[1;32m 566\u001b[0m index_col\u001b[39m=\u001b[39mindex_col,\n\u001b[1;32m 567\u001b[0m params\u001b[39m=\u001b[39mparams,\n\u001b[1;32m 568\u001b[0m coerce_float\u001b[39m=\u001b[39mcoerce_float,\n\u001b[1;32m 569\u001b[0m parse_dates\u001b[39m=\u001b[39mparse_dates,\n\u001b[1;32m 570\u001b[0m chunksize\u001b[39m=\u001b[39mchunksize,\n\u001b[1;32m 571\u001b[0m )\n\u001b[1;32m 573\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 574\u001b[0m _is_table_name \u001b[39m=\u001b[39m pandas_sql\u001b[39m.\u001b[39mhas_table(sql)\n",
102
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/io/sql.py:2078\u001b[0m, in \u001b[0;36mSQLiteDatabase.read_query\u001b[0;34m(self, sql, index_col, coerce_float, params, parse_dates, chunksize, dtype)\u001b[0m\n\u001b[1;32m 2066\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mread_query\u001b[39m(\n\u001b[1;32m 2067\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 2068\u001b[0m sql,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 2074\u001b[0m dtype: DtypeArg \u001b[39m|\u001b[39m \u001b[39mNone\u001b[39;00m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 2075\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m DataFrame \u001b[39m|\u001b[39m Iterator[DataFrame]:\n\u001b[1;32m 2077\u001b[0m args \u001b[39m=\u001b[39m _convert_params(sql, params)\n\u001b[0;32m-> 2078\u001b[0m cursor \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mexecute(\u001b[39m*\u001b[39margs)\n\u001b[1;32m 2079\u001b[0m columns \u001b[39m=\u001b[39m [col_desc[\u001b[39m0\u001b[39m] \u001b[39mfor\u001b[39;00m col_desc \u001b[39min\u001b[39;00m cursor\u001b[39m.\u001b[39mdescription]\n\u001b[1;32m 2081\u001b[0m \u001b[39mif\u001b[39;00m chunksize \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n",
103
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/pandas/io/sql.py:2027\u001b[0m, in \u001b[0;36mSQLiteDatabase.execute\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2023\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m inner_exc: \u001b[39m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 2024\u001b[0m ex \u001b[39m=\u001b[39m DatabaseError(\n\u001b[1;32m 2025\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mExecution failed on sql: \u001b[39m\u001b[39m{\u001b[39;00margs[\u001b[39m0\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39m{\u001b[39;00mexc\u001b[39m}\u001b[39;00m\u001b[39m\\n\u001b[39;00m\u001b[39munable to rollback\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 2026\u001b[0m )\n\u001b[0;32m-> 2027\u001b[0m \u001b[39mraise\u001b[39;00m ex \u001b[39mfrom\u001b[39;00m \u001b[39minner_exc\u001b[39;00m\n\u001b[1;32m 2029\u001b[0m ex \u001b[39m=\u001b[39m DatabaseError(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mExecution failed on sql \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00margs[\u001b[39m0\u001b[39m]\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m: \u001b[39m\u001b[39m{\u001b[39;00mexc\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 2030\u001b[0m \u001b[39mraise\u001b[39;00m ex \u001b[39mfrom\u001b[39;00m \u001b[39mexc\u001b[39;00m\n",
104
- "\u001b[0;31mDatabaseError\u001b[0m: Execution failed on sql: WITH right_fg0 AS (SELECT *\nFROM (SELECT `fg1`.`date` `date`, `fg1`.`open` `open`, `fg1`.`ticker` `ticker`, `fg1`.`ticker` `join_pk_ticker`, `fg1`.`date` `join_evt_date`, `fg0`.`date` `date`, `fg0`.`sentiment` `sentiment`, RANK() OVER (PARTITION BY `fg1`.`ticker`, `fg1`.`date` ORDER BY `fg0`.`date` DESC) pit_rank_hopsworks\nFROM `mtzeve_featurestore`.`tesla_stock_1` `fg1`\nINNER JOIN `mtzeve_featurestore`.`news_sentiment_updated_1` `fg0` ON `fg1`.`ticker` = `fg0`.`ticker` AND `fg1`.`date` >= `fg0`.`date`\nWHERE `fg1`.`date` >= TIMESTAMP '1970-01-01 00:16:40.000' AND `fg1`.`date` < TIMESTAMP '2024-05-06 10:27:51.000') NA\nWHERE `pit_rank_hopsworks` = 1) (SELECT `right_fg0`.`date` `date`, `right_fg0`.`open` `open`, `right_fg0`.`ticker` `ticker`, `right_fg0`.`date` `date`, `right_fg0`.`sentiment` `sentiment`\nFROM right_fg0)\nTExecuteStatementResp(status=TStatus(statusCode=3, infoMessages=['*org.apache.hive.service.cli.HiveSQLException:Error while compiling statement: FAILED: SemanticException [Error 10007]: Ambiguous column reference date in na:28:27', 'org.apache.hive.service.cli.operation.Operation:toSQLException:Operation.java:343', 'org.apache.hive.service.cli.operation.SQLOperation:prepare:SQLOperation.java:203', 'org.apache.hive.service.cli.operation.SQLOperation:runInternal:SQLOperation.java:266', 'org.apache.hive.service.cli.operation.Operation:run:Operation.java:255', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatementInternal:HiveSessionImpl.java:541', 'org.apache.hive.service.cli.session.HiveSessionImpl:executeStatement:HiveSessionImpl.java:516', 'sun.reflect.GeneratedMethodAccessor216:invoke::-1', 'sun.reflect.DelegatingMethodAccessorImpl:invoke:DelegatingMethodAccessorImpl.java:43', 'java.lang.reflect.Method:invoke:Method.java:498', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:78', 'org.apache.hive.service.cli.session.HiveSessionProxy:access$000:HiveSessionProxy.java:36', 'org.apache.hive.service.cli.session.HiveSessionProxy$1:run:HiveSessionProxy.java:63', 'java.security.AccessController:doPrivileged:AccessController.java:-2', 'javax.security.auth.Subject:doAs:Subject.java:422', 'org.apache.hadoop.security.UserGroupInformation:doAs:UserGroupInformation.java:1821', 'org.apache.hive.service.cli.session.HiveSessionProxy:invoke:HiveSessionProxy.java:59', 'com.sun.proxy.$Proxy53:executeStatement::-1', 'org.apache.hive.service.cli.CLIService:executeStatement:CLIService.java:281', 'org.apache.hive.service.cli.thrift.ThriftCLIService:ExecuteStatement:ThriftCLIService.java:712', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1557', 'org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement:getResult:TCLIService.java:1542', 'org.apache.thrift.ProcessFunction:process:ProcessFunction.java:39', 'org.apache.thrift.TBaseProcessor:process:TBaseProcessor.java:39', 'org.apache.hive.service.auth.TSetIpAddressProcessor:process:TSetIpAddressProcessor.java:56', 'org.apache.thrift.server.TThreadPoolServer$WorkerProcess:run:TThreadPoolServer.java:286', 'java.util.concurrent.ThreadPoolExecutor:runWorker:ThreadPoolExecutor.java:1149', 'java.util.concurrent.ThreadPoolExecutor$Worker:run:ThreadPoolExecutor.java:624', 'java.lang.Thread:run:Thread.java:750', '*org.apache.hadoop.hive.ql.parse.SemanticException:Ambiguous column reference date in na:44:17', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:rewriteRRForSubQ:SemanticAnalyzer.java:11359', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11338', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11188', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11215', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11188', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11215', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genPlan:SemanticAnalyzer.java:11201', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:genOPTree:SemanticAnalyzer.java:11987', 'org.apache.hadoop.hive.ql.parse.CalcitePlanner:genOPTree:CalcitePlanner.java:597', 'org.apache.hadoop.hive.ql.parse.SemanticAnalyzer:analyzeInternal:SemanticAnalyzer.java:12066', 'org.apache.hadoop.hive.ql.parse.CalcitePlanner:analyzeInternal:CalcitePlanner.java:334', 'org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer:analyze:BaseSemanticAnalyzer.java:285', 'org.apache.hadoop.hive.ql.Driver:compile:Driver.java:643', 'org.apache.hadoop.hive.ql.Driver:compileInternal:Driver.java:1683', 'org.apache.hadoop.hive.ql.Driver:compileAndRespond:Driver.java:1630', 'org.apache.hadoop.hive.ql.Driver:compileAndRespond:Driver.java:1625', 'org.apache.hadoop.hive.ql.reexec.ReExecDriver:compileAndRespond:ReExecDriver.java:126', 'org.apache.hive.service.cli.operation.SQLOperation:prepare:SQLOperation.java:201'], sqlState='42000', errorCode=10007, errorMessage='Error while compiling statement: FAILED: SemanticException [Error 10007]: Ambiguous column reference date in na'), operationHandle=None)\nunable to rollback"
105
- ]
106
- }
107
- ],
108
- "source": [
109
- "print('Fetching feature view from hopsworks...')\n",
110
- "api_key = os.environ.get('hopsworks_api')\n",
111
- "project = hopsworks.login(api_key_value=api_key)\n",
112
- "fs = project.get_feature_store()\n",
113
- "\n",
114
- "# Get feature view \n",
115
- "fv = fs.get_feature_view(\n",
116
- " name = 'tesla_stocks_fv',\n",
117
- " version = 1\n",
118
- ")\n",
119
- "# Get dataframe of training data from feature view\n",
120
- "df, _ = fv.training_data(read_options={\"use_hive\": True})"
121
- ]
122
- },
123
- {
124
- "cell_type": "code",
125
- "execution_count": 1,
126
- "metadata": {},
127
- "outputs": [
128
- {
129
- "ename": "ExternalClientError",
130
- "evalue": "host cannot be of type NoneType, host is a non-optional argument to connect to hopsworks from an external environment.",
131
- "output_type": "error",
132
- "traceback": [
133
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
134
- "\u001b[0;31mExternalClientError\u001b[0m Traceback (most recent call last)",
135
- "\u001b[1;32m/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb Cell 2\u001b[0m line \u001b[0;36m7\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W3sZmlsZQ%3D%3D?line=2'>3</a>\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mdotenv\u001b[39;00m \u001b[39mimport\u001b[39;00m load_dotenv\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W3sZmlsZQ%3D%3D?line=4'>5</a>\u001b[0m load_dotenv\n\u001b[0;32m----> <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W3sZmlsZQ%3D%3D?line=6'>7</a>\u001b[0m connection \u001b[39m=\u001b[39m hsfs\u001b[39m.\u001b[39mconnection()\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W3sZmlsZQ%3D%3D?line=7'>8</a>\u001b[0m api_key \u001b[39m=\u001b[39m os\u001b[39m.\u001b[39menviron\u001b[39m.\u001b[39mget(\u001b[39m'\u001b[39m\u001b[39mhopsworks_api\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[1;32m <a href='vscode-notebook-cell:/Users/manos/Documents/BDS/Mlops_2version/MLops_mod/training_pipeline.ipynb#W3sZmlsZQ%3D%3D?line=8'>9</a>\u001b[0m project \u001b[39m=\u001b[39m hopsworks\u001b[39m.\u001b[39mlogin(api_key_value\u001b[39m=\u001b[39mapi_key)\n",
136
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/connection.py:303\u001b[0m, in \u001b[0;36mConnection.connection\u001b[0;34m(cls, host, port, project, engine, region_name, secrets_store, hostname_verification, trust_store_path, cert_folder, api_key_file, api_key_value)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 288\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mconnection\u001b[39m(\n\u001b[1;32m 289\u001b[0m \u001b[39mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 300\u001b[0m api_key_value: \u001b[39mstr\u001b[39m \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 301\u001b[0m ):\n\u001b[1;32m 302\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Connection factory method, accessible through `hsfs.connection()`.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 303\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m(\n\u001b[1;32m 304\u001b[0m host,\n\u001b[1;32m 305\u001b[0m port,\n\u001b[1;32m 306\u001b[0m project,\n\u001b[1;32m 307\u001b[0m engine,\n\u001b[1;32m 308\u001b[0m region_name,\n\u001b[1;32m 309\u001b[0m secrets_store,\n\u001b[1;32m 310\u001b[0m hostname_verification,\n\u001b[1;32m 311\u001b[0m trust_store_path,\n\u001b[1;32m 312\u001b[0m cert_folder,\n\u001b[1;32m 313\u001b[0m api_key_file,\n\u001b[1;32m 314\u001b[0m api_key_value,\n\u001b[1;32m 315\u001b[0m )\n",
137
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/connection.py:154\u001b[0m, in \u001b[0;36mConnection.__init__\u001b[0;34m(self, host, port, project, engine, region_name, secrets_store, hostname_verification, trust_store_path, cert_folder, api_key_file, api_key_value)\u001b[0m\n\u001b[1;32m 151\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_api_key_value \u001b[39m=\u001b[39m api_key_value\n\u001b[1;32m 152\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_connected \u001b[39m=\u001b[39m \u001b[39mFalse\u001b[39;00m\n\u001b[0;32m--> 154\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mconnect()\n",
138
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/decorators.py:25\u001b[0m, in \u001b[0;36mnot_connected.<locals>.if_not_connected\u001b[0;34m(inst, *args, **kwargs)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[39mif\u001b[39;00m inst\u001b[39m.\u001b[39m_connected:\n\u001b[1;32m 24\u001b[0m \u001b[39mraise\u001b[39;00m HopsworksConnectionError\n\u001b[0;32m---> 25\u001b[0m \u001b[39mreturn\u001b[39;00m fn(inst, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
139
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/connection.py:233\u001b[0m, in \u001b[0;36mConnection.connect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 231\u001b[0m \u001b[39m# init client\u001b[39;00m\n\u001b[1;32m 232\u001b[0m \u001b[39mif\u001b[39;00m client\u001b[39m.\u001b[39mbase\u001b[39m.\u001b[39mClient\u001b[39m.\u001b[39mREST_ENDPOINT \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m os\u001b[39m.\u001b[39menviron:\n\u001b[0;32m--> 233\u001b[0m client\u001b[39m.\u001b[39minit(\n\u001b[1;32m 234\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mexternal\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 235\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_host,\n\u001b[1;32m 236\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_port,\n\u001b[1;32m 237\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_project,\n\u001b[1;32m 238\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine,\n\u001b[1;32m 239\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_region_name,\n\u001b[1;32m 240\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_secrets_store,\n\u001b[1;32m 241\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_hostname_verification,\n\u001b[1;32m 242\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_trust_store_path,\n\u001b[1;32m 243\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_cert_folder,\n\u001b[1;32m 244\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_api_key_file,\n\u001b[1;32m 245\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_api_key_value,\n\u001b[1;32m 246\u001b[0m )\n\u001b[1;32m 247\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 248\u001b[0m client\u001b[39m.\u001b[39minit(\u001b[39m\"\u001b[39m\u001b[39mhopsworks\u001b[39m\u001b[39m\"\u001b[39m)\n",
140
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/client/__init__.py:41\u001b[0m, in \u001b[0;36minit\u001b[0;34m(client_type, host, port, project, engine, region_name, secrets_store, hostname_verification, trust_store_path, cert_folder, api_key_file, api_key_value)\u001b[0m\n\u001b[1;32m 39\u001b[0m _client \u001b[39m=\u001b[39m hopsworks\u001b[39m.\u001b[39mClient()\n\u001b[1;32m 40\u001b[0m \u001b[39melif\u001b[39;00m client_type \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mexternal\u001b[39m\u001b[39m\"\u001b[39m:\n\u001b[0;32m---> 41\u001b[0m _client \u001b[39m=\u001b[39m external\u001b[39m.\u001b[39mClient(\n\u001b[1;32m 42\u001b[0m host,\n\u001b[1;32m 43\u001b[0m port,\n\u001b[1;32m 44\u001b[0m project,\n\u001b[1;32m 45\u001b[0m engine,\n\u001b[1;32m 46\u001b[0m region_name,\n\u001b[1;32m 47\u001b[0m secrets_store,\n\u001b[1;32m 48\u001b[0m hostname_verification,\n\u001b[1;32m 49\u001b[0m trust_store_path,\n\u001b[1;32m 50\u001b[0m cert_folder,\n\u001b[1;32m 51\u001b[0m api_key_file,\n\u001b[1;32m 52\u001b[0m api_key_value,\n\u001b[1;32m 53\u001b[0m )\n",
141
- "File \u001b[0;32m/Applications/anaconda3/lib/python3.11/site-packages/hsfs/client/external.py:54\u001b[0m, in \u001b[0;36mClient.__init__\u001b[0;34m(self, host, port, project, engine, region_name, secrets_store, hostname_verification, trust_store_path, cert_folder, api_key_file, api_key_value)\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Initializes a client in an external environment such as AWS Sagemaker.\"\"\"\u001b[39;00m\n\u001b[1;32m 53\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m host:\n\u001b[0;32m---> 54\u001b[0m \u001b[39mraise\u001b[39;00m exceptions\u001b[39m.\u001b[39mExternalClientError(\u001b[39m\"\u001b[39m\u001b[39mhost\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 55\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m project:\n\u001b[1;32m 56\u001b[0m \u001b[39mraise\u001b[39;00m exceptions\u001b[39m.\u001b[39mExternalClientError(\u001b[39m\"\u001b[39m\u001b[39mproject\u001b[39m\u001b[39m\"\u001b[39m)\n",
142
- "\u001b[0;31mExternalClientError\u001b[0m: host cannot be of type NoneType, host is a non-optional argument to connect to hopsworks from an external environment."
143
  ]
144
  }
145
  ],
146
  "source": [
147
  "import hsfs\n",
148
- "import os \n",
149
- "from dotenv import load_dotenv\n",
150
- "\n",
151
- "load_dotenv\n",
152
  "\n",
153
- "api_key = os.environ.get('hopsworks_api')\n",
154
- "project = hopsworks.login(api_key_value=api_key)\n",
 
 
 
155
  "\n",
156
- "\n",
157
- "\n",
158
- "fv = fs.get_feature_view('tesla_stocks_fv', version=1)"
159
- ]
160
- },
161
- {
162
- "cell_type": "code",
163
- "execution_count": 14,
164
- "metadata": {},
165
- "outputs": [
166
- {
167
- "name": "stdout",
168
- "output_type": "stream",
169
- "text": [
170
- "Fetching feature view from hopsworks...\n",
171
- "Connection closed.\n",
172
- "Connected. Call `.close()` to terminate connection gracefully.\n"
173
- ]
174
- },
175
- {
176
- "name": "stdout",
177
- "output_type": "stream",
178
- "text": [
179
- "\n",
180
- "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549016\n",
181
- "Connected. Call `.close()` to terminate connection gracefully.\n"
182
- ]
183
- }
184
- ],
185
- "source": [
186
- "print('Fetching feature view from hopsworks...')\n",
187
- "project = hopsworks.login()\n",
188
- "fs = project.get_feature_store()\n",
189
- "\n",
190
- "# Get feature view \n",
191
- "fv = fs.get_feature_view(\n",
192
- " name = 'tesla_stocks_fv',\n",
193
- " version = 1\n",
194
- ")"
195
  ]
196
  },
197
  {
198
  "cell_type": "code",
199
- "execution_count": 20,
200
  "metadata": {},
201
  "outputs": [
202
  {
203
- "name": "stdout",
204
- "output_type": "stream",
205
- "text": [
206
- "Failed to load data: Metadata operation error: (url: https://c.app.hopsworks.ai/hopsworks-api/api/project/549016/dataset/Projects%2Fmtzeve%2Fmtzeve_Training_Datasets%2Ftesla_stocks_fv_1_1%2Ftesla_stocks_fv_1). Server response: \n",
207
- "HTTP code: 400, HTTP reason: Bad Request, body: b'{\"errorCode\":110018,\"errorMsg\":\"Path not found\"}', error code: 110018, error msg: Path not found, user msg: \n"
208
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  }
210
  ],
211
  "source": [
212
- "# Assuming 'fv' is your FeatureView object and you've verified the version number.\n",
213
- "# Here, you will use the method to retrieve training data.\n",
214
- "\n",
215
- "try:\n",
216
- " df_train, _ = fv.get_training_data(training_dataset_version=1)\n",
217
- " print(df_train.head())\n",
218
- "except Exception as e:\n",
219
- " print(\"Failed to load data:\", e)"
220
  ]
221
  }
222
  ],
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 4,
6
  "metadata": {},
7
  "outputs": [
8
  {
 
11
  "True"
12
  ]
13
  },
14
+ "execution_count": 4,
15
  "metadata": {},
16
  "output_type": "execute_result"
17
  }
 
26
  },
27
  {
28
  "cell_type": "code",
29
+ "execution_count": 21,
 
 
 
 
 
 
 
30
  "metadata": {},
31
  "outputs": [
32
  {
33
  "name": "stdout",
34
  "output_type": "stream",
35
  "text": [
36
+ "Connected. Call `.close()` to terminate connection gracefully.\n",
 
 
 
 
 
 
 
 
37
  "\n",
38
+ "Sample data from the feature view:\n",
39
+ "<class 'tuple'>\n",
40
+ "( date open sentiment\n",
41
+ "0 2023-06-26T00:00:00.000Z 250.065 0.119444\n",
42
+ "1 2023-07-25T00:00:00.000Z 272.380 0.119444\n",
43
+ "2 2023-01-10T00:00:00.000Z 121.070 0.102207\n",
44
+ "3 2023-05-11T00:00:00.000Z 168.700 0.141296\n",
45
+ "4 2023-08-01T00:00:00.000Z 266.260 0.011111\n",
46
+ ".. ... ... ...\n",
47
+ "464 2022-12-22T00:00:00.000Z 136.000 0.102207\n",
48
+ "465 2023-08-23T00:00:00.000Z 229.340 0.024046\n",
49
+ "466 2022-09-08T00:00:00.000Z 281.300 0.087306\n",
50
+ "467 2023-07-06T00:00:00.000Z 278.090 0.119444\n",
51
+ "468 2023-10-27T00:00:00.000Z 210.600 0.164868\n",
 
 
52
  "\n",
53
+ "[469 rows x 3 columns], ticker\n",
54
+ "0 TSLA\n",
55
+ "1 TSLA\n",
56
+ "2 TSLA\n",
57
+ "3 TSLA\n",
58
+ "4 TSLA\n",
59
+ ".. ...\n",
60
+ "464 TSLA\n",
61
+ "465 TSLA\n",
62
+ "466 TSLA\n",
63
+ "467 TSLA\n",
64
+ "468 TSLA\n",
65
+ "\n",
66
+ "[469 rows x 1 columns])\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  ]
68
  }
69
  ],
70
  "source": [
71
  "import hsfs\n",
 
 
 
 
72
  "\n",
73
+ "# Connection setup\n",
74
+ "# Connect to Hopsworks\n",
75
+ "api_key = os.getenv('hopsworks_api')\n",
76
+ "connection = hsfs.connection()\n",
77
+ "fs = connection.get_feature_store()\n",
78
  "\n",
79
+ "# Get feature view\n",
80
+ "feature_view = fs.get_feature_view(\n",
81
+ " name='tesla_stocks_fv',\n",
82
+ " version=1\n",
83
+ ")\n",
84
+ "td_version, td_job = feature_view.create_train_test_split(\n",
85
+ " description = 'tesla and news sentiment training dataset',\n",
86
+ " data_format = \"csv\",\n",
87
+ " test_size = 0.2,\n",
88
+ " coalesce = True,\n",
89
+ " statistics_config={\n",
90
+ " \"enabled\": True,\n",
91
+ " \"histograms\": False,\n",
92
+ " \"correlations\": False\n",
93
+ " } \n",
94
+ ")\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  ]
96
  },
97
  {
98
  "cell_type": "code",
99
+ "execution_count": 22,
100
  "metadata": {},
101
  "outputs": [
102
  {
103
+ "data": {
104
+ "text/plain": [
105
+ "( date open sentiment\n",
106
+ " 0 2023-06-26T00:00:00.000Z 250.065 0.119444\n",
107
+ " 1 2023-07-25T00:00:00.000Z 272.380 0.119444\n",
108
+ " 2 2023-01-10T00:00:00.000Z 121.070 0.102207\n",
109
+ " 3 2023-05-11T00:00:00.000Z 168.700 0.141296\n",
110
+ " 4 2023-08-01T00:00:00.000Z 266.260 0.011111\n",
111
+ " .. ... ... ...\n",
112
+ " 464 2022-12-22T00:00:00.000Z 136.000 0.102207\n",
113
+ " 465 2023-08-23T00:00:00.000Z 229.340 0.024046\n",
114
+ " 466 2022-09-08T00:00:00.000Z 281.300 0.087306\n",
115
+ " 467 2023-07-06T00:00:00.000Z 278.090 0.119444\n",
116
+ " 468 2023-10-27T00:00:00.000Z 210.600 0.164868\n",
117
+ " \n",
118
+ " [469 rows x 3 columns],\n",
119
+ " ticker\n",
120
+ " 0 TSLA\n",
121
+ " 1 TSLA\n",
122
+ " 2 TSLA\n",
123
+ " 3 TSLA\n",
124
+ " 4 TSLA\n",
125
+ " .. ...\n",
126
+ " 464 TSLA\n",
127
+ " 465 TSLA\n",
128
+ " 466 TSLA\n",
129
+ " 467 TSLA\n",
130
+ " 468 TSLA\n",
131
+ " \n",
132
+ " [469 rows x 1 columns])"
133
+ ]
134
+ },
135
+ "execution_count": 22,
136
+ "metadata": {},
137
+ "output_type": "execute_result"
138
  }
139
  ],
140
  "source": [
141
+ "sample_data"
 
 
 
 
 
 
 
142
  ]
143
  }
144
  ],