Merge branch 'main' of https://github.com/mtzeve/MLops_mod
Files changed:
- .DS_Store  +0 -0
- __pycache__/feature_view_freddie.cpython-311.pyc  +0 -0
- feature_view_freddie.py  +95 -0
- training_pipeline.ipynb  +90 -168
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
__pycache__/feature_view_freddie.cpython-311.pyc
ADDED
Binary file (3.19 kB).
feature_view_freddie.py
ADDED
@@ -0,0 +1,95 @@
+# %%
+# Import necessary libraries
+import pandas as pd                # For data manipulation using DataFrames
+import numpy as np                 # For numerical operations
+import matplotlib.pyplot as plt    # For data visualization
+import os                          # For operating system-related tasks
+import joblib                      # For saving and loading models
+import hopsworks                   # For getting access to hopsworks
+
+
+
+# Import specific modules from scikit-learn
+from sklearn.preprocessing import StandardScaler, OneHotEncoder  # For data preprocessing
+from sklearn.metrics import accuracy_score                       # For evaluating model accuracy
+
+# %%
+from feature_pipeline import tesla_fg
+from feature_pipeline import news_sentiment_fg
+
+# %%
+from dotenv import load_dotenv
+import os
+
+load_dotenv()
+
+# %%
+api_key = os.environ.get('hopsworks_api')
+project = hopsworks.login(api_key_value=api_key)
+fs = project.get_feature_store()
+
+# %%
+def create_stocks_feature_view(fs, version):
+
+    # Loading in the feature groups
+    tesla_fg = fs.get_feature_group('tesla_stock', version=1)
+    news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=1)
+
+    # Define the query
+    ds_query = tesla_fg.select(['date', 'open', 'ticker'])\
+        .join(news_sentiment_fg.select(['date', 'sentiment']))
+
+    # Create the feature view
+    feature_view = fs.create_feature_view(
+        name='tesla_stocks_fv',
+        query=ds_query,
+        labels=['ticker']
+    )
+
+    return feature_view, tesla_fg
+
+# %%
+try:
+    feature_view = fs.get_feature_view("tesla_stocks_fv", version=1)
+    tesla_fg = fs.get_feature_group('tesla_stock', version=1)
+except:
+    feature_view, tesla_fg = create_stocks_feature_view(fs, 1)
+
+# %%
+def fix_data_from_feature_view(df, start_date, end_date):
+    df = df.sort_values("date")
+    df = df.reset_index()
+    df = df.drop(columns=["index"])
+
+    # Create a boolean mask for rows that fall within the date range
+    mask = (pd.to_datetime(df['date']) >= pd.to_datetime(start_date)) & (pd.to_datetime(df['date']) <= pd.to_datetime(end_date))
+    len_df = np.shape(df)
+    df = df[mask]  # Use the boolean mask to filter the DataFrame
+    print('From shape {} to {} after cropping to given date range: {} to {}'.format(len_df, np.shape(df), start_date, end_date))
+
+    return df
+
+# %%
+#def create_stocks_feature_view(fs, version):
+
+    #Loading in the feature groups
+#    tesla_fg = fs.get_feature_group('tesla_stock', version = 3)
+#    news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version = 2)
+
+#    ds_query = tesla_fg.select(['date','open', 'ticker'])\
+#        .join(news_sentiment_fg.select_except(['ticker','time', 'amp_url', 'image_url']))
+
+#    return (fs.create_tesla_feature_view(
+#        name = 'tsla_stocks_fv',
+#        query = ds_query,
+#        labels=['ticker']
+#    ), tesla_fg)
+
+# %%
+#try:
+#    feature_view = fs.get_feature_view("tsla_stocks_fv", version=1)
+#    tesla_fg = fs.get_feature_group('tesla_stock', version=3)
+#except:
+#    feature_view, tesla_fg = create_stocks_feature_view(fs, 1)
+
+
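Taken together, feature_view_freddie.py builds a feature view that joins Tesla opening prices with daily news sentiment, and fix_data_from_feature_view() crops the joined frame to a date window. Below is a minimal usage sketch, not part of the committed file: it assumes the two helpers above are in scope, a valid 'hopsworks_api' key is set in the environment, and the 2023 date window is invented for illustration.

# Sketch only (not part of the commit): combining the two helpers above.
# Assumes create_stocks_feature_view() and fix_data_from_feature_view() are in
# scope and that the 'hopsworks_api' environment variable is set.
import os
import hopsworks

project = hopsworks.login(api_key_value=os.environ.get('hopsworks_api'))
fs = project.get_feature_store()

# Build the feature view, then read the joined features into memory;
# FeatureView.training_data() returns a (features, labels) tuple in hsfs.
feature_view, tesla_fg = create_stocks_feature_view(fs, version=1)
df, _ = feature_view.training_data()

# Crop to an (invented) date window with the helper above
df_2023 = fix_data_from_feature_view(df, '2023-01-01', '2023-12-31')

Note that both tesla_fg.select(['date', 'open', 'ticker']) and news_sentiment_fg.select(['date', 'sentiment']) carry a `date` column into the join; this is what later surfaces as the Hive "Ambiguous column reference date" error in the notebook diff below. Passing a prefix to the join (hsfs's Query.join accepts a prefix argument) would be one way to disambiguate.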
training_pipeline.ipynb
CHANGED
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-    "execution_count": ...,
+    "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -11,7 +11,7 @@
        "True"
       ]
      },
-     "execution_count": ...,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -26,197 +26,119 @@
   },
   {
    "cell_type": "code",
-    "execution_count": ...,
-    "metadata": {},
-    "outputs": [],
-    "source": []
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 6,
-    "metadata": {},
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Connection closed.\n",
-       "Connected. Call `.close()` to terminate connection gracefully.\n"
-      ]
-     },
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "\n",
-       "2024-05-06 12:27:55,071 WARNING: DeprecationWarning: ssl.PROTOCOL_TLS is deprecated\n",
-       "\n"
-      ]
-     },
-     {
-      "ename": "DatabaseError",
-      "evalue": "Execution failed on sql: WITH right_fg0 AS (...) ... unable to rollback",
-      "output_type": "error",
-      "traceback": [
-       "NotSupportedError                         Traceback (most recent call last)",
-       "(... pandas.io.sql and pyhive frames elided: Connection.rollback() raised ...)",
-       "NotSupportedError: Hive does not have transactions",
-       "\nThe above exception was the direct cause of the following exception:\n",
-       "DatabaseError                             Traceback (most recent call last)",
-       "(... hsfs feature_view/engine and pandas.io.sql frames elided ...)",
-       "DatabaseError: Execution failed on sql: WITH right_fg0 AS (SELECT * FROM (SELECT `fg1`.`date` `date`, `fg1`.`open` `open`, `fg1`.`ticker` `ticker`, ..., `fg0`.`date` `date`, `fg0`.`sentiment` `sentiment`, RANK() OVER (PARTITION BY `fg1`.`ticker`, `fg1`.`date` ORDER BY `fg0`.`date` DESC) pit_rank_hopsworks FROM `mtzeve_featurestore`.`tesla_stock_1` `fg1` INNER JOIN `mtzeve_featurestore`.`news_sentiment_updated_1` `fg0` ON `fg1`.`ticker` = `fg0`.`ticker` AND `fg1`.`date` >= `fg0`.`date` ...) NA WHERE `pit_rank_hopsworks` = 1) (SELECT ...)\n... SemanticException [Error 10007]: Ambiguous column reference date in na\nunable to rollback"
-      ]
-     }
-    ],
-    "source": [
-     "print('Fetching feature view from hopsworks...')\n",
-     "api_key = os.environ.get('hopsworks_api')\n",
-     "project = hopsworks.login(api_key_value=api_key)\n",
-     "fs = project.get_feature_store()\n",
-     "\n",
-     "# Get feature view \n",
-     "fv = fs.get_feature_view(\n",
-     "    name = 'tesla_stocks_fv',\n",
-     "    version = 1\n",
-     ")\n",
-     "# Get dataframe of training data from feature view\n",
-     "df, _ = fv.training_data(read_options={\"use_hive\": True})"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": 1,
-    "metadata": {},
-    "outputs": [
-     {
-      "ename": "ExternalClientError",
-      "evalue": "host cannot be of type NoneType, host is a non-optional argument to connect to hopsworks from an external environment.",
-      "output_type": "error",
-      "traceback": [
-       "ExternalClientError                       Traceback (most recent call last)",
-       "(... hsfs connection/client frames elided: hsfs.connection() was called without a host ...)",
-       "ExternalClientError: host cannot be of type NoneType, host is a non-optional argument to connect to hopsworks from an external environment."
-      ]
-     }
-    ],
-    "source": [
-     "import hsfs\n",
-     "import os \n",
-     "from dotenv import load_dotenv\n",
-     "\n",
-     "load_dotenv\n",
-     "\n",
-     "connection = hsfs.connection()\n",
-     "api_key = os.environ.get('hopsworks_api')\n",
-     "project = hopsworks.login(api_key_value=api_key)\n"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": ...,
-    "metadata": {},
-    "outputs": [
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "Connected. Call `.close()` to terminate connection gracefully.\n"
-      ]
-     },
-     {
-      "name": "stdout",
-      "output_type": "stream",
-      "text": [
-       "\n",
-       "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/549016\n",
-       "Connected. Call `.close()` to terminate connection gracefully.\n"
-      ]
-     }
-    ],
-    "source": [
-     "print('Fetching feature view from hopsworks...')\n",
-     "project = hopsworks.login()\n",
-     "fs = project.get_feature_store()\n",
-     "\n",
-     "# Get feature view \n",
-     "fv = fs.get_feature_view(\n",
-     "    name = 'tesla_stocks_fv',\n",
-     "    version = 1\n",
-     ")"
-    ]
-   },
-   {
-    "cell_type": "code",
-    "execution_count": ...,
-    "metadata": {},
-    "outputs": [
-     {
-      "(... output truncated in the original diff ...)"
-     }
-    ],
-    "source": [
-     "# Here, you will use the method to retrieve training data.\n",
-     "\n",
-     "try:\n",
-     "    df_train, _ = fv.get_training_data(training_dataset_version=1)\n",
-     "    print(df_train.head())\n",
-     "except Exception as e:\n",
-     "    print(\"Failed to load data:\", e)"
-    ]
-   }
-  ],
+   {
+    "cell_type": "code",
+    "execution_count": 21,
+    "metadata": {},
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "Connected. Call `.close()` to terminate connection gracefully.\n",
+       "\n",
+       "Sample data from the feature view:\n",
+       "<class 'tuple'>\n",
+       "(                         date     open  sentiment\n",
+       "0    2023-06-26T00:00:00.000Z  250.065   0.119444\n",
+       "1    2023-07-25T00:00:00.000Z  272.380   0.119444\n",
+       "2    2023-01-10T00:00:00.000Z  121.070   0.102207\n",
+       "3    2023-05-11T00:00:00.000Z  168.700   0.141296\n",
+       "4    2023-08-01T00:00:00.000Z  266.260   0.011111\n",
+       "..                        ...      ...        ...\n",
+       "464  2022-12-22T00:00:00.000Z  136.000   0.102207\n",
+       "465  2023-08-23T00:00:00.000Z  229.340   0.024046\n",
+       "466  2022-09-08T00:00:00.000Z  281.300   0.087306\n",
+       "467  2023-07-06T00:00:00.000Z  278.090   0.119444\n",
+       "468  2023-10-27T00:00:00.000Z  210.600   0.164868\n",
+       "\n",
+       "[469 rows x 3 columns], ticker\n",
+       "0    TSLA\n",
+       "1    TSLA\n",
+       "2    TSLA\n",
+       "3    TSLA\n",
+       "4    TSLA\n",
+       "..    ...\n",
+       "464  TSLA\n",
+       "465  TSLA\n",
+       "466  TSLA\n",
+       "467  TSLA\n",
+       "468  TSLA\n",
+       "\n",
+       "[469 rows x 1 columns])\n"
+      ]
+     }
+    ],
+    "source": [
+     "import hsfs\n",
+     "\n",
+     "# Connection setup\n",
+     "# Connect to Hopsworks\n",
+     "api_key = os.getenv('hopsworks_api')\n",
+     "connection = hsfs.connection()\n",
+     "fs = connection.get_feature_store()\n",
+     "\n",
+     "# Get feature view\n",
+     "feature_view = fs.get_feature_view(\n",
+     "    name='tesla_stocks_fv',\n",
+     "    version=1\n",
+     ")\n",
+     "td_version, td_job = feature_view.create_train_test_split(\n",
+     "    description = 'tesla and news sentiment training dataset',\n",
+     "    data_format = \"csv\",\n",
+     "    test_size = 0.2,\n",
+     "    coalesce = True,\n",
+     "    statistics_config={\n",
+     "        \"enabled\": True,\n",
+     "        \"histograms\": False,\n",
+     "        \"correlations\": False\n",
+     "    }\n",
+     ")\n"
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 22,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "(                          date     open  sentiment\n",
+        " 0    2023-06-26T00:00:00.000Z  250.065   0.119444\n",
+        " 1    2023-07-25T00:00:00.000Z  272.380   0.119444\n",
+        " 2    2023-01-10T00:00:00.000Z  121.070   0.102207\n",
+        " 3    2023-05-11T00:00:00.000Z  168.700   0.141296\n",
+        " 4    2023-08-01T00:00:00.000Z  266.260   0.011111\n",
+        " ..                         ...      ...        ...\n",
+        " 464  2022-12-22T00:00:00.000Z  136.000   0.102207\n",
+        " 465  2023-08-23T00:00:00.000Z  229.340   0.024046\n",
+        " 466  2022-09-08T00:00:00.000Z  281.300   0.087306\n",
+        " 467  2023-07-06T00:00:00.000Z  278.090   0.119444\n",
+        " 468  2023-10-27T00:00:00.000Z  210.600   0.164868\n",
+        " \n",
+        " [469 rows x 3 columns],\n",
+        "     ticker\n",
+        " 0     TSLA\n",
+        " 1     TSLA\n",
+        " 2     TSLA\n",
+        " 3     TSLA\n",
+        " 4     TSLA\n",
+        " ..     ...\n",
+        " 464   TSLA\n",
+        " 465   TSLA\n",
+        " 466   TSLA\n",
+        " 467   TSLA\n",
+        " 468   TSLA\n",
+        " \n",
+        " [469 rows x 1 columns])"
+       ]
+      },
+      "execution_count": 22,
+      "metadata": {},
+      "output_type": "execute_result"
+     }
+    ],
+    "source": [
+     "sample_data"
+    ]
+   }
+  ],
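The net effect of the notebook change: the cells that read training data in memory through Hive (and failed with the "Ambiguous column reference date" DatabaseError and an ExternalClientError from a host-less hsfs.connection()) are replaced by a cell that materializes a train/test split with create_train_test_split(). As a sketch, not part of the commit, of how such a materialized split would typically be read back, assuming the feature_view object and the td_version returned by create_train_test_split() in the cell above:

# Sketch only (not part of the commit): read back the materialized split.
# Assumes feature_view and td_version come from the create_train_test_split()
# cell above; hsfs FeatureView exposes get_train_test_split() for this.
X_train, X_test, y_train, y_test = feature_view.get_train_test_split(
    training_dataset_version=td_version
)
print(X_train.shape, X_test.shape)   # joined date/open/sentiment features
print(y_train.shape, y_test.shape)   # the 'ticker' label column

Materializing the split server-side also sidesteps the client-side Hive read that raised the ambiguous-column error, since the duplicate `date` columns are resolved when the training dataset is written.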