In [1]:
# Importing necessary libraries
import pandas as pd               # For data manipulation using DataFrames
import numpy as np                # For numerical operations
import matplotlib.pyplot as plt   # For data visualization
import os                         # For operating system-related tasks
import joblib                     # For saving and loading models
import hopsworks                  # For getting access to hopsworks

from SML.feature_pipeline import tesla_fg, news_sentiment_fg   #Loading in the tesla_fg & news sentiment fg
#Making the notebook able to fetch from the .env file
from dotenv import load_dotenv
import os

# Load the variables defined in the .env file so os.environ can see them
load_dotenv()

# Connect to Hopsworks: read the API key from the `hopsworks_api` environment
# variable, log in to the project, and grab a handle to its feature store.
# NOTE(review): os.environ.get returns None when the variable is unset, so
# login is then called with api_key_value=None — confirm that is acceptable.
api_key = os.environ.get('hopsworks_api')
project = hopsworks.login(api_key_value=api_key)
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399
Connected. Call `.close()` to terminate connection gracefully.
         date  1. open   2. high  3. low  4. close   5. volume ticker
0  2024-05-13   170.00  175.4000  169.00    171.89  67018903.0   TSLA
1  2024-05-10   173.05  173.0599  167.75    168.47  72627178.0   TSLA
2  2024-05-09   175.01  175.6200  171.37    171.97  65950292.0   TSLA
3  2024-05-08   171.59  176.0600  170.15    174.72  79969488.0   TSLA
4  2024-05-07   182.40  183.2600  177.40    177.81  75045854.0   TSLA
Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')


Uploading Dataframe: 0.00% |          | Rows 0/3492 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: tesla_stock_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_1_offline_fg_materialization/executions


Uploading Dataframe: 0.00% |          | Rows 0/74 | Elapsed Time: 00:00 | Remaining Time: ?

Launching job: news_sentiment_updated_1_offline_fg_materialization
Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions
Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399
Connected. Call `.close()` to terminate connection gracefully.


In [2]:
# Define a helper that builds the Tesla stock + news sentiment feature view

def create_stocks_feature_view(fs, version, fg_version=5):
    """Create the ``tesla_stocks_fv`` feature view from the stock and sentiment feature groups.

    The view selects ``date``, ``open`` and ``ticker`` from the ``tesla_stock``
    feature group, joins in ``sentiment`` from ``news_sentiment_updated``, and
    marks ``open`` as the label.

    Args:
        fs: Feature store handle providing ``get_feature_group`` and
            ``create_feature_view``.
        version: Version to assign to the created feature view. Previously this
            argument was accepted but ignored, so the created version did not
            have to match the one the caller asked for.
        fg_version: Version of both source feature groups to read. Defaults to
            5, the value that used to be hard-coded.

    Returns:
        A ``(feature_view, tesla_fg)`` tuple: the newly created feature view
        and the ``tesla_stock`` feature group it was built from.
    """
    # Load the two source feature groups
    tesla_fg = fs.get_feature_group('tesla_stock', version=fg_version)
    news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=fg_version)

    # Build the query. No explicit join key is given, so the join relies on the
    # library's default key resolution.
    # NOTE(review): confirm both feature groups share the intended join key.
    ds_query = tesla_fg.select(['date', 'open', 'ticker']).join(
        news_sentiment_fg.select(['sentiment'])
    )

    # Create the feature view at the requested version so that a later
    # get_feature_view(..., version=version) finds exactly this view.
    feature_view = fs.create_feature_view(
        name='tesla_stocks_fv',
        version=version,
        query=ds_query,
        labels=['open'],
    )

    return feature_view, tesla_fg

In [3]:
# Reuse the existing feature view (and its Tesla feature group) when they can
# be fetched; otherwise fall back to creating the feature view.
try:
    feature_view = fs.get_feature_view("tesla_stocks_fv", version=5)
    tesla_fg = fs.get_feature_group('tesla_stock', version=5)
except Exception:
    # Catch Exception instead of using a bare `except:` so KeyboardInterrupt
    # and SystemExit still stop the notebook rather than silently triggering
    # feature-view creation.
    # NOTE(review): ideally narrow this further to the lookup-failure error
    # raised by the Hopsworks client — confirm the exact exception class.
    feature_view, tesla_fg = create_stocks_feature_view(fs, 5)

Feature view created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/693399/fs/689222/fv/tesla_stocks_fv/version/5


In [4]:
# Define a helper that crops feature-view data to a date range and drops non-business days
def fix_data_from_feature_view(df,start_date,end_date):
    """Crop ``df`` to a date range and drop the rows flagged as non-business days.

    Args:
        df: DataFrame with a ``date`` column that ``pd.to_datetime`` can parse.
        start_date: First date to keep (inclusive).
        end_date: Last date to keep (inclusive).

    Returns:
        The cropped DataFrame with the non-business-day rows removed. The shape
        before/after each step and the final frame are printed along the way.
    """
    # Order chronologically and renumber the rows from zero
    ordered = df.sort_values("date").reset_index().drop(columns=["index"])
    shape_before = ordered.shape

    # Keep only the rows whose date lies inside [start_date, end_date]
    dates = pd.to_datetime(ordered['date'])
    lower, upper = pd.to_datetime(start_date), pd.to_datetime(end_date)
    cropped = ordered[(dates >= lower) & (dates <= upper)]
    print('From shape {} to {} after cropping to given date range: {} to {}'.format(shape_before,cropped.shape,start_date,end_date))

    # extract_business_day is defined elsewhere; only its second return value
    # is used here, as a sequence of "open" flags.
    # NOTE(review): presumably it yields one flag per row of the cropped
    # frame — the boolean indexing below depends on that; confirm.
    _, is_open = extract_business_day(start_date,end_date)
    closed = [not flag for flag in is_open]  # True marks a row to discard

    # Drop every row flagged as closed
    filtered_df = cropped.drop(cropped[closed].index)
    print('From shape {} to {} after removing non-business days'.format(cropped.shape,filtered_df.shape))
    print(filtered_df)

    return filtered_df