{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Connected. Call `.close()` to terminate connection gracefully.\n", "\n", "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n", "Connected. Call `.close()` to terminate connection gracefully.\n", " date 1. open 2. high 3. low 4. close 5. volume ticker\n", "0 2024-05-13 170.00 175.4000 169.00 171.89 67018903.0 TSLA\n", "1 2024-05-10 173.05 173.0599 167.75 168.47 72627178.0 TSLA\n", "2 2024-05-09 175.01 175.6200 171.37 171.97 65950292.0 TSLA\n", "3 2024-05-08 171.59 176.0600 170.15 174.72 79969488.0 TSLA\n", "4 2024-05-07 182.40 183.2600 177.40 177.81 75045854.0 TSLA\n", "Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "aea133b66b924b1d9e2f35592658cc73", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Uploading Dataframe: 0.00% | | Rows 0/3492 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Launching job: tesla_stock_1_offline_fg_materialization\n", "Job started successfully, you can follow the progress at \n", "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_1_offline_fg_materialization/executions\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b84d15d1c321483fb29e66b310fd95e2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Uploading Dataframe: 0.00% | | Rows 0/74 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Launching job: news_sentiment_updated_1_offline_fg_materialization\n", "Job started successfully, you can follow the progress at \n", "https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_1_offline_fg_materialization/executions\n", "Connection closed.\n", "Connected. Call `.close()` to terminate connection gracefully.\n", "\n", "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n", "Connected. Call `.close()` to terminate connection gracefully.\n" ] } ], "source": [ "# Importing necessary libraries\n", "import pandas as pd # For data manipulation using DataFrames\n", "import numpy as np # For numerical operations\n", "import matplotlib.pyplot as plt # For data visualization\n", "import os # For operating system-related tasks\n", "import joblib # For saving and loading models\n", "import hopsworks # For getting access to hopsworks\n", "\n", "from SML.feature_pipeline import tesla_fg, news_sentiment_fg #Loading in the tesla_fg & news sentiment fg\n", "#Making the notebook able to fetch from the .env file\n", "from dotenv import load_dotenv\n", "import os\n", "\n", "load_dotenv()\n", "\n", "#Getting connected to hopsworks\n", "api_key = os.environ.get('hopsworks_api')\n", "project = hopsworks.login(api_key_value=api_key)\n", "fs = project.get_feature_store()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "#Defining the function to create feature view\n", "\n", "def create_stocks_feature_view(fs, version):\n", "\n", " # Loading in the feature groups\n", " tesla_fg = fs.get_feature_group('tesla_stock', version=5)\n", " news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=5)\n", "\n", " # Defining the query\n", " ds_query = tesla_fg.select(['date', 'open', 'ticker'])\\\n", " .join(news_sentiment_fg.select(['sentiment']))\n", "\n", " # Creating the feature view\n", " feature_view = fs.create_feature_view(\n", " name='tesla_stocks_fv',\n", " query=ds_query,\n", " labels=['open']\n", " )\n", "\n", " return feature_view, tesla_fg" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Feature view created successfully, explore it at \n", "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fv/tesla_stocks_fv/version/5\n" ] } ], "source": [ "#Creating the feature view\n", "try:\n", " feature_view = fs.get_feature_view(\"tesla_stocks_fv\", version=5)\n", " tesla_fg = fs.get_feature_group('tesla_stock', version=5)\n", "except:\n", " feature_view, tesla_fg = create_stocks_feature_view(fs, 5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#Defining a function to get fixed data from the feature view\n", "def fix_data_from_feature_view(df,start_date,end_date):\n", " df = df.sort_values(\"date\")\n", " df = df.reset_index()\n", " df = df.drop(columns=[\"index\"])\n", "\n", " # Create a boolean mask for rows that fall within the date range\n", " mask = (pd.to_datetime(df['date']) >= pd.to_datetime(start_date)) & (pd.to_datetime(df['date']) <= pd.to_datetime(end_date))\n", " len_df = np.shape(df)\n", " df = df[mask] # Use the boolean mask to filter the DataFrame\n", " print('From shape {} to {} after cropping to given date range: {} to {}'.format(len_df,np.shape(df),start_date,end_date))\n", "\n", " # Get rid off all non-business days\n", " isBusinessDay, is_open = extract_business_day(start_date,end_date)\n", " is_open = [not i for i in is_open] # Invert the mask to be able to drop all non-buisiness days\n", "\n", " filtered_df = df.drop(df[is_open].index) # Use the mask to filter the rows of the DataFrame\n", " print('From shape {} to {} after removing non-business days'.format(np.shape(df),np.shape(filtered_df)))\n", " print(filtered_df)\n", " \n", " return filtered_df" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }