{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import necessary libraries\n", "import pandas as pd # For data manipulation using DataFrames\n", "import numpy as np # For numerical operations\n", "import matplotlib.pyplot as plt # For data visualization\n", "import os # For operating system-related tasks\n", "import joblib # For saving and loading models\n", "import hopsworks # For getting access to hopsworks\n", "\n", "\n", "\n", "# Import specific modules from scikit-learn\n", "from sklearn.preprocessing import StandardScaler, OneHotEncoder # For data preprocessing\n", "from sklearn.metrics import accuracy_score # For evaluating model accuracy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " date 1. open 2. high 3. low 4. close 5. volume ticker\n", "0 2024-05-03 182.10 184.78 178.4200 181.19 75491539.0 TSLA\n", "1 2024-05-02 182.86 184.60 176.0200 180.01 89148041.0 TSLA\n", "2 2024-05-01 182.00 185.86 179.0100 179.99 92829719.0 TSLA\n", "3 2024-04-30 186.98 190.95 182.8401 183.28 127031787.0 TSLA\n", "4 2024-04-29 188.42 198.87 184.5400 194.05 243869678.0 TSLA\n", "Connected. Call `.close()` to terminate connection gracefully.\n", "\n", "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n", "Connected. Call `.close()` to terminate connection gracefully.\n", "Index(['date', 'open', 'high', 'low', 'close', 'volume', 'ticker'], dtype='object')\n", "2024-05-06 13:44:59,122 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. 
Provide the feature_name string instead.\n", "\n", "Feature Group created successfully, explore it at \n", "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/787797\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1b857e05ae714fc09a2a7fcd05f56a73", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Uploading Dataframe: 0.00% | | Rows 0/3486 | Elapsed Time: 00:00 | Remaining Time: ?" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Launching job: tesla_stock_2_offline_fg_materialization\n", "Job started successfully, you can follow the progress at \n", "https://c.app.hopsworks.ai/p/693399/jobs/named/tesla_stock_2_offline_fg_materialization/executions\n", "2024-05-06 13:45:08,516 WARNING: DeprecationWarning: Providing event_time as a single-element list is deprecated and will be dropped in future versions. Provide the feature_name string instead.\n", "\n", "Feature Group created successfully, explore it at \n", "https://c.app.hopsworks.ai:443/p/693399/fs/689222/fg/785786\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "98e7ee7cb2c943b8893d0ae2a7254104", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Uploading Dataframe: 0.00% | | Rows 0/66 | Elapsed Time: 00:00 | Remaining Time: ?" 
] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Launching job: news_sentiment_updated_2_offline_fg_materialization\n", "Job started successfully, you can follow the progress at \n", "https://c.app.hopsworks.ai/p/693399/jobs/named/news_sentiment_updated_2_offline_fg_materialization/executions\n" ] } ], "source": [ "from feature_pipeline import tesla_fg\n", "from feature_pipeline import news_sentiment_fg" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from dotenv import load_dotenv\n", "import os\n", "\n", "load_dotenv()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Connection closed.\n", "Connected. Call `.close()` to terminate connection gracefully.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/693399\n", "Connected. 
# %% Connect to Hopsworks and open the project's feature store.
# The API key is read from the `hopsworks_api` environment variable
# (populated from a .env file by the earlier `load_dotenv()` call).
api_key = os.environ.get('hopsworks_api')
project = hopsworks.login(api_key_value=api_key)
fs = project.get_feature_store()

# Version used both when looking up and when creating the feature view,
# so that the lookup and the fallback creation always refer to the same object.
FEATURE_VIEW_VERSION = 1


# %% Feature-view construction helper.
def create_stocks_feature_view(fs, version):
    """Create the ``tesla_stocks_fv`` feature view and return it with its stock feature group.

    The view is built from the ``date``, ``open`` and ``ticker`` features of the
    ``tesla_stock`` feature group joined with the ``sentiment`` feature of the
    ``news_sentiment_updated`` feature group; ``open`` is declared as the label.

    Args:
        fs: Feature store handle providing ``get_feature_group`` and
            ``create_feature_view``.
        version: Version number to assign to the created feature view.

    Returns:
        A ``(feature_view, tesla_fg)`` tuple: the newly created feature view
        and the ``tesla_stock`` feature group it was built from.
    """
    # Load the two source feature groups (both pinned to version 1).
    tesla_fg = fs.get_feature_group('tesla_stock', version=1)
    news_sentiment_fg = fs.get_feature_group('news_sentiment_updated', version=1)

    # No explicit join key is given, so the join relies on the library's default.
    ds_query = tesla_fg.select(['date', 'open', 'ticker']).join(
        news_sentiment_fg.select(['sentiment'])
    )

    # Forward the requested version so the created view matches what callers
    # later look up, instead of relying on an implicitly chosen version.
    feature_view = fs.create_feature_view(
        name='tesla_stocks_fv',
        version=version,
        query=ds_query,
        labels=['open'],
    )

    return feature_view, tesla_fg


# %% Fetch the feature view, creating it if the lookup fails.
try:
    feature_view = fs.get_feature_view("tesla_stocks_fv", version=FEATURE_VIEW_VERSION)
    tesla_fg = fs.get_feature_group('tesla_stock', version=1)
except Exception:
    # `Exception` rather than a bare `except` so that KeyboardInterrupt and
    # SystemExit still propagate while a failed lookup falls back to creation.
    feature_view, tesla_fg = create_stocks_feature_view(fs, FEATURE_VIEW_VERSION)


# %% Post-processing of data read from the feature view.
def fix_data_from_feature_view(df, start_date, end_date):
    """Sort by date, crop to ``[start_date, end_date]`` and keep only business days.

    Two progress messages reporting the frame's shape before and after each
    filtering step are printed.

    Args:
        df: DataFrame with a ``date`` column parseable by ``pd.to_datetime``.
        start_date: First date to keep (inclusive); anything ``pd.to_datetime`` accepts.
        end_date: Last date to keep (inclusive); anything ``pd.to_datetime`` accepts.

    Returns:
        A DataFrame holding the rows of ``df`` that fall inside the date range
        and are flagged as open by ``extract_business_day``.

    Raises:
        ValueError: If the number of flags returned by ``extract_business_day``
            differs from the number of rows left after cropping.
    """
    # drop=True discards the old index outright; this avoids creating and then
    # deleting a helper column, which breaks when the index has a name.
    df = df.sort_values("date").reset_index(drop=True)

    # Parse the date column once and build an inclusive range mask.
    dates = pd.to_datetime(df['date'])
    in_range = (dates >= pd.to_datetime(start_date)) & (dates <= pd.to_datetime(end_date))
    shape_before = df.shape
    df = df[in_range]
    print('From shape {} to {} after cropping to given date range: {} to {}'.format(shape_before, df.shape, start_date, end_date))

    # NOTE(review): `extract_business_day` is not defined in the visible cells of
    # this notebook; it must be in scope before this function is called. It is
    # used here as returning a pair whose second element holds one truthy/falsy
    # flag per row of the cropped frame (presumably True on trading days - confirm).
    _, is_open = extract_business_day(start_date, end_date)
    keep = np.array([bool(flag) for flag in is_open], dtype=bool)

    # The flags are applied positionally, so they must line up one-to-one with
    # the cropped rows (e.g. no missing or duplicated dates in `df`).
    if len(keep) != len(df):
        raise ValueError(
            'Business-day mask has {} entries but the cropped frame has {} rows'.format(len(keep), len(df))
        )

    filtered_df = df[keep]
    print('From shape {} to {} after removing non-business days'.format(df.shape, filtered_df.shape))

    return filtered_df